"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST).""" from __future__ import annotations import re from datetime import date, datetime from pathlib import Path # Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST). S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE) def yyyymmdd_in_name(name: str) -> str | None: m = S2_PREP_DATE_RE.search(name) return m.group(1) if m else None def yyyymmdd_from_iso(iso_d: str) -> str: return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d") def acquisition_yyyymmdd_in_window( prepared_s2: Path, window_start: date, window_end: date ) -> set[str]: """All S2 acquisition days (from REFL filenames) inside [window_start, window_end].""" out: set[str] = set() if not prepared_s2.is_dir(): return out for p in prepared_s2.glob("*REFL.tif"): m = re.search(r"S2A_MSIL2A_(\d{8})_REFL\.tif$", p.name) if not m: continue d = datetime.strptime(m.group(1), "%Y%m%d").date() if window_start <= d <= window_end: out.add(m.group(1)) return out def build_masked_s2_dir( prepared_s2: Path, excluded_yyyymmdd: set[str], dest: Path, patterns: tuple[str, ...], ) -> int: """Symlink all files matching ``patterns`` except excluded acquisition days.""" dest.mkdir(parents=True, exist_ok=True) n = 0 for pattern in patterns: for src in sorted(prepared_s2.glob(pattern)): if not src.is_file() and not src.is_symlink(): continue y = yyyymmdd_in_name(src.name) if y and y in excluded_yyyymmdd: continue link = dest / src.name if link.exists() or link.is_symlink(): link.unlink() link.symlink_to(src.resolve()) n += 1 return n def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None: """Fail if the withheld validation acquisition is present in the fusion input dir.""" for p in masked_s2_dir.iterdir(): y = yyyymmdd_in_name(p.name) if y == withheld_yyyymmdd: raise RuntimeError( f"Data leakage: withheld acquisition {withheld_yyyymmdd} " f"found in masked S2 dir {masked_s2_dir}" ) def build_masked_s2_dir_bti( prepared_s2: Path, excluded_yyyymmdd: set[str], dest: Path, ) -> int: return build_masked_s2_dir( prepared_s2, excluded_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif") ) def build_masked_s2_dir_itb( prepared_s2: Path, excluded_yyyymmdd: set[str], dest: Path, ) -> int: return build_masked_s2_dir( prepared_s2, excluded_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif") )