efast-phenocam-validation/gap_validation/s2_mask_dir.py
2026-05-17 15:55:15 +02:00

91 lines
2.8 KiB
Python

"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
from __future__ import annotations
import re
from datetime import date, datetime
from pathlib import Path
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
# Group 1 captures the 8-digit day that directly precedes a trailing
# ``_REFL.tif``, ``_GCC.tif`` or ``_DIST_CLOUD.tif``; matching is
# case-insensitive and anchored at the end of the name.
S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
def yyyymmdd_in_name(name: str) -> str | None:
    """Return the 8-digit acquisition day found in a prepared S2 file name.

    The name is searched with ``S2_PREP_DATE_RE``; the first capture group
    is returned on a match, ``None`` otherwise.
    """
    match = S2_PREP_DATE_RE.search(name)
    if match is None:
        return None
    return match.group(1)
def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert an ISO-style date string to compact ``YYYYMMDD`` form.

    Only the first ten characters of ``iso_d`` are parsed (as ``%Y-%m-%d``),
    so anything following the calendar date is ignored.

    Raises:
        ValueError: if those characters do not parse as ``%Y-%m-%d``.
    """
    calendar_part = iso_d[:10]
    parsed = datetime.strptime(calendar_part, "%Y-%m-%d")
    return parsed.strftime("%Y%m%d")
def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
) -> set[str]:
    """All S2 acquisition days (from REFL filenames) inside [window_start, window_end].

    Only files in ``prepared_s2`` matching the glob ``*REFL.tif`` whose name
    ends in ``S2<platform>_MSIL2A_<YYYYMMDD>_REFL.tif`` are considered; both
    window bounds are inclusive.

    Args:
        prepared_s2: Directory holding the prepared S2 rasters.
        window_start: First calendar day of the window (inclusive).
        window_end: Last calendar day of the window (inclusive).

    Returns:
        The ``YYYYMMDD`` strings of all matching acquisitions in the window;
        an empty set when ``prepared_s2`` is not a directory.

    Raises:
        ValueError: if a matching file name carries eight digits that are
            not a valid calendar date.
    """
    if not prepared_s2.is_dir():
        return set()

    # Accept every Sentinel-2 platform letter (S2A, S2B, S2C, ...). Matching
    # only "S2A" would silently drop other platforms' acquisitions from the
    # result, so they would never be excluded from the gap window.
    refl_name_re = re.compile(r"S2[A-Z]_MSIL2A_(\d{8})_REFL\.tif$")

    days: set[str] = set()
    for path in prepared_s2.glob("*REFL.tif"):
        m = refl_name_re.search(path.name)
        if m is None:
            continue
        day = m.group(1)
        if window_start <= datetime.strptime(day, "%Y%m%d").date() <= window_end:
            days.add(day)
    return days
def build_masked_s2_dir(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
    patterns: tuple[str, ...],
) -> int:
    """Symlink all files matching ``patterns`` except excluded acquisition days.

    Args:
        prepared_s2: Directory whose entries are globbed with each pattern.
        excluded_yyyymmdd: ``YYYYMMDD`` days to leave out. A file is skipped
            when ``yyyymmdd_in_name`` finds one of these days in its name;
            files without a recognisable day are always linked.
        dest: Directory that receives the symlinks (created if missing).
            Existing entries with the same name as a linked file are replaced.
        patterns: Glob patterns evaluated relative to ``prepared_s2``.

    Returns:
        Number of symlinks created. A file matched by several patterns is
        counted once per pattern.

    Raises:
        ValueError: if ``dest`` and ``prepared_s2`` resolve to the same
            directory.
    """
    # Linking a directory onto itself would unlink each source file and
    # replace it with a self-referential symlink, so refuse before touching
    # anything on disk.
    if dest.resolve() == prepared_s2.resolve():
        raise ValueError(
            f"dest must differ from prepared_s2 (both resolve to {dest.resolve()})"
        )

    dest.mkdir(parents=True, exist_ok=True)
    n = 0
    for pattern in patterns:
        for src in sorted(prepared_s2.glob(pattern)):
            # Keep regular files and symlinks (including dangling ones);
            # skip directories and other entry types.
            if not src.is_file() and not src.is_symlink():
                continue
            link = dest / src.name
            y = yyyymmdd_in_name(src.name)
            if y and y in excluded_yyyymmdd:
                # A reused dest may still hold a link created by an earlier
                # call with a different exclusion set; remove it so the
                # excluded day is really absent. Only symlinks are removed,
                # never regular files.
                if link.is_symlink():
                    link.unlink()
                continue
            # is_symlink() also catches dangling links, for which exists()
            # is False.
            if link.exists() or link.is_symlink():
                link.unlink()
            link.symlink_to(src.resolve())
            n += 1
    return n
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Fail if the withheld validation acquisition is present in the fusion input dir.

    Every entry of ``masked_s2_dir`` is checked by name: if
    ``yyyymmdd_in_name`` extracts a day equal to ``withheld_yyyymmdd`` from
    any of them, the check fails.

    Raises:
        RuntimeError: if at least one entry carries the withheld day.
    """
    leaked = any(
        yyyymmdd_in_name(entry.name) == withheld_yyyymmdd
        for entry in masked_s2_dir.iterdir()
    )
    if leaked:
        raise RuntimeError(
            f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
            f"found in masked S2 dir {masked_s2_dir}"
        )
def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """BtI variant of ``build_masked_s2_dir``.

    Links the ``*REFL.tif`` and ``*DIST_CLOUD.tif`` files of ``prepared_s2``
    into ``dest`` and returns the count reported by ``build_masked_s2_dir``.
    """
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(
        prepared_s2=prepared_s2,
        excluded_yyyymmdd=excluded_yyyymmdd,
        dest=dest,
        patterns=bti_patterns,
    )
def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """ItB variant of ``build_masked_s2_dir``.

    Links the ``*GCC.tif`` and ``*DIST_CLOUD.tif`` files of ``prepared_s2``
    into ``dest`` and returns the count reported by ``build_masked_s2_dir``.
    """
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(
        prepared_s2=prepared_s2,
        excluded_yyyymmdd=excluded_yyyymmdd,
        dest=dest,
        patterns=itb_patterns,
    )