91 lines
2.8 KiB
Python
91 lines
2.8 KiB
Python
"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from datetime import date, datetime
|
|
from pathlib import Path
|
|
|
|
# Captures the 8-digit acquisition day (YYYYMMDD) from a prepared S2 file name.
# The digits must be preceded by an underscore and followed by a trailing
# ``_REFL.tif``, ``_GCC.tif`` or ``_DIST_CLOUD.tif`` (matched case-insensitively).
# Product pairing per the original note: BtI uses REFL/DIST, ItB uses GCC/DIST.
S2_PREP_DATE_RE = re.compile(
    r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$",
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
def yyyymmdd_in_name(name: str) -> str | None:
    """Return the ``YYYYMMDD`` token that ``S2_PREP_DATE_RE`` finds in ``name``.

    Returns ``None`` when the pattern does not match the name.
    """
    found = S2_PREP_DATE_RE.search(name)
    if found is None:
        return None
    return found.group(1)
|
|
|
|
|
|
def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert an ISO ``YYYY-MM-DD`` string to the compact ``YYYYMMDD`` form.

    Only the first ten characters of ``iso_d`` are parsed, so any time or
    suffix after the date part is ignored.

    Raises:
        ValueError: if those ten characters do not parse as ``%Y-%m-%d``.
    """
    date_part = iso_d[:10]
    parsed = datetime.strptime(date_part, "%Y-%m-%d")
    return parsed.strftime("%Y%m%d")
|
|
|
|
|
|
def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
) -> set[str]:
    """Collect S2 acquisition days whose REFL file falls inside the window.

    Scans ``prepared_s2`` (non-recursively) for ``*REFL.tif`` files whose name
    ends with ``S2<unit>_MSIL2A_<YYYYMMDD>_REFL.tif`` and keeps every day ``d``
    with ``window_start <= d <= window_end`` (both bounds inclusive).

    Any Sentinel-2 unit letter is accepted (S2A, S2B, ...). Matching only
    ``S2A`` would silently drop acquisitions from the other units, so those
    days would never be reported for exclusion.

    Args:
        prepared_s2: Directory holding the prepared S2 rasters.
        window_start: First calendar day of the window (inclusive).
        window_end: Last calendar day of the window (inclusive).

    Returns:
        The matching days as ``YYYYMMDD`` strings; an empty set when
        ``prepared_s2`` is not a directory.

    Raises:
        ValueError: if a matching file name carries eight digits that are not
            a valid calendar date.
    """
    days: set[str] = set()
    if not prepared_s2.is_dir():
        return days

    # Compiled once per call instead of being looked up for every file.
    refl_name_re = re.compile(r"S2[A-Z]_MSIL2A_(\d{8})_REFL\.tif$")
    for path in prepared_s2.glob("*REFL.tif"):
        hit = refl_name_re.search(path.name)
        if hit is None:
            continue
        stamp = hit.group(1)
        acquired = datetime.strptime(stamp, "%Y%m%d").date()
        if window_start <= acquired <= window_end:
            days.add(stamp)
    return days
|
|
|
|
|
|
def build_masked_s2_dir(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
    patterns: tuple[str, ...],
) -> int:
    """Symlink files matching ``patterns`` into ``dest``, skipping excluded days.

    For each glob pattern, every regular file or symlink directly inside
    ``prepared_s2`` is linked into ``dest`` under the same name, unless
    ``yyyymmdd_in_name`` finds an acquisition day in its name that is listed in
    ``excluded_yyyymmdd``. Files without a recognisable day are always linked.

    Args:
        prepared_s2: Directory containing the prepared S2 rasters.
        excluded_yyyymmdd: Acquisition days (``YYYYMMDD``) to leave out.
        dest: Directory that receives the symlinks; created if missing.
        patterns: Glob patterns evaluated against ``prepared_s2``.

    Returns:
        The number of symlinks created. A file matched by more than one
        pattern is relinked and counted once per match.

    Raises:
        ValueError: if ``dest`` resolves to ``prepared_s2`` itself. Linking a
            directory into itself would unlink each source file and replace it
            with a self-referencing symlink.
    """
    if dest.resolve() == prepared_s2.resolve():
        raise ValueError(
            f"dest must differ from prepared_s2, got {dest} for both"
        )
    dest.mkdir(parents=True, exist_ok=True)

    linked = 0
    for pattern in patterns:
        # Sorted so links are created in a reproducible order.
        for src in sorted(prepared_s2.glob(pattern)):
            # Keep regular files and symlinks; skip directories and the like.
            if not (src.is_file() or src.is_symlink()):
                continue
            link = dest / src.name
            day = yyyymmdd_in_name(src.name)
            if day and day in excluded_yyyymmdd:
                # ``dest`` may be reused across runs: a link left behind by an
                # earlier run would keep the excluded acquisition visible.
                if link.is_symlink():
                    link.unlink()
                continue
            # Replace whatever already sits at the link path, including
            # dangling symlinks (for which ``exists()`` is False).
            if link.exists() or link.is_symlink():
                link.unlink()
            link.symlink_to(src.resolve())
            linked += 1
    return linked
|
|
|
|
|
|
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Raise if any entry of ``masked_s2_dir`` carries the withheld day.

    Each directory entry's name is passed through ``yyyymmdd_in_name``; the
    check fails as soon as one of them yields ``withheld_yyyymmdd``.

    Raises:
        RuntimeError: when the withheld acquisition day is found.
    """
    leaked = any(
        yyyymmdd_in_name(entry.name) == withheld_yyyymmdd
        for entry in masked_s2_dir.iterdir()
    )
    if not leaked:
        return
    raise RuntimeError(
        f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
        f"found in masked S2 dir {masked_s2_dir}"
    )
|
|
|
|
|
|
def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked S2 directory from the REFL and DIST_CLOUD rasters.

    Delegates to ``build_masked_s2_dir`` with the glob patterns ``*REFL.tif``
    and ``*DIST_CLOUD.tif`` and returns its result unchanged.
    """
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, bti_patterns)
|
|
|
|
|
|
def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked S2 directory from the GCC and DIST_CLOUD rasters.

    Delegates to ``build_masked_s2_dir`` with the glob patterns ``*GCC.tif``
    and ``*DIST_CLOUD.tif`` and returns its result unchanged.
    """
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, itb_patterns)
|