added gap validation.
This commit is contained in:
parent
374be6865d
commit
740249115b
12 changed files with 997 additions and 116 deletions
|
|
@ -1,8 +1,9 @@
|
|||
"""Symlink prepared S2 into a temp dir, omitting one acquisition (REFL + DIST_CLOUD)."""
|
||||
"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
|
||||
|
|
@ -14,10 +15,34 @@ def yyyymmdd_in_name(name: str) -> str | None:
|
|||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert an ISO date/datetime string to a compact ``YYYYMMDD`` string.

    Only the first ten characters (the ``YYYY-MM-DD`` part) are parsed, so a
    trailing time component such as ``T12:00:00Z`` is ignored.

    Raises:
        ValueError: if the leading ten characters are not a ``%Y-%m-%d`` date.
    """
    return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
|
||||
|
||||
|
||||
def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
) -> set[str]:
    """Return all S2 acquisition days (from REFL filenames) inside the window.

    Scans ``prepared_s2`` for ``*REFL.tif`` files whose names end in
    ``S2<platform>_MSIL2A_<YYYYMMDD>_REFL.tif`` and collects every
    ``YYYYMMDD`` token whose date lies in ``[window_start, window_end]``
    (both bounds inclusive).

    Args:
        prepared_s2: Directory holding the prepared S2 rasters.
        window_start: First calendar day of the window (inclusive).
        window_end: Last calendar day of the window (inclusive).

    Returns:
        The set of ``YYYYMMDD`` strings found inside the window. Empty when
        ``prepared_s2`` is not a directory or nothing matches.
    """
    out: set[str] = set()
    if not prepared_s2.is_dir():
        return out
    # Accept any Sentinel-2 platform letter (S2A, S2B, ...): matching only S2A
    # would silently leave other platforms' acquisitions out of the window set.
    refl_name = re.compile(r"S2[A-Z]_MSIL2A_(\d{8})_REFL\.tif$")
    for p in prepared_s2.glob("*REFL.tif"):
        m = refl_name.search(p.name)
        if not m:
            continue
        try:
            d = datetime.strptime(m.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that are not a real calendar day: treat the file
            # like any other non-matching name instead of aborting the scan.
            continue
        if window_start <= d <= window_end:
            out.add(m.group(1))
    return out
|
||||
|
||||
|
||||
def build_masked_s2_dir(
|
||||
prepared_s2: Path, withheld_yyyymmdd: str, dest: Path, patterns: tuple[str, ...]
|
||||
prepared_s2: Path,
|
||||
excluded_yyyymmdd: set[str],
|
||||
dest: Path,
|
||||
patterns: tuple[str, ...],
|
||||
) -> int:
|
||||
"""Symlink all files matching ``patterns`` except the withheld acquisition day."""
|
||||
"""Symlink all files matching ``patterns`` except excluded acquisition days."""
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
n = 0
|
||||
for pattern in patterns:
|
||||
|
|
@ -25,7 +50,7 @@ def build_masked_s2_dir(
|
|||
if not src.is_file() and not src.is_symlink():
|
||||
continue
|
||||
y = yyyymmdd_in_name(src.name)
|
||||
if y == withheld_yyyymmdd:
|
||||
if y and y in excluded_yyyymmdd:
|
||||
continue
|
||||
link = dest / src.name
|
||||
if link.exists() or link.is_symlink():
|
||||
|
|
@ -35,17 +60,32 @@ def build_masked_s2_dir(
|
|||
return n
|
||||
|
||||
|
||||
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Fail if the withheld validation acquisition is present in the fusion input dir.

    Every entry of ``masked_s2_dir`` has its acquisition day extracted with
    ``yyyymmdd_in_name``; the first entry whose day equals
    ``withheld_yyyymmdd`` aborts the check.

    Args:
        withheld_yyyymmdd: Acquisition day (``YYYYMMDD``) that must be absent.
        masked_s2_dir: Directory to inspect (only its direct children are
            examined).

    Raises:
        RuntimeError: if any entry name carries the withheld acquisition day.
    """
    for p in masked_s2_dir.iterdir():
        y = yyyymmdd_in_name(p.name)
        if y == withheld_yyyymmdd:
            raise RuntimeError(
                f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
                f"found in masked S2 dir {masked_s2_dir}"
            )
|
||||
|
||||
|
||||
def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked S2 directory for the BtI case (REFL + DIST_CLOUD files).

    Thin wrapper around ``build_masked_s2_dir`` that fixes the glob patterns
    to ``*REFL.tif`` and ``*DIST_CLOUD.tif``.

    Args:
        prepared_s2: Source directory with the prepared S2 files.
        excluded_yyyymmdd: Acquisition days (``YYYYMMDD``) to leave out.
        dest: Directory that receives the symlinks.

    Returns:
        Whatever ``build_masked_s2_dir`` returns (an ``int``; presumably the
        number of links created — confirm against that function).
    """
    return build_masked_s2_dir(
        prepared_s2, excluded_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif")
    )
|
||||
|
||||
|
||||
def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked S2 directory for the ItB case (GCC + DIST_CLOUD files).

    Thin wrapper around ``build_masked_s2_dir`` that fixes the glob patterns
    to ``*GCC.tif`` and ``*DIST_CLOUD.tif``.

    Args:
        prepared_s2: Source directory with the prepared S2 files.
        excluded_yyyymmdd: Acquisition days (``YYYYMMDD``) to leave out.
        dest: Directory that receives the symlinks.

    Returns:
        Whatever ``build_masked_s2_dir`` returns (an ``int``; presumably the
        number of links created — confirm against that function).
    """
    return build_masked_s2_dir(
        prepared_s2, excluded_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif")
    )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue