"""Gap windows, phenological midpoints, manifest and withheld-image sidecar.""" from __future__ import annotations import json import re from datetime import date, datetime, timedelta from pathlib import Path from phenology_timesat import phenocam_phenology_path REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$") DEFAULT_GAP_LENGTHS = (15, 30) TRANSITIONS = ("green_up", "green_down") def validation_dir(site_name: str, season: int) -> Path: return Path(f"data/{site_name}/{season}/validation") def _parse_iso_date(s, season: int) -> date | None: if not s or not isinstance(s, str): return None try: d = datetime.strptime(s[:10], "%Y-%m-%d").date() except ValueError: return None y0, y1 = date(season, 1, 1), date(season, 12, 31) return d if y0 <= d <= y1 else None def transition_midpoint( site_name: str, season: int, transition: str, phenology_path: Path | None = None, ) -> date | None: """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.""" if transition not in TRANSITIONS: raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}") path = phenology_path or phenocam_phenology_path(site_name, season) if not path.is_file(): return None try: rec = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None key = ( "green_up_50pct_date" if transition == "green_up" else "green_down_50pct_date" ) return _parse_iso_date(rec.get(key), season) def phenology_midpoint( site_name: str, season: int, phenology_path: Path | None = None ) -> date: """Legacy: green-up if in season, else green-down, else July 1.""" for tr in ("green_up", "green_down"): d = transition_midpoint(site_name, season, tr, phenology_path) if d: return d return date(season, 7, 1) def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]: """[start, end] inclusive, gap_days wide, clamped to calendar year.""" half = gap_days // 2 start = mid - timedelta(days=half) end = mid + timedelta(days=gap_days - 1 - half) y0, y1 = date(season, 1, 1), date(season, 12, 31) if start < y0: end = min(y1, end + (y0 - start)) start = y0 if end > y1: start = max(y0, start - (end - y1)) end = y1 return start, end def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]: """Return sorted (acquisition_date, filename) for *REFL.tif.""" out: list[tuple[date, str]] = [] if not prepared_s2.is_dir(): return out for p in sorted(prepared_s2.glob("*REFL.tif")): m = REFL_DATE_RE.search(p.name) if not m: continue d = datetime.strptime(m.group(1), "%Y%m%d").date() out.append((d, p.name)) out.sort(key=lambda x: x[0]) return out def nearest_s2_acquisition( prediction: date, pairs: list[tuple[date, str]] ) -> tuple[date, str] | None: if not pairs: return None return min(pairs, key=lambda t: abs((t[0] - prediction).days)) def build_manifest_entries( site_name: str, season: int, gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS, transitions: tuple[str, ...] = TRANSITIONS, s2_calendar_strategy: str = "aggressive", ) -> list[dict]: """One entry per (transition, gap_days): phenology midpoint, window, withheld S2.""" prepared_s2 = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2") pairs = list_s2_refl_dates(prepared_s2) entries: list[dict] = [] for transition in transitions: mid = transition_midpoint(site_name, season, transition) if mid is None: continue for gap_days in gap_lengths: w0, w1 = centered_window(mid, gap_days, season) prediction = mid ns = nearest_s2_acquisition(prediction, pairs) if ns is None: withheld_date = None withheld_filename = None else: withheld_date, withheld_filename = ns[0].isoformat(), ns[1] entries.append( { "transition": transition, "gap_days": gap_days, "midpoint_rule": f"{transition}_50pct_date", "midpoint_date": mid.isoformat(), "window_start": w0.isoformat(), "window_end": w1.isoformat(), "prediction_date": prediction.isoformat(), "withheld_s2_date": withheld_date, "withheld_s2_filename": withheld_filename, } ) return entries def write_gap_withheld_images( site_name: str, season: int, entries: list[dict], ) -> Path: """Reproducibility sidecar for withheld scenes and gap placement.""" path = validation_dir(site_name, season) / "gap_withheld_images.json" records = [] for e in entries: records.append( { "site_name": site_name, "season": season, "transition": e.get("transition"), "gap_days": e.get("gap_days"), "midpoint_date": e.get("midpoint_date"), "window_start": e.get("window_start"), "window_end": e.get("window_end"), "withheld_s2_date": e.get("withheld_s2_date"), "withheld_s2_filename": e.get("withheld_s2_filename"), } ) path.write_text( json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2) + "\n", encoding="utf-8", ) return path def write_manifest( site_name: str, season: int, site_position: tuple[float, float], s2_calendar_strategy: str = "aggressive", *, gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS, transitions: tuple[str, ...] = TRANSITIONS, ) -> Path: out_dir = validation_dir(site_name, season) out_dir.mkdir(parents=True, exist_ok=True) entries = build_manifest_entries( site_name, season, gap_lengths=gap_lengths, transitions=transitions, s2_calendar_strategy=s2_calendar_strategy, ) path = out_dir / "gap_manifest.json" payload = { "site_name": site_name, "season": season, "site_position_lat_lon": list(site_position), "s2_calendar_strategy": s2_calendar_strategy, "entries": entries, } path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") write_gap_withheld_images(site_name, season, entries) return path def load_manifest(site_name: str, season: int) -> dict: path = validation_dir(site_name, season) / "gap_manifest.json" if not path.is_file(): raise FileNotFoundError(f"Missing manifest: {path}") return json.loads(path.read_text(encoding="utf-8"))