From 740249115b827ef4ccd33a4b9fb65fdfc4b04493 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Sun, 17 May 2026 15:55:15 +0200 Subject: [PATCH] added gap validation. --- README.md | 19 +- gap_validation/batch_spatial.py | 111 +++++++++++ gap_validation/batch_temporal.py | 56 ++++++ gap_validation/calendar.py | 164 +++++++++++----- gap_validation/fusion_masked.py | 109 +++++++++-- gap_validation/phenology_offsets.py | 146 ++++++++++++++ gap_validation/run.py | 103 +++++++--- gap_validation/s2_mask_dir.py | 56 +++++- gap_validation/spatial_metrics.py | 11 +- gap_validation/temporal_pc.py | 288 ++++++++++++++++++++++++++++ gap_validation/whittaker_compare.py | 41 ++-- webapp/gap_validation.html | 9 +- 12 files changed, 997 insertions(+), 116 deletions(-) create mode 100644 gap_validation/batch_spatial.py create mode 100644 gap_validation/batch_temporal.py create mode 100644 gap_validation/phenology_offsets.py create mode 100644 gap_validation/temporal_pc.py diff --git a/README.md b/README.md index ceafc27..6f4f7e7 100644 --- a/README.md +++ b/README.md @@ -52,10 +52,25 @@ By default, most stages in `run.py` are **commented out** (metrics-only). Uncomm With prepared data and EFAST installed: ```bash -python -m gap_validation.run --site innsbruck --season 2024 --lat 47.116171 --lon 11.320308 +# Phenology sidecars (TIMESAT 50 % amplitude) +python -m phenology_timesat --all + +# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up) +python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \ + --strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30 + +# All six sites, best BtI scenario per site +python -m gap_validation.batch_spatial + +# Full-season NSE_PC on gap-degraded stack (slow) +python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 +python -m gap_validation.batch_temporal + +# TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier) +python -m gap_validation.phenology_offsets ``` -Writes `data/{site}/{season}/validation/gap_manifest.json`, `gap_validation_summary.json`, and masked fusion under `validation/fusion/`. See `python -m gap_validation.run --help`. +Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`. ## Data layout diff --git a/gap_validation/batch_spatial.py b/gap_validation/batch_spatial.py new file mode 100644 index 0000000..5c57ec1 --- /dev/null +++ b/gap_validation/batch_spatial.py @@ -0,0 +1,111 @@ +"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI scenario per site).""" + +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path + +from gap_validation.calendar import DEFAULT_GAP_LENGTHS, TRANSITIONS +from gap_validation.run import run_validation + +# Primary season per site (matches scripts/export_thesis_tables.py). +PRIMARY_SEASON = { + "forthgr": 2024, + "innsbruck": 2024, + "pitsalu": 2024, + "vindeln2": 2023, + "sunflowerjerez1": 2024, + "institutekarnobat": 2024, +} + + +def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]: + data = json.loads(geojson.read_text(encoding="utf-8")) + out: dict[str, tuple[float, float]] = {} + for feat in data.get("features", []): + props = feat.get("properties") or {} + name = props.get("sitename") + coords = (feat.get("geometry") or {}).get("coordinates") + if not name or not coords or len(coords) < 2: + continue + lon, lat = float(coords[0]), float(coords[1]) + out[str(name)] = (lat, lon) + return out + + +def _parse_scenario(key: str) -> tuple[str, int | None, str]: + """``aggressive_sigma20`` → (strategy, sigma, bti).""" + mode = "itb" if key.endswith("_itb") else "bti" + base = key.replace("_itb", "") + m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base) + if not m: + raise ValueError(f"Cannot parse scenario key: {key!r}") + strategy = m.group(1) + sigma = int(m.group(2)) + return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode + + +def _best_bti_from_metrics(metrics_path: Path) -> str | None: + if not metrics_path.is_file(): + return None + temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {} + best_key, best_nse = None, None + for k, v in temporal.items(): + if not k.endswith("_itb") and isinstance(v, dict): + n = v.get("nse_pc") + if isinstance(n, (int, float)) and (best_nse is None or n > best_nse): + best_nse = n + best_key = k + return best_key + + +def main() -> None: + ap = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).") + ap.add_argument("--data-dir", type=Path, default=Path("data")) + ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson")) + ap.add_argument("--skip-fusion", action="store_true") + ap.add_argument("--write-manifest-only", action="store_true") + ap.add_argument( + "--gap-days", + type=int, + action="append", + help="Filter gap lengths (default: all 15 and 30 in manifest).", + ) + args = ap.parse_args() + positions = _site_positions(args.sites_geojson) + gap_filter = args.gap_days + + for site, season in sorted(PRIMARY_SEASON.items()): + pos = positions.get(site) + if not pos: + print(f"[skip] No coordinates for {site}") + continue + metrics_path = args.data_dir / site / str(season) / "metrics.json" + scenario_key = _best_bti_from_metrics(metrics_path) + if not scenario_key: + print(f"[skip] {site} {season}: no metrics.json / BtI scenarios") + continue + strategy, sigma, mode = _parse_scenario(scenario_key) + sigma_kw = 30 if sigma == 30 else None + print(f"=== {site} {season} {scenario_key} ===") + out = run_validation( + site, + season, + pos, + strategy, + sigma_kw, + mode, + skip_manifest=False, + skip_fusion=args.skip_fusion, + write_manifest_only=args.write_manifest_only, + gap_days_filter=gap_filter, + transition_filter=None, + s2_calendar_strategy=strategy, + ) + print(out) + + +if __name__ == "__main__": + main() diff --git a/gap_validation/batch_temporal.py b/gap_validation/batch_temporal.py new file mode 100644 index 0000000..2e8ef84 --- /dev/null +++ b/gap_validation/batch_temporal.py @@ -0,0 +1,56 @@ +"""Run full-season gap-degraded NSE_PC for all thesis sites (best BtI scenario).""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from gap_validation.batch_spatial import ( + PRIMARY_SEASON, + _best_bti_from_metrics, + _parse_scenario, + _site_positions, +) +from gap_validation.temporal_pc import run_temporal_pc + + +def main() -> None: + ap = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).") + ap.add_argument("--data-dir", type=Path, default=Path("data")) + ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson")) + ap.add_argument("--skip-fusion", action="store_true") + ap.add_argument("--gap-days", type=int, action="append") + args = ap.parse_args() + positions = _site_positions(args.sites_geojson) + + for site, season in sorted(PRIMARY_SEASON.items()): + pos = positions.get(site) + if not pos: + print(f"[skip] No coordinates for {site}") + continue + metrics_path = args.data_dir / site / str(season) / "metrics.json" + scenario_key = _best_bti_from_metrics(metrics_path) + if not scenario_key: + print(f"[skip] {site} {season}: no metrics.json") + continue + strategy, sigma, mode = _parse_scenario(scenario_key) + sigma_kw = 30 if sigma == 30 else None + print(f"=== {site} {season} temporal {scenario_key} ===") + out = run_temporal_pc( + site, + season, + pos, + strategy, + sigma_kw, + mode, + skip_manifest=False, + skip_fusion=args.skip_fusion, + gap_days_filter=args.gap_days, + transition_filter=None, + s2_calendar_strategy=strategy, + ) + print(out) + + +if __name__ == "__main__": + main() diff --git a/gap_validation/calendar.py b/gap_validation/calendar.py index 4ca053b..f89bf4b 100644 --- a/gap_validation/calendar.py +++ b/gap_validation/calendar.py @@ -1,4 +1,4 @@ -"""Gap windows and nearest S2 acquisition (manifest inputs).""" +"""Gap windows, phenological midpoints, manifest and withheld-image sidecar.""" from __future__ import annotations @@ -10,43 +10,58 @@ from pathlib import Path from phenology_timesat import phenocam_phenology_path REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$") +DEFAULT_GAP_LENGTHS = (15, 30) +TRANSITIONS = ("green_up", "green_down") def validation_dir(site_name: str, season: int) -> Path: return Path(f"data/{site_name}/{season}/validation") -def phenology_midpoint( - site_name: str, season: int, phenology_path: Path | None = None -) -> date: - """Pick fusion gap midpoint: green-up if in season, else green-down, else July 1.""" - path = phenology_path or phenocam_phenology_path(site_name, season) +def _parse_iso_date(s, season: int) -> date | None: + if not s or not isinstance(s, str): + return None + try: + d = datetime.strptime(s[:10], "%Y-%m-%d").date() + except ValueError: + return None y0, y1 = date(season, 1, 1), date(season, 12, 31) - fallback = date(season, 7, 1) + return d if y0 <= d <= y1 else None + + +def transition_midpoint( + site_name: str, + season: int, + transition: str, + phenology_path: Path | None = None, +) -> date | None: + """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.""" + if transition not in TRANSITIONS: + raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}") + path = phenology_path or phenocam_phenology_path(site_name, season) if not path.is_file(): - return fallback + return None try: rec = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): - return fallback - up_s = rec.get("green_up_50pct_date") - dn_s = rec.get("green_down_50pct_date") + return None + key = ( + "green_up_50pct_date" + if transition == "green_up" + else "green_down_50pct_date" + ) + return _parse_iso_date(rec.get(key), season) - def _parse(s) -> date | None: - if not s or not isinstance(s, str): - return None - try: - d = datetime.strptime(s[:10], "%Y-%m-%d").date() - except ValueError: - return None - return d if y0 <= d <= y1 else None - up, dn = _parse(up_s), _parse(dn_s) - if up: - return up - if dn: - return dn - return fallback +def phenology_midpoint( + site_name: str, season: int, phenology_path: Path | None = None +) -> date: + """Legacy: green-up if in season, else green-down, else July 1.""" + for tr in ("green_up", "green_down"): + d = transition_midpoint(site_name, season, tr, phenology_path) + if d: + return d + return date(season, 7, 1) def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]: @@ -84,43 +99,77 @@ def nearest_s2_acquisition( ) -> tuple[date, str] | None: if not pairs: return None - best = min(pairs, key=lambda t: abs((t[0] - prediction).days)) - return best + return min(pairs, key=lambda t: abs((t[0] - prediction).days)) def build_manifest_entries( site_name: str, season: int, - gap_lengths: tuple[int, ...] = (15, 30, 60, 90), + gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS, + transitions: tuple[str, ...] = TRANSITIONS, s2_calendar_strategy: str = "aggressive", ) -> list[dict]: - """One entry per gap length: window, prediction=midpoint, withheld = nearest S2 to midpoint.""" - mid = phenology_midpoint(site_name, season) + """One entry per (transition, gap_days): phenology midpoint, window, withheld S2.""" prepared_s2 = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2") pairs = list_s2_refl_dates(prepared_s2) - entries = [] - for gap_days in gap_lengths: - w0, w1 = centered_window(mid, gap_days, season) - prediction = mid - ns = nearest_s2_acquisition(prediction, pairs) - if ns is None: - withheld_date = None - withheld_filename = None - else: - withheld_date, withheld_filename = ns[0].isoformat(), ns[1] - entries.append( + entries: list[dict] = [] + for transition in transitions: + mid = transition_midpoint(site_name, season, transition) + if mid is None: + continue + for gap_days in gap_lengths: + w0, w1 = centered_window(mid, gap_days, season) + prediction = mid + ns = nearest_s2_acquisition(prediction, pairs) + if ns is None: + withheld_date = None + withheld_filename = None + else: + withheld_date, withheld_filename = ns[0].isoformat(), ns[1] + entries.append( + { + "transition": transition, + "gap_days": gap_days, + "midpoint_rule": f"{transition}_50pct_date", + "midpoint_date": mid.isoformat(), + "window_start": w0.isoformat(), + "window_end": w1.isoformat(), + "prediction_date": prediction.isoformat(), + "withheld_s2_date": withheld_date, + "withheld_s2_filename": withheld_filename, + } + ) + return entries + + +def write_gap_withheld_images( + site_name: str, + season: int, + entries: list[dict], +) -> Path: + """Reproducibility sidecar for withheld scenes and gap placement.""" + path = validation_dir(site_name, season) / "gap_withheld_images.json" + records = [] + for e in entries: + records.append( { - "gap_days": gap_days, - "midpoint_rule": "green_up_50pct else green_down_50pct else July01", - "midpoint_date": mid.isoformat(), - "window_start": w0.isoformat(), - "window_end": w1.isoformat(), - "prediction_date": prediction.isoformat(), - "withheld_s2_date": withheld_date, - "withheld_s2_filename": withheld_filename, + "site_name": site_name, + "season": season, + "transition": e.get("transition"), + "gap_days": e.get("gap_days"), + "midpoint_date": e.get("midpoint_date"), + "window_start": e.get("window_start"), + "window_end": e.get("window_end"), + "withheld_s2_date": e.get("withheld_s2_date"), + "withheld_s2_filename": e.get("withheld_s2_filename"), } ) - return entries + path.write_text( + json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2) + + "\n", + encoding="utf-8", + ) + return path def write_manifest( @@ -128,20 +177,29 @@ def write_manifest( season: int, site_position: tuple[float, float], s2_calendar_strategy: str = "aggressive", + *, + gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS, + transitions: tuple[str, ...] = TRANSITIONS, ) -> Path: out_dir = validation_dir(site_name, season) out_dir.mkdir(parents=True, exist_ok=True) + entries = build_manifest_entries( + site_name, + season, + gap_lengths=gap_lengths, + transitions=transitions, + s2_calendar_strategy=s2_calendar_strategy, + ) path = out_dir / "gap_manifest.json" payload = { "site_name": site_name, "season": season, "site_position_lat_lon": list(site_position), "s2_calendar_strategy": s2_calendar_strategy, - "entries": build_manifest_entries( - site_name, season, s2_calendar_strategy=s2_calendar_strategy - ), + "entries": entries, } path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + write_gap_withheld_images(site_name, season, entries) return path diff --git a/gap_validation/fusion_masked.py b/gap_validation/fusion_masked.py index df732bc..db8bf69 100644 --- a/gap_validation/fusion_masked.py +++ b/gap_validation/fusion_masked.py @@ -1,14 +1,20 @@ -"""EFAST with symlinked S2 dir (withhold one acquisition); outputs under validation/.""" +"""EFAST with symlinked S2 dir (gap window omitted); outputs under validation/.""" from __future__ import annotations +from datetime import datetime from pathlib import Path from tempfile import TemporaryDirectory from fusion import run_efast, run_efast_itb from preparation import _get_base_dir, _get_itb_base_dir -from gap_validation.s2_mask_dir import build_masked_s2_dir_bti, build_masked_s2_dir_itb +from gap_validation.s2_mask_dir import ( + acquisition_yyyymmdd_in_window, + assert_no_leakage, + build_masked_s2_dir_bti, + build_masked_s2_dir_itb, +) def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path: @@ -19,20 +25,35 @@ def validation_fusion_dir( site_name: str, season: int, gap_days: int, + transition: str, strategy: str, sigma: int | None, mode: str, ) -> Path: - """``data/.../validation/fusion/gap_{n}/{strategy}_sigma{20|30}_{bti|itb}/``.""" + """``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``.""" sig = 30 if sigma == 30 else 20 return ( Path(f"data/{site_name}/{season}/validation") / "fusion" - / f"gap_{gap_days}" + / f"gap_{gap_days}_{transition}" / f"{strategy}_sigma{sig}_{mode}" ) +def excluded_acquisition_days( + prepared_s2: Path, + window_start_iso: str, + window_end_iso: str, + withheld_yyyymmdd: str, +) -> set[str]: + """Union of gap-window S2 days and the withheld validation acquisition.""" + w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date() + w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date() + excluded = acquisition_yyyymmdd_in_window(prepared_s2, w0, w1) + excluded.add(withheld_yyyymmdd) + return excluded + + def run_masked_fusion_one_date( season: int, site_position: tuple[float, float], @@ -41,19 +62,24 @@ def run_masked_fusion_one_date( sigma: int | None, mode: str, prediction_date_iso: str, + window_start_iso: str, + window_end_iso: str, withheld_yyyymmdd: str, fusion_output_dir: Path, ) -> Path: - """Build temp masked S2 dir, run EFAST for ``prediction_date_iso`` only; return output dir.""" + """Build temp masked S2 dir, run EFAST for ``prediction_date_iso`` only.""" fusion_output_dir.mkdir(parents=True, exist_ok=True) date_range = f"{prediction_date_iso[:10]}/{prediction_date_iso[:10]}" - s3_dir = prepared_s3_dir(season, site_name, strategy) with TemporaryDirectory(prefix="gapval_s2_") as tmp: tmp_s2 = Path(tmp) / "s2" if mode == "bti": prep_s2 = _get_base_dir(season, site_name, strategy) / "s2" - build_masked_s2_dir_bti(prep_s2, withheld_yyyymmdd, tmp_s2) + excl = excluded_acquisition_days( + prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd + ) + build_masked_s2_dir_bti(prep_s2, excl, tmp_s2) + assert_no_leakage(withheld_yyyymmdd, tmp_s2) run_efast( season, site_position, @@ -62,13 +88,16 @@ def run_masked_fusion_one_date( sigma=sigma, date_range=date_range, s2_output_dir=tmp_s2, - s3_output_dir=s3_dir, + s3_output_dir=prepared_s3_dir(season, site_name, strategy), fusion_output_dir=fusion_output_dir, ) elif mode == "itb": prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2" - s3_itb = _get_itb_base_dir(season, site_name, strategy) / "s3" - build_masked_s2_dir_itb(prep_s2, withheld_yyyymmdd, tmp_s2) + excl = excluded_acquisition_days( + prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd + ) + build_masked_s2_dir_itb(prep_s2, excl, tmp_s2) + assert_no_leakage(withheld_yyyymmdd, tmp_s2) run_efast_itb( season, site_position, @@ -77,7 +106,7 @@ def run_masked_fusion_one_date( sigma=sigma, date_range=date_range, s2_output_dir=tmp_s2, - s3_output_dir=s3_itb, + s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3", fusion_output_dir=fusion_output_dir, ) else: @@ -86,6 +115,64 @@ def run_masked_fusion_one_date( return fusion_output_dir +def run_masked_fusion_season( + season: int, + site_position: tuple[float, float], + site_name: str, + strategy: str, + sigma: int | None, + mode: str, + window_start_iso: str, + window_end_iso: str, + withheld_yyyymmdd: str, + fusion_output_dir: Path, +) -> Path: + """Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier).""" + fusion_output_dir.mkdir(parents=True, exist_ok=True) + date_range = f"{season}-01-01/{season}-12-31" + + with TemporaryDirectory(prefix="gapval_s2_") as tmp: + tmp_s2 = Path(tmp) / "s2" + if mode == "bti": + prep_s2 = _get_base_dir(season, site_name, strategy) / "s2" + excl = excluded_acquisition_days( + prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd + ) + build_masked_s2_dir_bti(prep_s2, excl, tmp_s2) + assert_no_leakage(withheld_yyyymmdd, tmp_s2) + run_efast( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma, + date_range=date_range, + s2_output_dir=tmp_s2, + s3_output_dir=prepared_s3_dir(season, site_name, strategy), + fusion_output_dir=fusion_output_dir, + ) + else: + prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2" + excl = excluded_acquisition_days( + prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd + ) + build_masked_s2_dir_itb(prep_s2, excl, tmp_s2) + assert_no_leakage(withheld_yyyymmdd, tmp_s2) + run_efast_itb( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma, + date_range=date_range, + s2_output_dir=tmp_s2, + s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3", + fusion_output_dir=fusion_output_dir, + ) + + return fusion_output_dir + + def production_fusion_path( season: int, site_name: str, diff --git a/gap_validation/phenology_offsets.py b/gap_validation/phenology_offsets.py new file mode 100644 index 0000000..44afab9 --- /dev/null +++ b/gap_validation/phenology_offsets.py @@ -0,0 +1,146 @@ +"""TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference.""" + +from __future__ import annotations + +import argparse +import json +from datetime import datetime +from pathlib import Path + +from phenology_timesat import ( + build_yraw_three_years, + phenocam_phenology_path, + run_timesat_phenology_from_yraw, +) + +from gap_validation.batch_spatial import ( + PRIMARY_SEASON, + _best_bti_from_metrics, + _parse_scenario, + _site_positions, +) +from gap_validation.calendar import load_manifest, validation_dir +from gap_validation.temporal_pc import _fusion_gcc_timeseries + + +def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None: + if not iso_a or not iso_b: + return None + try: + a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date() + b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date() + return abs((a - b).days) + except ValueError: + return None + + +def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]: + y1, y2, y3 = season - 1, season, season + 1 + yraw, _mode = build_yraw_three_years(by_date, y1, y2, y3) + out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3)) + return { + "green_up": out.get("green_up_50pct_date"), + "green_down": out.get("green_down_50pct_date"), + } + + +def _temporal_fusion_dir( + site: str, season: int, gap_days: int, transition: str, scenario_key: str +) -> Path: + strategy, sigma, mode = _parse_scenario(scenario_key) + sig = 30 if sigma == 30 else 20 + return ( + validation_dir(site, season) + / "temporal" + / f"gap_{gap_days}_{transition}" + / f"{strategy}_sigma{sig}_{mode}" + ) + + +def compute_offsets_for_site( + site: str, + season: int, + site_position: tuple[float, float], + *, + gap_days_list: tuple[int, ...] = (15, 30), +) -> list[dict]: + base = Path(f"data/{site}/{season}") + metrics_path = base / "metrics.json" + scenario_key = _best_bti_from_metrics(metrics_path) + if not scenario_key: + return [] + ref_path = phenocam_phenology_path(site, season) + reference = ( + json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {} + ) + manifest = load_manifest(site, season) + rows: list[dict] = [] + for entry in manifest["entries"]: + gd = entry.get("gap_days") + tr = entry.get("transition") + if gd not in gap_days_list or tr not in ("green_up", "green_down"): + continue + fusion_dir = _temporal_fusion_dir(site, season, gd, tr, scenario_key) + if not fusion_dir.is_dir(): + continue + _, _, mode = _parse_scenario(scenario_key) + ts = _fusion_gcc_timeseries(fusion_dir, site_position, mode) + if len(ts) < 10: + continue + fused = _timesat_transitions(ts, season) + ref_key = ( + "green_up_50pct_date" + if tr == "green_up" + else "green_down_50pct_date" + ) + ref_date = reference.get(ref_key) + fused_date = fused.get("green_up" if tr == "green_up" else "green_down") + rows.append( + { + "site_name": site, + "season": season, + "transition": tr, + "gap_days": gd, + "scenario": scenario_key, + "reference_date": ref_date, + "fused_date": fused_date, + "abs_day_offset": _day_offset(fused_date, ref_date), + "window_start": entry.get("window_start"), + "window_end": entry.get("window_end"), + } + ) + return rows + + +def write_phenology_offsets( + site: str, + season: int, + site_position: tuple[float, float], + *, + gap_days_list: tuple[int, ...] = (15, 30), +) -> Path: + rows = compute_offsets_for_site( + site, season, site_position, gap_days_list=gap_days_list + ) + out = validation_dir(site, season) / "gap_phenology_offsets.json" + payload = {"site_name": site, "season": season, "records": rows} + out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + return out + + +def main() -> None: + ap = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.") + ap.add_argument("--data-dir", type=Path, default=Path("data")) + ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson")) + args = ap.parse_args() + positions = _site_positions(args.sites_geojson) + for site, season in sorted(PRIMARY_SEASON.items()): + pos = positions.get(site) + if not pos: + continue + p = write_phenology_offsets(site, season, pos) + print(p) + + +if __name__ == "__main__": + main() diff --git a/gap_validation/run.py b/gap_validation/run.py index 583bb65..753fe56 100644 --- a/gap_validation/run.py +++ b/gap_validation/run.py @@ -9,7 +9,13 @@ import sys from datetime import datetime from pathlib import Path -from gap_validation.calendar import load_manifest, validation_dir, write_manifest +from gap_validation.calendar import ( + DEFAULT_GAP_LENGTHS, + TRANSITIONS, + load_manifest, + validation_dir, + write_manifest, +) from gap_validation.fusion_masked import ( production_fusion_path, run_masked_fusion_one_date, @@ -65,6 +71,19 @@ def _git_rev() -> str | None: return None +def _filter_entries( + entries: list[dict], + gap_days_filter: list[int] | None, + transition_filter: list[str] | None, +) -> list[dict]: + out = entries + if gap_days_filter: + out = [e for e in out if e.get("gap_days") in gap_days_filter] + if transition_filter: + out = [e for e in out if e.get("transition") in transition_filter] + return out + + def run_validation( site_name: str, season: int, @@ -77,7 +96,10 @@ def run_validation( skip_fusion: bool, write_manifest_only: bool, gap_days_filter: list[int] | None, + transition_filter: list[str] | None, s2_calendar_strategy: str, + manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS, + manifest_transitions: tuple[str, ...] = TRANSITIONS, ) -> Path: base = Path(f"data/{site_name}/{season}") vdir = validation_dir(site_name, season) @@ -85,24 +107,31 @@ def run_validation( if not skip_manifest: write_manifest( - site_name, season, site_position, s2_calendar_strategy=s2_calendar_strategy + site_name, + season, + site_position, + s2_calendar_strategy=s2_calendar_strategy, + gap_lengths=manifest_gap_lengths, + transitions=manifest_transitions, ) if write_manifest_only: return vdir / "gap_manifest.json" manifest = load_manifest(site_name, season) - entries = manifest["entries"] - if gap_days_filter: - entries = [e for e in entries if e.get("gap_days") in gap_days_filter] + entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter) results: list[dict] = [] for entry in entries: gap_days = entry["gap_days"] + transition = entry.get("transition", "green_up") pred = entry["prediction_date"] + w0 = entry["window_start"] + w1 = entry["window_end"] fn = entry.get("withheld_s2_filename") if not fn: results.append( { + "transition": transition, "gap_days": gap_days, "error": "no_withheld_s2_filename", "entry": entry, @@ -114,6 +143,7 @@ def run_validation( if not wh_ymd: results.append( { + "transition": transition, "gap_days": gap_days, "error": "could_not_parse_withheld_yyyymmdd", "withheld_s2_filename": fn, @@ -125,20 +155,33 @@ def run_validation( ) fusion_out = validation_fusion_dir( - site_name, season, gap_days, strategy, sigma, mode + site_name, season, gap_days, transition, strategy, sigma, mode ) if not skip_fusion: - run_masked_fusion_one_date( - season, - site_position, - site_name, - strategy, - sigma, - mode, - pred, - wh_ymd, - fusion_out, - ) + try: + run_masked_fusion_one_date( + season, + site_position, + site_name, + strategy, + sigma, + mode, + pred, + w0, + w1, + wh_ymd, + fusion_out, + ) + except RuntimeError as e: + results.append( + { + "transition": transition, + "gap_days": gap_days, + "error": str(e), + "entry": entry, + } + ) + continue fused_gap = _fused_file(fusion_out, mode, ymd) prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd) @@ -146,6 +189,7 @@ def run_validation( if wh_path is None or not fused_gap.is_file(): results.append( { + "transition": transition, "gap_days": gap_days, "prediction_date": pred, "withheld_s2_filename": fn, @@ -165,14 +209,17 @@ def run_validation( fused_gap, prod if prod.is_file() else None, mode, - whittaker_context=(base, strategy, pred, withheld_iso), + whittaker_context=(base, strategy, pred, withheld_iso, w0, w1), ) fusion_nse = (spatial.get("gap") or {}).get("nse_s2") wh_nse = (spatial.get("whittaker") or {}).get("nse_s2") results.append( { + "transition": transition, "gap_days": gap_days, "prediction_date": pred, + "window_start": w0, + "window_end": w1, "withheld_s2_filename": fn, "scenario": { "strategy": strategy, @@ -186,6 +233,7 @@ def run_validation( }, "spatial": spatial, "whittaker_crossover_row": { + "transition": transition, "gap_days": gap_days, "nse_s2_fusion": fusion_nse, "nse_s2_whittaker": wh_nse, @@ -206,15 +254,15 @@ def run_validation( "command_line": sys.argv, "git_commit": _git_rev(), "manifest": str(vdir / "gap_manifest.json"), + "gap_withheld_images": str(vdir / "gap_withheld_images.json"), "results": results, "whittaker_crossover": { scenario: { "metric": "nse_s2_spatial_vs_withheld_s2_gcc", "whittaker_definition": ( "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; " - "withheld acquisition removed from the fit; prediction is a spatially constant " - "field at the smoothed GCC(prediction_date), compared to withheld S2 GCC on the " - "same valid mask as fusion (aligned with baseline.s2_whittaker_lambda400 spirit)." + "all S2 dates in the gap window and the withheld acquisition removed; " + "prediction is a spatially constant field at smoothed GCC(prediction_date)." ), "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker( crossover_rows, @@ -250,6 +298,12 @@ def main() -> None: metavar="N", help="Restrict to gap length(s); repeatable (default: all manifest lengths).", ) + ap.add_argument( + "--transition", + choices=list(TRANSITIONS), + action="append", + help="Restrict to transition(s); repeatable (default: all in manifest).", + ) ap.add_argument("--skip-manifest", action="store_true") ap.add_argument( "--skip-fusion", @@ -259,7 +313,7 @@ def main() -> None: ap.add_argument( "--write-manifest-only", action="store_true", - help="Write gap_manifest.json and exit (no EFAST).", + help="Write gap_manifest.json + gap_withheld_images.json and exit.", ) ap.add_argument( "--s2-calendar-strategy", @@ -270,6 +324,8 @@ def main() -> None: args = ap.parse_args() sigma_kw = 30 if args.sigma == 30 else None site_position = (args.lat, args.lon) + gap_filter = args.gap_days if args.gap_days else None + trans_filter = args.transition if args.transition else None out = run_validation( args.site, args.season, @@ -280,7 +336,8 @@ def main() -> None: skip_manifest=args.skip_manifest, skip_fusion=args.skip_fusion, write_manifest_only=args.write_manifest_only, - gap_days_filter=args.gap_days, + gap_days_filter=gap_filter, + transition_filter=trans_filter, s2_calendar_strategy=args.s2_calendar_strategy, ) print(out) diff --git a/gap_validation/s2_mask_dir.py b/gap_validation/s2_mask_dir.py index 6112dcf..b279585 100644 --- a/gap_validation/s2_mask_dir.py +++ b/gap_validation/s2_mask_dir.py @@ -1,8 +1,9 @@ -"""Symlink prepared S2 into a temp dir, omitting one acquisition (REFL + DIST_CLOUD).""" +"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST).""" from __future__ import annotations import re +from datetime import date, datetime from pathlib import Path # Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST). @@ -14,10 +15,34 @@ def yyyymmdd_in_name(name: str) -> str | None: return m.group(1) if m else None +def yyyymmdd_from_iso(iso_d: str) -> str: + return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d") + + +def acquisition_yyyymmdd_in_window( + prepared_s2: Path, window_start: date, window_end: date +) -> set[str]: + """All S2 acquisition days (from REFL filenames) inside [window_start, window_end].""" + out: set[str] = set() + if not prepared_s2.is_dir(): + return out + for p in prepared_s2.glob("*REFL.tif"): + m = re.search(r"S2A_MSIL2A_(\d{8})_REFL\.tif$", p.name) + if not m: + continue + d = datetime.strptime(m.group(1), "%Y%m%d").date() + if window_start <= d <= window_end: + out.add(m.group(1)) + return out + + def build_masked_s2_dir( - prepared_s2: Path, withheld_yyyymmdd: str, dest: Path, patterns: tuple[str, ...] + prepared_s2: Path, + excluded_yyyymmdd: set[str], + dest: Path, + patterns: tuple[str, ...], ) -> int: - """Symlink all files matching ``patterns`` except the withheld acquisition day.""" + """Symlink all files matching ``patterns`` except excluded acquisition days.""" dest.mkdir(parents=True, exist_ok=True) n = 0 for pattern in patterns: @@ -25,7 +50,7 @@ def build_masked_s2_dir( if not src.is_file() and not src.is_symlink(): continue y = yyyymmdd_in_name(src.name) - if y == withheld_yyyymmdd: + if y and y in excluded_yyyymmdd: continue link = dest / src.name if link.exists() or link.is_symlink(): @@ -35,17 +60,32 @@ def build_masked_s2_dir( return n +def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None: + """Fail if the withheld validation acquisition is present in the fusion input dir.""" + for p in masked_s2_dir.iterdir(): + y = yyyymmdd_in_name(p.name) + if y == withheld_yyyymmdd: + raise RuntimeError( + f"Data leakage: withheld acquisition {withheld_yyyymmdd} " + f"found in masked S2 dir {masked_s2_dir}" + ) + + def build_masked_s2_dir_bti( - prepared_s2: Path, withheld_yyyymmdd: str, dest: Path + prepared_s2: Path, + excluded_yyyymmdd: set[str], + dest: Path, ) -> int: return build_masked_s2_dir( - prepared_s2, withheld_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif") + prepared_s2, excluded_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif") ) def build_masked_s2_dir_itb( - prepared_s2: Path, withheld_yyyymmdd: str, dest: Path + prepared_s2: Path, + excluded_yyyymmdd: set[str], + dest: Path, ) -> int: return build_masked_s2_dir( - prepared_s2, withheld_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif") + prepared_s2, excluded_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif") ) diff --git a/gap_validation/spatial_metrics.py b/gap_validation/spatial_metrics.py index 64e126a..7663b3f 100644 --- a/gap_validation/spatial_metrics.py +++ b/gap_validation/spatial_metrics.py @@ -148,7 +148,7 @@ def evaluate_gap_vs_withheld( fused_nogap_path: Path | None, mode: str, *, - whittaker_context: tuple[Path, str, str, str] | None = None, + whittaker_context: tuple[Path, str, str, str, str, str] | None = None, ) -> dict: """Spatial metrics for gap and no-gap; deltas; optional Whittaker constant-field vs same mask. @@ -170,9 +170,14 @@ def evaluate_gap_vs_withheld( if whittaker_context is not None: from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series - base, strategy, prediction_iso, withheld_iso = whittaker_context + base, strategy, prediction_iso, withheld_iso, w0, w1 = whittaker_context wgcc = whittaker_gcc_on_gap_masked_series( - base, strategy, prediction_iso, withheld_iso + base, + strategy, + prediction_iso, + withheld_iso, + window_start_iso=w0, + window_end_iso=w1, ) if wgcc is not None: out["whittaker"] = constant_field_scores(yt, float(wgcc), mask) diff --git a/gap_validation/temporal_pc.py b/gap_validation/temporal_pc.py new file mode 100644 index 0000000..40429e7 --- /dev/null +++ b/gap_validation/temporal_pc.py @@ -0,0 +1,288 @@ +"""Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation).""" + +from __future__ import annotations + +import argparse +import json +import re +from datetime import datetime +from pathlib import Path + +from metrics_indices import _get_gcc_from_original +from metrics_stats import ( + WHITTAKER_LAMBDA_DAYS_SQ, + _norm_date_key, + _s2_gcc_series_from_preselection, + _whittaker_smooth_dict, + calculate_temporal_metrics, + load_timeseries, +) + +from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest +from gap_validation.fusion_masked import run_masked_fusion_season, validation_fusion_dir +from gap_validation.run import ( + _filter_entries, + _scenario_key, + _withheld_iso, + _yyyymmdd_from_withheld_filename, +) +from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker + + +def _fusion_gcc_timeseries( + fusion_dir: Path, site_position: tuple[float, float], mode: str +) -> dict[str, float]: + """3×3 mean GCC at site from fused REFL/GCC rasters in ``fusion_dir``.""" + pattern = "REFL_*.tif" if mode == "bti" else "GCC_*.tif" + out: dict[str, float] = {} + for p in sorted(fusion_dir.glob(pattern)): + m = re.search(r"_(\d{8})\.tif$", p.name) + if not m: + continue + d = datetime.strptime(m.group(1), "%Y%m%d").date().isoformat() + gcc = _get_gcc_from_original(p, site_position) + if gcc is not None: + out[d] = float(gcc) + return out + + +def whittaker_timeseries_gap_degraded( + base: Path, + strategy: str, + window_start_iso: str, + window_end_iso: str, + withheld_iso: str, + lam: float = WHITTAKER_LAMBDA_DAYS_SQ, +) -> dict[str, float]: + """Daily Whittaker GCC on S2 preselection with gap window + withheld day removed.""" + all_gcc, flags = _s2_gcc_series_from_preselection(base) + if not all_gcc: + return {} + idx = 0 if strategy == "aggressive" else 1 + w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date() + w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date() + wh_k = _norm_date_key(withheld_iso) + + def in_window(dk: str) -> bool: + try: + d = datetime.strptime(dk[:10], "%Y-%m-%d").date() + except ValueError: + return False + return w0 <= d <= w1 + + kept = sorted( + (d, g) + for d, g in all_gcc.items() + if d in flags + and not flags[d][idx] + and _norm_date_key(d) != wh_k + and not in_window(_norm_date_key(d) or "") + ) + if len(kept) < 2: + return {} + obs_d, obs_v = zip(*kept) + return _whittaker_smooth_dict(obs_d, obs_v, lam) + + +def run_temporal_pc( + site_name: str, + season: int, + site_position: tuple[float, float], + strategy: str, + sigma: int | None, + mode: str, + *, + skip_manifest: bool, + skip_fusion: bool, + gap_days_filter: list[int] | None, + transition_filter: list[str] | None, + s2_calendar_strategy: str, +) -> Path: + """Run full-season gap fusion + NSE_PC; write ``gap_metrics.json``.""" + base = Path(f"data/{site_name}/{season}") + vdir = validation_dir(site_name, season) + vdir.mkdir(parents=True, exist_ok=True) + + if not skip_manifest: + write_manifest( + site_name, + season, + site_position, + s2_calendar_strategy=s2_calendar_strategy, + ) + + manifest = load_manifest(site_name, season) + entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter) + phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json" + phenocam_ts = load_timeseries(phenocam_ts_path) + + nogap_metrics_path = base / "metrics.json" + nogap_nse: dict[str, float | None] = {} + if nogap_metrics_path.is_file(): + m = json.loads(nogap_metrics_path.read_text(encoding="utf-8")) + sk = _scenario_key(strategy, sigma, mode) + block = (m.get("temporal") or {}).get(sk) or {} + nogap_nse["nse_pc"] = block.get("nse_pc") + + results: list[dict] = [] + crossover_rows: list[dict] = [] + + for entry in entries: + transition = entry.get("transition", "green_up") + gap_days = entry["gap_days"] + pred = entry["prediction_date"] + w0, w1 = entry["window_start"], entry["window_end"] + fn = entry.get("withheld_s2_filename") + if not fn: + results.append( + {"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"} + ) + continue + wh_ymd = _yyyymmdd_from_withheld_filename(fn) + if not wh_ymd: + results.append( + { + "transition": transition, + "gap_days": gap_days, + "error": "bad_withheld_filename", + } + ) + continue + withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}" + + temporal_dir = ( + vdir / "temporal" / f"gap_{gap_days}_{transition}" / _scenario_key(strategy, sigma, mode) + ) + if not skip_fusion: + try: + run_masked_fusion_season( + season, + site_position, + site_name, + strategy, + sigma, + mode, + w0, + w1, + wh_ymd, + temporal_dir, + ) + except RuntimeError as e: + results.append( + { + "transition": transition, + "gap_days": gap_days, + "error": str(e), + } + ) + continue + fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode) + else: + fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode) + + fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts) + wh_ts = whittaker_timeseries_gap_degraded( + base, strategy, w0, w1, withheld_iso + ) + wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts) + + row: dict = { + "transition": transition, + "gap_days": gap_days, + "prediction_date": pred, + "window_start": w0, + "window_end": w1, + "withheld_s2_filename": fn, + "temporal": { + "fused": fused_metrics, + "whittaker": wh_metrics, + }, + "fusion_dir": str(temporal_dir), + } + if fused_metrics and nogap_nse.get("nse_pc") is not None: + g_rmse = fused_metrics.get("rmse") + ng_rmse = None + if nogap_metrics_path.is_file(): + sk = _scenario_key(strategy, sigma, mode) + ng_rmse = ( + (json.loads(nogap_metrics_path.read_text()).get("temporal") or {}) + .get(sk, {}) + .get("rmse") + ) + n_g = fused_metrics.get("nse_pc") + n_ng = nogap_nse["nse_pc"] + if g_rmse is not None and ng_rmse is not None: + row["delta_rmse"] = float(g_rmse - ng_rmse) + if n_g is not None and n_ng is not None: + row["delta_nse"] = float(n_ng - n_g) + + fn_pc = (fused_metrics or {}).get("nse_pc") + wh_pc = (wh_metrics or {}).get("nse_pc") + row["utility_crossover_row"] = { + "transition": transition, + "gap_days": gap_days, + "nse_pc_fusion": fn_pc, + "nse_pc_whittaker": wh_pc, + } + crossover_rows.append(row["utility_crossover_row"]) + results.append(row) + + scenario = _scenario_key(strategy, sigma, mode) + payload = { + "site_name": site_name, + "season": season, + "scenario": scenario, + "tier": "temporal_nse_pc", + "manifest": str(vdir / "gap_manifest.json"), + "results": results, + "utility_crossover": { + scenario: { + "metric": "nse_pc_vs_phenocam_gcc90", + "first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker( + crossover_rows, + fusion_key="nse_pc_fusion", + whittaker_key="nse_pc_whittaker", + ), + "by_gap": crossover_rows, + } + }, + } + out_path = vdir / "gap_metrics.json" + out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + return out_path + + +def main() -> None: + ap = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.") + ap.add_argument("--site", required=True) + ap.add_argument("--season", type=int, required=True) + ap.add_argument("--lat", type=float, required=True) + ap.add_argument("--lon", type=float, required=True) + ap.add_argument("--strategy", default="aggressive") + ap.add_argument("--sigma", type=int, default=20, choices=[20, 30]) + ap.add_argument("--mode", default="bti", choices=["bti", "itb"]) + ap.add_argument("--gap-days", type=int, action="append") + ap.add_argument("--transition", choices=list(TRANSITIONS), action="append") + ap.add_argument("--skip-manifest", action="store_true") + ap.add_argument("--skip-fusion", action="store_true") + ap.add_argument("--s2-calendar-strategy", default="aggressive") + args = ap.parse_args() + sigma_kw = 30 if args.sigma == 30 else None + out = run_temporal_pc( + args.site, + args.season, + (args.lat, args.lon), + args.strategy, + sigma_kw, + args.mode, + skip_manifest=args.skip_manifest, + skip_fusion=args.skip_fusion, + gap_days_filter=args.gap_days, + transition_filter=args.transition, + s2_calendar_strategy=args.s2_calendar_strategy, + ) + print(out) + + +if __name__ == "__main__": + main() diff --git a/gap_validation/whittaker_compare.py b/gap_validation/whittaker_compare.py index cb23aee..8cb3fb3 100644 --- a/gap_validation/whittaker_compare.py +++ b/gap_validation/whittaker_compare.py @@ -2,6 +2,7 @@ from __future__ import annotations +from datetime import date, datetime from pathlib import Path from metrics_stats import ( @@ -12,32 +13,48 @@ from metrics_stats import ( ) +def _date_in_window(dk: str, start: date, end: date) -> bool: + try: + d = datetime.strptime(dk[:10], "%Y-%m-%d").date() + except ValueError: + return False + return start <= d <= end + + def whittaker_gcc_on_gap_masked_series( base: Path, strategy: str, prediction_iso: str, withheld_iso: str, + *, + window_start_iso: str | None = None, + window_end_iso: str | None = None, lam: float = WHITTAKER_LAMBDA_DAYS_SQ, ) -> float | None: - """Whittaker smooth on cloud-screened S2 GCC **excluding** the withheld acquisition day. - - Comparator aligned with ``baseline.s2_whittaker_lambda400`` in ``metrics_stats`` (same λ, - same preselection GCC), but the withheld date is removed so the smoother does not see - the target acquisition. Value at ``prediction_iso`` (YYYY-MM-DD) is returned. - """ + """Whittaker on cloud-screened S2 GCC excluding gap-window dates and withheld day.""" pred_k = _norm_date_key(prediction_iso) wh_k = _norm_date_key(withheld_iso) if not pred_k or not wh_k: return None + w0 = w1 = None + if window_start_iso and window_end_iso: + w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date() + w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date() all_gcc, flags = _s2_gcc_series_from_preselection(base) if not all_gcc: return None idx = 0 if strategy == "aggressive" else 1 - kept = sorted( - (d, g) - for d, g in all_gcc.items() - if d in flags and not flags[d][idx] and _norm_date_key(d) != wh_k - ) + kept = [] + for d, g in all_gcc.items(): + if d not in flags or flags[d][idx]: + continue + dk = _norm_date_key(d) + if not dk or dk == wh_k: + continue + if w0 is not None and w1 is not None and _date_in_window(dk, w0, w1): + continue + kept.append((d, g)) + kept.sort(key=lambda t: t[0]) if len(kept) < 2: return None obs_d, obs_v = zip(*kept) @@ -57,7 +74,7 @@ def first_gap_where_fusion_below_whittaker( for r in rows if r.get(fusion_key) is not None and r.get(whittaker_key) is not None ] - eligible.sort(key=lambda r: r["gap_days"]) + eligible.sort(key=lambda r: (r.get("transition") or "", r["gap_days"])) for r in eligible: if r[fusion_key] < r[whittaker_key]: return int(r["gap_days"]) diff --git a/webapp/gap_validation.html b/webapp/gap_validation.html index f050ff6..73e4d9a 100644 --- a/webapp/gap_validation.html +++ b/webapp/gap_validation.html @@ -104,10 +104,10 @@ if (!manifest?.entries?.length) return ""; let h = `

Gap manifest

`; h += `

From data/${siteName}/${season}/validation/gap_manifest.json. Midpoint rule: ${manifest.entries[0]?.midpoint_rule || "—"}.

`; - h += ``; + h += `
Gap daysPredictionWindowWithheld S2
`; for (const e of manifest.entries) { const w = `${e.window_start} → ${e.window_end}`; - h += ``; + h += ``; } h += `
TransitionGap daysPredictionWindowWithheld S2
${e.gap_days}${e.prediction_date}${w}${e.withheld_s2_filename || "—"}
${e.transition || "—"}${e.gap_days}${e.prediction_date}${w}${e.withheld_s2_filename || "—"}
`; return h; @@ -116,7 +116,7 @@ function resultsTable(results) { if (!results?.length) return `

No result rows in summary.

`; const head = ` - GapPredictionWithheld REFL + TransitionGapPredictionWithheld REFL RMSE
gap NSES2
gap RMSE
no gap @@ -130,7 +130,7 @@ for (const r of results) { if (r.error) { parts.push( - `${r.gap_days ?? "—"}${r.error}${r.fused_gap_path || ""}` + `${r.transition ?? "—"}${r.gap_days ?? "—"}${r.error}${r.fused_gap_path || ""}` ); continue; } @@ -142,6 +142,7 @@ const p = r.paths || {}; const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl].filter(Boolean).join("
"); parts.push(` + ${r.transition || "—"} ${r.gap_days} ${r.prediction_date || "—"} ${r.withheld_s2_filename || "—"}