"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI scenario per site).""" from __future__ import annotations import argparse import json import re from pathlib import Path from gap_validation.run import run_validation # Primary season per site (matches scripts/export_thesis_tables.py). PRIMARY_SEASON = { "forthgr": 2024, "innsbruck": 2024, "pitsalu": 2024, "vindeln2": 2023, "sunflowerjerez1": 2024, "institutekarnobat": 2024, } def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]: data = json.loads(geojson.read_text(encoding="utf-8")) out: dict[str, tuple[float, float]] = {} for feat in data.get("features", []): props = feat.get("properties") or {} name = props.get("sitename") coords = (feat.get("geometry") or {}).get("coordinates") if not name or not coords or len(coords) < 2: continue lon, lat = float(coords[0]), float(coords[1]) out[str(name)] = (lat, lon) return out def _parse_scenario(key: str) -> tuple[str, int | None, str]: """``aggressive_sigma20`` → (strategy, sigma, bti).""" mode = "itb" if key.endswith("_itb") else "bti" base = key.replace("_itb", "") m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base) if not m: raise ValueError(f"Cannot parse scenario key: {key!r}") strategy = m.group(1) sigma = int(m.group(2)) return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode def _best_from_metrics(metrics_path: Path, workflow: str) -> str | None: """Best scenario key (max no-gap NSE_PC) for ``workflow`` (``bti`` or ``itb``).""" if workflow not in ("bti", "itb"): raise ValueError(f"workflow must be bti or itb, got {workflow!r}") if not metrics_path.is_file(): return None temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {} want_itb = workflow == "itb" best_key, best_nse = None, None for k, v in temporal.items(): if k.endswith("_itb") != want_itb or not isinstance(v, dict): continue n = v.get("nse_pc") if isinstance(n, (int, float)) and (best_nse is None or n > best_nse): best_nse = n best_key = k return best_key def _best_bti_from_metrics(metrics_path: Path) -> str | None: return _best_from_metrics(metrics_path, "bti") def _best_itb_from_metrics(metrics_path: Path) -> str | None: return _best_from_metrics(metrics_path, "itb") def _resolve_workflows(workflow: str) -> tuple[str, ...]: return ("bti", "itb") if workflow == "both" else (workflow,) def main() -> None: ap = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).") ap.add_argument("--data-dir", type=Path, default=Path("data")) ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson")) ap.add_argument("--skip-fusion", action="store_true") ap.add_argument("--write-manifest-only", action="store_true") ap.add_argument( "--workflow", choices=["bti", "itb", "both"], default="both", help="Fusion workflow(s) to validate (default: both best BtI and best ItB).", ) ap.add_argument( "--gap-days", type=int, action="append", help="Filter gap lengths (default: all 15 and 30 in manifest).", ) args = ap.parse_args() positions = _site_positions(args.sites_geojson) gap_filter = args.gap_days workflows = _resolve_workflows(args.workflow) for site, season in sorted(PRIMARY_SEASON.items()): pos = positions.get(site) if not pos: print(f"[skip] No coordinates for {site}") continue metrics_path = args.data_dir / site / str(season) / "metrics.json" for workflow in workflows: scenario_key = _best_from_metrics(metrics_path, workflow) if not scenario_key: print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios") continue strategy, sigma, mode = _parse_scenario(scenario_key) sigma_kw = 30 if sigma == 30 else None print(f"=== {site} {season} {scenario_key} ===") out = run_validation( site, season, pos, strategy, sigma_kw, mode, skip_manifest=False, skip_fusion=args.skip_fusion, write_manifest_only=args.write_manifest_only, gap_days_filter=gap_filter, transition_filter=None, s2_calendar_strategy=strategy, ) print(out) if __name__ == "__main__": main()