efast-phenocam-validation/gap_validation/whittaker_compare.py
2026-05-17 15:55:15 +02:00

81 lines
2.4 KiB
Python

"""Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
from __future__ import annotations
from datetime import date, datetime
from pathlib import Path
from metrics_stats import (
WHITTAKER_LAMBDA_DAYS_SQ,
_norm_date_key,
_s2_gcc_series_from_preselection,
_whittaker_smooth_dict,
)
def _date_in_window(dk: str, start: date, end: date) -> bool:
    """Tell whether the calendar day encoded in *dk* lies in ``[start, end]``.

    Only the first ten characters of *dk* are parsed (``YYYY-MM-DD``), so any
    trailing time component is ignored. Both bounds are inclusive. A key that
    cannot be parsed as a date is reported as outside the window rather than
    raising.
    """
    day_text = dk[:10]
    try:
        parsed = datetime.strptime(day_text, "%Y-%m-%d")
    except ValueError:
        # Unparseable keys are never considered part of the window.
        return False
    day = parsed.date()
    if day < start:
        return False
    return day <= end
def whittaker_gcc_on_gap_masked_series(
    base: Path,
    strategy: str,
    prediction_iso: str,
    withheld_iso: str,
    *,
    window_start_iso: str | None = None,
    window_end_iso: str | None = None,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> float | None:
    """Whittaker-smoothed S2 GCC at the prediction date from a gap-masked series.

    The S2 GCC series loaded for *base* is filtered before smoothing. An
    observation is dropped when any of the following holds:

    * its date has no flag entry, or the flag selected by *strategy* is truthy
      (presumably the cloud-screening rejection flags -- confirm against
      ``_s2_gcc_series_from_preselection``);
    * its normalised date key is empty or equals the withheld date;
    * a gap window was supplied and the date lies inside it (inclusive).

    Args:
        base: Location handed to ``_s2_gcc_series_from_preselection``.
        strategy: ``"aggressive"`` selects flag index 0; any other value
            selects flag index 1.
        prediction_iso: Date whose smoothed value is returned.
        withheld_iso: Date of the withheld observation, always excluded.
        window_start_iso: First day of the gap window. The window is applied
            only when both bounds are given; a single bound is ignored.
        window_end_iso: Last day of the gap window.
        lam: Smoothing parameter forwarded to ``_whittaker_smooth_dict``.

    Returns:
        The smoothed value stored under the normalised prediction key, or
        ``None`` when either date key normalises to something falsy, the
        series is empty, fewer than two observations survive the filtering,
        or the smoother output has no entry for the prediction key.

    Raises:
        ValueError: If a supplied window bound does not start with a
            ``YYYY-MM-DD`` date.
    """
    prediction_key = _norm_date_key(prediction_iso)
    withheld_key = _norm_date_key(withheld_iso)
    if not (prediction_key and withheld_key):
        return None

    # The gap window is only honoured when both ends are provided.
    gap_window = None
    if window_start_iso and window_end_iso:
        gap_window = (
            datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date(),
            datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date(),
        )

    series, screening = _s2_gcc_series_from_preselection(base)
    if not series:
        return None

    flag_col = 1 if strategy != "aggressive" else 0

    def _usable(raw_key) -> bool:
        """Return True when the observation at *raw_key* survives all masks."""
        if raw_key not in screening:
            return False
        if screening[raw_key][flag_col]:
            return False
        key = _norm_date_key(raw_key)
        if not key or key == withheld_key:
            return False
        if gap_window is not None and _date_in_window(key, *gap_window):
            return False
        return True

    # Raw (un-normalised) keys are what gets sorted and passed to the smoother.
    observations = sorted(
        ((raw_key, gcc) for raw_key, gcc in series.items() if _usable(raw_key)),
        key=lambda pair: pair[0],
    )
    if len(observations) < 2:
        return None

    obs_dates, obs_values = zip(*observations)
    smoothed = _whittaker_smooth_dict(obs_dates, obs_values, lam)
    return smoothed.get(prediction_key)
def first_gap_where_fusion_below_whittaker(
    rows: list[dict],
    *,
    fusion_key: str = "nse_s2",
    whittaker_key: str = "nse_s2",
) -> int | None:
    """Return the smallest ``gap_days`` where fusion[metric] < whittaker[metric].

    The comparison is strict. Rows in which ``gap_days``, the fusion metric
    or the Whittaker metric is missing or ``None`` are ignored. The minimum
    is taken over every remaining row regardless of its ``transition``, so
    the result really is the smallest crossing gap in *rows*.

    NOTE(review): both key defaults are ``"nse_s2"``. Called with the
    defaults, each row's value is compared with itself and the result is
    always ``None``; callers presumably pass distinct keys -- confirm.

    Args:
        rows: Result rows, each a mapping holding ``gap_days`` and the two
            metric values.
        fusion_key: Key of the fusion metric in each row.
        whittaker_key: Key of the Whittaker metric in each row.

    Returns:
        The smallest crossing ``gap_days`` converted to ``int``, or ``None``
        when no row has the fusion metric strictly below the Whittaker one.
    """
    crossing_gaps = [
        row["gap_days"]
        for row in rows
        if row.get("gap_days") is not None
        and row.get(fusion_key) is not None
        and row.get(whittaker_key) is not None
        and row[fusion_key] < row[whittaker_key]
    ]
    if not crossing_gaps:
        return None
    return int(min(crossing_gaps))