added gap validation.

This commit is contained in:
Felix Delattre 2026-05-17 15:55:15 +02:00
parent 374be6865d
commit 740249115b
12 changed files with 997 additions and 116 deletions

View file

@ -2,6 +2,7 @@
from __future__ import annotations
from datetime import date, datetime
from pathlib import Path
from metrics_stats import (
@ -12,32 +13,48 @@ from metrics_stats import (
)
def _date_in_window(dk: str, start: date, end: date) -> bool:
    """Return True when the date key *dk* falls within ``[start, end]``.

    Only the first ten characters of *dk* are parsed (``YYYY-MM-DD``), so a
    key carrying a trailing time component is still accepted. Both window
    bounds are inclusive.

    Args:
        dk: Date key whose leading ten characters are a ``%Y-%m-%d`` date.
        start: First day of the window (inclusive).
        end: Last day of the window (inclusive).

    Returns:
        ``True`` if the parsed date lies inside the window; ``False`` if it
        lies outside or if *dk* cannot be parsed as a date.
    """
    try:
        parsed = datetime.strptime(dk[:10], "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # A missing or non-string key (TypeError) is treated like a malformed
        # one (ValueError): it cannot be inside the window.
        return False
    return start <= parsed <= end
def whittaker_gcc_on_gap_masked_series(
base: Path,
strategy: str,
prediction_iso: str,
withheld_iso: str,
*,
window_start_iso: str | None = None,
window_end_iso: str | None = None,
lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> float | None:
"""Whittaker smooth on cloud-screened S2 GCC **excluding** the withheld acquisition day.
Comparator aligned with ``baseline.s2_whittaker_lambda400`` in ``metrics_stats`` (same λ,
same preselection GCC), but the withheld date is removed so the smoother does not see
the target acquisition. Value at ``prediction_iso`` (YYYY-MM-DD) is returned.
"""
"""Whittaker on cloud-screened S2 GCC excluding gap-window dates and withheld day."""
pred_k = _norm_date_key(prediction_iso)
wh_k = _norm_date_key(withheld_iso)
if not pred_k or not wh_k:
return None
w0 = w1 = None
if window_start_iso and window_end_iso:
w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
all_gcc, flags = _s2_gcc_series_from_preselection(base)
if not all_gcc:
return None
idx = 0 if strategy == "aggressive" else 1
kept = sorted(
(d, g)
for d, g in all_gcc.items()
if d in flags and not flags[d][idx] and _norm_date_key(d) != wh_k
)
kept = []
for d, g in all_gcc.items():
if d not in flags or flags[d][idx]:
continue
dk = _norm_date_key(d)
if not dk or dk == wh_k:
continue
if w0 is not None and w1 is not None and _date_in_window(dk, w0, w1):
continue
kept.append((d, g))
kept.sort(key=lambda t: t[0])
if len(kept) < 2:
return None
obs_d, obs_v = zip(*kept)
@ -57,7 +74,7 @@ def first_gap_where_fusion_below_whittaker(
for r in rows
if r.get(fusion_key) is not None and r.get(whittaker_key) is not None
]
eligible.sort(key=lambda r: r["gap_days"])
eligible.sort(key=lambda r: (r.get("transition") or "", r["gap_days"]))
for r in eligible:
if r[fusion_key] < r[whittaker_key]:
return int(r["gap_days"])