added gap validation.

This commit is contained in:
Felix Delattre 2026-05-17 15:55:15 +02:00
parent 374be6865d
commit 740249115b
12 changed files with 997 additions and 116 deletions

View file

@ -1,4 +1,4 @@
"""Gap windows and nearest S2 acquisition (manifest inputs)."""
"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
from __future__ import annotations
@ -10,43 +10,58 @@ from pathlib import Path
from phenology_timesat import phenocam_phenology_path
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
DEFAULT_GAP_LENGTHS = (15, 30)
TRANSITIONS = ("green_up", "green_down")
def validation_dir(site_name: str, season: int) -> Path:
    """Return the validation output directory for one site and season.

    The path is relative to the current working directory and has the form
    ``data/<site_name>/<season>/validation``. The directory is not created.
    """
    return Path(f"data/{site_name}/{season}/validation")
def _parse_iso_date(s: object, season: int) -> date | None:
    """Parse the leading ``YYYY-MM-DD`` of *s* and keep it only if it is in *season*.

    Args:
        s: Candidate value; anything that is not a non-empty string is rejected.
        season: Calendar year the parsed date must fall in.

    Returns:
        The parsed date, or ``None`` when *s* is empty, not a string, not
        parseable, or lies outside 1 January - 31 December of *season*.
    """
    if not s or not isinstance(s, str):
        return None
    try:
        # Only the first ten characters are parsed, so a trailing time part
        # (e.g. "2021-05-03T12:00:00") is ignored.
        d = datetime.strptime(s[:10], "%Y-%m-%d").date()
    except ValueError:
        return None
    # Same test as Jan 1 <= d <= Dec 31 of the season, without building two
    # boundary dates (which would raise for a season outside 1..9999).
    return d if d.year == season else None
def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
) -> date | None:
    """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.

    Args:
        site_name: Site identifier, used to locate the phenology file when
            *phenology_path* is not given.
        season: Calendar year the returned date must fall in.
        transition: One of ``TRANSITIONS``.
        phenology_path: Optional explicit path to the phenology JSON record;
            defaults to ``phenocam_phenology_path(site_name, season)``.

    Returns:
        The date stored under ``"<transition>_50pct_date"``, or ``None`` when
        the file is missing or unreadable, is not a JSON object, or the value
        is absent, malformed, or outside *season*.

    Raises:
        ValueError: If *transition* is not one of ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    path = phenology_path or phenocam_phenology_path(site_name, season)
    if not path.is_file():
        return None
    try:
        rec = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError
        # raised by read_text on a file that is not valid UTF-8.
        return None
    if not isinstance(rec, dict):
        # A JSON array or scalar at the top level has no keys to look up.
        return None
    # Same key naming as the manifest's "midpoint_rule" field.
    return _parse_iso_date(rec.get(f"{transition}_50pct_date"), season)
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Legacy: green-up if in season, else green-down, else July 1.

    Args:
        site_name: Site identifier passed through to ``transition_midpoint``.
        season: Calendar year of interest.
        phenology_path: Optional explicit phenology JSON path, passed through.

    Returns:
        The first available transition date in the order green-up, then
        green-down; 1 July of *season* when neither is available.
    """
    # The order is part of the legacy contract, so it is spelled out here
    # rather than taken from a shared constant.
    for tr in ("green_up", "green_down"):
        d = transition_midpoint(site_name, season, tr, phenology_path)
        if d is not None:
            return d
    return date(season, 7, 1)
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
@ -84,43 +99,77 @@ def nearest_s2_acquisition(
) -> tuple[date, str] | None:
if not pairs:
return None
best = min(pairs, key=lambda t: abs((t[0] - prediction).days))
return best
return min(pairs, key=lambda t: abs((t[0] - prediction).days))
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """One entry per (transition, gap_days): phenology midpoint, window, withheld S2.

    Args:
        site_name: Site identifier; selects the ``data/<site_name>/...`` tree.
        season: Calendar year of interest.
        gap_lengths: Gap lengths in days; one entry is produced per length.
        transitions: Transition names passed to ``transition_midpoint``.
        s2_calendar_strategy: Suffix of the ``prepared_<strategy>`` directory
            whose ``s2`` sub-directory is scanned for acquisitions.

    Returns:
        A list of JSON-serialisable dicts ordered by transition, then gap
        length. A transition with no midpoint contributes no entries.
        ``withheld_s2_date`` and ``withheld_s2_filename`` are ``None`` when no
        acquisition is found.
    """
    prepared_s2 = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    # presumably a list of (acquisition date, filename) pairs - confirm
    # against list_s2_refl_dates.
    pairs = list_s2_refl_dates(prepared_s2)

    entries: list[dict] = []
    for transition in transitions:
        mid = transition_midpoint(site_name, season, transition)
        if mid is None:
            continue

        # The prediction date is the midpoint itself, so the nearest
        # acquisition does not depend on the gap length: look it up once per
        # transition instead of once per gap.
        prediction = mid
        nearest = nearest_s2_acquisition(prediction, pairs)
        if nearest is None:
            withheld_date = None
            withheld_filename = None
        else:
            withheld_date, withheld_filename = nearest[0].isoformat(), nearest[1]

        for gap_days in gap_lengths:
            w0, w1 = centered_window(mid, gap_days, season)
            entries.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "midpoint_rule": f"{transition}_50pct_date",
                    "midpoint_date": mid.isoformat(),
                    "window_start": w0.isoformat(),
                    "window_end": w1.isoformat(),
                    "prediction_date": prediction.isoformat(),
                    "withheld_s2_date": withheld_date,
                    "withheld_s2_filename": withheld_filename,
                }
            )
    return entries
def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
) -> Path:
    """Reproducibility sidecar for withheld scenes and gap placement.

    Writes ``gap_withheld_images.json`` into ``validation_dir(site_name, season)``
    with one record per manifest entry.

    Args:
        site_name: Site identifier, repeated in every record.
        season: Calendar year, repeated in every record.
        entries: Manifest entries; keys missing from an entry are written as
            ``null``.

    Returns:
        Path of the file that was written.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # Create the directory so the function also works when called on its own.
    path.parent.mkdir(parents=True, exist_ok=True)

    # Fields copied from each manifest entry, in output order.
    copied_keys = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{key: e.get(key) for key in copied_keys},
        }
        for e in entries
    ]
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
def write_manifest(
@ -128,20 +177,29 @@ def write_manifest(
season: int,
site_position: tuple[float, float],
s2_calendar_strategy: str = "aggressive",
*,
gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
out_dir = validation_dir(site_name, season)
out_dir.mkdir(parents=True, exist_ok=True)
entries = build_manifest_entries(
site_name,
season,
gap_lengths=gap_lengths,
transitions=transitions,
s2_calendar_strategy=s2_calendar_strategy,
)
path = out_dir / "gap_manifest.json"
payload = {
"site_name": site_name,
"season": season,
"site_position_lat_lon": list(site_position),
"s2_calendar_strategy": s2_calendar_strategy,
"entries": build_manifest_entries(
site_name, season, s2_calendar_strategy=s2_calendar_strategy
),
"entries": entries,
}
path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
write_gap_withheld_images(site_name, season, entries)
return path