efast-phenocam-validation/gap_validation/calendar.py
2026-05-17 15:55:15 +02:00

210 lines
6.9 KiB
Python

"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
from __future__ import annotations
import json
import re
from datetime import date, datetime, timedelta
from pathlib import Path
from phenology_timesat import phenocam_phenology_path
# Matches file names ending in "S2A_MSIL2A_<8 digits>_REFL.tif"; group 1 is
# the 8-digit date string that list_s2_refl_dates parses with "%Y%m%d".
# NOTE(review): only the "S2A" prefix is matched -- confirm that no scenes
# with another platform prefix (e.g. "S2B") are expected in the prepared dir.
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
# Default gap widths, in days, used when building manifest entries.
DEFAULT_GAP_LENGTHS = (15, 30)
# Transition names accepted by transition_midpoint (anything else raises).
TRANSITIONS = ("green_up", "green_down")
def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative validation directory for a site and season.

    The path is ``data/<site_name>/<season>/validation``; nothing is created
    on disk by this call.
    """
    relative = f"data/{site_name}/{season}/validation"
    return Path(relative)
def _parse_iso_date(s, season: int) -> date | None:
if not s or not isinstance(s, str):
return None
try:
d = datetime.strptime(s[:10], "%Y-%m-%d").date()
except ValueError:
return None
y0, y1 = date(season, 1, 1), date(season, 12, 31)
return d if y0 <= d <= y1 else None
def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
) -> date | None:
    """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.

    Args:
        site_name: Site identifier, used to locate the phenology file when
            *phenology_path* is not given.
        season: Calendar year the returned date must fall in.
        transition: One of ``TRANSITIONS``.
        phenology_path: Optional explicit path to the phenology JSON file;
            when omitted, ``phenocam_phenology_path(site_name, season)`` is used.

    Returns:
        The date stored under ``green_up_50pct_date`` / ``green_down_50pct_date``,
        or None when the file is absent, unreadable, not valid JSON, not a JSON
        object, or when the stored value is missing or outside *season*.

    Raises:
        ValueError: If *transition* is not one of ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    path = phenology_path or phenocam_phenology_path(site_name, season)
    if not path.is_file():
        return None
    try:
        rec = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for files that are not valid UTF-8.
        return None
    if not isinstance(rec, dict):
        # A top-level list/number/string has no keys to look up; treat it
        # like a file with no usable record instead of crashing on .get().
        return None
    key = (
        "green_up_50pct_date"
        if transition == "green_up"
        else "green_down_50pct_date"
    )
    return _parse_iso_date(rec.get(key), season)
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Legacy midpoint: green-up date, else green-down date, else July 1.

    Transitions are tried in that order and the first one that yields a date
    wins; later transitions are not looked up once a date has been found.
    """
    # Generator keeps the lookups lazy so the second transition is only
    # queried when the first one produced nothing.
    candidates = (
        transition_midpoint(site_name, season, name, phenology_path)
        for name in ("green_up", "green_down")
    )
    fallback = date(season, 7, 1)
    return next((found for found in candidates if found), fallback)
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """Return an inclusive ``(start, end)`` window of *gap_days* days around *mid*.

    For an even *gap_days* the extra day goes before *mid*. If the window
    crosses a boundary of the calendar year *season* it is shifted back inside
    the year (keeping its width where possible) and clamped to Jan 1 / Dec 31.
    """
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    lo = mid - timedelta(days=days_before)
    hi = mid + timedelta(days=days_after)

    year_first = date(season, 1, 1)
    year_last = date(season, 12, 31)

    if lo < year_first:
        # Push the whole window forward by the amount it undershoots.
        undershoot = year_first - lo
        lo = year_first
        hi = min(year_last, hi + undershoot)
    if hi > year_last:
        # Pull the whole window backward by the amount it overshoots.
        overshoot = hi - year_last
        hi = year_last
        lo = max(year_first, lo - overshoot)
    return lo, hi
def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """Return sorted (acquisition_date, filename) for *REFL.tif.

    Args:
        prepared_s2: Directory to scan (non-recursively) for ``*REFL.tif``.

    Returns:
        ``(date, filename)`` tuples ordered by date; files sharing a date keep
        their sorted-path order. Empty when *prepared_s2* is not a directory.
        Files whose name does not match ``REFL_DATE_RE``, or whose 8-digit
        stamp is not a real calendar date, are skipped.
    """
    out: list[tuple[date, str]] = []
    if not prepared_s2.is_dir():
        return out
    for p in sorted(prepared_s2.glob("*REFL.tif")):
        m = REFL_DATE_RE.search(p.name)
        if not m:
            continue
        try:
            d = datetime.strptime(m.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that are not a valid date (e.g. "20231345"):
            # skip this file rather than abort the whole listing.
            continue
        out.append((d, p.name))
    # Stable sort: ordering by date only preserves the name order from above.
    out.sort(key=lambda x: x[0])
    return out
def nearest_s2_acquisition(
prediction: date, pairs: list[tuple[date, str]]
) -> tuple[date, str] | None:
if not pairs:
return None
return min(pairs, key=lambda t: abs((t[0] - prediction).days))
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """Build one manifest entry per (transition, gap_days) combination.

    Transitions for which ``transition_midpoint`` returns None are skipped.
    Each entry records the midpoint, the window from ``centered_window``, the
    prediction date (equal to the midpoint) and the acquisition listed under
    ``data/<site>/<season>/prepared_<strategy>/s2`` that is nearest to it.
    The withheld fields are None when no acquisition is listed.
    """
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    acquisitions = list_s2_refl_dates(s2_dir)

    manifest: list[dict] = []
    for transition in transitions:
        midpoint = transition_midpoint(site_name, season, transition)
        if midpoint is None:
            continue

        # The prediction date is the midpoint itself, so the nearest scene is
        # the same for every gap length of this transition.
        prediction = midpoint
        nearest = nearest_s2_acquisition(prediction, acquisitions)
        if nearest is not None:
            scene_date, scene_name = nearest[0].isoformat(), nearest[1]
        else:
            scene_date, scene_name = None, None

        for gap_days in gap_lengths:
            window_start, window_end = centered_window(midpoint, gap_days, season)
            entry = {
                "transition": transition,
                "gap_days": gap_days,
                "midpoint_rule": f"{transition}_50pct_date",
                "midpoint_date": midpoint.isoformat(),
                "window_start": window_start.isoformat(),
                "window_end": window_end.isoformat(),
                "prediction_date": prediction.isoformat(),
                "withheld_s2_date": scene_date,
                "withheld_s2_filename": scene_name,
            }
            manifest.append(entry)
    return manifest
def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
) -> Path:
    """Reproducibility sidecar for withheld scenes and gap placement.

    Writes ``gap_withheld_images.json`` into the validation directory,
    creating that directory first if it does not exist.

    Args:
        site_name: Site identifier, copied into the payload and every record.
        season: Season year, copied into the payload and every record.
        entries: Manifest entries; keys missing from an entry are written as
            ``null``.

    Returns:
        Path of the written JSON file.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # Keys copied verbatim from each manifest entry into its sidecar record.
    copied_keys = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{key: e.get(key) for key in copied_keys},
        }
        for e in entries
    ]
    # Allow standalone use: without this the write fails when the validation
    # directory has not been created by an earlier step.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
    *,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Write ``gap_manifest.json`` and the withheld-image sidecar.

    Creates the validation directory if needed, builds the manifest entries,
    writes the manifest, then writes the sidecar from the same entries.
    *site_position* is stored as a list under ``site_position_lat_lon``.

    Returns:
        Path of the written manifest file.
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)

    entries = build_manifest_entries(
        site_name,
        season,
        gap_lengths=gap_lengths,
        transitions=transitions,
        s2_calendar_strategy=s2_calendar_strategy,
    )

    manifest = {
        "site_name": site_name,
        "season": season,
        "site_position_lat_lon": list(site_position),
        "s2_calendar_strategy": s2_calendar_strategy,
        "entries": entries,
    }
    manifest_path = target_dir / "gap_manifest.json"
    serialized = json.dumps(manifest, indent=2)
    manifest_path.write_text(f"{serialized}\n", encoding="utf-8")

    # The sidecar is written only after the manifest itself is on disk.
    write_gap_withheld_images(site_name, season, entries)
    return manifest_path
def load_manifest(site_name: str, season: int) -> dict:
    """Load and return the parsed ``gap_manifest.json`` for a site and season.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
    """
    manifest_path = validation_dir(site_name, season) / "gap_manifest.json"
    if manifest_path.is_file():
        raw = manifest_path.read_text(encoding="utf-8")
        return json.loads(raw)
    raise FileNotFoundError(f"Missing manifest: {manifest_path}")