210 lines
6.9 KiB
Python
210 lines
6.9 KiB
Python
"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from datetime import date, datetime, timedelta
|
|
from pathlib import Path
|
|
|
|
from phenology_timesat import phenocam_phenology_path
|
|
|
|
# Filename pattern for the reflectance rasters this module looks for; group 1
# captures the eight-digit YYYYMMDD stamp that is parsed as the acquisition date.
REFL_DATE_RE: re.Pattern[str] = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")

# Gap widths, in days, used when a caller does not pass ``gap_lengths``.
DEFAULT_GAP_LENGTHS: tuple[int, ...] = (15, 30)

# Transition names accepted by ``transition_midpoint``.
TRANSITIONS: tuple[str, ...] = ("green_up", "green_down")
|
|
|
|
|
|
def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative validation directory for one site and season.

    The path is only constructed here; nothing is created on disk.
    """
    relative = f"data/{site_name}/{season}/validation"
    return Path(relative)
|
|
|
|
|
|
def _parse_iso_date(s, season: int) -> date | None:
    """Parse a ``YYYY-MM-DD`` prefix of ``s`` and keep it only if it lies in ``season``.

    Only the first ten characters are parsed, so a trailing time component
    is ignored. Returns None for falsy or non-string input, for text that
    does not parse, and for dates outside calendar year ``season``.
    """
    if not (s and isinstance(s, str)):
        return None
    try:
        parsed = datetime.strptime(s[:10], "%Y-%m-%d").date()
    except ValueError:
        return None
    first_day = date(season, 1, 1)
    last_day = date(season, 12, 31)
    if parsed < first_day or parsed > last_day:
        return None
    return parsed
|
|
|
|
|
|
def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
) -> date | None:
    """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.

    Args:
        site_name: Site identifier, passed to ``phenocam_phenology_path`` when
            no explicit path is given.
        season: Calendar year the returned date must fall in.
        transition: One of ``TRANSITIONS``.
        phenology_path: Optional JSON file to read instead of the default one.

    Returns:
        The parsed date, or None when the file is absent, cannot be read or
        decoded, is not a JSON object, lacks the key, or holds a value that
        is not a date inside ``season``.

    Raises:
        ValueError: If ``transition`` is not one of ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    path = phenology_path or phenocam_phenology_path(site_name, season)
    if not path.is_file():
        return None
    try:
        rec = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # UnicodeDecodeError: the file exists but is not valid UTF-8. Treat it
        # like any other unreadable record instead of crashing the caller.
        return None
    if not isinstance(rec, dict):
        # Valid JSON that is not an object (list, string, number) has no keys.
        return None
    key = (
        "green_up_50pct_date"
        if transition == "green_up"
        else "green_down_50pct_date"
    )
    return _parse_iso_date(rec.get(key), season)
|
|
|
|
|
|
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Legacy midpoint: green-up date if available, else green-down, else July 1.

    Each candidate comes from ``transition_midpoint``; later candidates are
    only evaluated when the earlier ones yielded nothing.
    """
    return (
        transition_midpoint(site_name, season, "green_up", phenology_path)
        or transition_midpoint(site_name, season, "green_down", phenology_path)
        or date(season, 7, 1)
    )
|
|
|
|
|
|
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """Return an inclusive ``(start, end)`` window of ``gap_days`` days around ``mid``.

    For an even width the extra day falls before ``mid``. A window that
    crosses a boundary of calendar year ``season`` is slid back inside the
    year rather than cut; it is shortened only when it cannot fit in the
    year at all.
    """
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    lo = mid - timedelta(days=days_before)
    hi = mid + timedelta(days=days_after)
    year_first = date(season, 1, 1)
    year_last = date(season, 12, 31)
    if lo < year_first:
        # Push the end later by the amount the start overshot January 1.
        lo, hi = year_first, min(year_last, hi + (year_first - lo))
    if hi > year_last:
        # Pull the start earlier by the amount the end overshot December 31.
        lo, hi = max(year_first, lo - (hi - year_last)), year_last
    return lo, hi
|
|
|
|
|
|
def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """Return sorted (acquisition_date, filename) for *REFL.tif.

    Args:
        prepared_s2: Directory to scan (not searched recursively).

    Returns:
        Pairs ordered by acquisition date; files sharing a date keep the
        order of their sorted paths. The list is empty when ``prepared_s2``
        is not a directory. Files whose name does not match
        ``REFL_DATE_RE``, or whose eight-digit stamp is not a real calendar
        date, are left out.
    """
    out: list[tuple[date, str]] = []
    if not prepared_s2.is_dir():
        return out
    for p in sorted(prepared_s2.glob("*REFL.tif")):
        m = REFL_DATE_RE.search(p.name)
        if not m:
            continue
        try:
            d = datetime.strptime(m.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that are not a calendar date (e.g. month 13): skip
            # the file like any other non-conforming name instead of aborting
            # the whole listing.
            continue
        out.append((d, p.name))
    # Stable sort: entries with equal dates stay in sorted-path order.
    out.sort(key=lambda pair: pair[0])
    return out
|
|
|
|
|
|
def nearest_s2_acquisition(
    prediction: date, pairs: list[tuple[date, str]]
) -> tuple[date, str] | None:
    """Pick the ``(date, filename)`` pair closest in time to ``prediction``.

    Distance is the absolute difference in whole days. On a tie the pair
    that comes first in ``pairs`` wins. Returns None when ``pairs`` is empty.
    """
    if not pairs:
        return None
    closest = None
    closest_gap = None
    for candidate in pairs:
        gap = abs((candidate[0] - prediction).days)
        # Strict "<" keeps the earliest candidate among equally distant ones.
        if closest_gap is None or gap < closest_gap:
            closest, closest_gap = candidate, gap
    return closest
|
|
|
|
|
|
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """Build one manifest entry per (transition, gap length).

    Each entry records the transition midpoint, the gap window centered on
    it, and the S2 scene nearest to the midpoint as the withheld image.
    Transitions for which ``transition_midpoint`` returns None produce no
    entries. The withheld fields are None when no S2 scene is listed.
    """
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    acquisitions = list_s2_refl_dates(s2_dir)
    manifest: list[dict] = []
    for transition in transitions:
        midpoint = transition_midpoint(site_name, season, transition)
        if midpoint is None:
            continue
        for gap_days in gap_lengths:
            window_start, window_end = centered_window(midpoint, gap_days, season)
            # The prediction date is the midpoint itself.
            nearest = nearest_s2_acquisition(midpoint, acquisitions)
            s2_date, s2_name = (
                (None, None)
                if nearest is None
                else (nearest[0].isoformat(), nearest[1])
            )
            entry = {
                "transition": transition,
                "gap_days": gap_days,
                "midpoint_rule": f"{transition}_50pct_date",
                "midpoint_date": midpoint.isoformat(),
                "window_start": window_start.isoformat(),
                "window_end": window_end.isoformat(),
                "prediction_date": midpoint.isoformat(),
                "withheld_s2_date": s2_date,
                "withheld_s2_filename": s2_name,
            }
            manifest.append(entry)
    return manifest
|
|
|
|
|
|
def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
) -> Path:
    """Write the reproducibility sidecar for withheld scenes and gap placement.

    Args:
        site_name: Site identifier, stored at the top level and in every record.
        season: Season year, stored at the top level and in every record.
        entries: Manifest entries; keys missing from an entry are written as null.

    Returns:
        Path of the written ``gap_withheld_images.json``.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # This function can be called on its own, so make sure the target
    # directory exists rather than failing with FileNotFoundError.
    path.parent.mkdir(parents=True, exist_ok=True)
    copied_keys = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{key: e.get(key) for key in copied_keys},
        }
        for e in entries
    ]
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
|
|
|
|
|
|
def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
    *,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Build and write ``gap_manifest.json`` plus the withheld-image sidecar.

    The validation directory is created if needed. Returns the path of the
    manifest file (not the sidecar).
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)
    manifest_entries = build_manifest_entries(
        site_name,
        season,
        gap_lengths=gap_lengths,
        transitions=transitions,
        s2_calendar_strategy=s2_calendar_strategy,
    )
    manifest_path = target_dir / "gap_manifest.json"
    document = {
        "site_name": site_name,
        "season": season,
        "site_position_lat_lon": list(site_position),
        "s2_calendar_strategy": s2_calendar_strategy,
        "entries": manifest_entries,
    }
    serialized = json.dumps(document, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")
    # The sidecar is derived from the same entries that went into the manifest.
    write_gap_withheld_images(site_name, season, manifest_entries)
    return manifest_path
|
|
|
|
|
|
def load_manifest(site_name: str, season: int) -> dict:
    """Read and parse ``gap_manifest.json`` for one site and season.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
    """
    manifest_path = validation_dir(site_name, season) / "gap_manifest.json"
    if manifest_path.is_file():
        raw = manifest_path.read_text(encoding="utf-8")
        return json.loads(raw)
    raise FileNotFoundError(f"Missing manifest: {manifest_path}")
|