added gap validation.
This commit is contained in:
parent
374be6865d
commit
740249115b
12 changed files with 997 additions and 116 deletions
19
README.md
19
README.md
|
|
@ -52,10 +52,25 @@ By default, most stages in `run.py` are **commented out** (metrics-only). Uncomm
|
|||
With prepared data and EFAST installed:
|
||||
|
||||
```bash
|
||||
python -m gap_validation.run --site innsbruck --season 2024 --lat 47.116171 --lon 11.320308
|
||||
# Phenology sidecars (TIMESAT 50 % amplitude)
|
||||
python -m phenology_timesat --all
|
||||
|
||||
# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up)
|
||||
python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \
|
||||
--strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30
|
||||
|
||||
# All six sites, best BtI scenario per site
|
||||
python -m gap_validation.batch_spatial
|
||||
|
||||
# Full-season NSE_PC on gap-degraded stack (slow)
|
||||
python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688
|
||||
python -m gap_validation.batch_temporal
|
||||
|
||||
# TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier)
|
||||
python -m gap_validation.phenology_offsets
|
||||
```
|
||||
|
||||
Writes `data/{site}/{season}/validation/gap_manifest.json`, `gap_validation_summary.json`, and masked fusion under `validation/fusion/`. See `python -m gap_validation.run --help`.
|
||||
Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`.
|
||||
|
||||
## Data layout
|
||||
|
||||
|
|
|
|||
111
gap_validation/batch_spatial.py
Normal file
111
gap_validation/batch_spatial.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI scenario per site)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.calendar import DEFAULT_GAP_LENGTHS, TRANSITIONS
|
||||
from gap_validation.run import run_validation
|
||||
|
||||
# Primary season per site (matches scripts/export_thesis_tables.py).
|
||||
PRIMARY_SEASON = {
|
||||
"forthgr": 2024,
|
||||
"innsbruck": 2024,
|
||||
"pitsalu": 2024,
|
||||
"vindeln2": 2023,
|
||||
"sunflowerjerez1": 2024,
|
||||
"institutekarnobat": 2024,
|
||||
}
|
||||
|
||||
|
||||
def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]:
|
||||
data = json.loads(geojson.read_text(encoding="utf-8"))
|
||||
out: dict[str, tuple[float, float]] = {}
|
||||
for feat in data.get("features", []):
|
||||
props = feat.get("properties") or {}
|
||||
name = props.get("sitename")
|
||||
coords = (feat.get("geometry") or {}).get("coordinates")
|
||||
if not name or not coords or len(coords) < 2:
|
||||
continue
|
||||
lon, lat = float(coords[0]), float(coords[1])
|
||||
out[str(name)] = (lat, lon)
|
||||
return out
|
||||
|
||||
|
||||
def _parse_scenario(key: str) -> tuple[str, int | None, str]:
|
||||
"""``aggressive_sigma20`` → (strategy, sigma, bti)."""
|
||||
mode = "itb" if key.endswith("_itb") else "bti"
|
||||
base = key.replace("_itb", "")
|
||||
m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
|
||||
if not m:
|
||||
raise ValueError(f"Cannot parse scenario key: {key!r}")
|
||||
strategy = m.group(1)
|
||||
sigma = int(m.group(2))
|
||||
return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode
|
||||
|
||||
|
||||
def _best_bti_from_metrics(metrics_path: Path) -> str | None:
|
||||
if not metrics_path.is_file():
|
||||
return None
|
||||
temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {}
|
||||
best_key, best_nse = None, None
|
||||
for k, v in temporal.items():
|
||||
if not k.endswith("_itb") and isinstance(v, dict):
|
||||
n = v.get("nse_pc")
|
||||
if isinstance(n, (int, float)) and (best_nse is None or n > best_nse):
|
||||
best_nse = n
|
||||
best_key = k
|
||||
return best_key
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).")
|
||||
ap.add_argument("--data-dir", type=Path, default=Path("data"))
|
||||
ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
|
||||
ap.add_argument("--skip-fusion", action="store_true")
|
||||
ap.add_argument("--write-manifest-only", action="store_true")
|
||||
ap.add_argument(
|
||||
"--gap-days",
|
||||
type=int,
|
||||
action="append",
|
||||
help="Filter gap lengths (default: all 15 and 30 in manifest).",
|
||||
)
|
||||
args = ap.parse_args()
|
||||
positions = _site_positions(args.sites_geojson)
|
||||
gap_filter = args.gap_days
|
||||
|
||||
for site, season in sorted(PRIMARY_SEASON.items()):
|
||||
pos = positions.get(site)
|
||||
if not pos:
|
||||
print(f"[skip] No coordinates for {site}")
|
||||
continue
|
||||
metrics_path = args.data_dir / site / str(season) / "metrics.json"
|
||||
scenario_key = _best_bti_from_metrics(metrics_path)
|
||||
if not scenario_key:
|
||||
print(f"[skip] {site} {season}: no metrics.json / BtI scenarios")
|
||||
continue
|
||||
strategy, sigma, mode = _parse_scenario(scenario_key)
|
||||
sigma_kw = 30 if sigma == 30 else None
|
||||
print(f"=== {site} {season} {scenario_key} ===")
|
||||
out = run_validation(
|
||||
site,
|
||||
season,
|
||||
pos,
|
||||
strategy,
|
||||
sigma_kw,
|
||||
mode,
|
||||
skip_manifest=False,
|
||||
skip_fusion=args.skip_fusion,
|
||||
write_manifest_only=args.write_manifest_only,
|
||||
gap_days_filter=gap_filter,
|
||||
transition_filter=None,
|
||||
s2_calendar_strategy=strategy,
|
||||
)
|
||||
print(out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
56
gap_validation/batch_temporal.py
Normal file
56
gap_validation/batch_temporal.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""Run full-season gap-degraded NSE_PC for all thesis sites (best BtI scenario)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.batch_spatial import (
|
||||
PRIMARY_SEASON,
|
||||
_best_bti_from_metrics,
|
||||
_parse_scenario,
|
||||
_site_positions,
|
||||
)
|
||||
from gap_validation.temporal_pc import run_temporal_pc
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).")
|
||||
ap.add_argument("--data-dir", type=Path, default=Path("data"))
|
||||
ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
|
||||
ap.add_argument("--skip-fusion", action="store_true")
|
||||
ap.add_argument("--gap-days", type=int, action="append")
|
||||
args = ap.parse_args()
|
||||
positions = _site_positions(args.sites_geojson)
|
||||
|
||||
for site, season in sorted(PRIMARY_SEASON.items()):
|
||||
pos = positions.get(site)
|
||||
if not pos:
|
||||
print(f"[skip] No coordinates for {site}")
|
||||
continue
|
||||
metrics_path = args.data_dir / site / str(season) / "metrics.json"
|
||||
scenario_key = _best_bti_from_metrics(metrics_path)
|
||||
if not scenario_key:
|
||||
print(f"[skip] {site} {season}: no metrics.json")
|
||||
continue
|
||||
strategy, sigma, mode = _parse_scenario(scenario_key)
|
||||
sigma_kw = 30 if sigma == 30 else None
|
||||
print(f"=== {site} {season} temporal {scenario_key} ===")
|
||||
out = run_temporal_pc(
|
||||
site,
|
||||
season,
|
||||
pos,
|
||||
strategy,
|
||||
sigma_kw,
|
||||
mode,
|
||||
skip_manifest=False,
|
||||
skip_fusion=args.skip_fusion,
|
||||
gap_days_filter=args.gap_days,
|
||||
transition_filter=None,
|
||||
s2_calendar_strategy=strategy,
|
||||
)
|
||||
print(out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
"""Gap windows and nearest S2 acquisition (manifest inputs)."""
|
||||
"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -10,43 +10,58 @@ from pathlib import Path
|
|||
from phenology_timesat import phenocam_phenology_path
|
||||
|
||||
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
|
||||
DEFAULT_GAP_LENGTHS = (15, 30)
|
||||
TRANSITIONS = ("green_up", "green_down")
|
||||
|
||||
|
||||
def validation_dir(site_name: str, season: int) -> Path:
|
||||
return Path(f"data/{site_name}/{season}/validation")
|
||||
|
||||
|
||||
def phenology_midpoint(
|
||||
site_name: str, season: int, phenology_path: Path | None = None
|
||||
) -> date:
|
||||
"""Pick fusion gap midpoint: green-up if in season, else green-down, else July 1."""
|
||||
path = phenology_path or phenocam_phenology_path(site_name, season)
|
||||
def _parse_iso_date(s, season: int) -> date | None:
|
||||
if not s or not isinstance(s, str):
|
||||
return None
|
||||
try:
|
||||
d = datetime.strptime(s[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return None
|
||||
y0, y1 = date(season, 1, 1), date(season, 12, 31)
|
||||
fallback = date(season, 7, 1)
|
||||
return d if y0 <= d <= y1 else None
|
||||
|
||||
|
||||
def transition_midpoint(
|
||||
site_name: str,
|
||||
season: int,
|
||||
transition: str,
|
||||
phenology_path: Path | None = None,
|
||||
) -> date | None:
|
||||
"""TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing."""
|
||||
if transition not in TRANSITIONS:
|
||||
raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
|
||||
path = phenology_path or phenocam_phenology_path(site_name, season)
|
||||
if not path.is_file():
|
||||
return fallback
|
||||
return None
|
||||
try:
|
||||
rec = json.loads(path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return fallback
|
||||
up_s = rec.get("green_up_50pct_date")
|
||||
dn_s = rec.get("green_down_50pct_date")
|
||||
return None
|
||||
key = (
|
||||
"green_up_50pct_date"
|
||||
if transition == "green_up"
|
||||
else "green_down_50pct_date"
|
||||
)
|
||||
return _parse_iso_date(rec.get(key), season)
|
||||
|
||||
def _parse(s) -> date | None:
|
||||
if not s or not isinstance(s, str):
|
||||
return None
|
||||
try:
|
||||
d = datetime.strptime(s[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return None
|
||||
return d if y0 <= d <= y1 else None
|
||||
|
||||
up, dn = _parse(up_s), _parse(dn_s)
|
||||
if up:
|
||||
return up
|
||||
if dn:
|
||||
return dn
|
||||
return fallback
|
||||
def phenology_midpoint(
|
||||
site_name: str, season: int, phenology_path: Path | None = None
|
||||
) -> date:
|
||||
"""Legacy: green-up if in season, else green-down, else July 1."""
|
||||
for tr in ("green_up", "green_down"):
|
||||
d = transition_midpoint(site_name, season, tr, phenology_path)
|
||||
if d:
|
||||
return d
|
||||
return date(season, 7, 1)
|
||||
|
||||
|
||||
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
|
||||
|
|
@ -84,43 +99,77 @@ def nearest_s2_acquisition(
|
|||
) -> tuple[date, str] | None:
|
||||
if not pairs:
|
||||
return None
|
||||
best = min(pairs, key=lambda t: abs((t[0] - prediction).days))
|
||||
return best
|
||||
return min(pairs, key=lambda t: abs((t[0] - prediction).days))
|
||||
|
||||
|
||||
def build_manifest_entries(
|
||||
site_name: str,
|
||||
season: int,
|
||||
gap_lengths: tuple[int, ...] = (15, 30, 60, 90),
|
||||
gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
|
||||
transitions: tuple[str, ...] = TRANSITIONS,
|
||||
s2_calendar_strategy: str = "aggressive",
|
||||
) -> list[dict]:
|
||||
"""One entry per gap length: window, prediction=midpoint, withheld = nearest S2 to midpoint."""
|
||||
mid = phenology_midpoint(site_name, season)
|
||||
"""One entry per (transition, gap_days): phenology midpoint, window, withheld S2."""
|
||||
prepared_s2 = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
|
||||
pairs = list_s2_refl_dates(prepared_s2)
|
||||
entries = []
|
||||
for gap_days in gap_lengths:
|
||||
w0, w1 = centered_window(mid, gap_days, season)
|
||||
prediction = mid
|
||||
ns = nearest_s2_acquisition(prediction, pairs)
|
||||
if ns is None:
|
||||
withheld_date = None
|
||||
withheld_filename = None
|
||||
else:
|
||||
withheld_date, withheld_filename = ns[0].isoformat(), ns[1]
|
||||
entries.append(
|
||||
entries: list[dict] = []
|
||||
for transition in transitions:
|
||||
mid = transition_midpoint(site_name, season, transition)
|
||||
if mid is None:
|
||||
continue
|
||||
for gap_days in gap_lengths:
|
||||
w0, w1 = centered_window(mid, gap_days, season)
|
||||
prediction = mid
|
||||
ns = nearest_s2_acquisition(prediction, pairs)
|
||||
if ns is None:
|
||||
withheld_date = None
|
||||
withheld_filename = None
|
||||
else:
|
||||
withheld_date, withheld_filename = ns[0].isoformat(), ns[1]
|
||||
entries.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"midpoint_rule": f"{transition}_50pct_date",
|
||||
"midpoint_date": mid.isoformat(),
|
||||
"window_start": w0.isoformat(),
|
||||
"window_end": w1.isoformat(),
|
||||
"prediction_date": prediction.isoformat(),
|
||||
"withheld_s2_date": withheld_date,
|
||||
"withheld_s2_filename": withheld_filename,
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def write_gap_withheld_images(
|
||||
site_name: str,
|
||||
season: int,
|
||||
entries: list[dict],
|
||||
) -> Path:
|
||||
"""Reproducibility sidecar for withheld scenes and gap placement."""
|
||||
path = validation_dir(site_name, season) / "gap_withheld_images.json"
|
||||
records = []
|
||||
for e in entries:
|
||||
records.append(
|
||||
{
|
||||
"gap_days": gap_days,
|
||||
"midpoint_rule": "green_up_50pct else green_down_50pct else July01",
|
||||
"midpoint_date": mid.isoformat(),
|
||||
"window_start": w0.isoformat(),
|
||||
"window_end": w1.isoformat(),
|
||||
"prediction_date": prediction.isoformat(),
|
||||
"withheld_s2_date": withheld_date,
|
||||
"withheld_s2_filename": withheld_filename,
|
||||
"site_name": site_name,
|
||||
"season": season,
|
||||
"transition": e.get("transition"),
|
||||
"gap_days": e.get("gap_days"),
|
||||
"midpoint_date": e.get("midpoint_date"),
|
||||
"window_start": e.get("window_start"),
|
||||
"window_end": e.get("window_end"),
|
||||
"withheld_s2_date": e.get("withheld_s2_date"),
|
||||
"withheld_s2_filename": e.get("withheld_s2_filename"),
|
||||
}
|
||||
)
|
||||
return entries
|
||||
path.write_text(
|
||||
json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
|
||||
+ "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
def write_manifest(
|
||||
|
|
@ -128,20 +177,29 @@ def write_manifest(
|
|||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
s2_calendar_strategy: str = "aggressive",
|
||||
*,
|
||||
gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
|
||||
transitions: tuple[str, ...] = TRANSITIONS,
|
||||
) -> Path:
|
||||
out_dir = validation_dir(site_name, season)
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
entries = build_manifest_entries(
|
||||
site_name,
|
||||
season,
|
||||
gap_lengths=gap_lengths,
|
||||
transitions=transitions,
|
||||
s2_calendar_strategy=s2_calendar_strategy,
|
||||
)
|
||||
path = out_dir / "gap_manifest.json"
|
||||
payload = {
|
||||
"site_name": site_name,
|
||||
"season": season,
|
||||
"site_position_lat_lon": list(site_position),
|
||||
"s2_calendar_strategy": s2_calendar_strategy,
|
||||
"entries": build_manifest_entries(
|
||||
site_name, season, s2_calendar_strategy=s2_calendar_strategy
|
||||
),
|
||||
"entries": entries,
|
||||
}
|
||||
path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
||||
write_gap_withheld_images(site_name, season, entries)
|
||||
return path
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,20 @@
|
|||
"""EFAST with symlinked S2 dir (withhold one acquisition); outputs under validation/."""
|
||||
"""EFAST with symlinked S2 dir (gap window omitted); outputs under validation/."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from fusion import run_efast, run_efast_itb
|
||||
from preparation import _get_base_dir, _get_itb_base_dir
|
||||
|
||||
from gap_validation.s2_mask_dir import build_masked_s2_dir_bti, build_masked_s2_dir_itb
|
||||
from gap_validation.s2_mask_dir import (
|
||||
acquisition_yyyymmdd_in_window,
|
||||
assert_no_leakage,
|
||||
build_masked_s2_dir_bti,
|
||||
build_masked_s2_dir_itb,
|
||||
)
|
||||
|
||||
|
||||
def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
|
||||
|
|
@ -19,20 +25,35 @@ def validation_fusion_dir(
|
|||
site_name: str,
|
||||
season: int,
|
||||
gap_days: int,
|
||||
transition: str,
|
||||
strategy: str,
|
||||
sigma: int | None,
|
||||
mode: str,
|
||||
) -> Path:
|
||||
"""``data/.../validation/fusion/gap_{n}/{strategy}_sigma{20|30}_{bti|itb}/``."""
|
||||
"""``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``."""
|
||||
sig = 30 if sigma == 30 else 20
|
||||
return (
|
||||
Path(f"data/{site_name}/{season}/validation")
|
||||
/ "fusion"
|
||||
/ f"gap_{gap_days}"
|
||||
/ f"gap_{gap_days}_{transition}"
|
||||
/ f"{strategy}_sigma{sig}_{mode}"
|
||||
)
|
||||
|
||||
|
||||
def excluded_acquisition_days(
|
||||
prepared_s2: Path,
|
||||
window_start_iso: str,
|
||||
window_end_iso: str,
|
||||
withheld_yyyymmdd: str,
|
||||
) -> set[str]:
|
||||
"""Union of gap-window S2 days and the withheld validation acquisition."""
|
||||
w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
|
||||
w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
|
||||
excluded = acquisition_yyyymmdd_in_window(prepared_s2, w0, w1)
|
||||
excluded.add(withheld_yyyymmdd)
|
||||
return excluded
|
||||
|
||||
|
||||
def run_masked_fusion_one_date(
|
||||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
|
|
@ -41,19 +62,24 @@ def run_masked_fusion_one_date(
|
|||
sigma: int | None,
|
||||
mode: str,
|
||||
prediction_date_iso: str,
|
||||
window_start_iso: str,
|
||||
window_end_iso: str,
|
||||
withheld_yyyymmdd: str,
|
||||
fusion_output_dir: Path,
|
||||
) -> Path:
|
||||
"""Build temp masked S2 dir, run EFAST for ``prediction_date_iso`` only; return output dir."""
|
||||
"""Build temp masked S2 dir, run EFAST for ``prediction_date_iso`` only."""
|
||||
fusion_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
date_range = f"{prediction_date_iso[:10]}/{prediction_date_iso[:10]}"
|
||||
s3_dir = prepared_s3_dir(season, site_name, strategy)
|
||||
|
||||
with TemporaryDirectory(prefix="gapval_s2_") as tmp:
|
||||
tmp_s2 = Path(tmp) / "s2"
|
||||
if mode == "bti":
|
||||
prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
|
||||
build_masked_s2_dir_bti(prep_s2, withheld_yyyymmdd, tmp_s2)
|
||||
excl = excluded_acquisition_days(
|
||||
prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
|
||||
)
|
||||
build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
|
||||
assert_no_leakage(withheld_yyyymmdd, tmp_s2)
|
||||
run_efast(
|
||||
season,
|
||||
site_position,
|
||||
|
|
@ -62,13 +88,16 @@ def run_masked_fusion_one_date(
|
|||
sigma=sigma,
|
||||
date_range=date_range,
|
||||
s2_output_dir=tmp_s2,
|
||||
s3_output_dir=s3_dir,
|
||||
s3_output_dir=prepared_s3_dir(season, site_name, strategy),
|
||||
fusion_output_dir=fusion_output_dir,
|
||||
)
|
||||
elif mode == "itb":
|
||||
prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
|
||||
s3_itb = _get_itb_base_dir(season, site_name, strategy) / "s3"
|
||||
build_masked_s2_dir_itb(prep_s2, withheld_yyyymmdd, tmp_s2)
|
||||
excl = excluded_acquisition_days(
|
||||
prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
|
||||
)
|
||||
build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
|
||||
assert_no_leakage(withheld_yyyymmdd, tmp_s2)
|
||||
run_efast_itb(
|
||||
season,
|
||||
site_position,
|
||||
|
|
@ -77,7 +106,7 @@ def run_masked_fusion_one_date(
|
|||
sigma=sigma,
|
||||
date_range=date_range,
|
||||
s2_output_dir=tmp_s2,
|
||||
s3_output_dir=s3_itb,
|
||||
s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
|
||||
fusion_output_dir=fusion_output_dir,
|
||||
)
|
||||
else:
|
||||
|
|
@ -86,6 +115,64 @@ def run_masked_fusion_one_date(
|
|||
return fusion_output_dir
|
||||
|
||||
|
||||
def run_masked_fusion_season(
|
||||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
site_name: str,
|
||||
strategy: str,
|
||||
sigma: int | None,
|
||||
mode: str,
|
||||
window_start_iso: str,
|
||||
window_end_iso: str,
|
||||
withheld_yyyymmdd: str,
|
||||
fusion_output_dir: Path,
|
||||
) -> Path:
|
||||
"""Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier)."""
|
||||
fusion_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
date_range = f"{season}-01-01/{season}-12-31"
|
||||
|
||||
with TemporaryDirectory(prefix="gapval_s2_") as tmp:
|
||||
tmp_s2 = Path(tmp) / "s2"
|
||||
if mode == "bti":
|
||||
prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
|
||||
excl = excluded_acquisition_days(
|
||||
prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
|
||||
)
|
||||
build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
|
||||
assert_no_leakage(withheld_yyyymmdd, tmp_s2)
|
||||
run_efast(
|
||||
season,
|
||||
site_position,
|
||||
site_name,
|
||||
cleaning_strategy=strategy,
|
||||
sigma=sigma,
|
||||
date_range=date_range,
|
||||
s2_output_dir=tmp_s2,
|
||||
s3_output_dir=prepared_s3_dir(season, site_name, strategy),
|
||||
fusion_output_dir=fusion_output_dir,
|
||||
)
|
||||
else:
|
||||
prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
|
||||
excl = excluded_acquisition_days(
|
||||
prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
|
||||
)
|
||||
build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
|
||||
assert_no_leakage(withheld_yyyymmdd, tmp_s2)
|
||||
run_efast_itb(
|
||||
season,
|
||||
site_position,
|
||||
site_name,
|
||||
cleaning_strategy=strategy,
|
||||
sigma=sigma,
|
||||
date_range=date_range,
|
||||
s2_output_dir=tmp_s2,
|
||||
s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
|
||||
fusion_output_dir=fusion_output_dir,
|
||||
)
|
||||
|
||||
return fusion_output_dir
|
||||
|
||||
|
||||
def production_fusion_path(
|
||||
season: int,
|
||||
site_name: str,
|
||||
|
|
|
|||
146
gap_validation/phenology_offsets.py
Normal file
146
gap_validation/phenology_offsets.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from phenology_timesat import (
|
||||
build_yraw_three_years,
|
||||
phenocam_phenology_path,
|
||||
run_timesat_phenology_from_yraw,
|
||||
)
|
||||
|
||||
from gap_validation.batch_spatial import (
|
||||
PRIMARY_SEASON,
|
||||
_best_bti_from_metrics,
|
||||
_parse_scenario,
|
||||
_site_positions,
|
||||
)
|
||||
from gap_validation.calendar import load_manifest, validation_dir
|
||||
from gap_validation.temporal_pc import _fusion_gcc_timeseries
|
||||
|
||||
|
||||
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
|
||||
if not iso_a or not iso_b:
|
||||
return None
|
||||
try:
|
||||
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
|
||||
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
|
||||
return abs((a - b).days)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
|
||||
y1, y2, y3 = season - 1, season, season + 1
|
||||
yraw, _mode = build_yraw_three_years(by_date, y1, y2, y3)
|
||||
out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
|
||||
return {
|
||||
"green_up": out.get("green_up_50pct_date"),
|
||||
"green_down": out.get("green_down_50pct_date"),
|
||||
}
|
||||
|
||||
|
||||
def _temporal_fusion_dir(
|
||||
site: str, season: int, gap_days: int, transition: str, scenario_key: str
|
||||
) -> Path:
|
||||
strategy, sigma, mode = _parse_scenario(scenario_key)
|
||||
sig = 30 if sigma == 30 else 20
|
||||
return (
|
||||
validation_dir(site, season)
|
||||
/ "temporal"
|
||||
/ f"gap_{gap_days}_{transition}"
|
||||
/ f"{strategy}_sigma{sig}_{mode}"
|
||||
)
|
||||
|
||||
|
||||
def compute_offsets_for_site(
|
||||
site: str,
|
||||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
*,
|
||||
gap_days_list: tuple[int, ...] = (15, 30),
|
||||
) -> list[dict]:
|
||||
base = Path(f"data/{site}/{season}")
|
||||
metrics_path = base / "metrics.json"
|
||||
scenario_key = _best_bti_from_metrics(metrics_path)
|
||||
if not scenario_key:
|
||||
return []
|
||||
ref_path = phenocam_phenology_path(site, season)
|
||||
reference = (
|
||||
json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {}
|
||||
)
|
||||
manifest = load_manifest(site, season)
|
||||
rows: list[dict] = []
|
||||
for entry in manifest["entries"]:
|
||||
gd = entry.get("gap_days")
|
||||
tr = entry.get("transition")
|
||||
if gd not in gap_days_list or tr not in ("green_up", "green_down"):
|
||||
continue
|
||||
fusion_dir = _temporal_fusion_dir(site, season, gd, tr, scenario_key)
|
||||
if not fusion_dir.is_dir():
|
||||
continue
|
||||
_, _, mode = _parse_scenario(scenario_key)
|
||||
ts = _fusion_gcc_timeseries(fusion_dir, site_position, mode)
|
||||
if len(ts) < 10:
|
||||
continue
|
||||
fused = _timesat_transitions(ts, season)
|
||||
ref_key = (
|
||||
"green_up_50pct_date"
|
||||
if tr == "green_up"
|
||||
else "green_down_50pct_date"
|
||||
)
|
||||
ref_date = reference.get(ref_key)
|
||||
fused_date = fused.get("green_up" if tr == "green_up" else "green_down")
|
||||
rows.append(
|
||||
{
|
||||
"site_name": site,
|
||||
"season": season,
|
||||
"transition": tr,
|
||||
"gap_days": gd,
|
||||
"scenario": scenario_key,
|
||||
"reference_date": ref_date,
|
||||
"fused_date": fused_date,
|
||||
"abs_day_offset": _day_offset(fused_date, ref_date),
|
||||
"window_start": entry.get("window_start"),
|
||||
"window_end": entry.get("window_end"),
|
||||
}
|
||||
)
|
||||
return rows
|
||||
|
||||
|
||||
def write_phenology_offsets(
|
||||
site: str,
|
||||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
*,
|
||||
gap_days_list: tuple[int, ...] = (15, 30),
|
||||
) -> Path:
|
||||
rows = compute_offsets_for_site(
|
||||
site, season, site_position, gap_days_list=gap_days_list
|
||||
)
|
||||
out = validation_dir(site, season) / "gap_phenology_offsets.json"
|
||||
payload = {"site_name": site, "season": season, "records": rows}
|
||||
out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
||||
return out
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.")
|
||||
ap.add_argument("--data-dir", type=Path, default=Path("data"))
|
||||
ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
|
||||
args = ap.parse_args()
|
||||
positions = _site_positions(args.sites_geojson)
|
||||
for site, season in sorted(PRIMARY_SEASON.items()):
|
||||
pos = positions.get(site)
|
||||
if not pos:
|
||||
continue
|
||||
p = write_phenology_offsets(site, season, pos)
|
||||
print(p)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -9,7 +9,13 @@ import sys
|
|||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.calendar import load_manifest, validation_dir, write_manifest
|
||||
from gap_validation.calendar import (
|
||||
DEFAULT_GAP_LENGTHS,
|
||||
TRANSITIONS,
|
||||
load_manifest,
|
||||
validation_dir,
|
||||
write_manifest,
|
||||
)
|
||||
from gap_validation.fusion_masked import (
|
||||
production_fusion_path,
|
||||
run_masked_fusion_one_date,
|
||||
|
|
@ -65,6 +71,19 @@ def _git_rev() -> str | None:
|
|||
return None
|
||||
|
||||
|
||||
def _filter_entries(
|
||||
entries: list[dict],
|
||||
gap_days_filter: list[int] | None,
|
||||
transition_filter: list[str] | None,
|
||||
) -> list[dict]:
|
||||
out = entries
|
||||
if gap_days_filter:
|
||||
out = [e for e in out if e.get("gap_days") in gap_days_filter]
|
||||
if transition_filter:
|
||||
out = [e for e in out if e.get("transition") in transition_filter]
|
||||
return out
|
||||
|
||||
|
||||
def run_validation(
|
||||
site_name: str,
|
||||
season: int,
|
||||
|
|
@ -77,7 +96,10 @@ def run_validation(
|
|||
skip_fusion: bool,
|
||||
write_manifest_only: bool,
|
||||
gap_days_filter: list[int] | None,
|
||||
transition_filter: list[str] | None,
|
||||
s2_calendar_strategy: str,
|
||||
manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
|
||||
manifest_transitions: tuple[str, ...] = TRANSITIONS,
|
||||
) -> Path:
|
||||
base = Path(f"data/{site_name}/{season}")
|
||||
vdir = validation_dir(site_name, season)
|
||||
|
|
@ -85,24 +107,31 @@ def run_validation(
|
|||
|
||||
if not skip_manifest:
|
||||
write_manifest(
|
||||
site_name, season, site_position, s2_calendar_strategy=s2_calendar_strategy
|
||||
site_name,
|
||||
season,
|
||||
site_position,
|
||||
s2_calendar_strategy=s2_calendar_strategy,
|
||||
gap_lengths=manifest_gap_lengths,
|
||||
transitions=manifest_transitions,
|
||||
)
|
||||
if write_manifest_only:
|
||||
return vdir / "gap_manifest.json"
|
||||
|
||||
manifest = load_manifest(site_name, season)
|
||||
entries = manifest["entries"]
|
||||
if gap_days_filter:
|
||||
entries = [e for e in entries if e.get("gap_days") in gap_days_filter]
|
||||
entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
|
||||
|
||||
results: list[dict] = []
|
||||
for entry in entries:
|
||||
gap_days = entry["gap_days"]
|
||||
transition = entry.get("transition", "green_up")
|
||||
pred = entry["prediction_date"]
|
||||
w0 = entry["window_start"]
|
||||
w1 = entry["window_end"]
|
||||
fn = entry.get("withheld_s2_filename")
|
||||
if not fn:
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"error": "no_withheld_s2_filename",
|
||||
"entry": entry,
|
||||
|
|
@ -114,6 +143,7 @@ def run_validation(
|
|||
if not wh_ymd:
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"error": "could_not_parse_withheld_yyyymmdd",
|
||||
"withheld_s2_filename": fn,
|
||||
|
|
@ -125,20 +155,33 @@ def run_validation(
|
|||
)
|
||||
|
||||
fusion_out = validation_fusion_dir(
|
||||
site_name, season, gap_days, strategy, sigma, mode
|
||||
site_name, season, gap_days, transition, strategy, sigma, mode
|
||||
)
|
||||
if not skip_fusion:
|
||||
run_masked_fusion_one_date(
|
||||
season,
|
||||
site_position,
|
||||
site_name,
|
||||
strategy,
|
||||
sigma,
|
||||
mode,
|
||||
pred,
|
||||
wh_ymd,
|
||||
fusion_out,
|
||||
)
|
||||
try:
|
||||
run_masked_fusion_one_date(
|
||||
season,
|
||||
site_position,
|
||||
site_name,
|
||||
strategy,
|
||||
sigma,
|
||||
mode,
|
||||
pred,
|
||||
w0,
|
||||
w1,
|
||||
wh_ymd,
|
||||
fusion_out,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"error": str(e),
|
||||
"entry": entry,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
fused_gap = _fused_file(fusion_out, mode, ymd)
|
||||
prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd)
|
||||
|
|
@ -146,6 +189,7 @@ def run_validation(
|
|||
if wh_path is None or not fused_gap.is_file():
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"prediction_date": pred,
|
||||
"withheld_s2_filename": fn,
|
||||
|
|
@ -165,14 +209,17 @@ def run_validation(
|
|||
fused_gap,
|
||||
prod if prod.is_file() else None,
|
||||
mode,
|
||||
whittaker_context=(base, strategy, pred, withheld_iso),
|
||||
whittaker_context=(base, strategy, pred, withheld_iso, w0, w1),
|
||||
)
|
||||
fusion_nse = (spatial.get("gap") or {}).get("nse_s2")
|
||||
wh_nse = (spatial.get("whittaker") or {}).get("nse_s2")
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"prediction_date": pred,
|
||||
"window_start": w0,
|
||||
"window_end": w1,
|
||||
"withheld_s2_filename": fn,
|
||||
"scenario": {
|
||||
"strategy": strategy,
|
||||
|
|
@ -186,6 +233,7 @@ def run_validation(
|
|||
},
|
||||
"spatial": spatial,
|
||||
"whittaker_crossover_row": {
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"nse_s2_fusion": fusion_nse,
|
||||
"nse_s2_whittaker": wh_nse,
|
||||
|
|
@ -206,15 +254,15 @@ def run_validation(
|
|||
"command_line": sys.argv,
|
||||
"git_commit": _git_rev(),
|
||||
"manifest": str(vdir / "gap_manifest.json"),
|
||||
"gap_withheld_images": str(vdir / "gap_withheld_images.json"),
|
||||
"results": results,
|
||||
"whittaker_crossover": {
|
||||
scenario: {
|
||||
"metric": "nse_s2_spatial_vs_withheld_s2_gcc",
|
||||
"whittaker_definition": (
|
||||
"Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
|
||||
"withheld acquisition removed from the fit; prediction is a spatially constant "
|
||||
"field at the smoothed GCC(prediction_date), compared to withheld S2 GCC on the "
|
||||
"same valid mask as fusion (aligned with baseline.s2_whittaker_lambda400 spirit)."
|
||||
"all S2 dates in the gap window and the withheld acquisition removed; "
|
||||
"prediction is a spatially constant field at smoothed GCC(prediction_date)."
|
||||
),
|
||||
"first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
|
||||
crossover_rows,
|
||||
|
|
@ -250,6 +298,12 @@ def main() -> None:
|
|||
metavar="N",
|
||||
help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--transition",
|
||||
choices=list(TRANSITIONS),
|
||||
action="append",
|
||||
help="Restrict to transition(s); repeatable (default: all in manifest).",
|
||||
)
|
||||
ap.add_argument("--skip-manifest", action="store_true")
|
||||
ap.add_argument(
|
||||
"--skip-fusion",
|
||||
|
|
@ -259,7 +313,7 @@ def main() -> None:
|
|||
ap.add_argument(
|
||||
"--write-manifest-only",
|
||||
action="store_true",
|
||||
help="Write gap_manifest.json and exit (no EFAST).",
|
||||
help="Write gap_manifest.json + gap_withheld_images.json and exit.",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--s2-calendar-strategy",
|
||||
|
|
@ -270,6 +324,8 @@ def main() -> None:
|
|||
args = ap.parse_args()
|
||||
sigma_kw = 30 if args.sigma == 30 else None
|
||||
site_position = (args.lat, args.lon)
|
||||
gap_filter = args.gap_days if args.gap_days else None
|
||||
trans_filter = args.transition if args.transition else None
|
||||
out = run_validation(
|
||||
args.site,
|
||||
args.season,
|
||||
|
|
@ -280,7 +336,8 @@ def main() -> None:
|
|||
skip_manifest=args.skip_manifest,
|
||||
skip_fusion=args.skip_fusion,
|
||||
write_manifest_only=args.write_manifest_only,
|
||||
gap_days_filter=args.gap_days,
|
||||
gap_days_filter=gap_filter,
|
||||
transition_filter=trans_filter,
|
||||
s2_calendar_strategy=args.s2_calendar_strategy,
|
||||
)
|
||||
print(out)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
"""Symlink prepared S2 into a temp dir, omitting one acquisition (REFL + DIST_CLOUD)."""
|
||||
"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
|
||||
|
|
@ -14,10 +15,34 @@ def yyyymmdd_in_name(name: str) -> str | None:
|
|||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def yyyymmdd_from_iso(iso_d: str) -> str:
|
||||
return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
|
||||
|
||||
|
||||
def acquisition_yyyymmdd_in_window(
|
||||
prepared_s2: Path, window_start: date, window_end: date
|
||||
) -> set[str]:
|
||||
"""All S2 acquisition days (from REFL filenames) inside [window_start, window_end]."""
|
||||
out: set[str] = set()
|
||||
if not prepared_s2.is_dir():
|
||||
return out
|
||||
for p in prepared_s2.glob("*REFL.tif"):
|
||||
m = re.search(r"S2A_MSIL2A_(\d{8})_REFL\.tif$", p.name)
|
||||
if not m:
|
||||
continue
|
||||
d = datetime.strptime(m.group(1), "%Y%m%d").date()
|
||||
if window_start <= d <= window_end:
|
||||
out.add(m.group(1))
|
||||
return out
|
||||
|
||||
|
||||
def build_masked_s2_dir(
|
||||
prepared_s2: Path, withheld_yyyymmdd: str, dest: Path, patterns: tuple[str, ...]
|
||||
prepared_s2: Path,
|
||||
excluded_yyyymmdd: set[str],
|
||||
dest: Path,
|
||||
patterns: tuple[str, ...],
|
||||
) -> int:
|
||||
"""Symlink all files matching ``patterns`` except the withheld acquisition day."""
|
||||
"""Symlink all files matching ``patterns`` except excluded acquisition days."""
|
||||
dest.mkdir(parents=True, exist_ok=True)
|
||||
n = 0
|
||||
for pattern in patterns:
|
||||
|
|
@ -25,7 +50,7 @@ def build_masked_s2_dir(
|
|||
if not src.is_file() and not src.is_symlink():
|
||||
continue
|
||||
y = yyyymmdd_in_name(src.name)
|
||||
if y == withheld_yyyymmdd:
|
||||
if y and y in excluded_yyyymmdd:
|
||||
continue
|
||||
link = dest / src.name
|
||||
if link.exists() or link.is_symlink():
|
||||
|
|
@ -35,17 +60,32 @@ def build_masked_s2_dir(
|
|||
return n
|
||||
|
||||
|
||||
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
|
||||
"""Fail if the withheld validation acquisition is present in the fusion input dir."""
|
||||
for p in masked_s2_dir.iterdir():
|
||||
y = yyyymmdd_in_name(p.name)
|
||||
if y == withheld_yyyymmdd:
|
||||
raise RuntimeError(
|
||||
f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
|
||||
f"found in masked S2 dir {masked_s2_dir}"
|
||||
)
|
||||
|
||||
|
||||
def build_masked_s2_dir_bti(
|
||||
prepared_s2: Path, withheld_yyyymmdd: str, dest: Path
|
||||
prepared_s2: Path,
|
||||
excluded_yyyymmdd: set[str],
|
||||
dest: Path,
|
||||
) -> int:
|
||||
return build_masked_s2_dir(
|
||||
prepared_s2, withheld_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif")
|
||||
prepared_s2, excluded_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif")
|
||||
)
|
||||
|
||||
|
||||
def build_masked_s2_dir_itb(
|
||||
prepared_s2: Path, withheld_yyyymmdd: str, dest: Path
|
||||
prepared_s2: Path,
|
||||
excluded_yyyymmdd: set[str],
|
||||
dest: Path,
|
||||
) -> int:
|
||||
return build_masked_s2_dir(
|
||||
prepared_s2, withheld_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif")
|
||||
prepared_s2, excluded_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ def evaluate_gap_vs_withheld(
|
|||
fused_nogap_path: Path | None,
|
||||
mode: str,
|
||||
*,
|
||||
whittaker_context: tuple[Path, str, str, str] | None = None,
|
||||
whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
|
||||
) -> dict:
|
||||
"""Spatial metrics for gap and no-gap; deltas; optional Whittaker constant-field vs same mask.
|
||||
|
||||
|
|
@ -170,9 +170,14 @@ def evaluate_gap_vs_withheld(
|
|||
if whittaker_context is not None:
|
||||
from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series
|
||||
|
||||
base, strategy, prediction_iso, withheld_iso = whittaker_context
|
||||
base, strategy, prediction_iso, withheld_iso, w0, w1 = whittaker_context
|
||||
wgcc = whittaker_gcc_on_gap_masked_series(
|
||||
base, strategy, prediction_iso, withheld_iso
|
||||
base,
|
||||
strategy,
|
||||
prediction_iso,
|
||||
withheld_iso,
|
||||
window_start_iso=w0,
|
||||
window_end_iso=w1,
|
||||
)
|
||||
if wgcc is not None:
|
||||
out["whittaker"] = constant_field_scores(yt, float(wgcc), mask)
|
||||
|
|
|
|||
288
gap_validation/temporal_pc.py
Normal file
288
gap_validation/temporal_pc.py
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
"""Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_indices import _get_gcc_from_original
|
||||
from metrics_stats import (
|
||||
WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
_norm_date_key,
|
||||
_s2_gcc_series_from_preselection,
|
||||
_whittaker_smooth_dict,
|
||||
calculate_temporal_metrics,
|
||||
load_timeseries,
|
||||
)
|
||||
|
||||
from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest
|
||||
from gap_validation.fusion_masked import run_masked_fusion_season, validation_fusion_dir
|
||||
from gap_validation.run import (
|
||||
_filter_entries,
|
||||
_scenario_key,
|
||||
_withheld_iso,
|
||||
_yyyymmdd_from_withheld_filename,
|
||||
)
|
||||
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
|
||||
|
||||
|
||||
def _fusion_gcc_timeseries(
|
||||
fusion_dir: Path, site_position: tuple[float, float], mode: str
|
||||
) -> dict[str, float]:
|
||||
"""3×3 mean GCC at site from fused REFL/GCC rasters in ``fusion_dir``."""
|
||||
pattern = "REFL_*.tif" if mode == "bti" else "GCC_*.tif"
|
||||
out: dict[str, float] = {}
|
||||
for p in sorted(fusion_dir.glob(pattern)):
|
||||
m = re.search(r"_(\d{8})\.tif$", p.name)
|
||||
if not m:
|
||||
continue
|
||||
d = datetime.strptime(m.group(1), "%Y%m%d").date().isoformat()
|
||||
gcc = _get_gcc_from_original(p, site_position)
|
||||
if gcc is not None:
|
||||
out[d] = float(gcc)
|
||||
return out
|
||||
|
||||
|
||||
def whittaker_timeseries_gap_degraded(
|
||||
base: Path,
|
||||
strategy: str,
|
||||
window_start_iso: str,
|
||||
window_end_iso: str,
|
||||
withheld_iso: str,
|
||||
lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
) -> dict[str, float]:
|
||||
"""Daily Whittaker GCC on S2 preselection with gap window + withheld day removed."""
|
||||
all_gcc, flags = _s2_gcc_series_from_preselection(base)
|
||||
if not all_gcc:
|
||||
return {}
|
||||
idx = 0 if strategy == "aggressive" else 1
|
||||
w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
|
||||
w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
|
||||
wh_k = _norm_date_key(withheld_iso)
|
||||
|
||||
def in_window(dk: str) -> bool:
|
||||
try:
|
||||
d = datetime.strptime(dk[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return False
|
||||
return w0 <= d <= w1
|
||||
|
||||
kept = sorted(
|
||||
(d, g)
|
||||
for d, g in all_gcc.items()
|
||||
if d in flags
|
||||
and not flags[d][idx]
|
||||
and _norm_date_key(d) != wh_k
|
||||
and not in_window(_norm_date_key(d) or "")
|
||||
)
|
||||
if len(kept) < 2:
|
||||
return {}
|
||||
obs_d, obs_v = zip(*kept)
|
||||
return _whittaker_smooth_dict(obs_d, obs_v, lam)
|
||||
|
||||
|
||||
def run_temporal_pc(
|
||||
site_name: str,
|
||||
season: int,
|
||||
site_position: tuple[float, float],
|
||||
strategy: str,
|
||||
sigma: int | None,
|
||||
mode: str,
|
||||
*,
|
||||
skip_manifest: bool,
|
||||
skip_fusion: bool,
|
||||
gap_days_filter: list[int] | None,
|
||||
transition_filter: list[str] | None,
|
||||
s2_calendar_strategy: str,
|
||||
) -> Path:
|
||||
"""Run full-season gap fusion + NSE_PC; write ``gap_metrics.json``."""
|
||||
base = Path(f"data/{site_name}/{season}")
|
||||
vdir = validation_dir(site_name, season)
|
||||
vdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not skip_manifest:
|
||||
write_manifest(
|
||||
site_name,
|
||||
season,
|
||||
site_position,
|
||||
s2_calendar_strategy=s2_calendar_strategy,
|
||||
)
|
||||
|
||||
manifest = load_manifest(site_name, season)
|
||||
entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
|
||||
phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json"
|
||||
phenocam_ts = load_timeseries(phenocam_ts_path)
|
||||
|
||||
nogap_metrics_path = base / "metrics.json"
|
||||
nogap_nse: dict[str, float | None] = {}
|
||||
if nogap_metrics_path.is_file():
|
||||
m = json.loads(nogap_metrics_path.read_text(encoding="utf-8"))
|
||||
sk = _scenario_key(strategy, sigma, mode)
|
||||
block = (m.get("temporal") or {}).get(sk) or {}
|
||||
nogap_nse["nse_pc"] = block.get("nse_pc")
|
||||
|
||||
results: list[dict] = []
|
||||
crossover_rows: list[dict] = []
|
||||
|
||||
for entry in entries:
|
||||
transition = entry.get("transition", "green_up")
|
||||
gap_days = entry["gap_days"]
|
||||
pred = entry["prediction_date"]
|
||||
w0, w1 = entry["window_start"], entry["window_end"]
|
||||
fn = entry.get("withheld_s2_filename")
|
||||
if not fn:
|
||||
results.append(
|
||||
{"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"}
|
||||
)
|
||||
continue
|
||||
wh_ymd = _yyyymmdd_from_withheld_filename(fn)
|
||||
if not wh_ymd:
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"error": "bad_withheld_filename",
|
||||
}
|
||||
)
|
||||
continue
|
||||
withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
|
||||
|
||||
temporal_dir = (
|
||||
vdir / "temporal" / f"gap_{gap_days}_{transition}" / _scenario_key(strategy, sigma, mode)
|
||||
)
|
||||
if not skip_fusion:
|
||||
try:
|
||||
run_masked_fusion_season(
|
||||
season,
|
||||
site_position,
|
||||
site_name,
|
||||
strategy,
|
||||
sigma,
|
||||
mode,
|
||||
w0,
|
||||
w1,
|
||||
wh_ymd,
|
||||
temporal_dir,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
results.append(
|
||||
{
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"error": str(e),
|
||||
}
|
||||
)
|
||||
continue
|
||||
fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
|
||||
else:
|
||||
fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
|
||||
|
||||
fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
|
||||
wh_ts = whittaker_timeseries_gap_degraded(
|
||||
base, strategy, w0, w1, withheld_iso
|
||||
)
|
||||
wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts)
|
||||
|
||||
row: dict = {
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"prediction_date": pred,
|
||||
"window_start": w0,
|
||||
"window_end": w1,
|
||||
"withheld_s2_filename": fn,
|
||||
"temporal": {
|
||||
"fused": fused_metrics,
|
||||
"whittaker": wh_metrics,
|
||||
},
|
||||
"fusion_dir": str(temporal_dir),
|
||||
}
|
||||
if fused_metrics and nogap_nse.get("nse_pc") is not None:
|
||||
g_rmse = fused_metrics.get("rmse")
|
||||
ng_rmse = None
|
||||
if nogap_metrics_path.is_file():
|
||||
sk = _scenario_key(strategy, sigma, mode)
|
||||
ng_rmse = (
|
||||
(json.loads(nogap_metrics_path.read_text()).get("temporal") or {})
|
||||
.get(sk, {})
|
||||
.get("rmse")
|
||||
)
|
||||
n_g = fused_metrics.get("nse_pc")
|
||||
n_ng = nogap_nse["nse_pc"]
|
||||
if g_rmse is not None and ng_rmse is not None:
|
||||
row["delta_rmse"] = float(g_rmse - ng_rmse)
|
||||
if n_g is not None and n_ng is not None:
|
||||
row["delta_nse"] = float(n_ng - n_g)
|
||||
|
||||
fn_pc = (fused_metrics or {}).get("nse_pc")
|
||||
wh_pc = (wh_metrics or {}).get("nse_pc")
|
||||
row["utility_crossover_row"] = {
|
||||
"transition": transition,
|
||||
"gap_days": gap_days,
|
||||
"nse_pc_fusion": fn_pc,
|
||||
"nse_pc_whittaker": wh_pc,
|
||||
}
|
||||
crossover_rows.append(row["utility_crossover_row"])
|
||||
results.append(row)
|
||||
|
||||
scenario = _scenario_key(strategy, sigma, mode)
|
||||
payload = {
|
||||
"site_name": site_name,
|
||||
"season": season,
|
||||
"scenario": scenario,
|
||||
"tier": "temporal_nse_pc",
|
||||
"manifest": str(vdir / "gap_manifest.json"),
|
||||
"results": results,
|
||||
"utility_crossover": {
|
||||
scenario: {
|
||||
"metric": "nse_pc_vs_phenocam_gcc90",
|
||||
"first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker(
|
||||
crossover_rows,
|
||||
fusion_key="nse_pc_fusion",
|
||||
whittaker_key="nse_pc_whittaker",
|
||||
),
|
||||
"by_gap": crossover_rows,
|
||||
}
|
||||
},
|
||||
}
|
||||
out_path = vdir / "gap_metrics.json"
|
||||
out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
||||
return out_path
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.")
|
||||
ap.add_argument("--site", required=True)
|
||||
ap.add_argument("--season", type=int, required=True)
|
||||
ap.add_argument("--lat", type=float, required=True)
|
||||
ap.add_argument("--lon", type=float, required=True)
|
||||
ap.add_argument("--strategy", default="aggressive")
|
||||
ap.add_argument("--sigma", type=int, default=20, choices=[20, 30])
|
||||
ap.add_argument("--mode", default="bti", choices=["bti", "itb"])
|
||||
ap.add_argument("--gap-days", type=int, action="append")
|
||||
ap.add_argument("--transition", choices=list(TRANSITIONS), action="append")
|
||||
ap.add_argument("--skip-manifest", action="store_true")
|
||||
ap.add_argument("--skip-fusion", action="store_true")
|
||||
ap.add_argument("--s2-calendar-strategy", default="aggressive")
|
||||
args = ap.parse_args()
|
||||
sigma_kw = 30 if args.sigma == 30 else None
|
||||
out = run_temporal_pc(
|
||||
args.site,
|
||||
args.season,
|
||||
(args.lat, args.lon),
|
||||
args.strategy,
|
||||
sigma_kw,
|
||||
args.mode,
|
||||
skip_manifest=args.skip_manifest,
|
||||
skip_fusion=args.skip_fusion,
|
||||
gap_days_filter=args.gap_days,
|
||||
transition_filter=args.transition,
|
||||
s2_calendar_strategy=args.s2_calendar_strategy,
|
||||
)
|
||||
print(out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_stats import (
|
||||
|
|
@ -12,32 +13,48 @@ from metrics_stats import (
|
|||
)
|
||||
|
||||
|
||||
def _date_in_window(dk: str, start: date, end: date) -> bool:
|
||||
try:
|
||||
d = datetime.strptime(dk[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return False
|
||||
return start <= d <= end
|
||||
|
||||
|
||||
def whittaker_gcc_on_gap_masked_series(
|
||||
base: Path,
|
||||
strategy: str,
|
||||
prediction_iso: str,
|
||||
withheld_iso: str,
|
||||
*,
|
||||
window_start_iso: str | None = None,
|
||||
window_end_iso: str | None = None,
|
||||
lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
) -> float | None:
|
||||
"""Whittaker smooth on cloud-screened S2 GCC **excluding** the withheld acquisition day.
|
||||
|
||||
Comparator aligned with ``baseline.s2_whittaker_lambda400`` in ``metrics_stats`` (same λ,
|
||||
same preselection GCC), but the withheld date is removed so the smoother does not see
|
||||
the target acquisition. Value at ``prediction_iso`` (YYYY-MM-DD) is returned.
|
||||
"""
|
||||
"""Whittaker on cloud-screened S2 GCC excluding gap-window dates and withheld day."""
|
||||
pred_k = _norm_date_key(prediction_iso)
|
||||
wh_k = _norm_date_key(withheld_iso)
|
||||
if not pred_k or not wh_k:
|
||||
return None
|
||||
w0 = w1 = None
|
||||
if window_start_iso and window_end_iso:
|
||||
w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
|
||||
w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
|
||||
all_gcc, flags = _s2_gcc_series_from_preselection(base)
|
||||
if not all_gcc:
|
||||
return None
|
||||
idx = 0 if strategy == "aggressive" else 1
|
||||
kept = sorted(
|
||||
(d, g)
|
||||
for d, g in all_gcc.items()
|
||||
if d in flags and not flags[d][idx] and _norm_date_key(d) != wh_k
|
||||
)
|
||||
kept = []
|
||||
for d, g in all_gcc.items():
|
||||
if d not in flags or flags[d][idx]:
|
||||
continue
|
||||
dk = _norm_date_key(d)
|
||||
if not dk or dk == wh_k:
|
||||
continue
|
||||
if w0 is not None and w1 is not None and _date_in_window(dk, w0, w1):
|
||||
continue
|
||||
kept.append((d, g))
|
||||
kept.sort(key=lambda t: t[0])
|
||||
if len(kept) < 2:
|
||||
return None
|
||||
obs_d, obs_v = zip(*kept)
|
||||
|
|
@ -57,7 +74,7 @@ def first_gap_where_fusion_below_whittaker(
|
|||
for r in rows
|
||||
if r.get(fusion_key) is not None and r.get(whittaker_key) is not None
|
||||
]
|
||||
eligible.sort(key=lambda r: r["gap_days"])
|
||||
eligible.sort(key=lambda r: (r.get("transition") or "", r["gap_days"]))
|
||||
for r in eligible:
|
||||
if r[fusion_key] < r[whittaker_key]:
|
||||
return int(r["gap_days"])
|
||||
|
|
|
|||
|
|
@ -104,10 +104,10 @@
|
|||
if (!manifest?.entries?.length) return "";
|
||||
let h = `<h2>Gap manifest</h2>`;
|
||||
h += `<p class="section-note">From <code>data/${siteName}/${season}/validation/gap_manifest.json</code>. Midpoint rule: ${manifest.entries[0]?.midpoint_rule || "—"}.</p>`;
|
||||
h += `<table><tr><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`;
|
||||
h += `<table><tr><th>Transition</th><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`;
|
||||
for (const e of manifest.entries) {
|
||||
const w = `${e.window_start} → ${e.window_end}`;
|
||||
h += `<tr><td>${e.gap_days}</td><td>${e.prediction_date}</td><td>${w}</td><td>${e.withheld_s2_filename || "—"}</td></tr>`;
|
||||
h += `<tr><td>${e.transition || "—"}</td><td>${e.gap_days}</td><td>${e.prediction_date}</td><td>${w}</td><td>${e.withheld_s2_filename || "—"}</td></tr>`;
|
||||
}
|
||||
h += `</table>`;
|
||||
return h;
|
||||
|
|
@ -116,7 +116,7 @@
|
|||
function resultsTable(results) {
|
||||
if (!results?.length) return `<p class="empty">No result rows in summary.</p>`;
|
||||
const head = `<tr>
|
||||
<th>Gap</th><th>Prediction</th><th>Withheld REFL</th>
|
||||
<th>Transition</th><th>Gap</th><th>Prediction</th><th>Withheld REFL</th>
|
||||
<th class="num">RMSE<br><span style="font-weight:normal">gap</span></th>
|
||||
<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">gap</span></th>
|
||||
<th class="num">RMSE<br><span style="font-weight:normal">no gap</span></th>
|
||||
|
|
@ -130,7 +130,7 @@
|
|||
for (const r of results) {
|
||||
if (r.error) {
|
||||
parts.push(
|
||||
`<tr><td>${r.gap_days ?? "—"}</td><td colspan="10" class="err">${r.error}</td><td class="paths">${r.fused_gap_path || ""}</td></tr>`
|
||||
`<tr><td>${r.transition ?? "—"}</td><td>${r.gap_days ?? "—"}</td><td colspan="9" class="err">${r.error}</td><td class="paths">${r.fused_gap_path || ""}</td></tr>`
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -142,6 +142,7 @@
|
|||
const p = r.paths || {};
|
||||
const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl].filter(Boolean).join("<br>");
|
||||
parts.push(`<tr>
|
||||
<td>${r.transition || "—"}</td>
|
||||
<td>${r.gap_days}</td>
|
||||
<td>${r.prediction_date || "—"}</td>
|
||||
<td style="font-size:11px">${r.withheld_s2_filename || "—"}</td>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue