efast-phenocam-validation/fusion_phenology.py
Felix Delattre e3af4bf2f4 foo
2026-05-29 08:41:44 +02:00

263 lines
8.4 KiB
Python

"""
No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).

Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
and absolute day offsets vs. PhenoCam ``phenocam_phenology.json``.

Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
(:mod:`gap_validation.phenology_offsets`).
"""
from __future__ import annotations

import argparse
import json
import math
import re
from datetime import datetime
from pathlib import Path

from metrics_stats import _norm_date_key, load_timeseries
from phenology_timesat import (
    _timesat as _timesat_pkg,
    build_yraw_three_years,
    iter_sites_seasons_from_sites_geojson,
    phenocam_phenology_path,
    run_timesat_phenology_from_yraw,
)
# Scenario keys processed by default: every (strategy, sigma) combination
# without a suffix first, followed by the same combinations with ``_itb``.
FUSION_SCENARIO_KEYS: tuple[str, ...] = tuple(
    f"{strategy}_sigma{sigma}{suffix}"
    for suffix in ("", "_itb")
    for strategy in ("aggressive", "nonaggressive")
    for sigma in (20, 30)
)
def fusion_phenology_path(site_name: str, season: int) -> Path:
    """Return the output location ``data/{site_name}/{season}/fusion_phenology.json``."""
    segments = ("data", f"{site_name}", f"{season}", "fusion_phenology.json")
    return Path("/".join(segments))
def parse_scenario_key(key: str) -> tuple[str, int, str]:
    """Split a scenario key into ``(strategy, sigma, mode)``.

    ``aggressive_sigma20`` → ``("aggressive", 20, "bti")`` and
    ``nonaggressive_sigma30_itb`` → ``("nonaggressive", 30, "itb")``.

    Args:
        key: Scenario key of the form ``{aggressive|nonaggressive}_sigma{digits}``
            with an optional single trailing ``_itb``.

    Returns:
        The strategy name, the sigma as an ``int``, and the mode (``"itb"`` when
        the key ends with ``_itb``, otherwise ``"bti"``).

    Raises:
        ValueError: If *key* does not have exactly the form described above.
    """
    suffix = "_itb"
    if key.endswith(suffix):
        mode, base = "itb", key[: -len(suffix)]
    else:
        mode, base = "bti", key
    # Only the trailing suffix is removed: a key with ``_itb`` elsewhere (for
    # example the directory-style ``aggressive_itb_sigma20``) must be rejected
    # rather than silently reported as ``bti``. ``fullmatch`` also refuses a
    # trailing newline, which ``match`` with ``$`` would let through.
    m = re.fullmatch(r"(aggressive|nonaggressive)_sigma(\d+)", base)
    if m is None:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    return m.group(1), int(m.group(2)), mode
def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
    """Return the fusion GCC ``timeseries.json`` path for one scenario.

    The scenario directory is ``processed_{strategy}_sigma{sigma}`` for ``bti``
    keys and ``processed_{strategy}_itb_sigma{sigma}`` otherwise. Any
    ``ValueError`` from :func:`parse_scenario_key` propagates.
    """
    strategy, sigma, mode = parse_scenario_key(scenario_key)
    infix = "" if mode == "bti" else "_itb"
    processed = f"processed_{strategy}{infix}_sigma{sigma}"
    return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
    """Map YYYY-MM-DD → finite GCC value from a fusion ``timeseries.json``.

    Args:
        ts_path: Path handed to ``load_timeseries``; the result is iterated
            with ``.items()``, so it is expected to be a mapping.

    Returns:
        A dict keyed by the output of ``_norm_date_key``. Entries are dropped
        when the normalised key is falsy, the value is ``None``, the value
        cannot be converted with ``float()``, or the converted value is NaN or
        infinite.
    """
    raw = load_timeseries(ts_path)
    out: dict[str, float] = {}
    for k, v in raw.items():
        nk = _norm_date_key(k)
        if not nk or v is None:
            continue
        try:
            fv = float(v)
        except (TypeError, ValueError):
            continue
        # Reject NaN *and* ±inf: the previous ``fv == fv`` test only caught NaN
        # although it was meant as a finiteness check.
        if math.isfinite(fv):
            out[nk] = fv
    return out
def timesat_transitions_from_by_date(
by_date: dict[str, float], season: int
) -> dict[str, str | float | None]:
"""Run TIMESAT on fusion GCC; return transition dates for *season*."""
if len(by_date) < 10:
return {
"green_up_50pct_date": None,
"green_down_50pct_date": None,
"timesat_input": None,
"n_values": len(by_date),
}
y1, y2, y3 = season - 1, season, season + 1
yraw, stack_mode = build_yraw_three_years(by_date, y1, y2, y3)
out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
return {
"green_up_50pct_date": out.get("green_up_50pct_date"),
"green_down_50pct_date": out.get("green_down_50pct_date"),
"timesat_input": stack_mode,
"n_values": len(by_date),
}
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
if not iso_a or not iso_b:
return None
try:
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
return abs((a - b).days)
except ValueError:
return None
def _offsets_vs_reference(
    fused: dict[str, str | float | None], reference: dict
) -> dict[str, int | None]:
    """Absolute day offsets of the fused transition dates against *reference*.

    Both mappings are read under ``green_up_50pct_date`` and
    ``green_down_50pct_date``; each offset is whatever :func:`_day_offset`
    returns for that pair (``None`` when a date is missing or unparsable).
    """
    key_pairs = (
        ("abs_day_offset_green_up", "green_up_50pct_date"),
        ("abs_day_offset_green_down", "green_down_50pct_date"),
    )
    return {
        out_key: _day_offset(fused.get(date_key), reference.get(date_key))
        for out_key, date_key in key_pairs
    }
def compute_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> dict:
    """Build the fusion phenology payload for one site/season.

    The PhenoCam reference JSON is loaded when its file exists (otherwise an
    empty reference is used). For every key in *scenario_keys* the fusion GCC
    time series is located; a missing file yields an entry with only
    ``workflow`` and ``missing_timeseries``, while an existing one yields the
    parsed scenario fields, the TIMESAT result and the offsets vs. the
    reference.

    Returns:
        A dict with ``site_name``, ``season``, a ``reference`` summary and the
        per-key ``scenarios`` mapping.
    """
    ref_path = phenocam_phenology_path(site_name, season)
    if ref_path.is_file():
        reference = json.loads(ref_path.read_text(encoding="utf-8"))
    else:
        reference = {}

    scenarios: dict[str, dict] = {}
    for key in scenario_keys:
        strategy, sigma, mode = parse_scenario_key(key)
        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
        if not ts_path.is_file():
            scenarios[key] = {"workflow": mode, "missing_timeseries": str(ts_path)}
            continue

        fused = timesat_transitions_from_by_date(fusion_gcc_by_date(ts_path), season)
        entry: dict = {
            "workflow": mode,
            "strategy": strategy,
            "sigma": sigma,
            "timeseries_path": str(ts_path),
        }
        # Merge order matters for key order in the written JSON.
        entry.update(fused)
        entry.update(_offsets_vs_reference(fused, reference))
        scenarios[key] = entry

    reference_summary = {
        "source": str(ref_path) if ref_path.is_file() else None,
        "green_up_50pct_date": reference.get("green_up_50pct_date"),
        "green_down_50pct_date": reference.get("green_down_50pct_date"),
    }
    return {
        "site_name": site_name,
        "season": season,
        "reference": reference_summary,
        "scenarios": scenarios,
    }
def write_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> Path | None:
    """Compute and write ``fusion_phenology.json`` for one site/season.

    Returns the written path, or ``None`` (after printing a notice) when the
    ``timesat`` package handle imported from ``phenology_timesat`` is ``None``.
    Parent directories of the output file are created as needed.
    """
    out = fusion_phenology_path(site_name, season)
    if _timesat_pkg is None:
        print(
            f"[Fusion phenology] Skipped (no timesat); would write {out}. "
            "pip install timesat"
        )
        return None

    payload = compute_fusion_phenology_for_site(
        site_name, season, scenario_keys=scenario_keys
    )
    out.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2) + "\n"
    out.write_text(serialized, encoding="utf-8")

    # Scenarios for which at least one of the two transition dates is set.
    with_dates = [
        entry
        for entry in payload["scenarios"].values()
        if entry.get("green_up_50pct_date") or entry.get("green_down_50pct_date")
    ]
    n_ok = len(with_dates)
    print(
        f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
        f"≥1 transition date)"
    )
    return out
def write_fusion_phenology_all(
    *,
    sites_geojson: str | Path = "data/sites.geojson",
    seasons: dict[str, int] | None = None,
) -> int:
    """Write fusion phenology for many site/season pairs.

    When *seasons* is non-empty its ``site → season`` items are processed in
    sorted order; otherwise the pairs come from
    ``iter_sites_seasons_from_sites_geojson(sites_geojson)``.

    Returns:
        The number of pairs for which :func:`write_fusion_phenology_for_site`
        returned a path.
    """
    if seasons:
        pairs = sorted(seasons.items())
    else:
        pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)

    written = 0
    for site, season in pairs:
        print(f"=== {site} {season} ===")
        written += 1 if write_fusion_phenology_for_site(site, season) else 0
    print(f"[Fusion phenology] Processed {written} site/season pair(s).")
    return written
def main() -> None:
    """Command-line entry point.

    ``--all`` processes every site (restricted to the hard-coded primary
    seasons when ``--primary-only`` is also given); otherwise both ``--site``
    and ``--season`` are required. Exits via ``SystemExit`` when ``timesat`` is
    unavailable or the required arguments are missing.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
    )
    parser.add_argument("--site", type=str, default=None)
    parser.add_argument("--season", type=int, default=None)
    parser.add_argument(
        "--all",
        action="store_true",
        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
    )
    parser.add_argument(
        "--primary-only",
        action="store_true",
        help="With --all: only thesis primary seasons per site.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=Path("data/sites.geojson"),
    )
    args = parser.parse_args()

    if _timesat_pkg is None:
        raise SystemExit("Install timesat: pip install timesat")

    if args.all:
        selected_seasons = None
        if args.primary_only:
            # Primary season per site, used instead of the geojson listing.
            selected_seasons = {
                "forthgr": 2024,
                "innsbruck": 2024,
                "pitsalu": 2024,
                "vindeln2": 2023,
                "sunflowerjerez1": 2024,
                "institutekarnobat": 2024,
            }
        write_fusion_phenology_all(
            sites_geojson=args.sites_geojson,
            seasons=selected_seasons,
        )
        return

    if not (args.site and args.season is not None):
        raise SystemExit("Provide --site and --season, or use --all --primary-only")
    write_fusion_phenology_for_site(args.site, args.season)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()