This commit is contained in:
Felix Delattre 2026-05-29 08:41:44 +02:00
parent be17f64aa2
commit e3af4bf2f4
5 changed files with 333 additions and 32 deletions

1
.gitignore vendored
View file

@ -44,5 +44,4 @@ dist/
Thumbs.db
AGENTS.md
METHODOLOGY.md
.vibe

263
fusion_phenology.py Normal file
View file

@ -0,0 +1,263 @@
"""
No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
and day offsets vs. PhenoCam ``phenocam_phenology.json``.
Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
(:mod:`gap_validation.phenology_offsets`).
"""
from __future__ import annotations
import argparse
import json
import re
from datetime import datetime
from pathlib import Path
from metrics_stats import _norm_date_key, load_timeseries
from phenology_timesat import (
_timesat as _timesat_pkg,
build_yraw_three_years,
iter_sites_seasons_from_sites_geojson,
phenocam_phenology_path,
run_timesat_phenology_from_yraw,
)
# Default scenario keys: every strategy x sigma combination, first without a
# suffix and then with the ``_itb`` suffix (see ``parse_scenario_key``).
FUSION_SCENARIO_KEYS: tuple[str, ...] = tuple(
    f"{strategy}_sigma{sigma}{suffix}"
    for suffix in ("", "_itb")
    for strategy in ("aggressive", "nonaggressive")
    for sigma in (20, 30)
)
def fusion_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative output path of the fusion phenology JSON for one site/season."""
    relative = f"data/{site_name}/{season}/fusion_phenology.json"
    return Path(relative)
def parse_scenario_key(key: str) -> tuple[str, int, str]:
    """``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode).

    The mode is ``"itb"`` when the key carries a trailing ``_itb`` suffix and
    ``"bti"`` otherwise.

    Only a *trailing* ``_itb`` is recognised.  Keys with the marker anywhere
    else (e.g. ``aggressive_itb_sigma20``) or repeated are rejected instead of
    being silently reinterpreted as a different scenario.

    Raises:
        ValueError: if *key* does not follow
            ``(aggressive|nonaggressive)_sigma<digits>[_itb]``.
    """
    # One anchored pattern validates the whole key, so a stray "_itb" in the
    # middle can no longer be stripped out and mistaken for a plain key.
    m = re.fullmatch(r"(aggressive|nonaggressive)_sigma(\d+)(_itb)?", key)
    if m is None:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    strategy, sigma, itb_suffix = m.groups()
    return strategy, int(sigma), "itb" if itb_suffix else "bti"
def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
    """Return the path of the fusion GCC ``timeseries.json`` for one scenario.

    The ``processed_*`` directory name is derived from the parsed scenario key;
    for the ``itb`` mode an ``_itb`` tag sits between strategy and sigma.
    Propagates the ``ValueError`` of :func:`parse_scenario_key` for bad keys.
    """
    strategy, sigma, mode = parse_scenario_key(scenario_key)
    workflow_tag = "" if mode == "bti" else "_itb"
    processed = f"processed_{strategy}{workflow_tag}_sigma{sigma}"
    return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
    """YYYY-MM-DD → GCC from fusion ``timeseries.json``.

    Entries are dropped when the date key does not normalise to a truthy value
    (``_norm_date_key``), when the value is ``None`` or cannot be converted to
    ``float``, or when the converted value is NaN or ±infinity.  If several raw
    keys normalise to the same date, the last one wins.
    """
    # Local import keeps this block self-contained (``math`` is not imported
    # at module level).
    import math

    out: dict[str, float] = {}
    for raw_key, raw_value in load_timeseries(ts_path).items():
        date_key = _norm_date_key(raw_key)
        if not date_key or raw_value is None:
            continue
        try:
            gcc = float(raw_value)
        except (TypeError, ValueError):
            continue
        # ``x == x`` only filters NaN; isfinite also rejects +/-inf, which the
        # JSON parser accepts as ``Infinity`` and which is never a valid GCC.
        if math.isfinite(gcc):
            out[date_key] = gcc
    return out
def timesat_transitions_from_by_date(
    by_date: dict[str, float], season: int
) -> dict[str, str | float | None]:
    """Run TIMESAT on fusion GCC; return transition dates for *season*.

    With fewer than 10 values TIMESAT is not run and both dates as well as
    ``timesat_input`` are ``None``.  Otherwise the series is stacked over the
    years ``season - 1``, ``season`` and ``season + 1`` and the 50 % green-up /
    green-down dates reported by the TIMESAT run are returned.
    ``timesat_input`` is the stack mode returned by ``build_yraw_three_years``;
    ``n_values`` is always the number of entries in *by_date*.
    """
    green_up = green_down = stack_mode = None
    if len(by_date) >= 10:
        years = (season - 1, season, season + 1)
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
        fitted = run_timesat_phenology_from_yraw(yraw, years)
        green_up = fitted.get("green_up_50pct_date")
        green_down = fitted.get("green_down_50pct_date")
    return {
        "green_up_50pct_date": green_up,
        "green_down_50pct_date": green_down,
        "timesat_input": stack_mode,
        "n_values": len(by_date),
    }
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
if not iso_a or not iso_b:
return None
try:
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
return abs((a - b).days)
except ValueError:
return None
def _offsets_vs_reference(
    fused: dict[str, str | float | None], reference: dict
) -> dict[str, int | None]:
    """Absolute day offsets of the fused transition dates against *reference*.

    Both mappings are read with the same ``*_50pct_date`` keys; each offset is
    ``None`` when ``_day_offset`` cannot compute it (date missing or unparsable).
    """
    key_pairs = (
        ("abs_day_offset_green_up", "green_up_50pct_date"),
        ("abs_day_offset_green_down", "green_down_50pct_date"),
    )
    return {
        offset_key: _day_offset(fused.get(date_key), reference.get(date_key))
        for offset_key, date_key in key_pairs
    }
def compute_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> dict:
    """Build the fusion phenology payload for one site/season.

    The reference JSON is loaded when its file exists (otherwise an empty
    reference is used).  For every scenario key the fusion GCC timeseries is
    looked up: a missing file yields a stub entry with ``workflow`` and
    ``missing_timeseries``; otherwise the entry holds the scenario metadata,
    the TIMESAT transition dates and the absolute day offsets vs. the reference.

    Returns a dict with ``site_name``, ``season``, ``reference`` and
    ``scenarios`` (keyed by scenario key).
    """
    ref_path = phenocam_phenology_path(site_name, season)
    if ref_path.is_file():
        reference = json.loads(ref_path.read_text(encoding="utf-8"))
    else:
        reference = {}

    scenarios: dict[str, dict] = {}
    for key in scenario_keys:
        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
        if ts_path.is_file():
            fused = timesat_transitions_from_by_date(fusion_gcc_by_date(ts_path), season)
            strategy, sigma, mode = parse_scenario_key(key)
            entry = {
                "workflow": mode,
                "strategy": strategy,
                "sigma": sigma,
                "timeseries_path": str(ts_path),
            }
            entry.update(fused)
            entry.update(_offsets_vs_reference(fused, reference))
        else:
            entry = {
                "workflow": parse_scenario_key(key)[2],
                "missing_timeseries": str(ts_path),
            }
        scenarios[key] = entry

    reference_summary = {
        "source": str(ref_path) if ref_path.is_file() else None,
        "green_up_50pct_date": reference.get("green_up_50pct_date"),
        "green_down_50pct_date": reference.get("green_down_50pct_date"),
    }
    return {
        "site_name": site_name,
        "season": season,
        "reference": reference_summary,
        "scenarios": scenarios,
    }
def write_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> Path | None:
    """Compute and write ``fusion_phenology.json`` for one site/season.

    When the ``timesat`` package is unavailable nothing is written; a notice is
    printed and ``None`` is returned.  Otherwise the payload is written as
    indented JSON (parent directories are created), a one-line summary is
    printed and the output path is returned.
    """
    out = fusion_phenology_path(site_name, season)
    if _timesat_pkg is None:
        notice = (
            f"[Fusion phenology] Skipped (no timesat); would write {out}. "
            "pip install timesat"
        )
        print(notice)
        return None

    payload = compute_fusion_phenology_for_site(
        site_name, season, scenario_keys=scenario_keys
    )
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")

    # Scenarios for which TIMESAT produced at least one of the two dates.
    with_dates = [
        entry
        for entry in payload["scenarios"].values()
        if entry.get("green_up_50pct_date") or entry.get("green_down_50pct_date")
    ]
    n_ok = len(with_dates)
    summary = (
        f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
        f"≥1 transition date)"
    )
    print(summary)
    return out
def write_fusion_phenology_all(
    *,
    sites_geojson: str | Path = "data/sites.geojson",
    seasons: dict[str, int] | None = None,
) -> int:
    """Write fusion phenology for many site/season pairs; return how many were written.

    A non-empty *seasons* mapping (site → season) is processed in sorted order;
    when it is ``None`` or empty the pairs come from
    ``iter_sites_seasons_from_sites_geojson(sites_geojson)`` instead.  Only pairs
    for which ``write_fusion_phenology_for_site`` returned a path are counted.
    """
    if seasons:
        pairs = sorted(seasons.items())
    else:
        pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)

    written = 0
    for site, season in pairs:
        print(f"=== {site} {season} ===")
        written += 1 if write_fusion_phenology_for_site(site, season) else 0
    print(f"[Fusion phenology] Processed {written} site/season pair(s).")
    return written
def main() -> None:
    """Command-line entry point.

    Either ``--site`` together with ``--season`` processes a single pair, or
    ``--all`` processes every pair (restricted to the hard-coded primary
    seasons when ``--primary-only`` is also given).  ``--primary-only`` is only
    consulted in combination with ``--all``.  Exits via ``SystemExit`` when
    ``timesat`` is not installed or the arguments are incomplete.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
    )
    parser.add_argument("--site", type=str, default=None)
    parser.add_argument("--season", type=int, default=None)
    parser.add_argument(
        "--all",
        action="store_true",
        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
    )
    parser.add_argument(
        "--primary-only",
        action="store_true",
        help="With --all: only thesis primary seasons per site.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=Path("data/sites.geojson"),
    )
    args = parser.parse_args()

    # Checked after argument parsing so that ``--help`` works without timesat.
    if _timesat_pkg is None:
        raise SystemExit("Install timesat: pip install timesat")

    if not args.all:
        if not args.site or args.season is None:
            raise SystemExit("Provide --site and --season, or use --all --primary-only")
        write_fusion_phenology_for_site(args.site, args.season)
        return

    primary_seasons = {
        "forthgr": 2024,
        "innsbruck": 2024,
        "pitsalu": 2024,
        "vindeln2": 2023,
        "sunflowerjerez1": 2024,
        "institutekarnobat": 2024,
    }
    write_fusion_phenology_all(
        sites_geojson=args.sites_geojson,
        seasons=primary_seasons if args.primary_only else None,
    )
if __name__ == "__main__":
    main()

View file

@ -7,11 +7,8 @@ import json
from datetime import datetime
from pathlib import Path
from phenology_timesat import (
build_yraw_three_years,
phenocam_phenology_path,
run_timesat_phenology_from_yraw,
)
from fusion_phenology import timesat_transitions_from_by_date
from phenology_timesat import phenocam_phenology_path
from gap_validation.batch_spatial import (
PRIMARY_SEASON,
@ -35,9 +32,7 @@ def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
y1, y2, y3 = season - 1, season, season + 1
yraw, _mode = build_yraw_three_years(by_date, y1, y2, y3)
out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
out = timesat_transitions_from_by_date(by_date, season)
return {
"green_up": out.get("green_up_50pct_date"),
"green_down": out.get("green_down_50pct_date"),

View file

@ -11,6 +11,8 @@ from scipy.stats import pearsonr
# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
VALID_REFL_THRESHOLD = 0.001
GCC_DENOM_EPS = 1e-3
MAX_REPORTED_NSE_S2 = 20.0
def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
@ -18,15 +20,27 @@ def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.nd
out = np.full_like(blue, np.nan, dtype=np.float64)
m = (
np.isfinite(t)
& (t > 0)
& (t >= GCC_DENOM_EPS)
& np.isfinite(blue)
& np.isfinite(green)
& np.isfinite(red)
& (blue > GCC_DENOM_EPS)
& (green > GCC_DENOM_EPS)
& (red > GCC_DENOM_EPS)
)
out[m] = green[m].astype(np.float64) / t[m]
return out.astype(np.float32)
def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
    """Per-pixel mask where bands 1-3 are all finite and above ``GCC_DENOM_EPS``.

    Bands 1-3 are presumably blue, green, red of a BtI REFL raster (verify
    against the writer).  Returns ``None`` when the raster has fewer than three
    bands, i.e. the check is not applicable.
    """
    with rasterio.open(fusion_path) as src:
        if src.count < 3:
            return None
        bgr = src.read(indexes=[1, 2, 3]).astype(np.float32)
    all_finite = np.isfinite(bgr).all(axis=0)
    all_above_eps = (bgr > GCC_DENOM_EPS).all(axis=0)
    return all_finite & all_above_eps
def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
"""Fused GCC: BtI from 4-band REFL or ItB single-band GCC."""
with rasterio.open(fusion_path) as src:
@ -73,8 +87,10 @@ def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
d = src.read(1).astype(np.float32)
return np.isfinite(d) & (d > VALID_REFL_THRESHOLD)
stacks = src.read().astype(np.float32)
ok = np.isfinite(stacks).all(axis=0) & (
np.nanmax(stacks, axis=0) > VALID_REFL_THRESHOLD
with np.errstate(all="ignore"):
mx = np.nanmax(stacks, axis=0)
ok = np.isfinite(stacks).all(axis=0) & np.isfinite(mx) & (
mx > VALID_REFL_THRESHOLD
)
return ok
@ -95,7 +111,11 @@ def spatial_scores(
mae = float(np.mean(np.abs(yt - yp)))
bias = float(np.mean(yp - yt))
den = float(np.sum((yt - mean_t) ** 2))
nse_s2 = float(1.0 - np.sum((yt - yp) ** 2) / den) if den > 0 else None
nse_s2 = None
if den > 0:
raw = float(1.0 - np.sum((yt - yp) ** 2) / den)
if abs(raw) <= MAX_REPORTED_NSE_S2:
nse_s2 = raw
r = None
if np.std(yt) > 0 and np.std(yp) > 0:
r = float(pearsonr(yt, yp)[0])
@ -122,6 +142,28 @@ def withheld_gcc_on_fusion_grid(
return yt, yp, prof
def mask_gap_whittaker(
    yt: np.ndarray,
    y_gap: np.ndarray,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Mask for gap fusion and Whittaker vs withheld S2 (does not require no-gap fusion).

    A pixel is kept when the gap-fusion raster passes ``valid_mask_fused`` and
    both *yt* and *y_gap* are finite, above ``VALID_REFL_THRESHOLD`` and at most
    1.0.  When ``_positive_bgr_mask`` applies to the raster (it returns a mask
    rather than ``None``), that mask is intersected as well.
    """
    fused_ok = valid_mask_fused(fused_gap_path, mode)
    truth_ok = np.isfinite(yt) & (yt > VALID_REFL_THRESHOLD) & (yt <= 1.0)
    gap_ok = np.isfinite(y_gap) & (y_gap > VALID_REFL_THRESHOLD) & (y_gap <= 1.0)
    mask = fused_ok & truth_ok & gap_ok
    bgr_ok = _positive_bgr_mask(fused_gap_path)
    if bgr_ok is not None:
        mask &= bgr_ok
    return mask
def common_valid_mask(
yt: np.ndarray,
y_gap: np.ndarray,
@ -129,16 +171,14 @@ def common_valid_mask(
fused_gap_path: Path,
mode: str,
) -> np.ndarray:
"""Shared finite mask: truth GCC, gap/nogap preds, and fusion valid-data rules."""
m = (
valid_mask_fused(fused_gap_path, mode)
& np.isfinite(yt)
& np.isfinite(y_gap)
& (yt > VALID_REFL_THRESHOLD)
& (y_gap > VALID_REFL_THRESHOLD)
)
"""Mask including no-gap fusion when computing gap-vs-no-gap deltas (internal QA)."""
m = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
if y_nogap is not None:
m &= np.isfinite(y_nogap) & (y_nogap > VALID_REFL_THRESHOLD)
m &= (
np.isfinite(y_nogap)
& (y_nogap > VALID_REFL_THRESHOLD)
& (y_nogap <= 1.0)
)
return m
@ -150,18 +190,20 @@ def evaluate_gap_vs_withheld(
*,
whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
) -> dict:
"""Spatial metrics for gap and no-gap; deltas; optional Whittaker constant-field vs same mask.
"""Spatial metrics for gap and no-gap; optional Whittaker constant-field vs withheld S2.
``delta_rmse`` = RMSE_gap RMSE_no_gap; ``delta_nse`` = NSE_no_gap NSE_gap (higher gap loss positive delta_nse).
``delta_rmse`` / ``delta_nse`` compare gap vs no-gap fusion on a shared mask (QA only;
``delta_nse`` = NSE_no_gap NSE_gap, not exported to thesis tables).
"""
yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)
y_nogap = None
if fused_nogap_path is not None and fused_nogap_path.is_file():
y_nogap, _ = read_fused_gcc(fused_nogap_path)
mask = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
out: dict = {"gap": spatial_scores(yt, y_gap, mask)}
mask_gw = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
out: dict = {"gap": spatial_scores(yt, y_gap, mask_gw)}
if y_nogap is not None:
out["no_gap"] = spatial_scores(yt, y_nogap, mask)
mask_full = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
out["no_gap"] = spatial_scores(yt, y_nogap, mask_full)
g, ng = out["gap"], out["no_gap"]
if g.get("rmse") is not None and ng.get("rmse") is not None:
out["delta_rmse"] = float(g["rmse"] - ng["rmse"])
@ -180,7 +222,7 @@ def evaluate_gap_vs_withheld(
window_end_iso=w1,
)
if wgcc is not None:
out["whittaker"] = constant_field_scores(yt, float(wgcc), mask)
out["whittaker"] = constant_field_scores(yt, float(wgcc), mask_gw)
return out

View file

@ -11,7 +11,7 @@ from rasterio.vrt import WarpedVRT
from rasterio import shutil as rio_shutil
RESOLUTION_RATIO = 21
# Centred temporal MA on S3 LR stack (METHODOLOGY §5.4.3); odd ≥3, or 1 to disable.
# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
S3_MOVING_AVERAGE_WINDOW_DAYS = 5
@ -79,7 +79,9 @@ def _get_itb_base_dir(season, site_name, cleaning_strategy):
def _compute_gcc_from_refl_array(blue, green, red):
total = red.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
total = (
blue.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
)
mask = (total > 0) & np.isfinite(total)
gcc = np.zeros_like(green, dtype=np.float32)
gcc[mask] = green[mask].astype(np.float32) / total[mask]
@ -90,8 +92,8 @@ def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
dst_s2_dir.mkdir(parents=True, exist_ok=True)
for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
dst = dst_s2_dir / src.name
if dst.exists():
continue
if dst.is_symlink() or dst.exists():
dst.unlink(missing_ok=True)
try:
dst.symlink_to(src.resolve())
except OSError: