diff --git a/.gitignore b/.gitignore index 3a62807..b4c999e 100644 --- a/.gitignore +++ b/.gitignore @@ -44,5 +44,4 @@ dist/ Thumbs.db AGENTS.md -METHODOLOGY.md .vibe \ No newline at end of file diff --git a/fusion_phenology.py b/fusion_phenology.py new file mode 100644 index 0000000..e14fb8c --- /dev/null +++ b/fusion_phenology.py @@ -0,0 +1,263 @@ +""" +No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude). + +Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario +directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes +``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates +and day offsets vs.\ PhenoCam ``phenocam_phenology.json``. + +Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json`` +(:mod:`gap_validation.phenology_offsets`). +""" + +from __future__ import annotations + +import argparse +import json +import re +from datetime import datetime +from pathlib import Path + +from metrics_stats import _norm_date_key, load_timeseries +from phenology_timesat import ( + _timesat as _timesat_pkg, + build_yraw_three_years, + iter_sites_seasons_from_sites_geojson, + phenocam_phenology_path, + run_timesat_phenology_from_yraw, +) + +FUSION_SCENARIO_KEYS: tuple[str, ...] = ( + "aggressive_sigma20", + "aggressive_sigma30", + "nonaggressive_sigma20", + "nonaggressive_sigma30", + "aggressive_sigma20_itb", + "aggressive_sigma30_itb", + "nonaggressive_sigma20_itb", + "nonaggressive_sigma30_itb", +) + + +def fusion_phenology_path(site_name: str, season: int) -> Path: + return Path(f"data/{site_name}/{season}/fusion_phenology.json") + + +def parse_scenario_key(key: str) -> tuple[str, int, str]: + """``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode).""" + mode = "itb" if key.endswith("_itb") else "bti" + base = key.replace("_itb", "") + m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base) + if not m: + raise ValueError(f"Cannot parse scenario key: {key!r}") + return m.group(1), int(m.group(2)), mode + + +def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path: + strategy, sigma, mode = parse_scenario_key(scenario_key) + if mode == "bti": + processed = f"processed_{strategy}_sigma{sigma}" + else: + processed = f"processed_{strategy}_itb_sigma{sigma}" + return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json") + + +def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]: + """YYYY-MM-DD → GCC from fusion ``timeseries.json``.""" + raw = load_timeseries(ts_path) + out: dict[str, float] = {} + for k, v in raw.items(): + nk = _norm_date_key(k) + if nk and v is not None: + try: + fv = float(v) + except (TypeError, ValueError): + continue + if fv == fv: # finite + out[nk] = fv + return out + + +def timesat_transitions_from_by_date( + by_date: dict[str, float], season: int +) -> dict[str, str | float | None]: + """Run TIMESAT on fusion GCC; return transition dates for *season*.""" + if len(by_date) < 10: + return { + "green_up_50pct_date": None, + "green_down_50pct_date": None, + "timesat_input": None, + "n_values": len(by_date), + } + y1, y2, y3 = season - 1, season, season + 1 + yraw, stack_mode = build_yraw_three_years(by_date, y1, y2, y3) + out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3)) + return { + "green_up_50pct_date": out.get("green_up_50pct_date"), + "green_down_50pct_date": out.get("green_down_50pct_date"), + "timesat_input": stack_mode, + "n_values": len(by_date), + } + + +def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None: + if not iso_a or not iso_b: + return None + try: + a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date() + b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date() + return abs((a - b).days) + except ValueError: + return None + + +def _offsets_vs_reference( + fused: dict[str, str | float | None], reference: dict +) -> dict[str, int | None]: + ref_up = reference.get("green_up_50pct_date") + ref_dn = reference.get("green_down_50pct_date") + fup = fused.get("green_up_50pct_date") + fdn = fused.get("green_down_50pct_date") + return { + "abs_day_offset_green_up": _day_offset(fup, ref_up), + "abs_day_offset_green_down": _day_offset(fdn, ref_dn), + } + + +def compute_fusion_phenology_for_site( + site_name: str, + season: int, + *, + scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS, +) -> dict: + ref_path = phenocam_phenology_path(site_name, season) + reference = ( + json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {} + ) + scenarios: dict[str, dict] = {} + for key in scenario_keys: + ts_path = fusion_gcc_timeseries_path(site_name, season, key) + if not ts_path.is_file(): + scenarios[key] = { + "workflow": parse_scenario_key(key)[2], + "missing_timeseries": str(ts_path), + } + continue + by_date = fusion_gcc_by_date(ts_path) + fused = timesat_transitions_from_by_date(by_date, season) + strategy, sigma, mode = parse_scenario_key(key) + scenarios[key] = { + "workflow": mode, + "strategy": strategy, + "sigma": sigma, + "timeseries_path": str(ts_path), + **fused, + **_offsets_vs_reference(fused, reference), + } + return { + "site_name": site_name, + "season": season, + "reference": { + "source": str(ref_path) if ref_path.is_file() else None, + "green_up_50pct_date": reference.get("green_up_50pct_date"), + "green_down_50pct_date": reference.get("green_down_50pct_date"), + }, + "scenarios": scenarios, + } + + +def write_fusion_phenology_for_site( + site_name: str, + season: int, + *, + scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS, +) -> Path | None: + if _timesat_pkg is None: + out = fusion_phenology_path(site_name, season) + print( + f"[Fusion phenology] Skipped (no timesat); would write {out}. " + "pip install timesat" + ) + return None + payload = compute_fusion_phenology_for_site( + site_name, season, scenario_keys=scenario_keys + ) + out = fusion_phenology_path(site_name, season) + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + n_ok = sum( + 1 + for s in payload["scenarios"].values() + if s.get("green_up_50pct_date") or s.get("green_down_50pct_date") + ) + print( + f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with " + f"≥1 transition date)" + ) + return out + + +def write_fusion_phenology_all( + *, + sites_geojson: str | Path = "data/sites.geojson", + seasons: dict[str, int] | None = None, +) -> int: + if seasons: + pairs = sorted((s, seasons[s]) for s in seasons.keys()) + else: + pairs = iter_sites_seasons_from_sites_geojson(sites_geojson) + n = 0 + for site, season in pairs: + print(f"=== {site} {season} ===") + if write_fusion_phenology_for_site(site, season): + n += 1 + print(f"[Fusion phenology] Processed {n} site/season pair(s).") + return n + + +def main() -> None: + ap = argparse.ArgumentParser( + description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries." + ) + ap.add_argument("--site", type=str, default=None) + ap.add_argument("--season", type=int, default=None) + ap.add_argument( + "--all", + action="store_true", + help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).", + ) + ap.add_argument( + "--primary-only", + action="store_true", + help="With --all: only thesis primary seasons per site.", + ) + ap.add_argument( + "--sites-geojson", + type=Path, + default=Path("data/sites.geojson"), + ) + args = ap.parse_args() + if _timesat_pkg is None: + raise SystemExit("Install timesat: pip install timesat") + + primary = { + "forthgr": 2024, + "innsbruck": 2024, + "pitsalu": 2024, + "vindeln2": 2023, + "sunflowerjerez1": 2024, + "institutekarnobat": 2024, + } + if args.all: + write_fusion_phenology_all( + sites_geojson=args.sites_geojson, + seasons=primary if args.primary_only else None, + ) + return + if not args.site or args.season is None: + raise SystemExit("Provide --site and --season, or use --all --primary-only") + write_fusion_phenology_for_site(args.site, args.season) + + +if __name__ == "__main__": + main() diff --git a/gap_validation/phenology_offsets.py b/gap_validation/phenology_offsets.py index 44afab9..7f779a7 100644 --- a/gap_validation/phenology_offsets.py +++ b/gap_validation/phenology_offsets.py @@ -7,11 +7,8 @@ import json from datetime import datetime from pathlib import Path -from phenology_timesat import ( - build_yraw_three_years, - phenocam_phenology_path, - run_timesat_phenology_from_yraw, -) +from fusion_phenology import timesat_transitions_from_by_date +from phenology_timesat import phenocam_phenology_path from gap_validation.batch_spatial import ( PRIMARY_SEASON, @@ -35,9 +32,7 @@ def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None: def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]: - y1, y2, y3 = season - 1, season, season + 1 - yraw, _mode = build_yraw_three_years(by_date, y1, y2, y3) - out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3)) + out = timesat_transitions_from_by_date(by_date, season) return { "green_up": out.get("green_up_50pct_date"), "green_down": out.get("green_down_50pct_date"), diff --git a/gap_validation/spatial_metrics.py b/gap_validation/spatial_metrics.py index 7663b3f..b405166 100644 --- a/gap_validation/spatial_metrics.py +++ b/gap_validation/spatial_metrics.py @@ -11,6 +11,8 @@ from scipy.stats import pearsonr # Match postprocessing valid mask on reflectance (METH / postprocessing.py). VALID_REFL_THRESHOLD = 0.001 +GCC_DENOM_EPS = 1e-3 +MAX_REPORTED_NSE_S2 = 20.0 def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray: @@ -18,15 +20,27 @@ def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.nd out = np.full_like(blue, np.nan, dtype=np.float64) m = ( np.isfinite(t) - & (t > 0) + & (t >= GCC_DENOM_EPS) & np.isfinite(blue) & np.isfinite(green) & np.isfinite(red) + & (blue > GCC_DENOM_EPS) + & (green > GCC_DENOM_EPS) + & (red > GCC_DENOM_EPS) ) out[m] = green[m].astype(np.float64) / t[m] return out.astype(np.float32) +def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None: + """Pixels with strictly positive blue, green, red (BtI REFL); None if not applicable.""" + with rasterio.open(fusion_path) as src: + if src.count < 3: + return None + stacks = src.read(indexes=[1, 2, 3]).astype(np.float32) + return np.isfinite(stacks).all(axis=0) & (stacks > GCC_DENOM_EPS).all(axis=0) + + def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]: """Fused GCC: BtI from 4-band REFL or ItB single-band GCC.""" with rasterio.open(fusion_path) as src: @@ -73,8 +87,10 @@ def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray: d = src.read(1).astype(np.float32) return np.isfinite(d) & (d > VALID_REFL_THRESHOLD) stacks = src.read().astype(np.float32) - ok = np.isfinite(stacks).all(axis=0) & ( - np.nanmax(stacks, axis=0) > VALID_REFL_THRESHOLD + with np.errstate(all="ignore"): + mx = np.nanmax(stacks, axis=0) + ok = np.isfinite(stacks).all(axis=0) & np.isfinite(mx) & ( + mx > VALID_REFL_THRESHOLD ) return ok @@ -95,7 +111,11 @@ def spatial_scores( mae = float(np.mean(np.abs(yt - yp))) bias = float(np.mean(yp - yt)) den = float(np.sum((yt - mean_t) ** 2)) - nse_s2 = float(1.0 - np.sum((yt - yp) ** 2) / den) if den > 0 else None + nse_s2 = None + if den > 0: + raw = float(1.0 - np.sum((yt - yp) ** 2) / den) + if abs(raw) <= MAX_REPORTED_NSE_S2: + nse_s2 = raw r = None if np.std(yt) > 0 and np.std(yp) > 0: r = float(pearsonr(yt, yp)[0]) @@ -122,6 +142,28 @@ def withheld_gcc_on_fusion_grid( return yt, yp, prof +def mask_gap_whittaker( + yt: np.ndarray, + y_gap: np.ndarray, + fused_gap_path: Path, + mode: str, +) -> np.ndarray: + """Mask for gap fusion and Whittaker vs withheld S2 (does not require no-gap fusion).""" + m = ( + valid_mask_fused(fused_gap_path, mode) + & np.isfinite(yt) + & np.isfinite(y_gap) + & (yt > VALID_REFL_THRESHOLD) + & (yt <= 1.0) + & (y_gap > VALID_REFL_THRESHOLD) + & (y_gap <= 1.0) + ) + pos = _positive_bgr_mask(fused_gap_path) + if pos is not None: + m &= pos + return m + + def common_valid_mask( yt: np.ndarray, y_gap: np.ndarray, @@ -129,16 +171,14 @@ def common_valid_mask( fused_gap_path: Path, mode: str, ) -> np.ndarray: - """Shared finite mask: truth GCC, gap/nogap preds, and fusion valid-data rules.""" - m = ( - valid_mask_fused(fused_gap_path, mode) - & np.isfinite(yt) - & np.isfinite(y_gap) - & (yt > VALID_REFL_THRESHOLD) - & (y_gap > VALID_REFL_THRESHOLD) - ) + """Mask including no-gap fusion when computing gap-vs-no-gap deltas (internal QA).""" + m = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode) if y_nogap is not None: - m &= np.isfinite(y_nogap) & (y_nogap > VALID_REFL_THRESHOLD) + m &= ( + np.isfinite(y_nogap) + & (y_nogap > VALID_REFL_THRESHOLD) + & (y_nogap <= 1.0) + ) return m @@ -150,18 +190,20 @@ def evaluate_gap_vs_withheld( *, whittaker_context: tuple[Path, str, str, str, str, str] | None = None, ) -> dict: - """Spatial metrics for gap and no-gap; deltas; optional Whittaker constant-field vs same mask. + """Spatial metrics for gap and no-gap; optional Whittaker constant-field vs withheld S2. - ``delta_rmse`` = RMSE_gap − RMSE_no_gap; ``delta_nse`` = NSE_no_gap − NSE_gap (higher gap loss → positive delta_nse). + ``delta_rmse`` / ``delta_nse`` compare gap vs no-gap fusion on a shared mask (QA only; + ``delta_nse`` = NSE_no_gap − NSE_gap, not exported to thesis tables). """ yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path) y_nogap = None if fused_nogap_path is not None and fused_nogap_path.is_file(): y_nogap, _ = read_fused_gcc(fused_nogap_path) - mask = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode) - out: dict = {"gap": spatial_scores(yt, y_gap, mask)} + mask_gw = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode) + out: dict = {"gap": spatial_scores(yt, y_gap, mask_gw)} if y_nogap is not None: - out["no_gap"] = spatial_scores(yt, y_nogap, mask) + mask_full = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode) + out["no_gap"] = spatial_scores(yt, y_nogap, mask_full) g, ng = out["gap"], out["no_gap"] if g.get("rmse") is not None and ng.get("rmse") is not None: out["delta_rmse"] = float(g["rmse"] - ng["rmse"]) @@ -180,7 +222,7 @@ def evaluate_gap_vs_withheld( window_end_iso=w1, ) if wgcc is not None: - out["whittaker"] = constant_field_scores(yt, float(wgcc), mask) + out["whittaker"] = constant_field_scores(yt, float(wgcc), mask_gw) return out diff --git a/preparation.py b/preparation.py index a823fc8..32d152c 100644 --- a/preparation.py +++ b/preparation.py @@ -11,7 +11,7 @@ from rasterio.vrt import WarpedVRT from rasterio import shutil as rio_shutil RESOLUTION_RATIO = 21 -# Centred temporal MA on S3 LR stack (METHODOLOGY §5.4.3); odd ≥3, or 1 to disable. +# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable. S3_MOVING_AVERAGE_WINDOW_DAYS = 5 @@ -79,7 +79,9 @@ def _get_itb_base_dir(season, site_name, cleaning_strategy): def _compute_gcc_from_refl_array(blue, green, red): - total = red.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32) + total = ( + blue.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32) + ) mask = (total > 0) & np.isfinite(total) gcc = np.zeros_like(green, dtype=np.float32) gcc[mask] = green[mask].astype(np.float32) / total[mask] @@ -90,8 +92,8 @@ def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir): dst_s2_dir.mkdir(parents=True, exist_ok=True) for src in src_s2_dir.glob("*DIST_CLOUD.tif"): dst = dst_s2_dir / src.name - if dst.exists(): - continue + if dst.is_symlink() or dst.exists(): + dst.unlink(missing_ok=True) try: dst.symlink_to(src.resolve()) except OSError: