diff --git a/7-gcc-suitability.py b/7-gcc-suitability.py new file mode 100644 index 0000000..32cc4e5 --- /dev/null +++ b/7-gcc-suitability.py @@ -0,0 +1,742 @@ +"""Step 7: PhenoCam GCC suitability as a fusion-accuracy reference. + +Inputs (``data/``, ``{year}`` = ``--evaluation-year``): + +- ``metrics/{year}/{site}/gcc_s2.json``, ``gcc_phenocam.json`` — Step 5 timeseries +- ``metrics/manifest.json`` — site lat/lon +- ``sentinel_data/{year}/{site}/prepared/s2/`` — S2 REFL/GCC/DIST_CLOUD (Steps 3–4) +- ``sentinel_data/{year}/{site}/prepared/gcc_s3/``, ``prepared/s3_rgb/`` — Step 4 + +Outputs (``data/gcc_suitability/``): + +- ``{year}.json`` — representativeness (Line A), LOOCV concordance (Line B), + per-site and aggregate suitability verdict + +CLI: + +- ``--evaluation-year`` (default 2025) +- ``--min-cloudfree-s2`` (default 10) — minimum cloud-free S2 dates for LOOCV +- ``--alpha`` (default 0.05) — reserved for future significance tests + +Full-sample aggregate; does not accept ``--site``. +""" + +from __future__ import annotations + +import argparse +import json +import re +import shutil +import tempfile +from datetime import datetime +from pathlib import Path +from typing import Any + +import numpy as np +import rasterio +from pyproj import Transformer +from rasterio.crs import CRS +from rasterio.transform import rowcol +from scipy.stats import linregress, pearsonr, spearmanr +from tqdm import tqdm + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +DATA_DIR = Path("data") +DEFAULT_YEAR = 2025 +DEFAULT_ALPHA = 0.05 +MIN_CLOUDFREE_S2 = 10 +REPR_R_THRESHOLD = 0.7 +MATCH_TOLERANCE_DAYS = 5 + +RESOLUTION_RATIO = 30 +MAX_DAYS = 100 +MINIMUM_ACQUISITION_IMPORTANCE = 0 + +SMALL_SAMPLE_SITES = 6 + + +# --------------------------------------------------------------------------- +# efast import +# --------------------------------------------------------------------------- + + +def _import_efast(): + try: + import efast.efast as efast_module + + return efast_module + except ImportError as exc: + raise ImportError("efast not found. Install with: uv sync") from exc + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _r4(v: float | None) -> float | None: + return round(v, 4) if v is not None else None + + +def _window_mean(data: np.ndarray) -> float | None: + valid = data[~np.isnan(data)] + if valid.size == 0: + return None + return float(np.mean(valid)) + + +def _read_center_pixel(path: Path, lat: float, lon: float) -> float | None: + try: + with rasterio.open(path) as src: + transformer = Transformer.from_crs( + CRS.from_epsg(4326), src.crs, always_xy=True + ) + x, y = transformer.transform(lon, lat) + row, col = rowcol(src.transform, x, y) + h, w = src.height, src.width + r0, r1 = max(0, row - 1), min(h, row + 2) + c0, c1 = max(0, col - 1), min(w, col + 2) + window = rasterio.windows.Window(c0, r0, c1 - c0, r1 - r0) + data = src.read(1, window=window).astype(float) + nodata = src.nodata + if nodata is not None: + data = np.where(data == nodata, np.nan, data) + data[data == 0] = np.nan + return _window_mean(data) + except Exception: + return None + + +def _date_from_s2_tif(path: Path) -> str | None: + parts = path.stem.split("_") + if len(parts) >= 3: + m = re.match(r"(\d{8})", parts[2]) + return m.group(1) if m else None + return None + + +def _iso_to_yyyymmdd(iso: str) -> str: + return iso.replace("-", "") + + +def _yyyymmdd_to_iso(d: str) -> str: + return f"{d[:4]}-{d[4:6]}-{d[6:]}" + + +def _day_gap(a: str, b: str) -> float: + return abs((np.datetime64(a) - np.datetime64(b)) / np.timedelta64(1, "D")) + + +def _match_series( + ref: list[dict], + ref_key: str, + pred: list[dict], + pred_key: str, + tolerance_days: int = MATCH_TOLERANCE_DAYS, +) -> tuple[list[float], list[float], list[str]]: + """Return paired (ref_vals, pred_vals, ref_dates) within tolerance.""" + ref_lookup: dict[str, float] = { + p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None + } + if not ref_lookup: + return [], [], [] + + ref_dates = sorted(ref_lookup) + obs, sim, matched_dates = [], [], [] + for pt in pred: + v = pt.get(pred_key) + if v is None: + continue + nearest = min(ref_dates, key=lambda d: _day_gap(pt["date"], d)) + if _day_gap(pt["date"], nearest) <= tolerance_days: + obs.append(ref_lookup[nearest]) + sim.append(v) + matched_dates.append(nearest) + return obs, sim, matched_dates + + +def _accuracy_metrics(obs: list[float], sim: list[float]) -> dict[str, Any] | None: + if len(obs) < 2: + return None + obs_arr = np.array(obs, dtype=float) + sim_arr = np.array(sim, dtype=float) + diff = sim_arr - obs_arr + rmse = float(np.sqrt(np.mean(diff**2))) + mae = float(np.mean(np.abs(diff))) + bias = float(np.mean(diff)) + r, _ = pearsonr(obs_arr, sim_arr) + return { + "n": len(obs), + "rmse": _r4(rmse), + "mae": _r4(mae), + "bias": _r4(bias), + "r": _r4(float(r)), + } + + +def _gcc_from_refl_file(refl_path: Path, gcc_path: Path) -> None: + with rasterio.open(refl_path) as src: + b, g, r = src.read(1), src.read(2), src.read(3) + profile = src.profile + total = b + g + r + invalid = (b < 0) | (g < 0) | (r < 0) + gcc = np.where(invalid, np.nan, g / (total + 1e-10)) + gcc[total == 0] = np.nan + profile.update(count=1, dtype="float32") + with rasterio.open(gcc_path, "w", **profile) as dst: + dst.write(gcc[np.newaxis].astype("float32")) + + +def _load_json_series(path: Path) -> list[dict]: + if not path.is_file(): + return [] + return json.loads(path.read_text()) + + +def _load_site_coords(year: int) -> dict[str, tuple[float, float]]: + manifest_path = DATA_DIR / "metrics" / "manifest.json" + if not manifest_path.is_file(): + return {} + manifest = json.loads(manifest_path.read_text()) + sites = manifest.get("sites", {}).get(str(year), {}) + coords: dict[str, tuple[float, float]] = {} + for site, meta in sites.items(): + lat, lon = meta.get("lat"), meta.get("lon") + if lat is not None and lon is not None: + coords[site] = (float(lat), float(lon)) + return coords + + +def _discover_sites(year: int) -> list[str]: + metrics_dir = DATA_DIR / "metrics" / str(year) + if not metrics_dir.is_dir(): + return [] + return sorted( + d.name + for d in metrics_dir.iterdir() + if d.is_dir() and (d / "gcc_s2.json").is_file() + ) + + +def _build_hr_symlink_dir(s2_dir: Path, holdout_yyyymmdd: str, dest: Path) -> None: + """Symlink all S2 hr inputs except the held-out acquisition date.""" + dest.mkdir(parents=True, exist_ok=True) + for pattern in ("*_REFL.tif", "*_GCC.tif", "*_DIST_CLOUD.tif"): + for src in sorted(s2_dir.glob(pattern)): + date_token = ( + src.stem.split("_")[2][:8] if len(src.stem.split("_")) >= 3 else "" + ) + if date_token == holdout_yyyymmdd: + continue + link = dest / src.name + if link.exists() or link.is_symlink(): + link.unlink() + link.symlink_to(src.resolve()) + + +# --------------------------------------------------------------------------- +# Line A — representativeness +# --------------------------------------------------------------------------- + + +def compute_representativeness(phenocam: list[dict], s2: list[dict]) -> dict[str, Any]: + """PhenoCam gcc_90 vs co-located observed S2 GCC.""" + obs, sim, _ = _match_series(phenocam, "gcc_90", s2, "gcc") + result: dict[str, Any] = { + "n": len(obs), + "r": None, + "spearman": None, + "slope": None, + "intercept": None, + "rmse": None, + "bias": None, + "peak_offset_days": None, + "representative": False, + } + if len(obs) < 2: + return result + + obs_arr = np.array(obs, dtype=float) + sim_arr = np.array(sim, dtype=float) + r, _ = pearsonr(obs_arr, sim_arr) + sp, _ = spearmanr(obs_arr, sim_arr) + reg = linregress(sim_arr, obs_arr) + diff = sim_arr - obs_arr + result.update( + { + "r": _r4(float(r)), + "spearman": _r4(float(sp)), + "slope": _r4(float(reg.slope)), + "intercept": _r4(float(reg.intercept)), + "rmse": _r4(float(np.sqrt(np.mean(diff**2)))), + "bias": _r4(float(np.mean(diff))), + "representative": float(r) >= REPR_R_THRESHOLD, + } + ) + + pc_dates = [p["date"] for p in phenocam if p.get("gcc_90") is not None] + s2_dates = [p["date"] for p in s2 if p.get("gcc") is not None] + if pc_dates and s2_dates: + pc_peak = max( + phenocam, + key=lambda p: p["gcc_90"] if p.get("gcc_90") is not None else -1, + )["date"] + s2_peak = max(s2, key=lambda p: p["gcc"] if p.get("gcc") is not None else -1)[ + "date" + ] + result["peak_offset_days"] = int(_day_gap(pc_peak, s2_peak)) + + return result + + +# --------------------------------------------------------------------------- +# Line B — LOOCV +# --------------------------------------------------------------------------- + + +def _phenocam_lookup(phenocam: list[dict]) -> dict[str, float]: + return {p["date"]: p["gcc_90"] for p in phenocam if p.get("gcc_90") is not None} + + +def _nearest_phenocam(iso_date: str, lookup: dict[str, float]) -> float | None: + if not lookup: + return None + dates = sorted(lookup) + nearest = min(dates, key=lambda d: _day_gap(iso_date, d)) + if _day_gap(iso_date, nearest) <= MATCH_TOLERANCE_DAYS: + return lookup[nearest] + return None + + +def run_loocv_site( + site: str, + year: int, + lat: float, + lon: float, + s2_series: list[dict], + phenocam: list[dict], + efast, +) -> list[dict[str, Any]]: + """Leave-one-out EFAST for each cloud-free S2 date; return per-date records.""" + s2_dir = DATA_DIR / "sentinel_data" / str(year) / site / "prepared" / "s2" + gcc_s3_dir = DATA_DIR / "sentinel_data" / str(year) / site / "prepared" / "gcc_s3" + s3_rgb_dir = DATA_DIR / "sentinel_data" / str(year) / site / "prepared" / "s3_rgb" + + pc_lookup = _phenocam_lookup(phenocam) + s2_truth = {p["date"]: p["gcc"] for p in s2_series} + fusion_kwargs = dict( + ratio=RESOLUTION_RATIO, + max_days=MAX_DAYS, + minimum_acquisition_importance=MINIMUM_ACQUISITION_IMPORTANCE, + ) + + records: list[dict[str, Any]] = [] + dates = [p["date"] for p in s2_series] + + with tempfile.TemporaryDirectory(prefix=f"loocv_{site}_") as tmp_root: + tmp = Path(tmp_root) + hr_dir = tmp / "hr" + itb_out = tmp / "itb" + bti_out = tmp / "bti" + bti_gcc = tmp / "bti_gcc" + itb_out.mkdir() + bti_out.mkdir() + bti_gcc.mkdir() + + for iso_date in tqdm(dates, desc=f"{site} LOOCV", leave=False): + yyyymmdd = _iso_to_yyyymmdd(iso_date) + truth = s2_truth.get(iso_date) + if truth is None: + continue + + if hr_dir.exists(): + shutil.rmtree(hr_dir) + _build_hr_symlink_dir(s2_dir, yyyymmdd, hr_dir) + + pred_date = datetime.strptime(yyyymmdd, "%Y%m%d") + + for f in itb_out.glob("*.tif"): + f.unlink() + for f in bti_out.glob("*.tif"): + f.unlink() + for f in bti_gcc.glob("*.tif"): + f.unlink() + + efast.fusion( + pred_date, + gcc_s3_dir, + hr_dir, + itb_out, + product="GCC", + **fusion_kwargs, + ) + efast.fusion( + pred_date, + s3_rgb_dir, + hr_dir, + bti_out, + product="REFL", + **fusion_kwargs, + ) + + itb_path = itb_out / f"GCC_{yyyymmdd}.tif" + refl_path = bti_out / f"REFL_{yyyymmdd}.tif" + bti_path = bti_gcc / f"GCC_{yyyymmdd}.tif" + + pred_itb = ( + _read_center_pixel(itb_path, lat, lon) if itb_path.is_file() else None + ) + pred_bti = None + if refl_path.is_file(): + _gcc_from_refl_file(refl_path, bti_path) + if bti_path.is_file(): + pred_bti = _read_center_pixel(bti_path, lat, lon) + + pc_val = _nearest_phenocam(iso_date, pc_lookup) + + records.append( + { + "date": iso_date, + "s2_truth": truth, + "pred_bti": pred_bti, + "pred_itb": pred_itb, + "phenocam": pc_val, + } + ) + + return records + + +def _method_accuracy(records: list[dict], pred_key: str, ref_key: str) -> dict | None: + obs, sim = [], [] + for rec in records: + pred = rec.get(pred_key) + ref = rec.get(ref_key) + if pred is None or ref is None: + continue + obs.append(ref) + sim.append(pred) + return _accuracy_metrics(obs, sim) + + +def _winner(rmse_bti: float | None, rmse_itb: float | None) -> str | None: + if rmse_bti is None or rmse_itb is None: + return None + if rmse_bti < rmse_itb: + return "bti" + if rmse_itb < rmse_bti: + return "itb" + return "tie" + + +def summarize_loocv(records: list[dict]) -> dict[str, Any]: + bti_vs_s2 = _method_accuracy(records, "pred_bti", "s2_truth") + itb_vs_s2 = _method_accuracy(records, "pred_itb", "s2_truth") + bti_vs_pc = _method_accuracy(records, "pred_bti", "phenocam") + itb_vs_pc = _method_accuracy(records, "pred_itb", "phenocam") + + winner_s2 = _winner( + bti_vs_s2["rmse"] if bti_vs_s2 else None, + itb_vs_s2["rmse"] if itb_vs_s2 else None, + ) + winner_pc = _winner( + bti_vs_pc["rmse"] if bti_vs_pc else None, + itb_vs_pc["rmse"] if itb_vs_pc else None, + ) + agreement = ( + winner_s2 == winner_pc + if winner_s2 and winner_pc and winner_s2 != "tie" and winner_pc != "tie" + else None + ) + + return { + "n_dates": len(records), + "bti": {"vs_s2": bti_vs_s2, "vs_phenocam": bti_vs_pc}, + "itb": {"vs_s2": itb_vs_s2, "vs_phenocam": itb_vs_pc}, + "winner_s2": winner_s2, + "winner_phenocam": winner_pc, + "winner_agreement": agreement, + } + + +# --------------------------------------------------------------------------- +# Aggregate concordance +# --------------------------------------------------------------------------- + + +def _pooled_concordance( + all_records: list[dict[str, Any]], +) -> dict[str, Any]: + """Pooled metrics across all held-out dates.""" + residual_pairs: list[tuple[float, float]] = [] + vec_s2: list[float] = [] + vec_pc: list[float] = [] + + for site_data in all_records: + for rec in site_data.get("records", []): + truth = rec.get("s2_truth") + pc = rec.get("phenocam") + for key in ("pred_bti", "pred_itb"): + pred = rec.get(key) + if pred is None or truth is None: + continue + err_s2 = abs(pred - truth) + if pc is not None: + err_pc = abs(pred - pc) + vec_s2.append(err_s2) + vec_pc.append(err_pc) + residual_pairs.append((err_s2, err_pc)) + + pooled_spearman = None + if len(vec_s2) >= 3: + sp, _ = spearmanr(vec_s2, vec_pc) + if not np.isnan(sp): + pooled_spearman = _r4(float(sp)) + + residual_corr = None + if len(residual_pairs) >= 3: + xs = np.array([p[0] for p in residual_pairs]) + ys = np.array([p[1] for p in residual_pairs]) + rc, _ = pearsonr(xs, ys) + residual_corr = _r4(float(rc)) + + agreements = [ + s.get("winner_agreement") + for s in all_records + if s.get("eligible") and s.get("winner_agreement") is not None + ] + winner_agreement_rate = ( + _r4(sum(1 for a in agreements if a) / len(agreements)) if agreements else None + ) + + n_loocv_dates = sum(len(s.get("records", [])) for s in all_records) + + return { + "pooled_spearman": pooled_spearman, + "residual_corr": residual_corr, + "winner_agreement_rate": winner_agreement_rate, + "n_loocv_dates": n_loocv_dates, + } + + +def _suitability_verdict( + n_repr_pass: int, + n_eligible: int, + n_total: int, + pooled: dict[str, Any], +) -> str: + if n_eligible == 0: + return "insufficient data" + repr_rate = n_repr_pass / n_total if n_total else 0 + agree = pooled.get("winner_agreement_rate") + sp = pooled.get("pooled_spearman") + rc = pooled.get("residual_corr") + + strong = 0 + if repr_rate >= 0.6: + strong += 1 + if agree is not None and agree >= 0.7: + strong += 1 + if sp is not None and sp >= 0.8: + strong += 1 + if rc is not None and rc >= 0.5: + strong += 1 + + if strong >= 3: + return "suitable" + if strong >= 1 or repr_rate >= 0.4: + return "partially suitable" + return "not suitable" + + +# --------------------------------------------------------------------------- +# Per-site processing +# --------------------------------------------------------------------------- + + +def process_site( + site: str, + year: int, + lat: float, + lon: float, + min_cloudfree: int, + efast, +) -> dict[str, Any]: + metrics_dir = DATA_DIR / "metrics" / str(year) / site + phenocam = _load_json_series(metrics_dir / "gcc_phenocam.json") + s2_series = _load_json_series(metrics_dir / "gcc_s2.json") + + repr_metrics = compute_representativeness(phenocam, s2_series) + n_cloudfree = len(s2_series) + eligible = n_cloudfree >= min_cloudfree + + result: dict[str, Any] = { + "eligible": eligible, + "n_cloudfree_s2": n_cloudfree, + "representativeness": repr_metrics, + "loocv": None, + "winner_s2": None, + "winner_phenocam": None, + "winner_agreement": None, + "records": [], + } + + if not eligible: + return result + + records = run_loocv_site(site, year, lat, lon, s2_series, phenocam, efast) + loocv = summarize_loocv(records) + result["loocv"] = loocv + result["winner_s2"] = loocv["winner_s2"] + result["winner_phenocam"] = loocv["winner_phenocam"] + result["winner_agreement"] = loocv["winner_agreement"] + result["records"] = records + return result + + +# --------------------------------------------------------------------------- +# Output / summary +# --------------------------------------------------------------------------- + + +def _compact_site_payload(site_result: dict[str, Any]) -> dict[str, Any]: + """Drop raw LOOCV records from JSON output (keep summaries only).""" + out = { + "eligible": site_result["eligible"], + "n_cloudfree_s2": site_result["n_cloudfree_s2"], + "representativeness": site_result["representativeness"], + "winner_s2": site_result.get("winner_s2"), + "winner_phenocam": site_result.get("winner_phenocam"), + "winner_agreement": site_result.get("winner_agreement"), + } + if site_result.get("loocv"): + out["loocv"] = site_result["loocv"] + return out + + +def _print_summary(payload: dict[str, Any]) -> None: + year = payload["year"] + agg = payload["aggregate"] + print( + f"\nPhenoCam GCC suitability — {year} " + f"({payload['n_sites_total']} site(s), " + f"{payload['n_sites_eligible']} LOOCV-eligible, " + f"{payload['n_sites_repr_pass']} representative)" + ) + print(f"Verdict: {agg['suitability_verdict']}") + print( + f" pooled Spearman (method errors): {agg.get('pooled_spearman', '—')} " + f"residual corr: {agg.get('residual_corr', '—')} " + f"winner agreement: {agg.get('winner_agreement_rate', '—')} " + f"LOOCV dates: {agg.get('n_loocv_dates', '—')}" + ) + print(f"\n{'site':<28} {'repr r':>8} {'pass':>5} {'LOOCV n':>8} {'win agree':>10}") + print("-" * 65) + for site, data in sorted(payload["sites"].items()): + rep = data["representativeness"] + loocv_n = data.get("loocv", {}).get("n_dates") if data.get("loocv") else "—" + agree = data.get("winner_agreement") + agree_s = "yes" if agree else ("no" if agree is False else "—") + pass_s = "yes" if rep.get("representative") else "no" + print( + f"{site:<28} {rep.get('r') or '—':>8} {pass_s:>5} " + f"{loocv_n!s:>8} {agree_s:>10}" + ) + if payload["n_sites_total"] < SMALL_SAMPLE_SITES: + print( + f"\nNote: only {payload['n_sites_total']} site(s); " + "interpret cross-site aggregates cautiously." + ) + if payload.get("dropped_sites"): + print(f"Dropped/ineligible: {', '.join(payload['dropped_sites'])}") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR) + parser.add_argument( + "--min-cloudfree-s2", + type=int, + default=MIN_CLOUDFREE_S2, + help="Minimum cloud-free S2 dates for LOOCV (default 10)", + ) + parser.add_argument( + "--alpha", + type=float, + default=DEFAULT_ALPHA, + help="Significance threshold (reserved; default 0.05)", + ) + args = parser.parse_args() + + year = args.evaluation_year + min_cloudfree = args.min_cloudfree_s2 + + sites = _discover_sites(year) + if not sites: + raise SystemExit( + f"No Step 5 metrics found under {DATA_DIR / 'metrics' / str(year)}" + ) + + coords = _load_site_coords(year) + efast = _import_efast() + + site_results: dict[str, dict[str, Any]] = {} + dropped: list[str] = [] + + for site in tqdm(sites, desc="Sites"): + if site not in coords: + dropped.append(site) + continue + lat, lon = coords[site] + site_results[site] = process_site(site, year, lat, lon, min_cloudfree, efast) + if not site_results[site]["eligible"]: + dropped.append(site) + + n_eligible = sum(1 for s in site_results.values() if s["eligible"]) + n_repr_pass = sum( + 1 + for s in site_results.values() + if s["representativeness"].get("representative") + ) + + pooled = _pooled_concordance(list(site_results.values())) + verdict = _suitability_verdict(n_repr_pass, n_eligible, len(sites), pooled) + + payload = { + "year": year, + "alpha": args.alpha, + "repr_r_threshold": REPR_R_THRESHOLD, + "min_cloudfree_s2": min_cloudfree, + "n_sites_total": len(sites), + "n_sites_eligible": n_eligible, + "n_sites_repr_pass": n_repr_pass, + "aggregate": { + "suitability_verdict": verdict, + **pooled, + }, + "sites": { + site: _compact_site_payload(data) + for site, data in sorted(site_results.items()) + }, + "dropped_sites": sorted(set(dropped)), + } + + out_dir = DATA_DIR / "gcc_suitability" + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / f"{year}.json" + out_path.write_text(json.dumps(payload, separators=(",", ":"))) + + _print_summary(payload) + print(f"\nWritten → {out_path}") + + +if __name__ == "__main__": + main() diff --git a/README.md b/README.md index 3724bd8..baf3ba7 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ End-to-end pipeline from selecting sites from the global [PhenoCam Network](http | 4 | `4-fusion.py` | Run EFAST BtI (fuse reflectance → GCC) and ItB (fuse GCC directly) for each screened site | | 5 | `5-metrics.py` | Extract PhenoCam-matched timeseries, compute NSE/RMSE/r baselines and fusion metrics, emit per-site JSON and webapp manifest | | 6 | `6-statistics-fusion-order.py` | Paired ItB-vs-BtI significance test (Wilcoxon + t-test) across all sites | +| 7 | `7-gcc-suitability.py` | PhenoCam GCC suitability as a fusion-accuracy reference (representativeness + LOOCV concordance) | --- @@ -43,9 +44,10 @@ uv run python 3-sentinel-data.py --evaluation-year 2025 uv run python 4-fusion.py --evaluation-year 2025 uv run python 5-metrics.py --evaluation-year 2025 uv run python 6-statistics-fusion-order.py --evaluation-year 2025 +uv run python 7-gcc-suitability.py --evaluation-year 2025 ``` -Steps 1–5 accept `--evaluation-year` (default `2025`) and `--site` (optional, for single-site runs). Step 6 is a full-sample aggregate and only accepts `--evaluation-year` and `--alpha` (default `0.05`). Steps 3–5 are resumable — existing output files are skipped. +Steps 1–5 accept `--evaluation-year` (default `2025`) and `--site` (optional, for single-site runs). Steps 6–7 are full-sample aggregates and only accept `--evaluation-year` (Step 6 and 7 also accept `--alpha`; Step 7 adds `--min-cloudfree-s2`, default `10`). Steps 3–5 are resumable — existing output files are skipped. ```bash # single site @@ -73,6 +75,7 @@ Step 3 S3 download uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`). Set `CDSE_USER` an | `metrics/{year}/{site}/` | 5 | Per-site timeseries, metrics, covariates JSON | | `metrics/manifest.json` | 5 | Webapp manifest (years + site metadata) | | `statistics_fusion_order/{year}.json` | 6 | Paired ItB-vs-BtI test summary (NSE, RMSE, nRMSE, r) | +| `gcc_suitability/{year}.json` | 7 | PhenoCam GCC suitability summary (representativeness + LOOCV concordance) | --- @@ -80,7 +83,7 @@ Step 3 S3 download uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`). Set `CDSE_USER` an -`python3 -m http.server 8080` runs the webapp on [http://localhost:8000/index.html](http://localhost:8000/index.html). Requires step 5 output (`data/metrics/manifest.json`). +`python3 -m http.server 8080` runs the webapp on [http://localhost:8000/index.html](http://localhost:8000/index.html). Requires step 5 output (`data/metrics/manifest.json`). The Statistics overlay GCC suitability tab uses step 7 output (`data/gcc_suitability/{year}.json`). --- diff --git a/index.html b/index.html index 47d6843..2210d3d 100644 --- a/index.html +++ b/index.html @@ -181,6 +181,13 @@ body { margin: 0; font: 13px/1.4 system-ui, sans-serif; background: #f5f5f5; col .stat-badge.bti { background: #e3f2fd; color: #0d47a1; } .stat-badge.none { background: #f5f5f5; color: #777; font-weight: 400; } .stat-badge.insuf { background: #fce4ec; color: #b71c1c; font-weight: 400; } +.stat-badge.pass { background: #e8f5e9; color: #1b5e20; } +.stat-badge.partial { background: #fff3e0; color: #e65100; font-weight: 400; } +.stat-badge.fail { background: #fce4ec; color: #b71c1c; font-weight: 400; } +.stat-site-table { width: 100%; border-collapse: collapse; font-size: 12px; margin-top: 8px; } +.stat-site-table th, .stat-site-table td { padding: 4px 8px; text-align: left; border-bottom: 1px solid #f0f0f0; } +.stat-site-table th { color: #888; font-weight: 500; } +.stat-site-table .sval { font-variant-numeric: tabular-nums; } .stat-row-table { width: 100%; border-collapse: collapse; font-size: 12px; } .stat-row-table td { padding: 3px 0; vertical-align: top; } .stat-row-table .slabel { color: #888; width: 46%; } @@ -243,12 +250,7 @@ body { margin: 0; font: 13px/1.4 system-ui, sans-serif; background: #f5f5f5; col -
+