Switching horses.

2026-06-10 14:18:06 +02:00 · 2026-06-10 14:18:06 +02:00 · e3e14027fc
commit e3e14027fc
parent 25cbd97662
51 changed files with 5078 additions and 11678 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,10 +1,9 @@
-# Project data
+# Generated caches and downloads (regenerate via pipeline steps)
-data/*
+data/
 webapp/data
-# Environment
+# Environment and secrets
 .env
-.venv
+.venv/
 venv/
 env/
@ -42,6 +41,3 @@ dist/
 # OS
 .DS_Store
 Thumbs.db
 AGENTS.md
 .vibe
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,8 +0,0 @@
 repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.8.4
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
--- a/1-phenocam.py
+++ b/1-phenocam.py
@ -0,0 +1,278 @@
 """Step 1: download worldwide PhenoCam sites for a calendar year.
 Inputs (``data/``): none — queries the PhenoCam API.
 Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
 - ``phenocam/{year}.json`` — site list manifest
 - ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata
 - ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV
 CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter).
 Next step: :mod:`2-phenocam-screening`.
 """
 from __future__ import annotations
 import argparse
 import json
 import sys
 from datetime import date
 from pathlib import Path
 from typing import Any
 import requests
 PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
 if str(PROCESSING_DIR) not in sys.path:
    sys.path.insert(0, str(PROCESSING_DIR))
 from acquisition_phenocam import PHENOCAM_API  # noqa: E402
 from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date  # noqa: E402
 # Default calendar year used for --evaluation-year.
 EVALUATION_YEAR = 2025
 # URL requested by check_phenocam_host() to verify the API answers before any download starts.
 HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1"
 # Filename suffix of the per-site one_day_summary CSV stored beside {sitename}.json.
 ONE_DAY_CSV_SUFFIX = "_1day.csv"
 def check_phenocam_host() -> None:
    """Probe the PhenoCam API once and fail fast when it cannot be reached.

    Raises:
        RuntimeError: if the probe request fails or answers with an HTTP
            error status; the originating ``requests`` exception is chained.
    """
    try:
        requests.get(HOST_PROBE, timeout=30).raise_for_status()
    except requests.RequestException as error:
        kind = error.__class__.__name__
        message = f"PhenoCam API unreachable (phenocam.nau.edu): {kind}: {error}"
        raise RuntimeError(message) from error
 def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
    """Report whether the span ``first``..``last`` touches calendar year ``season``.

    Both bounds go through ``_parse_iso_date``; when either one comes back as
    ``None`` the span is treated as not overlapping.
    """
    archive_start, archive_end = _parse_iso_date(first), _parse_iso_date(last)
    if archive_start is None or archive_end is None:
        return False
    year_start, year_end = date(season, 1, 1), date(season, 12, 31)
    return archive_start <= year_end and archive_end >= year_start
 def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
    """Return the per-year site directory ``{cache_dir}/phenocam/{evaluation_year}``."""
    return cache_dir.joinpath("phenocam", str(evaluation_year))
 def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the cached metadata JSON for ``sitename``."""
    filename = f"{sitename}.json"
    return sites_dir(cache_dir, evaluation_year) / filename
 def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the cached one-day summary CSV for ``sitename``."""
    filename = f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    return sites_dir(cache_dir, evaluation_year) / filename
 def load_candidate_cameras(
    evaluation_year: int,
    *,
    site_filter: set[str] | None = None,
    active_only: bool = False,
    limit: int | None = None,
 ) -> list[dict[str, Any]]:
    """Collect cameras whose archive overlaps ``evaluation_year``.

    Args:
        evaluation_year: calendar year the ``date_first``/``date_last`` span
            must overlap.
        site_filter: when given, only cameras whose ``Sitename`` is in the set
            are kept.
        active_only: when true, cameras with a falsy ``active`` field are dropped.
        limit: when given, only the first ``limit`` cameras (after sorting by
            ``Sitename``) are returned.

    Returns:
        Shallow dict copies of the selected cameras, sorted by ``Sitename``.
    """

    def _is_candidate(camera: Any) -> bool:
        # The active check comes first so inactive cameras are skipped
        # before their "Sitename" is looked up.
        if active_only and not camera.get("active"):
            return False
        if site_filter is not None and str(camera["Sitename"]) not in site_filter:
            return False
        return _overlaps_year(
            camera.get("date_first"), camera.get("date_last"), evaluation_year
        )

    selected = [dict(camera) for camera in _paginate_cameras() if _is_candidate(camera)]
    selected.sort(key=lambda item: str(item["Sitename"]))
    return selected if limit is None else selected[:limit]
 def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
    """Return the first ROI list entry whose ``site`` equals ``site_name``.

    Pages of ``{PHENOCAM_API}/roilists/`` are requested one after another
    (following each payload's ``next`` link) until a page contains a matching
    entry or there are no more pages.

    Returns:
        A dict copy of the first matching entry, or ``None`` when no page has one.

    Raises:
        requests.HTTPError: if any page answers with an HTTP error status.
    """
    next_url = f"{PHENOCAM_API}/roilists/"
    query: dict[str, Any] | None = {"site": site_name}
    while next_url:
        reply = requests.get(next_url, params=query, timeout=60)
        reply.raise_for_status()
        page = reply.json()
        matches = [
            entry for entry in page.get("results", []) if entry.get("site") == site_name
        ]
        if matches:
            return dict(matches[0])
        next_url = page.get("next")
        # Query parameters are only sent with the first request; later
        # requests use the "next" URL from the payload unchanged.
        query = None
    return None
 def download_one_day_csv(csv_url: str, output_path: Path) -> None:
    """Fetch ``csv_url`` and store the response text at ``output_path`` as UTF-8.

    The parent directory is created when missing.

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
    """
    reply = requests.get(csv_url, timeout=60)
    reply.raise_for_status()
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    output_path.write_text(reply.text, encoding="utf-8")
 def download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
 ) -> str:
    """Fetch ROI metadata for one camera and write its cache files.

    The ``{sitename}.json`` file (camera plus ROI record) is written first;
    the one-day CSV is then downloaded only when the ROI record carries a
    ``one_day_summary`` URL.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    name = str(camera["Sitename"])
    roi_record = fetch_roi_record(name)

    metadata_path = site_json_path(cache_dir, evaluation_year, name)
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"response": {"camera": camera, "roi": roi_record}}, indent=2)
    metadata_path.write_text(document + "\n", encoding="utf-8")

    if roi_record:
        summary_url = roi_record.get("one_day_summary")
        if summary_url:
            download_one_day_csv(
                summary_url, site_csv_path(cache_dir, evaluation_year, name)
            )
    return name
 def load_or_download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
    *,
    refresh: bool,
 ) -> str:
    """Reuse a cached site when possible, otherwise download it.

    With ``refresh`` false and the site JSON already on disk, nothing is
    re-fetched except a missing CSV, which is downloaded from the
    ``one_day_summary`` URL recorded in the cached JSON (if any).

    Returns:
        The camera's ``Sitename`` as a string.
    """
    name = str(camera["Sitename"])
    cached_json = site_json_path(cache_dir, evaluation_year, name)
    cached_csv = site_csv_path(cache_dir, evaluation_year, name)

    if refresh or not cached_json.is_file():
        return download_site(camera, evaluation_year, cache_dir)
    if cached_csv.is_file():
        return name

    # JSON is cached but the CSV is not: fetch only the CSV.
    cached = json.loads(cached_json.read_text(encoding="utf-8"))
    roi_record = cached.get("response", {}).get("roi") or {}
    summary_url = roi_record.get("one_day_summary")
    if summary_url:
        download_one_day_csv(summary_url, cached_csv)
    return name
 def run_download(
    *,
    cache_dir: Path,
    evaluation_year: int,
    active_only: bool = False,
    site_filter: set[str] | None = None,
    limit: int | None = None,
    refresh: bool = False,
 ) -> list[str]:
    """Download (or reuse from cache) every candidate site for one year.

    Args:
        cache_dir: base directory of the step-1 cache.
        evaluation_year: calendar year the camera archive must overlap.
        active_only: restrict candidates to cameras flagged active.
        site_filter: optional set of sitenames to keep.
        limit: optional cap on the number of candidates processed.
        refresh: re-download even when cache files exist.

    Returns:
        The processed sitenames, sorted.

    Raises:
        RuntimeError: if the PhenoCam host probe fails.
    """
    check_phenocam_host()
    candidates = load_candidate_cameras(
        evaluation_year,
        site_filter=site_filter,
        active_only=active_only,
        limit=limit,
    )
    total = len(candidates)
    print(
        f"[PhenoCam-1] {total} candidate(s) with archive overlap for "
        f"{evaluation_year}"
    )
    sitenames: list[str] = []
    for index, camera in enumerate(candidates, start=1):
        sitename = str(camera["Sitename"])
        # Coordinates are only used for this progress line, so a camera with
        # missing or malformed Lat/Lon must not abort the whole download.
        try:
            coords = (
                f"({float(camera.get('Lat')):.4f}, {float(camera.get('Lon')):.4f})"
            )
        except (TypeError, ValueError):
            coords = "(coordinates unavailable)"
        print(f"[PhenoCam-1] ({index}/{total}) {sitename} {coords}")
        sitenames.append(
            load_or_download_site(
                camera,
                evaluation_year,
                cache_dir,
                refresh=refresh,
            )
        )
    return sorted(sitenames)
 def write_manifest(
    sitenames: list[str],
    output_path: Path,
    cache_dir: Path,
    evaluation_year: int,
 ) -> None:
    """Write the step-1 manifest JSON listing the downloaded sites.

    The manifest stores ``sites_dir`` relative to the manifest's own directory
    whenever possible.

    Args:
        sitenames: site names to list under ``sites``.
        output_path: manifest file to write; parent directories are created.
        cache_dir: base directory of the step-1 cache.
        evaluation_year: calendar year recorded in the manifest.
    """
    import os  # local import: only needed for the relpath fallback below

    year_dir = sites_dir(cache_dir, evaluation_year)
    manifest_dir = output_path.parent
    try:
        rel_sites_dir = year_dir.relative_to(manifest_dir).as_posix()
    except ValueError:
        # A custom --output-json may live outside the site directory's parent,
        # or mix absolute and relative paths; relative_to() rejects both.
        # Fall back to a "../"-style relative path so the manifest is still
        # written after the downloads have completed.
        try:
            rel_sites_dir = Path(
                os.path.relpath(year_dir.resolve(), manifest_dir.resolve())
            ).as_posix()
        except ValueError:
            # No relative path exists (e.g. different Windows drives).
            rel_sites_dir = year_dir.resolve().as_posix()
    payload = {
        "evaluation_year": evaluation_year,
        "count": len(sitenames),
        "sites_dir": rel_sites_dir,
        "sites": sitenames,
    }
    manifest_dir.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-1] Wrote {output_path}")
 def main(argv: list[str] | None = None) -> int:
    """Parse the step-1 command line, run the download and write the manifest.

    Args:
        argv: argument list to parse; ``None`` leaves the choice to argparse.

    Returns:
        ``0`` once the manifest has been written.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--cache-dir", type=Path, default=Path("data"),
        help="Base directory for per-site files and manifest",
    )
    cli.add_argument(
        "--evaluation-year", type=int, default=EVALUATION_YEAR,
        help=f"Calendar year to download (default: {EVALUATION_YEAR})",
    )
    cli.add_argument(
        "--active-only", action="store_true",
        help="Restrict candidates to cameras marked active in the API",
    )
    cli.add_argument(
        "--limit", type=int, default=None,
        help="Process only the first N candidate sites (testing)",
    )
    cli.add_argument(
        "--sites", type=str, default=None,
        help="Comma-separated sitenames to download (testing)",
    )
    cli.add_argument(
        "--refresh", action="store_true",
        help="Re-download sites even when cache files exist",
    )
    cli.add_argument(
        "--output-json", type=Path, default=None,
        help="Manifest output path (default: data/phenocam/{year}.json)",
    )
    options = cli.parse_args(argv)

    # An empty or missing --sites value means "no filter".
    wanted: set[str] | None = None
    if options.sites:
        wanted = {part.strip() for part in options.sites.split(",") if part.strip()}

    downloaded = run_download(
        cache_dir=options.cache_dir,
        evaluation_year=options.evaluation_year,
        active_only=options.active_only,
        site_filter=wanted,
        limit=options.limit,
        refresh=options.refresh,
    )

    manifest_path = options.output_json
    if manifest_path is None:
        manifest_path = options.cache_dir / "phenocam" / f"{options.evaluation_year}.json"
    write_manifest(downloaded, manifest_path, options.cache_dir, options.evaluation_year)
    return 0
 # Run the CLI when executed as a script; main()'s return value becomes the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/2-phenocam-screening.py
+++ b/2-phenocam-screening.py
@ -0,0 +1,495 @@
 """Step 2: PhenoCam GCC + SNR screening on step-1 cache.
 Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
 - ``phenocam/{year}.json`` — step-1 manifest
 - ``phenocam/{year}/{sitename}.json`` — per-site metadata
 - ``phenocam/{year}/{sitename}_1day.csv`` — GCC timeseries
 Outputs (``data/phenocam_screening/``):
 - ``{year}.json`` — full per-site results
 - ``{year}.csv`` — flat summary table
 CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional; default: all manifest sites).
 Next step: :mod:`3-sentinel-data`.
 """
 from __future__ import annotations
 import argparse
 import csv
 import json
 import math
 import sys
 from datetime import date, datetime
 from pathlib import Path
 from typing import Any
 import numpy as np
 from scipy.interpolate import UnivariateSpline
 PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
 if str(PROCESSING_DIR) not in sys.path:
    sys.path.insert(0, str(PROCESSING_DIR))
 from acquisition_phenocam import _phenocam_summary_gcc_value  # noqa: E402
 # Default minimum number of in-year GCC observations (--min-gcc-points).
 MIN_GCC_POINTS = 30
 # Default minimum AIC-spline SNR needed to pass the "snr" gate (--snr-threshold).
 SNR_THRESHOLD = 2.0
 # Default radius within which SNR-passed sites are deduplicated (--cluster-radius-m).
 CLUSTER_RADIUS_M = 500.0
 # Gate names in the order print_summary() reports them.
 GATE_ORDER = ("phenocam", "snr", "cluster")
 # Suffix of the per-site GCC CSV that sits beside {sitename}.json in the step-1 cache.
 ONE_DAY_CSV_SUFFIX = "_1day.csv"
 # Sphere radius used by _haversine_m(); distances come out in the same unit.
 _EARTH_RADIUS_M = 6371000.0
 def load_manifest(path: Path) -> dict[str, Any]:
    """Read the step-1 manifest JSON at ``path`` and check its required keys.

    Raises:
        ValueError: naming the first of ``evaluation_year``, ``sites_dir``,
            ``sites`` that is absent from the manifest.
    """
    manifest = json.loads(path.read_text(encoding="utf-8"))
    required = ("evaluation_year", "sites_dir", "sites")
    missing = next((key for key in required if key not in manifest), None)
    if missing is not None:
        raise ValueError(f"Expected '{missing}' in manifest {path}")
    return manifest
 def resolve_sites_dir(manifest_path: Path, manifest: dict[str, Any]) -> Path:
    """Resolve the manifest's ``sites_dir`` entry against the manifest's own directory."""
    manifest_dir = manifest_path.parent
    return manifest_dir.joinpath(manifest["sites_dir"]).resolve()
 def load_site_entry(sites_dir: Path, sitename: str) -> dict[str, Any]:
    """Load ``{sitename}.json`` and attach the path of its one-day CSV.

    The returned payload gains a ``_one_day_csv`` key holding the CSV path
    when that file exists, otherwise ``None``.
    """
    metadata_file = sites_dir / f"{sitename}.json"
    entry = json.loads(metadata_file.read_text(encoding="utf-8"))
    one_day_csv = sites_dir / f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    if one_day_csv.is_file():
        entry["_one_day_csv"] = one_day_csv
    else:
        entry["_one_day_csv"] = None
    return entry
 def parse_gcc90_series(csv_path: Path, evaluation_year: int) -> list[tuple[str, float]]:
    """Extract the in-year GCC series from a one-day summary CSV.

    Empty lines and lines starting with ``#`` are dropped before CSV parsing.
    Rows are kept when their ``date`` parses as ``YYYY-MM-DD``, falls inside
    ``evaluation_year`` and ``_phenocam_summary_gcc_value`` yields a value.
    The mean fallback flag passed to that helper is set when the header has
    no ``gcc_90`` column.

    Returns:
        ``(iso_date, gcc)`` pairs sorted by date string.
    """
    data_lines: list[str] = []
    for raw_line in csv_path.read_text(encoding="utf-8").split("\n"):
        if raw_line and not raw_line.startswith("#"):
            data_lines.append(raw_line)

    reader = csv.DictReader(data_lines)
    use_mean_fallback = "gcc_90" not in (reader.fieldnames or ())
    first_day = date(evaluation_year, 1, 1)
    last_day = date(evaluation_year, 12, 31)

    collected: list[tuple[str, float]] = []
    for row in reader:
        raw_date = row.get("date")
        if not raw_date:
            continue
        try:
            day = datetime.strptime(raw_date, "%Y-%m-%d").date()
        except ValueError:
            continue
        if day < first_day or day > last_day:
            continue
        value = _phenocam_summary_gcc_value(row, use_mean_fallback)
        if value is not None:
            collected.append((day.isoformat(), float(value)))
    return sorted(collected, key=lambda pair: pair[0])
 def _months_covered(day_strings: list[str]) -> int:
    months: set[int] = set()
    for day in day_strings:
        months.add(datetime.strptime(day, "%Y-%m-%d").month)
    return len(months)
 def _aic_for_spline(x: np.ndarray, y: np.ndarray, spline: UnivariateSpline) -> float:
    """Score ``spline`` on ``(x, y)`` as ``n * log(rss / n) + 2 * edf``.

    ``edf`` is the number of knots plus the number of coefficients.

    Returns:
        The score, or ``math.inf`` when the residual sum of squares is not
        positive or fewer than four samples are given, so such fits are
        never preferred by the caller.
    """
    residuals = y - spline(x)
    rss = float(np.sum(residuals**2))
    n = len(y)
    if rss <= 0 or n < 4:
        return math.inf
    edf = float(spline.get_knots().shape[0] + spline.get_coeffs().shape[0])
    return n * math.log(rss / n) + 2.0 * edf
 def compute_snr_aic_spline(
    series: list[tuple[str, float]],
    *,
    min_points: int | None = None,
 ) -> float | None:
    """Estimate the signal-to-noise ratio of a GCC series.

    A cubic smoothing spline is fitted for a range of smoothing factors; the
    fit with the lowest ``_aic_for_spline`` score is kept and the SNR is the
    value range of the series divided by the RMSE of that fit.

    Args:
        series: ``(YYYY-MM-DD, value)`` pairs.
        min_points: minimum series length required; ``None`` means the module
            default ``MIN_GCC_POINTS``. Callers applying a custom point
            threshold should pass it here so both checks agree.

    Returns:
        The SNR, or ``None`` when the series is too short, has fewer than five
        distinct days, has no variance, no spline could be selected, or the
        selected fit has zero RMSE.
    """
    if min_points is None:
        min_points = MIN_GCC_POINTS
    if len(series) < min_points:
        return None
    dates = [datetime.strptime(day, "%Y-%m-%d").date() for day, _ in series]
    # Abscissa: days elapsed since the first entry of the series.
    x = np.array([(d - dates[0]).days for d in dates], dtype=float)
    y = np.array([value for _, value in series], dtype=float)
    if len(np.unique(x)) < 5:
        return None
    y_var = float(np.var(y))
    if y_var <= 0:
        return None
    # Smoothing factors spanning six decades, scaled by the total variance.
    candidates = np.logspace(-4, 2, 40) * y_var * len(y)
    best_spline: UnivariateSpline | None = None
    best_aic = math.inf
    for smoothing in candidates:
        try:
            spline = UnivariateSpline(x, y, k=3, s=float(smoothing))
        except Exception:
            # Best effort: a smoothing factor that cannot be fitted is skipped.
            continue
        aic = _aic_for_spline(x, y, spline)
        if aic < best_aic:
            best_aic = aic
            best_spline = spline
    if best_spline is None:
        return None
    residuals = y - best_spline(x)
    rmse = float(np.sqrt(np.mean(residuals**2)))
    amplitude = float(np.max(y) - np.min(y))
    if rmse <= 0:
        return None
    return amplitude / rmse
 def screen_site(
    site_entry: dict[str, Any],
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
 ) -> dict[str, Any]:
    """Run the "phenocam" and "snr" gates for one site.

    Args:
        site_entry: payload with a ``response`` dict and the ``_one_day_csv``
            path (or ``None``).
        evaluation_year: calendar year the GCC series is restricted to.
        min_gcc_points: minimum number of in-year GCC points; also used as
            the minimum series length for the SNR computation.
        snr_threshold: minimum SNR required to pass the "snr" gate.

    Returns:
        ``{"response": ..., "calculations": ...}`` where ``calculations``
        records the counts, the SNR, ``status`` (``PASS``/``FAIL``), the
        failing gate and reason, and the gates passed so far.
    """
    response = site_entry["response"]
    roi = response.get("roi")
    csv_path = site_entry.get("_one_day_csv")
    calculations: dict[str, Any] = {
        "evaluation_year": evaluation_year,
        "n_gcc_points": 0,
        "first_gcc_date": None,
        "last_gcc_date": None,
        "months_with_gcc": 0,
        "snr": None,
        "min_gcc_points": min_gcc_points,
        "snr_threshold": snr_threshold,
        "status": "FAIL",
        "failing_gate": None,
        "passed_gates": [],
        "reason": None,
    }
    result = {"response": response, "calculations": calculations}

    # --- gate "phenocam": ROI, CSV and enough in-year points ---------------
    if roi is None or not roi.get("one_day_summary") or csv_path is None:
        calculations["failing_gate"] = "phenocam"
        calculations["reason"] = "no_roi"
        return result
    series = parse_gcc90_series(csv_path, evaluation_year)
    calculations["n_gcc_points"] = len(series)
    if calculations["n_gcc_points"] == 0:
        calculations["failing_gate"] = "phenocam"
        calculations["reason"] = "no_gcc_in_year"
        return result
    day_strings = [day for day, _ in series]
    calculations["first_gcc_date"] = day_strings[0]
    calculations["last_gcc_date"] = day_strings[-1]
    calculations["months_with_gcc"] = _months_covered(day_strings)
    if calculations["n_gcc_points"] < min_gcc_points:
        calculations["failing_gate"] = "phenocam"
        calculations["reason"] = "insufficient_gcc_points"
        return result
    calculations["passed_gates"].append("phenocam")

    # --- gate "snr" ---------------------------------------------------------
    # Pass the caller's threshold through so a series accepted above is not
    # rejected again by the SNR routine's own default minimum length.
    snr = compute_snr_aic_spline(series, min_points=min_gcc_points)
    calculations["snr"] = snr
    if snr is None or snr < snr_threshold:
        calculations["failing_gate"] = "snr"
        calculations["reason"] = "insufficient_snr" if snr is not None else "snr_undefined"
        return result
    calculations["passed_gates"].append("snr")
    calculations["status"] = "PASS"
    calculations["failing_gate"] = None
    calculations["reason"] = None
    return result
 def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two lat/lon points (degrees in).

    The result is in the unit of ``_EARTH_RADIUS_M``.
    """
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can push ``a`` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin() raise a domain error.
    a = min(1.0, max(0.0, a))
    return 2 * _EARTH_RADIUS_M * math.asin(math.sqrt(a))
 def _site_coords(row: dict[str, Any]) -> tuple[float, float] | None:
    camera = row["response"]["camera"]
    lat, lon = camera.get("Lat"), camera.get("Lon")
    if lat is None or lon is None:
        return None
    return float(lat), float(lon)
 def _cluster_rank(row: dict[str, Any]) -> tuple[int, float]:
    calc = row["calculations"]
    return calc["n_gcc_points"], float(calc.get("snr") or 0.0)
 def apply_cluster_gate(results: list[dict[str, Any]], *, radius_m: float) -> int:
    """Keep one site per group of SNR-passed sites lying within ``radius_m``.

    Sites that passed the "snr" gate are linked whenever their pairwise
    ``_haversine_m`` distance is at most ``radius_m``; linked sites form a
    cluster (links are transitive). In each cluster the site with the highest
    ``_cluster_rank`` gets "cluster" appended to its passed gates; the others
    are marked ``FAIL`` with reason ``nearby_duplicate``. Sites without
    coordinates pass the gate unclustered. ``results`` is modified in place.

    Returns:
        The number of sites demoted to ``FAIL``.
    """
    # (index into results, lat, lon) of every SNR-passed site with coordinates.
    located: list[tuple[int, float, float]] = []
    for result_idx, row in enumerate(results):
        gates = row["calculations"]["passed_gates"]
        if "snr" not in gates:
            continue
        coords = _site_coords(row)
        if coords is None:
            gates.append("cluster")
            continue
        located.append((result_idx, coords[0], coords[1]))

    count = len(located)
    roots = list(range(count))

    def root_of(node: int) -> int:
        # Follow parent links to the set representative, shortening the
        # chain on the way.
        while roots[node] != node:
            roots[node] = roots[roots[node]]
            node = roots[node]
        return node

    for i, (_, lat_a, lon_a) in enumerate(located):
        for j in range(i + 1, count):
            _, lat_b, lon_b = located[j]
            if _haversine_m(lat_a, lon_a, lat_b, lon_b) <= radius_m:
                root_a, root_b = root_of(i), root_of(j)
                if root_a != root_b:
                    roots[root_b] = root_a

    # Representative -> result indices of the cluster members.
    groups: dict[int, list[int]] = {}
    for position in range(count):
        groups.setdefault(root_of(position), []).append(located[position][0])

    demoted = 0
    for member_indices in groups.values():
        winner_idx = max(member_indices, key=lambda idx: _cluster_rank(results[idx]))
        winner_name = str(results[winner_idx]["response"]["camera"]["Sitename"])
        for idx in member_indices:
            calc = results[idx]["calculations"]
            calc["cluster_size"] = len(member_indices)
            if idx == winner_idx:
                calc["passed_gates"].append("cluster")
                continue
            calc["status"] = "FAIL"
            calc["failing_gate"] = "cluster"
            calc["reason"] = "nearby_duplicate"
            calc["cluster_winner"] = winner_name
            demoted += 1
    return demoted
 def run_screening(
    manifest: dict[str, Any],
    sites_dir: Path,
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
    site_filter: set[str] | None = None,
 ) -> list[dict[str, Any]]:
    """Screen every manifest site (optionally filtered) and collect the results.

    Args:
        manifest: step-1 manifest; its ``sites`` list is iterated in order.
        sites_dir: directory holding the per-site cache files.
        evaluation_year: calendar year forwarded to ``screen_site``.
        min_gcc_points: point threshold forwarded to ``screen_site``.
        snr_threshold: SNR threshold forwarded to ``screen_site``.
        site_filter: when given, only sitenames in this set are screened.

    Returns:
        One ``screen_site`` result per screened site, in manifest order.
    """
    selected = manifest["sites"]
    if site_filter is not None:
        selected = [name for name in selected if name in site_filter]
    total = len(selected)

    outcomes: list[dict[str, Any]] = []
    for position, sitename in enumerate(selected, start=1):
        print(f"[PhenoCam-2] ({position}/{total}) {sitename}")
        entry = load_site_entry(sites_dir, sitename)
        outcome = screen_site(
            entry,
            evaluation_year=evaluation_year,
            min_gcc_points=min_gcc_points,
            snr_threshold=snr_threshold,
        )
        outcomes.append(outcome)
    return outcomes
 def print_summary(results: list[dict[str, Any]], evaluation_year: int) -> None:
    """Print the pass count, per-gate tallies and a per-site table to stdout."""
    calcs = [row["calculations"] for row in results]
    n_pass = len([calc for calc in calcs if calc["status"] == "PASS"])
    gates_label = " + ".join(GATE_ORDER)
    print(
        f"\n[PhenoCam-2] Screening for {evaluation_year}: "
        f"{n_pass}/{len(results)} pass ({gates_label})"
    )

    for gate in GATE_ORDER:
        failed_here = len([calc for calc in calcs if calc["failing_gate"] == gate])
        reached = len([calc for calc in calcs if gate in calc["passed_gates"]])
        print(f"  after_{gate}: {reached}, fail_at_{gate}: {failed_here}")

    print("\nPer-site table")
    print(f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} {'status':>6} gate reason")
    print("-" * 72)

    by_name = sorted(
        results, key=lambda item: str(item["response"]["camera"]["Sitename"])
    )
    for row in by_name:
        camera, calc = row["response"]["camera"], row["calculations"]
        # An undefined SNR is shown as an empty cell.
        snr_text = "" if calc["snr"] is None else f"{calc['snr']:.2f}"
        gate_text = calc["failing_gate"] or "-"
        reason_text = calc["reason"] or "-"
        print(
            f"{camera['Sitename']:<24} {calc['n_gcc_points']:4d} "
            f"{calc['months_with_gcc']:3d} {snr_text:>6} "
            f"{calc['status']:>6} {gate_text:<8} "
            f"{reason_text}"
        )
 def write_screening_json(
    results: list[dict[str, Any]],
    output_path: Path,
    evaluation_year: int,
 ) -> None:
    """Write all screening results, sorted by sitename, as indented JSON.

    The document also records the evaluation year, the total number of sites
    and how many have status ``PASS``. Parent directories are created.
    """
    qualifying = sum(1 for row in results if row["calculations"]["status"] == "PASS")
    ordered = list(results)
    ordered.sort(key=lambda item: str(item["response"]["camera"]["Sitename"]))
    document = {
        "evaluation_year": evaluation_year,
        "count": len(results),
        "qualifying_count": qualifying,
        "sites": ordered,
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(document, indent=2)
    output_path.write_text(serialized + "\n", encoding="utf-8")
    print(f"[PhenoCam-2] Wrote {output_path}")
 def write_screening_csv(results: list[dict[str, Any]], output_path: Path) -> None:
    """Write one flat CSV row per screening result.

    Each row combines selected camera/ROI fields with every key of the
    result's ``calculations`` dict. Columns are the keys of the first row
    followed by any further keys in first-seen order; with no results only a
    ``Sitename,status`` header is written. Parent directories are created.
    """

    def _flatten(result: dict[str, Any]) -> dict[str, Any]:
        camera = result["response"]["camera"]
        metadata = camera.get("sitemetadata") or {}
        roi = result["response"].get("roi") or {}
        return {
            "Sitename": camera.get("Sitename"),
            "Lat": camera.get("Lat"),
            "Lon": camera.get("Lon"),
            "site_description": metadata.get("site_description"),
            "primary_veg_type": metadata.get("primary_veg_type"),
            "site_type": metadata.get("site_type"),
            "one_day_summary": roi.get("one_day_summary"),
            **result["calculations"],
        }

    records = [_flatten(result) for result in results]
    if records:
        # dict.fromkeys keeps first-seen order while dropping repeats.
        columns = list(dict.fromkeys(key for record in records for key in record))
    else:
        columns = ["Sitename", "status"]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        for record in records:
            writer.writerow(record)
    print(f"[PhenoCam-2] Wrote {output_path}")
 def main(argv: list[str] | None = None) -> int:
    """Parse the step-2 command line, screen the sites and write both reports.

    Args:
        argv: argument list to parse; ``None`` leaves the choice to argparse.

    Returns:
        ``0`` after the JSON and CSV outputs have been written.

    Raises:
        SystemExit: if the step-1 manifest for the requested year is missing.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--evaluation-year", type=int, default=2025,
        help="Evaluation year (default: 2025)",
    )
    cli.add_argument(
        "--sites", type=str, default=None,
        help="Comma-separated sitenames (default: all sites in step-1 manifest)",
    )
    cli.add_argument(
        "--min-gcc-points", type=int, default=MIN_GCC_POINTS,
        help=f"Minimum valid gcc_90 observations in-year (default: {MIN_GCC_POINTS})",
    )
    cli.add_argument(
        "--snr-threshold", type=float, default=SNR_THRESHOLD,
        help=f"Minimum AIC-spline SNR (default: {SNR_THRESHOLD})",
    )
    cli.add_argument(
        "--output-json", type=Path, default=None,
        help="Screening output (default: data/phenocam_screening/{year}.json)",
    )
    cli.add_argument(
        "--output-csv", type=Path, default=None,
        help="Flat CSV summary path",
    )
    cli.add_argument(
        "--cluster-radius-m", type=float, default=CLUSTER_RADIUS_M,
        help=f"Deduplicate SNR-passed sites within this radius (default: {CLUSTER_RADIUS_M})",
    )
    cli.add_argument(
        "--no-cluster", action="store_true",
        help="Skip nearby-site deduplication gate",
    )
    options = cli.parse_args(argv)
    year = options.evaluation_year

    manifest_path = Path("data") / "phenocam" / f"{year}.json"
    if not manifest_path.is_file():
        raise SystemExit(f"Step-1 manifest not found: {manifest_path}")

    # An empty or missing --sites value means "screen every manifest site".
    wanted: set[str] | None = None
    if options.sites:
        wanted = {part.strip() for part in options.sites.split(",") if part.strip()}

    manifest = load_manifest(manifest_path)
    results = run_screening(
        manifest,
        resolve_sites_dir(manifest_path, manifest),
        evaluation_year=year,
        min_gcc_points=options.min_gcc_points,
        snr_threshold=options.snr_threshold,
        site_filter=wanted,
    )

    if not options.no_cluster:
        demoted = apply_cluster_gate(results, radius_m=options.cluster_radius_m)
        if demoted:
            print(f"[PhenoCam-2] Cluster dedup: demoted {demoted} nearby duplicate(s)")

    print_summary(results, year)

    default_dir = Path("data") / "phenocam_screening"
    json_target = options.output_json
    if json_target is None:
        json_target = default_dir / f"{year}.json"
    csv_target = options.output_csv
    if csv_target is None:
        csv_target = default_dir / f"{year}.csv"
    write_screening_json(results, json_target, year)
    write_screening_csv(results, csv_target)
    return 0
 # Run the CLI when executed as a script; main()'s return value becomes the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/3-sentinel-data.py
+++ b/3-sentinel-data.py
@ -0,0 +1,805 @@
 """Step 3: Download S2 and S3 rasters and prepare EFAST inputs.
 Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
 - ``phenocam_screening/{year}.json`` — step-2 PASS sites (coordinates included)
 Outputs (``data/``):
 - ``sentinel_data/{year}/{sitename}/raw/s3/*.tif`` — S3 SYN L2 per-date GeoTIFFs
 - ``sentinel_data/{year}/{sitename}/prepared/s2/`` — S2 REFL + DIST_CLOUD GeoTIFFs
 - ``sentinel_data/{year}/{sitename}/prepared/s3/`` — S3 composite GeoTIFFs
 - ``sentinel_data/{year}/{sitename}/data.json`` — run summary
 Requires ``CDSE_USER`` / ``CDSE_PASSWORD`` (``uv sync`` installs efast).
 CLI:
 - ``--evaluation-year`` (default 2025)
 - ``--site`` (optional; default: all step-2 PASS sites)
 Prior step: :mod:`2-phenocam-screening`.
 Next step: :mod:`4-fusion`.
 """
 from __future__ import annotations
 import argparse
 import json
 import os
 import shutil
 import time
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 import netCDF4
 import numpy as np
 import openeo
 import rasterio
 import requests
 from dotenv import load_dotenv
 from pystac_client import Client
 from rasterio import shutil as rio_shutil
 from rasterio.enums import Resampling
 from rasterio.errors import WindowError
 from rasterio.transform import from_bounds
 from rasterio.vrt import WarpedVRT
 from rasterio.warp import transform_geom
 from rasterio.windows import Window
 from rasterio.windows import from_bounds as window_from_bounds
 from rasterio.windows import transform as window_transform
 from shapely import wkt as shapely_wkt
 from tqdm import tqdm
 # ---------------------------------------------------------------------------
 # Public constants — edit here to change pipeline behaviour
 # ---------------------------------------------------------------------------
 S2_BANDS = ["B02", "B03", "B04"]
 S3_BANDS = [
    "Syn_Oa04_reflectance",
    "Syn_Oa06_reflectance",
    "Syn_Oa08_reflectance",
    "Syn_Oa17_reflectance",
 ]
 S3_BAND_NAMES = ["SDR_Oa04", "SDR_Oa06", "SDR_Oa08", "SDR_Oa17"]
 RESOLUTION_RATIO = 30
 S3_MOSAIC_DAYS = 100
 S3_COMPOSITE_STEP = 2
 S3_COMPOSITE_SIGMA_DOY = 10
 S3_COMPOSITE_D = 20
 S3_SMOOTHING_STD = 1
 S3_REFLECTANCE_SCALE = 10_000  # OpenEO SYN L2 SDR → 0–1 (EFAST expects < 5)
 # ---------------------------------------------------------------------------
 # Internal S2 constants
 # ---------------------------------------------------------------------------
 EARTH_SEARCH_URL = "https://earth-search.aws.element84.com/v1"
 _BAND_ASSETS: dict[str, str] = {
    "B02": "blue",
    "B03": "green",
    "B04": "red",
    "B05": "rededge1",
    "B06": "rededge2",
    "B07": "rededge3",
    "B08": "nir",
    "B8A": "nir08",
    "B11": "swir16",
    "B12": "swir22",
 }
 _SCL_ASSET = "scl"
 _MIN_BBOX_HALF_DEG = 0.008
 # ---------------------------------------------------------------------------
 # Internal S3 constants
 # ---------------------------------------------------------------------------
 CDSE_TOKEN_URL = (
    "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/"
    "protocol/openid-connect/token"
 )
 OPENEO_URL = "openeo.dataspace.copernicus.eu"
 S3_COLLECTION = "SENTINEL3_SYN_L2_SYN"
 DATA_DIR = Path("data")
 DEFAULT_YEAR = 2025
 # ---------------------------------------------------------------------------
 # Credentials
 # ---------------------------------------------------------------------------
 def _cdse_credentials() -> dict[str, str | None]:
    """Call ``load_dotenv()`` and return the CDSE login read from the environment.

    Returns:
        ``{"username": ..., "password": ...}`` taken from ``CDSE_USER`` and
        ``CDSE_PASSWORD``; a value is ``None`` when its variable is unset.
    """
    load_dotenv()
    env_names = {"username": "CDSE_USER", "password": "CDSE_PASSWORD"}
    return {field: os.getenv(variable) for field, variable in env_names.items()}
 # ---------------------------------------------------------------------------
 # Screening manifest helpers
 # ---------------------------------------------------------------------------
 def _load_screening_pass_sites(year: int) -> list[dict[str, Any]]:
    """Return the PASS sites recorded in the step-2 screening JSON for ``year``.

    Each returned entry has ``sitename``, ``lat`` and ``lon`` keys. Rows
    without a sitename or without both coordinates are left out.

    Raises:
        FileNotFoundError: if ``phenocam_screening/{year}.json`` is missing
            under ``DATA_DIR``.
    """
    path = DATA_DIR / "phenocam_screening" / f"{year}.json"
    if not path.is_file():
        raise FileNotFoundError(f"Step-2 screening manifest not found: {path}")

    document = json.loads(path.read_text(encoding="utf-8"))
    passing: list[dict[str, Any]] = []
    for row in document.get("sites", []):
        if row.get("calculations", {}).get("status") != "PASS":
            continue
        camera = row.get("response", {}).get("camera", {})
        name, lat, lon = camera.get("Sitename"), camera.get("Lat"), camera.get("Lon")
        if not name or lat is None or lon is None:
            continue
        passing.append({"sitename": str(name), "lat": float(lat), "lon": float(lon)})
    return passing
 # ---------------------------------------------------------------------------
 # S2: geometry helpers (from s2_cloud_native.py)
 # ---------------------------------------------------------------------------
 def wkt_to_bbox(geometry_wkt: str) -> list[float]:
    """Convert a WKT geometry to a ``[west, south, east, north]`` bbox.

    When the bounds collapse to a single point, the box is widened by
    ``_MIN_BBOX_HALF_DEG`` in every direction.
    """
    west, south, east, north = shapely_wkt.loads(geometry_wkt).bounds
    is_single_point = west == east and south == north
    if is_single_point:
        pad = _MIN_BBOX_HALF_DEG
        west, east = west - pad, east + pad
        south, north = south - pad, north + pad
    return [west, south, east, north]
 def _boa_offset(item: Any) -> int:
    """Return the BOA additive offset for a STAC item.
    Processing baseline >= 04.00 applies a -1000 offset; earlier baselines use 0.
    """
    if item.properties.get("earthsearch:boa_offset_applied"):
        return 0
    baseline_str = str(
        item.properties.get("processing:baseline")
        or item.properties.get("s2:processing_baseline")
        or "0"
    )
    try:
        baseline = float(baseline_str)
    except ValueError:
        baseline = 0.0
    return -1000 if baseline >= 4.0 else 0
 def _window_for_bbox(
    src: rasterio.io.DatasetReader,
    bbox_4326: list[float],
 ) -> Window | None:
    """Map an EPSG:4326 bbox onto ``src`` and return the matching pixel window.
    The bbox corners are reprojected into the dataset CRS, the extent of the
    reprojected corners is converted to a window, and that window is clipped
    to the raster. Returns ``None`` when the clipping raises ``WindowError``.
    """
    west, south = bbox_4326[0], bbox_4326[1]
    east, north = bbox_4326[2], bbox_4326[3]
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    projected = transform_geom(
        "EPSG:4326",
        src.crs.to_wkt(),
        {"type": "Polygon", "coordinates": [ring]},
    )
    # The fifth vertex only closes the ring, so four corners are enough.
    corners = projected["coordinates"][0][:4]
    eastings = [corner[0] for corner in corners]
    northings = [corner[1] for corner in corners]
    requested = window_from_bounds(
        min(eastings), min(northings), max(eastings), max(northings), src.transform
    )
    try:
        clipped = requested.intersection(Window(0, 0, src.width, src.height))
    except WindowError:
        return None
    return clipped
 def _read_window(
    href: str,
    bbox_4326: list[float],
    out_shape: tuple[int, int] | None = None,
    resampling: Resampling = Resampling.bilinear,
 ) -> tuple[np.ndarray, dict[str, Any]] | None:
    """Read band 1 of the raster at ``href`` for the bbox window only.
    Returns ``(data, profile)`` where ``profile`` holds the CRS, the window
    transform, the array height/width and the source dtype, or ``None`` when
    ``_window_for_bbox`` finds no window for the bbox. ``out_shape`` and
    ``resampling`` are forwarded to the rasterio read.
    """
    with rasterio.open(href) as dataset:
        window = _window_for_bbox(dataset, bbox_4326)
        if window is None:
            return None
        pixels = dataset.read(
            1, window=window, out_shape=out_shape, resampling=resampling
        )
        rows, cols = pixels.shape[0], pixels.shape[1]
        meta: dict[str, Any] = dict(
            crs=dataset.crs,
            transform=window_transform(window, dataset.transform),
            height=rows,
            width=cols,
            dtype=dataset.dtypes[0],
        )
    return pixels, meta
 def _read_bands(
    item: Any,
    bbox: list[float],
    bands: list[str],
 ) -> tuple[list[np.ndarray], dict[str, Any]] | None:
    """Read every requested band of one STAC item for the bbox window.
    The first band read defines the reference grid; later bands are resampled
    to its height/width. Returns ``(float32 arrays, reference profile)``, or
    ``None`` when a band has no known asset, the asset is missing from the
    item, a window read fails, or ``bands`` is empty.
    """
    arrays: list[np.ndarray] = []
    reference: dict[str, Any] | None = None
    for name in bands:
        key = _BAND_ASSETS.get(name)
        if key is None or key not in item.assets:
            return None
        shape = None
        if reference:
            shape = (reference["height"], reference["width"])
        window_read = _read_window(item.assets[key].href, bbox, out_shape=shape)
        if window_read is None:
            return None
        pixels, meta = window_read
        if reference is None:
            reference = meta
        arrays.append(pixels.astype("float32"))
    if reference is None:
        return None
    return arrays, reference
 def _cloud_mask(item: Any, bbox: list[float], shape: tuple[int, int]) -> np.ndarray:
    """Build a boolean mask of unusable pixels from the item's SCL band.
    ``True`` marks SCL classes 0 (no data), 3 (cloud shadow) and every class
    above 7 (clouds, cirrus, snow). When the item has no SCL asset or the
    window cannot be read, an all-``False`` mask of ``shape`` is returned.
    """
    asset = item.assets.get(_SCL_ASSET)
    if not asset:
        return np.zeros(shape, dtype=bool)
    # Nearest-neighbour keeps class codes intact when resampling to ``shape``.
    scl_read = _read_window(
        asset.href, bbox, out_shape=shape, resampling=Resampling.nearest
    )
    if scl_read is None:
        return np.zeros(shape, dtype=bool)
    classes = scl_read[0]
    return (classes == 0) | (classes == 3) | (classes > 7)
 def _pad_to_multiple(arr: np.ndarray, ratio: int) -> np.ndarray:
    """Zero-pad (bands, H, W) so H and W are multiples of ``ratio``."""
    pad_h = (ratio - arr.shape[1] % ratio) % ratio
    pad_w = (ratio - arr.shape[2] % ratio) % ratio
    if pad_h or pad_w:
        arr = np.pad(arr, ((0, 0), (0, pad_h), (0, pad_w)), constant_values=0)
    return arr
 # ---------------------------------------------------------------------------
 # S2: STAC search + download (from s2_cloud_native.py)
 # ---------------------------------------------------------------------------
 def stac_search_s2(
    bbox: list[float],
    start_date: datetime,
    end_date: datetime,
 ) -> list[Any]:
    """Return the Sentinel-2 L2A items on Earth Search that intersect a bbox.
    The search runs from ``start_date`` (with its own time of day) to the end
    of ``end_date`` (23:59:59), is capped at 10,000 items, and returns each
    item id once.
    """
    catalog = Client.open(EARTH_SEARCH_URL)
    window_start = start_date.strftime("%Y-%m-%dT%H:%M:%SZ")
    window_end = end_date.strftime("%Y-%m-%dT23:59:59Z")
    results = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=bbox,
        datetime=f"{window_start}/{window_end}",
        max_items=10_000,
    )
    # Keyed by id so repeated items collapse to a single entry.
    unique: dict[str, Any] = {}
    for found in results.items():
        unique[found.id] = found
    return list(unique.values())
 def download_s2_window(
    items: list[Any],
    bbox: list[float],
    output_dir: Path,
    bands: list[str],
    ratio: int = RESOLUTION_RATIO,
 ) -> None:
    """Read S2 L2A windows for ``bbox`` and write masked reflectance GeoTIFFs.
    One ``{item.id}_REFL.tif`` is written per item; items whose output file
    already exists are skipped, as are items with a missing asset or no
    overlap with the bbox. Values get the BOA offset from ``_boa_offset``, are
    divided by 10,000 and clipped at 0; pixels flagged by ``_cloud_mask`` are
    set to 0. The result is zero-padded to multiples of ``ratio``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    progress = tqdm(items, unit="granule", desc="S2 COG window read")
    for granule in progress:
        target = output_dir / f"{granule.id}_REFL.tif"
        if target.is_file():
            continue
        read = _read_bands(granule, bbox, bands)
        if read is None:
            tqdm.write(f"[S2] Skipping {granule.id}: missing asset or no bbox overlap")
            continue
        layers, reference = read
        cloudy = _cloud_mask(
            granule, bbox, (reference["height"], reference["width"])
        )
        refl = (np.stack(layers) + _boa_offset(granule)) / 10_000.0
        np.clip(refl, 0, None, out=refl)
        refl[:, cloudy] = 0.0
        # Padding is appended at the bottom/right, so the transform still applies.
        refl = _pad_to_multiple(refl, ratio)
        with rasterio.open(
            target,
            "w",
            driver="GTiff",
            count=len(bands),
            dtype="float32",
            nodata=0,
            crs=reference["crs"],
            transform=reference["transform"],
            height=refl.shape[1],
            width=refl.shape[2],
            compress="lzw",
        ) as sink:
            sink.write(refl)
            for index, label in enumerate(bands, start=1):
                sink.set_band_description(index, label)
 # ---------------------------------------------------------------------------
 # S3: download (from s3_openeo.py)
 # ---------------------------------------------------------------------------
 def _utm_epsg(bbox: list[float]) -> int:
    """Return the UTM EPSG code for the centre of a ``[W, S, E, N]`` bbox."""
    lon = (bbox[0] + bbox[2]) / 2
    lat = (bbox[1] + bbox[3]) / 2
    zone = int((lon + 180) / 6) + 1
    return 32600 + zone if lat >= 0 else 32700 + zone
 def _cdse_token(username: str, password: str) -> str:
    """Request a CDSE bearer token with the password grant and return it.
    Posts the credentials to ``CDSE_TOKEN_URL`` as the ``cdse-public`` client
    and raises for any HTTP error status.
    """
    form = {
        "grant_type": "password",
        "username": username,
        "password": password,
        "client_id": "cdse-public",
    }
    response = requests.post(CDSE_TOKEN_URL, data=form, timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]
 def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
    """Split an OpenEO NetCDF into per-date GeoTIFFs.
    Output filenames match the ``S3*__YYYYMMDDTHHMMSS.tif`` pattern that
    ``s3_processing.produce_median_composite`` expects; the time part is always
    ``T120000`` and a per-date counter keeps same-day time steps apart.
    Coordinates are treated as cell centres (the extent is grown by half a
    pixel), an ascending y-axis is flipped so rows run from max y to min y,
    and masked NetCDF values are written as NaN.
    Args:
        nc_path: NetCDF file with ``t``, ``x``, ``y`` and one variable per
            entry of ``S3_BANDS``.
        output_dir: Directory that receives the GeoTIFFs.
        epsg: EPSG code written as the CRS of every output file.
    Returns:
        Number of GeoTIFFs written (one per time step).
    """
    written = 0
    with netCDF4.Dataset(str(nc_path), "r") as nc:
        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
        x_coords = np.asarray(nc.variables["x"][:], dtype=float)
        y_coords = np.asarray(nc.variables["y"][:], dtype=float)
        # Coordinates are cell centres: extend by half a pixel to get the edges.
        half_x = abs(x_coords[1] - x_coords[0]) / 2 if len(x_coords) > 1 else 0.0
        half_y = abs(y_coords[1] - y_coords[0]) / 2 if len(y_coords) > 1 else 0.0
        transform = from_bounds(
            x_coords.min() - half_x,
            y_coords.min() - half_y,
            x_coords.max() + half_x,
            y_coords.max() + half_y,
            len(x_coords),
            len(y_coords),
        )
        # The transform places row 0 at max y, so ascending y must be flipped.
        flip_y = len(y_coords) > 1 and y_coords[0] < y_coords[-1]
        date_counts: dict[str, int] = {}
        for t_idx, time_val in enumerate(times):
            date_str = time_val.strftime("%Y%m%d")
            n = date_counts.get(date_str, 0)
            date_counts[date_str] = n + 1
            # ``np.stack`` drops the masks of masked arrays, which would let
            # fill values through as data; ``np.ma.stack`` keeps them.
            raw = np.ma.stack(
                [nc.variables[b][t_idx, :, :] for b in S3_BANDS], axis=0
            )
            # Cast before filling: NaN cannot be stored in an integer array.
            stacked = raw.astype("float32").filled(np.nan) / S3_REFLECTANCE_SCALE
            if flip_y:
                stacked = stacked[:, ::-1, :]
            filename = f"S3_{date_str}_{n}__{date_str}T120000.tif"
            with rasterio.open(
                output_dir / filename,
                "w",
                driver="GTiff",
                height=len(y_coords),
                width=len(x_coords),
                count=len(S3_BANDS),
                dtype="float32",
                nodata=float("nan"),
                crs=f"EPSG:{epsg}",
                transform=transform,
                compress="lzw",
            ) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(S3_BAND_NAMES, 1):
                    dst.set_band_description(i, band_name)
            written += 1
    return written
 def download_s3_openeo(
    start_date: datetime,
    end_date: datetime,
    aoi_geometry: str,
    output_dir: Path,
    credentials: dict[str, str | None],
 ) -> None:
    """Download S3 SYN L2 SDR for an AOI via CDSE OpenEO, server-side clipped.
    Writes per-date ``S3_{YYYYMMDD}_{n}__{YYYYMMDD}T120000.tif`` files to
    ``output_dir``, ready for ``s3_processing.produce_median_composite``.
    Skips if any ``S3*.tif`` files already exist.
    Args:
        start_date: First day to request (inclusive).
        end_date: Last day to request (inclusive).
        aoi_geometry: AOI as WKT; its bbox is the spatial extent.
        output_dir: Destination directory, created when missing.
        credentials: Mapping with ``username`` and ``password`` for CDSE.
    The intermediate NetCDF is removed even when the download or the split
    into GeoTIFFs fails.
    """
    # Imported here because nothing else in this function's module needs it
    # for this step.
    from datetime import timedelta
    output_dir.mkdir(parents=True, exist_ok=True)
    if any(output_dir.glob("S3*.tif")):
        print("[S3-OEO] Skipping — output_dir already contains S3 GeoTIFFs")
        return
    bbox = wkt_to_bbox(aoi_geometry)
    epsg = _utm_epsg(bbox)
    spatial_extent = {
        "west": bbox[0],
        "east": bbox[2],
        "south": bbox[1],
        "north": bbox[3],
    }
    print("[S3-OEO] Authenticating with CDSE...")
    token = _cdse_token(credentials["username"], credentials["password"])  # type: ignore[arg-type]
    conn = openeo.connect(OPENEO_URL)
    conn.authenticate_oidc_access_token(token)
    start_str = start_date.strftime("%Y-%m-%d")
    end_str = end_date.strftime("%Y-%m-%d")
    # openEO temporal extents are left-closed (the end instant is excluded),
    # so request up to the day after ``end_date`` to keep that day's data.
    end_exclusive = (end_date + timedelta(days=1)).strftime("%Y-%m-%d")
    print(f"[S3-OEO] Loading {S3_COLLECTION} ({start_str} → {end_str})...")
    datacube = conn.load_collection(
        S3_COLLECTION,
        spatial_extent=spatial_extent,
        temporal_extent=[start_str, end_exclusive],
        bands=S3_BANDS,
    ).resample_spatial(projection=epsg)
    nc_path = output_dir / "_s3_syn_l2.nc"
    print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
    t0 = time.time()
    try:
        datacube.download(str(nc_path), format="NetCDF")
        print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
        print("[S3-OEO] Splitting into per-date GeoTIFFs...")
        written = _netcdf_to_geotiffs(nc_path, output_dir, epsg)
    finally:
        # Never leave a partial or stale NetCDF behind.
        nc_path.unlink(missing_ok=True)
    print(f"[S3-OEO] {written} GeoTIFFs written to {output_dir}")
 # ---------------------------------------------------------------------------
 # S2: distance_to_clouds helper
 # ---------------------------------------------------------------------------
 def _import_distance_to_clouds():
    try:
        from efast.s2_processing import distance_to_clouds
        return distance_to_clouds
    except ImportError as exc:
        raise ImportError(
            "efast not found. Install with: uv sync"
        ) from exc
 def _rescale_dist_cloud(s2_dir: Path) -> None:
    """Rewrite DIST_CLOUD rasters whose values look normalised to [0, 1].
    For every ``*DIST_CLOUD.tif`` whose maximum (ignoring NaN) is at most 1,
    all positive values are replaced by 2.0 in place; other files are left
    untouched.
    """
    for tif in s2_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(tif) as reader:
            distances = reader.read(1)
        looks_normalised = float(np.nanmax(distances)) <= 1
        if not looks_normalised:
            continue
        with rasterio.open(tif, "r+") as writer:
            rescaled = np.where(distances > 0, 2.0, distances).astype(np.float32)
            writer.write(rescaled, 1)
 # ---------------------------------------------------------------------------
 # S3: compositing + reprojection helpers (from 4-sentinel-data.py)
 # ---------------------------------------------------------------------------
 def _import_s3_processing():
    try:
        from efast import s3_processing
        return s3_processing
    except ImportError as exc:
        raise ImportError(
            "efast not found. Install with: uv sync"
        ) from exc
 def _reproject_s3_composites_to_s2_grid(
    composite_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    resolution_ratio: int = RESOLUTION_RATIO,
 ) -> None:
    """Warp every ``composite_*.tif`` onto the S2 footprint at low resolution.
    The target grid takes bounds and CRS from ``s2_refl_path`` and has the S2
    width/height divided by ``resolution_ratio``. Outputs are written to
    ``s3_out_dir`` as float32 GeoTIFFs named ``composite_{date}.tif``, with
    dashes removed from the date part, using cubic resampling and nodata 0.
    """
    s3_out_dir.mkdir(parents=True, exist_ok=True)
    with rasterio.open(s2_refl_path) as reference:
        bounds = reference.bounds
        grid_crs = reference.crs
        lr_width = reference.width // resolution_ratio
        lr_height = reference.height // resolution_ratio
    lr_transform = rasterio.transform.from_bounds(
        bounds.left,
        bounds.bottom,
        bounds.right,
        bounds.top,
        lr_width,
        lr_height,
    )
    # The target grid is the same for every composite.
    warp_kwargs = {
        "transform": lr_transform,
        "height": lr_height,
        "width": lr_width,
        "crs": grid_crs,
        "resampling": Resampling.cubic,
    }
    for composite in sorted(composite_dir.glob("composite_*.tif")):
        stamp = composite.stem.split("_", 1)[1].replace("-", "")
        destination = s3_out_dir / f"composite_{stamp}.tif"
        with rasterio.open(composite) as source, WarpedVRT(source, **warp_kwargs) as vrt:
            out_profile = vrt.profile.copy()
            out_profile.update(dtype="float32", nodata=0, driver="GTiff")
            rio_shutil.copy(vrt, destination, **out_profile)
 def _s3_reflectance_scale(raw_s3_dir: Path) -> float:
    """Return the multiplier that brings raw S3 values to 0–1 reflectance.
    Scans ``S3*.tif`` files and returns ``1 / S3_REFLECTANCE_SCALE`` as soon as
    one has a finite maximum above 5 (values still look unscaled); returns 1.0
    when no file does.
    """
    for tif in raw_s3_dir.glob("S3*.tif"):
        with rasterio.open(tif) as reader:
            peak = float(np.nanmax(reader.read()))
        if np.isfinite(peak) and peak > 5:
            return 1.0 / S3_REFLECTANCE_SCALE
    return 1.0
 def _stage_s3_for_efast(raw_s3_dir: Path, staging_dir: Path) -> int:
    """Copy ``S3*.tif`` inputs into a fresh staging directory as float32.
    ``staging_dir`` is deleted and recreated. Every band is multiplied by the
    factor from ``_s3_reflectance_scale`` and band descriptions are carried
    over. Returns the number of files staged.
    """
    factor = _s3_reflectance_scale(raw_s3_dir)
    if staging_dir.exists():
        shutil.rmtree(staging_dir)
    staging_dir.mkdir(parents=True)
    inputs = sorted(raw_s3_dir.glob("S3*.tif"))
    for tif in inputs:
        with rasterio.open(tif) as reader:
            scaled = reader.read().astype("float32") * factor
            meta = reader.profile.copy()
            band_labels = reader.descriptions
        meta.update(dtype="float32")
        with rasterio.open(staging_dir / tif.name, "w", **meta) as writer:
            writer.write(scaled)
            for band_index, label in enumerate(band_labels, 1):
                if label:
                    writer.set_band_description(band_index, label)
    if factor != 1.0:
        print(f"[S3-PREP] Scaled raw SDR by {factor:g} for EFAST compositing")
    return len(inputs)
 def _prepare_s3(
    raw_s3_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    work_dir: Path | None = None,
 ) -> None:
    """Composite, smooth and reformat raw S3 files, then warp them to the S2 grid.
    Intermediate results live in ``scaled``, ``composites``, ``blurred`` and
    ``calibrated`` sub-directories of ``work_dir`` (default:
    ``s3_out_dir/_efast_work``), each recreated empty at the start. Existing
    ``composite_*.tif`` files in ``s3_out_dir`` are replaced. The default work
    directory is removed afterwards; an explicit ``work_dir`` is kept.
    Raises ``ValueError`` when ``raw_s3_dir`` holds no ``S3*.tif`` file.
    """
    s3_processing = _import_s3_processing()
    work_root = work_dir or (s3_out_dir / "_efast_work")
    stage_dirs = {
        name: work_root / name
        for name in ("scaled", "composites", "blurred", "calibrated")
    }
    for stage_dir in stage_dirs.values():
        if stage_dir.exists():
            shutil.rmtree(stage_dir)
        stage_dir.mkdir(parents=True, exist_ok=True)
    if _stage_s3_for_efast(raw_s3_dir, stage_dirs["scaled"]) == 0:
        raise ValueError(f"No S3*.tif files found in {raw_s3_dir}")
    print(
        f"[S3-PREP] produce_median_composite: mosaic_days={S3_MOSAIC_DAYS}, "
        f"step={S3_COMPOSITE_STEP}, sigma_doy={S3_COMPOSITE_SIGMA_DOY}, "
        f"D={S3_COMPOSITE_D}"
    )
    s3_processing.produce_median_composite(
        stage_dirs["scaled"],
        stage_dirs["composites"],
        step=S3_COMPOSITE_STEP,
        mosaic_days=S3_MOSAIC_DAYS,
        s3_bands=[1, 2, 3, 4],
        D=S3_COMPOSITE_D,
        sigma_doy=S3_COMPOSITE_SIGMA_DOY,
    )
    s3_processing.smoothing(
        stage_dirs["composites"],
        stage_dirs["blurred"],
        product="composite",
        std=S3_SMOOTHING_STD,
        preserve_nan=False,
    )
    s3_processing.reformat_s3(
        stage_dirs["blurred"],
        stage_dirs["calibrated"],
        product="composite",
        scaling_factor=1,
    )
    # Drop results of earlier runs so only current composites remain.
    for stale in s3_out_dir.glob("composite_*.tif"):
        stale.unlink()
    _reproject_s3_composites_to_s2_grid(
        stage_dirs["calibrated"], s2_refl_path, s3_out_dir
    )
    if work_dir is None and work_root.exists():
        shutil.rmtree(work_root)
    produced = len(list(s3_out_dir.glob("composite_*.tif")))
    print(f"[S3-PREP] Wrote {produced} composites")
 # ---------------------------------------------------------------------------
 # Per-site pipeline
 # ---------------------------------------------------------------------------
 def process_site(
    sitename: str,
    lat: float,
    lon: float,
    year: int,
 ) -> dict[str, Any]:
    """Download S2 + S3 and run EFAST preparation for one site.
    Everything is written under ``DATA_DIR/sentinel_data/{year}/{sitename}``:
    raw S3 GeoTIFFs in ``raw/s3``, S2 outputs in ``prepared/s2``, reprojected
    S3 composites in ``prepared/s3`` and a ``data.json`` summary.
    Args:
        sitename: Site identifier, used as the directory name.
        lat: Site latitude, used as the y of the AOI point.
        lon: Site longitude, used as the x of the AOI point.
        year: Calendar year; data is requested from Jan 1 to Dec 31.
    Returns:
        The summary dict (site, year, coordinates and file counts) that is
        also written to ``data.json``.
    Raises:
        ValueError: When no ``*_REFL.tif`` exists in the S2 output directory
            after the download step. Errors from the download and processing
            helpers propagate unchanged.
    """
    site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
    s2_out = site_dir / "prepared" / "s2"
    s3_raw = site_dir / "raw" / "s3"
    s3_out = site_dir / "prepared" / "s3"
    # The AOI is a single point; wkt_to_bbox turns it into a small box.
    aoi_wkt = f"POINT ({lon} {lat})"
    bbox = wkt_to_bbox(aoi_wkt)
    # NOTE(review): _cdse_credentials is defined outside this view — presumably
    # returns the username/password mapping download_s3_openeo expects; confirm.
    creds = _cdse_credentials()
    # S3 download (skipped by the helper when S3*.tif files already exist)
    print(f"[{sitename}] Downloading S3...")
    download_s3_openeo(
        start_date=datetime(year, 1, 1),
        end_date=datetime(year, 12, 31),
        aoi_geometry=aoi_wkt,
        output_dir=s3_raw,
        credentials=creds,
    )
    # S2 download (existing *_REFL.tif files are skipped by the helper)
    print(f"[{sitename}] Searching S2 on Earth Search...")
    items = stac_search_s2(bbox, datetime(year, 1, 1), datetime(year, 12, 31))
    print(f"[{sitename}] {len(items)} S2 items found — downloading windows...")
    download_s2_window(items, bbox, s2_out, S2_BANDS, RESOLUTION_RATIO)
    # S2 distance-to-clouds
    # NOTE(review): presumably writes the *_DIST_CLOUD.tif files counted in the
    # summary below — confirm against efast.
    print(f"[{sitename}] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_out, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud(s2_out)
    # S3 compositing: any one REFL file serves as the reference grid
    s2_refl_path = next(iter(s2_out.glob("*_REFL.tif")), None)
    if s2_refl_path is None:
        raise ValueError(f"No REFL files in {s2_out} — S2 download may have failed")
    s3_out.mkdir(parents=True, exist_ok=True)
    print(f"[{sitename}] Running S3 compositing pipeline...")
    _prepare_s3(s3_raw, s2_refl_path, s3_out)
    # File counts are taken from disk after all steps have run.
    summary = {
        "sitename": sitename,
        "evaluation_year": year,
        "lat": lat,
        "lon": lon,
        "s2_refl_count": len(list(s2_out.glob("*_REFL.tif"))),
        "s2_dist_cloud_count": len(list(s2_out.glob("*_DIST_CLOUD.tif"))),
        "s3_raw_count": len(list(s3_raw.glob("S3*.tif"))),
        "s3_composite_count": len(list(s3_out.glob("composite_*.tif"))),
    }
    site_dir.mkdir(parents=True, exist_ok=True)
    (site_dir / "data.json").write_text(
        json.dumps(summary, indent=2) + "\n", encoding="utf-8"
    )
    return summary
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
 def main(argv: list[str] | None = None) -> int:
    """Run the Sentinel data step for all step-2 PASS sites, or a single one.
    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.
    Returns:
        0 when every selected site was processed, 1 when no site could be
        selected or at least one site failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to process (default: all step-2 PASS sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year
    pass_sites = _load_screening_pass_sites(year)
    if not pass_sites:
        print("[Sentinel-3] No PASS sites found in step-2 screening output")
        return 1
    if args.site:
        pass_sites = [s for s in pass_sites if s["sitename"] == args.site]
        if not pass_sites:
            print(f"[Sentinel-3] Site '{args.site}' not found in step-2 PASS sites")
            return 1
    print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
    failed: list[str] = []
    for i, site in enumerate(pass_sites, 1):
        sitename = site["sitename"]
        print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
        try:
            summary = process_site(sitename, site["lat"], site["lon"], year)
        except Exception as exc:
            # Best effort: keep going with the remaining sites, but remember
            # the failure so the exit code reflects it.
            print(f"[Sentinel-3] {sitename} FAILED: {exc}")
            failed.append(sitename)
            continue
        print(
            f"[Sentinel-3] {sitename} done — "
            f"{summary['s2_refl_count']} REFL, "
            f"{summary['s3_composite_count']} composites"
        )
    if failed:
        print(
            f"[Sentinel-3] {len(failed)} of {len(pass_sites)} site(s) failed: "
            f"{', '.join(failed)}"
        )
        return 1
    return 0
 # Script entry point: the return value of main() becomes the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/4-fusion.py
+++ b/4-fusion.py
@ -0,0 +1,330 @@
 """Step 4: Compute GCC and run EFAST BtI + ItB fusion for prepared sites.
 Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
 - ``sentinel_data/{year}/{sitename}/prepared/s2/`` — ``*_REFL.tif`` + ``*_DIST_CLOUD.tif``
 - ``sentinel_data/{year}/{sitename}/prepared/s3/`` — ``composite_*.tif`` (4-band)
 Outputs (``data/``):
 - ``sentinel_data/{year}/{sitename}/prepared/s2/*_GCC.tif`` — S2 GCC (in-place)
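 - ``sentinel_data/{year}/{sitename}/prepared/gcc_s3/*.tif`` — S3 GCC composites
 - ``sentinel_data/{year}/{sitename}/prepared/s3_rgb/composite_*.tif`` — 3-band S3 composites (BtI fusion input)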
 - ``fusion/{year}/{sitename}/bti/fusion/REFL_*.tif`` — BtI fused 4-band reflectance
 - ``fusion/{year}/{sitename}/bti/gcc/GCC_*.tif`` — GCC derived from BtI fusion
 - ``fusion/{year}/{sitename}/itb/s2/GCC_*.tif`` — per-acquisition S2 GCC (simplified names)
 - ``fusion/{year}/{sitename}/itb/s3/GCC_*.tif`` — per-composite S3 GCC (simplified names)
 - ``fusion/{year}/{sitename}/itb/fusion/GCC_*.tif`` — ItB fused GCC
 Requires ``uv sync`` (efast).
 CLI:
 - ``--evaluation-year`` (default 2025)
 - ``--site`` (optional; default: all prepared sites under ``sentinel_data/{year}/``)
 Prior step: :mod:`3-sentinel-data`.
 """
 from __future__ import annotations
 import argparse
 import shutil
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Any
 import numpy as np
 import rasterio
 from dateutil import rrule
 # ---------------------------------------------------------------------------
 # Public constants
 # ---------------------------------------------------------------------------
 # Passed to ``efast.fusion`` as ``ratio``.
 RESOLUTION_RATIO = 30
 # Days between consecutive fusion dates; the first and last fusion dates are
 # also moved this many days inside the detected S2 date range.
 MOSAIC_STEP = 2
 # Passed to ``efast.fusion`` as ``max_days``.
 MAX_DAYS = 100
 # Passed to ``efast.fusion`` as ``minimum_acquisition_importance``.
 MINIMUM_ACQUISITION_IMPORTANCE = 0
 # Relative path, so it resolves against the current working directory.
 DATA_DIR = Path("data")
 # Default for the ``--evaluation-year`` CLI option.
 DEFAULT_YEAR = 2025
 # ---------------------------------------------------------------------------
 # efast import helper
 # ---------------------------------------------------------------------------
 def _import_efast():
    try:
        import efast.efast as efast_module
        return efast_module
    except ImportError as exc:
        raise ImportError(
            "efast not found. Install with: uv sync"
        ) from exc
 # ---------------------------------------------------------------------------
 # GCC computation (from s2_cloud_native.py and s3_openeo.py)
 # ---------------------------------------------------------------------------
 def compute_gcc_s2(s2_dir: Path, output_dir: Path) -> None:
    """Compute GCC from S2 REFL files and write ``*_GCC.tif`` to ``output_dir``.
    Reads bands 1-3 of every ``*_REFL.tif`` (band order B02/B03/B04) and
    writes a single-band float32 file holding ``G / (B + G + R)``. Pixels
    whose band sum is zero (e.g. cloud-masked) stay zero. Files whose output
    already exists are skipped.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for src_path in sorted(s2_dir.glob("*_REFL.tif")):
        out_path = output_dir / src_path.name.replace("_REFL.tif", "_GCC.tif")
        if out_path.is_file():
            continue
        with rasterio.open(src_path) as src:
            # Work in float32 so the band sum cannot overflow an integer dtype.
            b, g, r = src.read([1, 2, 3]).astype("float32")
            profile = src.profile.copy()
        total = b + g + r
        # The epsilon avoids division by zero; those pixels are reset below.
        gcc = g / (total + 1e-10)
        gcc[total == 0] = 0
        # The data written is float32, so the profile must say so as well
        # (matches compute_gcc_s3).
        profile.update(count=1, dtype="float32")
        with rasterio.open(out_path, "w", **profile) as dst:
            dst.write(gcc[np.newaxis].astype("float32"))
 def compute_gcc_s3(s3_dir: Path, output_dir: Path) -> None:
    """Write a single-band GCC GeoTIFF for every S3 composite.
    Reads bands 1-3 of each ``composite_*.tif`` (band order
    Oa04/Oa06/Oa08/Oa17) and writes ``band2 / (band1 + band2 + band3)`` as
    float32 under the same filename in ``output_dir``. Pixels whose band sum
    is NaN stay NaN. Files whose output already exists are skipped.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    composites = sorted(s3_dir.glob("composite_*.tif"))
    for composite in composites:
        target = output_dir / composite.name
        if target.is_file():
            continue
        with rasterio.open(composite) as reader:
            blue = reader.read(1)
            green = reader.read(2)
            red = reader.read(3)
            meta = reader.profile
        band_sum = blue + green + red
        ratio = green / (band_sum + 1e-10)
        ratio[np.isnan(band_sum)] = np.nan
        meta.update(count=1, dtype="float32")
        with rasterio.open(target, "w", **meta) as writer:
            writer.write(ratio[np.newaxis].astype("float32"))
 def compute_gcc_from_refl(refl_dir: Path, gcc_dir: Path) -> None:
    """Derive GCC from ``REFL_YYYYMMDD.tif`` files (BtI fusion output).
    Reads bands 1-3 of every ``REFL_*.tif`` and writes a single-band float32
    ``GCC_YYYYMMDD.tif`` holding ``band2 / (band1 + band2 + band3)`` to
    ``gcc_dir``. Pixels whose band sum is zero stay zero. Files whose output
    already exists are skipped.
    """
    gcc_dir.mkdir(parents=True, exist_ok=True)
    for src_path in sorted(refl_dir.glob("REFL_*.tif")):
        out_path = gcc_dir / src_path.name.replace("REFL_", "GCC_")
        if out_path.is_file():
            continue
        with rasterio.open(src_path) as src:
            # Work in float32 so the band sum cannot overflow an integer dtype.
            b, g, r = src.read([1, 2, 3]).astype("float32")
            profile = src.profile.copy()
        total = b + g + r
        # The epsilon avoids division by zero; those pixels are reset below.
        gcc = g / (total + 1e-10)
        gcc[total == 0] = 0
        # The data written is float32, so the profile must say so as well
        # (matches compute_gcc_s3).
        profile.update(count=1, dtype="float32")
        with rasterio.open(out_path, "w", **profile) as dst:
            dst.write(gcc[np.newaxis].astype("float32"))
 # ---------------------------------------------------------------------------
 # Date-range detection
 # ---------------------------------------------------------------------------
 def _refl_date_range(s2_dir: Path) -> tuple[datetime, datetime] | None:
    """Return (start, end) datetime from REFL filenames in ``s2_dir``.
    Filenames are expected to follow the S2 product naming convention, where
    the acquisition date ``YYYYMMDD`` appears at position index 2 when the
    stem is split by ``_``, e.g.
    ``S2A_MSIL2A_20230911T114111_N0509_R025_T29PKT_20230911T153131_REFL.tif``.
    """
    dates: list[datetime] = []
    for p in s2_dir.glob("*_REFL.tif"):
        parts = p.stem.split("_")
        if len(parts) >= 3:
            try:
                dates.append(datetime.strptime(parts[2][:8], "%Y%m%d"))
            except ValueError:
                pass
    if not dates:
        return None
    return min(dates), max(dates)
 # ---------------------------------------------------------------------------
 # Per-site fusion
 # ---------------------------------------------------------------------------
 def fuse_site(sitename: str, year: int) -> dict[str, Any]:
    """Run GCC computation and EFAST BtI + ItB fusion for one prepared site.
    Reads ``prepared/s2`` and ``prepared/s3`` under
    ``DATA_DIR/sentinel_data/{year}/{sitename}`` and writes to
    ``DATA_DIR/fusion/{year}/{sitename}``. ItB fuses GCC rasters directly;
    BtI fuses 3-band reflectance and derives GCC from the fused result.
    Returns:
        Summary dict with the detected S2 date range, the number of fusion
        dates and counts of the ``.tif`` files in the output directories.
    Raises:
        FileNotFoundError: When the S2 directory has no ``*_REFL.tif`` or the
            S3 directory has no ``composite_*.tif``.
        ValueError: When no date can be parsed from the REFL filenames.
    """
    efast = _import_efast()
    s2_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s2"
    s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3"
    gcc_s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "gcc_s3"
    base = DATA_DIR / "fusion" / str(year) / sitename
    # Fail early when step 3 has not produced inputs for this site.
    if not s2_dir.is_dir() or not any(s2_dir.glob("*_REFL.tif")):
        raise FileNotFoundError(f"No REFL files in {s2_dir}")
    if not s3_dir.is_dir() or not any(s3_dir.glob("composite_*.tif")):
        raise FileNotFoundError(f"No composite files in {s3_dir}")
    print(f"[{sitename}] Computing S2 GCC (in-place)...")
    # Same directory for input and output: *_GCC.tif lands next to *_REFL.tif.
    compute_gcc_s2(s2_dir, s2_dir)
    print(f"[{sitename}] Computing S3 GCC...")
    compute_gcc_s3(s3_dir, gcc_s3_dir)
    date_range = _refl_date_range(s2_dir)
    if date_range is None:
        raise ValueError(f"Could not detect date range from REFL filenames in {s2_dir}")
    start, end = date_range
    print(f"[{sitename}] Date range: {start.date()} → {end.date()}")
    # Fusion dates run every MOSAIC_STEP days, starting and ending MOSAIC_STEP
    # days inside the S2 date range; the list is empty for very short ranges.
    fusion_dates = list(
        rrule.rrule(
            rrule.DAILY,
            dtstart=start + timedelta(MOSAIC_STEP),
            until=end - timedelta(MOSAIC_STEP),
            interval=MOSAIC_STEP,
        )
    )
    # Keyword arguments shared by the ItB and BtI fusion calls.
    _fusion_kwargs = dict(
        ratio=RESOLUTION_RATIO,
        max_days=MAX_DAYS,
        minimum_acquisition_importance=MINIMUM_ACQUISITION_IMPORTANCE,
    )
    # --- ItB: GCC first, then fuse GCC ---
    itb_s2 = base / "itb" / "s2"
    itb_s3 = base / "itb" / "s3"
    itb_fusion = base / "itb" / "fusion"
    itb_s2.mkdir(parents=True, exist_ok=True)
    itb_s3.mkdir(parents=True, exist_ok=True)
    itb_fusion.mkdir(parents=True, exist_ok=True)
    # Copy the GCC inputs under simplified GCC_{date} names. Existing targets
    # are kept, so only the first S2 file per date is copied.
    for p in sorted(s2_dir.glob("*_GCC.tif")):
        dst = itb_s2 / f"GCC_{p.stem.split('_')[2][:8]}.tif"
        if not dst.exists():
            shutil.copy2(p, dst)
    for p in sorted(gcc_s3_dir.glob("composite_*.tif")):
        dst = itb_s3 / f"GCC_{p.stem.split('_')[1]}.tif"
        if not dst.exists():
            shutil.copy2(p, dst)
    print(f"[{sitename}] ItB: fusing GCC over {len(fusion_dates)} dates...")
    # The fusion reads from the prepared directories, not the renamed copies.
    for date in fusion_dates:
        efast.fusion(date, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **_fusion_kwargs)
    # --- BtI: fuse reflectance (3-band, matching S2 B02/B03/B04), then derive GCC ---
    # S3 composites have 4 bands; strip band 4 (Oa17/NIR) so shapes match S2 REFL.
    s3_rgb_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3_rgb"
    s3_rgb_dir.mkdir(parents=True, exist_ok=True)
    for p in sorted(s3_dir.glob("composite_*.tif")):
        out = s3_rgb_dir / p.name
        # Existing 3-band copies are reused rather than rewritten.
        if not out.exists():
            with rasterio.open(p) as src:
                data = src.read([1, 2, 3])
                profile = src.profile.copy()
                profile.update(count=3)
            with rasterio.open(out, "w", **profile) as dst:
                dst.write(data)
    bti_fusion = base / "bti" / "fusion"
    bti_gcc = base / "bti" / "gcc"
    bti_fusion.mkdir(parents=True, exist_ok=True)
    print(f"[{sitename}] BtI: fusing REFL over {len(fusion_dates)} dates...")
    for date in fusion_dates:
        efast.fusion(date, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **_fusion_kwargs)
    print(f"[{sitename}] BtI: deriving GCC from fused REFL...")
    # compute_gcc_from_refl creates bti_gcc itself.
    compute_gcc_from_refl(bti_fusion, bti_gcc)
    return {
        "sitename": sitename,
        "evaluation_year": year,
        "start": start.date().isoformat(),
        "end": end.date().isoformat(),
        "fusion_dates": len(fusion_dates),
        "itb_fusion_files": len(list(itb_fusion.glob("*.tif"))),
        "bti_fusion_files": len(list(bti_fusion.glob("*.tif"))),
        "bti_gcc_files": len(list(bti_gcc.glob("*.tif"))),
    }
 # ---------------------------------------------------------------------------
 # Site discovery
 # ---------------------------------------------------------------------------
 def _discover_sites(year: int) -> list[str]:
    """List, in sorted order, the sites that have prepared S2 REFL files.
    A site qualifies when ``DATA_DIR/sentinel_data/{year}/{site}/prepared/s2``
    contains at least one ``*_REFL.tif``. Returns an empty list when the year
    directory does not exist.
    """
    year_root = DATA_DIR / "sentinel_data" / str(year)
    if not year_root.is_dir():
        return []
    prepared: list[str] = []
    for entry in year_root.iterdir():
        if not entry.is_dir():
            continue
        if any((entry / "prepared" / "s2").glob("*_REFL.tif")):
            prepared.append(entry.name)
    prepared.sort()
    return prepared
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
 def main(argv: list[str] | None = None) -> int:
    """Run the fusion step for all prepared sites, or a single one.
    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.
    Returns:
        0 when every selected site was fused, 1 when no prepared site was
        found or at least one site failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to fuse (default: all prepared sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year
    if args.site:
        sites = [args.site]
    else:
        sites = _discover_sites(year)
        if not sites:
            print(f"[Fusion] No prepared sites found under data/sentinel_data/{year}/")
            return 1
    print(f"[Fusion] Processing {len(sites)} site(s)")
    failed: list[str] = []
    for i, sitename in enumerate(sites, 1):
        print(f"[Fusion] ({i}/{len(sites)}) {sitename}")
        try:
            summary = fuse_site(sitename, year)
        except Exception as exc:
            # Best effort: keep going with the remaining sites, but remember
            # the failure so the exit code reflects it.
            print(f"[Fusion] {sitename} FAILED: {exc}")
            failed.append(sitename)
            continue
        print(
            f"[Fusion] {sitename} done — "
            f"{summary['fusion_dates']} dates, "
            f"itb={summary['itb_fusion_files']} bti={summary['bti_fusion_files']} "
            f"bti_gcc={summary['bti_gcc_files']}"
        )
    if failed:
        print(
            f"[Fusion] {len(failed)} of {len(sites)} site(s) failed: "
            f"{', '.join(failed)}"
        )
        return 1
    return 0
 # Script entry point: the return value of main() becomes the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/5-metrics.py
+++ b/5-metrics.py
@ -0,0 +1,695 @@
 """Step 5: Pre-compute per-site GCC timeseries + raster index for the webapp.
 Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
 - ``phenocam_screening/{year}.json`` — qualifying sites + metadata
 - ``phenocam/{year}/{site}_1day.csv`` — daily GCC timeseries
 - ``sentinel_data/{year}/{site}/prepared/s2/*_GCC.tif`` — S2 GCC rasters
 - ``sentinel_data/{year}/{site}/prepared/gcc_s3/composite_*.tif`` — S3 GCC rasters
 - ``fusion/{year}/{site}/bti/gcc/GCC_*.tif`` — BtI GCC rasters
 - ``fusion/{year}/{site}/itb/fusion/GCC_*.tif`` — ItB GCC rasters
 Outputs (``data/metrics/``):
 - ``manifest.json`` — years + per-site metadata
 - ``{year}/{site}/gcc_phenocam.json`` — PhenoCam daily ``gcc_90`` for the evaluation year (every row with a valid value)
 - ``{year}/{site}/gcc_s2.json`` — S2 GCC (3×3-pixel mean at the site location; scenes with no valid pixel there are dropped)
 - ``{year}/{site}/gcc_s2_whittaker.json`` — Whittaker-smoothed S2 GCC
 - ``{year}/{site}/gcc_s3.json`` — S3 composite GCC
 - ``{year}/{site}/gcc_s3_smooth.json`` — S3 GCC, centred 5-point moving average (the window counts composites, not calendar days)
 - ``{year}/{site}/gcc_fusion_bti.json`` — BtI fused GCC
 - ``{year}/{site}/gcc_fusion_itb.json`` — ItB fused GCC
 - ``{year}/{site}/phenocam_images.json`` — midday photo URLs for the viewer
 - ``{year}/{site}/rasters_s2_refl.json`` — S2 REFL paths (BtI view)
 - ``{year}/{site}/rasters_s3_composite.json`` — S3 composite paths (BtI view)
 - ``{year}/{site}/rasters_s2_gcc.json`` — S2 GCC paths (ItB view)
 - ``{year}/{site}/rasters_s3_gcc.json`` — S3 GCC paths (ItB view)
 - ``{year}/{site}/rasters_fusion_bti_refl.json`` — BtI fused REFL paths
 - ``{year}/{site}/rasters_fusion_itb_gcc.json`` — ItB fused GCC paths
 - ``{year}/{site}/metrics.json`` — NSE, RMSE, nRMSE, Pearson r vs PhenoCam per series
 - ``{year}/{site}/bands_s2.json`` — S2 center-pixel reflectance (B02, B03, B04) per scene
 - ``{year}/{site}/bands_s3.json`` — S3 center-pixel reflectance (Oa04, Oa06, Oa08, Oa17) per composite
 - ``{year}/{site}/covariates.json`` — spatial CV/std, S2/S3 counts, gap stats
 CLI:
 - ``--evaluation-year`` (default 2025)
 - ``--site`` (optional; default: all qualifying sites with sentinel data)
 """
 from __future__ import annotations
 import argparse
 import csv
 import json
 import re
 from pathlib import Path
 from typing import Any
 import datetime
 import numpy as np
 import rasterio
 from rasterio.crs import CRS
 from rasterio.transform import rowcol
 from pyproj import Transformer
 from scipy.stats import pearsonr
 from tqdm import tqdm
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 # Root of all pipeline inputs and outputs. The path is relative, so it is
 # resolved against the current working directory at run time.
 DATA_DIR = Path("data")
 # Default for the ``--evaluation-year`` CLI flag.
 DEFAULT_YEAR = 2025
 # Window of the S3 moving average, counted in consecutive series points
 # (``_moving_average`` slices by index, so this equals days only when the
 # composite series has one point per day).
 S3_SMOOTH_WINDOW = 5
 # Whittaker lambda (penalised smoothing strength for S2)
 WHITTAKER_LAMBDA = 400.0
 # Half-width in metres for the spatial heterogeneity footprint (~300 m = 1 S3 pixel)
 SPATIAL_CV_HALF_M = 150
 # PhenoCam archive image URL pattern; filled with site, year, two-digit month
 # and the ``midday_filename`` value from the summary CSV.
 PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
 # ---------------------------------------------------------------------------
 # Helpers: raster pixel extraction
 # ---------------------------------------------------------------------------
 def _read_center_pixel(path: Path, lat: float, lon: float) -> float | None:
    """Mean of band 1 over the 3×3 pixel block centred on (lat, lon).

    The coordinate is given in EPSG:4326 and reprojected into the raster CRS.
    Nodata and exact-zero pixels are ignored. Returns ``None`` when no usable
    pixel remains or when anything goes wrong while reading the file.
    """
    try:
        with rasterio.open(path) as dataset:
            to_raster_crs = Transformer.from_crs(
                CRS.from_epsg(4326), dataset.crs, always_xy=True
            )
            east, north = to_raster_crs.transform(lon, lat)
            row, col = rowcol(dataset.transform, east, north)
            # Clamp the 3×3 block to the raster extent.
            top = max(0, row - 1)
            bottom = min(dataset.height, row + 2)
            left = max(0, col - 1)
            right = min(dataset.width, col + 2)
            block_window = rasterio.windows.Window(left, top, right - left, bottom - top)
            patch = dataset.read(1, window=block_window).astype(float)
            nodata = dataset.nodata
        if nodata is not None:
            patch = np.where(patch == nodata, np.nan, patch)
        patch[patch == 0] = np.nan
        mean_value = np.nanmean(patch)
        if np.isnan(mean_value):
            return None
        return float(mean_value)
    except Exception:
        return None
 # ---------------------------------------------------------------------------
 # Helpers: date extraction from filenames
 # ---------------------------------------------------------------------------
 def _date_from_gcc_tif(path: Path) -> str | None:
    """Extract YYYYMMDD from ``GCC_YYYYMMDD.tif`` or ``composite_YYYYMMDD.tif``."""
    m = re.search(r"(\d{8})", path.stem)
    return m.group(1) if m else None
 def _date_from_s2_tif(path: Path) -> str | None:
    """Extract YYYYMMDD from S2 product name ``S2X_TTTT_YYYYMMDD_…``."""
    parts = path.stem.split("_")
    if len(parts) >= 3:
        m = re.match(r"(\d{8})", parts[2])
        return m.group(1) if m else None
    return None
 # ---------------------------------------------------------------------------
 # Helpers: Whittaker smoother (2nd-order differences, dense linear solve)
 # ---------------------------------------------------------------------------
 def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
    """Penalised least-squares smoother (Whittaker, 2nd-order differences).

    Solves ``(W + lam * D'D) z = W y`` with a dense solver, where ``W`` is the
    diagonal 0/1 observation mask and ``D`` the second-difference operator.
    Samples are treated as equally spaced: the list index is the abscissa.

    Entries that are ``None`` on input get zero weight and stay ``None`` in the
    output, so the caller can tell observed points from gaps. A shallow copy of
    the input is returned unsmoothed when there are fewer than four samples,
    fewer than two observations, or the solve fails or is not finite.
    """
    n = len(values)
    obs_mask = [v is not None for v in values]
    # The penalty leaves every straight line unpenalised, so at least two
    # observations are needed for the system to have a unique solution.
    # With fewer, the matrix is singular and the solver may return arbitrary
    # numbers instead of raising.
    if n < 4 or sum(obs_mask) < 2:
        return values[:]
    y = np.array([v if v is not None else 0.0 for v in values], dtype=float)
    w = np.array([1.0 if m else 0.0 for m in obs_mask], dtype=float)
    W = np.diag(w)
    D = np.diff(np.eye(n), n=2, axis=0)  # (n-2) x n second-difference matrix
    A = W + lam * D.T @ D
    try:
        z = np.linalg.solve(A, w * y)
    except np.linalg.LinAlgError:
        return values[:]
    if not np.all(np.isfinite(z)):
        return values[:]
    # Only observed positions are reported; gaps stay None.
    return [float(zi) if seen else None for zi, seen in zip(z, obs_mask)]
 # ---------------------------------------------------------------------------
 # Helpers: PhenoCam CSV parsing
 # ---------------------------------------------------------------------------
 def _parse_phenocam_csv(
    csv_path: Path, year: int, site: str
 ) -> tuple[list[dict], list[dict]]:
    """Return (gcc_series, image_list) filtered to ``year``.

    ``gcc_series`` entries: ``{"date": "YYYY-MM-DD", "gcc_90": float}``
    ``image_list`` entries: ``{"date": "YYYY-MM-DD", "url": str}``

    Lines starting with ``#`` are ignored. A row is used only when its ``year``
    column equals ``str(year)`` and its ``date`` column is non-empty. ``gcc_90``
    values that are empty, ``NA``, unparsable or non-finite are skipped.
    Both lists are empty when ``csv_path`` is not a file.
    """
    gcc_series: list[dict] = []
    image_list: list[dict] = []
    year_str = str(year)
    if not csv_path.is_file():
        return gcc_series, image_list
    # newline="" is what the csv module expects from its input file.
    with csv_path.open(newline="", encoding="utf-8", errors="replace") as handle:
        reader = csv.DictReader(line for line in handle if not line.startswith("#"))
        for row in reader:
            if row.get("year") != year_str:
                continue
            # DictReader fills the missing cells of a short row with None,
            # so every cell is normalised to a string before use.
            date = row.get("date") or ""
            if not date:
                continue
            gcc_raw = (row.get("gcc_90") or "").strip()
            if gcc_raw and gcc_raw != "NA":
                try:
                    gcc_value = float(gcc_raw)
                except ValueError:
                    gcc_value = None
                # NaN/inf would serialise as invalid JSON and poison the metrics.
                if gcc_value is not None and np.isfinite(gcc_value):
                    gcc_series.append({"date": date, "gcc_90": gcc_value})
            filename = (row.get("midday_filename") or "").strip()
            if filename and filename != "NA":
                url = PHENOCAM_IMAGE_URL.format(
                    site=site, year=year_str, month=date[5:7], filename=filename
                )
                image_list.append({"date": date, "url": url})
    return gcc_series, image_list
 # ---------------------------------------------------------------------------
 # Helpers: moving average
 # ---------------------------------------------------------------------------
 def _moving_average(
    series: list[dict], value_key: str, window: int
 ) -> list[dict]:
    """Centred moving average over ``series[i][value_key]``.

    The window spans ``window // 2`` neighbours on each side, counted by list
    position. ``None`` values are left out of the mean; a window holding only
    ``None`` yields ``None``. Each output item carries ``date`` and the mean
    under ``value_key + "_smooth"``.
    """
    if not series:
        return []
    out_key = value_key + "_smooth"
    radius = window // 2
    raw = [point[value_key] for point in series]
    averaged: list[dict] = []
    for idx, point in enumerate(series):
        start = max(0, idx - radius)
        neighbours = [x for x in raw[start:idx + radius + 1] if x is not None]
        if neighbours:
            mean = sum(neighbours) / len(neighbours)
        else:
            mean = None
        averaged.append({"date": point["date"], out_key: mean})
    return averaged
 # ---------------------------------------------------------------------------
 # Helpers: validation metrics
 # ---------------------------------------------------------------------------
 MATCH_TOLERANCE_DAYS = 5
 def compute_metrics(
    ref: list[dict], ref_key: str,
    pred: list[dict], pred_key: str,
 ) -> dict | None:
    """Compute NSE, RMSE, nRMSE, Pearson r between pred and ref.
    Each pred point is matched to the nearest ref date within
    ``MATCH_TOLERANCE_DAYS``.  Returns a dict or ``None`` if fewer than
    2 matched pairs exist.
    """
    ref_lookup: dict[str, float] = {p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None}
    if not ref_lookup:
        return None
    ref_dates = sorted(ref_lookup)
    obs, sim = [], []
    for pt in pred:
        v = pt.get(pred_key)
        if v is None:
            continue
        nearest = min(ref_dates, key=lambda d: abs((
            np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D")))
        gap = abs((np.datetime64(pt["date"]) - np.datetime64(nearest)) / np.timedelta64(1, "D"))
        if gap <= MATCH_TOLERANCE_DAYS and nearest in ref_lookup:
            obs.append(ref_lookup[nearest])
            sim.append(v)
    if len(obs) < 2:
        return None
    obs_arr = np.array(obs)
    sim_arr = np.array(sim)
    obs_mean = obs_arr.mean()
    rmse = float(np.sqrt(np.mean((sim_arr - obs_arr) ** 2)))
    nrmse = rmse / obs_mean if obs_mean else None
    ss_res = float(np.sum((obs_arr - sim_arr) ** 2))
    ss_tot = float(np.sum((obs_arr - obs_mean) ** 2))
    nse = (1.0 - ss_res / ss_tot) if ss_tot else None
    r, _ = pearsonr(obs_arr, sim_arr)
    def _r4(v: float | None) -> float | None:
        return round(v, 4) if v is not None else None
    return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(float(r))}
 # Output keys for the per-band series. The order is positional:
 # ``_read_multiband_center`` reads raster band ``i`` (1-based) for the
 # ``i``-th name in the list.
 S2_BAND_NAMES = ["B02", "B03", "B04"]
 S3_BAND_NAMES = ["Oa04", "Oa06", "Oa08", "Oa17"]
 def _read_multiband_center(
    path: Path, lat: float, lon: float, band_names: list[str]
 ) -> dict[str, float | None]:
    """Per-band mean over the 3×3 pixel block centred on (lat, lon).

    The ``i``-th name in ``band_names`` is read from raster band ``i`` (1-based);
    names beyond the raster's band count map to ``None``. Nodata and exact-zero
    pixels are ignored and means are rounded to six decimals. Every name maps
    to ``None`` when the file cannot be read.
    """
    try:
        with rasterio.open(path) as dataset:
            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), dataset.crs, always_xy=True)
            east, north = to_raster_crs.transform(lon, lat)
            row, col = rowcol(dataset.transform, east, north)
            # Clamp the 3×3 block to the raster extent.
            top, bottom = max(0, row - 1), min(dataset.height, row + 2)
            left, right = max(0, col - 1), min(dataset.width, col + 2)
            block_window = rasterio.windows.Window(left, top, right - left, bottom - top)
            nodata = dataset.nodata
            per_band = {}
            for band_index, name in enumerate(band_names, start=1):
                if band_index > dataset.count:
                    per_band[name] = None
                    continue
                patch = dataset.read(band_index, window=block_window).astype(float)
                if nodata is not None:
                    patch = np.where(patch == nodata, np.nan, patch)
                patch[patch == 0] = np.nan
                mean_value = np.nanmean(patch)
                if np.isnan(mean_value):
                    per_band[name] = None
                else:
                    per_band[name] = round(float(mean_value), 6)
        return per_band
    except Exception:
        return dict.fromkeys(band_names)
 def _multiband_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    band_names: list[str],
    desc: str,
 ) -> list[dict]:
    """Build ``[{date, band1, band2, …}]`` from the rasters, sorted by date.

    ``date_fn`` maps a path to ``YYYYMMDD`` or ``None``; rasters without a date
    and rasters where every band came back ``None`` are skipped. Dates are
    emitted as ``YYYY-MM-DD``.
    """
    rows = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        band_values = _read_multiband_center(tif, lat, lon, band_names)
        if all(value is None for value in band_values.values()):
            continue
        rows.append({"date": f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}", **band_values})
    rows.sort(key=lambda item: item["date"])
    return rows
 # ---------------------------------------------------------------------------
 # Helpers: spatial heterogeneity + observation density
 # ---------------------------------------------------------------------------
 def _read_footprint_stats(
    path: Path, lat: float, lon: float, half_m: float = SPATIAL_CV_HALF_M
 ) -> tuple[float, float] | tuple[None, None]:
    """Mean and standard deviation of band 1 in a square window around (lat, lon).

    The window half-width is ``half_m`` divided by the pixel size (at least one
    pixel), so ``half_m`` is only in metres when the raster CRS is metric.
    Nodata and non-positive pixels are discarded. Returns ``(None, None)`` when
    fewer than 4 pixels remain or when reading the file fails.
    """
    try:
        with rasterio.open(path) as dataset:
            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), dataset.crs, always_xy=True)
            east, north = to_raster_crs.transform(lon, lat)
            pixel_size = abs(dataset.transform.a)  # CRS units per pixel
            reach = max(1, int(round(half_m / pixel_size)))
            row, col = rowcol(dataset.transform, east, north)
            # Clamp the footprint to the raster extent.
            top = max(0, row - reach)
            bottom = min(dataset.height, row + reach + 1)
            left = max(0, col - reach)
            right = min(dataset.width, col + reach + 1)
            footprint = rasterio.windows.Window(left, top, right - left, bottom - top)
            patch = dataset.read(1, window=footprint).astype(float)
            nodata = dataset.nodata
        if nodata is not None:
            patch = np.where(patch == nodata, np.nan, patch)
        patch[patch <= 0] = np.nan
        usable = patch[~np.isnan(patch)]
        if usable.size < 4:
            return None, None
        return float(np.mean(usable)), float(np.std(usable))
    except Exception:
        return None, None
 def compute_covariates(
    s2_gcc_paths: list[Path],
    s2_series: list[dict],
    s3_series: list[dict],
    n_gcc_points: int | None,
    lat: float,
    lon: float,
 ) -> dict:
    """Compute spatial heterogeneity and temporal observation density covariates."""
    # Spatial GCC statistics over ~300 m footprint
    means, stds = [], []
    for p in s2_gcc_paths:
        m, s = _read_footprint_stats(p, lat, lon)
        if m is not None and m > 0:
            means.append(m)
            stds.append(s)
    spatial_gcc_cv = round(float(np.mean([s / m for s, m in zip(stds, means)])), 4) if means else None
    spatial_gcc_std = round(float(np.mean(stds)), 4) if stds else None
    # S2 temporal gap statistics
    s2_dates = [datetime.date.fromisoformat(p["date"]) for p in s2_series]
    if len(s2_dates) >= 2:
        gaps = [(s2_dates[i + 1] - s2_dates[i]).days for i in range(len(s2_dates) - 1)]
        s2_mean_gap = round(float(np.mean(gaps)), 1)
        s2_max_gap = int(max(gaps))
    else:
        s2_mean_gap = None
        s2_max_gap = None
    return {
        "spatial_gcc_cv":    spatial_gcc_cv,
        "spatial_gcc_std":   spatial_gcc_std,
        "s2_scene_count":    len(s2_series),
        "s2_mean_gap_days":  s2_mean_gap,
        "s2_max_gap_days":   s2_max_gap,
        "s3_composite_count": len(s3_series),
        "n_gcc_points":      n_gcc_points,
    }
 # ---------------------------------------------------------------------------
 # Per-site export
 # ---------------------------------------------------------------------------
 def _write_json(path: Path, data: Any) -> None:
    """Serialise ``data`` as compact JSON (no whitespace after separators) into ``path``."""
    compact = json.dumps(data, separators=(",", ":"))
    path.write_text(compact)
 def _raster_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    desc: str,
 ) -> list[dict]:
    """Build ``[{date, gcc}]`` from the rasters, sorted by date.

    ``date_fn`` maps a path to ``YYYYMMDD`` or ``None``. Rasters without a date
    or without a usable value at (lat, lon) are skipped. Dates are emitted as
    ``YYYY-MM-DD``.
    """
    points = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        gcc = _read_center_pixel(tif, lat, lon)
        if gcc is None:
            continue
        points.append({"date": f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}", "gcc": gcc})
    points.sort(key=lambda item: item["date"])
    return points
 def _raster_index(tif_paths: list[Path], date_fn, rel_root: Path) -> list[dict]:
    """Build ``[{date, path}]`` sorted by date.

    ``date`` is whatever ``date_fn`` returns for the path (paths mapped to
    ``None`` are skipped). ``path`` is relative to ``rel_root`` when the file
    lies below it, otherwise the path is kept as given.
    """
    def _shown_path(tif: Path) -> str:
        try:
            return str(tif.relative_to(rel_root))
        except ValueError:
            return str(tif)
    entries = []
    for tif in tif_paths:
        stamp = date_fn(tif)
        if stamp is None:
            continue
        entries.append({"date": stamp, "path": _shown_path(tif)})
    entries.sort(key=lambda item: item["date"])
    return entries
 def export_site(
    site: str,
    year: int,
    lat: float,
    lon: float,
    out_dir: Path,
    n_gcc_points: int | None = None,
 ) -> bool:
    """Write the per-site JSON sidecars for one site into ``out_dir``.

    The sidecars are the GCC series (``gcc_*.json``), PhenoCam image list,
    band series (``bands_*.json``), raster indexes (``rasters_*.json``),
    ``covariates.json`` and ``metrics.json``.

    Returns ``False`` without creating ``out_dir`` when the site has no
    ``bti/gcc/GCC_*.tif`` fusion output; otherwise returns ``True``. Legacy
    ``timeseries.json`` / ``rasters.json`` files in ``out_dir`` are deleted.
    ``n_gcc_points`` is only passed through into ``covariates.json``.
    """
    sentinel_base = DATA_DIR / "sentinel_data" / str(year) / site / "prepared"
    fusion_base = DATA_DIR / "fusion" / str(year) / site
    s2_gcc_dir = sentinel_base / "s2"
    s3_gcc_dir = sentinel_base / "gcc_s3"
    bti_gcc_dir = fusion_base / "bti" / "gcc"
    itb_gcc_dir = fusion_base / "itb" / "fusion"
    # Raster slider sources
    s2_refl_dir = sentinel_base / "s2"
    s3_comp_dir = sentinel_base / "s3"
    bti_refl_dir = fusion_base / "bti" / "fusion"
    # Sites without BtI GCC rasters are skipped before anything is written.
    has_fusion = bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))
    if not has_fusion:
        return False
    out_dir.mkdir(parents=True, exist_ok=True)
    # --- GCC timeseries from rasters ---
    s2_gcc_paths = sorted(s2_gcc_dir.glob("*_GCC.tif"))
    s3_gcc_paths = sorted(s3_gcc_dir.glob("composite_*.tif"))
    bti_paths = sorted(bti_gcc_dir.glob("GCC_*.tif"))
    itb_paths = sorted(itb_gcc_dir.glob("GCC_*.tif"))
    s2_series = _raster_series(s2_gcc_paths, _date_from_s2_tif, lat, lon, f"{site} S2")
    s3_series = _raster_series(s3_gcc_paths, _date_from_gcc_tif, lat, lon, f"{site} S3")
    bti_series = _raster_series(bti_paths, _date_from_gcc_tif, lat, lon, f"{site} BtI")
    itb_series = _raster_series(itb_paths, _date_from_gcc_tif, lat, lon, f"{site} ItB")
    # Whittaker on S2
    s2_vals = [p["gcc"] for p in s2_series]
    s2_smooth_vals = _whittaker_smooth(s2_vals)
    s2_whittaker = [
        {"date": p["date"], "gcc": v}
        for p, v in zip(s2_series, s2_smooth_vals)
        if v is not None
    ]
    # S3 moving average over S3_SMOOTH_WINDOW consecutive composites
    s3_smooth = _moving_average(s3_series, "gcc", S3_SMOOTH_WINDOW)
    # PhenoCam CSV
    csv_path = DATA_DIR / "phenocam" / str(year) / f"{site}_1day.csv"
    phenocam_series, image_list = _parse_phenocam_csv(csv_path, year, site)
    # Rename the smoothed key back to "gcc" so every series shares one schema.
    s3_smooth_series = [
        {"date": p["date"], "gcc": p["gcc_smooth"]}
        for p in s3_smooth
        if p.get("gcc_smooth") is not None
    ]
    # Band reflectance timeseries (multi-band center-pixel)
    bands_s2 = _multiband_series(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands")
    bands_s3 = _multiband_series(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands")
    # --- Per-metric JSON outputs ---
    _write_json(out_dir / "gcc_phenocam.json", phenocam_series)
    _write_json(out_dir / "gcc_s2.json", s2_series)
    _write_json(out_dir / "gcc_s2_whittaker.json", s2_whittaker)
    _write_json(out_dir / "gcc_s3.json", s3_series)
    _write_json(out_dir / "gcc_s3_smooth.json", s3_smooth_series)
    _write_json(out_dir / "gcc_fusion_bti.json", bti_series)
    _write_json(out_dir / "gcc_fusion_itb.json", itb_series)
    _write_json(out_dir / "phenocam_images.json", image_list)
    _write_json(out_dir / "bands_s2.json", bands_s2)
    _write_json(out_dir / "bands_s3.json", bands_s3)
    # --- Raster index for slider ---
    rel_root = DATA_DIR.parent  # paths relative to project root
    # Valid-pixel sets: only show S2/S3 rasters where the center pixel had
    # usable data (non-zero GCC). This excludes cloud-masked / snow-covered
    # scenes that would render as black or visually nonsensical.
    # Series dates are YYYY-MM-DD while the raster index uses YYYYMMDD,
    # hence the dash removal.
    s2_valid_dates = {p["date"].replace("-", "") for p in s2_series}
    s3_valid_dates = {p["date"].replace("-", "") for p in s3_series}
    s2_refl = [r for r in _raster_index(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root)
               if r["date"] in s2_valid_dates]
    s3_comp = [r for r in _raster_index(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
               if r["date"] in s3_valid_dates]
    s2_gcc = [r for r in _raster_index(sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root)
              if r["date"] in s2_valid_dates]
    s3_gcc = [r for r in _raster_index(sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
              if r["date"] in s3_valid_dates]
    # Fused rasters are indexed without the valid-date filter.
    bti_refl = _raster_index(sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root)
    itb_gcc = _raster_index(sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root)
    _write_json(out_dir / "rasters_s2_refl.json", s2_refl)
    _write_json(out_dir / "rasters_s3_composite.json", s3_comp)
    _write_json(out_dir / "rasters_s2_gcc.json", s2_gcc)
    _write_json(out_dir / "rasters_s3_gcc.json", s3_gcc)
    _write_json(out_dir / "rasters_fusion_bti_refl.json", bti_refl)
    _write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc)
    # --- Site covariates (heterogeneity + observation density) ---
    _write_json(out_dir / "covariates.json", compute_covariates(
        s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon
    ))
    # --- Validation metrics vs PhenoCam gcc_90 ---
    # A value is None when compute_metrics finds fewer than two matched pairs.
    _write_json(out_dir / "metrics.json", {
        "bti":          compute_metrics(phenocam_series, "gcc_90", bti_series,       "gcc"),
        "itb":          compute_metrics(phenocam_series, "gcc_90", itb_series,       "gcc"),
        "s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker,     "gcc"),
        "s3_smooth":    compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"),
        "s2":           compute_metrics(phenocam_series, "gcc_90", s2_series,        "gcc"),
        "s3":           compute_metrics(phenocam_series, "gcc_90", s3_series,        "gcc"),
    })
    # Remove legacy bundled outputs if present
    for legacy in ("timeseries.json", "rasters.json"):
        (out_dir / legacy).unlink(missing_ok=True)
    return True
 # ---------------------------------------------------------------------------
 # Manifest
 # ---------------------------------------------------------------------------
 # Display labels for the two-letter vegetation codes stored per site.
 # ``build_manifest`` falls back to the raw code when a code is not listed here.
 VEG_TYPE_LABELS = {
    "AG": "Agriculture",
    "DB": "Deciduous broadleaf",
    "DN": "Deciduous needleleaf",
    "EB": "Evergreen broadleaf",
    "EN": "Evergreen needleleaf",
    "GR": "Grassland",
    "MX": "Mixed",
    "SH": "Shrubland",
    "TN": "Tundra",
    "UN": "Unknown",
    "WL": "Wetland",
    "RF": "Reference",
 }
 def build_manifest(years: list[int], filter_site: str | None = None) -> dict:
    manifest: dict[str, Any] = {"years": years, "sites": {}}
    for year in years:
        screening_path = DATA_DIR / "phenocam_screening" / f"{year}.json"
        if not screening_path.is_file():
            continue
        data = json.loads(screening_path.read_text())
        sites_meta: dict[str, Any] = {}
        for entry in data.get("sites", []):
            if entry.get("calculations", {}).get("status") != "PASS":
                continue
            cam = entry.get("response", {}).get("camera", {})
            roi = entry.get("response", {}).get("roi", {})
            calc = entry.get("calculations", {})
            site = cam.get("Sitename", "")
            if not site:
                continue
            if filter_site and site != filter_site:
                continue
            sm = cam.get("sitemetadata", {})
            veg_raw = sm.get("primary_veg_type") or roi.get("roitype") or "UN"
            fusion_dir = DATA_DIR / "fusion" / str(year) / site / "bti" / "gcc"
            has_fusion = fusion_dir.is_dir() and any(fusion_dir.glob("GCC_*.tif"))
            sites_meta[site] = {
                "lat": cam.get("Lat"),
                "lon": cam.get("Lon"),
                "veg_type": veg_raw,
                "veg_label": VEG_TYPE_LABELS.get(veg_raw, veg_raw),
                "description": sm.get("site_description", ""),
                "dominant_species": sm.get("dominant_species", ""),
                "group": sm.get("group", ""),
                "snr": calc.get("snr"),
                "n_gcc_points": calc.get("n_gcc_points"),
                "has_fusion": has_fusion,
            }
        manifest["sites"][str(year)] = sites_meta
    return manifest
 # ---------------------------------------------------------------------------
 # CLI
 # ---------------------------------------------------------------------------
 def main() -> None:
    """Build the manifest for all screened years, export the requested year.

    The manifest covers every year that has a ``phenocam_screening/*.json``
    file (or just the requested year when there is none). Per-site export runs
    only for ``--evaluation-year`` and only for sites flagged ``has_fusion``.

    NOTE(review): ``--site`` is also passed to ``build_manifest``, so a
    single-site run writes a ``manifest.json`` that lists only that site.
    Confirm this is intended before relying on the manifest after such a run.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument("--site", type=str, default=None)
    opts = parser.parse_args()
    year = opts.evaluation_year
    filter_site = opts.site
    out_base = DATA_DIR / "metrics"
    out_base.mkdir(parents=True, exist_ok=True)
    # Years come from the numerically named screening files.
    screening_files = (DATA_DIR / "phenocam_screening").glob("*.json")
    years = sorted(int(f.stem) for f in screening_files if f.stem.isdigit()) or [year]
    print(f"Building manifest for years: {years}")
    manifest = build_manifest(years, filter_site)
    # Per-site export is limited to the requested year.
    selected = {
        name: meta
        for name, meta in manifest["sites"].get(str(year), {}).items()
        if meta["has_fusion"] and (not filter_site or name == filter_site)
    }
    print(f"Exporting {len(selected)} site(s) with fusion data for {year}")
    for site, meta in tqdm(selected.items(), desc="Sites"):
        exported = export_site(
            site, year, meta["lat"], meta["lon"], out_base / str(year) / site, meta.get("n_gcc_points")
        )
        if exported:
            print(f"  ✓ {site}")
        else:
            print(f"  ✗ {site} — no fusion data found")
    manifest_path = out_base / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, separators=(",", ":")))
    print(f"Manifest written → {manifest_path}")
 # Script entry point: run the export only when executed directly, not on import.
 if __name__ == "__main__":
    main()
--- a/AGENTS.md
+++ b/AGENTS.md
@ -0,0 +1,151 @@
 # AGENTS.md
 Worldwide PhenoCam EFAST feasibility screening. Human summary: [`README.md`](README.md).
 ---
 ## Layout
 | Path | Role |
 |------|------|
 | `1-phenocam.py` | Step 1: download PhenoCam metadata + `one_day_summary` CSV |
 | `2-phenocam-screening.py` | Step 2: PhenoCam + SNR gates on cached CSVs |
 | `3-sentinel-data.py` | Step 3: S2 (Earth Search COG) + S3 (CDSE OpenEO) download + EFAST prep |
 | `4-fusion.py` | Step 4: GCC computation + EFAST BtI/ItB fusion loop |
 | `5-metrics.py` | Step 5: timeseries, covariates, `metrics.json`, webapp manifest |
 | `data/` | Manifests, per-site caches, screening outputs (large; mostly generated) |
 | `webapp/` | Static QA viewer (`make serve` from workspace root) |
 Workspace orchestration: [`../AGENTS.md`](../AGENTS.md).
 ---
 ## Where to work
 | Task | Location |
 |------|----------|
 | PhenoCam bulk download | `1-phenocam.py` |
 | GCC/SNR screening on disk | `2-phenocam-screening.py` |
 | S2/S3 download + EFAST prep | `3-sentinel-data.py` |
 | GCC + fusion | `4-fusion.py` |
 | Metrics + webapp index | `5-metrics.py` |
 | Web QA | `../Makefile` target `serve` → `webapp/index.html` |
 ---
 ## Setup
 **Preferred (uv):** from `processing/`:
 ```bash
 uv sync                              # all deps from pyproject.toml (incl. efast)
 ```
 Run any script as `uv run python <script>.py …`. Python version is pinned in `.python-version` (3.11.10).
 Environment variables:
 - `CDSE_USER` — Copernicus Data Space username
 - `CDSE_PASSWORD` — Copernicus Data Space password
 Both are required for the step 3 S3 download (CDSE OpenEO). The step 3 S2 download uses AWS Earth Search (no auth).
 ---
 ## CLI convention
 Every numbered step script shares two user-facing flags:
 | Flag | Default | Role |
 |------|---------|------|
 | `--evaluation-year` | `2025` | Calendar year; input/output paths under `data/` use `{year}` |
 | `--site` | all eligible | Single sitename to limit scope (testing or single-site runs) |
 All other tunable parameters (bands, resolution ratio, compositing window, etc.) are public constants at the top of each script. Paths are derived from the year — do not pass manifest paths on the CLI. Each script docstring lists **Inputs** and **Outputs** under `data/`.
 Resume behaviour: step 3 skips S3 sites when `raw/s3/S3*.tif` already exist; step 3 skips S2 scenes when `*_REFL.tif` already exists. Step 4 skips GCC/fusion files that already exist. Step 5 overwrites JSON sidecars for processed sites.
 Example:
 ```bash
 uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
 uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
 uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
 ```
 ---
 ## Workflow
 ### Stepped pipeline (resumable)
 ```bash
 uv run python 1-phenocam.py --evaluation-year 2025
 uv run python 2-phenocam-screening.py --evaluation-year 2025
 uv run python 3-sentinel-data.py --evaluation-year 2025
 uv run python 4-fusion.py --evaluation-year 2025
 uv run python 5-metrics.py --evaluation-year 2025
 # single site
 uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
 uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
 uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
 ```
 S3 uses CDSE OpenEO collection `SENTINEL3_SYN_L2_SYN` (bands Oa04/Oa06/Oa08/Oa17). S2 uses AWS Earth Search COG range reads (no auth). No S2↔S3 radiometric harmonisation.
 ---
 ## Screening gates
 ### Step 2 (`2-phenocam-screening.py`)
 | Gate | Rule |
 |------|------|
 | `phenocam` | ROI + `one_day_summary` CSV; ≥ `MIN_GCC_POINTS` (30) valid `gcc_90` in evaluation year |
 | `snr` | AIC-selected cubic spline SNR ≥ `SNR_THRESHOLD` (2.0) |
 | `cluster` | SNR-passed sites within 500 m deduplicated; keep highest `n_gcc_points` (SNR tie-break) |
 ---
 ## Data layout
 **Naming:** `data/` paths follow step script names — `1-phenocam.py` → `phenocam/`, `2-phenocam-screening.py` → `phenocam_screening/`, `3-sentinel-data.py` → `sentinel_data/`, `4-fusion.py` → `fusion/`, `5-metrics.py` → `metrics/`.
 ```
 data/
  phenocam/
    {year}.json                           # step-1 manifest
    {year}/
      {sitename}.json                     # camera + ROI API payload
      {sitename}_1day.csv                 # raw PhenoCam summary CSV
  phenocam_screening/
    {year}.json                           # step-2 results
    {year}.csv
  sentinel_data/{year}/{sitename}/
    raw/s3/                               # step 3: S3 SYN L2 per-date GeoTIFFs
    prepared/s2/                          # step 3: *_REFL.tif, *_DIST_CLOUD.tif, *_GCC.tif
    prepared/s3/                          # step 3: composite_*.tif
    prepared/gcc_s3/                      # step 4: single-band GCC composites
    data.json                             # step-3 run summary
  fusion/{year}/{sitename}/
    bti/fusion/REFL_*.tif                 # step 4: BtI fused reflectance
    bti/gcc/GCC_*.tif                     # step 4: BtI GCC
    itb/s2/GCC_*.tif                      # step 4: S2 GCC (ItB stack)
    itb/s3/GCC_*.tif                      # step 4: S3 GCC (ItB stack)
    itb/fusion/GCC_*.tif                  # step 4: ItB fused GCC
  metrics/
    manifest.json                         # step 5: years + site metadata for webapp
    {year}/{sitename}/
      gcc_*.json, metrics.json, covariates.json, rasters_*.json, bands_*.json
 ```
 ---
 ## Module map
 | File | Responsibility |
 |------|----------------|
 | `1-phenocam.py` | Paginate PhenoCam API; cache JSON + CSV; write manifest |
 | `2-phenocam-screening.py` | Parse cached CSVs; PhenoCam + SNR gates |
 | `3-sentinel-data.py` | S2 COG range reads (Earth Search); S3 OpenEO download; EFAST REFL/DIST_CLOUD/composites |
 | `4-fusion.py` | GCC from S2 REFL + S3 composites; daily `efast.fusion` BtI + ItB |
 | `5-metrics.py` | PhenoCam-matched GCC series, baselines, fusion metrics, raster index, covariates |
--- a/619
+++ b/619
@ -1,619 +0,0 @@
 GNU AFFERO GENERAL PUBLIC LICENSE
 =================================
 Version 3, 19 November 2007
 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.
                            Preamble
 The GNU Affero General Public License is a free, copyleft license for
 software and other kinds of works, specifically designed to ensure
 cooperation with the community in the case of network server software.
 The licenses for most software and other practical works are designed
 to take away your freedom to share and change the works.  By contrast,
 our General Public Licenses are intended to guarantee your freedom to
 share and change all versions of a program--to make sure it remains free
 software for all its users.
 When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
 have the freedom to distribute copies of free software (and charge for
 them if you wish), that you receive source code or can get it if you
 want it, that you can change the software or use pieces of it in new
 free programs, and that you know you can do these things.
 Developers that use our General Public Licenses protect your rights
 with two steps: (1) assert copyright on the software, and (2) offer
 you this License which gives you legal permission to copy, distribute
 and/or modify the software.
 A secondary benefit of defending all users' freedom is that
 improvements made in alternate versions of the program, if they
 receive widespread use, become available for other developers to
 incorporate.  Many developers of free software are heartened and
 encouraged by the resulting cooperation.  However, in the case of
 software used on network servers, this result may fail to come about.
 The GNU General Public License permits making a modified version and
 letting the public access it on a server without ever releasing its
 source code to the public.
 The GNU Affero General Public License is designed specifically to
 ensure that, in such cases, the modified source code becomes available
 to the community.  It requires the operator of a network server to
 provide the source code of the modified version running there to the
 users of that server.  Therefore, public use of a modified version, on
 a publicly accessible server, gives the public access to the source
 code of the modified version.
 An older license, called the Affero General Public License and
 published by Affero, was designed to accomplish similar goals.  This is
 a different license, not a version of the Affero GPL, but Affero has
 released a new version of the Affero GPL which permits relicensing under
 this license.
 The precise terms and conditions for copying, distribution and
 modification follow.
                       TERMS AND CONDITIONS
  0. Definitions.
 "This License" refers to version 3 of the GNU Affero General Public License.
 "Copyright" also means copyright-like laws that apply to other kinds of
 works, such as semiconductor masks.
 "The Program" refers to any copyrightable work licensed under this
 License.  Each licensee is addressed as "you".  "Licensees" and
 "recipients" may be individuals or organizations.
 To "modify" a work means to copy from or adapt all or part of the work
 in a fashion requiring copyright permission, other than the making of an
 exact copy.  The resulting work is called a "modified version" of the
 earlier work or a work "based on" the earlier work.
 A "covered work" means either the unmodified Program or a work based
 on the Program.
 To "propagate" a work means to do anything with it that, without
 permission, would make you directly or secondarily liable for
 infringement under applicable copyright law, except executing it on a
 computer or modifying a private copy.  Propagation includes copying,
 distribution (with or without modification), making available to the
 public, and in some countries other activities as well.
 To "convey" a work means any kind of propagation that enables other
 parties to make or receive copies.  Mere interaction with a user through
 a computer network, with no transfer of a copy, is not conveying.
 An interactive user interface displays "Appropriate Legal Notices"
 to the extent that it includes a convenient and prominently visible
 feature that (1) displays an appropriate copyright notice, and (2)
 tells the user that there is no warranty for the work (except to the
 extent that warranties are provided), that licensees may convey the
 work under this License, and how to view a copy of this License.  If
 the interface presents a list of user commands or options, such as a
 menu, a prominent item in the list meets this criterion.
  1. Source Code.
 The "source code" for a work means the preferred form of the work
 for making modifications to it.  "Object code" means any non-source
 form of a work.
 A "Standard Interface" means an interface that either is an official
 standard defined by a recognized standards body, or, in the case of
 interfaces specified for a particular programming language, one that
 is widely used among developers working in that language.
 The "System Libraries" of an executable work include anything, other
 than the work as a whole, that (a) is included in the normal form of
 packaging a Major Component, but which is not part of that Major
 Component, and (b) serves only to enable use of the work with that
 Major Component, or to implement a Standard Interface for which an
 implementation is available to the public in source code form.  A
 "Major Component", in this context, means a major essential component
 (kernel, window system, and so on) of the specific operating system
 (if any) on which the executable work runs, or a compiler used to
 produce the work, or an object code interpreter used to run it.
 The "Corresponding Source" for a work in object code form means all
 the source code needed to generate, install, and (for an executable
 work) run the object code and to modify the work, including scripts to
 control those activities.  However, it does not include the work's
 System Libraries, or general-purpose tools or generally available free
 programs which are used unmodified in performing those activities but
 which are not part of the work.  For example, Corresponding Source
 includes interface definition files associated with source files for
 the work, and the source code for shared libraries and dynamically
 linked subprograms that the work is specifically designed to require,
 such as by intimate data communication or control flow between those
 subprograms and other parts of the work.
 The Corresponding Source need not include anything that users
 can regenerate automatically from other parts of the Corresponding
 Source.
 The Corresponding Source for a work in source code form is that
 same work.
  2. Basic Permissions.
 All rights granted under this License are granted for the term of
 copyright on the Program, and are irrevocable provided the stated
 conditions are met.  This License explicitly affirms your unlimited
 permission to run the unmodified Program.  The output from running a
 covered work is covered by this License only if the output, given its
 content, constitutes a covered work.  This License acknowledges your
 rights of fair use or other equivalent, as provided by copyright law.
 You may make, run and propagate covered works that you do not
 convey, without conditions so long as your license otherwise remains
 in force.  You may convey covered works to others for the sole purpose
 of having them make modifications exclusively for you, or provide you
 with facilities for running those works, provided that you comply with
 the terms of this License in conveying all material for which you do
 not control copyright.  Those thus making or running the covered works
 for you must do so exclusively on your behalf, under your direction
 and control, on terms that prohibit them from making any copies of
 your copyrighted material outside their relationship with you.
 Conveying under any other circumstances is permitted solely under
 the conditions stated below.  Sublicensing is not allowed; section 10
 makes it unnecessary.
  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
 No covered work shall be deemed part of an effective technological
 measure under any applicable law fulfilling obligations under article
 11 of the WIPO copyright treaty adopted on 20 December 1996, or
 similar laws prohibiting or restricting circumvention of such
 measures.
 When you convey a covered work, you waive any legal power to forbid
 circumvention of technological measures to the extent such circumvention
 is effected by exercising rights under this License with respect to
 the covered work, and you disclaim any intention to limit operation or
 modification of the work as a means of enforcing, against the work's
 users, your or third parties' legal rights to forbid circumvention of
 technological measures.
  4. Conveying Verbatim Copies.
 You may convey verbatim copies of the Program's source code as you
 receive it, in any medium, provided that you conspicuously and
 appropriately publish on each copy an appropriate copyright notice;
 keep intact all notices stating that this License and any
 non-permissive terms added in accord with section 7 apply to the code;
 keep intact all notices of the absence of any warranty; and give all
 recipients a copy of this License along with the Program.
 You may charge any price or no price for each copy that you convey,
 and you may offer support or warranty protection for a fee.
  5. Conveying Modified Source Versions.
 You may convey a work based on the Program, or the modifications to
 produce it from the Program, in the form of source code under the
 terms of section 4, provided that you also meet all of these conditions:
 a) The work must carry prominent notices stating that you modified
 it, and giving a relevant date.
 b) The work must carry prominent notices stating that it is
 released under this License and any conditions added under section
 7.  This requirement modifies the requirement in section 4 to
 "keep intact all notices".
 c) You must license the entire work, as a whole, under this
 License to anyone who comes into possession of a copy.  This
 License will therefore apply, along with any applicable section 7
 additional terms, to the whole of the work, and all its parts,
 regardless of how they are packaged.  This License gives no
 permission to license the work in any other way, but it does not
 invalidate such permission if you have separately received it.
 d) If the work has interactive user interfaces, each must display
 Appropriate Legal Notices; however, if the Program has interactive
 interfaces that do not display Appropriate Legal Notices, your
 work need not make them do so.
 A compilation of a covered work with other separate and independent
 works, which are not by their nature extensions of the covered work,
 and which are not combined with it such as to form a larger program,
 in or on a volume of a storage or distribution medium, is called an
 "aggregate" if the compilation and its resulting copyright are not
 used to limit the access or legal rights of the compilation's users
 beyond what the individual works permit.  Inclusion of a covered work
 in an aggregate does not cause this License to apply to the other
 parts of the aggregate.
  6. Conveying Non-Source Forms.
 You may convey a covered work in object code form under the terms
 of sections 4 and 5, provided that you also convey the
 machine-readable Corresponding Source under the terms of this License,
 in one of these ways:
 a) Convey the object code in, or embodied in, a physical product
 (including a physical distribution medium), accompanied by the
 Corresponding Source fixed on a durable physical medium
 customarily used for software interchange.
 b) Convey the object code in, or embodied in, a physical product
 (including a physical distribution medium), accompanied by a
 written offer, valid for at least three years and valid for as
 long as you offer spare parts or customer support for that product
 model, to give anyone who possesses the object code either (1) a
 copy of the Corresponding Source for all the software in the
 product that is covered by this License, on a durable physical
 medium customarily used for software interchange, for a price no
 more than your reasonable cost of physically performing this
 conveying of source, or (2) access to copy the
 Corresponding Source from a network server at no charge.
 c) Convey individual copies of the object code with a copy of the
 written offer to provide the Corresponding Source.  This
 alternative is allowed only occasionally and noncommercially, and
 only if you received the object code with such an offer, in accord
 with subsection 6b.
 d) Convey the object code by offering access from a designated
 place (gratis or for a charge), and offer equivalent access to the
 Corresponding Source in the same way through the same place at no
 further charge.  You need not require recipients to copy the
 Corresponding Source along with the object code.  If the place to
 copy the object code is a network server, the Corresponding Source
 may be on a different server (operated by you or a third party)
 that supports equivalent copying facilities, provided you maintain
 clear directions next to the object code saying where to find the
 Corresponding Source.  Regardless of what server hosts the
 Corresponding Source, you remain obligated to ensure that it is
 available for as long as needed to satisfy these requirements.
 e) Convey the object code using peer-to-peer transmission, provided
 you inform other peers where the object code and Corresponding
 Source of the work are being offered to the general public at no
 charge under subsection 6d.
 A separable portion of the object code, whose source code is excluded
 from the Corresponding Source as a System Library, need not be
 included in conveying the object code work.
 A "User Product" is either (1) a "consumer product", which means any
 tangible personal property which is normally used for personal, family,
 or household purposes, or (2) anything designed or sold for incorporation
 into a dwelling.  In determining whether a product is a consumer product,
 doubtful cases shall be resolved in favor of coverage.  For a particular
 product received by a particular user, "normally used" refers to a
 typical or common use of that class of product, regardless of the status
 of the particular user or of the way in which the particular user
 actually uses, or expects or is expected to use, the product.  A product
 is a consumer product regardless of whether the product has substantial
 commercial, industrial or non-consumer uses, unless such uses represent
 the only significant mode of use of the product.
 "Installation Information" for a User Product means any methods,
 procedures, authorization keys, or other information required to install
 and execute modified versions of a covered work in that User Product from
 a modified version of its Corresponding Source.  The information must
 suffice to ensure that the continued functioning of the modified object
 code is in no case prevented or interfered with solely because
 modification has been made.
 If you convey an object code work under this section in, or with, or
 specifically for use in, a User Product, and the conveying occurs as
 part of a transaction in which the right of possession and use of the
 User Product is transferred to the recipient in perpetuity or for a
 fixed term (regardless of how the transaction is characterized), the
 Corresponding Source conveyed under this section must be accompanied
 by the Installation Information.  But this requirement does not apply
 if neither you nor any third party retains the ability to install
 modified object code on the User Product (for example, the work has
 been installed in ROM).
 The requirement to provide Installation Information does not include a
 requirement to continue to provide support service, warranty, or updates
 for a work that has been modified or installed by the recipient, or for
 the User Product in which it has been modified or installed.  Access to a
 network may be denied when the modification itself materially and
 adversely affects the operation of the network or violates the rules and
 protocols for communication across the network.
 Corresponding Source conveyed, and Installation Information provided,
 in accord with this section must be in a format that is publicly
 documented (and with an implementation available to the public in
 source code form), and must require no special password or key for
 unpacking, reading or copying.
  7. Additional Terms.
 "Additional permissions" are terms that supplement the terms of this
 License by making exceptions from one or more of its conditions.
 Additional permissions that are applicable to the entire Program shall
 be treated as though they were included in this License, to the extent
 that they are valid under applicable law.  If additional permissions
 apply only to part of the Program, that part may be used separately
 under those permissions, but the entire Program remains governed by
 this License without regard to the additional permissions.
 When you convey a copy of a covered work, you may at your option
 remove any additional permissions from that copy, or from any part of
 it.  (Additional permissions may be written to require their own
 removal in certain cases when you modify the work.)  You may place
 additional permissions on material, added by you to a covered work,
 for which you have or can give appropriate copyright permission.
 Notwithstanding any other provision of this License, for material you
 add to a covered work, you may (if authorized by the copyright holders of
 that material) supplement the terms of this License with terms:
 a) Disclaiming warranty or limiting liability differently from the
 terms of sections 15 and 16 of this License; or
 b) Requiring preservation of specified reasonable legal notices or
 author attributions in that material or in the Appropriate Legal
 Notices displayed by works containing it; or
 c) Prohibiting misrepresentation of the origin of that material, or
 requiring that modified versions of such material be marked in
 reasonable ways as different from the original version; or
 d) Limiting the use for publicity purposes of names of licensors or
 authors of the material; or
 e) Declining to grant rights under trademark law for use of some
 trade names, trademarks, or service marks; or
 f) Requiring indemnification of licensors and authors of that
 material by anyone who conveys the material (or modified versions of
 it) with contractual assumptions of liability to the recipient, for
 any liability that these contractual assumptions directly impose on
 those licensors and authors.
 All other non-permissive additional terms are considered "further
 restrictions" within the meaning of section 10.  If the Program as you
 received it, or any part of it, contains a notice stating that it is
 governed by this License along with a term that is a further
 restriction, you may remove that term.  If a license document contains
 a further restriction but permits relicensing or conveying under this
 License, you may add to a covered work material governed by the terms
 of that license document, provided that the further restriction does
 not survive such relicensing or conveying.
 If you add terms to a covered work in accord with this section, you
 must place, in the relevant source files, a statement of the
 additional terms that apply to those files, or a notice indicating
 where to find the applicable terms.
 Additional terms, permissive or non-permissive, may be stated in the
 form of a separately written license, or stated as exceptions;
 the above requirements apply either way.
  8. Termination.
 You may not propagate or modify a covered work except as expressly
 provided under this License.  Any attempt otherwise to propagate or
 modify it is void, and will automatically terminate your rights under
 this License (including any patent licenses granted under the third
 paragraph of section 11).
 However, if you cease all violation of this License, then your
 license from a particular copyright holder is reinstated (a)
 provisionally, unless and until the copyright holder explicitly and
 finally terminates your license, and (b) permanently, if the copyright
 holder fails to notify you of the violation by some reasonable means
 prior to 60 days after the cessation.
 Moreover, your license from a particular copyright holder is
 reinstated permanently if the copyright holder notifies you of the
 violation by some reasonable means, this is the first time you have
 received notice of violation of this License (for any work) from that
 copyright holder, and you cure the violation prior to 30 days after
 your receipt of the notice.
 Termination of your rights under this section does not terminate the
 licenses of parties who have received copies or rights from you under
 this License.  If your rights have been terminated and not permanently
 reinstated, you do not qualify to receive new licenses for the same
 material under section 10.
  9. Acceptance Not Required for Having Copies.
 You are not required to accept this License in order to receive or
 run a copy of the Program.  Ancillary propagation of a covered work
 occurring solely as a consequence of using peer-to-peer transmission
 to receive a copy likewise does not require acceptance.  However,
 nothing other than this License grants you permission to propagate or
 modify any covered work.  These actions infringe copyright if you do
 not accept this License.  Therefore, by modifying or propagating a
 covered work, you indicate your acceptance of this License to do so.
  10. Automatic Licensing of Downstream Recipients.
 Each time you convey a covered work, the recipient automatically
 receives a license from the original licensors, to run, modify and
 propagate that work, subject to this License.  You are not responsible
 for enforcing compliance by third parties with this License.
 An "entity transaction" is a transaction transferring control of an
 organization, or substantially all assets of one, or subdividing an
 organization, or merging organizations.  If propagation of a covered
 work results from an entity transaction, each party to that
 transaction who receives a copy of the work also receives whatever
 licenses to the work the party's predecessor in interest had or could
 give under the previous paragraph, plus a right to possession of the
 Corresponding Source of the work from the predecessor in interest, if
 the predecessor has it or can get it with reasonable efforts.
 You may not impose any further restrictions on the exercise of the
 rights granted or affirmed under this License.  For example, you may
 not impose a license fee, royalty, or other charge for exercise of
 rights granted under this License, and you may not initiate litigation
 (including a cross-claim or counterclaim in a lawsuit) alleging that
 any patent claim is infringed by making, using, selling, offering for
 sale, or importing the Program or any portion of it.
  11. Patents.
 A "contributor" is a copyright holder who authorizes use under this
 License of the Program or a work on which the Program is based.  The
 work thus licensed is called the contributor's "contributor version".
 A contributor's "essential patent claims" are all patent claims
 owned or controlled by the contributor, whether already acquired or
 hereafter acquired, that would be infringed by some manner, permitted
 by this License, of making, using, or selling its contributor version,
 but do not include claims that would be infringed only as a
 consequence of further modification of the contributor version.  For
 purposes of this definition, "control" includes the right to grant
 patent sublicenses in a manner consistent with the requirements of
 this License.
 Each contributor grants you a non-exclusive, worldwide, royalty-free
 patent license under the contributor's essential patent claims, to
 make, use, sell, offer for sale, import and otherwise run, modify and
 propagate the contents of its contributor version.
 In the following three paragraphs, a "patent license" is any express
 agreement or commitment, however denominated, not to enforce a patent
 (such as an express permission to practice a patent or covenant not to
 sue for patent infringement).  To "grant" such a patent license to a
 party means to make such an agreement or commitment not to enforce a
 patent against the party.
 If you convey a covered work, knowingly relying on a patent license,
 and the Corresponding Source of the work is not available for anyone
 to copy, free of charge and under the terms of this License, through a
 publicly available network server or other readily accessible means,
 then you must either (1) cause the Corresponding Source to be so
 available, or (2) arrange to deprive yourself of the benefit of the
 patent license for this particular work, or (3) arrange, in a manner
 consistent with the requirements of this License, to extend the patent
 license to downstream recipients.  "Knowingly relying" means you have
 actual knowledge that, but for the patent license, your conveying the
 covered work in a country, or your recipient's use of the covered work
 in a country, would infringe one or more identifiable patents in that
 country that you have reason to believe are valid.
 If, pursuant to or in connection with a single transaction or
 arrangement, you convey, or propagate by procuring conveyance of, a
 covered work, and grant a patent license to some of the parties
 receiving the covered work authorizing them to use, propagate, modify
 or convey a specific copy of the covered work, then the patent license
 you grant is automatically extended to all recipients of the covered
 work and works based on it.
 A patent license is "discriminatory" if it does not include within
 the scope of its coverage, prohibits the exercise of, or is
 conditioned on the non-exercise of one or more of the rights that are
 specifically granted under this License.  You may not convey a covered
 work if you are a party to an arrangement with a third party that is
 in the business of distributing software, under which you make payment
 to the third party based on the extent of your activity of conveying
 the work, and under which the third party grants, to any of the
 parties who would receive the covered work from you, a discriminatory
 patent license (a) in connection with copies of the covered work
 conveyed by you (or copies made from those copies), or (b) primarily
 for and in connection with specific products or compilations that
 contain the covered work, unless you entered into that arrangement,
 or that patent license was granted, prior to 28 March 2007.
 Nothing in this License shall be construed as excluding or limiting
 any implied license or other defenses to infringement that may
 otherwise be available to you under applicable patent law.
  12. No Surrender of Others' Freedom.
 If conditions are imposed on you (whether by court order, agreement or
 otherwise) that contradict the conditions of this License, they do not
 excuse you from the conditions of this License.  If you cannot convey a
 covered work so as to satisfy simultaneously your obligations under this
 License and any other pertinent obligations, then as a consequence you may
 not convey it at all.  For example, if you agree to terms that obligate you
 to collect a royalty for further conveying from those to whom you convey
 the Program, the only way you could satisfy both those terms and this
 License would be to refrain entirely from conveying the Program.
  13. Remote Network Interaction; Use with the GNU General Public License.
 Notwithstanding any other provision of this License, if you modify the
 Program, your modified version must prominently offer all users
 interacting with it remotely through a computer network (if your version
 supports such interaction) an opportunity to receive the Corresponding
 Source of your version by providing access to the Corresponding Source
 from a network server at no charge, through some standard or customary
 means of facilitating copying of software.  This Corresponding Source
 shall include the Corresponding Source for any work covered by version 3
 of the GNU General Public License that is incorporated pursuant to the
 following paragraph.
 Notwithstanding any other provision of this License, you have
 permission to link or combine any covered work with a work licensed
 under version 3 of the GNU General Public License into a single
 combined work, and to convey the resulting work.  The terms of this
 License will continue to apply to the part which is the covered work,
 but the work with which it is combined will remain governed by version
 3 of the GNU General Public License.
  14. Revised Versions of this License.
 The Free Software Foundation may publish revised and/or new versions of
 the GNU Affero General Public License from time to time.  Such new versions
 will be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.
 Each version is given a distinguishing version number.  If the
 Program specifies that a certain numbered version of the GNU Affero General
 Public License "or any later version" applies to it, you have the
 option of following the terms and conditions either of that numbered
 version or of any later version published by the Free Software
 Foundation.  If the Program does not specify a version number of the
 GNU Affero General Public License, you may choose any version ever published
 by the Free Software Foundation.
 If the Program specifies that a proxy can decide which future
 versions of the GNU Affero General Public License can be used, that proxy's
 public statement of acceptance of a version permanently authorizes you
 to choose that version for the Program.
 Later license versions may give you additional or different
 permissions.  However, no additional obligations are imposed on any
 author or copyright holder as a result of your choosing to follow a
 later version.
  15. Disclaimer of Warranty.
 THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
 APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
 HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
 OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
 IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
 ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
  16. Limitation of Liability.
 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGES.
  17. Interpretation of Sections 15 and 16.
 If the disclaimer of warranty and limitation of liability provided
 above cannot be given local legal effect according to their terms,
 reviewing courts shall apply local law that most closely approximates
 an absolute waiver of all civil liability in connection with the
 Program, unless a warranty or assumption of liability accompanies a
 copy of the Program in return for a fee.
--- a/README.md
+++ b/README.md
@ -1,146 +1,57 @@
-# Satellite Data Fusion Pipeline
+# Worldwide PhenoCam EFAST feasibility screening
-Python pipeline for downloading Sentinel-2 and Sentinel-3 imagery and PhenoCam ground truth, applying NDVI-based cloud pre-selection, fusing sensors with the [EFAST](https://github.com/DHI-GRAS/efast) algorithm, and evaluating fused **Green Chromatic Coordinate (GCC)** time series against PhenoCam `gcc_90`.
+Screen the global [PhenoCam Network](https://phenocam.nau.edu/) for sites where EFAST Sentinel-2 / Sentinel-3 fusion is likely to work: enough PhenoCam `gcc_90`, seasonal signal, and S2/S3 coverage for a calendar year.
-## Features
+Agent-oriented detail: [`AGENTS.md`](AGENTS.md).
- **Acquisition** — S2 L2A (AWS Element84 STAC), S3 OLCI L1B (Copernicus OpenEO), PhenoCam midday images and GCC CSV
+---
 - **Pre-selection** — Aggressive and non-aggressive NDVI-based cloud screening (plus dark-scene rejection)
 - **Preparation** — Harmonised reflectance/GCC rasters, distance-to-cloud weights, S3 compositing and optional temporal smoothing
 - **Fusion** — EFAST under eight scenarios per site (BtI and ItB × two strategies × σ ∈ {20, 30} days)
 - **Post-processing** — Crop to valid-data window; NDVI and GCC timeseries at the site
 - **Metrics** — Temporal comparison vs PhenoCam (`metrics.json`); optional Tier-2 withheld-S2 gap validation
 - **Web viewer** — Static HTML dashboard over pipeline outputs (`webapp/`)
-## Installation
+## Quick start
 From `processing/`:
 ```bash
-pip install -r requirements.txt
+uv sync
-pip install git+https://github.com/DHI-GRAS/efast.git   # not on PyPI
+uv run python 1-phenocam.py --evaluation-year 2025
 ```
-Create `.env` with Copernicus Data Space credentials:
+### Stepped pipeline (resumable)
- `CDSE_USER`
+All steps use `--evaluation-year` (default 2025) and optional `--site`. See each script docstring for inputs/outputs under `data/`.
 - `CDSE_PASSWORD`
 Python version is pinned in `.python-version` (use `.venv/` locally).
 ## Usage
 ```python
 from run import run_pipeline
 run_pipeline(season=2024, site_position=(47.116171, 11.320308), site_name="innsbruck")
 ```
 `site_position` is always **`(lat, lon)`**. Study sites are listed at the bottom of `run.py`: `innsbruck`, `forthgr`, `pitsalu`, `vindeln2`, `sunflowerjerez1`, `institutekarnobat`.
 By default, most stages in `run.py` are **commented out** (metrics-only). Uncomment acquisition → pre-selection → preparation → fusion → post-processing for a full run.
 ### Pipeline stages
 1. Download S2, S3, and PhenoCam
 2. Pre-selection (per-sensor NDVI screening → `raw/preselection/`)
 3. Prepare S2/S3 for each strategy (`prepared_{aggressive|nonaggressive}/` and `_itb/` variants)
 4. EFAST fusion (BtI reflectance and ItB GCC products)
 5. Post-process crops and timeseries (`processed_*_sigma{20,30}/`)
 6. Compute metrics vs PhenoCam → `metrics.json`
 ### Gap validation (optional)
 With prepared data and EFAST installed:
 ```bash
-# Phenology sidecars (TIMESAT 50 % amplitude)
+uv run python 1-phenocam.py --evaluation-year 2025
-python -m phenology_timesat --all
+uv run python 2-phenocam-screening.py --evaluation-year 2025
 uv run python 3-sentinel-data.py --evaluation-year 2025
 uv run python 4-fusion.py --evaluation-year 2025
 uv run python 5-metrics.py --evaluation-year 2025
-# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up)
+# single site
-python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \
+uv run python 3-sentinel-data.py --evaluation-year 2025 --site innsbruck
-  --strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30
+uv run python 4-fusion.py --evaluation-year 2025 --site innsbruck
-
+uv run python 5-metrics.py --evaluation-year 2025 --site innsbruck
 # All six sites, best BtI scenario per site
 python -m gap_validation.batch_spatial
 # Full-season NSE_PC on gap-degraded stack (slow)
 python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688
 python -m gap_validation.batch_temporal
 # TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier)
 python -m gap_validation.phenology_offsets
 ```
-Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`.
+Step 3 S3 uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`); S2 uses AWS Earth Search COG range reads (no auth).
-## Data layout
+---
-```
+## Outputs (under `data/`)
 data/{site_name}/{season}/
  raw/
    s2/                    # {YYYYMMDD}_{n}.geotiff — B02, B03, B04, B8A
    s3/                    # {YYYYMMDD}_{n}.geotiff — Oa04, Oa06, Oa08, Oa17
    phenocam/              # JPEGs, GCC JSON, phenology sidecar
    preselection/          # {s2,s3}_preselection.{json,csv}
  prepared_{strategy}/
    s2/                    # REFL + DIST_CLOUD GeoTIFFs
    s3/                    # composite_{YYYYMMDD}.tif
    fusion/                # REFL_{YYYYMMDD}.tif (σ≈20)
    fusion_sigma30/        # REFL (σ=30)
  prepared_{strategy}_itb/
    s2/  s3/  fusion/      # GCC products (Index-then-Blend)
  processed_{strategy}_sigma{20,30}/
    s2/  s3/  fusion/      # cropped {YYYYMMDD}_0.geotiff
    gcc/  ndvi/            # timeseries.json per source
  processed_{strategy}_itb_sigma{20,30}/
    s2/  s3/  fusion/  gcc/
  validation/            # gap experiment (when run)
  metrics.json
 ```
-Site metadata: `data/sites.geojson` (six thesis sites). `data/coweeta/` is local/legacy and not listed there.
+| Artifact | Step | Role |
 |----------|------|------|
 | `phenocam/{year}.json` | 1 | Site list + `sites_dir` pointer |
 | `phenocam/{year}/{site}.json`, `{site}_1day.csv` | 1 | Raw API + GCC CSV |
 | `phenocam_screening/{year}.json` / `.csv` | 2 | PhenoCam + SNR gate results |
 | `sentinel_data/{year}/{site}/prepared/s2/` | 3 | S2 REFL + DIST_CLOUD GeoTIFFs |
 | `sentinel_data/{year}/{site}/prepared/s3/` | 3 | S3 composite GeoTIFFs |
 | `fusion/{year}/{site}/` | 4 | BtI/ItB fused rasters |
 | `metrics/{year}/{site}/`, `metrics/manifest.json` | 5 | Timeseries JSON, covariates, webapp manifest |
-### File formats
+The 2025 manifest currently lists **739** cameras with archive overlap; most per-site CSV/JSON files are cached under `data/phenocam/2025/`.
-**Sentinel-2** — Multi-band GeoTIFF; bands `[blue, green, red, nir]`; `VIEWING_ZENITH_ANGLE` metadata; filename `{YYYYMMDD}_{increment}.geotiff`.
+---
 **Sentinel-3** — Multi-band GeoTIFF; same band order; filename `{YYYYMMDD}_{increment}.geotiff`.
 **Prepared S2** — `S2A_MSIL2A_{YYYYMMDD}_REFL.tif` plus `*DIST_CLOUD.tif` (cloud-distance weights for EFAST).
 ## Web viewer
-Static HTML/JS in `webapp/` — no build step. Shared GeoTIFF helpers: `webapp/common.js`. CDN: Leaflet, geotiff.js, proj4. Symlink: `webapp/data` → `../data`.
+From the workspace root, `make serve` serves `processing/` at [http://localhost:8000/webapp/index.html](http://localhost:8000/webapp/index.html). Requires step 5 (`data/metrics/manifest.json`).
 Serve from the **repository root** (not `webapp/`):
 ```bash
 python3 -m http.server 8000
 # http://localhost:8000/webapp/index.html
 ```
 Or from the workspace root: `make serve`.
 | Page | Purpose | Primary data paths |
 |------|---------|-------------------|
 | `index.html` | Post-processed maps, NDVI/GCC timeseries, PhenoCam | `processed_{strategy}_sigma{n}/`, `raw/phenocam/` |
 | `preselection.html` | Cloud-screening diagnostics | `raw/preselection/{s2,s3}_preselection.json` |
 | `prepared.html` | Prepared REFL/GCC before crop | `prepared_{strategy}/`, `prepared_{strategy}_itb/` |
 | `fusion.html` | EFAST daily fusion rasters | `prepared_*/fusion/`, `fusion_sigma30/` |
 | `postprocessed.html` | Cropped processed stacks | `processed_*_sigma*/` |
 | `metrics.html` | Tabular `metrics.json` (thesis export source) | `{site}/{season}/metrics.json` under `webapp/data/` |
 | `gap_validation.html` | Withheld-S2 gap experiment | `{site}/{season}/validation/gap_validation_summary.json` |
 | `phenology.html` | TIMESAT on PhenoCam GCC | `raw/phenocam/phenocam_phenology.json` |
 Site/season dropdowns use `data/sites.geojson`. Map pages: **BtI | ItB**; scenarios `aggressive` / `nonaggressive`, σ 20 / 30. Keep the shared nav consistent across all eight pages. QA only — thesis tables are exported from the workspace root (`make export` or `../scripts/export_thesis_tables.py`).
 ## Development
 ```bash
 ruff check --fix . && ruff format .
 ```
 Pre-commit hooks: `.pre-commit-config.yaml`.
 ## License
 GNU Affero General Public License v3.0 (AGPL-3.0). See [LICENSE](LICENSE).
--- a/acquisition_phenocam.py
+++ b/acquisition_phenocam.py
@ -1,282 +0,0 @@
 """PhenoCam acquisition from PhenoCam Network API."""
 import csv
 import json
 import requests
 from pathlib import Path
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 PHENOCAM_API = "https://phenocam.nau.edu/api"
 def _phenocam_summary_gcc_value(row, use_mean_fallback: bool):
    """Extract daily GCC from a one-day summary row.
    Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth).
    Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present.
    With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``.
    """
    if not use_mean_fallback:
        oflag = row.get("outlierflag_gcc_90")
        if oflag is not None and str(oflag).strip() in ("1", "1.0"):
            return None
    raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
    if raw is None:
        return None
    text = str(raw).strip()
    if not text or text.upper() == "NA":
        return None
    try:
        val = float(text)
    except ValueError:
        return None
    if val <= -9998.0:
        return None
    return val
 def _find_start_offset(site_name, start_dt, total_count):
     """Estimate a listing offset shortly before ``start_dt`` via binary search.
     Probes ``{PHENOCAM_API}/middayimages/`` one record at a time and compares the
     record's ``imgdate`` with ``start_dt``. The search is only meaningful if the
     listing is in ascending date order (presumed here; not verified).
     Args:
         site_name: PhenoCam site name used as the ``site`` query filter.
         start_dt: ``datetime`` marking the start of the wanted window.
         total_count: Total number of midday images the API reports for the site.
     Returns:
         A non-negative offset: the search position minus a 100-record safety margin.
     Raises:
         requests.exceptions.HTTPError: If a probe request returns an error status.
     """
     low, high = 0, total_count - 1
     limit = 1
     # At most 15 probes. Stop as soon as the interval has collapsed: further
     # probes would re-request the same record without narrowing the search.
     for _ in range(15):
         if low >= high:
             break
         mid = (low + high) // 2
         response = requests.get(
             f"{PHENOCAM_API}/middayimages/",
             params={"site": site_name, "limit": limit, "offset": mid},
             timeout=30,
         )
         response.raise_for_status()
         results = response.json().get("results", [])
         if not results:
             break
         mid_date_str = results[0].get("imgdate", "")
         if not mid_date_str:
             break
         try:
             mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
         except ValueError:
             # Unparseable date: give up refining and use the current estimate.
             break
         if mid_date < start_dt:
             low = mid + 1
         else:
             high = mid
     # Back off by 100 records so the caller starts safely before the window.
     return max(0, low - 100)
 def download_phenocam(season, site_position, site_name, date_range=None):
     """Run both PhenoCam acquisition steps: midday images first, then the GCC series."""
     call_args = (season, site_position, site_name, date_range)
     for acquire in (_download_phenocam_images, _download_phenocam_gcc):
         acquire(*call_args)
 def _download_phenocam_images(season, site_position, site_name, date_range=None):
     """Download PhenoCam midday JPEGs for one site into ``data/{site_name}/{season}/raw/phenocam/``.
     Args:
         season: Year used for the output path and, without ``date_range``, for the
             default ``{season}-01-01/{season}-12-31`` window.
         site_position: ``(lat, lon)`` pair; only used in the start-up log line here.
         site_name: PhenoCam site name passed to the API as the ``site`` filter.
         date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` window overriding the default.
     Returns:
         None. Returns early when the API reports no images or answers 404.
     Raises:
         requests.exceptions.HTTPError: For non-404 HTTP errors while listing images.
     """
     lat, lon = site_position
     datetime_range = date_range or f"{season}-01-01/{season}-12-31"
     output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
     output_dir.mkdir(parents=True, exist_ok=True)
     print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
     start_date, end_date = datetime_range.split("/")
     start_dt = datetime.strptime(start_date, "%Y-%m-%d")
     end_dt = datetime.strptime(end_date, "%Y-%m-%d")
     try:
         # One-record request just to read the total image count for the site.
         response = requests.get(
             f"{PHENOCAM_API}/middayimages/",
             params={"site": site_name, "limit": 1},
             timeout=30,
         )
         response.raise_for_status()
         total_count = response.json().get("count", 0)
         if total_count == 0:
             print(f"[PhenoCam] No images found for site '{site_name}'")
             return
         print(
             f"[PhenoCam] Found {total_count} total images, estimating start offset..."
         )
         # Skip ahead in the listing instead of paging from the very first image.
         start_offset = _find_start_offset(site_name, start_dt, total_count)
         url = f"{PHENOCAM_API}/middayimages/"
         params = {"site": site_name, "offset": start_offset}
         print(f"[PhenoCam] Fetching image list from offset {start_offset}...")
         images = []
         page = 1
         max_pages = 500  # hard cap on the number of listing pages fetched
         past_end_date = False
         while url and page <= max_pages and not past_end_date:
             response = requests.get(url, params=params, timeout=30)
             response.raise_for_status()
             data = response.json()
             results = data.get("results", [])
             if not results:
                 break
             for img in results:
                 img_date_str = img.get("imgdate", "")
                 if not img_date_str:
                     continue
                 try:
                     img_date = datetime.strptime(img_date_str, "%Y-%m-%d")
                     # Stop at the first image dated after the window; this
                     # presumes the listing is in ascending date order.
                     if img_date > end_dt:
                         past_end_date = True
                         break
                     if start_dt <= img_date <= end_dt:
                         images.append(img)
                 except ValueError:
                     # Entries with an unparseable date are ignored.
                     continue
             if url and not past_end_date:
                 # Follow the API's ``next`` link; it is requested without extra params.
                 url = data.get("next")
                 params = None
                 page += 1
                 if page % 50 == 0:
                     print(
                         f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..."
                     )
     except requests.exceptions.HTTPError as e:
         # A 404 is reported as "site not found"; any other HTTP error propagates.
         if e.response.status_code == 404:
             print(f"[PhenoCam] Site '{site_name}' not found")
             return
         raise
     print(f"[PhenoCam] Found {len(images)} images")
     def _download_image(img):
         """Fetch one listed image to ``{YYYYMMDD}.jpg``; return a status message or None."""
         date_str = img.get("imgdate", "").replace("-", "")
         if not date_str:
             return None
         filepath = output_dir / f"{date_str}.jpg"
         # Existing files are never re-downloaded, which makes reruns resumable.
         if filepath.exists():
             return f"Skipped {date_str}.jpg (exists)"
         img_path = img.get("imgpath")
         if not img_path:
             return None
         img_url = f"https://phenocam.nau.edu{img_path}"
         try:
             img_response = requests.get(img_url, timeout=30)
             img_response.raise_for_status()
             filepath.write_bytes(img_response.content)
             return f"Saved {date_str}.jpg"
         except Exception as e:
             # Best effort: a failed image is reported as a message, not raised.
             return f"Error downloading {date_str}: {e}"
     # Download with five worker threads and print each status as it completes.
     with ThreadPoolExecutor(max_workers=5) as executor:
         futures = [executor.submit(_download_image, img) for img in images]
         for future in as_completed(futures):
             result = future.result()
             if result:
                 print(f"[PhenoCam] {result}")
     print("[PhenoCam] Completed")
 def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
     """Fetch greenness-index time series from PhenoCam API. Saves JSON and CSV.
     Looks up the site's ROI list, downloads the first matching ROI's
     ``one_day_summary`` CSV, keeps rows inside the date window and writes
     ``phenocam_gcc.json`` / ``phenocam_gcc.csv`` under
     ``data/{site_name}/{season}/raw/phenocam/``. Finally calls
     ``phenocam_snr.write_phenocam_snr`` for the site and season.
     Args:
         season: Year used for the output path and the default date window.
         site_position: Not referenced in this function's body.
         site_name: PhenoCam site name used to filter ROI lists.
         date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` window overriding the
             default ``{season}-01-01/{season}-12-31``.
     Returns:
         None. Request errors are printed and end the function early.
     """
     datetime_range = date_range or f"{season}-01-01/{season}-12-31"
     output_file = Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_gcc.json")
     output_file.parent.mkdir(parents=True, exist_ok=True)
     start_date, end_date = datetime_range.split("/")
     start_dt = datetime.strptime(start_date, "%Y-%m-%d")
     end_dt = datetime.strptime(end_date, "%Y-%m-%d")
     print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")
     # Get ROIs for site (paginate through results)
     try:
         url = f"{PHENOCAM_API}/roilists/"
         params = {"site": site_name}
         rois = []
         while url:
             r = requests.get(url, params=params, timeout=30)
             r.raise_for_status()
             data = r.json()
             # Keep only exact site-name matches from this page.
             # NOTE(review): roi["site"] raises KeyError if a result lacks the key,
             # and only RequestException is caught below - confirm the API schema.
             rois.extend(
                 [roi for roi in data.get("results", []) if roi["site"] == site_name]
             )
             url = data.get("next")
             params = None
             # Stop paging at the first page that yields a matching ROI.
             if len(rois) > 0:
                 break
         if not rois:
             print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
             return
         # Only the first matching ROI's daily summary is used.
         csv_url = rois[0].get("one_day_summary")
         if not csv_url:
             print("[PhenoCam-GI] No CSV data URL found for ROI")
             return
     except requests.exceptions.RequestException as e:
         print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
         return
     # Fetch CSV data
     try:
         csv_r = requests.get(csv_url, timeout=30)
         csv_r.raise_for_status()
         # Drop empty lines and '#' comment lines so the column header comes first.
         lines = [
             line for line in csv_r.text.split("\n") if line and not line.startswith("#")
         ]
         reader = csv.DictReader(lines)
         fieldnames = reader.fieldnames or ()
         use_mean_fallback = "gcc_90" not in fieldnames
         if use_mean_fallback:
             print(
                 "[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)"
             )
         timeseries = []
         for row in reader:
             try:
                 date_str = row.get("date")
                 if not date_str:
                     continue
                 date = datetime.strptime(date_str, "%Y-%m-%d")
                 if start_dt <= date <= end_dt:
                     gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
                     # Rows without a usable GCC value are left out entirely.
                     if gcc is not None:
                         timeseries.append(
                             {"date": date.isoformat(), "greenness_index": gcc}
                         )
             except (ValueError, KeyError):
                 # Rows with an unparseable date are skipped.
                 continue
     except requests.exceptions.RequestException as e:
         print(f"[PhenoCam-GI] Error fetching CSV: {e}")
         return
     # ISO-formatted date strings sort chronologically.
     timeseries.sort(key=lambda x: x["date"])
     output_dir = output_file.parent
     json_path = output_dir / "phenocam_gcc.json"
     csv_path = output_dir / "phenocam_gcc.csv"
     with open(json_path, "w") as f:
         json.dump(timeseries, f, indent=2)
     with open(csv_path, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=["date", "greenness_index"])
         writer.writeheader()
         writer.writerows(timeseries)
     print(
         f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)"
     )
     # Function-scope import of the project module that writes the SNR output.
     from phenocam_snr import write_phenocam_snr
     write_phenocam_snr(site_name, season, base=Path("data"))
--- a/acquisition_s2.py
+++ b/acquisition_s2.py
@ -1,190 +0,0 @@
 """Sentinel-2-MSI acquisition from AWS Element84 Earth Search (STAC catalog)."""
 import numpy as np
 import rasterio
 import xml.etree.ElementTree as ET
 import requests
 from pathlib import Path
 from rasterio.crs import CRS
 from rasterio.warp import Resampling, calculate_default_transform, reproject, transform_geom
 from rasterio.windows import from_bounds, transform as window_transform
 from pystac_client import Client
 BBOX_SIZE = 0.011
 TARGET_CRS = CRS.from_epsg(32632)
 def _get_bbox(lon, lat):
     """Return ``[west, south, east, north]`` for a ``BBOX_SIZE``-degree square centred on the point."""
     half_extent = BBOX_SIZE / 2
     west, east = lon - half_extent, lon + half_extent
     south, north = lat - half_extent, lat + half_extent
     return [west, south, east, north]
 def _get_window_for_bbox(src, bbox):
     """Return the raster window of ``src`` that covers a lon/lat bounding box.
     The box corners are turned into a polygon, transformed from EPSG:4326 into
     ``src.crs``, reduced to their axis-aligned extent, clipped to ``src.bounds``
     and converted to a window with ``src.transform``.
     """
     x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
     ring = [
         [x_min, y_min],
         [x_max, y_min],
         [x_max, y_max],
         [x_min, y_max],
         [x_min, y_min],
     ]
     projected = transform_geom(
         "EPSG:4326", src.crs, {"type": "Polygon", "coordinates": [ring]}
     )
     # The fifth vertex only closes the ring, so four corners are enough.
     corners = projected["coordinates"][0][:4]
     xs = [corner[0] for corner in corners]
     ys = [corner[1] for corner in corners]
     extent = src.bounds
     left = max(min(xs), extent.left)
     bottom = max(min(ys), extent.bottom)
     right = min(max(xs), extent.right)
     top = min(max(ys), extent.top)
     return from_bounds(left, bottom, right, top, src.transform)
 def _extract_viewing_angle(item):
    if "granule_metadata" not in item.assets:
        return None
    try:
        xml_url = item.assets["granule_metadata"].href
        xml_resp = requests.get(xml_url, timeout=10)
        xml_resp.raise_for_status()
        root = ET.fromstring(xml_resp.content)
        angles = [
            abs(float(zenith_elem.text))
            for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle")
            if (zenith_elem := angle_elem.find("ZENITH_ANGLE")) is not None
        ]
        return angles[0] if angles else None
    except Exception as e:
        print(f"[S2] Warning: Could not extract viewing angle: {e}")
        return None
 def download_s2(season, site_position, site_name, date_range=None):
     """Download Sentinel-2 L2A crops around a site into ``data/{site_name}/{season}/raw/s2/``.
     Searches the Earth Search STAC catalog for items intersecting the site point,
     reads a small window of four bands from each item, reprojects to
     ``TARGET_CRS`` when needed and writes one multi-band GeoTIFF per item named
     ``{YYYYMMDD}_{increment}.geotiff``. Existing output files are skipped.
     Args:
         season: Year used for the output path and the default date window.
         site_position: ``(lat, lon)`` pair of the site.
         site_name: Name used in the output path and log messages.
         date_range: Optional ``"start/end"`` string passed as the STAC ``datetime``
             filter; defaults to ``{season}-01-01/{season}-12-31``.
     """
     lat, lon = site_position
     datetime_range = date_range or f"{season}-01-01/{season}-12-31"
     output_dir = Path(f"data/{site_name}/{season}/raw/s2/")
     print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
     bbox = _get_bbox(lon, lat)
     # Band id written as the band description -> STAC asset key read from the item.
     bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
     output_dir.mkdir(parents=True, exist_ok=True)
     print("[S2] Connecting to STAC catalog...")
     client = Client.open("https://earth-search.aws.element84.com/v1")
     search = client.search(
         collections=["sentinel-2-l2a"],
         intersects={"type": "Point", "coordinates": [lon, lat]},
         datetime=datetime_range,
         max_items=1000,
     )
     print("[S2] Searching items...")
     # Keep the first item seen per (date, 4th underscore-separated id field).
     items_by_key = {}
     for item in search.items():
         date = item.datetime.strftime("%Y%m%d")
         parts = item.id.split("_")
         increment = parts[3] if len(parts) > 3 else "0"
         key = (date, increment)
         if key not in items_by_key:
             items_by_key[key] = item
     print(f"[S2] Found {len(items_by_key)} unique items")
     for (date, increment), item in items_by_key.items():
         filepath = output_dir / f"{date}_{increment}.geotiff"
         if filepath.exists():
             print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
             continue
         print(f"[S2] Processing {date}_{increment}...")
         band_data = {}
         profile = None
         for band_name, asset_name in bands.items():
             if asset_name not in item.assets:
                 continue
             asset = item.assets[asset_name]
             with rasterio.open(asset.href) as src:
                 window = _get_window_for_bbox(src, bbox)
                 # An empty window means the crop does not overlap this asset.
                 if window.height <= 0 or window.width <= 0:
                     continue
                 data = src.read(window=window)
                 new_transform = window_transform(window, src.transform)
                 # The output profile is taken from the first band that is read.
                 if profile is None:
                     profile = {
                         "driver": "GTiff",
                         "height": window.height,
                         "width": window.width,
                         "count": len(bands),
                         "dtype": data.dtype,
                         "crs": src.crs,
                         "transform": new_transform,
                         "compress": "lzw",
                     }
                 band_idx = list(bands.keys()).index(band_name)
                 band_data[band_idx] = data[0]
         # Write only when every configured band was read.
         if profile and len(band_data) == len(bands):
             stacked = np.array([band_data[i] for i in sorted(band_data.keys())])
             band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
             viewing_angle = _extract_viewing_angle(item)
             # Reproject each band to TARGET_CRS (bilinear) if the source CRS differs.
             if profile["crs"] != TARGET_CRS:
                 src_transform = profile["transform"]
                 src_height, src_width = profile["height"], profile["width"]
                 left, bottom, right, top = rasterio.transform.array_bounds(
                     src_height, src_width, src_transform
                 )
                 dst_transform, dst_width, dst_height = calculate_default_transform(
                     profile["crs"], TARGET_CRS, src_width, src_height,
                     left=left, bottom=bottom, right=right, top=top,
                 )
                 reprojected = np.empty(
                     (len(stacked), dst_height, dst_width), dtype=stacked.dtype
                 )
                 for i in range(len(stacked)):
                     reproject(
                         source=stacked[i],
                         destination=reprojected[i],
                         src_transform=src_transform,
                         src_crs=profile["crs"],
                         dst_transform=dst_transform,
                         dst_crs=TARGET_CRS,
                         resampling=Resampling.bilinear,
                     )
                 stacked = reprojected
                 profile.update({
                     "crs": TARGET_CRS,
                     "transform": dst_transform,
                     "width": dst_width,
                     "height": dst_height,
                 })
             with rasterio.open(filepath, "w", **profile) as dst:
                 for i, data in enumerate(stacked, 1):
                     dst.write(data, i)
                     dst.set_band_description(i, band_names[i - 1])
                 # Optional dataset tags: viewing zenith angle and processing baseline.
                 tags = {}
                 if viewing_angle is not None:
                     tags["VIEWING_ZENITH_ANGLE"] = str(viewing_angle)
                 pb = item.properties.get("s2:processing_baseline")
                 if pb is not None:
                     tags["PROCESSING_BASELINE"] = str(pb)
                 if tags:
                     dst.update_tags(**tags)
             # NOTE(review): truthiness test here, so an angle of exactly 0.0 is
             # tagged above but left out of this log message.
             angle_msg = (
                 f" (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else ""
             )
             print(f"[S2] Saved: {filepath}{angle_msg}")
         else:
             print(f"[S2] Skipping {date}_{increment} (missing bands)")
     print("[S2] Completed")
--- a/acquisition_s3.py
+++ b/acquisition_s3.py
@ -1,160 +0,0 @@
 """Sentinel-3-OLCI acquisition from Copernicus Data Space OpenEO API."""
 import os
 import time
 from pathlib import Path
 from datetime import datetime
 from dotenv import load_dotenv
 import openeo
 import requests
 import netCDF4
 import numpy as np
 import rasterio
 from rasterio.transform import from_bounds
 load_dotenv()
 BBOX_SIZE = 0.016  # Larger than S2 to ensure full coverage including padded pixels
 def _get_bbox(lon, lat):
     """Return ``[west, south, east, north]`` for a ``BBOX_SIZE``-degree square centred on the point."""
     half_extent = BBOX_SIZE / 2
     west, east = lon - half_extent, lon + half_extent
     south, north = lat - half_extent, lat + half_extent
     return [west, south, east, north]
 def _process_netcdf(nc_file, output_dir, bands, openeo_bands):
     """Split a downloaded NetCDF cube into one multi-band GeoTIFF per time step.
     Args:
         nc_file: Path of the NetCDF file; must provide ``t``, ``x``, ``y`` and the
             band variables.
         output_dir: Directory receiving ``{YYYYMMDD}_{increment}.geotiff`` files.
         bands: Mapping whose keys are the band names written as band descriptions.
         openeo_bands: NetCDF variable names, index-aligned with ``bands`` keys.
     """
     with netCDF4.Dataset(str(nc_file), "r") as nc:
         times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
         x_coords = nc.variables["x"][:]
         y_coords = nc.variables["y"][:]
         # Band variables are those named "B" followed by digits, in sorted order.
         band_vars = sorted(
             [v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
         )
         # Map each NetCDF variable back to its name in ``bands`` by position.
         band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
         # NOTE(review): from_bounds takes outer edges; if x/y hold pixel centres
         # this shifts the grid by half a pixel - confirm against the OpenEO output.
         transform = from_bounds(
             float(x_coords.min()),
             float(y_coords.min()),
             float(x_coords.max()),
             float(y_coords.max()),
             len(x_coords),
             len(y_coords),
         )
         print(f"[S3] Found {len(times)} time steps")
         # Per-date counter so several time steps on one day get distinct file names.
         date_counts = {}
         for t_idx, time_val in enumerate(times):
             dt = (
                 time_val
                 if isinstance(time_val, datetime)
                 else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
             )
             date_str = dt.strftime("%Y%m%d")
             increment = date_counts.get(date_str, 0)
             date_counts[date_str] = increment + 1
             band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
             stacked = np.stack(band_data, axis=0)
             output_path = output_dir / f"{date_str}_{increment}.geotiff"
             # The CRS is hard-coded and must match the projection the caller requested.
             with rasterio.open(
                 output_path,
                 "w",
                 driver="GTiff",
                 height=len(y_coords),
                 width=len(x_coords),
                 count=len(band_data),
                 dtype=stacked.dtype,
                 crs="EPSG:32632",
                 transform=transform,
                 compress="lzw",
             ) as dst:
                 dst.write(stacked)
                 for i, band_name in enumerate(band_names, 1):
                     dst.set_band_description(i, band_name)
             print(f"[S3] Saved: {output_path}")
 def download_s3(season, site_position, site_name, date_range=None):
     """Download Sentinel-3 OLCI crops around a site into ``data/{site_name}/{season}/raw/s3/``.
     Obtains a CDSE access token with the ``CDSE_USER`` / ``CDSE_PASSWORD``
     environment variables, loads ``SENTINEL3_OLCI_L1B`` through OpenEO resampled
     to EPSG:32632, downloads the cube as a temporary NetCDF file, splits it into
     per-time-step GeoTIFFs via ``_process_netcdf`` and removes the NetCDF file.
     Args:
         season: Year used for the output path and the default date window.
         site_position: ``(lat, lon)`` pair of the site.
         site_name: Name used in the output path and log messages.
         date_range: Optional ``"start/end"`` string; defaults to
             ``{season}-01-01/{season}-12-31``.
     Raises:
         requests.exceptions.RequestException: If the token request fails, returns
             an error status or times out.
         Exception: Any error raised by the OpenEO download is logged and re-raised.
     """
     lat, lon = site_position
     datetime_range = date_range or f"{season}-01-01/{season}-12-31"
     output_dir = Path(f"data/{site_name}/{season}/raw/s3/")
     print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
     bbox = _get_bbox(lon, lat)
     bands = {
         "SDR_Oa04": "blue",
         "SDR_Oa06": "green",
         "SDR_Oa08": "red",
         "SDR_Oa17": "nir",
     }
     output_dir.mkdir(parents=True, exist_ok=True)
     # Names under which the bands are requested from the OpenEO collection.
     band_map = {
         "SDR_Oa04": "B04",
         "SDR_Oa06": "B06",
         "SDR_Oa08": "B08",
         "SDR_Oa17": "B17",
     }
     openeo_bands = [band_map.get(b, b) for b in bands.keys()]
     start_date, end_date = datetime_range.split("/")
     spatial_extent = {
         "west": bbox[0],
         "east": bbox[2],
         "south": bbox[1],
         "north": bbox[3],
     }
     print("[S3] Authenticating...")
     # Bounded wait: without a timeout a stalled identity server would hang the
     # pipeline forever. 30 s matches the other HTTP calls in these modules.
     token_response = requests.post(
         "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
         data={
             "grant_type": "password",
             "username": os.getenv("CDSE_USER"),
             "password": os.getenv("CDSE_PASSWORD"),
             "client_id": "cdse-public",
         },
         timeout=30,
     )
     token_response.raise_for_status()
     tokens = token_response.json()
     access_token = tokens["access_token"]
     print("[S3] Connecting to OpenEO...")
     conn = openeo.connect("openeo.dataspace.copernicus.eu")
     conn.authenticate_oidc_access_token(access_token)
     print("[S3] Loading collection...")
     datacube = conn.load_collection(
         "SENTINEL3_OLCI_L1B",
         spatial_extent=spatial_extent,
         temporal_extent=[start_date, end_date],
         bands=openeo_bands,
     ).resample_spatial(projection=32632)
     output_file = output_dir / "s3_data.nc"
     print(f"[S3] Downloading NetCDF to {output_file}...")
     print(f"[S3] Temporal extent: {start_date} to {end_date}")
     print(f"[S3] Spatial extent: {spatial_extent}")
     print(f"[S3] Bands: {openeo_bands}")
     print("[S3] This may take several minutes depending on data volume...")
     start_time = time.time()
     try:
         datacube.download(str(output_file), format="NetCDF")
         elapsed = time.time() - start_time
         print(f"[S3] Download completed in {elapsed:.1f} seconds")
     except Exception as e:
         # Log how long the attempt ran, then let the caller see the failure.
         elapsed = time.time() - start_time
         print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}")
         raise
     print("[S3] Processing NetCDF...")
     process_start = time.time()
     _process_netcdf(output_file, output_dir, bands, openeo_bands)
     process_elapsed = time.time() - process_start
     print(f"[S3] Processing completed in {process_elapsed:.1f} seconds")
     print("[S3] Removing temporary NetCDF file...")
     os.remove(output_file)
     print("[S3] Completed")
--- a/data/sites.geojson
+++ b/data/sites.geojson
@ -1,132 +0,0 @@
 {
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          25.0743,
          35.3045
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2024": {}
        },
        "elevation": 68,
        "description": "FORTH Heraklion Greece",
        "sitename": "forthgr",
        "ndvi_selected": true,
        "vegetation_type": "Agriculture"
      }
    },
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          11.320308,
          47.116171
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2020": {},
          "2024": {}
        },
        "elevation": 972,
        "description": "Neustift Field Site, Stubai Valley, Tyrol, Austria",
        "sitename": "innsbruck",
        "ndvi_selected": true,
        "vegetation_type": "Grassland"
      }
    },
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          24.3688,
          58.5633
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2024": {}
        },
        "elevation": 3,
        "description": "Abandoned peat extraction area, Estonia",
        "sitename": "pitsalu",
        "ndvi_selected": true,
        "vegetation_type": "Wetland"
      }
    },
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          19.7673,
          64.2437
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2023": {}
        },
        "elevation": 224,
        "description": "SITES Svartberget Research Station, Vindeln, Sweden",
        "sitename": "vindeln2",
        "ndvi_selected": true,
        "vegetation_type": "Deciduous Broadleaf"
      }
    },
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          -6.0033,
          36.7455
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2024": {}
        },
        "elevation": 56,
        "description": "Sun flower plot, Jerez, Spain",
        "sitename": "sunflowerjerez1",
        "ndvi_selected": true,
        "vegetation_type": "Agriculture"
      }
    },
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          26.9837,
          42.6558
        ]
      },
      "properties": {
        "country": "",
        "seasons": {
          "2024": {}
        },
        "elevation": 262,
        "description": "Institute of Agriculture in Karnobat (selection fields)",
        "sitename": "institutekarnobat",
        "ndvi_selected": true,
        "vegetation_type": "Agriculture"
      }
    }
  ]
 }
--- a/deploy.sh
+++ b/deploy.sh
@ -1,84 +0,0 @@
 #!/bin/bash
 # Deploy helper. Usage: deploy.sh {setup|upload|code} [server]
 #   $1 = mode (default: setup), $2 = ssh target (default below).
 # `set -e` aborts the script on the first failing command.
 set -e
 MODE="${1:-setup}"
 SERVER="${2:-root@49.12.2.88}"
 APP_DIR="/opt/satellite-fusion"
 DATA_DIR="$APP_DIR/data"
 # NOTE(review): $SERVER is used unquoted in the `ssh` calls below; a target
 # containing whitespace would be word-split. Confirm before changing.
 case "$MODE" in
    # setup: stage a filtered copy of the working tree, mirror it to the server
    # (rsync --delete), then provision Python, the venv and the systemd unit remotely.
    setup)
        echo "Deploying to $SERVER..."
        TEMP_DIR=$(mktemp -d)
        # Staging copy leaves out caches, VCS metadata, data/ and the local .env.
        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$TEMP_DIR/"
        cat > "$TEMP_DIR/.env.example" <<EOF
 CDSE_USER=your_username_here
 CDSE_PASSWORD=your_password_here
 EOF
        ssh $SERVER "mkdir -p $APP_DIR"
        rsync -av --delete "$TEMP_DIR/" "$SERVER:$APP_DIR/"
        # NOTE(review): with `set -e`, a failure above skips this cleanup and
        # leaves TEMP_DIR behind (no trap is installed).
        rm -rf "$TEMP_DIR"
        # The heredoc delimiter is unquoted, so $APP_DIR / $DATA_DIR are expanded
        # locally before the script text is sent to the remote shell.
        ssh $SERVER <<ENDSSH
 set -e
 cd $APP_DIR
 # Find/install Python 3.11
 if ! command -v python3.11 &> /dev/null; then
    apt-get update -qq
    apt-get install -y python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
        apt-get install -y -t trixie-backports python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
            apt-get install -y software-properties-common
            add-apt-repository -y ppa:deadsnakes/ppa 2>/dev/null || true
            apt-get update -qq
            apt-get install -y python3.11 python3.11-venv python3.11-dev
        }
    }
 fi
 # Setup venv
 [ -d venv ] && rm -rf venv
 python3.11 -m venv venv
 source venv/bin/activate
 pip install --upgrade pip -q
 pip install -r requirements.txt -q
 pip install git+https://github.com/DHI-GRAS/efast.git -q
 # Setup .env
 [ ! -f .env ] && [ -f .env.example ] && cp .env.example .env
 # Setup systemd service
 if [ -f satellite-fusion-web.service ]; then
    sed "s|/opt/satellite-fusion|$APP_DIR|g" satellite-fusion-web.service | \
        sed "s|--directory /opt/satellite-fusion|--directory $APP_DIR/webapp|g" > /tmp/satellite-fusion-web.service
    cp /tmp/satellite-fusion-web.service /etc/systemd/system/
    systemctl daemon-reload
 fi
 # Create data directory and webapp/data symlink
 mkdir -p $DATA_DIR
 ln -sf ../data $APP_DIR/webapp/data
 ENDSSH
        echo "Setup complete!"
        ;;
    # upload: push the local data/ directory to the server's data directory.
    upload)
        echo "Uploading data to $SERVER..."
        rsync -avh --progress --exclude='*.pyc' --exclude='__pycache__' data/ "$SERVER:$DATA_DIR/"
        echo "Data upload complete!"
        ;;
    # code: push code straight from the working tree (no staging copy, no
    # --delete, no remote provisioning).
    code)
        echo "Uploading code to $SERVER..."
        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$SERVER:$APP_DIR/"
        echo "Code upload complete!"
        ;;
    # Any other mode: print usage and exit non-zero.
    *)
        echo "Usage: $0 {setup|upload|code} [server]"
        echo "  setup  - Deploy code and setup server (default)"
        echo "  upload - Upload data directory only"
        echo "  code   - Upload code files only (no setup)"
        exit 1
        ;;
 esac
--- a/fusion.py
+++ b/fusion.py
@ -1,176 +0,0 @@
 """EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
 from datetime import datetime, timedelta
 from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
 def _import_efast():
    """Lazy import of efast to avoid import errors when not using efast functions."""
    try:
        import efast
        return efast
    except ImportError:
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        )
 def run_efast(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
 ):
    """Run EFAST reflectance (``REFL``) fusion once per day over a date range.

    Args:
        season: Year; used for the default date range (Jan 1 – Dec 31) and to
            locate the base directory via ``_get_base_dir``.
        site_position: ``(lat, lon)`` pair; only used for the log line here.
        site_name: Site identifier passed to ``_get_base_dir``.
        cleaning_strategy: Passed to ``_get_base_dir`` to pick the prepared tree.
        sigma: Forwarded to ``efast.fusion`` when not ``None``; also selects the
            ``fusion_sigma{sigma}`` output directory.
        date_range: ``"YYYY-MM-DD/YYYY-MM-DD"``; defaults to the whole season.
        s2_output_dir, s3_output_dir, fusion_output_dir: Optional overrides for
            the directories under the base directory.

    A failure on one day is printed and the loop continues with the next day.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    efast_base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    s2_output_dir = s2_output_dir or (efast_base_dir / "s2")
    s3_output_dir = s3_output_dir or (efast_base_dir / "s3")
    # Use the same ``is not None`` test as the kwargs below, so an explicit
    # sigma of 0 does not write into the default-sigma "fusion" directory.
    fusion_output_dir = fusion_output_dir or (
        efast_base_dir / (f"fusion_sigma{sigma}" if sigma is not None else "fusion")
    )
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()
    start_str, end_str = datetime_range.split("/")
    start_date = datetime.strptime(start_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_str, "%Y-%m-%d")
    # The fusion options do not change per day; build them once.
    kwargs = {
        "product": "REFL",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        kwargs["sigma"] = sigma
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"REFL_{date_str}.tif"
        try:
            efast.fusion(
                current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
            )
            print(
                f"[EFAST] Saved: {output_file}"
                if output_file.exists()
                else f"[EFAST] No output for {date_str} (insufficient nearby data)"
            )
        except Exception as e:
            # Deliberate best-effort: one bad day must not abort the season.
            print(f"[EFAST] Error processing {date_str}: {e}")
        current_date += timedelta(days=1)
    print("[EFAST] Completed")
 def run_all_efast_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
 ):
    """Run EFAST fusion for all 4 scenarios (2 cleaning strategies × 2 sigmas).

    For each strategy the default-sigma run (``sigma=None``) comes first, then
    the ``sigma_value`` run. Expects prepared_*/s2 and prepared_*/s3 to exist.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for scenario_sigma in (None, sigma_value):
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )
 def run_efast_itb(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
 ):
    """Run EFAST ``GCC`` fusion (ItB workflow) once per day over a date range.

    Args:
        season: Year; used for the default date range (Jan 1 – Dec 31) and to
            locate the base directory via ``_get_itb_base_dir``.
        site_position: ``(lat, lon)`` pair; only used for the log line here.
        site_name: Site identifier passed to ``_get_itb_base_dir``.
        cleaning_strategy: Passed to ``_get_itb_base_dir``.
        sigma: Forwarded to ``efast.fusion`` when not ``None``; also selects the
            ``fusion_sigma{sigma}`` output directory.
        date_range: ``"YYYY-MM-DD/YYYY-MM-DD"``; defaults to the whole season.
        s2_output_dir, s3_output_dir, fusion_output_dir: Optional overrides for
            the directories under the base directory.

    A failure on one day is printed and the loop continues with the next day.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    efast_base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
    s2_output_dir = s2_output_dir or (efast_base_dir / "s2")
    s3_output_dir = s3_output_dir or (efast_base_dir / "s3")
    # Use the same ``is not None`` test as the kwargs below, so an explicit
    # sigma of 0 does not write into the default-sigma "fusion" directory.
    fusion_output_dir = fusion_output_dir or (
        efast_base_dir / (f"fusion_sigma{sigma}" if sigma is not None else "fusion")
    )
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()
    start_str, end_str = datetime_range.split("/")
    start_date = datetime.strptime(start_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_str, "%Y-%m-%d")
    # The fusion options do not change per day; build them once.
    kwargs = {
        "product": "GCC",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        kwargs["sigma"] = sigma
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"GCC_{date_str}.tif"
        try:
            efast.fusion(
                current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
            )
            print(
                f"[EFAST-ITB] Saved: {output_file}"
                if output_file.exists()
                else f"[EFAST-ITB] No output for {date_str}"
            )
        except Exception as e:
            # Deliberate best-effort: one bad day must not abort the season.
            print(f"[EFAST-ITB] Error {date_str}: {e}")
        current_date += timedelta(days=1)
    print("[EFAST-ITB] Completed")
 def run_all_efast_itb_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
 ):
    """Run ItB (GCC) fusion for both cleaning strategies and both sigma settings.

    For each strategy the default-sigma run (``sigma=None``) comes first, then
    the ``sigma_value`` run.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for scenario_sigma in (None, sigma_value):
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )
--- a/fusion_phenology.py
+++ b/fusion_phenology.py
@ -1,263 +0,0 @@
 """
 No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
 Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
 directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
 ``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
 and day offsets vs. PhenoCam ``phenocam_phenology.json``.
 Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
 (:mod:`gap_validation.phenology_offsets`).
 """
 from __future__ import annotations
 import argparse
 import json
 import re
 from datetime import datetime
 from pathlib import Path
 from metrics_stats import _norm_date_key, load_timeseries
 from phenology_timesat import (
    _timesat as _timesat_pkg,
    build_yraw_three_years,
    iter_sites_seasons_from_sites_geojson,
    phenocam_phenology_path,
    run_timesat_phenology_from_yraw,
 )
 # All fusion scenario keys: the four BtI keys first, then the same four with
 # the ``_itb`` suffix; within each group strategy varies slowest, sigma fastest.
 FUSION_SCENARIO_KEYS: tuple[str, ...] = tuple(
    f"{strategy}_sigma{sigma}{suffix}"
    for suffix in ("", "_itb")
    for strategy in ("aggressive", "nonaggressive")
    for sigma in (20, 30)
 )
 def fusion_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path ``data/{site}/{season}/fusion_phenology.json``."""
    relative = "/".join(("data", f"{site_name}", f"{season}", "fusion_phenology.json"))
    return Path(relative)
 def parse_scenario_key(key: str) -> tuple[str, int, str]:
    """Split a fusion scenario key into ``(strategy, sigma, mode)``.

    ``aggressive_sigma20`` → ``("aggressive", 20, "bti")`` and
    ``nonaggressive_sigma30_itb`` → ``("nonaggressive", 30, "itb")``.

    Raises:
        ValueError: if *key* is not ``{aggressive|nonaggressive}_sigma<digits>``
            with an optional trailing ``_itb``.
    """
    # ``_itb`` is accepted only as a suffix. Stripping every occurrence would
    # let directory-style names such as ``aggressive_itb_sigma20`` parse as a
    # BtI key instead of being rejected.
    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)(_itb)?$", key)
    if not m:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    strategy, sigma_digits, itb_suffix = m.groups()
    return strategy, int(sigma_digits), "itb" if itb_suffix else "bti"
 def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
    """Return the fusion GCC ``timeseries.json`` path for one scenario.

    BtI scenarios live under ``processed_{strategy}_sigma{sigma}``; ItB
    scenarios under ``processed_{strategy}_itb_sigma{sigma}``.
    """
    strategy, sigma, mode = parse_scenario_key(scenario_key)
    workflow_part = "" if mode == "bti" else "_itb"
    processed = f"processed_{strategy}{workflow_part}_sigma{sigma}"
    return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
 def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
    """Map normalised date key → finite GCC value from a fusion ``timeseries.json``.

    Entries are dropped when the date key does not normalise (falsy result from
    ``_norm_date_key``), the value is ``None`` or not convertible to ``float``,
    or the value is NaN or ±inf.
    """
    # Function-scope import: this block needs ``math`` and must not rely on the
    # file's import header.
    import math

    out: dict[str, float] = {}
    for raw_key, raw_value in load_timeseries(ts_path).items():
        date_key = _norm_date_key(raw_key)
        if not date_key or raw_value is None:
            continue
        try:
            value = float(raw_value)
        except (TypeError, ValueError):
            continue
        # ``value == value`` only rejects NaN; isfinite also rejects ±inf.
        if math.isfinite(value):
            out[date_key] = value
    return out
 def timesat_transitions_from_by_date(
    by_date: dict[str, float], season: int
 ) -> dict[str, str | float | None]:
    """Run TIMESAT on fusion GCC and report the transition dates for *season*.

    With fewer than 10 values TIMESAT is not run and both dates and
    ``timesat_input`` are ``None``. Otherwise the series is stacked over
    ``season - 1 .. season + 1`` before TIMESAT is called.
    """
    green_up = green_down = stack_mode = None
    if len(by_date) >= 10:
        years = (season - 1, season, season + 1)
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
        result = run_timesat_phenology_from_yraw(yraw, years)
        green_up = result.get("green_up_50pct_date")
        green_down = result.get("green_down_50pct_date")
    return {
        "green_up_50pct_date": green_up,
        "green_down_50pct_date": green_down,
        "timesat_input": stack_mode,
        "n_values": len(by_date),
    }
 def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
    if not iso_a or not iso_b:
        return None
    try:
        a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
        b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
        return abs((a - b).days)
    except ValueError:
        return None
 def _offsets_vs_reference(
    fused: dict[str, str | float | None], reference: dict
 ) -> dict[str, int | None]:
    """Absolute day offsets of the fused green-up/green-down dates vs. *reference*.

    Both dicts are read under ``green_up_50pct_date`` / ``green_down_50pct_date``;
    an offset is ``None`` when ``_day_offset`` cannot compute it.
    """
    up_key, down_key = "green_up_50pct_date", "green_down_50pct_date"
    reference_up = reference.get(up_key)
    reference_down = reference.get(down_key)
    fused_up = fused.get(up_key)
    fused_down = fused.get(down_key)
    offsets = {"abs_day_offset_green_up": _day_offset(fused_up, reference_up)}
    offsets["abs_day_offset_green_down"] = _day_offset(fused_down, reference_down)
    return offsets
 def compute_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
 ) -> dict:
    """Build the ``fusion_phenology.json`` payload for one site/season.

    For every scenario key the fusion GCC timeseries is loaded, TIMESAT
    transitions are computed, and absolute day offsets against the PhenoCam
    reference are attached. A scenario whose timeseries file is missing gets
    only ``workflow`` and ``missing_timeseries``. A missing reference file is
    treated as an empty reference.
    """
    ref_path = phenocam_phenology_path(site_name, season)
    has_reference = ref_path.is_file()
    reference = json.loads(ref_path.read_text(encoding="utf-8")) if has_reference else {}

    scenarios: dict[str, dict] = {}
    for key in scenario_keys:
        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
        strategy, sigma, mode = parse_scenario_key(key)
        if ts_path.is_file():
            fused = timesat_transitions_from_by_date(fusion_gcc_by_date(ts_path), season)
            entry = {
                "workflow": mode,
                "strategy": strategy,
                "sigma": sigma,
                "timeseries_path": str(ts_path),
            }
            entry.update(fused)
            entry.update(_offsets_vs_reference(fused, reference))
        else:
            entry = {"workflow": mode, "missing_timeseries": str(ts_path)}
        scenarios[key] = entry

    reference_summary = {
        "source": str(ref_path) if has_reference else None,
        "green_up_50pct_date": reference.get("green_up_50pct_date"),
        "green_down_50pct_date": reference.get("green_down_50pct_date"),
    }
    return {
        "site_name": site_name,
        "season": season,
        "reference": reference_summary,
        "scenarios": scenarios,
    }
 def write_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
 ) -> Path | None:
    """Compute and write ``fusion_phenology.json`` for one site/season.

    Returns the written path, or ``None`` (after printing a hint) when the
    timesat package is unavailable.
    """
    target = fusion_phenology_path(site_name, season)
    if _timesat_pkg is None:
        print(
            f"[Fusion phenology] Skipped (no timesat); would write {target}. "
            "pip install timesat"
        )
        return None

    payload = compute_fusion_phenology_for_site(
        site_name, season, scenario_keys=scenario_keys
    )
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")

    # A scenario counts as successful when at least one transition date is set.
    dated = [
        scenario
        for scenario in payload["scenarios"].values()
        if scenario.get("green_up_50pct_date") or scenario.get("green_down_50pct_date")
    ]
    print(
        f"[Fusion phenology] Wrote {target} ({len(dated)}/{len(scenario_keys)} scenarios with "
        f"≥1 transition date)"
    )
    return target
 def write_fusion_phenology_all(
    *,
    sites_geojson: str | Path = "data/sites.geojson",
    seasons: dict[str, int] | None = None,
 ) -> int:
    """Write fusion phenology for many site/season pairs; return how many were written.

    When *seasons* is a non-empty mapping, its ``site → season`` pairs are
    processed in sorted order; otherwise the pairs come from
    ``iter_sites_seasons_from_sites_geojson(sites_geojson)``.
    """
    pairs = (
        sorted(seasons.items())
        if seasons
        else iter_sites_seasons_from_sites_geojson(sites_geojson)
    )
    written = 0
    for site, season in pairs:
        print(f"=== {site} {season} ===")
        written += 1 if write_fusion_phenology_for_site(site, season) else 0
    print(f"[Fusion phenology] Processed {written} site/season pair(s).")
    return written
 def main() -> None:
    """CLI entry point: one site/season, or all sites with ``--all``.

    With ``--all --primary-only`` the built-in primary-season table is used;
    with ``--all`` alone the site/season pairs come from the sites GeoJSON.
    Exits early when the timesat package is unavailable.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
    )
    parser.add_argument("--site", type=str, default=None)
    parser.add_argument("--season", type=int, default=None)
    parser.add_argument(
        "--all",
        action="store_true",
        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
    )
    parser.add_argument(
        "--primary-only",
        action="store_true",
        help="With --all: only thesis primary seasons per site.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=Path("data/sites.geojson"),
    )
    args = parser.parse_args()

    if _timesat_pkg is None:
        raise SystemExit("Install timesat: pip install timesat")

    if not args.all:
        if not args.site or args.season is None:
            raise SystemExit("Provide --site and --season, or use --all --primary-only")
        write_fusion_phenology_for_site(args.site, args.season)
        return

    seasons = None
    if args.primary_only:
        seasons = {
            "forthgr": 2024,
            "innsbruck": 2024,
            "pitsalu": 2024,
            "vindeln2": 2023,
            "sunflowerjerez1": 2024,
            "institutekarnobat": 2024,
        }
    write_fusion_phenology_all(sites_geojson=args.sites_geojson, seasons=seasons)


 if __name__ == "__main__":
    main()
--- a/gap_validation/init.py
+++ b/gap_validation/init.py
@ -1 +0,0 @@
 """Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""
--- a/gap_validation/main.py
+++ b/gap_validation/main.py
@ -1,4 +0,0 @@
 from gap_validation.run import main
 if __name__ == "__main__":
    main()
--- a/gap_validation/batch_spatial.py
+++ b/gap_validation/batch_spatial.py
@ -1,135 +0,0 @@
 """Run spatial NSE_S2 gap validation for all thesis sites (best BtI and/or ItB scenario per site, chosen via --workflow)."""
 from __future__ import annotations
 import argparse
 import json
 import re
 from pathlib import Path
 from gap_validation.run import run_validation
 # Primary season (year) per thesis site, keyed by PhenoCam sitename
 # (matches scripts/export_thesis_tables.py). The batch ``main`` in this module
 # iterates this table in sorted site order.
 PRIMARY_SEASON = {
    "forthgr": 2024,
    "innsbruck": 2024,
    "pitsalu": 2024,
    "vindeln2": 2023,
    "sunflowerjerez1": 2024,
    "institutekarnobat": 2024,
 }
 def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]:
    data = json.loads(geojson.read_text(encoding="utf-8"))
    out: dict[str, tuple[float, float]] = {}
    for feat in data.get("features", []):
        props = feat.get("properties") or {}
        name = props.get("sitename")
        coords = (feat.get("geometry") or {}).get("coordinates")
        if not name or not coords or len(coords) < 2:
            continue
        lon, lat = float(coords[0]), float(coords[1])
        out[str(name)] = (lat, lon)
    return out
 def _parse_scenario(key: str) -> tuple[str, int | None, str]:
    """``aggressive_sigma20`` → (strategy, sigma, bti)."""
    mode = "itb" if key.endswith("_itb") else "bti"
    base = key.replace("_itb", "")
    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
    if not m:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    strategy = m.group(1)
    sigma = int(m.group(2))
    return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode
 def _best_from_metrics(metrics_path: Path, workflow: str) -> str | None:
    """Best scenario key (max no-gap NSE_PC) for ``workflow`` (``bti`` or ``itb``)."""
    if workflow not in ("bti", "itb"):
        raise ValueError(f"workflow must be bti or itb, got {workflow!r}")
    if not metrics_path.is_file():
        return None
    temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {}
    want_itb = workflow == "itb"
    best_key, best_nse = None, None
    for k, v in temporal.items():
        if k.endswith("_itb") != want_itb or not isinstance(v, dict):
            continue
        n = v.get("nse_pc")
        if isinstance(n, (int, float)) and (best_nse is None or n > best_nse):
            best_nse = n
            best_key = k
    return best_key
 def _best_bti_from_metrics(metrics_path: Path) -> str | None:
    """Shorthand for ``_best_from_metrics`` restricted to the BtI workflow."""
    return _best_from_metrics(metrics_path, workflow="bti")
 def _best_itb_from_metrics(metrics_path: Path) -> str | None:
    """Shorthand for ``_best_from_metrics`` restricted to the ItB workflow."""
    return _best_from_metrics(metrics_path, workflow="itb")
 def _resolve_workflows(workflow: str) -> tuple[str, ...]:
    return ("bti", "itb") if workflow == "both" else (workflow,)
 def main() -> None:
    """CLI entry point: spatial gap validation for every site in ``PRIMARY_SEASON``.

    For each site (sorted) and each requested workflow, the best scenario is
    read from ``metrics.json`` and handed to ``run_validation``. Sites without
    coordinates and workflows without a usable scenario are skipped with a
    message.
    """
    parser = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument("--write-manifest-only", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        help="Filter gap lengths (default: all 15 and 30 in manifest).",
    )
    args = parser.parse_args()

    positions = _site_positions(args.sites_geojson)
    workflows = _resolve_workflows(args.workflow)
    for site in sorted(PRIMARY_SEASON):
        season = PRIMARY_SEASON[site]
        site_position = positions.get(site)
        if not site_position:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_path = args.data_dir / site / str(season) / "metrics.json"
        for workflow in workflows:
            scenario_key = _best_from_metrics(metrics_path, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            # Only sigma 30 is passed on explicitly; anything else becomes None.
            sigma_kw = 30 if sigma == 30 else None
            print(f"=== {site} {season} {scenario_key} ===")
            result = run_validation(
                site,
                season,
                site_position,
                strategy,
                sigma_kw,
                mode,
                skip_manifest=False,
                skip_fusion=args.skip_fusion,
                write_manifest_only=args.write_manifest_only,
                gap_days_filter=args.gap_days,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)


 if __name__ == "__main__":
    main()
--- a/gap_validation/batch_temporal.py
+++ b/gap_validation/batch_temporal.py
@ -1,65 +0,0 @@
 """Run full-season gap-degraded NSE_PC for all thesis sites (best BtI and/or ItB scenario, chosen via --workflow)."""
 from __future__ import annotations
 import argparse
 from pathlib import Path
 from gap_validation.batch_spatial import (
    PRIMARY_SEASON,
    _best_from_metrics,
    _parse_scenario,
    _resolve_workflows,
    _site_positions,
 )
 from gap_validation.temporal_pc import run_temporal_pc
 def main() -> None:
    """CLI entry point: temporal gap NSE_PC for every site in ``PRIMARY_SEASON``.

    For each site (sorted) and each requested workflow, the best scenario is
    read from ``metrics.json`` and handed to ``run_temporal_pc``. Sites without
    coordinates and workflows without a usable scenario are skipped with a
    message.
    """
    parser = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument("--gap-days", type=int, action="append")
    args = parser.parse_args()

    positions = _site_positions(args.sites_geojson)
    workflows = _resolve_workflows(args.workflow)
    for site in sorted(PRIMARY_SEASON):
        season = PRIMARY_SEASON[site]
        site_position = positions.get(site)
        if not site_position:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_path = args.data_dir / site / str(season) / "metrics.json"
        for workflow in workflows:
            scenario_key = _best_from_metrics(metrics_path, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            # Only sigma 30 is passed on explicitly; anything else becomes None.
            sigma_kw = 30 if sigma == 30 else None
            print(f"=== {site} {season} temporal {scenario_key} ===")
            result = run_temporal_pc(
                site,
                season,
                site_position,
                strategy,
                sigma_kw,
                mode,
                skip_manifest=False,
                skip_fusion=args.skip_fusion,
                gap_days_filter=args.gap_days,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)


 if __name__ == "__main__":
    main()
--- a/gap_validation/calendar.py
+++ b/gap_validation/calendar.py
@ -1,210 +0,0 @@
 """Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
 from __future__ import annotations
 import json
 import re
 from datetime import date, datetime, timedelta
 from pathlib import Path
 from phenology_timesat import phenocam_phenology_path
 # Captures the 8-digit acquisition date (YYYYMMDD) from a prepared S2 REFL filename.
 # NOTE(review): only the ``S2A_`` prefix matches; files named S2B_/S2C_ would be
 # skipped by list_s2_refl_dates — confirm the preparation step always writes S2A.
 REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
 # Default synthetic gap lengths, in days.
 DEFAULT_GAP_LENGTHS = (15, 30)
 # Transition names accepted by transition_midpoint().
 TRANSITIONS = ("green_up", "green_down")
 def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative directory ``data/{site}/{season}/validation``."""
    relative = "/".join(("data", f"{site_name}", f"{season}", "validation"))
    return Path(relative)
 def _parse_iso_date(s, season: int) -> date | None:
    if not s or not isinstance(s, str):
        return None
    try:
        d = datetime.strptime(s[:10], "%Y-%m-%d").date()
    except ValueError:
        return None
    y0, y1 = date(season, 1, 1), date(season, 12, 31)
    return d if y0 <= d <= y1 else None
 def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
 ) -> date | None:
    """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing.

    The date is read from *phenology_path* (default: the site's PhenoCam
    phenology file). ``None`` is returned when the file is absent, unreadable,
    not valid JSON, or the stored date is missing or outside *season*.

    Raises:
        ValueError: if *transition* is not one of ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    source = phenology_path or phenocam_phenology_path(site_name, season)
    if not source.is_file():
        return None
    try:
        record = json.loads(source.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    # Anything other than "green_up" maps to the green-down key.
    date_key = {"green_up": "green_up_50pct_date"}.get(transition, "green_down_50pct_date")
    return _parse_iso_date(record.get(date_key), season)
 def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
 ) -> date:
    """Legacy: green-up if in season, else green-down, else July 1."""
    candidates = (
        transition_midpoint(site_name, season, transition, phenology_path)
        for transition in ("green_up", "green_down")
    )
    # Lazy generator: green-down is only looked up when green-up is unavailable.
    first_found = next((found for found in candidates if found), None)
    if first_found is None:
        return date(season, 7, 1)
    return first_found
 def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """[start, end] inclusive, gap_days wide, clamped to calendar year.

    The window is centred on *mid* (for even widths the extra day goes before
    *mid*). If it crosses a year boundary it is shifted back inside by the
    overshoot, and clamped at the opposite boundary.
    """
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    start = mid - timedelta(days=days_before)
    end = mid + timedelta(days=days_after)
    year_first = date(season, 1, 1)
    year_last = date(season, 12, 31)

    no_shift = timedelta(0)
    front_overshoot = year_first - start
    if front_overshoot > no_shift:
        end = min(year_last, end + front_overshoot)
        start = year_first
    back_overshoot = end - year_last
    if back_overshoot > no_shift:
        start = max(year_first, start - back_overshoot)
        end = year_last
    return start, end
 def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """Return sorted (acquisition_date, filename) for *REFL.tif.

    Files whose name does not match ``REFL_DATE_RE`` are ignored; a missing
    directory yields an empty list. Files with equal dates keep filename order.
    """
    if not prepared_s2.is_dir():
        return []
    named_matches = (
        (candidate.name, REFL_DATE_RE.search(candidate.name))
        for candidate in sorted(prepared_s2.glob("*REFL.tif"))
    )
    dated = [
        (datetime.strptime(match.group(1), "%Y%m%d").date(), filename)
        for filename, match in named_matches
        if match
    ]
    # Stable sort on the date only, so the filename order above breaks ties.
    return sorted(dated, key=lambda pair: pair[0])
 def nearest_s2_acquisition(
    prediction: date, pairs: list[tuple[date, str]]
 ) -> tuple[date, str] | None:
    """Return the ``(date, filename)`` pair closest in days to *prediction*.

    On a tie the earliest entry in *pairs* wins; an empty list gives ``None``.
    """
    closest = None
    closest_gap = None
    for candidate in pairs:
        gap = abs((candidate[0] - prediction).days)
        # Strict ``<`` keeps the first of equally distant candidates.
        if closest_gap is None or gap < closest_gap:
            closest, closest_gap = candidate, gap
    return closest
 def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
 ) -> list[dict]:
    """One entry per (transition, gap_days): phenology midpoint, window, withheld S2.

    Transitions without a midpoint date are skipped. The prediction date is the
    midpoint itself, and the withheld scene is the prepared S2 acquisition
    nearest to it (``None`` fields when no S2 scene is available).
    """
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    acquisitions = list_s2_refl_dates(s2_dir)
    manifest: list[dict] = []
    for transition in transitions:
        midpoint = transition_midpoint(site_name, season, transition)
        if midpoint is None:
            continue
        # The nearest scene depends only on the midpoint, not on the gap length.
        nearest = nearest_s2_acquisition(midpoint, acquisitions)
        if nearest is not None:
            withheld_date, withheld_filename = nearest[0].isoformat(), nearest[1]
        else:
            withheld_date = withheld_filename = None
        for gap_days in gap_lengths:
            window_start, window_end = centered_window(midpoint, gap_days, season)
            manifest.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "midpoint_rule": f"{transition}_50pct_date",
                    "midpoint_date": midpoint.isoformat(),
                    "window_start": window_start.isoformat(),
                    "window_end": window_end.isoformat(),
                    "prediction_date": midpoint.isoformat(),
                    "withheld_s2_date": withheld_date,
                    "withheld_s2_filename": withheld_filename,
                }
            )
    return manifest
 def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
 ) -> Path:
    """Reproducibility sidecar for withheld scenes and gap placement.

    Writes ``gap_withheld_images.json`` into the site's validation directory,
    with one record per manifest entry (gap placement and withheld-scene
    fields, plus site and season).

    Returns:
        Path of the written file.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # Create the directory here so the function also works when it is called
    # without a prior write_manifest().
    path.parent.mkdir(parents=True, exist_ok=True)
    copied_fields = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{field: entry.get(field) for field in copied_fields},
        }
        for entry in entries
    ]
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
 def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
    *,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
 ) -> Path:
    """Write ``gap_manifest.json`` and the withheld-images sidecar; return the manifest path.

    The validation directory is created if needed. The manifest records the
    site, season, position, S2 calendar strategy and the gap entries.
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)
    entries = build_manifest_entries(
        site_name,
        season,
        gap_lengths=gap_lengths,
        transitions=transitions,
        s2_calendar_strategy=s2_calendar_strategy,
    )
    manifest_path = target_dir / "gap_manifest.json"
    manifest_text = json.dumps(
        {
            "site_name": site_name,
            "season": season,
            "site_position_lat_lon": list(site_position),
            "s2_calendar_strategy": s2_calendar_strategy,
            "entries": entries,
        },
        indent=2,
    )
    manifest_path.write_text(manifest_text + "\n", encoding="utf-8")
    write_gap_withheld_images(site_name, season, entries)
    return manifest_path
 def load_manifest(site_name: str, season: int) -> dict:
    """Load ``gap_manifest.json`` from the site's validation directory.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
    """
    manifest_path = validation_dir(site_name, season) / "gap_manifest.json"
    if manifest_path.is_file():
        return json.loads(manifest_path.read_text(encoding="utf-8"))
    raise FileNotFoundError(f"Missing manifest: {manifest_path}")
--- a/gap_validation/export_rasters.py
+++ b/gap_validation/export_rasters.py
@ -1,438 +0,0 @@
 """Export 2×4 RGB panels for Tier-A gap validation (thesis appendix).
 Crops follow the same fusion-valid bounding box as ``postprocessing.process_cropped``
 and the webapp (``processed_*`` / ``common.js``), anchored on gap-degraded fusion at the
 prediction date; S2 and S3 are read from prepared stacks on that shared window.
 """
 from __future__ import annotations
 import json
 import re
 from datetime import date, datetime
 from pathlib import Path
 import matplotlib.pyplot as plt
 import numpy as np
 import rasterio
 from rasterio import windows
 from rasterio.transform import rowcol
 from rasterio.warp import Resampling, reproject
 from gap_validation.s2_mask_dir import acquisition_yyyymmdd_in_window, yyyymmdd_from_iso
 # Matches a prepared Sentinel-2 reflectance filename tail; group 1 is the
 # 8-digit acquisition date (YYYYMMDD).
 REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
 # Matches an S3 composite filename tail; group 1 is the 8-digit date.
 S3_COMPOSITE_RE = re.compile(r"composite_(\d{8})\.tif$")
 # Transition keys looked up in the manifest, in panel row order.
 TRANSITIONS = ("green_up", "green_down")
 # Column titles, in the same order as the path tuple returned by
 # ``_resolve_row_paths`` (withheld S2, gap fusion, S3 composite, nearest S2).
 COL_TITLES = ("Withheld S2", "Gap fusion", "S3 composite", "Nearest S2")
 # Display labels for the row (y-axis) of each transition.
 ROW_LABELS = {"green_up": "Green-up", "green_down": "Green-down"}
 # A value must be strictly greater than this to count as valid.
 VALID_REFL_THRESHOLD = 0.001
 # RGB colour painted on pixels that are not valid.
 NODATA_RGB = (0.15, 0.15, 0.15)
 def _parse_bti_scenario(scenario: str) -> tuple[str, int]:
    m = re.match(r"^(aggressive|nonaggressive)_sigma(20|30)$", scenario)
    if not m:
        raise ValueError(f"expected BtI scenario key, got {scenario!r}")
    return m.group(1), int(m.group(2))
 def _prepared_base(data_dir: Path, site: str, season: int, strategy: str) -> Path:
    return data_dir / site / str(season) / f"prepared_{strategy}"
 def _s2_strategy_fallbacks(strategy: str, manifest: dict) -> tuple[str, ...]:
    """Prepared trees to try for S2 REFL (best-BtI first, then manifest calendar)."""
    order: list[str] = []
    for s in (strategy, manifest.get("s2_calendar_strategy")):
        if isinstance(s, str) and s and s not in order:
            order.append(s)
    for s in ("aggressive", "nonaggressive"):
        if s not in order:
            order.append(s)
    return tuple(order)
 def _find_prepared_s2_refl(
    data_dir: Path,
    site: str,
    season: int,
    filename: str,
    strategies: tuple[str, ...],
 ) -> Path | None:
    """Return the first existing ``<prepared tree>/s2/<filename>``, or ``None``.

    Strategies are probed in the order given.
    """
    candidates = (
        _prepared_base(data_dir, site, season, strat) / "s2" / filename
        for strat in strategies
    )
    return next((path for path in candidates if path.is_file()), None)
 def _gap_spatial_fusion_dir(
    data_dir: Path,
    site: str,
    season: int,
    gap_days: int,
    transition: str,
    strategy: str,
    sigma: int,
 ) -> Path:
    return (
        data_dir
        / site
        / str(season)
        / "validation"
        / "fusion"
        / f"gap_{gap_days}_{transition}"
        / f"{strategy}_sigma{sigma}_bti"
    )
 def _iso_to_date(iso_d: str) -> date:
    return datetime.strptime(iso_d[:10], "%Y-%m-%d").date()
 def _exclude_ymds(entry: dict) -> set[str]:
    """Return the withheld acquisition date (YYYYMMDD) as a one-element set.

    The date is parsed from ``entry["withheld_s2_filename"]``; the set is
    empty when the key is missing or the name does not match ``REFL_DATE_RE``.
    """
    filename = entry.get("withheld_s2_filename") or ""
    hit = REFL_DATE_RE.search(filename)
    if hit is None:
        return set()
    return {hit.group(1)}
 def nearest_stack_s2(
    prepared_s2_dir: Path,
    prediction_iso: str,
    *,
    exclude_ymds: set[str],
 ) -> Path | None:
    """Find the prepared S2 reflectance file closest in time to a date.

    Args:
        prepared_s2_dir: Directory scanned for ``S2A_MSIL2A_*_REFL.tif``.
        prediction_iso: Target date; only the leading ``YYYY-MM-DD`` is used.
        exclude_ymds: Acquisition dates (YYYYMMDD) that must not be returned.

    Returns:
        The nearest file, or ``None`` if the directory is missing or no
        eligible file exists. When two files are equally distant, the one
        with the earlier acquisition date is returned.
    """
    if not prepared_s2_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    best_path: Path | None = None
    best_delta: int | None = None
    # Sort the listing: directory iteration order is arbitrary, and combined
    # with the strict ``<`` below it would make ties platform-dependent.
    for p in sorted(prepared_s2_dir.glob("S2A_MSIL2A_*_REFL.tif")):
        m = REFL_DATE_RE.search(p.name)
        if not m or m.group(1) in exclude_ymds:
            continue
        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
        if best_delta is None or delta < best_delta:
            best_delta = delta
            best_path = p
    return best_path
 def nearest_s3_composite(prepared_s3_dir: Path, prediction_iso: str) -> Path | None:
    """Find the S3 composite closest in time to a date.

    Args:
        prepared_s3_dir: Directory scanned for ``composite_*.tif``.
        prediction_iso: Target date; only the leading ``YYYY-MM-DD`` is used.

    Returns:
        The nearest composite, or ``None`` if the directory is missing or
        holds no matching file. When two composites are equally distant, the
        one with the earlier date is returned.
    """
    if not prepared_s3_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    best_path: Path | None = None
    best_delta: int | None = None
    # Sort the listing: directory iteration order is arbitrary, and combined
    # with the strict ``<`` below it would make ties platform-dependent.
    for p in sorted(prepared_s3_dir.glob("composite_*.tif")):
        m = S3_COMPOSITE_RE.search(p.name)
        if not m:
            continue
        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
        if best_delta is None or delta < best_delta:
            best_delta = delta
            best_path = p
    return best_path
 def _crop_window_from_fusion(fusion_path: Path) -> dict | None:
    """Fusion-valid crop (``postprocessing.process_cropped``) on the full prepared grid."""
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as src:
        data = src.read()
        valid = np.isfinite(data) & (data > VALID_REFL_THRESHOLD)
        rows = np.any(valid, axis=(0, 2))
        cols = np.any(valid, axis=(0, 1))
        row_idx = np.where(rows)[0]
        col_idx = np.where(cols)[0]
        if len(row_idx) == 0 or len(col_idx) == 0:
            return None
        r0, r1 = int(row_idx[0]), int(row_idx[-1])
        c0, c1 = int(col_idx[0]), int(col_idx[-1])
        w, h = c1 - c0 + 1, r1 - r0 + 1
        win = windows.Window(c0, r0, w, h)
        return {
            "window": win,
            "crop_transform": windows.transform(win, src.transform),
            "full_transform": src.transform,
            "crs": src.crs,
            "profile": src.profile.copy(),
        }
 def _read_bgr_prepared_s2(prepared_refl: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    if not prepared_refl.is_file():
        return None
    with rasterio.open(prepared_refl) as src:
        if src.count < 3:
            return None
        b, g, r = src.read(indexes=(1, 2, 3), window=crop["window"])
        return b.astype(np.float64), g.astype(np.float64), r.astype(np.float64)
 def _read_bgr_gap_fusion(fusion_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as src:
        if src.count < 3:
            return None
        b, g, r = src.read(indexes=(1, 2, 3), window=crop["window"])
        return b.astype(np.float64), g.astype(np.float64), r.astype(np.float64)
 def _read_bgr_prepared_s3(s3_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Resample S3 composite to the fusion grid, then crop (matches ``process_cropped``)."""
    if not s3_path.is_file():
        return None
    with rasterio.open(s3_path) as src:
        if src.count < 3:
            return None
        temp_profile = crop["profile"].copy()
        temp_profile.update({"dtype": "float32", "count": src.count})
        bands: list[np.ndarray] = []
        with rasterio.MemoryFile() as memfile:
            with memfile.open(**temp_profile) as resampled:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(resampled, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=crop["full_transform"],
                        dst_crs=crop["crs"],
                        resampling=Resampling.nearest,
                    )
                b, g, r = resampled.read(
                    indexes=(1, 2, 3), window=crop["window"]
                )
                bands = [
                    b.astype(np.float64),
                    g.astype(np.float64),
                    r.astype(np.float64),
                ]
        return bands[0], bands[1], bands[2]
 def _refl_valid(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Boolean mask of pixels that are usable in all three bands.

    A pixel is usable when each band is finite and strictly greater than
    ``VALID_REFL_THRESHOLD``.
    """
    all_finite = np.isfinite(blue) & np.isfinite(green) & np.isfinite(red)
    all_above = (
        (blue > VALID_REFL_THRESHOLD)
        & (green > VALID_REFL_THRESHOLD)
        & (red > VALID_REFL_THRESHOLD)
    )
    return all_finite & all_above
 def _panel_stretch_limits(
    blue: np.ndarray, green: np.ndarray, red: np.ndarray, valid: np.ndarray
 ) -> tuple[float, float]:
    """Per-panel 2--98 % stretch on positive reflectance (webapp ``common.js`` style)."""
    if not valid.any():
        return 0.0, 1.0
    vals = np.concatenate([red[valid], green[valid], blue[valid]])
    lo, hi = np.percentile(vals, (2, 98))
    if hi <= lo:
        return 0.0, 1.0
    return float(lo), float(hi)
 def _bgr_to_rgba(
    blue: np.ndarray,
    green: np.ndarray,
    red: np.ndarray,
    *,
    valid: np.ndarray,
    vmin: float,
    vmax: float,
 ) -> np.ndarray:
    """Compose an opaque ``float32`` RGBA image from three bands.

    Valid pixels are linearly stretched from ``[vmin, vmax]`` to ``[0, 1]``
    and clipped; all other pixels receive ``NODATA_RGB``. Alpha is 1
    everywhere.
    """
    image = np.zeros(blue.shape + (4,), dtype=np.float32)
    image[..., 3] = 1.0
    image[~valid, :3] = NODATA_RGB
    # A zero range would divide by zero; fall back to a unit span.
    span = vmax - vmin or 1.0
    for channel, band in enumerate((red, green, blue)):
        stretched = np.clip((band - vmin) / span, 0.0, 1.0)
        image[..., channel] = np.where(valid, stretched, image[..., channel])
    return image
 def _phenocam_pixel_cropped(
    crop: dict, site_position_lat_lon: tuple[float, float]
 ) -> tuple[int, int] | None:
    """Locate the site position as a ``(row, col)`` pixel in the cropped raster.

    Args:
        crop: Crop description providing ``crs`` and ``crop_transform``.
        site_position_lat_lon: Site position as ``(lat, lon)``; assumed to be
            WGS84 degrees (EPSG:4326) — TODO confirm against the manifest
            writer.

    Returns:
        ``(row, col)`` relative to the crop window, or ``None`` when the
        position cannot be converted. The result is not bounds-checked.
    """
    # Local imports keep this fix self-contained; rasterio is already a
    # dependency of this module.
    from rasterio.crs import CRS
    from rasterio.warp import transform as warp_transform

    lat, lon = site_position_lat_lon
    try:
        # The raster CRS is generally projected, so lon/lat must be
        # reprojected before applying the affine transform. (``rowcol``'s
        # ``op`` argument is a rounding callable, not a CRS.)
        xs, ys = warp_transform(CRS.from_epsg(4326), crop["crs"], [lon], [lat])
        rows, cols = rowcol(crop["crop_transform"], xs, ys)
        # atleast_1d tolerates both scalar and sequence return styles.
        return int(np.atleast_1d(rows)[0]), int(np.atleast_1d(cols)[0])
    except Exception:
        # Best effort: the marker is decoration, so a failed conversion must
        # not abort the panel export.
        return None
 def _resolve_row_paths(
    data_dir: Path,
    site: str,
    season: int,
    entry: dict,
    strategy: str,
    sigma: int,
    *,
    gap_days: int,
    manifest: dict,
 ) -> tuple[Path, Path, Path, Path] | None:
    """Resolve the four rasters shown in one panel row.

    Returns ``(withheld S2, gap fusion, S3 composite, nearest S2)`` or
    ``None`` if the entry has no withheld filename or any of the four cannot
    be found. The nearest S2 excludes every acquisition inside the gap
    window as well as the withheld date.
    """
    prediction_iso = entry["prediction_date"]
    pred_ymd = yyyymmdd_from_iso(prediction_iso)
    transition = entry["transition"]
    prepared_root = _prepared_base(data_dir, site, season, strategy)
    s2_strategies = _s2_strategy_fallbacks(strategy, manifest)

    withheld_name = entry.get("withheld_s2_filename")
    if not withheld_name:
        return None
    withheld_path = _find_prepared_s2_refl(
        data_dir, site, season, withheld_name, s2_strategies
    )

    fusion_dir = _gap_spatial_fusion_dir(
        data_dir, site, season, gap_days, transition, strategy, sigma
    )
    fusion_path = fusion_dir / f"REFL_{pred_ymd}.tif"

    # Prefer the composite of the prediction date, else the closest one.
    s3_dir = prepared_root / "s3"
    s3_path = s3_dir / f"composite_{pred_ymd}.tif"
    if not s3_path.is_file():
        s3_path = nearest_s3_composite(prepared_root / "s3", prediction_iso)

    window_start = _iso_to_date(entry["window_start"])
    window_end = _iso_to_date(entry["window_end"])
    nearest_path: Path | None = None
    for candidate_strategy in s2_strategies:
        s2_dir = _prepared_base(data_dir, site, season, candidate_strategy) / "s2"
        in_window = acquisition_yyyymmdd_in_window(s2_dir, window_start, window_end)
        blocked = in_window | _exclude_ymds(entry)
        nearest_path = nearest_stack_s2(
            s2_dir, prediction_iso, exclude_ymds=blocked
        )
        if nearest_path is not None:
            break

    if (
        withheld_path is not None
        and fusion_path.is_file()
        and s3_path is not None
        and nearest_path is not None
    ):
        return withheld_path, fusion_path, s3_path, nearest_path
    return None
 def build_site_panel(
    site: str,
    season: int,
    data_dir: Path,
    out_png: Path,
    *,
    best_bti_scenario: str,
    site_label: str,
    site_position_lat_lon: tuple[float, float] | None = None,
    gap_days: int = 30,
 ) -> bool:
    """Render the gap-validation RGB panel: one row per transition, four columns.

    Args:
        site: Site directory name under ``data_dir``.
        season: Season (year) directory under the site.
        data_dir: Root data directory.
        out_png: Destination PNG; parent directories are created.
        best_bti_scenario: Scenario key parsed by ``_parse_bti_scenario``.
        site_label: Text used in the figure title.
        site_position_lat_lon: Optional position marked with a red ``+`` in
            the first column when it falls inside the crop.
        gap_days: Gap length selecting the manifest entries.

    Returns:
        ``False`` if the manifest is missing or no transition row could be
        resolved; ``True`` once the figure has been saved. Transitions
        without a resolvable entry are omitted, and rows whose rasters
        cannot be cropped or read are left blank (axes hidden).

    Raises:
        ValueError: If ``best_bti_scenario`` is not a valid scenario key.
    """
    manifest_path = data_dir / site / str(season) / "validation" / "gap_manifest.json"
    if not manifest_path.is_file():
        return False
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    strategy, sigma = _parse_bti_scenario(best_bti_scenario)
    rows: list[tuple[str, dict, tuple[Path, Path, Path, Path]]] = []
    for transition in TRANSITIONS:
        entry = next(
            (
                e
                for e in manifest["entries"]
                if e.get("gap_days") == gap_days and e.get("transition") == transition
            ),
            None,
        )
        if not entry:
            continue
        paths = _resolve_row_paths(
            data_dir,
            site,
            season,
            entry,
            strategy,
            sigma,
            gap_days=gap_days,
            manifest=manifest,
        )
        if paths is None:
            continue
        rows.append((transition, entry, paths))
    if not rows:
        return False
    # One reader per column, in the same order as the path tuple.
    readers = (
        _read_bgr_prepared_s2,
        _read_bgr_gap_fusion,
        _read_bgr_prepared_s3,
        _read_bgr_prepared_s2,
    )
    fig, axes = plt.subplots(
        len(rows),
        4,
        figsize=(12.0, 2.8 * len(rows)),
        squeeze=False,
        constrained_layout=True,
    )
    # pyplot keeps a reference to every open figure, so close it even when
    # reading, plotting or saving raises.
    try:
        for row_idx, (transition, entry, paths) in enumerate(rows):
            row_title = ROW_LABELS.get(transition, transition)
            # The crop is anchored on the gap-fusion raster of this row.
            crop = _crop_window_from_fusion(paths[1])
            if crop is None:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue
            layers: list[tuple[np.ndarray, np.ndarray, np.ndarray]] = []
            for path, read_fn in zip(paths, readers, strict=True):
                bgr = read_fn(path, crop)
                if bgr is None:
                    layers = []
                    break
                layers.append(bgr)
            if len(layers) != 4:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue
            mark: tuple[int, int] | None = None
            if site_position_lat_lon:
                mark = _phenocam_pixel_cropped(crop, site_position_lat_lon)
            for col_idx, (col_title, bgr) in enumerate(zip(COL_TITLES, layers, strict=True)):
                ax = axes[row_idx, col_idx]
                blue, green, red = bgr
                valid = _refl_valid(blue, green, red)
                # Each panel gets its own stretch limits.
                vmin, vmax = _panel_stretch_limits(blue, green, red, valid)
                rgba = _bgr_to_rgba(
                    blue, green, red, valid=valid, vmin=vmin, vmax=vmax
                )
                ax.imshow(rgba, origin="upper", aspect="equal", interpolation="nearest")
                h, w = rgba.shape[:2]
                if col_idx == 0 and mark and 0 <= mark[0] < h and 0 <= mark[1] < w:
                    ax.plot(
                        mark[1],
                        mark[0],
                        "+",
                        color="red",
                        markersize=8,
                        markeredgewidth=1.2,
                    )
                if row_idx == 0:
                    ax.set_title(col_title, fontsize=9)
                if col_idx == 0:
                    ax.set_ylabel(row_title, fontsize=9)
                ax.set_xticks([])
                ax.set_yticks([])
        fig.suptitle(f"{site_label} ({season})", fontsize=10)
        out_png.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_png, dpi=150)
    finally:
        plt.close(fig)
    return True
--- a/gap_validation/fusion_masked.py
+++ b/gap_validation/fusion_masked.py
@ -1,200 +0,0 @@
 """EFAST with symlinked S2 dir (gap window omitted); outputs under validation/."""
 from __future__ import annotations
 from datetime import datetime
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from fusion import run_efast, run_efast_itb
 from preparation import _get_base_dir, _get_itb_base_dir
 from gap_validation.s2_mask_dir import (
    acquisition_yyyymmdd_in_window,
    assert_no_leakage,
    build_masked_s2_dir_bti,
    build_masked_s2_dir_itb,
 )
 def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
    """Return the ``s3`` subdirectory of the prepared tree from ``_get_base_dir``."""
    prepared_root = _get_base_dir(season, site_name, strategy)
    return prepared_root / "s3"
 def validation_fusion_dir(
    site_name: str,
    season: int,
    gap_days: int,
    transition: str,
    strategy: str,
    sigma: int | None,
    mode: str,
 ) -> Path:
    """``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``.

    Any ``sigma`` other than 30 (including ``None``) is written as 20.
    """
    sigma_tag = 30 if sigma == 30 else 20
    validation_root = Path(f"data/{site_name}/{season}/validation")
    gap_folder = f"gap_{gap_days}_{transition}"
    scenario_folder = f"{strategy}_sigma{sigma_tag}_{mode}"
    return validation_root.joinpath("fusion", gap_folder, scenario_folder)
 def excluded_acquisition_days(
    prepared_s2: Path,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
 ) -> set[str]:
    """Acquisition days to drop: every S2 day in the gap window plus the withheld day.

    Only the leading ``YYYY-MM-DD`` of each ISO bound is parsed.
    """
    first_day, last_day = (
        datetime.strptime(stamp[:10], "%Y-%m-%d").date()
        for stamp in (window_start_iso, window_end_iso)
    )
    days = acquisition_yyyymmdd_in_window(prepared_s2, first_day, last_day)
    days.add(withheld_yyyymmdd)
    return days
 def run_masked_fusion_one_date(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    prediction_date_iso: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
 ) -> Path:
    """Build a temporary masked S2 dir and run EFAST for ``prediction_date_iso`` only.

    Args:
        mode: ``"bti"`` or ``"itb"``; selects the prepared tree, the masking
            helper and the EFAST entry point.
        prediction_date_iso: Prediction date; its leading ``YYYY-MM-DD`` is
            used as both ends of the fusion date range.
        window_start_iso, window_end_iso: Bounds of the gap window whose S2
            acquisitions are excluded.
        withheld_yyyymmdd: Withheld acquisition day, also excluded.
        fusion_output_dir: Output directory; created if missing.

    Returns:
        ``fusion_output_dir``.

    Raises:
        ValueError: If ``mode`` is neither ``"bti"`` nor ``"itb"``. This is
            checked before any directory is created.
    """
    # Fail fast so an invalid mode leaves no output or temp directory behind.
    if mode not in ("bti", "itb"):
        raise ValueError(f"mode must be bti or itb, got {mode!r}")
    is_bti = mode == "bti"
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    date_range = f"{prediction_date_iso[:10]}/{prediction_date_iso[:10]}"
    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        if is_bti:
            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
        else:
            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
        excl = excluded_acquisition_days(
            prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
        )
        if is_bti:
            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
        else:
            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
        # Guard against the withheld acquisition reaching the fusion input.
        assert_no_leakage(withheld_yyyymmdd, tmp_s2)
        if is_bti:
            run_fusion = run_efast
            s3_dir = prepared_s3_dir(season, site_name, strategy)
        else:
            run_fusion = run_efast_itb
            s3_dir = _get_itb_base_dir(season, site_name, strategy) / "s3"
        run_fusion(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
            date_range=date_range,
            s2_output_dir=tmp_s2,
            s3_output_dir=s3_dir,
            fusion_output_dir=fusion_output_dir,
        )
    return fusion_output_dir
 def run_masked_fusion_season(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
 ) -> Path:
    """Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier).

    Args:
        mode: ``"bti"`` or ``"itb"``; selects the prepared tree, the masking
            helper and the EFAST entry point.
        window_start_iso, window_end_iso: Bounds of the gap window whose S2
            acquisitions are excluded.
        withheld_yyyymmdd: Withheld acquisition day, also excluded.
        fusion_output_dir: Output directory; created if missing.

    Returns:
        ``fusion_output_dir``.

    Raises:
        ValueError: If ``mode`` is neither ``"bti"`` nor ``"itb"``.
    """
    # Reject unknown modes explicitly (as run_masked_fusion_one_date does)
    # instead of silently running the ItB workflow for any non-"bti" value.
    if mode not in ("bti", "itb"):
        raise ValueError(f"mode must be bti or itb, got {mode!r}")
    is_bti = mode == "bti"
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    date_range = f"{season}-01-01/{season}-12-31"
    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        if is_bti:
            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
        else:
            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
        excl = excluded_acquisition_days(
            prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
        )
        if is_bti:
            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
        else:
            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
        # Guard against the withheld acquisition reaching the fusion input.
        assert_no_leakage(withheld_yyyymmdd, tmp_s2)
        if is_bti:
            run_fusion = run_efast
            s3_dir = prepared_s3_dir(season, site_name, strategy)
        else:
            run_fusion = run_efast_itb
            s3_dir = _get_itb_base_dir(season, site_name, strategy) / "s3"
        run_fusion(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
            date_range=date_range,
            s2_output_dir=tmp_s2,
            s3_output_dir=s3_dir,
            fusion_output_dir=fusion_output_dir,
        )
    return fusion_output_dir
 def production_fusion_path(
    season: int,
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    yyyymmdd: str,
 ) -> Path:
    """Path of the single-date fused raster in the normal prepared tree.

    ``"bti"`` resolves to ``REFL_<date>.tif`` under the BtI base directory;
    every other mode resolves to ``GCC_<date>.tif`` under the ItB base
    directory. The subfolder is ``fusion_sigma<sigma>`` when ``sigma`` is
    truthy, else ``fusion``.
    """
    fusion_folder = f"fusion_sigma{sigma}" if sigma else "fusion"
    if mode == "bti":
        prepared_root = _get_base_dir(season, site_name, strategy)
        raster_name = f"REFL_{yyyymmdd}.tif"
    else:
        prepared_root = _get_itb_base_dir(season, site_name, strategy)
        raster_name = f"GCC_{yyyymmdd}.tif"
    return prepared_root / fusion_folder / raster_name
 def withheld_s2_refl_path(
    season: int, site_name: str, strategy: str, withheld_filename: str | None
 ) -> Path | None:
    """Return the withheld S2 file in the prepared ``s2`` directory if it exists.

    Returns ``None`` when no filename is given or the file is absent.
    """
    if not withheld_filename:
        return None
    candidate = _get_base_dir(season, site_name, strategy) / "s2" / withheld_filename
    if candidate.is_file():
        return candidate
    return None
--- a/gap_validation/phenology_offsets.py
+++ b/gap_validation/phenology_offsets.py
@ -1,163 +0,0 @@
 """TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference."""
 from __future__ import annotations
 import argparse
 import json
 from datetime import datetime
 from pathlib import Path
 from fusion_phenology import timesat_transitions_from_by_date
 from phenology_timesat import phenocam_phenology_path
 from gap_validation.batch_spatial import (
    PRIMARY_SEASON,
    _best_from_metrics,
    _parse_scenario,
    _resolve_workflows,
    _site_positions,
 )
 from gap_validation.calendar import load_manifest, validation_dir
 from gap_validation.temporal_pc import _fusion_gcc_timeseries
 def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
    if not iso_a or not iso_b:
        return None
    try:
        a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
        b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
        return abs((a - b).days)
    except ValueError:
        return None
 def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
    """Run the TIMESAT helper and keep only the two 50 % transition dates.

    The result maps ``green_up`` and ``green_down`` to the helper's
    ``*_50pct_date`` values (``None`` when a key is absent).
    """
    detected = timesat_transitions_from_by_date(by_date, season)
    key_map = (
        ("green_up", "green_up_50pct_date"),
        ("green_down", "green_down_50pct_date"),
    )
    return {short: detected.get(source_key) for short, source_key in key_map}
 def _temporal_fusion_dir(
    site: str, season: int, gap_days: int, transition: str, scenario_key: str
 ) -> Path:
    """Directory of the full-season gap fusion for one scenario.

    Layout: ``<validation dir>/temporal/gap_<n>_<transition>/
    <strategy>_sigma<20|30>_<mode>``; any sigma other than 30 is written as 20.
    """
    strategy, sigma, mode = _parse_scenario(scenario_key)
    sigma_tag = 30 if sigma == 30 else 20
    gap_folder = f"gap_{gap_days}_{transition}"
    scenario_folder = f"{strategy}_sigma{sigma_tag}_{mode}"
    return validation_dir(site, season).joinpath("temporal", gap_folder, scenario_folder)
 def compute_offsets_for_site(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
 ) -> list[dict]:
    """Compare TIMESAT transition dates of gap-degraded fusion with the reference.

    For every manifest entry with a selected gap length and a green-up or
    green-down transition, the fusion time series of the best scenario (per
    ``metrics.json``) is reduced to a transition date and compared with the
    PhenoCam reference date. Entries are skipped when their fusion directory
    is missing or the series has fewer than 10 points.

    Returns:
        One record per evaluated entry; an empty list if no best scenario
        is available.
    """
    season_root = Path(f"data/{site}/{season}")
    scenario_key = _best_from_metrics(season_root / "metrics.json", workflow)
    if not scenario_key:
        return []

    ref_path = phenocam_phenology_path(site, season)
    if ref_path.is_file():
        reference = json.loads(ref_path.read_text(encoding="utf-8"))
    else:
        reference = {}

    manifest = load_manifest(site, season)
    records: list[dict] = []
    for entry in manifest["entries"]:
        gap_len = entry.get("gap_days")
        kind = entry.get("transition")
        selected = gap_len in gap_days_list and kind in ("green_up", "green_down")
        if not selected:
            continue
        fusion_dir = _temporal_fusion_dir(site, season, gap_len, kind, scenario_key)
        if not fusion_dir.is_dir():
            continue
        _strategy, _sigma, mode = _parse_scenario(scenario_key)
        series = _fusion_gcc_timeseries(fusion_dir, site_position, mode)
        if len(series) < 10:
            continue
        fused = _timesat_transitions(series, season)
        if kind == "green_up":
            ref_key, fused_key = "green_up_50pct_date", "green_up"
        else:
            ref_key, fused_key = "green_down_50pct_date", "green_down"
        ref_date = reference.get(ref_key)
        fused_date = fused.get(fused_key)
        records.append(
            {
                "site_name": site,
                "season": season,
                "transition": kind,
                "gap_days": gap_len,
                "scenario": scenario_key,
                "reference_date": ref_date,
                "fused_date": fused_date,
                "abs_day_offset": _day_offset(fused_date, ref_date),
                "window_start": entry.get("window_start"),
                "window_end": entry.get("window_end"),
            }
        )
    return records
 def write_phenology_offsets(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
 ) -> Path:
    """Compute the phenology offsets for a site and write them as JSON.

    The records are written to ``gap_phenology_offsets_<workflow>.json`` in
    the site's validation directory; for the ``bti`` workflow the same
    content is also written to the legacy ``gap_phenology_offsets.json``.

    Returns:
        Path of the workflow-specific JSON file.
    """
    rows = compute_offsets_for_site(
        site, season, site_position, workflow=workflow, gap_days_list=gap_days_list
    )
    vdir = validation_dir(site, season)
    # compute_offsets_for_site can return [] without ever touching the
    # validation directory, so make sure it exists before writing.
    vdir.mkdir(parents=True, exist_ok=True)
    payload = {
        "site_name": site,
        "season": season,
        "workflow": workflow,
        "records": rows,
    }
    # Serialise once; both files receive identical content.
    text = json.dumps(payload, indent=2) + "\n"
    out = vdir / f"gap_phenology_offsets_{workflow}.json"
    out.write_text(text, encoding="utf-8")
    if workflow == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_phenology_offsets.json").write_text(text, encoding="utf-8")
    return out
 def main() -> None:
    """CLI entry point: write offset files for every primary-season site.

    Sites without a position in the sites GeoJSON are skipped. The path of
    each written file is printed. ``--data-dir`` is parsed but not read here.
    """
    parser = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) (default: both best BtI and best ItB).",
    )
    options = parser.parse_args()

    positions = _site_positions(options.sites_geojson)
    workflows = _resolve_workflows(options.workflow)
    for site, season in sorted(PRIMARY_SEASON.items()):
        position = positions.get(site)
        if position:
            for workflow in workflows:
                written = write_phenology_offsets(site, season, position, workflow=workflow)
                print(written)
 # Run the CLI only when this module is executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/gap_validation/run.py
+++ b/gap_validation/run.py
@ -1,352 +0,0 @@
 """Tier-2 gap validation CLI: manifest, masked EFAST, spatial ``nse_s2``, Whittaker crossover."""
 from __future__ import annotations
 import argparse
 import json
 import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
 from gap_validation.calendar import (
    DEFAULT_GAP_LENGTHS,
    TRANSITIONS,
    load_manifest,
    validation_dir,
    write_manifest,
 )
 from gap_validation.fusion_masked import (
    production_fusion_path,
    run_masked_fusion_one_date,
    validation_fusion_dir,
    withheld_s2_refl_path,
 )
 from gap_validation.spatial_metrics import evaluate_gap_vs_withheld
 from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
 def _ymd_from_iso(iso_d: str) -> str:
    return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
 def _yyyymmdd_from_withheld_filename(fn: str) -> str | None:
    for part in fn.replace(".tif", "").split("_"):
        if len(part) == 8 and part.isdigit():
            return part
    return None
 def _withheld_iso(entry: dict) -> str | None:
    d = entry.get("withheld_s2_date")
    if isinstance(d, str) and len(d) >= 10:
        return d[:10]
    fn = entry.get("withheld_s2_filename")
    if not fn or not isinstance(fn, str):
        return None
    ymd = _yyyymmdd_from_withheld_filename(fn)
    if not ymd:
        return None
    return datetime.strptime(ymd, "%Y%m%d").date().isoformat()
 def _fused_file(fusion_dir: Path, mode: str, ymd: str) -> Path:
    stem = "REFL" if mode == "bti" else "GCC"
    return fusion_dir / f"{stem}_{ymd}.tif"
 def _scenario_key(strategy: str, sigma: int | None, mode: str) -> str:
    sig = 30 if sigma == 30 else 20
    return f"{strategy}_sigma{sig}_{mode}"
 def _git_rev() -> str | None:
    try:
        return subprocess.check_output(
            ["git", "rev-parse", "HEAD"],
            cwd=Path(__file__).resolve().parent.parent,
            text=True,
        ).strip()
    except (OSError, subprocess.CalledProcessError):
        return None
 def _filter_entries(
    entries: list[dict],
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
 ) -> list[dict]:
    out = entries
    if gap_days_filter:
        out = [e for e in out if e.get("gap_days") in gap_days_filter]
    if transition_filter:
        out = [e for e in out if e.get("transition") in transition_filter]
    return out
 def run_validation(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    write_manifest_only: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
    manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    manifest_transitions: tuple[str, ...] = TRANSITIONS,
 ) -> Path:
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)
    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
            gap_lengths=manifest_gap_lengths,
            transitions=manifest_transitions,
        )
    if write_manifest_only:
        return vdir / "gap_manifest.json"
    manifest = load_manifest(site_name, season)
    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
    results: list[dict] = []
    for entry in entries:
        gap_days = entry["gap_days"]
        transition = entry.get("transition", "green_up")
        pred = entry["prediction_date"]
        w0 = entry["window_start"]
        w1 = entry["window_end"]
        fn = entry.get("withheld_s2_filename")
        if not fn:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "no_withheld_s2_filename",
                    "entry": entry,
                }
            )
            continue
        ymd = _ymd_from_iso(pred)
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "could_not_parse_withheld_yyyymmdd",
                    "withheld_s2_filename": fn,
                }
            )
            continue
        withheld_iso = (
            _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
        )
        fusion_out = validation_fusion_dir(
            site_name, season, gap_days, transition, strategy, sigma, mode
        )
        if not skip_fusion:
            try:
                run_masked_fusion_one_date(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    pred,
                    w0,
                    w1,
                    wh_ymd,
                    fusion_out,
                )
            except RuntimeError as e:
                results.append(
                    {
                        "transition": transition,
                        "gap_days": gap_days,
                        "error": str(e),
                        "entry": entry,
                    }
                )
                continue
        fused_gap = _fused_file(fusion_out, mode, ymd)
        prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd)
        wh_path = withheld_s2_refl_path(season, site_name, strategy, fn)
        if wh_path is None or not fused_gap.is_file():
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "prediction_date": pred,
                    "withheld_s2_filename": fn,
                    "scenario": {
                        "strategy": strategy,
                        "sigma": 30 if sigma == 30 else 20,
                        "mode": mode,
                    },
                    "error": "missing_withheld_refl_or_fused_gap",
                    "fused_gap_path": str(fused_gap),
                }
            )
            continue
        spatial = evaluate_gap_vs_withheld(
            wh_path,
            fused_gap,
            prod if prod.is_file() else None,
            mode,
            whittaker_context=(base, strategy, pred, withheld_iso, w0, w1),
        )
        fusion_nse = (spatial.get("gap") or {}).get("nse_s2")
        wh_nse = (spatial.get("whittaker") or {}).get("nse_s2")
        results.append(
            {
                "transition": transition,
                "gap_days": gap_days,
                "prediction_date": pred,
                "window_start": w0,
                "window_end": w1,
                "withheld_s2_filename": fn,
                "scenario": {
                    "strategy": strategy,
                    "sigma": 30 if sigma == 30 else 20,
                    "mode": mode,
                },
                "paths": {
                    "fused_gap": str(fused_gap),
                    "fused_no_gap": str(prod) if prod.is_file() else None,
                    "withheld_s2_refl": str(wh_path),
                },
                "spatial": spatial,
                "whittaker_crossover_row": {
                    "transition": transition,
                    "gap_days": gap_days,
                    "nse_s2_fusion": fusion_nse,
                    "nse_s2_whittaker": wh_nse,
                },
            }
        )
    scenario = _scenario_key(strategy, sigma, mode)
    crossover_rows = [
        r["whittaker_crossover_row"]
        for r in results
        if isinstance(r.get("whittaker_crossover_row"), dict)
    ]
    summary = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "command_line": sys.argv,
        "git_commit": _git_rev(),
        "manifest": str(vdir / "gap_manifest.json"),
        "gap_withheld_images": str(vdir / "gap_withheld_images.json"),
        "results": results,
        "whittaker_crossover": {
            scenario: {
                "metric": "nse_s2_spatial_vs_withheld_s2_gcc",
                "whittaker_definition": (
                    "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
                    "all S2 dates in the gap window and the withheld acquisition removed; "
                    "prediction is a spatially constant field at smoothed GCC(prediction_date)."
                ),
                "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_s2_fusion",
                    whittaker_key="nse_s2_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    out_path = vdir / f"gap_validation_summary_{mode}.json"
    out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers (webapp, older scripts).
        (vdir / "gap_validation_summary.json").write_text(
            json.dumps(summary, indent=2) + "\n", encoding="utf-8"
        )
    return out_path
 def main() -> None:
    """Command-line entry point for the tier-2 withheld-S2 gap validation.
    Parses the options, forwards them to ``run_validation`` and prints the value it returns.
    """
    strategies = ["aggressive", "nonaggressive"]
    parser = argparse.ArgumentParser(
        description="Tier-2 withheld-S2 gap validation (outputs under data/.../validation/)."
    )
    parser.add_argument("--site", required=True)
    for numeric_flag, caster in (("--season", int), ("--lat", float), ("--lon", float)):
        parser.add_argument(numeric_flag, type=caster, required=True)
    parser.add_argument("--strategy", default="aggressive", choices=strategies)
    parser.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    parser.add_argument("--mode", default="bti", choices=["bti", "itb"])
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        metavar="N",
        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
    )
    parser.add_argument(
        "--transition",
        choices=list(TRANSITIONS),
        action="append",
        help="Restrict to transition(s); repeatable (default: all in manifest).",
    )
    parser.add_argument("--skip-manifest", action="store_true")
    parser.add_argument(
        "--skip-fusion",
        action="store_true",
        help="Reuse existing validation fusion rasters.",
    )
    parser.add_argument(
        "--write-manifest-only",
        action="store_true",
        help="Write gap_manifest.json + gap_withheld_images.json and exit.",
    )
    parser.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=strategies,
        help="Which prepared_*/s2 tree is used to pick nearest S2 for withholding.",
    )
    opts = parser.parse_args()
    # Only sigma=30 is forwarded as a number; the default (20) is passed on as None.
    sigma_kw = None if opts.sigma != 30 else 30
    result_path = run_validation(
        opts.site,
        opts.season,
        (opts.lat, opts.lon),
        opts.strategy,
        sigma_kw,
        opts.mode,
        skip_manifest=opts.skip_manifest,
        skip_fusion=opts.skip_fusion,
        write_manifest_only=opts.write_manifest_only,
        # An absent (or empty) filter means "no restriction".
        gap_days_filter=opts.gap_days or None,
        transition_filter=opts.transition or None,
        s2_calendar_strategy=opts.s2_calendar_strategy,
    )
    print(result_path)
 # Run the CLI only when this module is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/gap_validation/s2_mask_dir.py
+++ b/gap_validation/s2_mask_dir.py
@ -1,91 +0,0 @@
 """Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
 from __future__ import annotations
 import re
 from datetime import date, datetime
 from pathlib import Path
 # Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
 # Captures the eight digits that directly precede the ``_REFL`` / ``_GCC`` / ``_DIST_CLOUD``
 # suffix; matching is case-insensitive.
 S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
 def yyyymmdd_in_name(name: str) -> str | None:
    """Return the ``YYYYMMDD`` token of a prepared S2 file name, or None if it has none."""
    found = S2_PREP_DATE_RE.search(name)
    if found is None:
        return None
    return found.group(1)
 def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert an ISO date (or datetime) string to compact ``YYYYMMDD``.
    Only the first ten characters are parsed, so a trailing time part is ignored.
    """
    day_part = iso_d[:10]
    parsed = datetime.strptime(day_part, "%Y-%m-%d")
    return parsed.strftime("%Y%m%d")
 def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
 ) -> set[str]:
    """All S2 acquisition days (from REFL filenames) inside [window_start, window_end].
    Scans ``prepared_s2`` for ``*REFL.tif`` files named ``S2<platform>_MSIL2A_<YYYYMMDD>_REFL.tif``
    and returns the ``YYYYMMDD`` strings whose date lies in the inclusive window.
    Returns an empty set when ``prepared_s2`` is not a directory.
    """
    days: set[str] = set()
    if not prepared_s2.is_dir():
        return days
    # Accept every platform letter (S2A, S2B, ...): a hard-coded ``S2A`` would silently
    # leave acquisitions from the other satellites out of the gap window.
    name_re = re.compile(r"S2[A-Z]_MSIL2A_(\d{8})_REFL\.tif$")
    for p in prepared_s2.glob("*REFL.tif"):
        m = name_re.search(p.name)
        if not m:
            continue
        try:
            d = datetime.strptime(m.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that do not form a calendar date: not an acquisition day.
            continue
        if window_start <= d <= window_end:
            days.add(m.group(1))
    return days
 def build_masked_s2_dir(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
    patterns: tuple[str, ...],
 ) -> int:
    """Symlink all files matching ``patterns`` except excluded acquisition days.
    ``dest`` is created if needed; an existing entry with the same name is replaced.
    Returns the number of symlinks created.
    """
    dest.mkdir(parents=True, exist_ok=True)
    linked = 0
    for glob_pattern in patterns:
        for source in sorted(prepared_s2.glob(glob_pattern)):
            if not (source.is_file() or source.is_symlink()):
                continue
            day = yyyymmdd_in_name(source.name)
            # Files without a recognisable date are never excluded.
            is_excluded = bool(day) and day in excluded_yyyymmdd
            if is_excluded:
                continue
            target = dest / source.name
            # ``exists()`` is False for a dangling symlink, hence the second check.
            if target.exists() or target.is_symlink():
                target.unlink()
            target.symlink_to(source.resolve())
            linked += 1
    return linked
 def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Raise ``RuntimeError`` if any entry of ``masked_s2_dir`` carries the withheld day."""
    for candidate in masked_s2_dir.iterdir():
        if yyyymmdd_in_name(candidate.name) != withheld_yyyymmdd:
            continue
        raise RuntimeError(
            f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
            f"found in masked S2 dir {masked_s2_dir}"
        )
 def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
 ) -> int:
    """BtI variant of ``build_masked_s2_dir``: links REFL and DIST_CLOUD rasters."""
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, bti_patterns)
 def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
 ) -> int:
    """ItB variant of ``build_masked_s2_dir``: links GCC and DIST_CLOUD rasters."""
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, itb_patterns)
--- a/gap_validation/spatial_metrics.py
+++ b/gap_validation/spatial_metrics.py
@ -1,234 +0,0 @@
 """Per-pixel GCC vs withheld S2; NSE (nse_s2); no-gap baseline; deltas."""
 from __future__ import annotations
 from pathlib import Path
 import numpy as np
 import rasterio
 from rasterio.warp import reproject, Resampling
 from scipy.stats import pearsonr
 # Match postprocessing valid mask on reflectance (METH / postprocessing.py).
 # Values must be strictly greater than this to count as valid in the masks of this module.
 VALID_REFL_THRESHOLD = 0.001
 # ``_gcc_from_rgb`` computes GCC only where the band sum is >= this and each band is > this.
 GCC_DENOM_EPS = 1e-3
 # ``spatial_scores`` reports ``nse_s2`` as None when its absolute value exceeds this bound.
 MAX_REPORTED_NSE_S2 = 20.0
 def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Green chromatic coordinate ``green / (red + green + blue)`` as float32.
    Pixels where any band or the band sum is non-finite, the sum is below
    ``GCC_DENOM_EPS`` or any band is not above ``GCC_DENOM_EPS`` are set to NaN.
    """
    total = red.astype(np.float64) + green.astype(np.float64) + blue.astype(np.float64)
    usable = np.isfinite(total) & (total >= GCC_DENOM_EPS)
    for band in (blue, green, red):
        usable = usable & np.isfinite(band)
    for band in (blue, green, red):
        usable = usable & (band > GCC_DENOM_EPS)
    gcc = np.full_like(blue, np.nan, dtype=np.float64)
    gcc[usable] = green[usable].astype(np.float64) / total[usable]
    return gcc.astype(np.float32)
 def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
    """Mask of pixels whose first three bands are all finite and above ``GCC_DENOM_EPS``.
    Returns None when the raster has fewer than three bands.
    """
    with rasterio.open(fusion_path) as src:
        if src.count < 3:
            return None
        bgr = src.read(indexes=[1, 2, 3]).astype(np.float32)
    all_finite = np.isfinite(bgr).all(axis=0)
    all_positive = (bgr > GCC_DENOM_EPS).all(axis=0)
    return all_finite & all_positive
 def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
    """Read fused GCC and a copy of the raster profile.
    With four or more bands, GCC is derived from bands 1-3 via ``_gcc_from_rgb``;
    otherwise band 1 is returned as-is (float32).
    """
    with rasterio.open(fusion_path) as src:
        profile = src.profile.copy()
        if src.count < 4:
            return src.read(1).astype(np.float32), profile
        blue, green, red = (src.read(i).astype(np.float32) for i in (1, 2, 3))
    return _gcc_from_rgb(blue, green, red), profile
 def warp_refl_bands_to_grid(
    refl_path: Path,
    height: int,
    width: int,
    transform,
    crs,
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Reproject bands 1-3 of ``refl_path`` onto a ``height`` x ``width`` grid (bilinear).
    The target grid is described by ``transform`` and ``crs``; the three float32
    arrays are returned in band order (named blue, green, red by the callers).
    """
    targets = tuple(np.empty((height, width), dtype=np.float32) for _ in range(3))
    with rasterio.open(refl_path) as src:
        for band_index, target in enumerate(targets, start=1):
            reproject(
                source=rasterio.band(src, band_index),
                destination=target,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=crs,
                resampling=Resampling.bilinear,
            )
    blue, green, red = targets
    return blue, green, red
 def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
    """Valid-pixel mask of a fused raster.
    For ``mode == "itb"`` or rasters with fewer than four bands, band 1 must be finite
    and above ``VALID_REFL_THRESHOLD``. Otherwise every band must be finite and the
    per-pixel maximum over all bands must exceed the threshold.
    """
    with rasterio.open(fusion_path) as src:
        single_band = mode == "itb" or src.count < 4
        if single_band:
            first = src.read(1).astype(np.float32)
            return np.isfinite(first) & (first > VALID_REFL_THRESHOLD)
        stacks = src.read().astype(np.float32)
        with np.errstate(all="ignore"):
            peak = np.nanmax(stacks, axis=0)
        every_band_finite = np.isfinite(stacks).all(axis=0)
        return every_band_finite & np.isfinite(peak) & (peak > VALID_REFL_THRESHOLD)
 def spatial_scores(
    y_true_gcc: np.ndarray,
    y_pred_gcc: np.ndarray,
    mask: np.ndarray,
 ) -> dict:
    """RMSE, MAE, mean bias, Pearson r and ``nse_s2`` over the masked pixels.
    With fewer than two masked pixels only ``n_pixels`` is returned. ``nse_s2`` is None
    when the truth has zero variance or when its absolute value exceeds
    ``MAX_REPORTED_NSE_S2``; ``pearson_r`` is None when either side is constant.
    """
    truth = y_true_gcc[mask].astype(np.float64).ravel()
    pred = y_pred_gcc[mask].astype(np.float64).ravel()
    n_pixels = int(truth.size)
    if n_pixels < 2:
        return {"n_pixels": n_pixels}
    residual = truth - pred
    squared = residual ** 2
    truth_mean = float(np.mean(truth))
    variance_sum = float(np.sum((truth - truth_mean) ** 2))
    nse_s2 = None
    if variance_sum > 0:
        candidate = float(1.0 - np.sum(squared) / variance_sum)
        # Implausibly large magnitudes are withheld rather than reported.
        nse_s2 = candidate if abs(candidate) <= MAX_REPORTED_NSE_S2 else None
    both_vary = np.std(truth) > 0 and np.std(pred) > 0
    pearson = float(pearsonr(truth, pred)[0]) if both_vary else None
    return {
        "n_pixels": n_pixels,
        "rmse": float(np.sqrt(np.mean(squared))),
        "mae": float(np.mean(np.abs(residual))),
        "mean_bias": float(np.mean(pred - truth)),
        "pearson_r": pearson,
        "nse_s2": nse_s2,
    }
 def withheld_gcc_on_fusion_grid(
    withheld_refl_path: Path, fused_path: Path
 ) -> tuple[np.ndarray, np.ndarray, dict]:
    """Return ``(y_true, y_pred, profile)`` on the grid of ``fused_path``.
    ``y_pred`` is the fused GCC; ``y_true`` is GCC computed from the withheld S2
    reflectance after warping it to the fused raster's transform and CRS.
    """
    fused_gcc, profile = read_fused_gcc(fused_path)
    n_rows, n_cols = fused_gcc.shape
    blue, green, red = warp_refl_bands_to_grid(
        withheld_refl_path, n_rows, n_cols, profile["transform"], profile["crs"]
    )
    withheld_gcc = _gcc_from_rgb(blue, green, red)
    return withheld_gcc, fused_gcc, profile
 def mask_gap_whittaker(
    yt: np.ndarray,
    y_gap: np.ndarray,
    fused_gap_path: Path,
    mode: str,
 ) -> np.ndarray:
    """Mask for gap fusion and Whittaker vs withheld S2 (does not require no-gap fusion).
    A pixel is kept when the fused raster is valid there and both ``yt`` and ``y_gap``
    are finite and lie in ``(VALID_REFL_THRESHOLD, 1.0]``; when the fused raster has at
    least three bands the positive-BGR mask is applied as well.
    """
    def _plausible(values: np.ndarray) -> np.ndarray:
        return np.isfinite(values) & (values > VALID_REFL_THRESHOLD) & (values <= 1.0)
    keep = valid_mask_fused(fused_gap_path, mode) & _plausible(yt) & _plausible(y_gap)
    positive = _positive_bgr_mask(fused_gap_path)
    if positive is None:
        return keep
    keep &= positive
    return keep
 def common_valid_mask(
    yt: np.ndarray,
    y_gap: np.ndarray,
    y_nogap: np.ndarray | None,
    fused_gap_path: Path,
    mode: str,
 ) -> np.ndarray:
    """Mask including no-gap fusion when computing gap-vs-no-gap deltas (internal QA).
    Starts from ``mask_gap_whittaker`` and, when ``y_nogap`` is given, additionally
    requires it to be finite and in ``(VALID_REFL_THRESHOLD, 1.0]``.
    """
    shared = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
    if y_nogap is None:
        return shared
    nogap_ok = np.isfinite(y_nogap) & (y_nogap > VALID_REFL_THRESHOLD) & (y_nogap <= 1.0)
    shared &= nogap_ok
    return shared
 def evaluate_gap_vs_withheld(
    withheld_refl_path: Path,
    fused_gap_path: Path,
    fused_nogap_path: Path | None,
    mode: str,
    *,
    whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
 ) -> dict:
    """Spatial metrics for gap and no-gap; optional Whittaker constant-field vs withheld S2.
    Returned keys:
    - ``gap``: gap fusion vs withheld S2 on the gap/Whittaker mask.
    - ``no_gap``: no-gap fusion vs withheld S2 on the shared mask (only when the no-gap
      raster exists).
    - ``delta_rmse`` / ``delta_nse``: gap vs no-gap fusion, both scored on the shared mask
      (QA only; ``delta_nse`` = NSE_no_gap − NSE_gap, not exported to thesis tables).
    - ``whittaker``: constant Whittaker field vs withheld S2 on the gap/Whittaker mask
      (only when ``whittaker_context`` is given and a value is available).
    """
    yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)
    y_nogap = None
    if fused_nogap_path is not None and fused_nogap_path.is_file():
        y_nogap, _ = read_fused_gcc(fused_nogap_path)
    mask_gw = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
    out: dict = {"gap": spatial_scores(yt, y_gap, mask_gw)}
    if y_nogap is not None:
        mask_full = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
        out["no_gap"] = spatial_scores(yt, y_nogap, mask_full)
        # Score the gap fusion again on the shared mask so both sides of each delta use
        # the same pixels; ``out["gap"]`` itself stays on the gap/Whittaker mask.
        g_shared = spatial_scores(yt, y_gap, mask_full)
        ng = out["no_gap"]
        if g_shared.get("rmse") is not None and ng.get("rmse") is not None:
            out["delta_rmse"] = float(g_shared["rmse"] - ng["rmse"])
        if g_shared.get("nse_s2") is not None and ng.get("nse_s2") is not None:
            out["delta_nse"] = float(ng["nse_s2"] - g_shared["nse_s2"])
    if whittaker_context is not None:
        # Imported here as in the original block (presumably to avoid an import cycle).
        from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series
        base, strategy, prediction_iso, withheld_iso, w0, w1 = whittaker_context
        wgcc = whittaker_gcc_on_gap_masked_series(
            base,
            strategy,
            prediction_iso,
            withheld_iso,
            window_start_iso=w0,
            window_end_iso=w1,
        )
        if wgcc is not None:
            out["whittaker"] = constant_field_scores(yt, float(wgcc), mask_gw)
    return out
 def constant_field_scores(
    y_true_gcc: np.ndarray, scalar: float, mask: np.ndarray
 ) -> dict:
    """Score a spatially constant prediction (``scalar`` everywhere) with ``spatial_scores``."""
    constant_prediction = np.full_like(y_true_gcc, scalar, dtype=np.float32)
    scores = spatial_scores(y_true_gcc, constant_prediction, mask)
    return scores
--- a/gap_validation/temporal_pc.py
+++ b/gap_validation/temporal_pc.py
@ -1,293 +0,0 @@
 """Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation)."""
 from __future__ import annotations
 import argparse
 import json
 import re
 from datetime import datetime
 from pathlib import Path
 from metrics_indices import _get_gcc_from_original
 from metrics_stats import (
    WHITTAKER_LAMBDA_DAYS_SQ,
    _norm_date_key,
    _s2_gcc_series_from_preselection,
    _whittaker_smooth_dict,
    calculate_temporal_metrics,
    load_timeseries,
 )
 from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest
 from gap_validation.fusion_masked import run_masked_fusion_season
 from gap_validation.run import (
    _filter_entries,
    _scenario_key,
    _withheld_iso,
    _yyyymmdd_from_withheld_filename,
 )
 from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
 def _fusion_gcc_timeseries(
    fusion_dir: Path, site_position: tuple[float, float], mode: str
 ) -> dict[str, float]:
    """GCC time series at the site from the fused rasters in ``fusion_dir``.
    Reads ``REFL_*.tif`` for ``mode == "bti"`` and ``GCC_*.tif`` otherwise; keys are ISO
    dates taken from the ``_YYYYMMDD.tif`` file-name suffix. Rasters for which
    ``_get_gcc_from_original`` returns None are left out.
    """
    glob_pattern = "GCC_*.tif" if mode != "bti" else "REFL_*.tif"
    date_suffix = re.compile(r"_(\d{8})\.tif$")
    series: dict[str, float] = {}
    for raster in sorted(fusion_dir.glob(glob_pattern)):
        found = date_suffix.search(raster.name)
        if found is None:
            continue
        iso_day = datetime.strptime(found.group(1), "%Y%m%d").date().isoformat()
        value = _get_gcc_from_original(raster, site_position)
        if value is None:
            continue
        series[iso_day] = float(value)
    return series
 def whittaker_timeseries_gap_degraded(
    base: Path,
    strategy: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_iso: str,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
 ) -> dict[str, float]:
    """Whittaker-smoothed S2 GCC with the gap window and the withheld day removed.
    Observations come from ``_s2_gcc_series_from_preselection(base)``; an observation is
    kept only if its flag for the chosen strategy is falsy. Returns ``{}`` when there is
    no series or fewer than two observations remain.
    """
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return {}
    # Flag column: 0 for "aggressive", 1 for anything else.
    flag_col = 1 if strategy != "aggressive" else 0
    first_day = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
    last_day = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
    withheld_key = _norm_date_key(withheld_iso)
    def _inside_gap(key: str) -> bool:
        try:
            parsed = datetime.strptime(key[:10], "%Y-%m-%d").date()
        except ValueError:
            return False
        return first_day <= parsed <= last_day
    observations = []
    for day, gcc in all_gcc.items():
        if day not in flags or flags[day][flag_col]:
            continue
        key = _norm_date_key(day)
        if key == withheld_key:
            continue
        if _inside_gap(key or ""):
            continue
        observations.append((day, gcc))
    observations.sort()
    if len(observations) < 2:
        return {}
    obs_days, obs_values = zip(*observations)
    return _whittaker_smooth_dict(obs_days, obs_values, lam)
 def _load_nogap_temporal_block(metrics_path: Path, scenario: str) -> dict:
    """Return ``temporal[scenario]`` from the no-gap ``metrics.json`` (``{}`` if unavailable)."""
    if not metrics_path.is_file():
        return {}
    metrics = json.loads(metrics_path.read_text(encoding="utf-8"))
    return (metrics.get("temporal") or {}).get(scenario) or {}
 def run_temporal_pc(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
 ) -> Path:
    """Run full-season gap fusion + NSE_PC; write ``gap_metrics_{mode}.json``.
    For every (filtered) manifest entry the gap-degraded fusion is run (unless
    ``skip_fusion``), its GCC series and the gap-degraded Whittaker series are scored
    against the PhenoCam series, and deltas against the no-gap ``metrics.json`` are added
    when that file provides ``nse_pc`` for the scenario. Entries that cannot be processed
    are recorded in ``results`` with an ``error`` key instead of aborting the run.
    For ``mode == "bti"`` the payload is also written to the legacy ``gap_metrics.json``.
    Returns the path of ``gap_metrics_{mode}.json``.
    """
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)
    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
        )
    manifest = load_manifest(site_name, season)
    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
    phenocam_ts = load_timeseries(base / "raw" / "phenocam" / "phenocam_gcc.json")
    scenario = _scenario_key(strategy, sigma, mode)
    # The no-gap reference is the same for every entry: read and parse it once.
    nogap_block = _load_nogap_temporal_block(base / "metrics.json", scenario)
    nogap_nse_pc = nogap_block.get("nse_pc")
    nogap_rmse = nogap_block.get("rmse")
    results: list[dict] = []
    crossover_rows: list[dict] = []
    for entry in entries:
        transition = entry.get("transition", "green_up")
        gap_days = entry["gap_days"]
        pred = entry["prediction_date"]
        w0, w1 = entry["window_start"], entry["window_end"]
        fn = entry.get("withheld_s2_filename")
        if not fn:
            results.append(
                {"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"}
            )
            continue
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "bad_withheld_filename",
                }
            )
            continue
        # Fall back to the date embedded in the file name when the entry has no ISO date.
        withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
        temporal_dir = vdir / "temporal" / f"gap_{gap_days}_{transition}" / scenario
        if not skip_fusion:
            try:
                run_masked_fusion_season(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    w0,
                    w1,
                    wh_ymd,
                    temporal_dir,
                )
            except RuntimeError as e:
                results.append(
                    {
                        "transition": transition,
                        "gap_days": gap_days,
                        "error": str(e),
                    }
                )
                continue
        # Fresh or reused, the rasters are read from the same directory.
        fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
        fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
        wh_ts = whittaker_timeseries_gap_degraded(base, strategy, w0, w1, withheld_iso)
        wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts)
        row: dict = {
            "transition": transition,
            "gap_days": gap_days,
            "prediction_date": pred,
            "window_start": w0,
            "window_end": w1,
            "withheld_s2_filename": fn,
            "temporal": {
                "fused": fused_metrics,
                "whittaker": wh_metrics,
            },
            "fusion_dir": str(temporal_dir),
        }
        if fused_metrics and nogap_nse_pc is not None:
            g_rmse = fused_metrics.get("rmse")
            n_g = fused_metrics.get("nse_pc")
            if g_rmse is not None and nogap_rmse is not None:
                row["delta_rmse"] = float(g_rmse - nogap_rmse)
            if n_g is not None:
                row["delta_nse"] = float(nogap_nse_pc - n_g)
        row["utility_crossover_row"] = {
            "transition": transition,
            "gap_days": gap_days,
            "nse_pc_fusion": (fused_metrics or {}).get("nse_pc"),
            "nse_pc_whittaker": (wh_metrics or {}).get("nse_pc"),
        }
        crossover_rows.append(row["utility_crossover_row"])
        results.append(row)
    payload = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "tier": "temporal_nse_pc",
        "manifest": str(vdir / "gap_manifest.json"),
        "results": results,
        "utility_crossover": {
            scenario: {
                "metric": "nse_pc_vs_phenocam_gcc90",
                "first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_pc_fusion",
                    whittaker_key="nse_pc_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    text = json.dumps(payload, indent=2) + "\n"
    out_path = vdir / f"gap_metrics_{mode}.json"
    out_path.write_text(text, encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_metrics.json").write_text(text, encoding="utf-8")
    return out_path
 def main() -> None:
    """Command-line entry point for the gap-degraded full-season NSE_PC tier.
    Parses the options, forwards them to ``run_temporal_pc`` and prints the value it returns.
    """
    # Same closed set as the spatial-validation CLI; a misspelt strategy now fails at
    # parse time instead of being passed downstream.
    strategies = ["aggressive", "nonaggressive"]
    ap = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.")
    ap.add_argument("--site", required=True)
    ap.add_argument("--season", type=int, required=True)
    ap.add_argument("--lat", type=float, required=True)
    ap.add_argument("--lon", type=float, required=True)
    ap.add_argument("--strategy", default="aggressive", choices=strategies)
    ap.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    ap.add_argument("--mode", default="bti", choices=["bti", "itb"])
    ap.add_argument(
        "--gap-days",
        type=int,
        action="append",
        metavar="N",
        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
    )
    ap.add_argument(
        "--transition",
        choices=list(TRANSITIONS),
        action="append",
        help="Restrict to transition(s); repeatable (default: all in manifest).",
    )
    ap.add_argument("--skip-manifest", action="store_true")
    ap.add_argument(
        "--skip-fusion",
        action="store_true",
        help="Reuse existing gap-degraded fusion rasters.",
    )
    ap.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=strategies,
        help="Strategy forwarded to the manifest writer as s2_calendar_strategy.",
    )
    args = ap.parse_args()
    # Only sigma=30 is forwarded as a number; the default (20) is passed on as None.
    sigma_kw = 30 if args.sigma == 30 else None
    out = run_temporal_pc(
        args.site,
        args.season,
        (args.lat, args.lon),
        args.strategy,
        sigma_kw,
        args.mode,
        skip_manifest=args.skip_manifest,
        skip_fusion=args.skip_fusion,
        gap_days_filter=args.gap_days,
        transition_filter=args.transition,
        s2_calendar_strategy=args.s2_calendar_strategy,
    )
    print(out)
 # Run the CLI only when this module is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/gap_validation/whittaker_compare.py
+++ b/gap_validation/whittaker_compare.py
@ -1,81 +0,0 @@
 """Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
 from __future__ import annotations
 from datetime import date, datetime
 from pathlib import Path
 from metrics_stats import (
    WHITTAKER_LAMBDA_DAYS_SQ,
    _norm_date_key,
    _s2_gcc_series_from_preselection,
    _whittaker_smooth_dict,
 )
 def _date_in_window(dk: str, start: date, end: date) -> bool:
    """True when the first ten characters of ``dk`` parse as ``YYYY-MM-DD`` within [start, end].
    An unparseable key counts as outside the window.
    """
    try:
        day = datetime.strptime(dk[:10], "%Y-%m-%d").date()
    except ValueError:
        inside = False
    else:
        inside = start <= day <= end
    return inside
 def whittaker_gcc_on_gap_masked_series(
    base: Path,
    strategy: str,
    prediction_iso: str,
    withheld_iso: str,
    *,
    window_start_iso: str | None = None,
    window_end_iso: str | None = None,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
 ) -> float | None:
    """Whittaker-smoothed S2 GCC at the prediction date, without gap-window and withheld data.
    Returns None when either date key cannot be normalised, when there is no series,
    when fewer than two observations remain, or when the smoothed series has no value
    for the prediction date. The gap window is applied only if both bounds are given.
    """
    prediction_key = _norm_date_key(prediction_iso)
    withheld_key = _norm_date_key(withheld_iso)
    if not (prediction_key and withheld_key):
        return None
    window = None
    if window_start_iso and window_end_iso:
        window = (
            datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date(),
            datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date(),
        )
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return None
    # Flag column: 0 for "aggressive", 1 for anything else.
    flag_col = 1 if strategy != "aggressive" else 0
    observations = []
    for day, gcc in all_gcc.items():
        if day not in flags or flags[day][flag_col]:
            continue
        key = _norm_date_key(day)
        if not key or key == withheld_key:
            continue
        if window is not None and _date_in_window(key, window[0], window[1]):
            continue
        observations.append((day, gcc))
    if len(observations) < 2:
        return None
    ordered = sorted(observations, key=lambda pair: pair[0])
    obs_days, obs_values = zip(*ordered)
    smoothed = _whittaker_smooth_dict(obs_days, obs_values, lam)
    return smoothed.get(prediction_key)
 def first_gap_where_fusion_below_whittaker(
    rows: list[dict],
    *,
    fusion_key: str = "nse_s2",
    whittaker_key: str = "nse_s2",
 ) -> int | None:
    """Smallest ``gap_days`` where fusion[metric] < whittaker[metric] (strict).
    Rows missing either metric (absent or None) are ignored. The minimum is taken over
    all transitions, so the result does not depend on transition names or row order.
    Returns None when no row qualifies.
    """
    crossing_gaps = [
        int(r["gap_days"])
        for r in rows
        if r.get(fusion_key) is not None
        and r.get(whittaker_key) is not None
        and r[fusion_key] < r[whittaker_key]
    ]
    return min(crossing_gaps, default=None)
--- a/metrics_indices.py
+++ b/metrics_indices.py
@ -1,689 +0,0 @@
 """Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
 import json
 import numpy as np
 import rasterio
 from rasterio.warp import transform as transform_coords
 from pathlib import Path
 from datetime import datetime
 from preselection import _sample_3x3
 # 1-based raster band indices passed to ``src.read``. The band order 1..4 = B02, B03,
 # B04, B8A is the one documented by ``_get_bands_from_original`` in this module.
 RED_BAND = 3
 NIR_BAND = 4
 BLUE_BAND = 1
 GREEN_BAND = 2
 def _calculate_and_write_ndvi(input_file, output_file):
    """Write a single-band float32 NDVI raster derived from ``input_file``.
    NDVI is ``(NIR - RED) / (NIR + RED)`` wherever both bands are strictly positive;
    all other pixels stay 0, which is also declared as the output nodata value.
    """
    with rasterio.open(input_file) as src:
        red_band = src.read(RED_BAND).astype(np.float32)
        nir_band = src.read(NIR_BAND).astype(np.float32)
        valid = (red_band > 0) & (nir_band > 0)
        nir_ok = nir_band[valid]
        red_ok = red_band[valid]
        ndvi = np.zeros_like(red_band, dtype=np.float32)
        ndvi[valid] = (nir_ok - red_ok) / (nir_ok + red_ok)
        # Reuse the source georeferencing but describe a single LZW-compressed band.
        out_profile = src.profile.copy()
        out_profile.update(count=1, dtype="float32", nodata=0, compress="lzw")
        with rasterio.open(output_file, "w", **out_profile) as dst:
            dst.write(ndvi, 1)
            dst.set_band_description(1, "NDVI")
 def _get_ndvi_value(ndvi_file, site_position):
    """Sample band 1 of ``ndvi_file`` at the site, or return ``None``.
    ``site_position`` is indexed as ``(lat, lon)`` in EPSG:4326 and reprojected to
    the raster CRS. ``None`` is returned when the point lies outside the raster
    bounds, the sample equals the raster nodata value, the sample is NaN, or any
    error occurs (the error is printed). A sampled 0 is a legitimate value unless
    the raster declares 0 as nodata.
    """
    try:
        with rasterio.open(ndvi_file) as src:
            lon, lat = site_position[1], site_position[0]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            samples = list(src.sample([(px, py)]))
            if not samples:
                return None
            value = float(samples[0][0])
            if src.nodata is not None and value == src.nodata:
                return None
            return None if np.isnan(value) else value
    except Exception as e:
        print(f"Error sampling {ndvi_file.name}: {e}")
    return None
 def _create_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
 ):
    """Build ``output_dir/timeseries.json`` with one NDVI entry per raster in ``input_dir``.
    Files whose name contains ``DIST_CLOUD`` are skipped. The date is the first
    8-digit ``_``-separated token of the file name (ISO-formatted when it parses as
    ``%Y%m%d``); without such a token the first token is used and a warning printed.
    Values come from ``_sample_3x3``; a ``blue`` field is added when its band means
    provide ``b02``. Entries are sorted by date before being written.
    """
    print(f"[NDVI-{source_name}] Creating timeseries.json...")
    entries = []
    for raster_path in sorted(input_dir.glob(pattern)):
        filename = raster_path.name
        if "DIST_CLOUD" in filename:
            continue
        tokens = filename.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next(
            (tok for tok in tokens if len(tok) == 8 and tok.isdigit()), None
        )
        if date_str is None:
            date_str = tokens[0]
            date = date_str
            print(
                f"[NDVI-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
            )
        else:
            try:
                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
            except ValueError:
                # Eight digits that are not a calendar date: keep the raw token.
                date = date_str
        ndvi_value, band_means = _sample_3x3(raster_path, site_position)
        blue_mean = band_means.get("b02") if band_means else None
        if ndvi_value is None:
            print(
                f"[NDVI-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
            )
        entry = {"date": date, "filename": filename, "ndvi": ndvi_value}
        if blue_mean is not None:
            entry["blue"] = blue_mean
        entries.append(entry)
    entries.sort(key=lambda item: item["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    timeseries_file = output_dir / "timeseries.json"
    timeseries_file.write_text(json.dumps(entries, indent=2))
    print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(entries)} entries)")
 def _process_ndvi_files(
    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
 ):
    """Write an NDVI raster into ``output_dir`` for every usable raster in ``input_dir``.
    ``DIST_CLOUD`` files are skipped silently; files that cannot be opened or have
    fewer than 4 bands (RED and NIR are required) are skipped with a message.
    ``output_namer``, when given, maps an input path to the output file name;
    otherwise the input name is reused.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[NDVI-{source_name}] Processing {input_dir}...")
    candidates = sorted(input_dir.glob(pattern))
    if len(candidates) == 0:
        print(f"[NDVI-{source_name}] No files found")
        return
    for raster_path in candidates:
        if "DIST_CLOUD" in raster_path.name:
            continue
        try:
            with rasterio.open(raster_path) as src:
                band_count = src.count
        except Exception as e:
            print(
                f"[NDVI-{source_name}] Skipping {raster_path.name} (error reading: {e})"
            )
            continue
        if band_count < 4:
            print(
                f"[NDVI-{source_name}] Skipping {raster_path.name} (only {band_count} band(s), need 4+)"
            )
            continue
        target_name = output_namer(raster_path) if output_namer else raster_path.name
        output_file = output_dir / target_name
        _calculate_and_write_ndvi(raster_path, output_file)
        print(f"[NDVI-{source_name}] Saved: {output_file}")
 def generate_ndvi_raw(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFFs are no longer created, only timeseries."""
    return None
 def _get_output_name_prepared(geotiff_file):
    """Return the output file name for a prepared raster.
    Non-``.tif`` inputs keep their name. ``.tif`` files without ``REFL`` in the stem
    only have ``.tif`` replaced by ``.geotiff``. ``REFL`` files become
    ``<token>_ndvi.geotiff``, where the token is index 2 of the ``_``-split stem for
    ``S2*`` names with at least three tokens (``S2A_MSIL2A_YYYYMMDD_REFL``), else
    index 1 (``composite_YYYYMMDD...`` and any other multi-token name), else the
    only token.
    """
    if geotiff_file.suffix != ".tif":
        return geotiff_file.name
    stem = geotiff_file.stem
    if "REFL" not in stem:
        return geotiff_file.name.replace(".tif", ".geotiff")
    tokens = stem.split("_")
    if len(tokens) >= 3 and tokens[0].startswith("S2"):
        date_token = tokens[2]
    elif len(tokens) >= 2:
        # Covers the "composite_YYYYMMDD" layout and the generic fallback alike.
        date_token = tokens[1]
    else:
        date_token = tokens[0]
    return f"{date_token}_ndvi.geotiff"
 def _fusion_namer(f):
    """Name the NDVI output after the second ``_``-separated token of ``f``'s stem."""
    return f"{f.stem.split('_')[1]}_ndvi.geotiff"
 def generate_ndvi_post_process(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFFs are no longer created, only timeseries."""
    return None
 def create_ndvi_timeseries_post_process(season, site_position, site_name):
    """Write NDVI timeseries for every post-processed run of a site and season.
    Covers both strategies and both sigma values; for each run the ``s2``, ``s3`` and
    ``fusion`` directories are processed in that order, reading from
    ``data/<site>/<season>/processed_<strategy>_sigma<sigma>/<source>/`` and writing
    to the matching ``ndvi/<source>/`` directory.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            run_root = f"data/{site_name}/{season}/processed_{strategy}_sigma{sigma}"
            for source in ("s2", "s3", "fusion"):
                _create_timeseries_for_dir(
                    Path(f"{run_root}/{source}/"),
                    Path(f"{run_root}/ndvi/{source}/"),
                    site_position,
                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
                )
 def _calculate_and_write_gcc(input_file, output_file):
    """Write a single-band float32 GCC raster derived from ``input_file``.
    GCC is ``GREEN / (RED + GREEN + BLUE)`` wherever the band sum is positive; all
    other pixels stay 0, which is also declared as the output nodata value.
    """
    with rasterio.open(input_file) as src:
        blue_band = src.read(BLUE_BAND).astype(np.float32)
        green_band = src.read(GREEN_BAND).astype(np.float32)
        red_band = src.read(RED_BAND).astype(np.float32)
        band_sum = red_band + green_band + blue_band
        valid = band_sum > 0
        gcc = np.zeros_like(green_band, dtype=np.float32)
        gcc[valid] = green_band[valid] / band_sum[valid]
        # Reuse the source georeferencing but describe a single LZW-compressed band.
        out_profile = src.profile.copy()
        out_profile.update(count=1, dtype="float32", nodata=0, compress="lzw")
        with rasterio.open(output_file, "w", **out_profile) as dst:
            dst.write(gcc, 1)
            dst.set_band_description(1, "GCC")
 def _get_gcc_value(gcc_file, site_position):
    """Sample band 1 of ``gcc_file`` at the site, or return ``None``.
    ``site_position`` is indexed as ``(lat, lon)`` in EPSG:4326 and reprojected to
    the raster CRS. ``None`` is returned when the point lies outside the raster
    bounds, the sample equals the raster nodata value, the sample is NaN, or any
    error occurs (the error is printed).
    """
    try:
        with rasterio.open(gcc_file) as src:
            lon, lat = site_position[1], site_position[0]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            samples = list(src.sample([(px, py)]))
            if not samples:
                return None
            value = float(samples[0][0])
            if src.nodata is not None and value == src.nodata:
                return None
            return None if np.isnan(value) else value
    except Exception as e:
        print(f"Error sampling {gcc_file.name}: {e}")
    return None
 def _get_gcc_from_original(input_file, site_position):
    """Return the mean GCC of the (up to) 3x3 pixel window around the site, or ``None``.
    ``site_position`` is indexed as ``(lat, lon)`` in EPSG:4326. Single-band rasters
    are treated as already holding the index: the mean of finite, positive window
    pixels is returned. Rasters with at least three bands yield the mean of
    ``GREEN / (RED + GREEN + BLUE)`` over window pixels whose band sum is positive
    and whose three bands are all non-negative. Two-band rasters, points outside the
    raster, windows without a valid pixel and any error give ``None``.
    """
    try:
        with rasterio.open(input_file) as src:
            if src.count == 1:
                layers = [src.read(1).astype(np.float32)]
            elif src.count < 3:
                return None
            else:
                layers = [
                    src.read(band).astype(np.float32)
                    for band in (BLUE_BAND, GREEN_BAND, RED_BAND)
                ]
            lon, lat = site_position[1], site_position[0]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            row, col = src.index(px, py)
            if not (0 <= row < src.height and 0 <= col < src.width):
                return None
            # 3x3 neighbourhood clipped at the raster edges.
            row_span = slice(max(0, row - 1), min(src.height, row + 2))
            col_span = slice(max(0, col - 1), min(src.width, col + 2))
            windows = [layer[row_span, col_span] for layer in layers]
            if len(windows) == 1:
                win = windows[0]
                usable = np.isfinite(win) & (win > 0)
                return float(np.mean(win[usable])) if np.any(usable) else None
            blue_window, green_window, red_window = windows
            total = red_window + green_window + blue_window
            usable = (
                (total > 0)
                & ~np.isnan(total)
                & (blue_window >= 0)
                & (green_window >= 0)
                & (red_window >= 0)
            )
            if not np.any(usable):
                negative_pixels = np.sum(
                    (blue_window < 0) | (green_window < 0) | (red_window < 0)
                )
                if negative_pixels > 0:
                    print(
                        f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)"
                    )
                return None
            # At least one usable pixel exists here, so the mean is well defined.
            ratios = green_window[usable] / total[usable]
            return float(np.mean(ratios))
    except Exception:
        return None
 def _create_gcc_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
 ):
    """Build ``output_dir/timeseries.json`` with one GCC entry per raster in ``input_dir``.
    Files whose name contains ``DIST_CLOUD`` are skipped. The date is the first
    8-digit ``_``-separated token of the file name (ISO-formatted when it parses as
    ``%Y%m%d``); without such a token the first token is used and a warning printed.
    Values come from ``_get_gcc_from_original`` and are stored under
    ``greenness_index`` (``None`` when sampling failed). Entries are sorted by date.
    """
    print(f"[GCC-{source_name}] Creating timeseries.json...")
    entries = []
    for raster_path in sorted(input_dir.glob(pattern)):
        filename = raster_path.name
        if "DIST_CLOUD" in filename:
            continue
        tokens = filename.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next(
            (tok for tok in tokens if len(tok) == 8 and tok.isdigit()), None
        )
        if date_str is None:
            date_str = tokens[0]
            date = date_str
            print(
                f"[GCC-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
            )
        else:
            try:
                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
            except ValueError:
                # Eight digits that are not a calendar date: keep the raw token.
                date = date_str
        gcc_value = _get_gcc_from_original(raster_path, site_position)
        if gcc_value is None:
            print(
                f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
            )
        entries.append(
            {"date": date, "filename": filename, "greenness_index": gcc_value}
        )
    entries.sort(key=lambda item: item["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    timeseries_file = output_dir / "timeseries.json"
    timeseries_file.write_text(json.dumps(entries, indent=2))
    print(f"[GCC-{source_name}] Saved: {timeseries_file} ({len(entries)} entries)")
 def _process_gcc_files(
    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
 ):
    """Write a GCC raster into ``output_dir`` for every usable raster in ``input_dir``.
    ``DIST_CLOUD`` files are skipped silently; files that cannot be opened or have
    fewer than 3 bands are skipped with a message. ``output_namer``, when given,
    maps an input path to the output file name; otherwise the input name is reused.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[GCC-{source_name}] Processing {input_dir}...")
    candidates = sorted(input_dir.glob(pattern))
    if len(candidates) == 0:
        print(f"[GCC-{source_name}] No files found")
        return
    for raster_path in candidates:
        if "DIST_CLOUD" in raster_path.name:
            continue
        try:
            with rasterio.open(raster_path) as src:
                band_count = src.count
        except Exception as e:
            print(
                f"[GCC-{source_name}] Skipping {raster_path.name} (error reading: {e})"
            )
            continue
        if band_count < 3:
            print(
                f"[GCC-{source_name}] Skipping {raster_path.name} (only {band_count} band(s), need 3+)"
            )
            continue
        target_name = output_namer(raster_path) if output_namer else raster_path.name
        output_file = output_dir / target_name
        _calculate_and_write_gcc(raster_path, output_file)
        print(f"[GCC-{source_name}] Saved: {output_file}")
 def generate_gcc_post_process(season, site_position, site_name):
    """Do nothing: GCC GeoTIFFs are no longer created, only timeseries."""
    return None
 def create_gcc_timeseries_post_process(season, site_position, site_name):
    """Write GCC timeseries for every post-processed run of a site and season.
    For each strategy and sigma, the ``s2``, ``s3`` and ``fusion`` directories of
    ``processed_<strategy>_sigma<sigma>`` are processed in that order. When the
    matching ``processed_<strategy>_itb_sigma<sigma>`` directory exists, the same
    three sources are processed there as well.
    """
    sources = ("s2", "s3", "fusion")
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            run_root = f"data/{site_name}/{season}/processed_{strategy}_sigma{sigma}"
            for source in sources:
                _create_gcc_timeseries_for_dir(
                    Path(f"{run_root}/{source}/"),
                    Path(f"{run_root}/gcc/{source}/"),
                    site_position,
                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
                )
            base_itb = Path(
                f"data/{site_name}/{season}/processed_{strategy}_itb_sigma{sigma}"
            )
            if base_itb.exists():
                for source in sources:
                    _create_gcc_timeseries_for_dir(
                        base_itb / source,
                        base_itb / "gcc" / source,
                        site_position,
                        f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}",
                    )
 def _get_bands_from_original(input_file, site_position):
    """Return mean B02, B03, B04, B8A over the (up to) 3x3 window at the site, or ``None``.
    ``site_position`` is indexed as ``(lat, lon)`` in EPSG:4326. Only window pixels
    where all four bands are non-NaN and strictly positive contribute. ``None`` is
    returned for rasters with fewer than 4 bands, points outside the raster bounds,
    windows without a valid pixel, and on any error.
    """
    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None
            lon, lat = site_position[1], site_position[0]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            row, col = src.index(px, py)
            # 3x3 neighbourhood clipped at the raster edges.
            window = (
                (max(0, row - 1), min(src.height, row + 2)),
                (max(0, col - 1), min(src.width, col + 2)),
            )
            bands = [
                src.read(band, window=window).astype(np.float32)
                for band in (1, 2, 3, 4)
            ]
            stacked = np.stack(bands)
            usable = ~np.isnan(stacked).any(axis=0) & (stacked > 0).all(axis=0)
            if not np.any(usable):
                return None
            return {
                key: float(np.mean(band[usable]))
                for key, band in zip(("b02", "b03", "b04", "b8a"), bands)
            }
    except Exception:
        return None
 def _create_bands_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
 ):
    """Build ``output_dir/timeseries.json`` with per-band means for rasters in ``input_dir``.
    Files whose name contains ``DIST_CLOUD`` or that have no 8-digit ``_``-separated
    token are skipped. The token is ISO-formatted when it parses as ``%Y%m%d``;
    otherwise the raw token is kept as the date instead of aborting the whole run.
    Band values come from ``_get_bands_from_original``; when it returns ``None`` the
    entry carries only ``date`` and ``filename``. Entries are sorted by date.
    """
    print(f"[BANDS-{source_name}] Creating timeseries.json...")
    timeseries = []
    for f in sorted(input_dir.glob(pattern)):
        if "DIST_CLOUD" in f.name:
            continue
        parts = f.name.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next((p for p in parts if len(p) == 8 and p.isdigit()), None)
        if not date_str:
            continue
        try:
            date = datetime.strptime(date_str, "%Y%m%d").isoformat()
        except ValueError:
            # Eight digits that are not a calendar date (e.g. an id): same fallback
            # as the NDVI and GCC timeseries builders in this module.
            date = date_str
        bands = _get_bands_from_original(f, site_position)
        timeseries.append({"date": date, "filename": f.name, **(bands or {})})
    timeseries.sort(key=lambda x: x["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
    print(
        f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
    )
 def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
    """Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv.
    Each input directory is read through its ``timeseries.json`` (a list of dicts);
    a missing or unreadable file counts as empty. Entries are joined on the first 10
    characters of their ``date``; on shared keys such as ``filename`` the later
    source wins (ndvi, then gcc, then bands). CSV fields containing a comma, a
    double quote or a line break are quoted and embedded quotes are doubled, so the
    file stays parseable by standard CSV readers.
    """
    def load(p):
        try:
            return json.loads((Path(p) / "timeseries.json").read_text())
        except (OSError, ValueError):
            # Missing directory/file or invalid JSON: treat the source as empty.
            return []
    def by_day(entries):
        return {str(t.get("date", ""))[:10]: t for t in entries}
    ndvi = by_day(load(ndvi_dir))
    gcc = by_day(load(gcc_dir))
    bands = by_day(load(bands_dir))
    merged = []
    for k in sorted(set(ndvi) | set(gcc) | set(bands)):
        r = {"date": k, "filename": ""}
        for d in (ndvi.get(k, {}), gcc.get(k, {}), bands.get(k, {})):
            r.update({x: v for x, v in d.items() if x != "date"})
        merged.append(r)
    export_dir.mkdir(parents=True, exist_ok=True)
    (export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))
    cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]
    def esc(v):
        s = "" if v is None else str(v)
        if any(ch in s for ch in ',"\r\n'):
            # Quote the field and double embedded quotes so the row stays valid CSV.
            return '"' + s.replace('"', '""') + '"'
        return s
    rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
    (export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
    print(
        f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
    )
 def create_prepared_fusion_timeseries(season, site_position, site_name):
    """Generate NDVI, GCC, and band timeseries for prepared S2/S3 and fusion outputs.
    For each strategy, every existing ``s2``, ``s3``, ``fusion`` and
    ``fusion_sigma30`` directory under ``prepared_<strategy>`` gets NDVI, GCC and
    band timeseries followed by a merged export. When ``prepared_<strategy>_itb``
    exists, only GCC timeseries are produced for its existing sub-directories.
    """
    fusion_variants = ((None, "fusion"), (30, "fusion_sigma30"))
    builders = (
        (_create_timeseries_for_dir, "ndvi"),
        (_create_gcc_timeseries_for_dir, "gcc"),
        (_create_bands_timeseries_for_dir, "bands"),
    )
    for strategy in ("aggressive", "nonaggressive"):
        base = Path(f"data/{site_name}/{season}/prepared_{strategy}")
        products = [
            (source, f"PREPARED-{source.upper()}-{strategy}") for source in ("s2", "s3")
        ]
        products += [
            (fusion_sub, f"FUSION-{strategy}-σ{sig or 20}")
            for sig, fusion_sub in fusion_variants
        ]
        for sub, label in products:
            inp = base / sub
            if not inp.exists():
                continue
            for build, kind in builders:
                build(inp, base / kind / sub, site_position, label, "*.tif")
            _write_export(
                base / "ndvi" / sub,
                base / "gcc" / sub,
                base / "bands" / sub,
                base / "export" / sub,
            )
        itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb")
        if not itb.exists():
            continue
        itb_products = [
            (source, f"PREPARED-ITB-{source.upper()}-{strategy}")
            for source in ("s2", "s3")
        ]
        itb_products += [
            (fusion_sub, f"FUSION-ITB-{strategy}-σ{sig or 20}")
            for sig, fusion_sub in fusion_variants
        ]
        for sub, label in itb_products:
            inp = itb / sub
            if inp.exists():
                _create_gcc_timeseries_for_dir(
                    inp, itb / "gcc" / sub, site_position, label, "*.tif"
                )
 def create_bands_timeseries_post_process(season, site_position, site_name):
    """Write band timeseries and merged exports for every post-processed run.
    For each strategy and sigma, every existing ``s2``, ``s3`` and ``fusion``
    directory under ``processed_<strategy>_sigma<sigma>`` gets a band timeseries,
    then the run's ndvi/gcc/bands timeseries for that source are merged into
    ``export/<source>``.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            base = Path(
                f"data/{site_name}/{season}/processed_{strategy}_sigma{sigma}"
            )
            for source in ("s2", "s3", "fusion"):
                inp = base / source
                if not inp.exists():
                    continue
                _create_bands_timeseries_for_dir(
                    inp,
                    base / "bands" / source,
                    site_position,
                    f"POST-{source.upper()}-{strategy}-σ{sigma}",
                    "*.geotiff",
                )
                _write_export(
                    base / "ndvi" / source,
                    base / "gcc" / source,
                    base / "bands" / source,
                    base / "export" / source,
                )
--- a/metrics_stats.py
+++ b/metrics_stats.py
@ -1,529 +0,0 @@
 """Metrics and statistics: temporal metrics and PhenoCam stats."""
 import json
 import numpy as np
 from pathlib import Path
 from datetime import datetime, timedelta
 from scipy import sparse
 from scipy.sparse.linalg import spsolve
 from scipy.stats import pearsonr
 # Whittaker smoothing parameter (lambda). NOTE(review): the name suggests units of
 # days squared and use as the default smoothing strength — confirm against callers.
 WHITTAKER_LAMBDA_DAYS_SQ = 400.0
 def _norm_date_key(s):
    """Return the date part of ``s`` as a key: text before the first ``T``, cut to 10 chars.
    ``None`` stays ``None``; any other value is stringified and stripped first.
    """
    if s is None:
        return None
    date_part, _sep, _rest = str(s).strip().partition("T")
    return date_part[:10]
 def load_timeseries(filepath):
    """Load a JSON timeseries file and map each entry's ``date`` to its ``greenness_index``.
    Returns an empty dict when the file does not exist; entries without a
    ``greenness_index`` map to ``None``.
    """
    if not Path(filepath).exists():
        return {}
    with open(filepath) as f:
        records = json.load(f)
    by_date = {}
    for record in records:
        by_date[record["date"]] = record.get("greenness_index")
    return by_date
 def match_dates(fusion_ts, phenocam_ts):
    """Align two date -> value mappings on their normalized date keys.
    Keys are normalized with ``_norm_date_key``; when several raw keys collapse to
    the same day the first one encountered wins. Only days present in both series
    with non-``None`` values on both sides are kept. Returns
    ``(fusion_values, phenocam_values, dates)`` with numpy arrays for the values
    and a sorted list of date keys.
    """
    def _by_day(series):
        normalized = {}
        for raw_key, value in series.items():
            day = _norm_date_key(raw_key)
            if day:
                normalized.setdefault(day, value)
        return normalized
    fusion_by_day = _by_day(fusion_ts)
    phenocam_by_day = _by_day(phenocam_ts)
    shared_days = sorted(fusion_by_day.keys() & phenocam_by_day.keys())
    triples = [
        (day, fusion_by_day[day], phenocam_by_day[day])
        for day in shared_days
        if fusion_by_day[day] is not None and phenocam_by_day[day] is not None
    ]
    dates = [day for day, _f, _p in triples]
    fusion_vals = [f_val for _d, f_val, _p in triples]
    phenocam_vals = [p_val for _d, _f, p_val in triples]
    return np.array(fusion_vals), np.array(phenocam_vals), dates
 def pearson_correlation(y_true, y_pred):
    """Return Pearson's r as a float, or ``None`` for fewer than 2 points or a constant series."""
    too_short = len(y_true) < 2
    if too_short or np.std(y_true) == 0 or np.std(y_pred) == 0:
        return None
    return float(pearsonr(y_true, y_pred)[0])
 def r_squared(y_true, y_pred):
    """Generalized R² against predicting ``mean(y_true)``; may be negative.
    Uses the same formula as ``nse`` with the same arguments and is not the square
    of Pearson's r. Returns ``None`` for fewer than 2 points or a constant ``y_true``.
    """
    if len(y_true) < 2 or np.std(y_true) == 0:
        return None
    residuals = y_true - y_pred
    deviations = y_true - np.mean(y_true)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum(deviations ** 2)
    return None if ss_tot == 0 else float(1 - (ss_res / ss_tot))
 def rmse(y_true, y_pred):
    """Return the root mean square error as a float, or ``None`` for empty input."""
    if len(y_true) == 0:
        return None
    errors = y_true - y_pred
    mean_squared = np.mean(errors ** 2)
    return float(np.sqrt(mean_squared))
 def mae(y_true, y_pred):
    """Return the mean absolute error as a float, or ``None`` for empty input."""
    if len(y_true) == 0:
        return None
    absolute_errors = np.abs(y_true - y_pred)
    return float(np.mean(absolute_errors))
 def nrmse(y_true, y_pred):
    """Return RMSE divided by ``mean(y_true)``, or ``None`` when undefined.
    Undefined cases: empty input, a zero mean of ``y_true``, or ``rmse`` returning
    ``None``.
    """
    if len(y_true) == 0:
        return None
    reference_mean = np.mean(y_true)
    if reference_mean == 0:
        return None
    error = rmse(y_true, y_pred)
    if error is None:
        return None
    return float(error / reference_mean)
 def nse(y_true, y_pred):
    """Return the Nash-Sutcliffe Efficiency, or ``None`` when undefined.
    Undefined cases: fewer than 2 points, or zero variance of ``y_true``.
    """
    if len(y_true) < 2:
        return None
    squared_error = np.sum((y_true - y_pred) ** 2)
    spread = np.sum((y_true - np.mean(y_true)) ** 2)
    return None if spread == 0 else float(1 - (squared_error / spread))
 def residual_vs_phenocam(fusion_ts, phenocam_ts):
    """Summarize the residual (fused GCC − PhenoCam GCC) over matched dates.
    Returns ``None`` when fewer than two dates match. Otherwise a dict with ``mean``
    (positive: fusion sits above PhenoCam on average; negative: below), ``std``,
    ``mae``, ``rmse`` and ``n_samples``. Comparing the BtI and ItB means at the same
    strategy/σ (``derived.bti_vs_itb_mean_residual``) shows which has less mean bias.
    """
    fused, reference, _matched_dates = match_dates(fusion_ts, phenocam_ts)
    if len(fused) < 2:
        return None
    residual = fused - reference
    return dict(
        mean=float(np.mean(residual)),
        std=float(np.std(residual)),
        mae=float(np.mean(np.abs(residual))),
        rmse=float(np.sqrt(np.mean(residual**2))),
        n_samples=int(len(residual)),
    )
 def calculate_temporal_metrics(fusion_ts, phenocam_ts):
    """Score a fusion series against PhenoCam on their matched dates.
    PhenoCam is the reference (``y_true``) for every score. ``nse_pc`` and ``nse``
    hold the same value. Returns ``None`` when fewer than two dates match;
    ``residual_vs_phenocam`` is included only when it could be computed.
    """
    fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts)
    if len(fusion_vals) < 2:
        return None
    nse_vs_phenocam = nse(phenocam_vals, fusion_vals)
    scorers = (
        ("pearson_r", pearson_correlation),
        ("r_squared", r_squared),
        ("rmse", rmse),
        ("mae", mae),
        ("nrmse", nrmse),
    )
    metrics = {name: score(phenocam_vals, fusion_vals) for name, score in scorers}
    metrics["nse_pc"] = nse_vs_phenocam
    metrics["nse"] = nse_vs_phenocam
    metrics["n_samples"] = len(fusion_vals)
    metrics["date_range"] = (
        {"start": dates[0], "end": dates[-1]} if dates else None
    )
    residual_stats = residual_vs_phenocam(fusion_ts, phenocam_ts)
    if residual_stats:
        metrics["residual_vs_phenocam"] = residual_stats
    return metrics
 def derived_tier1(temporal: dict) -> dict:
    """ΔNSE_PC (σ20 − σ30) and paired BtI vs ItB mean residual; needs temporal fusion keys.
    ΔNSE_PC > 0 → NSE_PC higher at σ=20 than σ=30 (tighter EFAST temporal kernel wins).
    ΔNSE_PC < 0 → σ=30 wins (broader smoothing matches PhenoCam better).
    Missing configs, ``None`` config entries and a ``None`` ``residual_vs_phenocam``
    value (e.g. JSON ``null``) all yield ``None`` in the corresponding output slot
    instead of raising.
    """
    def _entry(key):
        return temporal.get(key) or {}
    def _mean_residual(key):
        # ``or {}`` also covers a key that is present but holds None.
        mean = (_entry(key).get("residual_vs_phenocam") or {}).get("mean")
        return float(mean) if isinstance(mean, (int, float)) else None
    d_nse = {"bti": {}, "itb": {}}
    for strategy in ("aggressive", "nonaggressive"):
        for mode, suf in (("bti", ""), ("itb", "_itb")):
            n20 = _entry(f"{strategy}_sigma20{suf}").get("nse_pc")
            n30 = _entry(f"{strategy}_sigma30{suf}").get("nse_pc")
            if isinstance(n20, (int, float)) and isinstance(n30, (int, float)):
                d_nse[mode][strategy] = float(n20 - n30)
            else:
                d_nse[mode][strategy] = None
    paired = [
        {
            "strategy": strategy,
            "sigma": sig,
            "mean_residual_bti": _mean_residual(f"{strategy}_sigma{sig}"),
            "mean_residual_itb": _mean_residual(f"{strategy}_sigma{sig}_itb"),
        }
        for strategy in ("aggressive", "nonaggressive")
        for sig in (20, 30)
    ]
    return {
        "delta_nse_pc_sigma20_minus_sigma30": d_nse,
        "bti_vs_itb_mean_residual": paired,
    }
 # BtI scenario keys; the matching ItB scenario is the same key plus ``_itb``.
 MATCHED_PAIR_CONFIGS = (
    "aggressive_sigma20",
    "aggressive_sigma30",
    "nonaggressive_sigma20",
    "nonaggressive_sigma30",
 )
 def derived_matched_pair_workflow(temporal: dict) -> dict:
    """Compare each BtI scenario with its ItB twin and summarise the outcome.
    For every key in ``MATCHED_PAIR_CONFIGS`` the NSE_PC, RMSE and mean
    residual of ``temporal[key]`` (BtI) and ``temporal[key + "_itb"]`` (ItB)
    are paired. Non-numeric or missing values become ``None`` and are left out
    of the counts and of the mean delta.
    Returns:
        Dict with ``per_config`` (one row per config), ``consistency`` and
        ``nse_bti_wins_count`` (both the number of configs with a positive
        BtI − ItB NSE delta), ``residual_bti_wins_count`` (configs where the
        BtI mean residual is strictly greater than the ItB one),
        ``residual_nse_mismatch`` (the two counts differ) and
        ``mean_delta_nse`` (mean of the available NSE deltas, or ``None``).
    """
    def _num(value):
        return float(value) if isinstance(value, (int, float)) else None
    rows = []
    deltas: list[float] = []
    nse_wins = 0
    residual_wins = 0
    for config in MATCHED_PAIR_CONFIGS:
        bti = temporal.get(config) or {}
        itb = temporal.get(f"{config}_itb") or {}
        nse_b, nse_i = _num(bti.get("nse_pc")), _num(itb.get("nse_pc"))
        rmse_b, rmse_i = _num(bti.get("rmse")), _num(itb.get("rmse"))
        res_b = _num((bti.get("residual_vs_phenocam") or {}).get("mean"))
        res_i = _num((itb.get("residual_vs_phenocam") or {}).get("mean"))
        if nse_b is None or nse_i is None:
            delta_nse = None
            nse_flag = None
        else:
            delta_nse = nse_b - nse_i
            nse_flag = delta_nse > 0
            deltas.append(delta_nse)
            if nse_flag:
                nse_wins += 1
        if rmse_b is None or rmse_i is None:
            delta_rmse = None
        else:
            delta_rmse = rmse_b - rmse_i
        # Ties (and incomparable values) leave the residual flag undecided.
        residual_flag = None
        if res_b is not None and res_i is not None:
            if res_b > res_i:
                residual_flag = True
                residual_wins += 1
            elif res_b < res_i:
                residual_flag = False
        rows.append(
            {
                "config": config,
                "nse_pc_bti": nse_b,
                "nse_pc_itb": nse_i,
                "rmse_bti": rmse_b,
                "rmse_itb": rmse_i,
                "delta_nse_bti_minus_itb": delta_nse,
                "delta_rmse_bti_minus_itb": delta_rmse,
                "bti_wins": nse_flag,
                "residual_bti_wins": residual_flag,
            }
        )
    if deltas:
        mean_delta = float(sum(deltas) / len(deltas))
    else:
        mean_delta = None
    return {
        "per_config": rows,
        "consistency": nse_wins,
        "nse_bti_wins_count": nse_wins,
        "residual_bti_wins_count": residual_wins,
        "residual_nse_mismatch": residual_wins != nse_wins,
        "mean_delta_nse": mean_delta,
    }
 def calculate_phenocam_stats(phenocam_ts):
    """Summarise the non-``None`` values of a PhenoCam time series.
    Returns ``None`` when no value is left after dropping ``None`` entries,
    otherwise a dict with ``mean``, ``std``, ``min``, ``max`` (plain floats)
    and ``n_samples``.
    """
    present = [sample for sample in phenocam_ts.values() if sample is not None]
    if not present:
        return None
    arr = np.array(present)
    summary = {
        name: float(reducer(arr))
        for name, reducer in (
            ("mean", np.mean),
            ("std", np.std),
            ("min", np.min),
            ("max", np.max),
        )
    }
    summary["n_samples"] = len(arr)
    return summary
 def _s2_gcc_series_from_preselection(base: Path):
    """Build the raw S2 GCC series from s2_preselection.json.
    Uses the 3x3 site-window band means stored per raw S2 acquisition and
    computes GCC = b03 / (b02 + b03 + b04). Scale cancels, so DN vs
    reflectance is irrelevant. Returns (all_gcc, flags) where all_gcc maps
    YYYY-MM-DD -> gcc for every row with a positive band sum, and flags maps
    the same date key -> (excluded_aggressive, excluded_nonaggressive).
    """
    path = base / "raw" / "preselection" / "s2_preselection.json"
    if not path.exists():
        return {}, {}
    with open(path) as f:
        rows = json.load(f)
    all_gcc: dict = {}
    flags: dict = {}
    for e in rows:
        nk = _norm_date_key(e.get("date"))
        if not nk:
            continue
        try:
            b02 = float(e.get("b02"))
            b03 = float(e.get("b03"))
            b04 = float(e.get("b04"))
        except (TypeError, ValueError):
            continue
        total = b02 + b03 + b04
        if not np.isfinite(total) or total <= 0:
            continue
        gcc = b03 / total
        if not np.isfinite(gcc):
            continue
        if nk in all_gcc:
            continue
        all_gcc[nk] = float(gcc)
        flags[nk] = (
            bool(e.get("excluded_aggressive")),
            bool(e.get("excluded_nonaggressive")),
        )
    return all_gcc, flags
 def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
    """Daily Whittaker (weights 1 at obs); returns {YYYY-MM-DD: z}."""
    pairs = [
        (_norm_date_key(d), float(v))
        for d, v in zip(obs_dates, obs_values)
        if v is not None and _norm_date_key(d)
    ]
    if len(pairs) < 2:
        return {}
    days = sorted({p[0] for p in pairs})
    t0 = datetime.strptime(days[0], "%Y-%m-%d").date()
    t1 = datetime.strptime(days[-1], "%Y-%m-%d").date()
    n = (t1 - t0).days + 1
    if n < n_min:
        return {}
    w = np.zeros(n)
    y = np.zeros(n)
    for dk, val in pairs:
        i = (datetime.strptime(dk, "%Y-%m-%d").date() - t0).days
        if 0 <= i < n:
            w[i] = 1.0
            y[i] = val
    D = sparse.diags(
        [1.0, -2.0, 1.0], [0, 1, 2], shape=(n - 2, n), format="csc", dtype=np.float64
    )
    H = D.T @ D
    Wm = sparse.diags(w.astype(np.float64), format="csc")
    z = spsolve(Wm + lam * H, w * y)
    out = {}
    for i in range(n):
        out[(t0 + timedelta(days=i)).isoformat()] = float(z[i])
    return out
 def calculate_all_metrics(season, site_name, site_position):
    """Calculate PhenoCam-referenced metrics for one site/season and save them to JSON.
    Reads inputs below ``data/{site_name}/{season}`` and builds a results dict with:
    - ``phenocam_stats``: summary statistics of the PhenoCam GCC series;
    - ``phenocam_snr``: amplitude, spline RMSE and SNR (only when an SNR is available);
    - ``baseline``: raw S2, cloud-filtered S2, Whittaker-smoothed S2 and S3 series
      scored against PhenoCam (each only when its input exists);
    - ``temporal``: fusion metrics for the four BtI scenarios
      (``{strategy}_sigma{20|30}``) and the four ItB scenarios (same keys with
      an ``_itb`` suffix);
    - ``derived``: cross-scenario comparisons built from ``temporal``.
    The dict is written to ``data/{site_name}/{season}/metrics.json`` and returned.
    When no PhenoCam series is found the function returns early with only the
    empty ``temporal`` entry and does NOT write the file.
    ``site_position`` is accepted for interface compatibility and is unused.
    """
    del site_position
    results = {"temporal": {}}
    base = Path(f"data/{site_name}/{season}")
    # Load phenocam timeseries once (same for all scenarios)
    phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json"
    phenocam_ts = load_timeseries(phenocam_ts_path)
    if not phenocam_ts:
        print("[METRICS] Warning: No phenocam data found")
        return results
    # Calculate phenocam stats
    phenocam_stats = calculate_phenocam_stats(phenocam_ts)
    if phenocam_stats:
        results["phenocam_stats"] = phenocam_stats
    # SNR lookup order: cached sidecar -> write the sidecar (may hit the
    # network) and re-read it -> compute in memory without persisting.
    from phenocam_snr import compute_snr, load_phenocam_snr, write_phenocam_snr
    snr_info = load_phenocam_snr(site_name, season, base=Path("data"))
    if not snr_info:
        write_phenocam_snr(
            site_name, season, base=Path("data"), metrics=results, fetch_if_missing=True
        )
        snr_info = load_phenocam_snr(site_name, season, base=Path("data"))
    if not snr_info:
        snr_info = compute_snr(
            site_name, season, base=Path("data"), metrics=results, fetch_if_missing=True
        )
    if snr_info.get("snr") is not None:
        results["phenocam_snr"] = {
            "amplitude": snr_info.get("amplitude"),
            "spline_rmse_gcc90": snr_info.get("spline_rmse_gcc90"),
            "snr": snr_info.get("snr"),
        }
    # Baselines: S2-only and S3-only series scored against the same PhenoCam reference.
    baseline = {}
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if all_gcc:
        # Every raw S2 acquisition, regardless of exclusion flags.
        m0 = calculate_temporal_metrics(all_gcc, phenocam_ts)
        if m0:
            baseline["s2"] = m0
        # flag_idx selects the matching element of the (aggressive, nonaggressive) flag tuple.
        for strategy, flag_idx in (("aggressive", 0), ("nonaggressive", 1)):
            # Keep only the dates this strategy did not exclude, in date order.
            kept_items = sorted(
                (
                    (d, g)
                    for d, g in all_gcc.items()
                    if d in flags and not flags[d][flag_idx]
                ),
                key=lambda x: x[0],
            )
            if not kept_items:
                continue
            kept_ts = dict(kept_items)
            mcf = calculate_temporal_metrics(kept_ts, phenocam_ts)
            if mcf:
                baseline.setdefault("s2_cloudfree", {})[strategy] = mcf
            obs_d, obs_v = zip(*kept_items)
            # NOTE(review): the result key says "lambda400"; the actual value comes
            # from WHITTAKER_LAMBDA_DAYS_SQ defined elsewhere — confirm they agree.
            smooth = _whittaker_smooth_dict(obs_d, obs_v, WHITTAKER_LAMBDA_DAYS_SQ)
            if smooth:
                mw = calculate_temporal_metrics(smooth, phenocam_ts)
                if mw:
                    baseline.setdefault("s2_whittaker_lambda400", {})[strategy] = mw
    # S3 baseline is read from the sigma20 run of each strategy only.
    for strategy in ("aggressive", "nonaggressive"):
        p = base / f"processed_{strategy}_sigma20" / "gcc" / "s3" / "timeseries.json"
        if not p.exists():
            continue
        s3_ts = load_timeseries(p)
        if s3_ts:
            m3 = calculate_temporal_metrics(s3_ts, phenocam_ts)
            if m3:
                baseline.setdefault("s3", {})[strategy] = m3
    if baseline:
        results["baseline"] = baseline
    # Calculate fusion metrics for each scenario
    for strategy in ["aggressive", "nonaggressive"]:
        for sigma in [20, 30]:
            scenario_name = f"{strategy}_sigma{sigma}"
            print(f"[METRICS] Calculating metrics for {scenario_name}...")
            processed_dir = f"processed_{strategy}_sigma{sigma}"
            # Load fusion timeseries
            fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
            fusion_ts = load_timeseries(fusion_ts_path)
            if not fusion_ts:
                print(
                    f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping"
                )
                continue
            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
            if temporal_metrics:
                results["temporal"][scenario_name] = temporal_metrics
    # ItB scenarios: the directory is named processed_{strategy}_itb_sigma{sigma},
    # while the result key puts the suffix last ({strategy}_sigma{sigma}_itb).
    for strategy in ["aggressive", "nonaggressive"]:
        for sigma in [20, 30]:
            scenario_name = f"{strategy}_sigma{sigma}_itb"
            processed_dir = f"processed_{strategy}_itb_sigma{sigma}"
            fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
            fusion_ts = load_timeseries(fusion_ts_path)
            if not fusion_ts:
                print(
                    f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping"
                )
                continue
            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
            if temporal_metrics:
                results["temporal"][scenario_name] = temporal_metrics
    # Derived comparisons need at least one scenario in ``temporal``.
    if results["temporal"]:
        derived = derived_tier1(results["temporal"])
        derived["matched_pair_workflow"] = derived_matched_pair_workflow(
            results["temporal"]
        )
        results["derived"] = derived
    # Save results
    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"[METRICS] Saved results to {output_path}")
    return results
 def main():
    """Standalone script entry point.
    Expects ``<season> <site_name> <lat> <lon>`` on the command line, runs
    :func:`calculate_all_metrics` and writes the returned dict to
    ``data/{site_name}/{season}/metrics.json``. Prints a usage message and
    exits with status 1 when fewer than four arguments are given.
    """
    import sys
    # argv[1]..argv[4] are read below, so argv needs at least five entries
    # (script name included); checking for four let a missing <lon> through
    # and crashed with IndexError instead of showing the usage text.
    if len(sys.argv) < 5:
        print("Usage: metrics_stats.py <season> <site_name> <lat> <lon>")
        print("Example: metrics_stats.py 2024 innsbruck 47.116171 11.320308")
        sys.exit(1)
    season = int(sys.argv[1])
    site_name = sys.argv[2]
    site_position = (float(sys.argv[3]), float(sys.argv[4]))
    results = calculate_all_metrics(season, site_name, site_position)
    # Save results here as well: calculate_all_metrics returns without writing
    # when no PhenoCam series is found, and the file should still be produced.
    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"[METRICS] Saved results to {output_path}")
 # Run the command-line entry point only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/phenocam_snr.py
+++ b/phenocam_snr.py
@ -1,328 +0,0 @@
 """PhenoCam signal-to-noise ratio for aggregate utility eligibility (Richardson et al., 2018)."""
 from __future__ import annotations
 import json
 import re
 from pathlib import Path
 import requests
 # Base URL of the PhenoCam REST API (queried for the ``roilists`` endpoint).
 PHENOCAM_API = "https://phenocam.nau.edu/api"
 # Matches a ``# Spline RMSE gcc_90: <value>`` comment line (case-insensitive,
 # flexible whitespace); group 1 captures the numeric text.
 SPLINE_RMSE_RE = re.compile(
    r"^\s*#\s*Spline\s+RMSE\s+gcc_90\s*:\s*([0-9.eE+-]+)\s*$",
    re.IGNORECASE,
 )
 # Site name -> season (year); the default site list used by ``report_all_sites``.
 PRIMARY_SEASON: dict[str, int] = {
    "forthgr": 2024,
    "innsbruck": 2024,
    "pitsalu": 2024,
    "vindeln2": 2023,
    "sunflowerjerez1": 2024,
    "institutekarnobat": 2024,
 }
 # PhenoCam ROI type codes for archive URLs (first ROI used by acquisition when multiple exist).
 # Sites missing from this mapping skip the archive fast path entirely.
 SITE_ROITYPE: dict[str, str] = {
    "forthgr": "AG",
    "innsbruck": "GR",
    "pitsalu": "WL",
    "vindeln2": "MX",
    "sunflowerjerez1": "AG",
    "institutekarnobat": "AG",
 }
 # Root of the PhenoCam data archive; per-site ROI files live below ``{site}/ROI/``.
 PHENOCAM_ARCHIVE = "https://phenocam.nau.edu/data/archive"
 def phenocam_snr_path(site_name: str, season: int, base: Path | None = None) -> Path:
    """Return the SNR sidecar path ``{base}/{site}/{season}/raw/phenocam/phenocam_snr.json``.
    *base* falls back to the relative ``data`` directory when not given.
    """
    data_root = Path("data") if not base else base
    return data_root.joinpath(
        site_name, str(season), "raw", "phenocam", "phenocam_snr.json"
    )
 def parse_spline_rmse_gcc90(text: str) -> float | None:
    """Extract the value of the first ``# Spline RMSE gcc_90: <value>`` line in *text*.
    Only the first matching line is considered: ``None`` is returned when no
    line matches or when that line's captured text is not a valid float.
    """
    candidates = (SPLINE_RMSE_RE.match(line) for line in text.splitlines())
    hit = next(filter(None, candidates), None)
    if hit is None:
        return None
    try:
        return float(hit.group(1))
    except ValueError:
        return None
 def transition_dates_archive_url(site_name: str, roitype: str, seq: int = 1000) -> str:
    """Build the archive URL of a site's 1-day transition-dates CSV for one ROI.
    The file name follows ``{site}_{roitype}_{seq}_1day_transition_dates.csv``
    and lives in the site's ``ROI`` folder under ``PHENOCAM_ARCHIVE``.
    """
    filename = f"{site_name}_{roitype}_{seq}_1day_transition_dates.csv"
    return f"{PHENOCAM_ARCHIVE}/{site_name}/ROI/{filename}"
 def transition_dates_url(site_name: str) -> str | None:
    """Locate the transition-dates CSV URL for *site_name*.
    First probes the archive with HEAD requests for ROI sequences 1000, 2000
    and 1001 (only when the site has an entry in ``SITE_ROITYPE``) and returns
    the first URL answering 200. Otherwise pages through the API ``roilists``
    endpoint and returns the first non-empty ``one_day_transition_dates`` of a
    ROI belonging to the site. Request errors are swallowed; ``None`` means
    nothing was found.
    """
    roi_code = SITE_ROITYPE.get(site_name)
    for seq in (1000, 2000, 1001) if roi_code else ():
        candidate = transition_dates_archive_url(site_name, roi_code, seq)
        try:
            probe = requests.head(candidate, timeout=15, allow_redirects=True)
            if probe.status_code == 200:
                return candidate
        except requests.RequestException:
            continue
    try:
        page_url = f"{PHENOCAM_API}/roilists/"
        # The query is sent with the first request only; ``next`` links are
        # then followed without extra parameters.
        query: dict | None = {"site": site_name}
        while page_url:
            reply = requests.get(page_url, params=query, timeout=30)
            reply.raise_for_status()
            page = reply.json()
            for roi in page.get("results", []):
                if roi.get("site") != site_name:
                    continue
                link = roi.get("one_day_transition_dates")
                if link:
                    return link
            page_url = page.get("next")
            query = None
    except requests.RequestException:
        pass
    return None
 def fetch_spline_rmse_from_archive(site_name: str) -> float | None:
    """Read the spline RMSE straight from the archive CSV (fast path).
    Tries ROI sequences 1000, 2000 and 1001 for the site's ``SITE_ROITYPE``
    code and returns the first RMSE that parses. Returns ``None`` when the
    site has no ROI type entry, no candidate answers 200 with a parsable
    header, or every request fails.
    """
    roi_code = SITE_ROITYPE.get(site_name)
    if not roi_code:
        return None
    candidates = (
        transition_dates_archive_url(site_name, roi_code, seq)
        for seq in (1000, 2000, 1001)
    )
    for candidate in candidates:
        try:
            reply = requests.get(candidate, timeout=20)
            if reply.status_code == 200:
                value = parse_spline_rmse_gcc90(reply.text)
                if value is not None:
                    return value
        except requests.RequestException:
            continue
    return None
 def fetch_spline_rmse_gcc90(site_name: str) -> float | None:
    """Return the gcc_90 spline RMSE from the site's transition-dates file header.
    The archive fast path is tried first; if it yields nothing, the URL from
    :func:`transition_dates_url` is downloaded and parsed. Returns ``None``
    when no URL is found, the download fails, or the header has no RMSE line.
    """
    archived = fetch_spline_rmse_from_archive(site_name)
    if archived is not None:
        return archived
    fallback_url = transition_dates_url(site_name)
    if fallback_url:
        try:
            reply = requests.get(fallback_url, timeout=30)
            reply.raise_for_status()
            return parse_spline_rmse_gcc90(reply.text)
        except requests.RequestException:
            pass
    return None
 def season_amplitude(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
 ) -> float | None:
    """Return the seasonal GCC amplitude (max − min) for a site/season.
    Prefers ``metrics["phenocam_stats"]`` when both ``min`` and ``max`` are
    numeric. Otherwise reads ``{base}/{site}/{season}/raw/phenocam/phenocam_gcc.json``,
    which may be a list of rows with a ``greenness_index`` field or a flat
    mapping of values; non-numeric values are ignored. Returns ``None`` when
    the file is missing, has another shape, or holds no numeric value.
    """
    stats = (metrics.get("phenocam_stats") or {}) if metrics else {}
    lowest, highest = stats.get("min"), stats.get("max")
    if isinstance(lowest, (int, float)) and isinstance(highest, (int, float)):
        return float(highest - lowest)
    gcc_file = (
        (base or Path("data"))
        / site_name
        / str(season)
        / "raw"
        / "phenocam"
        / "phenocam_gcc.json"
    )
    if not gcc_file.is_file():
        return None
    payload = json.loads(gcc_file.read_text(encoding="utf-8"))
    if isinstance(payload, list):
        candidates = [item.get("greenness_index") for item in payload]
    elif isinstance(payload, dict):
        candidates = list(payload.values())
    else:
        return None
    numbers = [c for c in candidates if isinstance(c, (int, float))]
    if not numbers:
        return None
    return float(max(numbers) - min(numbers))
 def compute_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    spline_rmse: float | None = None,
    fetch_if_missing: bool = True,
 ) -> dict:
    """Assemble amplitude, spline RMSE and their ratio (SNR) for a site/season.
    The RMSE comes from *spline_rmse* when given; otherwise from the cached
    sidecar if that file exists; otherwise, when *fetch_if_missing* is true,
    from a PhenoCam download. SNR is ``amplitude / rmse`` and stays ``None``
    unless both are numeric and the RMSE is positive.
    Returns:
        Dict with ``site``, ``season``, ``amplitude``, ``spline_rmse_gcc90``
        and ``snr``.
    """
    data_root = base or Path("data")
    amplitude = season_amplitude(site_name, season, base=data_root, metrics=metrics)
    noise = spline_rmse
    if noise is None:
        sidecar = phenocam_snr_path(site_name, season, data_root)
        if sidecar.is_file():
            # An existing sidecar is authoritative: no download is attempted
            # even if it carries no RMSE.
            stored = json.loads(sidecar.read_text(encoding="utf-8"))
            noise = stored.get("spline_rmse_gcc90")
        elif fetch_if_missing:
            noise = fetch_spline_rmse_gcc90(site_name)
    ratio_defined = (
        isinstance(amplitude, (int, float))
        and isinstance(noise, (int, float))
        and noise > 0
    )
    ratio = float(amplitude) / float(noise) if ratio_defined else None
    return {
        "site": site_name,
        "season": season,
        "amplitude": amplitude,
        "spline_rmse_gcc90": noise,
        "snr": ratio,
    }
 def write_phenocam_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    fetch_if_missing: bool = True,
 ) -> Path | None:
    """Compute the SNR record and persist it as ``phenocam_snr.json``.
    Nothing is written when no spline RMSE could be obtained; a warning is
    printed and ``None`` is returned. Otherwise the sidecar (including the
    resolved transition-dates URL and the site's ROI type) is written and its
    path returned.
    """
    data_root = base or Path("data")
    record = compute_snr(
        site_name,
        season,
        base=data_root,
        metrics=metrics,
        fetch_if_missing=fetch_if_missing,
    )
    if record.get("spline_rmse_gcc90") is None:
        print(
            f"[PhenoCam-SNR] Warning: no spline RMSE for {site_name} {season}; "
            "skipping phenocam_snr.json"
        )
        return None
    target = phenocam_snr_path(site_name, season, data_root)
    target.parent.mkdir(parents=True, exist_ok=True)
    source_url = transition_dates_url(site_name)
    document = dict(
        site=site_name,
        season=season,
        amplitude=record.get("amplitude"),
        spline_rmse_gcc90=record.get("spline_rmse_gcc90"),
        snr=record.get("snr"),
        source="phenocam_1day_transition_dates_header",
        transition_dates_url=source_url,
        roitype=SITE_ROITYPE.get(site_name),
    )
    target.write_text(json.dumps(document, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-SNR] Saved: {target} (SNR={record.get('snr')})")
    return target
 def load_phenocam_snr(
    site_name: str, season: int, *, base: Path | None = None
 ) -> dict | None:
    """Return the parsed SNR sidecar for a site/season, or ``None`` if the file is absent."""
    sidecar = phenocam_snr_path(site_name, season, base)
    if sidecar.is_file():
        return json.loads(sidecar.read_text(encoding="utf-8"))
    return None
 def suggest_snr_threshold(snrs: list[float]) -> tuple[float, str]:
    """
    Pick an SNR eligibility threshold from the cross-site SNR values.
    Returns ``(threshold, rationale)``. A data-driven threshold is used only
    when two neighbouring values (in sorted order) are at least 0.5 apart and
    straddle 2 (lower one below 2, upper one at or above 2); the threshold is
    then their midpoint rounded to three decimals. In every other case the
    default of 2.0 is returned with a rationale naming the reason.
    """
    if not snrs:
        return 2.0, "default SNR >= 2 (no site SNR values available)"
    ordered = sorted(snrs)
    if len(ordered) == 1:
        return 2.0, "default SNR >= 2 (single site only)"
    if all(value >= 2.0 for value in ordered):
        return 2.0, "default SNR >= 2 (all sites exceed 2; no low-SNR exclusion group)"
    # Walk neighbouring pairs: ``below`` is the top of the low group and
    # ``above`` the bottom of the high group for that split point.
    for below, above in zip(ordered, ordered[1:]):
        if above - below >= 0.5 and below < 2.0 <= above:
            midpoint = (below + above) / 2.0
            return (
                round(midpoint, 3),
                f"gap between {below:.3f} and {above:.3f} straddles SNR=2 "
                f"(midpoint {midpoint:.3f})",
            )
    return 2.0, "default SNR >= 2 (no clear low/high cluster separation)"
 def report_all_sites(
    *,
    base: Path | None = None,
    sites: dict[str, int] | None = None,
    fetch_if_missing: bool = True,
 ) -> list[dict]:
    """Compute and print the SNR table for a set of sites.
    *sites* maps site name -> season and defaults to ``PRIMARY_SEASON``. Sites
    are processed in name order; an existing ``metrics.json`` is passed to
    :func:`compute_snr` for the amplitude. After the table, the suggested
    threshold is printed and each row with a numeric SNR gains
    ``eligible_at_2``, ``eligible_at_3`` and ``eligible_at_suggested`` flags.
    Returns the list of row dicts.
    """
    data_root = base or Path("data")
    plan = sites or PRIMARY_SEASON
    rows: list[dict] = []
    for site_name in sorted(plan):
        year = plan[site_name]
        stored = data_root / site_name / str(year) / "metrics.json"
        if stored.is_file():
            site_metrics = json.loads(stored.read_text(encoding="utf-8"))
        else:
            site_metrics = None
        rows.append(
            compute_snr(
                site_name,
                year,
                base=data_root,
                metrics=site_metrics,
                fetch_if_missing=fetch_if_missing,
            )
        )
    def _cell(value):
        # Missing values are shown as a dash placeholder in the table.
        return "---" if value is None else value
    print(f"{'site':<20} {'season':>6} {'amplitude':>10} {'rmse_spl':>10} {'SNR':>8}")
    print("-" * 58)
    for row in rows:
        print(
            f"{row['site']:<20} {row['season']:>6} "
            f"{_cell(row.get('amplitude')):>10} "
            f"{_cell(row.get('spline_rmse_gcc90')):>10} "
            f"{_cell(row.get('snr')):>8}"
        )
    numeric = [row["snr"] for row in rows if isinstance(row.get("snr"), (int, float))]
    threshold, rationale = suggest_snr_threshold(numeric)
    print(f"\nSuggested threshold: SNR >= {threshold} ({rationale})")
    for row in rows:
        value = row.get("snr")
        if not isinstance(value, (int, float)):
            continue
        row["eligible_at_2"] = value >= 2.0
        row["eligible_at_3"] = value >= 3.0
        row["eligible_at_suggested"] = value >= threshold
    return rows
 # Running the module as a script prints the SNR table for the default site list.
 if __name__ == "__main__":
    report_all_sites()
--- a/phenology_timesat.py
+++ b/phenology_timesat.py
@ -1,738 +0,0 @@
 """
 PhenoCam GCC: green-up and green-down (50 % of seasonal amplitude) via TIMESAT.
 Reads ``data/.../raw/phenocam/phenocam_gcc.json`` (or any path) and uses the
 ``timesat`` package (``timesat.tsfprocess``) with the same seasonal-threshold
 meaning as the TIMESAT GUI: *startmethod* 1, *p_startcutoff* (0.5, 0.5) = 50 % of
 the **per-season** amplitude above the local base. See the TIMESAT manual,
 section 4.3 and row 37–38 (season start method = seasonal amplitude).
 **License:** the ``timesat`` PyPI wheel is under the TIMESAT Research License
 (non-commercial research; see package metadata on PyPI).
 PhenoCam time series: single-year acquisition writes
  ``phenocam_gcc.json`` (and ``phenocam_gcc.csv``). The three-year series used
  for TIMESAT is stored separately as ``phenocam_gcc_3y.json`` in the same
  folder (created on first use from the one-day summary API, then reused).
 Importable: ``write_phenocam_phenology_for_site`` is called from ``run.py``;
 the CLI entry point remains optional for ad-hoc runs.
 **Saving results:** use ``-o path.json`` or ``--sidecar`` to write a JSON file
 (see ``--help``). Sidecar mode writes ``phenocam_phenology.json`` (two dates
 only) next to ``phenocam_gcc.json``.
 ``run_pipeline`` in ``run.py`` writes the same ``phenocam_phenology.json`` by
 default when ``timesat`` is installed. GCC for TIMESAT uses ``phenocam_gcc_3y.json``
 if present, otherwise the PhenoCam API for that site (listed in
 ``data/sites.geojson``; not a site list from the API). One-year
 ``phenocam_gcc.json`` on disk can still fill gaps when merged.
 Use ``python phenology_timesat.py --all`` to batch every
 ``(sitename, season)`` from ``data/sites.geojson`` (``properties.sitename`` and
 ``properties.seasons``).
 """
 from __future__ import annotations
 import argparse
 import csv
 import json
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
 import numpy as np
 import requests
 # Base URL of the PhenoCam REST API (queried for the ``roilists`` endpoint).
 PHENOCAM_API = "https://phenocam.nau.edu/api"
 # ``timesat`` is an optional dependency: the module stays importable without
 # it, and ``_timesat`` is ``None`` in that case.
 try:
    import timesat as _timesat
 except ImportError:
    _timesat = None
 # Missing-data sentinel. NOTE(review): its consumers are outside this part of
 # the file — confirm how it is used.
 NODATA = -9999.0
 def load_phenocam_gcc(path: Path) -> dict[str, float]:
    """Return map YYYY-MM-DD -> greenness index from a PhenoCam JSON list.
    Each row is expected to be an object with ``date`` and ``greenness_index``;
    the date is truncated to its first ten characters. Rows are skipped when
    they are not objects, have no date, or carry a missing, non-numeric or
    non-finite value. A later row for the same date replaces the earlier one.
    Raises:
        OSError: if *path* cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Read as UTF-8 explicitly: that is how save_phenocam_gcc_json writes the
    # file, and the locale default may differ.
    with open(path, encoding="utf-8") as f:
        rows = json.load(f)
    out: dict[str, float] = {}
    for row in rows:
        if not isinstance(row, dict):
            continue
        raw_date = row.get("date")
        if raw_date is None:
            # str(None)[:10] would otherwise yield the bogus key "None".
            continue
        d = str(raw_date)[:10]
        v = row.get("greenness_index")
        # np.isfinite raises TypeError on strings such as "NA", so filter on
        # type first instead of letting one bad cell abort the whole load.
        if d and isinstance(v, (int, float)) and np.isfinite(v):
            out[d] = float(v)
    return out
 def _gcc_from_summary_row(row: dict, use_mean_fallback: bool) -> float | None:
    """Extract daily GCC from a one-day summary row (same rules as acquisition)."""
    if not use_mean_fallback:
        oflag = row.get("outlierflag_gcc_90")
        if oflag is not None and str(oflag).strip() in ("1", "1.0"):
            return None
    raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
    if raw is None:
        return None
    text = str(raw).strip()
    if not text or text.upper() == "NA":
        return None
    try:
        val = float(text)
    except ValueError:
        return None
    if val <= -9998.0:
        return None
    return val
 def _phenocam_one_day_summary_csv_url(site_name: str) -> str | None:
    """Return URL of the one-day summary CSV for *site_name*, or None on failure.
    Pages through the API ``roilists`` endpoint and uses the first ROI whose
    ``site`` equals *site_name*; later ROIs are not consulted even if that
    one has no ``one_day_summary``. Request errors yield ``None``.
    """
    try:
        url = f"{PHENOCAM_API}/roilists/"
        # The query goes with the first request only; ``next`` links are
        # followed as returned.
        params: dict | None = {"site": site_name}
        while url:
            r = requests.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            for roi in data.get("results", []):
                # ``.get`` rather than ``roi["site"]``: an entry without a
                # ``site`` field must not raise KeyError past the handler below.
                if roi.get("site") == site_name:
                    return roi.get("one_day_summary") or None
            url = data.get("next")
            params = None
        return None
    except requests.RequestException:
        return None
 def _parse_phenocam_gcc_from_csv_text(
    text: str, start_date: str, end_date: str
 ) -> dict[str, float]:
    """Map YYYY-MM-DD -> gcc for rows in [start_date, end_date] inclusive."""
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    lines = [line for line in text.split("\n") if line and not line.startswith("#")]
    reader = csv.DictReader(lines)
    fieldnames = reader.fieldnames or ()
    use_mean_fallback = "gcc_90" not in fieldnames
    out: dict[str, float] = {}
    for row in reader:
        try:
            date_str = row.get("date")
            if not date_str:
                continue
            date = datetime.strptime(date_str, "%Y-%m-%d")
            if not (start_dt <= date <= end_dt):
                continue
            gcc = _gcc_from_summary_row(row, use_mean_fallback)
            if gcc is not None:
                out[date.date().isoformat()] = gcc
        except (ValueError, KeyError):
            continue
    return out
 def save_phenocam_gcc_json(path: Path, by_date: dict[str, float]) -> None:
    """Write *by_date* as a date-sorted JSON list of ``{"date", "greenness_index"}`` rows.
    Parent directories are created as needed; the file is UTF-8, indented by
    two spaces, and ends with a newline.
    """
    records = []
    for day in sorted(by_date):
        records.append({"date": day, "greenness_index": by_date[day]})
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(records, indent=2) + "\n", encoding="utf-8")
 def fetch_phenocam_gcc_three_years_separately(
    site_name: str, season: int
 ) -> dict[str, float]:
    """
    Fetch one-day-summary GCC for the calendar years ``season-1`` … ``season+1``.
    The summary CSV is downloaded once and then filtered three times, one
    calendar-year window each; the yearly results are merged into a single
    date -> gcc mapping. Returns an empty dict (after printing a message) when
    no summary URL is known for the site or the download fails.
    """
    summary_url = _phenocam_one_day_summary_csv_url(site_name)
    if not summary_url:
        print(
            f"[PhenoCam phenology] No PhenoCam one-day summary URL for site {site_name!r}"
        )
        return {}
    try:
        reply = requests.get(summary_url, timeout=30)
        reply.raise_for_status()
    except requests.RequestException as e:
        print(f"[PhenoCam phenology] API CSV fetch failed: {e}")
        return {}
    body = reply.text
    merged: dict[str, float] = {}
    for year in (season - 1, season, season + 1):
        yearly = _parse_phenocam_gcc_from_csv_text(
            body, f"{year}-01-01", f"{year}-12-31"
        )
        merged.update(yearly)
    return merged
 def load_or_fetch_phenocam_gcc_3y(
    site_name: str, season: int, gcc_3y_path: Path
 ) -> dict[str, float]:
    """
    Return the three-year GCC series, preferring the cache at *gcc_3y_path*.
    A cache file that exists, parses and holds at least one value is used
    as-is. Otherwise the series is fetched for *site_name*, written to
    *gcc_3y_path* and returned; an empty fetch returns ``{}`` and writes
    nothing.
    """
    cached: dict[str, float] = {}
    if gcc_3y_path.is_file():
        try:
            cached = load_phenocam_gcc(gcc_3y_path)
        except (OSError, json.JSONDecodeError):
            # An unreadable cache is treated like a missing one.
            cached = {}
    if cached:
        print(f"[PhenoCam phenology] Using {gcc_3y_path} ({len(cached)} values)")
        return cached
    fetched = fetch_phenocam_gcc_three_years_separately(site_name, season)
    if not fetched:
        return {}
    save_phenocam_gcc_json(gcc_3y_path, fetched)
    print(
        f"[PhenoCam phenology] Fetched and wrote {gcc_3y_path} "
        f"({len(fetched)} values for {season - 1}–{season + 1})"
    )
    return fetched
 def resolve_phenocam_gcc_for_timesat(
    site_name: str, season: int, gcc_path: Path
 ) -> dict[str, float]:
    """
    Combine the three-year and one-year GCC series for TIMESAT input.
    The three-year series comes from ``phenocam_gcc_3y.json`` next to
    *gcc_path* (loaded or fetched). The one-year series is read from
    *gcc_path* when that file exists and parses. On duplicate dates the
    three-year value wins; with no three-year data the one-year series alone
    is returned.
    """
    three_year_file = gcc_path.parent / "phenocam_gcc_3y.json"
    multi_year = load_or_fetch_phenocam_gcc_3y(site_name, season, three_year_file)
    single_year: dict[str, float] = {}
    if gcc_path.is_file():
        try:
            single_year = load_phenocam_gcc(gcc_path)
        except (OSError, json.JSONDecodeError):
            pass
    if not multi_year:
        return single_year
    merged = dict(single_year)
    merged.update(multi_year)
    return merged
 def _day_count(calendar_year: int) -> int:
    a = datetime(calendar_year, 1, 1)
    b = datetime(calendar_year + 1, 1, 1)
    return (b - a).days
 def daily_profile_for_year(by_date: dict[str, float], calendar_year: int) -> np.ndarray:
    """
    Build a gap-free daily GCC array (float32) for one calendar year.
    The array has one entry per day of the year (365 or 366). Only finite,
    strictly positive values count as valid; every other day is filled by
    linear interpolation over the day index, with days before the first or
    after the last valid point taking that point's value. A single valid
    value is repeated for the whole year.
    Raises:
        ValueError: if the year has no valid value.
    """
    n_days = _day_count(calendar_year)
    jan1 = datetime(calendar_year, 1, 1)
    series = np.full(n_days, np.nan, dtype=np.float64)
    for offset in range(n_days):
        stamp = (jan1 + timedelta(days=offset)).strftime("%Y-%m-%d")
        if stamp in by_date:
            series[offset] = by_date[stamp]
    usable = np.isfinite(series) & (series > 0.0)
    if not np.any(usable):
        raise ValueError(f"No valid GCC in JSON for calendar year {calendar_year}")
    if np.sum(usable) == 1:
        only_value = float(series[usable][0])
        return np.full(n_days, only_value, dtype=np.float32)
    positions = np.arange(n_days, dtype=np.float64)
    filled = np.interp(positions, positions[usable], series[usable])
    return filled.astype(np.float32)
 def _gcc_profile_365_for_timesat(profile: np.ndarray) -> np.ndarray:
    """TIMESAT uses 365 days per season; drop Dec 31 on leap years."""
    p = np.asarray(profile, dtype=np.float32).ravel()
    if p.size == 366:
        return p[:365]
    if p.size == 365:
        return p
    raise ValueError(f"expected 365 or 366 daily values, got {p.size}")
 def yyyydoy_to_iso(v: float) -> str:
    """Convert a ``YYYYDDD`` value (year * 1000 + day-of-year) to an ISO ``YYYY-MM-DD`` string.

    *v* is rounded to the nearest integer first.
    """
    packed = int(round(float(v)))
    year, day_of_year = divmod(packed, 1000)
    moment = datetime(year, 1, 1) + timedelta(days=day_of_year - 1)
    return moment.date().isoformat()
 def build_yraw_three_years(
    by_date: dict[str, float], y1: int, y2: int, y3: int
 ) -> tuple[np.ndarray, str]:
    """
    Stack three calendar years of daily GCC (365 pts/year) for TIMESAT.

    Returns ``(yraw, mode)`` where *yraw* is a ``float32`` array of length
    ``3 * 365`` and *mode* is one of:

    - ``"three_independent_years"`` — *y1*, *y2* and *y3* each had at least one
      valid GCC value in *by_date*; their gap-filled profiles are concatenated.
    - ``"single_year_replicated"`` — *y1* or *y3* could not be built (e.g. only
      a single-year download is available); the *y2* profile is repeated three
      times (legacy TIMESAT workaround).

    Raises ``ValueError`` if the reference year *y2* itself has no valid GCC.
    """
    # The reference year is needed by both outcomes, so build it once up front.
    # If it has no data, the ValueError propagates directly instead of being
    # re-raised from inside an exception handler.
    middle = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y2))
    try:
        before = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y1))
        after = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y3))
    except ValueError:
        # A neighbouring year is unusable: fall back to replicating y2.
        return np.tile(middle, 3), "single_year_replicated"
    yraw = np.concatenate([before, middle, after]).astype(np.float32, copy=False)
    return yraw, "three_independent_years"
 def run_timesat_phenology_from_yraw(
    yraw: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
 ) -> dict[str, str | float | None]:
    """
    Run TIMESAT on a length ``365 * 3`` daily VI stack and calendar *years_triplet*
    (YYYY, YYYY, YYYY) for the time vector. Middle year in the triplet is the
    season whose SOS/EOS we report.

    Parameters:
        yraw: daily values, flattened and cast to ``float32``; must hold exactly
            ``3 * 365`` entries.
        years_triplet: the three calendar years used to build the ``YYYYDDD``
            time vector (365 days per year, starting at Jan 1).
        start_cutoff: pair written to slot 0 of ``p_startcutoff``.
        smooth_window: value written to slot 0 of ``p_smooth``.
        p_ignoreday: forwarded unchanged to ``tsfprocess``.

    Returns a dict with ``reference_calendar_year`` (the middle year),
    ``green_up_50pct_date`` / ``green_down_50pct_date`` (ISO date or ``None``),
    the matching ``*_fitted_gcc`` values (float or ``None``), ``nseason`` (list)
    and ``yfit_max``.
    NOTE(review): the return annotation does not cover the int and list values
    actually present in the dict.

    Raises:
        ValueError: *yraw* does not have ``3 * 365`` entries.
        ImportError: the ``timesat`` module is unavailable (``_timesat is None``).
    """
    yraw = np.asarray(yraw, dtype=np.float32).ravel()
    y1, y2, y3 = years_triplet
    nyear = 3
    npt = 365 * nyear
    if yraw.size != npt:
        raise ValueError(f"yraw must have length {npt}, got {yraw.size}")
    # Time vector: YYYYDDD integers, 365 consecutive days from Jan 1 of each
    # year (so day 366 of a leap year is never emitted).
    tlist: list[int] = []
    for y in (y1, y2, y3):
        t0 = datetime(y, 1, 1)
        for d in range(365):
            tlist.append(int((t0 + timedelta(days=d)).strftime("%Y%j")))
    tv = np.array(tlist, dtype=np.int32)
    if len(tv) != npt:
        raise RuntimeError("internal: length mismatch")
    # Single-pixel inputs: VI and QA are shaped (1, 1, npt); QA is all ones.
    vi = np.asfortranarray(yraw.reshape(1, 1, -1))
    qa = np.asfortranarray(np.ones((1, 1, npt), dtype=np.float32))
    lc = np.ones((1, 1), dtype=np.uint8)
    landuse = np.ones(255, dtype=np.uint8)
    p_out = np.arange(1, npt + 1, dtype=np.int32)
    p_ylu = np.asfortranarray(np.array([0.0, 1.0], dtype=np.float64))
    # Only slot 0 of each 255-long settings array is populated.
    # NOTE(review): presumably one slot per land-cover class — confirm against
    # the timesat documentation.
    ci = 0
    p_fitmethod = np.zeros(255, dtype=np.int32)
    p_fitmethod[ci] = 1
    p_smooth = np.zeros(255, dtype=np.float64)
    p_smooth[ci] = float(smooth_window)
    p_nenvi = np.zeros(255, dtype=np.int32)
    p_nenvi[ci] = 1
    p_wfact = np.zeros(255, dtype=np.float64)
    p_wfact[ci] = 1.0
    p_startmethod = np.zeros(255, dtype=np.int32)
    p_startmethod[ci] = 1
    p_startcutoff = np.zeros((255, 2), dtype=np.float64, order="F")
    p_startcutoff[ci, :] = np.array(
        [start_cutoff[0], start_cutoff[1]], dtype=np.float64
    )
    p_low = np.zeros(255, dtype=np.float64)
    p_fillbase = np.zeros(255, dtype=np.int32)
    p_seasonmethod = np.zeros(255, dtype=np.int32)
    p_seasonmethod[ci] = 1
    p_seapar = np.zeros(255, dtype=np.float64)
    p_seapar[ci] = 1.0
    if _timesat is None:
        raise ImportError("Install the 'timesat' package: pip install timesat")
    # Positional call: argument order is dictated by tsfprocess.
    # NOTE(review): meaning of the literal arguments (1, 0, 45, ...) is not
    # visible here — check the timesat API before changing any of them.
    vpp, _vppqa, nseason, yfit, _yfitqa, _seasonfit, _tseq = _timesat.tsfprocess(
        nyear,
        vi,
        qa,
        tv,
        lc,
        1,
        landuse,
        p_out,
        p_ignoreday,
        p_ylu,
        0,
        p_fitmethod,
        p_smooth,
        NODATA,
        45,
        0,
        p_nenvi,
        p_wfact,
        p_startmethod,
        p_startcutoff,
        p_low,
        p_fillbase,
        1,
        p_seasonmethod,
        p_seapar,
        1,
        1,
        1,
        npt,
        len(p_out),
    )
    a = vpp[0, 0, :]
    # three growing-season rows at indices 0, 13*2, 13*4 in the raw vector
    middle_block = 2
    off = 13 * middle_block
    # Within the selected row: +0/+1 are read as season-start date/value and
    # +3/+4 as season-end date/value; NaN when the vector is too short.
    sosd = a[off + 0] if a.size > off + 0 else np.nan
    sosv = a[off + 1] if a.size > off + 1 else np.nan
    eosd = a[off + 3] if a.size > off + 3 else np.nan
    eosv = a[off + 4] if a.size > off + 4 else np.nan
    yfit_max = float(np.max(yfit)) if yfit.size else float("nan")
    def pick(x: float) -> str | None:
        # Accept only finite values of at least 1e5 (a YYYYDDD date has seven
        # digits); the extra ``x < 0`` test is already implied by ``x < 1.0e5``.
        if not np.isfinite(x) or x < 1.0e5 or x < 0:
            return None
        try:
            return yyyydoy_to_iso(x)
        except (OverflowError, ValueError):
            return None
    return {
        "reference_calendar_year": y2,
        "green_up_50pct_date": pick(sosd),
        "green_up_50pct_fitted_gcc": float(sosv) if np.isfinite(sosv) else None,
        "green_down_50pct_date": pick(eosd),
        "green_down_50pct_fitted_gcc": float(eosv) if np.isfinite(eosv) else None,
        "nseason": nseason[0, 0].tolist() if nseason.ndim >= 2 else [],
        "yfit_max": yfit_max,
    }
 def run_timesat_phenology(
    daily_profile: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
 ) -> dict[str, str | float | None]:
    """
    Back-compat wrapper: run TIMESAT on a single year's profile repeated three times.

    *daily_profile* must flatten to 365 or 366 values (``ValueError`` otherwise);
    a 366th value is dropped. New code should prefer
    :func:`build_yraw_three_years` + :func:`run_timesat_phenology_from_yraw`.
    """
    profile = np.asarray(daily_profile, dtype=np.float32).ravel()
    n_values = len(profile)
    if n_values != 365 and n_values != 366:
        raise ValueError("daily_profile must have length 365 or 366")
    # Slicing is a no-op for 365 values and trims the extra day for 366.
    one_year = profile[:365]
    replicated = np.tile(one_year, 3)
    return run_timesat_phenology_from_yraw(
        replicated,
        years_triplet,
        start_cutoff=start_cutoff,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
 def phenocam_gcc_path(site_name: str, season: int) -> Path:
    """Return the relative path of the one-year GCC JSON for *site_name* / *season*."""
    location = "data/{}/{}/raw/phenocam/phenocam_gcc.json".format(site_name, season)
    return Path(location)
 def phenocam_gcc_3y_path(site_name: str, season: int) -> Path:
    """Return the relative path of the three-year GCC JSON for *site_name* / *season*."""
    location = "data/{}/{}/raw/phenocam/phenocam_gcc_3y.json".format(site_name, season)
    return Path(location)
 def iter_sites_seasons_with_phenocam(
    data_root: str | Path = "data",
 ) -> list[tuple[str, int]]:
    """``(site_name, season)`` for every ``phenocam_gcc.json`` under *data_root* (legacy)."""
    root = Path(data_root)
    if not root.is_dir():
        return []
    out: list[tuple[str, int]] = []
    seen: set[tuple[str, int]] = set()
    for p in sorted(root.glob("*/*/raw/phenocam/phenocam_gcc.json")):
        rel = p.relative_to(root)
        site, season_s = rel.parts[0], rel.parts[1]
        if not season_s.isdigit():
            continue
        season = int(season_s)
        key = (site, season)
        if key not in seen:
            seen.add(key)
            out.append(key)
    return out
 def iter_sites_seasons_from_sites_geojson(
    path: str | Path = "data/sites.geojson",
 ) -> list[tuple[str, int]]:
    """
    ``(sitename, season)`` from a GeoJSON FeatureCollection: each feature’s
    ``properties.sitename`` and each key in ``properties.seasons`` (4-digit year).
    """
    path = Path(path)
    if not path.is_file():
        return []
    with open(path, encoding="utf-8") as f:
        fc = json.load(f)
    out: list[tuple[str, int]] = []
    for feat in fc.get("features", []):
        props = feat.get("properties") or {}
        name = props.get("sitename")
        seasons = props.get("seasons")
        if not name or not isinstance(seasons, dict):
            continue
        for skey in sorted(seasons.keys()):
            if skey.isdigit() and len(skey) == 4:
                out.append((str(name), int(skey)))
    return out
 def write_phenocam_phenology_all(
    *,
    sites_geojson: str | Path | None = None,
    data_root: str | Path = "data",
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
 ) -> int:
    """
    Call :func:`write_phenocam_phenology_for_site` for each ``(site, season)``
    listed in *sites_geojson* and return how many pairs were processed.

    When *sites_geojson* is ``None`` the file :file:`<data_root>/sites.geojson`
    is used; the pairs come from that file, not from a glob over ``data/``.
    A diagnostic line is printed when the file yields no pairs.
    """
    if sites_geojson is None:
        geo = Path(Path(data_root) / "sites.geojson")
    else:
        geo = Path(sites_geojson)

    pairs = iter_sites_seasons_from_sites_geojson(geo)
    if not pairs:
        if geo.is_file():
            print(
                f"[PhenoCam phenology] No (sitename, season) entries in {geo} "
                "(check properties.sitename and properties.seasons)."
            )
        else:
            print(f"[PhenoCam phenology] Missing or empty sites file: {geo}")

    for site, season in pairs:
        print(f"=== {site} {season} ===")
        write_phenocam_phenology_for_site(
            site, season, smooth_window=smooth_window, p_ignoreday=p_ignoreday
        )
    n = len(pairs)
    print(f"[PhenoCam phenology] Processed {n} site/season pair(s) from {geo}.")
    return n
 def phenocam_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path of the phenology result JSON for *site_name* / *season*."""
    location = "data/{}/{}/raw/phenocam/phenocam_phenology.json".format(site_name, season)
    return Path(location)
 def write_phenocam_phenology_for_site(
    site_name: str,
    season: int,
    *,
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
 ) -> None:
    """
    Compute and store the 50 % green-up / green-down dates for one site/season.

    Requires ``timesat``; without it the function only prints a notice. GCC is
    resolved through :func:`resolve_phenocam_gcc_for_timesat` (three-year file,
    optionally merged with the one-year ``phenocam_gcc.json``), stacked for the
    years ``season - 1 .. season + 1`` and passed to TIMESAT. The result is
    written as ``phenocam_phenology.json`` with the keys
    ``green_up_50pct_date`` and ``green_down_50pct_date`` (ISO dates or null).
    Missing data and I/O problems are reported on stdout and skipped.
    """
    target = phenocam_phenology_path(site_name, season)
    if _timesat is None:
        print(
            f"[PhenoCam phenology] Skipped (no timesat); would write {target}. "
            "pip install timesat"
        )
        return

    gcc = phenocam_gcc_path(site_name, season)
    try:
        by_date = resolve_phenocam_gcc_for_timesat(site_name, season, gcc)
    except OSError as err:
        print(f"[PhenoCam phenology] Skipped: {err}")
        return
    if not by_date:
        three_year_file = gcc.parent / "phenocam_gcc_3y.json"
        print(
            f"[PhenoCam phenology] No GCC ({gcc} and no data in {three_year_file} after API); "
            f"skipping {target.name}."
        )
        return

    triplet = (season - 1, season, season + 1)
    try:
        yraw, stack_mode = build_yraw_three_years(by_date, *triplet)
    except (OSError, ValueError) as err:
        print(f"[PhenoCam phenology] Skipped: {err}")
        return

    result = run_timesat_phenology_from_yraw(
        yraw,
        triplet,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
    # Only the two dates are persisted; the remaining TIMESAT fields are dropped.
    record = {
        key: result.get(key)
        for key in ("green_up_50pct_date", "green_down_50pct_date")
    }
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(record, indent=2) + "\n")

    gup = record["green_up_50pct_date"]
    gdn = record["green_down_50pct_date"]
    print(
        f"[PhenoCam phenology] Wrote {target} (green-up {gup!r}, green-down {gdn!r}; "
        f"TIMESAT input={stack_mode})"
    )
 def main() -> None:
    """
    Command-line entry point.

    With ``--all`` every ``(sitename, season)`` from the sites GeoJSON is
    processed via :func:`write_phenocam_phenology_all`. Otherwise a single
    ``phenocam_gcc.json`` is loaded, TIMESAT is run for the season (given by
    ``--season``, else the first 4-digit path component, else the current
    year) and the full result is printed to stdout as JSON. ``-o`` writes that
    full result to a file; ``--sidecar`` instead writes only the two dates to
    ``phenocam_phenology.json`` next to the input and takes precedence over
    ``-o``.

    Exits via ``SystemExit`` when ``timesat`` is missing, the input is not a
    file, or the season year has no usable GCC values.
    """
    ap = argparse.ArgumentParser(
        description="TIMESAT 50 % seasonal-amplitude green-up / green-down for PhenoCam GCC JSON."
    )
    ap.add_argument(
        "--all",
        action="store_true",
        help="Write phenocam for every (sitename, season) in the sites GeoJSON (see --sites-geojson).",
    )
    ap.add_argument(
        "--data-root",
        type=Path,
        default=Path("data"),
        help="Resolves default --sites-geojson to <data-root>/sites.geojson.",
    )
    ap.add_argument(
        "--sites-geojson",
        type=Path,
        default=None,
        help="For --all: path to data/sites.geojson (default: <data-root>/sites.geojson).",
    )
    ap.add_argument(
        "gcc_json",
        type=Path,
        nargs="?",
        default=Path("data/innsbruck/2024/raw/phenocam/phenocam_gcc.json"),
        help="Path to phenocam_gcc.json (default: Innsbruck 2024 if present).",
    )
    ap.add_argument(
        "--season",
        type=int,
        default=None,
        help="Calendar year to build the daily GCC profile (default: infer from file path .../<year>/...).",
    )
    ap.add_argument(
        "--savitzky-hw",
        type=float,
        default=2.0,
        help="Half-width for fitmethod 1 (Savitzky–Golay); default 2.",
    )
    ap.add_argument(
        "--p-ignoreday",
        type=int,
        default=366,
        help="TIMESAT p_ignoreday (default 366).",
    )
    ap.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Write results to this JSON file (same schema as stdout, plus metadata).",
    )
    ap.add_argument(
        "--sidecar",
        action="store_true",
        help="Save two-date JSON next to input as phenocam_phenology.json (implies -o).",
    )
    args = ap.parse_args()
    if _timesat is None:
        raise SystemExit(
            "The 'timesat' package is required. Install with: pip install timesat"
        )
    if args.all:
        write_phenocam_phenology_all(
            sites_geojson=args.sites_geojson,
            data_root=args.data_root,
            smooth_window=args.savitzky_hw,
            p_ignoreday=args.p_ignoreday,
        )
        return
    path: Path = args.gcc_json
    if not path.is_file():
        raise SystemExit(f"Not a file: {path}")
    season = args.season
    if season is None:
        # First 4-digit path component wins (e.g. data/<site>/<year>/...).
        season = next(
            (int(part) for part in path.parts if part.isdigit() and len(part) == 4),
            None,
        )
        if season is None:
            season = datetime.now().year
    by_date = load_phenocam_gcc(path)
    try:
        yraw, stack_mode = build_yraw_three_years(
            by_date, season - 1, season, season + 1
        )
    except ValueError as e:
        # Same condition the batch path reports as "Skipped": the season year
        # has no usable GCC. Exit cleanly instead of dumping a traceback.
        raise SystemExit(f"Cannot build GCC profile for season {season} from {path}: {e}") from e
    out = run_timesat_phenology_from_yraw(
        yraw,
        (season - 1, season, season + 1),
        smooth_window=args.savitzky_hw,
        p_ignoreday=args.p_ignoreday,
    )
    payload = {
        **out,
        "source_gcc_json": str(path.resolve()),
        "profile_year": season,
        "timesat_input": stack_mode,
        "method": "TIMESAT tsfprocess; startmethod=1; p_startcutoff=[0.5,0.5] (50% seasonal amplitude)",
    }
    out_path = args.output
    if args.sidecar:
        # --sidecar overrides any -o target.
        out_path = path.parent / "phenocam_phenology.json"
    if out_path is not None:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        to_write = (
            {
                "green_up_50pct_date": out.get("green_up_50pct_date"),
                "green_down_50pct_date": out.get("green_down_50pct_date"),
            }
            if args.sidecar
            else payload
        )
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(to_write, f, indent=2)
            f.write("\n")
        # Status goes to stderr so stdout stays pure JSON up to the summary line.
        print(f"Wrote {out_path}", file=sys.stderr)
    print(json.dumps(payload, indent=2))
    gup = out.get("green_up_50pct_date")
    gdn = out.get("green_down_50pct_date")
    if gup and gdn:
        print(
            f"Green-up (50 %): {gup}  |  Green-down (50 %): {gdn}  "
            f"(profile year {season}, TIMESAT reference year {out['reference_calendar_year']})"
        )
 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/postprocessing.py
+++ b/postprocessing.py
@ -1,268 +0,0 @@
 """Post-processing: crop fusion/S2/S3 to valid pixels."""
 from pathlib import Path
 import numpy as np
 import rasterio
 from rasterio import windows
 from rasterio.warp import reproject, Resampling
 def process_cropped(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
 ):
    """Crop fusion to valid data, then crop S2/S3 to match.

    Inputs are read from ``data/<site_name>/<season>/prepared_<cleaning_strategy>/``
    (``s2``, ``s3`` and ``fusion`` or ``fusion_sigma<sigma>``); outputs go to
    ``processed_<cleaning_strategy>_sigma<sigma>`` (``sigma20`` when *sigma* is
    falsy) as ``<date>_0.geotiff`` in ``fusion``, ``s2`` and ``s3``.

    For each fusion ``REFL_<date>.tif`` the bounding box of pixels that are
    non-NaN and > 0.001 in any band is computed; fusion files without such
    pixels are skipped. The same pixel window is then applied to the matching
    S2 file and to the S3 composite after nearest-neighbour reprojection onto
    the fusion grid.

    *site_position* is accepted for signature compatibility and not used here.
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}"
    processed_dir = (
        f"processed_{cleaning_strategy}_sigma{sigma}"
        if sigma
        else f"processed_{cleaning_strategy}_sigma20"
    )
    processed = base / processed_dir
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    # Crop fusion to valid data and get dimensions
    fusion_dims = {}
    for fusion_file in fusion_prep.glob("REFL_*.tif"):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            valid = ~np.isnan(data) & (data > 0.001)
            # Collapse the band axis (0) plus one spatial axis to find the
            # rows / columns that contain at least one valid pixel.
            rows = np.any(valid, axis=(0, 2))
            cols = np.any(valid, axis=(0, 1))
            row_idx = np.where(rows)[0]
            col_idx = np.where(cols)[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[PROCESS] Skipping {fusion_file.name} (no valid pixels)")
                continue
            r0, r1 = row_idx[0], row_idx[-1]
            c0, c1 = col_idx[0], col_idx[-1]
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            data_crop = src.read(window=window)
            transform = rasterio.windows.transform(window, src.transform)
            p = src.profile.copy()
            p.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **p) as dst:
                dst.write(data_crop)
            # Remember the crop window and the full fusion grid for S2/S3.
            fusion_dims[date_str] = (
                c0,
                r0,
                w,
                h,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
        print(f"[PROCESS] Cropped fusion: {output_file}")
    # Crop S2 and S3 to fusion size
    for date_str, (
        c0,
        r0,
        w,
        h,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        window = windows.Window(c0, r0, w, h)
        # S2
        for s2_file in s2_prep.glob("*REFL.tif"):
            parts = s2_file.stem.split("_")
            # Names with fewer than three "_" fields carry no date in slot 2;
            # skip them instead of failing with IndexError.
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                    with rasterio.open(output_file, "w", **p2) as dst:
                        dst.write(data)
                print(f"[PROCESS] Cropped: {output_file}")
        # S3: resample to fusion pixel size, then crop
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                # Resample to fusion pixel size
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        # Crop using same window
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
                        with rasterio.open(output_file, "w", **p2) as dst:
                            dst.write(data)
            print(f"[PROCESS] Cropped: {output_file}")
    print("[PROCESS] Completed")
 def process_cropped_itb(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
 ):
    """Crop the ``_itb`` fusion GCC rasters to valid data, then crop S2/S3 to match.

    Reads from ``data/<site_name>/<season>/prepared_<cleaning_strategy>_itb/``
    and writes ``<date>_0.geotiff`` files under
    ``processed_<cleaning_strategy>_itb_sigma<sigma>`` (``sigma20`` when *sigma*
    is falsy). Fusion inputs are ``GCC_<date>.tif``; S2 inputs are ``*GCC.tif``
    whose third ``_``-separated field equals the date; S3 inputs are
    ``composite_<date>.tif``.

    *site_position* is not used in this function.
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}_itb"
    processed_dir = (
        f"processed_{cleaning_strategy}_itb_sigma{sigma}"
        if sigma
        else f"processed_{cleaning_strategy}_itb_sigma20"
    )
    processed = base / processed_dir
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    # Pass 1: crop each fusion raster to the bounding box of its valid pixels
    # and remember that window (plus the full fusion grid) per date.
    fusion_dims = {}
    for fusion_file in fusion_prep.glob("GCC_*.tif"):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            # Valid = not NaN and above 0.001, in any band.
            valid = ~np.isnan(data) & (data > 0.001)
            rows = np.any(valid, axis=(0, 2))
            cols = np.any(valid, axis=(0, 1))
            row_idx = np.where(rows)[0]
            col_idx = np.where(cols)[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)")
                continue
            r0, r1 = row_idx[0], row_idx[-1]
            c0, c1 = col_idx[0], col_idx[-1]
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            data_crop = src.read(window=window)
            transform = rasterio.windows.transform(window, src.transform)
            p = src.profile.copy()
            p.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **p) as dst:
                dst.write(data_crop)
            fusion_dims[date_str] = (
                c0,
                r0,
                w,
                h,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
        print(f"[PROCESS-ITB] Cropped fusion: {output_file}")
    # Pass 2: apply each date's fusion window to the S2 and S3 rasters.
    for date_str, (
        c0,
        r0,
        w,
        h,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        window = windows.Window(c0, r0, w, h)
        # S2: first file whose third name field matches the date; others are ignored.
        for s2_file in s2_prep.glob("*GCC.tif"):
            parts = s2_file.stem.split("_")
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                    with rasterio.open(output_file, "w", **p2) as dst:
                        dst.write(data)
                print(f"[PROCESS-ITB] Cropped: {output_file}")
                break
        # S3: reproject onto the full fusion grid in memory, then crop.
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                # Fusion grid geometry, but the S3 file's dtype and band count.
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
                        with rasterio.open(output_file, "w", **p2) as dst:
                            dst.write(data)
            print(f"[PROCESS-ITB] Cropped: {output_file}")
    print("[PROCESS-ITB] Completed")
 def post_process_all_itb_scenarios(season, site_position, site_name):
    """Run :func:`process_cropped_itb` for both cleaning strategies, each with sigma ``None`` and ``30``."""
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped_itb(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
 def post_process_all_scenarios(season, site_position, site_name):
    """Run :func:`process_cropped` for all 4 scenarios (two cleaning strategies × sigma ``None``/``30``)."""
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
 def post_process_timeseries(season, site_position, site_name):
    """Build the NDVI, GCC and S2-band timeseries, in that order, via ``metrics_indices``."""
    # Imported lazily so the module is only needed when timeseries are built.
    from metrics_indices import (
        create_ndvi_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_bands_timeseries_post_process,
    )
    builders = (
        create_ndvi_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_bands_timeseries_post_process,
    )
    for build in builders:
        build(season, site_position, site_name)
--- a/preparation.py
+++ b/preparation.py
@ -1,364 +0,0 @@
 """Data preparation: S2/S3 preprocessing for fusion."""
 import json
 import shutil
 from pathlib import Path
 from collections import defaultdict
 import numpy as np
 import rasterio
 from rasterio.warp import Resampling
 from rasterio.vrt import WarpedVRT
 from rasterio import shutil as rio_shutil
 # S2 pixels per S3 pixel along each axis: prepare_s2 sizes the S2 grid as
 # S3 width/height × this ratio, and prepare_s3 divides it back out.
 RESOLUTION_RATIO = 21
 # Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
 # NOTE(review): the window is applied over neighbouring composites by index, so it
 # equals calendar days only when there is one composite per day — confirm.
 S3_MOVING_AVERAGE_WINDOW_DAYS = 5
 def _apply_s3_temporal_moving_average(s3_dir, window):
    """Smooth every ``composite_*.tif`` in ``s3_dir`` in place with a centred moving average.

    Composites are ordered by the second ``_``-separated token of the file stem.
    Zeros are treated as nodata (converted to NaN) while averaging and written
    back as 0. Each output is the NaN-aware mean of up to ``(window - 1) // 2``
    neighbours on either side; the window shrinks at both ends of the series.
    Does nothing when ``window <= 1`` or when no composites are found.
    """
    if window <= 1:
        return
    composite_paths = sorted(
        s3_dir.glob("composite_*.tif"), key=lambda path: path.stem.split("_")[1]
    )
    if not composite_paths:
        return
    half_width = (window - 1) // 2
    layers, layer_profiles = [], []
    for path in composite_paths:
        with rasterio.open(path) as src:
            values = src.read().astype(np.float32)
            values[values == 0] = np.nan
            layers.append(values)
            layer_profiles.append(src.profile.copy())
    stack = np.stack(layers, axis=0)
    n_dates, _, _, _ = stack.shape
    smoothed = np.empty_like(stack)
    for idx in range(n_dates):
        first = max(0, idx - half_width)
        last = min(n_dates, idx + half_width + 1)
        smoothed[idx] = np.nanmean(stack[first:last], axis=0)
    # Pixels with no valid neighbour stay NaN after nanmean; store them as nodata 0.
    smoothed = np.nan_to_num(smoothed, nan=0.0, posinf=0.0, neginf=0.0).astype(
        np.float32
    )
    for path, layer_profile, layer in zip(composite_paths, layer_profiles, smoothed):
        layer_profile.update({"dtype": "float32", "nodata": 0})
        with rasterio.open(path, "w", **layer_profile) as dst:
            dst.write(layer)
    print(f"[S3-PREP] Applied {window}-day centred MA ({n_dates} composites)")
 def _import_distance_to_clouds():
    """Lazy import of efast.distance_to_clouds."""
    try:
        from efast.s2_processing import distance_to_clouds
        return distance_to_clouds
    except ImportError:
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        )
 def _load_excluded(season, site_name, cleaning_strategy):
    """Load excluded filenames from NDVI timeseries (excluded_aggressive / excluded_nonaggressive)."""
    base = Path(f"data/{site_name}/{season}/raw/preselection")
    key = f"excluded_{cleaning_strategy}"
    clouds = {"s2": set(), "s3": set()}
    for source in ["s2", "s3"]:
        ts_file = base / f"{source}_preselection.json"
        if ts_file.exists():
            data = json.loads(ts_file.read_text())
            clouds[source] = {e["filename"] for e in data if e.get(key)}
    return clouds
 def _get_base_dir(season, site_name, cleaning_strategy):
    return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/")
 def _get_itb_base_dir(season, site_name, cleaning_strategy):
    return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb")
 def _compute_gcc_from_refl_array(blue, green, red):
    total = (
        blue.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
    )
    mask = (total > 0) & np.isfinite(total)
    gcc = np.zeros_like(green, dtype=np.float32)
    gcc[mask] = green[mask].astype(np.float32) / total[mask]
    return gcc
 def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
    dst_s2_dir.mkdir(parents=True, exist_ok=True)
    for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
        dst = dst_s2_dir / src.name
        if dst.is_symlink() or dst.exists():
            dst.unlink(missing_ok=True)
        try:
            dst.symlink_to(src.resolve())
        except OSError:
            shutil.copy2(src, dst)
 def prepare_s2_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
 ):
    """Write a single-band GCC raster for every prepared S2 reflectance file.

    Each ``*REFL.tif`` in the prepared S2 directory yields a ``*_GCC.tif`` in the
    ItB S2 directory. Existing outputs and sources with fewer than four bands
    are skipped. DIST_CLOUD rasters are then linked (or copied) across.
    ``site_position`` is accepted but not used in this function.
    """
    prepared_s2 = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2.mkdir(parents=True, exist_ok=True)
    for refl_path in sorted(prepared_s2.glob("*REFL.tif")):
        gcc_path = itb_s2 / refl_path.name.replace("_REFL.tif", "_GCC.tif")
        if gcc_path.exists():
            continue
        with rasterio.open(refl_path) as src:
            if src.count < 4:
                continue
            # Bands 1-3 are passed on as blue, green, red — assumes B, G, R band order (TODO confirm).
            blue, green, red = [
                src.read(band).astype(np.float32) for band in (1, 2, 3)
            ]
            gcc = _compute_gcc_from_refl_array(blue, green, red)
            out_profile = src.profile.copy()
            out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
            with rasterio.open(gcc_path, "w", **out_profile) as dst:
                dst.write(gcc, 1)
        print(f"[S2-ITB] Saved {gcc_path.name}")
    _link_dist_cloud_from_prepared(prepared_s2, itb_s2)
 def prepare_s3_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
 ):
    """Write a single-band GCC raster for every prepared S3 composite.

    Each ``composite_*.tif`` in the prepared S3 directory yields a file of the
    same name in the ItB S3 directory. Existing outputs and sources with fewer
    than four bands are skipped. ``site_position`` is accepted but not used.
    """
    prepared_s3 = _get_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3.mkdir(parents=True, exist_ok=True)
    for composite_path in sorted(prepared_s3.glob("composite_*.tif")):
        gcc_path = itb_s3 / composite_path.name
        if gcc_path.exists():
            continue
        with rasterio.open(composite_path) as src:
            if src.count < 4:
                continue
            # Bands 1-3 are passed on as blue, green, red — assumes B, G, R band order (TODO confirm).
            blue, green, red = [
                src.read(band).astype(np.float32) for band in (1, 2, 3)
            ]
            gcc = _compute_gcc_from_refl_array(blue, green, red)
            out_profile = src.profile.copy()
            out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
            with rasterio.open(gcc_path, "w", **out_profile) as dst:
                dst.write(gcc, 1)
        print(f"[S3-ITB] Saved {gcc_path.name}")
 def _reproject_raster_to_target(
    src_path,
    dst_path,
    target_bounds,
    target_crs,
    width,
    height,
    resampling=Resampling.bilinear,
 ):
    """Warp ``src_path`` onto a fixed target grid and save it as a float32 GeoTIFF.

    The grid spans ``target_bounds`` (left/bottom/right/top) in ``target_crs``
    with ``width`` × ``height`` pixels. Output nodata is set to 0.
    """
    left, bottom, right, top = (
        target_bounds.left,
        target_bounds.bottom,
        target_bounds.right,
        target_bounds.top,
    )
    grid_transform = rasterio.transform.from_bounds(
        left, bottom, right, top, width, height
    )
    with rasterio.open(src_path) as src:
        with WarpedVRT(
            src,
            transform=grid_transform,
            height=height,
            width=width,
            crs=target_crs,
            resampling=resampling,
        ) as vrt:
            out_profile = vrt.profile.copy()
            out_profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
            rio_shutil.copy(vrt, dst_path, **out_profile)
 def _rescale_dist_cloud_for_small_roi(s2_output_dir):
    """Rescale DIST_CLOUD when max distance ≤1 so EFAST fusion gets valid weights.

    EFAST uses wo_i = (distance - 1) / D; values ≤1 yield zero/NaN weights. In small
    ROIs (e.g. PhenoCam sites, 7×4 LR grid), distance_transform_edt never exceeds 1.
    For every ``*DIST_CLOUD.tif`` whose band-1 maximum is ≤1, all positive values
    are overwritten in place with 2.0; other values are left as they are.
    """
    for dist_path in s2_output_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(dist_path, "r") as src:
            distances = src.read(1)
        largest = float(np.nanmax(distances))
        # Written as a negated "<=" so a NaN maximum is skipped, never rescaled.
        if not (largest <= 1):
            continue
        # Every positive distance becomes exactly 2.0, giving (d - 1) a positive value.
        rescaled = np.where(distances > 0, 2.0, distances).astype(np.float32)
        with rasterio.open(dist_path, "r+") as dst:
            dst.write(rescaled, 1)
        print(f"[S2-PREP] Rescaled DIST_CLOUD for {dist_path.name} (max was {largest})")
 def prepare_s2(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
 ):
    """Prepare raw S2 scenes for fusion and compute their distance-to-cloud rasters.

    Reads ``data/<site>/<season>/raw/s2/*.geotiff``, skips files excluded by the
    preselection for ``cleaning_strategy``, divides values by 10000, and reprojects
    each scene onto a grid taken from one non-excluded raw S3 file (same bounds and
    CRS, width/height × RESOLUTION_RATIO). Results are written as
    ``S2A_MSIL2A_<date>_REFL.tif`` in the prepared S2 directory; existing outputs
    are not recomputed. Afterwards ``distance_to_clouds`` from efast is run on the
    output directory and DIST_CLOUD rasters are rescaled for small ROIs.

    ``site_position`` is a ``(lat, lon)`` pair used only for logging here;
    ``date_range`` is accepted but not used.

    Raises:
        ValueError: if no non-excluded S3 file is available as grid reference.
    """
    lat, lon = site_position
    s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    s2_output_dir = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s2_output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )
    s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
    if not s3_files:
        raise ValueError("No non-cloud S3 files found for reference bounds")
    # NOTE(review): glob order is not sorted, so which S3 file acts as the reference
    # is filesystem-dependent — presumably all S3 files share one grid; confirm.
    with rasterio.open(s3_files[0]) as s3_ref:
        target_bounds = s3_ref.bounds
        target_crs = s3_ref.crs
        s2_width = s3_ref.width * RESOLUTION_RATIO
        s2_height = s3_ref.height * RESOLUTION_RATIO
    for s2_file in sorted(s2_dir.glob("*.geotiff")):
        if s2_file.name in clouds["s2"]:
            print(
                f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
            )
            continue
        # The first "_"-separated token of the file name is used as the date tag.
        date_str = s2_file.name.split("_")[0]
        refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
        if refl_dst.exists():
            print(f"[S2-PREP] Skipping {s2_file.name} (exists)")
            continue
        print(f"[S2-PREP] Processing {s2_file.name}...")
        # Scaled data goes through a temporary file so the reprojection helper
        # can read it from disk; the file is removed after a successful warp.
        temp_normalized = s2_output_dir / f"temp_{s2_file.name}"
        with rasterio.open(s2_file) as src:
            data = src.read().astype("float32") / 10000.0
            profile = src.profile.copy()
            profile.update({"dtype": "float32", "nodata": 0})
            with rasterio.open(temp_normalized, "w", **profile) as dst:
                dst.write(data)
        _reproject_raster_to_target(
            temp_normalized, refl_dst, target_bounds, target_crs, s2_width, s2_height
        )
        temp_normalized.unlink()
        print(f"[S2-PREP] Saved: {refl_dst}")
    print("[S2-PREP] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud_for_small_roi(s2_output_dir)
    print("[S2-PREP] Completed")
 def prepare_s3(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
 ):
    """Build daily S3 composites on the low-resolution grid matching the prepared S2 data.

    Steps, in order:
    1. Group non-excluded ``raw/s3/*.geotiff`` files by the first ``_``-separated
       token of their name (used as the date).
    2. Write one ``composite_<date>.tif`` per date into a temporary directory:
       a plain copy for a single acquisition, otherwise a NaN-aware mean.
    3. Reproject each composite (cubic resampling) to the bounds/CRS of a prepared
       S2 ``*REFL.tif``, with width/height = S2 size // RESOLUTION_RATIO.
    4. Apply the temporal moving average in place and delete the temporary directory.

    ``site_position`` is a ``(lat, lon)`` pair used only for logging here;
    ``date_range`` is accepted but not used. ``prepare_s2`` must have produced
    REFL files for the same strategy beforehand.

    Raises:
        ValueError: if the prepared S2 directory contains no ``*REFL.tif``.
    """
    lat, lon = site_position
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    s2_prepared_dir = base_dir / "s2"
    s3_preprocessed_dir = base_dir / "s3"
    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )
    s3_by_date = defaultdict(list)
    for s3_file in s3_dir.glob("*.geotiff"):
        if s3_file.name not in clouds["s3"]:
            s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
        else:
            print(
                f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
            )
    print(
        f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
    )
    # Start from an empty scratch directory so leftovers of an aborted run are not reused.
    temp_composite_dir = s3_preprocessed_dir / "temp_composites"
    if temp_composite_dir.exists():
        shutil.rmtree(temp_composite_dir)
    temp_composite_dir.mkdir()
    for date_str, s3_files in sorted(s3_by_date.items()):
        composite_path = temp_composite_dir / f"composite_{date_str}.tif"
        if len(s3_files) == 1:
            shutil.copy(s3_files[0], composite_path)
            print(f"[S3-PREP] Composite {date_str}: 1 acquisition")
        else:
            s3_stack = []
            for s3_file in s3_files:
                with rasterio.open(s3_file) as src:
                    data = src.read()
                    # Blank out (all bands) pixels whose across-band mean magnitude is ≥ 5 —
                    # presumably fill/outlier values; assumes a float dtype so NaN can be stored (TODO confirm).
                    data[:, np.abs(np.nanmean(data, axis=0)) >= 5] = np.nan
                    s3_stack.append(data)
            composite = np.nanmean(np.array(s3_stack), axis=0).astype("float32")
            # Georeferencing for the composite is taken from the first acquisition of the day.
            with rasterio.open(s3_files[0]) as src:
                profile = src.profile.copy()
                profile.update({"count": composite.shape[0], "dtype": "float32"})
            with rasterio.open(composite_path, "w", **profile) as dst:
                dst.write(composite)
            print(
                f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
            )
    # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds
    # This ensures fusion covers the same area as S2 and dimensions match
    sen2_ref_paths = list(s2_prepared_dir.glob("*REFL.tif"))
    if len(sen2_ref_paths) == 0:
        raise ValueError(f"No REFL files found in {s2_prepared_dir}")
    # Get bounds from REFL file (full coverage, matches S2)
    # Use integer division to match distance_to_clouds logic exactly
    with rasterio.open(sen2_ref_paths[0]) as s2_ref:
        target_bounds = s2_ref.bounds
        target_crs = s2_ref.crs
        # Use integer division matching distance_to_clouds: s2_height // ratio, s2_width // ratio
        width = s2_ref.width // RESOLUTION_RATIO
        height = s2_ref.height // RESOLUTION_RATIO
        s3_transform = rasterio.transform.from_bounds(
            target_bounds.left,
            target_bounds.bottom,
            target_bounds.right,
            target_bounds.top,
            width,
            height,
        )
    print(
        f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)..."
    )
    # Reproject each S3 composite to match S2 REFL bounds
    sen3_paths = sorted(temp_composite_dir.glob("*.tif"))
    for sen3_path in sen3_paths:
        vrt_options = {
            "transform": s3_transform,
            "height": height,
            "width": width,
            "crs": target_crs,
            "resampling": Resampling.cubic,
        }
        with rasterio.open(sen3_path) as s3_src:
            with WarpedVRT(s3_src, **vrt_options) as vrt:
                name = sen3_path.name
                outfile = s3_preprocessed_dir / name
                profile = vrt.profile.copy()
                profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
                rio_shutil.copy(vrt, outfile, **profile)
        print(f"[S3-PREP] Saved: {outfile}")
    # Smoothing runs on the reprojected composites, overwriting them in place.
    _apply_s3_temporal_moving_average(
        s3_preprocessed_dir, S3_MOVING_AVERAGE_WINDOW_DAYS
    )
    shutil.rmtree(temp_composite_dir)
    print("[S3-PREP] Completed")
--- a/preselection.py
+++ b/preselection.py
@ -1,142 +0,0 @@
 """Pre-selection: self-contained NDVI timeseries with cloud/dark-imagery exclusion markers."""
 import csv
 import json
 import numpy as np
 import rasterio
 from rasterio.warp import transform as transform_coords
 from pathlib import Path
 from datetime import datetime
 # Half-width, in days, of the neighbourhood whose NDVI values are compared in _is_excluded.
 WINDOW_DAYS = 14
 # Minimum number of NDVI samples in that window before the drop test is applied.
 MIN_WINDOW_SIZE = 3
 # Per strategy: an entry is excluded when its NDVI is below "threshold" AND more
 # than "delta" below the window maximum.
 THRESHOLDS = {"aggressive": {"threshold": 0.3, "delta": 0.15}, "nonaggressive": {"threshold": 0.2, "delta": 0.25}}
 # Minimum mean blue (b02) value per source; entries below it are excluded as dark.
 # S2 uses reflectance * 10000, S3 uses 0-1
 BLUE_MIN = {"s2": 100, "s3": 0.01}
 # 1-based raster band numbers (blue, green, red, NIR).
 GREEN_BAND = 2
 RED_BAND = 3
 NIR_BAND = 4
 BLUE_BAND = 1
 # Output keys for the per-band means of bands 1-4, in band order.
 BAND_KEYS = ["b02", "b03", "b04", "b8a"]
 def _sample_3x3(input_file, site_position):
    """Sample mean NDVI and all four bands (3x3 window) at site. Returns (ndvi, {b02,b03,b04,b8a}) or (None, None).

    ``site_position`` is ``(lat, lon)`` in EPSG:4326. ``(None, None)`` is returned
    when the raster has fewer than four bands, the site lies outside the raster,
    no pixel of the window has positive, non-NaN red and NIR values, or any
    exception is raised while reading (failures are swallowed, not reported).
    """
    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None, None
            bands = [src.read(i).astype(np.float32) for i in range(1, 5)]
            lon, lat = site_position[1], site_position[0]
            # Project the site into the raster CRS before looking up its pixel.
            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            if not (
                src.bounds.left <= x[0] <= src.bounds.right
                and src.bounds.bottom <= y[0] <= src.bounds.top
            ):
                return None, None
            row, col = src.index(x[0], y[0])
            if row < 0 or row >= src.height or col < 0 or col >= src.width:
                return None, None
            # 3x3 window around the site pixel, clipped at the raster edges.
            r0, r1 = max(0, row - 1), min(src.height, row + 2)
            c0, c1 = max(0, col - 1), min(src.width, col + 2)
            windows = [b[r0:r1, c0:c1] for b in bands]
            red_w, nir_w = windows[RED_BAND - 1], windows[NIR_BAND - 1]
            # Validity is judged on red and NIR only; the same mask is reused for all band means.
            mask = (red_w > 0) & (nir_w > 0) & ~np.isnan(red_w) & ~np.isnan(nir_w)
            if not np.any(mask):
                return None, None
            # Mean of per-pixel NDVI values (not NDVI of the mean reflectances).
            ndvi = float(np.mean((nir_w[mask] - red_w[mask]) / (nir_w[mask] + red_w[mask])))
            band_means = {k: round(float(np.mean(w[mask])), 6) for k, w in zip(BAND_KEYS, windows)}
            return ndvi, band_means
    # NOTE(review): deliberately broad best-effort — an unreadable file looks the same
    # as "no valid data" to the caller; consider logging the exception.
    except Exception:
        return None, None
 def _extract_date(filename):
    for part in filename.replace(".geotiff", "").split("_"):
        if len(part) == 8 and part.isdigit():
            return part, datetime.strptime(part, "%Y%m%d").isoformat()
    return None, None
 def _is_excluded(entry, entries, strategy, source="s2"):
    """Decide whether ``entry`` is excluded under ``strategy``.

    Excluded when NDVI is missing, when the mean blue value (``b02``) is below
    the source's BLUE_MIN, or when NDVI is both below the strategy threshold and
    more than ``delta`` below the highest NDVI within ±WINDOW_DAYS. The drop
    test is skipped (not excluded) when the window holds fewer than
    MIN_WINDOW_SIZE NDVI values.
    """
    limits = THRESHOLDS[strategy]
    ndvi = entry.get("ndvi")
    if ndvi is None:
        return True
    dark_limit = BLUE_MIN.get(source, BLUE_MIN["s2"])
    blue = entry.get("b02")
    if blue is not None and blue < dark_limit:
        return True
    centre = datetime.fromisoformat(entry["date"].replace("Z", "+00:00"))
    neighbours = [
        other["ndvi"]
        for other in entries
        if other.get("ndvi") is not None
        and abs(
            (datetime.fromisoformat(other["date"].replace("Z", "+00:00")) - centre).days
        )
        <= WINDOW_DAYS
    ]
    if len(neighbours) < MIN_WINDOW_SIZE:
        return False
    drop_limit = max(neighbours) - limits["delta"]
    return ndvi < drop_limit and ndvi < limits["threshold"]
 def create_timeseries(season, site_position, site_name):
    """Build NDVI timeseries (3x3 window) for raw S2/S3, with exclusion markers for both strategies.

    For each source ("s2", "s3") every ``raw/<source>/*.geotiff`` with a date in
    its name is sampled at ``site_position`` (``(lat, lon)``). The resulting
    entries (filename, date, ndvi, band means, ``excluded_aggressive``,
    ``excluded_nonaggressive``) are written to
    ``raw/preselection/<source>_preselection.json`` and ``.csv``. A source whose
    input directory does not exist is skipped.
    """
    lat, lon = site_position
    base = Path(f"data/{site_name}/{season}")
    print(f"[PRESELECT] Creating NDVI timeseries: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    for source in ["s2", "s3"]:
        input_dir = base / "raw" / source
        out_dir = base / "raw" / "preselection"
        out_dir.mkdir(parents=True, exist_ok=True)
        output_file = out_dir / f"{source}_preselection.json"
        if not input_dir.exists():
            print(f"[PRESELECT] Skipping {source}: {input_dir} not found")
            continue
        timeseries = []
        for f in sorted(input_dir.glob("*.geotiff")):
            if "DIST_CLOUD" in f.name:
                continue
            date_str, date_iso = _extract_date(f.name)
            if not date_str:
                continue
            # Files that cannot be sampled are kept with ndvi=None; _is_excluded marks them excluded.
            ndvi, band_means = _sample_3x3(f, site_position)
            entry = {"filename": f.name, "date": date_iso, "ndvi": ndvi}
            if band_means:
                entry.update(band_means)
            timeseries.append(entry)
        timeseries.sort(key=lambda e: e["date"])
        # Exclusion depends on neighbouring entries, so it runs only after the full series exists.
        for e in timeseries:
            e["excluded_aggressive"] = _is_excluded(e, timeseries, "aggressive", source)
            e["excluded_nonaggressive"] = _is_excluded(e, timeseries, "nonaggressive", source)
        with open(output_file, "w") as out:
            json.dump(timeseries, out, indent=2)
        csv_file = out_dir / f"{source}_preselection.csv"
        fieldnames = ["filename", "date", "ndvi"] + BAND_KEYS + ["excluded_aggressive", "excluded_nonaggressive"]
        with open(csv_file, "w", newline="") as out:
            w = csv.DictWriter(out, fieldnames=fieldnames, extrasaction="ignore")
            w.writeheader()
            for e in timeseries:
                # e.get() leaves band columns empty for entries without band means.
                w.writerow({k: e.get(k) for k in fieldnames})
        n_excl_agg = sum(1 for e in timeseries if e["excluded_aggressive"])
        n_excl_non = sum(1 for e in timeseries if e["excluded_nonaggressive"])
        print(f"[PRESELECT] Saved {output_file} + {csv_file.name}: {len(timeseries)} entries ({n_excl_agg} aggressive, {n_excl_non} nonaggressive excluded)")
    print("[PRESELECT] Completed")
 # Backward compatibility: older entry points kept as thin wrappers/aliases.
 def detect_clouds(season, site_position, site_name, cleaning_strategy="aggressive"):
    """Create timeseries with exclusion markers. Strategy is read from timeseries when preparing.

    ``cleaning_strategy`` is accepted for compatibility but ignored: the
    timeseries always carries the markers for both strategies.
    """
    create_timeseries(season, site_position, site_name)
 # Alias: ``preselect`` is the same callable as ``create_timeseries``.
 preselect = create_timeseries
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,2 +1,31 @@
 [project]
 name = "worldwide"
 version = "0.1.0"
 description = "Worldwide PhenoCam EFAST feasibility screening"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
    "efast @ git+https://github.com/DHI-GRAS/efast.git",
    "netCDF4",
    "numpy",
    "openeo",
    "pystac-client",
    "python-dateutil",
    "python-dotenv",
    "rasterio",
    "requests",
    "scipy",
    "shapely",
    "tqdm",
 ]
 [dependency-groups]
 dev = [
    "ruff",
 ]
 [tool.ruff.lint.per-file-ignores]
-"run.py" = ["F401"]
+"1-phenocam.py" = ["E402"]
 "2-phenocam-screening.py" = ["E402"]
 "3-sentinel-data.py" = ["E402"]
 "4-fusion.py" = ["E402"]
--- a/requirements.txt
+++ b/requirements.txt
@ -1,12 +0,0 @@
 pystac-client
 rasterio
 openeo
 python-dotenv
 netCDF4
 numpy
 timesat
 requests
 scipy
 matplotlib
 ruff
 pre-commit
--- a/run.py
+++ b/run.py
@ -1,87 +0,0 @@
 """Pipeline entry point.
 Active snippet below only **regenerates metrics.json** (temporal, baseline,
 `derived`, `residual_vs_phenocam`). Requires existing post-processed GCC
 timeseries under `data/{site}/{season}/processed_*`.
 Un-comment imports and steps below for acquisition → fusion → post-process.
 """
 # from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
 # from postprocessing import (
 #     post_process_all_scenarios,
 #     post_process_all_itb_scenarios,
 #     post_process_timeseries,
 # )
 # from acquisition_s2 import download_s2
 # from acquisition_s3 import download_s3
 # from acquisition_phenocam import download_phenocam
 # from preselection import create_timeseries
 # from preparation import (
 #     prepare_s2,
 #     prepare_s3,
 #     prepare_s2_gcc_for_itb,
 #     prepare_s3_gcc_for_itb,
 # )
 # from metrics_indices import create_prepared_fusion_timeseries
 from metrics_stats import calculate_all_metrics
 # from phenology_timesat import write_phenocam_phenology_for_site
 def run_pipeline(season, site_position, site_name):
    """Run pipeline (metrics-only by default; see module docstring).

    Only ``calculate_all_metrics`` is active; the commented-out calls are the
    earlier pipeline stages, listed in the order they were written. Any
    exception is printed and then re-raised, so a failure still aborts the run.
    """
    try:
        # print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}")
        # download_s2(season, site_position, site_name)
        # download_s3(season, site_position, site_name)
        # download_phenocam(season, site_position, site_name)
        # print(f"PhenoCam phenology (50 % amplitude): {site_name}, {season}")
        # write_phenocam_phenology_for_site(site_name, season)
        # print(f"Creating preselection timeseries: {site_name}, {season}")
        # create_timeseries(season, site_position, site_name)
        # print(f"Preparing S2 and S3 for fusion: {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2(season, site_position, site_name, cleaning_strategy=strategy)
        #     prepare_s3(season, site_position, site_name, cleaning_strategy=strategy)
        # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
        # run_all_efast_scenarios(season, site_position, site_name)
        # print(f"Index-then-Blend (ItB): {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        #     prepare_s3_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        # run_all_efast_itb_scenarios(season, site_position, site_name)
        # post_process_all_itb_scenarios(season, site_position, site_name)
        # print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
        # create_prepared_fusion_timeseries(season, site_position, site_name)
        # print(f"Post-processing (crop): {site_name}, {season}")
        # post_process_all_scenarios(season, site_position, site_name)
        # post_process_timeseries(season, site_position, site_name)
        print(f"Calculating metrics: {site_name}, {season}")
        # Note the argument order here: (season, site_name, site_position).
        calculate_all_metrics(season, site_name, site_position)
    except Exception as e:
        print(f"Error: {e}")
        raise
 # Each call is run_pipeline(season, (lat, lon), site_name). Runs are sequential and
 # run_pipeline re-raises, so the first failure stops the remaining sites.
 if __name__ == "__main__":
    run_pipeline(2024, (47.116171, 11.320308), "innsbruck")
    run_pipeline(2024, (35.3045, 25.0743), "forthgr")
    run_pipeline(2020, (47.116171, 11.320308), "innsbruck")
    run_pipeline(2024, (58.5633, 24.3688), "pitsalu")
    run_pipeline(2023, (64.2437, 19.7673), "vindeln2")
    run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1")
    run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat")
--- a/satellite-fusion-web.service
+++ b/satellite-fusion-web.service
@ -1,16 +0,0 @@
 [Unit]
 Description=Satellite Fusion Pipeline Web Server
 After=network.target
 [Service]
 Type=simple
 User=root
 WorkingDirectory=/opt/satellite-fusion/webapp
 Environment="PATH=/opt/satellite-fusion/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
 ExecStart=/opt/satellite-fusion/venv/bin/python3 -m http.server 8000 --directory /opt/satellite-fusion/webapp
 Restart=always
 RestartSec=10
 [Install]
 WantedBy=multi-user.target
--- a/suitability_screening.py
+++ b/suitability_screening.py
@ -1,634 +0,0 @@
 #!/usr/bin/env python3
 """Compute per-site suitability indicators from existing pipeline outputs.
 The script is intentionally schema-tolerant: it prints one site's discovered JSON
 structure first, then uses a small set of common field-name conventions to compute
 SNR, S2 archive density, and S2-S3 GCC coherence.
 """
 from __future__ import annotations
 import argparse
 import json
 import math
 import re
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
 import numpy as np
 import pandas as pd
 from scipy.interpolate import UnivariateSpline
 from scipy.stats import pearsonr
 # File name of this script's JSON output — where it is written is outside this view (TODO confirm).
 OUTPUT_NAME = "suitability_screening.json"
 # NOTE(review): presumably the minimum SNR for a site to count as suitable — confirm against its use.
 SNR_THRESHOLD = 2.0
 # NOTE(review): presumably the maximum day offset when pairing S2 and S3 observations — confirm.
 MATCH_TOLERANCE_DAYS = 2
 def load_json(path: Path) -> Any | None:
    """Load ``path`` as UTF-8 JSON, returning ``None`` when that is not possible.

    ``None`` is returned if the path is not a regular file, or if reading or
    parsing fails (a warning is printed in that case). ``ValueError`` is caught
    rather than only ``json.JSONDecodeError`` so that a file which is not valid
    UTF-8 (``UnicodeDecodeError``) is reported the same way instead of crashing.
    """
    if not path.is_file():
        return None
    try:
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except (ValueError, OSError) as exc:
        print(f"[WARN] Could not read JSON {path}: {exc}")
        return None
 def jsonable_float(value: Any) -> float | None:
    """Convert ``value`` to a finite float, or return ``None``.

    Booleans are rejected explicitly (they would otherwise convert to 0.0/1.0).
    Values that cannot be converted, including integers too large for a float
    (``OverflowError``), and NaN/infinity all yield ``None``.
    """
    if isinstance(value, bool):
        return None
    try:
        out = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    if not math.isfinite(out):
        return None
    return out
 def parse_date(value: Any) -> pd.Timestamp | None:
    """Parse ``value`` into a midnight-normalised ``pd.Timestamp``, or ``None``.

    A ``pd.Timestamp`` is normalised as is. Anything else is converted to text:
    a stand-alone run of eight digits is tried first as ``YYYYMMDD``, then the
    whole text is handed to ``pd.to_datetime``. Time-zone-aware results are
    converted with ``tz_convert(None)`` before normalising. ``None``, blank
    text, and unparseable input give ``None``.
    """
    if value is None:
        return None
    if isinstance(value, pd.Timestamp):
        return value.normalize()
    text = str(value).strip()
    if text == "":
        return None
    compact_date = re.search(r"(?<!\d)(\d{8})(?!\d)", text)
    if compact_date is not None:
        digits = compact_date.group(1)
        try:
            return pd.to_datetime(digits, format="%Y%m%d").normalize()
        except (TypeError, ValueError):
            # Eight digits that are not a date: fall back to generic parsing.
            pass
    try:
        parsed = pd.to_datetime(text, errors="coerce")
    except (TypeError, ValueError):
        return None
    if pd.isna(parsed):
        return None
    is_aware = getattr(parsed, "tzinfo", None) is not None
    if is_aware:
        parsed = parsed.tz_convert(None)
    return pd.Timestamp(parsed).normalize()
 def compact(value: Any, *, max_text: int = 220) -> Any:
    """Return a short representation suitable for discovery logging.

    Dicts keep their first 12 items and lists their first 2, both compacted
    recursively. Any other value is returned unchanged unless its ``repr`` is
    longer than ``max_text``, in which case the truncated ``repr`` ending in
    ``"..."`` is returned instead.
    """
    if isinstance(value, dict):
        head = list(value.items())[:12]
        return {key: compact(item, max_text=max_text) for key, item in head}
    if isinstance(value, list):
        return [compact(item, max_text=max_text) for item in value[:2]]
    rendered = repr(value)
    if len(rendered) <= max_text:
        return value
    return rendered[: max_text - 3] + "..."
 def top_keys(data: Any) -> list[str]:
    """List the top-level keys of ``data`` for discovery output.

    A dict yields its keys in insertion order. A list whose first element is a
    dict yields the sorted union of keys over its first five elements;
    non-dict elements among them are skipped rather than raising. Anything
    else yields an empty list.
    """
    if isinstance(data, dict):
        return list(data.keys())
    if isinstance(data, list) and data and isinstance(data[0], dict):
        keys: set[str] = set()
        for entry in data[:5]:
            # Only data[0] is known to be a dict; heterogeneous lists are tolerated.
            if isinstance(entry, dict):
                keys.update(entry.keys())
        return sorted(keys)
    return []
 def normalize_records(data: Any) -> list[dict[str, Any]]:
    """Convert common JSON shapes into a list of record dictionaries.

    - ``None`` gives an empty list.
    - A list gives one record per item (non-dict items are wrapped as ``{"value": item}``).
    - A non-dict scalar gives a single ``{"value": data}`` record.
    - A dict holding a list under a known container key is unwrapped through that list.
    - A non-empty dict with no list/tuple values is read as keyed by date or
      filename: each key becomes the record's ``"date"`` unless one is already set.
    - Any other dict is returned as a single record.
    """
    if data is None:
        return []
    if isinstance(data, list):
        return [
            dict(item) if isinstance(item, dict) else {"value": item} for item in data
        ]
    if not isinstance(data, dict):
        return [{"value": data}]
    for container_key in (
        "timeseries",
        "time_series",
        "data",
        "entries",
        "results",
        "records",
    ):
        nested = data.get(container_key)
        if isinstance(nested, list):
            return normalize_records(nested)
    keyed_by_label = bool(data) and not any(
        isinstance(payload, (list, tuple)) for payload in data.values()
    )
    if not keyed_by_label:
        return [dict(data)]
    records = []
    for label, payload in data.items():
        if isinstance(payload, dict):
            record = dict(payload)
            record.setdefault("date", label)
        else:
            record = {"date": label, "value": payload}
        records.append(record)
    return records
 def first_records(data: Any, count: int = 2) -> list[Any]:
    """Return at most ``count`` leading records of ``data`` after normalisation."""
    return normalize_records(data)[:count]
 def recursive_snr_candidates(data: Any, prefix: str = "") -> list[tuple[str, Any]]:
    """Collect ``(path, value)`` for every dict key containing "snr" (case-insensitive).

    Paths join dict keys with ``.`` and mark list positions as ``[i]``. Only
    the first ten elements of any list are searched. A matching key whose value
    is itself a container is reported and then searched as well.
    """
    if isinstance(data, dict):
        hits: list[tuple[str, Any]] = []
        for key, value in data.items():
            path = str(key) if not prefix else f"{prefix}.{key}"
            if "snr" in str(key).lower():
                hits.append((path, value))
            hits += recursive_snr_candidates(value, path)
        return hits
    if isinstance(data, list):
        return [
            hit
            for index, item in enumerate(data[:10])
            for hit in recursive_snr_candidates(item, f"{prefix}[{index}]")
        ]
    return []
 def find_numeric_snr(data: Any) -> float | None:
    """Return the first finite numeric SNR value found in ``data``, or ``None``.

    Candidates whose final path segment is exactly ``snr`` (any case) are tried
    before other snr-containing keys; discovery order is kept within each
    group. A dict candidate is also probed for a direct ``"snr"`` entry.
    """
    def rank(candidate: tuple[str, Any]) -> int:
        leaf = candidate[0].split(".")[-1]
        return 0 if leaf.lower() == "snr" else 1
    for _, value in sorted(recursive_snr_candidates(data), key=rank):
        direct = jsonable_float(value)
        if direct is not None:
            return direct
        if isinstance(value, dict):
            inner = jsonable_float(value.get("snr"))
            if inner is not None:
                return inner
    return None
 def find_site_roots(base_dir: Path) -> list[tuple[str, Path]]:
    """Find direct site roots, plus the repo's common site/year layout.

    A directory counts as a site root when it contains ``metrics.json``,
    ``raw/preselection``, ``phenocam`` or ``raw/phenocam``. Children of
    ``base_dir`` are checked first; for a child that is not a root, its own
    subdirectories are checked. Nested roots are named after the child when
    the subdirectory name is all digits, otherwise ``<child>_<subdirectory>``.
    Returns ``(name, path)`` pairs in sorted directory order.
    """
    if not base_dir.is_dir():
        return []
    markers = (
        ("metrics.json",),
        ("raw", "preselection"),
        ("phenocam",),
        ("raw", "phenocam"),
    )
    def looks_like_site_root(path: Path) -> bool:
        return any(path.joinpath(*parts).exists() for parts in markers)
    def subdirectories(parent: Path) -> list[Path]:
        return sorted(entry for entry in parent.iterdir() if entry.is_dir())
    roots: list[tuple[str, Path]] = []
    for site_dir in subdirectories(base_dir):
        if looks_like_site_root(site_dir):
            roots.append((site_dir.name, site_dir))
            continue
        for run_dir in subdirectories(site_dir):
            if not looks_like_site_root(run_dir):
                continue
            if run_dir.name.isdigit():
                label = site_dir.name
            else:
                label = f"{site_dir.name}_{run_dir.name}"
            roots.append((label, run_dir))
    return roots
 def find_s2_preselection(site_root: Path) -> Path | None:
    """Return the site's ``s2_preselection.json``, or ``None`` if there is none.

    ``raw/preselection/`` is checked before ``preselection/``.
    """
    for folder in (("raw", "preselection"), ("preselection",)):
        candidate = site_root.joinpath(*folder, "s2_preselection.json")
        if candidate.is_file():
            return candidate
    return None
 def find_s3_timeseries(site_root: Path) -> Path | None:
    """Return the S3 GCC ``timeseries.json`` for the aggressive/sigma20 scenario.

    ``processed_aggressive_sigma20`` is preferred, then
    ``processed_aggressive_itb_sigma20``; failing both, the first sorted match
    of ``processed*aggressive*sigma20*/gcc/s3/timeseries.json`` is used.
    Returns ``None`` when nothing matches.
    """
    # NOTE(review): the post-processing helpers in this diff iterate sigma in (None, 30);
    # confirm that "sigma20" directories are what the pipeline actually writes.
    tail = Path("gcc") / "s3" / "timeseries.json"
    for folder in ("processed_aggressive_sigma20", "processed_aggressive_itb_sigma20"):
        candidate = site_root / folder / tail
        if candidate.is_file():
            return candidate
    fallback = sorted(
        site_root.glob("processed*aggressive*sigma20*/gcc/s3/timeseries.json")
    )
    if not fallback:
        return None
    return fallback[0]
 def find_metrics(site_root: Path) -> Path | None:
    """Return ``site_root/metrics.json`` if it is a regular file, else ``None``."""
    metrics_file = site_root / "metrics.json"
    if metrics_file.is_file():
        return metrics_file
    return None
 def find_phenocam(site_root: Path) -> Path | None:
    """Locate the PhenoCam GCC JSON file for a site.

    Exact names (``gcc_90.json``, then ``phenocam_gcc.json``) are tried in
    ``phenocam/`` and then ``raw/phenocam/``.  If none exists, progressively
    looser glob patterns are tried and the alphabetically first match of the
    first matching pattern is returned.  Returns ``None`` when nothing matches.
    """
    for folder in (site_root / "phenocam", site_root / "raw" / "phenocam"):
        for file_name in ("gcc_90.json", "phenocam_gcc.json"):
            exact = folder / file_name
            if exact.is_file():
                return exact

    fallback_patterns = (
        "phenocam/*gcc*90*.json",
        "phenocam/*gcc*.json",
        "raw/phenocam/*gcc*90*.json",
        "raw/phenocam/*gcc*.json",
        "raw/phenocam/*.json",
    )
    for pattern in fallback_patterns:
        hits = sorted(site_root.glob(pattern))
        if hits:
            return hits[0]
    return None
 def print_structure(label: str, path: Path | None) -> None:
    """Print a short structural overview of one JSON input file.

    Prints ``missing`` when *path* is ``None``.  Otherwise prints the path,
    the top-level type and the top-level keys.  For every label except
    ``"metrics.json"`` the first two records are dumped; for
    ``"metrics.json"`` the PhenoCam-like keys and SNR-like entries are listed
    instead.
    """
    print(f"\n[{label}]")
    if path is None:
        print("missing")
        return

    data = load_json(path)
    print(f"path: {path}")
    print(f"type: {type(data).__name__}")
    print(f"keys: {top_keys(data)}")

    if label != "metrics.json":
        sample = first_records(data, 2)
        if sample:
            noun_ending = "y" if len(sample) == 1 else "ies"
            print(f"first {len(sample)} entr{noun_ending}:")
            print(json.dumps(compact(sample), indent=2, default=str))
        return

    snr_hits = recursive_snr_candidates(data)
    phenocam_like = []
    if isinstance(data, dict):
        phenocam_like = [
            (name, top_keys(content))
            for name, content in data.items()
            if "phenocam" in str(name).lower()
        ]
    print(f"phenocam-like keys: {phenocam_like}")
    print(f"snr-like keys: {[(location, compact(found)) for location, found in snr_hits]}")
 def run_discovery(site_name: str, site_root: Path) -> None:
    """Print the structure of the four input files of one example site."""
    print("\n=== Discovery mode ===")
    print(f"Using site: {site_name} ({site_root})")
    # Each file is located and printed before the next one is looked up.
    inputs = (
        ("s2_preselection.json", find_s2_preselection),
        ("S3 timeseries.json", find_s3_timeseries),
        ("metrics.json", find_metrics),
        ("PhenoCam gcc_90 file", find_phenocam),
    )
    for label, locate in inputs:
        print_structure(label, locate(site_root))
    print("\n=== Computing indicators ===")
 def choose_discovery_site(site_roots: list[tuple[str, Path]]) -> tuple[str, Path]:
    """Pick the site that has the most of the four input files available.

    Ties go to the earliest entry of *site_roots*.  Raises ``ValueError``
    (from ``max``) when *site_roots* is empty.
    """

    def available_inputs(candidate: tuple[str, Path]) -> int:
        root = candidate[1]
        located = [
            find_s2_preselection(root),
            find_s3_timeseries(root),
            find_metrics(root),
            find_phenocam(root),
        ]
        return len([found for found in located if found is not None])

    return max(site_roots, key=available_inputs)
 def truthy_status(value: Any, *, field_name: str | None = None) -> bool | None:
    """Interpret a loosely-typed status marker as pass, fail or unknown.

    Args:
        value: Marker read from an acquisition record (bool, number, string
            or ``None``).
        field_name: Name of the field *value* came from.  If it contains
            ``reject`` or ``exclude``, boolean-like values are inverted (a
            true "rejected" flag means the acquisition fails).  If it
            contains ``reason`` or ``status``, unrecognised text counts as a
            failure.

    Returns:
        ``True`` when the marker means the acquisition passes, ``False`` when
        it fails, and ``None`` when the value cannot be interpreted.
    """
    field = (field_name or "").lower()
    inverted = any(word in field for word in ("reject", "exclude"))

    if value is None:
        return True
    # A float NaN is "no marker", exactly like the text "nan" below
    # (NaN is the only float that is not equal to itself).
    if isinstance(value, float) and value != value:
        return True
    if isinstance(value, (bool, int, float)):
        flag = bool(value)
        return not flag if inverted else flag

    text = str(value).strip().lower()
    # String booleans follow the same rule as real booleans, including the
    # inversion for reject/exclude fields.
    if text in {"true", "false"}:
        flag = text == "true"
        return not flag if inverted else flag
    if text in {"", "none", "null", "nan", "ok", "pass", "passed", "keep", "kept", "valid", "selected"}:
        return True
    if text in {
        "fail",
        "failed",
        "reject",
        "rejected",
        "exclude",
        "excluded",
        "invalid",
        "cloud",
        "cloudy",
        "dark",
        "bad",
    }:
        return False
    # Any other text in a reason/status field is taken as a rejection reason.
    if any(word in field for word in ("reason", "status")):
        return False
    return None
 def acquisition_passes(entry: dict[str, Any], strategy: str) -> bool:
    """Decide whether one S2 preselection record passes a filtering strategy.

    The record layout is not fixed, so several conventions are tried in
    order and the first conclusive one wins:

    1. ``<prefix>_<strategy>`` flags such as ``excluded_aggressive`` or
       ``passed_aggressive``.
    2. A value stored under the strategy name itself (scalar or nested dict).
    3. Generic status fields (``rejected``, ``status``, ``reason``, ...).
    4. A small ``{"value": ...}`` record.

    A record without any marker is treated as usable.

    Args:
        entry: One acquisition record.
        strategy: Strategy name, e.g. ``"aggressive"`` or ``"nonaggressive"``;
            the ``non_aggressive`` and ``non-aggressive`` spellings are also
            accepted as keys.

    Returns:
        ``True`` if the acquisition passes, ``False`` otherwise.
    """
    # Ordered and de-duplicated: the spelling passed in is always checked
    # first, so conflicting alias keys resolve the same way on every run
    # (a set would iterate in hash-randomized order).
    strategy_aliases = tuple(
        dict.fromkeys(
            (
                strategy,
                strategy.replace("nonaggressive", "non_aggressive"),
                strategy.replace("nonaggressive", "non-aggressive"),
            )
        )
    )
    negative_prefixes = ("excluded", "exclude", "rejected", "reject")
    positive_prefixes = ("passed", "pass", "keep", "kept", "valid", "selected")

    for alias in strategy_aliases:
        for prefix in negative_prefixes:
            key = f"{prefix}_{alias}"
            if key in entry:
                return not bool(entry[key])
        for prefix in positive_prefixes:
            key = f"{prefix}_{alias}"
            if key in entry:
                return bool(entry[key])

    for alias in strategy_aliases:
        nested = entry.get(alias)
        if isinstance(nested, dict):
            for key, value in nested.items():
                passed = truthy_status(value, field_name=key)
                if passed is not None:
                    return passed
        elif nested is not None:
            passed = truthy_status(nested, field_name=alias)
            if passed is not None:
                return passed

    # Generic status fields.
    for key in (*negative_prefixes, *positive_prefixes, "status", "strategy", "reason", "rejection_reason"):
        if key in entry:
            passed = truthy_status(entry[key], field_name=key)
            if passed is not None:
                return passed

    # Dict keyed by date with a scalar rejection reason.
    if "value" in entry and len(entry) <= 3:
        passed = truthy_status(entry.get("value"), field_name="value")
        if passed is not None:
            return passed

    # Existing pipeline entries with band means and no rejection marker are usable.
    return True
 def band_value(entry: dict[str, Any], names: Iterable[str]) -> float | None:
    """Find the first usable numeric band value stored under one of *names*.

    Keys are matched case-insensitively at the top level of *entry*.  If that
    yields nothing, known container keys (``bands``, ``band_means``,
    ``reflectance``, ``reflectances``, ``means``, ``window_means``) are
    searched recursively.  Returns ``None`` when no name yields a number.
    """
    by_lower_key = {str(key).lower(): raw for key, raw in entry.items()}
    for candidate in names:
        lookup = candidate.lower()
        if lookup not in by_lower_key:
            continue
        number = jsonable_float(by_lower_key[lookup])
        if number is not None:
            return number

    containers = ("bands", "band_means", "reflectance", "reflectances", "means", "window_means")
    for holder in containers:
        nested = entry.get(holder)
        if not isinstance(nested, dict):
            continue
        number = band_value(nested, names)
        if number is not None:
            return number
    return None
 def entry_date(entry: dict[str, Any]) -> pd.Timestamp | None:
    """Return the first date that can be parsed from a record.

    Explicit date fields are tried before file-name-like fields.  Returns
    ``None`` when no field yields a date.
    """
    date_keys = ("date", "datetime", "time", "timestamp", "acquisition_date")
    name_keys = ("filename", "file", "path", "name")
    for key in date_keys + name_keys:
        if key not in entry:
            continue
        parsed = parse_date(entry[key])
        if parsed is not None:
            return parsed
    return None
 def s2_gcc_series(s2_data: Any) -> pd.DataFrame:
    """Build a daily S2 GCC series from preselection records.

    Only dict records that pass the ``"aggressive"`` strategy and carry a
    date plus blue, green and red values are used.  GCC is
    ``green / (blue + green + red)``; records whose band sum is not positive
    are dropped.  Values sharing a date are averaged.

    Returns a frame with columns ``date`` and ``s2_gcc`` sorted by date
    (empty, with those columns, when no record qualifies).
    """
    blue_names = ("b02", "blue", "B02", "band_1", "band1")
    green_names = ("b03", "green", "B03", "band_2", "band2")
    red_names = ("b04", "red", "B04", "band_3", "band3")

    samples = []
    for record in normalize_records(s2_data):
        if not isinstance(record, dict):
            continue
        if not acquisition_passes(record, "aggressive"):
            continue
        when = entry_date(record)
        blue = band_value(record, blue_names)
        green = band_value(record, green_names)
        red = band_value(record, red_names)
        if any(part is None for part in (when, blue, green, red)):
            continue
        band_sum = blue + green + red
        if band_sum <= 0:
            continue
        samples.append({"date": when, "s2_gcc": green / band_sum})

    if not samples:
        return pd.DataFrame(columns=["date", "s2_gcc"])
    frame = pd.DataFrame(samples)
    daily = frame.groupby("date", as_index=False)["s2_gcc"].mean()
    return daily.sort_values("date")
 def value_from_record(entry: dict[str, Any], preferred: Iterable[str]) -> float | None:
    """Extract a greenness value from a record.

    The *preferred* keys are tried first, in order and case-insensitively.
    If none yields a number, the first key containing ``gcc`` or
    ``greenness`` with a numeric value is used.  Returns ``None`` otherwise.
    """
    by_lower_key = {str(key).lower(): raw for key, raw in entry.items()}

    for wanted in preferred:
        number = jsonable_float(by_lower_key.get(wanted.lower()))
        if number is not None:
            return number

    for key, raw in by_lower_key.items():
        if "gcc" in key or "greenness" in key:
            number = jsonable_float(raw)
            if number is not None:
                return number
    return None
 def gcc_timeseries(data: Any, value_name: str) -> pd.DataFrame:
    """Turn time-series records into a daily frame of greenness values.

    Args:
        data: Raw JSON content; it is flattened with ``normalize_records``.
        value_name: Name of the value column in the returned frame.

    Returns:
        A frame with columns ``date`` and *value_name*, one row per date
        (same-date values averaged), sorted by date.  Empty, with those
        columns, when no record has both a date and a value.
    """
    preferred_keys = ("greenness_index", "gcc_90", "gcc", "value", "mean", "site_value")
    samples = []
    for record in normalize_records(data):
        if not isinstance(record, dict):
            continue
        when = entry_date(record)
        reading = value_from_record(record, preferred_keys)
        if when is None or reading is None:
            continue
        samples.append({"date": when, value_name: reading})

    if not samples:
        return pd.DataFrame(columns=["date", value_name])
    frame = pd.DataFrame(samples)
    daily = frame.groupby("date", as_index=False)[value_name].mean()
    return daily.sort_values("date")
 def compute_archive_density(s2_data: Any | None) -> tuple[int | None, int | None]:
    """Count S2 acquisitions that pass each filtering strategy.

    Returns ``(n_aggressive, n_nonaggressive)``, or ``(None, None)`` when
    *s2_data* is ``None`` or contains no dict records.
    """
    if s2_data is None:
        return None, None
    usable = [record for record in normalize_records(s2_data) if isinstance(record, dict)]
    if not usable:
        return None, None

    def count_passing(strategy: str) -> int:
        return len([record for record in usable if acquisition_passes(record, strategy)])

    n_aggressive = count_passing("aggressive")
    n_nonaggressive = count_passing("nonaggressive")
    return n_aggressive, n_nonaggressive
 def compute_coherence(s2_data: Any | None, s3_data: Any | None) -> tuple[int | None, float | None, float | None]:
    """Correlate the S2 and S3 GCC series on date-matched samples.

    Each S2 date is paired with the nearest S3 date within
    ``MATCH_TOLERANCE_DAYS`` days; unmatched S2 dates are dropped.

    Returns:
        ``(n_matched, pearson_r, p_value)``.  All three are ``None`` when an
        input is missing; ``r`` and ``p`` are ``None`` when fewer than two
        pairs were matched (``n_matched`` is 0 if either series is empty).
    """
    if s2_data is None or s3_data is None:
        return None, None, None

    s2_frame = s2_gcc_series(s2_data)
    s3_frame = gcc_timeseries(s3_data, "s3_gcc")
    if s2_frame.empty or s3_frame.empty:
        return 0, None, None

    # merge_asof needs both sides sorted on the join key.
    paired = pd.merge_asof(
        s2_frame.sort_values("date"),
        s3_frame.sort_values("date"),
        on="date",
        direction="nearest",
        tolerance=pd.Timedelta(days=MATCH_TOLERANCE_DAYS),
    )
    paired = paired.dropna(subset=["s2_gcc", "s3_gcc"])

    n_pairs = int(len(paired))
    if n_pairs < 2:
        return n_pairs, None, None
    correlation, significance = pearsonr(paired["s2_gcc"].to_numpy(), paired["s3_gcc"].to_numpy())
    return n_pairs, jsonable_float(correlation), jsonable_float(significance)
 def phenocam_series(data: Any | None) -> pd.DataFrame:
    """Build a daily PhenoCam GCC series from raw JSON content.

    Only dict records with both a parseable date and a greenness value
    contribute.  Values sharing a date are averaged.

    Returns:
        A frame with columns ``date`` and ``gcc`` sorted by date; empty, with
        those columns, when *data* is ``None`` or no record qualifies.
    """
    columns = ["date", "gcc"]
    if data is None:
        return pd.DataFrame(columns=columns)

    rows = []
    for entry in normalize_records(data):
        if not isinstance(entry, dict):
            # A bare value carries no date, so it can never form a row.
            continue
        date = entry_date(entry)
        value = value_from_record(
            entry,
            ("gcc_90", "greenness_index", "gcc", "gcc_mean", "value"),
        )
        if date is not None and value is not None:
            rows.append({"date": date, "gcc": value})

    if not rows:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(rows).groupby("date", as_index=False)["gcc"].mean().sort_values("date")
 def compute_snr_from_phenocam(phenocam_data: Any | None) -> float | None:
    """Estimate a signal-to-noise ratio from the PhenoCam GCC series.

    A cubic ``UnivariateSpline`` is fitted to GCC against days since the
    first observation.  The SNR is the GCC range (max minus min) divided by
    the RMSE of the spline residuals.

    Returns ``None`` when there are fewer than five observations or distinct
    days, when the fit fails, or when the RMSE is not positive.
    """
    gcc_frame = phenocam_series(phenocam_data)
    if len(gcc_frame) < 5:
        return None

    first_day = gcc_frame["date"].min()
    day_offsets = (gcc_frame["date"] - first_day).dt.days.to_numpy(dtype=float)
    gcc_values = gcc_frame["gcc"].to_numpy(dtype=float)
    if np.unique(day_offsets).size < 5:
        return None

    try:
        fitted = UnivariateSpline(day_offsets, gcc_values, k=3)
        noise = gcc_values - fitted(day_offsets)
    except Exception as exc:
        print(f"[WARN] Could not fit PhenoCam smoothing spline: {exc}")
        return None

    rmse = float(np.sqrt(np.mean(noise**2)))
    amplitude = float(np.max(gcc_values) - np.min(gcc_values))
    if rmse <= 0:
        return None
    return amplitude / rmse
 def compute_snr(metrics_data: Any | None, phenocam_data: Any | None) -> float | None:
    """Return the SNR from the metrics file, else estimate it from PhenoCam data."""
    stored_snr = find_numeric_snr(metrics_data)
    if stored_snr is None:
        return compute_snr_from_phenocam(phenocam_data)
    return stored_snr
 def compute_site(site_root: Path) -> dict[str, Any]:
    """Compute the suitability indicators for one site directory.

    Returns a dict with the SNR and its threshold verdict, the S2 archive
    density per strategy, and the S2/S3 coherence statistics.  An indicator
    is ``None`` when it cannot be computed.
    """

    def load_optional(found: Path | None) -> Any:
        # A placeholder path stands in for a missing input; this presumably
        # relies on load_json returning None for a non-existent file - TODO confirm.
        return load_json(found or Path("__missing__"))

    s2_data = load_optional(find_s2_preselection(site_root))
    s3_data = load_optional(find_s3_timeseries(site_root))
    metrics_data = load_optional(find_metrics(site_root))
    phenocam_data = load_optional(find_phenocam(site_root))

    snr = compute_snr(metrics_data, phenocam_data)
    n_s2_aggressive, n_s2_nonaggressive = compute_archive_density(s2_data)
    n_matched, pearson_r, p_value = compute_coherence(s2_data, s3_data)

    snr_pass = None if snr is None else snr >= SNR_THRESHOLD
    return {
        "snr": snr,
        "snr_pass": snr_pass,
        "n_s2_aggressive": n_s2_aggressive,
        "n_s2_nonaggressive": n_s2_nonaggressive,
        "coherence_n_matched": n_matched,
        "coherence_pearson_r": pearson_r,
        "coherence_p_value": p_value,
    }
 def print_summary(results: dict[str, dict[str, Any]]) -> None:
    """Print the per-site indicators as an aligned plain-text table.

    Missing values are shown as ``null``, counts as integers, booleans as
    ``true``/``false`` and other numbers with four significant digits.
    """
    print("\nSuitability summary")
    if not results:
        print("(no sites found)")
        return

    # (result key, column title); the first column holds the site name.
    layout = [
        ("site", "site"),
        ("snr", "snr"),
        ("snr_pass", "pass"),
        ("n_s2_aggressive", "n_s2_agg"),
        ("n_s2_nonaggressive", "n_s2_nonagg"),
        ("coherence_n_matched", "n_match"),
        ("coherence_pearson_r", "pearson_r"),
        ("coherence_p_value", "p_value"),
    ]
    titles = [title for _, title in layout]
    value_keys = [key for key, _ in layout[1:]]

    def render(cell: Any, key: str) -> str:
        if cell is None:
            return "null"
        if key.startswith("n_") or key == "coherence_n_matched":
            return str(int(cell))
        # bool is tested before the numeric case because bool is an int subclass.
        if isinstance(cell, bool):
            return "true" if cell else "false"
        if isinstance(cell, (int, float)):
            return f"{float(cell):.4g}"
        return str(cell)

    table = [
        [site, *(render(values.get(key), key) for key in value_keys)]
        for site, values in results.items()
    ]
    widths = [
        max(len(title), *(len(line[col]) for line in table))
        for col, title in enumerate(titles)
    ]

    def join_cells(cells: list[str]) -> str:
        return "  ".join(cell.ljust(width) for cell, width in zip(cells, widths))

    print(join_cells(titles))
    print("  ".join("-" * width for width in widths))
    for line in table:
        print(join_cells(line))
 def main() -> int:
    """Run discovery and indicator computation for every site under ``--base-dir``.

    Prints the structure of one example site, computes the indicators for all
    sites, writes them as JSON to ``OUTPUT_NAME`` inside the base directory
    and prints a summary table.

    Returns:
        ``0`` on success.  Exits through ``parser.error`` (status 2) when
        ``--base-dir`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--base-dir",
        required=True,
        type=Path,
        help="Pipeline output root containing one subdirectory per site.",
    )
    args = parser.parse_args()
    base_dir = args.base_dir.expanduser().resolve()
    # Fail early with a usage error: the results file is written inside
    # base_dir, so a missing directory would otherwise end in a traceback.
    if not base_dir.is_dir():
        parser.error(f"--base-dir is not a directory: {base_dir}")

    site_roots = find_site_roots(base_dir)
    if site_roots:
        run_discovery(*choose_discovery_site(site_roots))
    else:
        print(f"[WARN] No site directories found under {base_dir}")

    results = {site_name: compute_site(site_root) for site_name, site_root in site_roots}
    output_path = base_dir / OUTPUT_NAME
    with output_path.open("w", encoding="utf-8") as f:
        # allow_nan=False keeps the output strict JSON (no NaN/Infinity).
        json.dump(results, f, indent=2, allow_nan=False)
        f.write("\n")
    print_summary(results)
    print(f"\nWrote {output_path}")
    return 0
 # Script entry point: exit the process with the status returned by main().
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/uv.lock
+++ b/uv.lock
--- a/webapp/fusion.html
+++ b/webapp/fusion.html
@ -1,397 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Fusion Viewer</title>
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
    <script src="common.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
        .selectors { margin-bottom: 20px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { margin: 0 0 5px 0; font-size: 22px; }
        .season-row { padding-bottom: 15px; }
        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
        .download-links { margin-left: 10px; font-size: 14px; }
        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
        .download-links a:hover { text-decoration: underline; }
        #dateSlider { width: 100%; margin: 15px 0; }
        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
        #fusionMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
        .leaflet-image-layer { image-rendering: pixelated; }
        .leaflet-control-attribution { display: none; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header-sticky">
            <div class="nav">
                <a href="index.html">Full</a>
                <a href="preselection.html">Pre-selection</a>
                <a href="prepared.html">Prepared</a>
                <a href="fusion.html" class="active">Fusion</a>
                <a href="postprocessed.html">Postprocessed</a>
                <a href="metrics.html">Metrics</a>
                <a href="gap_validation.html">Gap validation</a>
                <a href="phenology.html">Phenology</a>
            </div>
            <h1 id="siteName">Innsbruck</h1>
            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
            <div class="selectors">
                <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
            <label>Strategy:</label>
            <select id="strategySelect">
                <option value="aggressive">Aggressive</option>
                <option value="nonaggressive">Non-aggressive</option>
            </select>
            <label>Sigma:</label>
            <select id="sigmaSelect">
                <option value="20">σ=20</option>
                <option value="30">σ=30</option>
            </select>
            <label>Mode:</label>
            <select id="fusionModeSelect" title="BtI = reflectance fusion; ItB = GCC fusion">
                <option value="bti">BtI (REFL)</option>
                <option value="itb">ItB (GCC)</option>
            </select>
            </div>
            <input type="range" id="dateSlider" min="0" max="365" value="0">
            <div id="dateDisplay">2024-01-01</div>
        </div>
        <div class="map-label" id="mapLabelFusion">Fusion RGB (closest available)</div>
        <div id="mapDate" class="map-date"></div>
        <div id="fusionMap"></div>
        <div id="plots">
            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
        </div>
    </div>
    <script>
        // Coordinate systems used when converting GeoTIFF bounds to lat/lon.
        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
        // Current selection; these defaults are replaced once the page has initialised.
        let siteName = "innsbruck", season = "2024";
        let strategy = "aggressive", sigma = "20", fusionMode = "bti";
        // Map centre as [lat, lon].
        let sitePosition = [47.116171, 11.320308];
        // Local midnight on January 1 of the season; day 0 of the date slider.
        let start = new Date(2024, 0, 1);
        // sitename -> list of seasons for which data was found.
        let availableSiteSeasons = {};
        // Leaflet map, the current image overlay and the site marker.
        let fusionMap = null, overlay = null, marker = null;
        // Time series backing the plots (arrays of records with a `date` field).
        let ndviTs = [], gccTs = [], bandsTs = [];
        // Band plots: record key and line colour.
        const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
        // Query parameters as they were when the page loaded.
        const urlParams = new URLSearchParams(location.search);
        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
        // Format a Date as YYYY-MM-DD using its local calendar fields.
        const fmtDate = (d) => {
            const twoDigits = (n) => String(n).padStart(2, "0");
            const year = `${d.getFullYear()}`;
            return `${year}-${twoDigits(d.getMonth() + 1)}-${twoDigits(d.getDate())}`;
        };
        // Slider day index -> YYYY-MM-DD. Calendar arithmetic is used instead of
        // adding 24h blocks, so the result stays on the right day across DST changes.
        const dateFromDays = (days) => fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));
        // YYYY-MM-DD -> slider day index relative to `start`.
        const daysFromDate = (dateStr) => {
            const [y, m, d] = dateStr.split("-").map(Number);
            // Two local midnights can be 23h or 25h apart around a DST change;
            // rounding (not flooring) keeps the day count exact.
            return Math.round((new Date(y, m - 1, d) - start) / 86400000);
        };
        // Name of the prepared-data folder for the current strategy and fusion mode.
        function getPreparedBase() {
            const modeSuffix = fusionMode === "itb" ? "_itb" : "";
            return `prepared_${strategy}${modeSuffix}`;
        }
        // Folder that holds the fused GeoTIFFs for the current selection.
        function getFusionDir() {
            let fusionFolder = "fusion";
            if (sigma === "30") fusionFolder = "fusion_sigma30";
            return `data/${siteName}/${season}/${getPreparedBase()}/${fusionFolder}`;
        }
        // Sub-folder name of the fusion time series for the selected sigma.
        function getFusionTimeseriesDir() {
            if (sigma === "30") {
                return "fusion_sigma30";
            }
            return "fusion";
        }
        // Load the time series for the current selection, then redraw the plots
        // and refresh the download links. In ItB mode only GCC is loaded; any
        // failure leaves all three series empty.
        async function loadTimeseries() {
            const folder = getFusionTimeseriesDir();
            const root = `data/${siteName}/${season}/${getPreparedBase()}`;
            // A non-OK response counts as "no data" rather than an error.
            const fetchSeries = async (product) => {
                const response = await fetch(`${root}/${product}/${folder}/timeseries.json`);
                return response.ok ? response.json() : [];
            };
            try {
                if (fusionMode === "itb") {
                    const gccSeries = await fetchSeries("gcc");
                    ndviTs = [];
                    gccTs = gccSeries;
                    bandsTs = [];
                } else {
                    const products = ["ndvi", "gcc", "bands"];
                    const loaded = await Promise.all(products.map((product) => fetchSeries(product)));
                    [ndviTs, gccTs, bandsTs] = loaded;
                }
            } catch {
                ndviTs = [];
                gccTs = [];
                bandsTs = [];
            }
            drawPlots();
            updateDownloadLinks();
        }
        // Draw one time-series line plot on a canvas.
        //   canvasId: id of the <canvas> element (nothing happens if it is missing)
        //   data:     array of records with a `date` field
        //   key:      record field to plot; records where it is null/undefined are skipped
        //   color:    stroke colour of the data line
        // A red vertical line marks the date selected on the slider, and the value
        // of the record closest to that date is printed next to it.
        function drawPlot(canvasId, data, key, color) {
            const canvas = document.getElementById(canvasId);
            if (!canvas) return;
            const ctx = canvas.getContext("2d");
            // Match the drawing buffer to the displayed width; fixed 100px height.
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const w = canvas.width, h = canvas.height, pad = 30;
            const plotW = w - pad * 2, plotH = h - pad * 2;
            const pts = data.filter(t => t[key] != null);
            if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
            const dates = pts.map(t => new Date(t.date));
            const vals = pts.map(t => t[key]);
            const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
            const minV = Math.min(...vals), maxV = Math.max(...vals);
            // `|| 1` avoids dividing by zero when all dates or all values are equal.
            const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
            // Data -> pixel mappings (y grows downwards on a canvas).
            const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
            const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
            ctx.clearRect(0, 0, w, h);
            // Axes.
            ctx.strokeStyle = "#ccc";
            ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
            // Min/max value labels on the left.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
            ctx.fillText(maxV.toFixed(3), 2, pad + 3);
            // Data line.
            ctx.strokeStyle = color;
            ctx.beginPath();
            pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
            ctx.stroke();
            // Marker for the date currently selected on the slider.
            const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const xPos = x(curDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
            // Label the marker with the value of the record nearest to that date.
            const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
            if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
        }
        // Redraw every plot: NDVI, GCC, then one plot per band.
        function drawPlots() {
            const jobs = [
                ["plot_ndvi", ndviTs, "ndvi", "#2d7a3e"],
                ["plot_gcc", gccTs, "greenness_index", "#00aa00"],
            ];
            for (const band of BANDS) {
                jobs.push([`plot_${band.key}`, bandsTs, band.key, band.color]);
            }
            for (const [canvasId, series, field, colour] of jobs) {
                drawPlot(canvasId, series, field, colour);
            }
        }
        // Rebuild the download links for the current selection.
        // The links are created as DOM nodes rather than through innerHTML because
        // `strategy` is taken from the URL query string and ends up in the link
        // paths; string-built HTML would let a crafted URL inject markup.
        function updateDownloadLinks() {
            const el = document.getElementById("downloadLinks");
            if (!el) return;
            const sub = getFusionTimeseriesDir();
            const prep = `data/${siteName}/${season}/${getPreparedBase()}`;
            const makeLink = (href, label, downloadName) => {
                const a = document.createElement("a");
                a.setAttribute("href", href);
                a.textContent = label;
                if (downloadName) a.setAttribute("download", downloadName);
                return a;
            };
            let links;
            if (fusionMode === "itb") {
                // ItB mode only has the GCC time series.
                links = [makeLink(`${prep}/gcc/${sub}/timeseries.json`, "[GCC JSON]")];
            } else {
                const base = `${prep}/export/${sub}`;
                const name = `${siteName}_${season}_fusion_${strategy}_${sub}`;
                links = [
                    makeLink(`${base}/timeseries.json`, "[JSON]", `${name}.json`),
                    makeLink(`${base}/timeseries.csv`, "[CSV]", `${name}.csv`),
                ];
            }
            el.replaceChildren(...links);
        }
        // Find the fused GeoTIFF closest to `dateStr` (YYYY-MM-DD) within the
        // current season, probing outwards one day at a time with HEAD requests.
        // Returns the file name, or null when no file exists in the season.
        async function findFusionFile(dateStr) {
            const DAY_MS = 86400000;
            // All date arithmetic is done in UTC. Mixing a UTC-parsed target with
            // local-midnight season bounds dropped Dec 31 (east of UTC) or Jan 1
            // (west of UTC) from the search.
            const [y, m, d] = dateStr.split("-").map(Number);
            const target = Date.UTC(y, m - 1, d);
            const year = parseInt(season);
            const seasonStart = Date.UTC(year, 0, 1);
            const seasonEnd = Date.UTC(year, 11, 31);
            // Resolved once so that a single search probes one consistent folder.
            const prefix = fusionMode === "itb" ? "GCC_" : "REFL_";
            const dir = getFusionDir();
            for (let offset = 0; offset <= 365; offset++) {
                for (const sign of offset === 0 ? [0] : [-1, 1]) {
                    const t = target + sign * offset * DAY_MS;
                    if (t < seasonStart || t > seasonEnd) continue;
                    const ds = new Date(t).toISOString().slice(0, 10).replace(/-/g, "");
                    const filename = `${prefix}${ds}.tif`;
                    try {
                        const res = await fetch(`${dir}/${filename}`, { method: "HEAD" });
                        if (res.ok) return filename;
                    } catch {
                        // A failed probe is treated like a missing file: keep searching.
                    }
                }
            }
            return null;
        }
        // Convert a [minX, minY, maxX, maxY] box in `fromCRS` into Leaflet bounds
        // [[south, west], [north, east]] in EPSG:4326.
        function transformBounds(bbox, fromCRS) {
            const toLatLng = (xy) => {
                const lonLat = proj4(fromCRS, "EPSG:4326", xy);
                return [lonLat[1], lonLat[0]];
            };
            const southWest = toLatLng([bbox[0], bbox[1]]);
            const northEast = toLatLng([bbox[2], bbox[3]]);
            return [southWest, northEast];
        }
        // Download one fused GeoTIFF and prepare it for display.
        // Returns { dataUrl, bounds, dateStr }: an image data URL, Leaflet bounds in
        // lat/lon, and the YYYYMMDD date taken from the file name.
        // Throws when the download fails.
        async function loadGeotiff(filename) {
            const path = `${getFusionDir()}/${filename}`;
            const response = await fetch(path);
            // Without this check an HTTP error page would be handed to the decoder.
            if (!response.ok) throw new Error(`Failed to load ${path}: HTTP ${response.status}`);
            const buf = await response.arrayBuffer();
            const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
            // Leaflet wants [lat, lon] pairs; a non-geographic box is reprojected.
            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
            const dateStr = filename.replace(/^(REFL|GCC)_/, "").replace(".tif", "");
            return { dataUrl, bounds, dateStr };
        }
        // Show the fused image closest to the slider date on the map. If no image
        // is found or loading fails, the overlay and the date label are cleared.
        async function updateMap() {
            const setMapDate = (text) => {
                document.getElementById("mapDate").textContent = text;
            };
            const dropOverlay = () => {
                if (!overlay) return;
                fusionMap.removeLayer(overlay);
                overlay = null;
            };

            const sliderDays = parseInt(document.getElementById("dateSlider").value);
            const filename = await findFusionFile(dateFromDays(sliderDays));
            if (!filename || !fusionMap) {
                dropOverlay();
                setMapDate("");
                return;
            }
            try {
                const tiff = await loadGeotiff(filename);
                if (overlay) fusionMap.removeLayer(overlay);
                overlay = L.imageOverlay(tiff.dataUrl, tiff.bounds, { opacity: 0.95 }).addTo(fusionMap);
                fusionMap.fitBounds(tiff.bounds);
                const stamp = tiff.dateStr;
                setMapDate(`${stamp.slice(0,4)}-${stamp.slice(4,6)}-${stamp.slice(6,8)}`);
            } catch (e) {
                dropOverlay();
                setMapDate("");
            }
        }
        // Check with a HEAD request whether the S2 preselection file exists for a
        // site and season; any network error counts as "no data".
        async function probeDataExists(sitename, s) {
            const url = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
            try {
                const { ok } = await fetch(url, { method: "HEAD" });
                return ok;
            } catch {
                return false;
            }
        }
        // Look up the GeoJSON feature whose `properties.sitename` equals `sn`;
        // undefined when the site list is not loaded or has no such site.
        function getSiteBySitename(sn) {
            const features = window.sitesData?.features;
            if (features == null) return undefined;
            return features.find((feature) => feature.properties?.sitename === sn);
        }
        // Switch the page to another site/season: update the state, map position,
        // headings, slider range and URL, then reload the time series and the map.
        async function setSiteSeason(newSite, newSeason) {
            siteName = newSite;
            season = newSeason;
            // Day 0 of the slider is January 1 of the selected season.
            start = new Date(parseInt(season), 0, 1);
            const site = getSiteBySitename(newSite);
            if (site?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
                const [lon, lat] = site.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            if (fusionMap) { fusionMap.setView(sitePosition, 12); if (marker) marker.setLatLng(sitePosition); }
            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
            document.getElementById("season").textContent = season;
            // The slider runs from January 1 to December 31 of the season.
            const yearEnd = new Date(parseInt(season), 11, 31);
            document.getElementById("dateSlider").max = Math.ceil((yearEnd - start) / 86400000);
            // Mirror the selection in the URL without adding a history entry.
            const params = new URLSearchParams(location.search);
            params.set("site", siteName);
            params.set("season", season);
            params.set("mode", fusionMode);
            history.replaceState({}, "", `?${params}`);
            // A `date` query parameter, when present, positions the slider.
            const urlDate = params.get("date");
            if (urlDate) document.getElementById("dateSlider").value = daysFromDate(urlDate);
            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            await loadTimeseries();
            await updateMap();
        }
        /**
         * Page bootstrap. Loads `data/sites.geojson`, probes which site/season
         * pairs have data, fills the site and season selectors, restores
         * strategy / sigma / fusion mode from the URL, creates the map and the
         * site marker, wires the selector change handlers and finally loads the
         * initial selection through `setSiteSeason`.
         */
        async function init() {
            // A failed or non-OK fetch degrades to an empty feature list.
            try {
                const res = await fetch("data/sites.geojson");
                window.sitesData = res.ok ? await res.json() : { features: [] };
            } catch { window.sitesData = { features: [] }; }
            const features = window.sitesData.features || [];
            // Keep, per site, only the seasons for which probeDataExists resolves truthy.
            // Probes are awaited one after another.
            for (const f of features) {
                const sn = f.properties?.sitename;
                if (!sn) continue;
                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
                const withData = [];
                for (const s of seasonsFromGeo) {
                    if (await probeDataExists(sn, s)) withData.push(s);
                }
                if (withData.length) availableSiteSeasons[sn] = withData;
            }
            const availableSites = Object.keys(availableSiteSeasons);
            const siteSelect = document.getElementById("siteSelect");
            siteSelect.innerHTML = "";
            // When no site has data, offer "innsbruck" with season "2024" as a placeholder.
            // sort() is in place, so availableSites[0] below is the alphabetically first site.
            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
                const opt = document.createElement("option");
                opt.value = sn;
                opt.textContent = sn;
                siteSelect.appendChild(opt);
                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
            });
            // URL values are honoured only when they name a known site / a season of that site.
            const urlSite = urlParams.get("site");
            const urlSeason = urlParams.get("season");
            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
            const initialSeason = (urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)) ? urlSeason : ((availableSiteSeasons[initialSite] || [])[0] || "2024");
            siteSelect.value = initialSite;
            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || []).map(s =>
                `<option value="${s}">${s}</option>`
            ).join("");
            document.getElementById("seasonSelect").value = initialSeason;
            // Scenario settings default to aggressive / 20 / bti; any mode other than "itb" becomes "bti".
            strategy = urlParams.get("strategy") || "aggressive";
            sigma = urlParams.get("sigma") || "20";
            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
            document.getElementById("strategySelect").value = strategy;
            document.getElementById("sigmaSelect").value = sigma;
            document.getElementById("fusionModeSelect").value = fusionMode;
            const ml = document.getElementById("mapLabelFusion");
            if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";
            // Feature coordinates are read as [lon, lat]; sitePosition is stored as [lat, lon].
            const initSite = getSiteBySitename(initialSite);
            if (initSite?.geometry?.coordinates) {
                const [lon, lat] = initSite.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            // The map must be created after sitePosition is final: it is the initial view centre.
            fusionMap = L.map("fusionMap", { zoomControl: false }).setView(sitePosition, 12)
                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
            marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(fusionMap);
            // Changing the site rebuilds the season list and selects its first season.
            siteSelect.addEventListener("change", function() {
                const sn = this.value;
                const seas = availableSiteSeasons[sn] || [];
                document.getElementById("seasonSelect").innerHTML = seas.map(s => `<option value="${s}">${s}</option>`).join("");
                document.getElementById("seasonSelect").value = seas[0] || "2024";
                setSiteSeason(sn, document.getElementById("seasonSelect").value);
            });
            document.getElementById("seasonSelect").addEventListener("change", function() {
                setSiteSeason(siteSelect.value, this.value);
            });
            // The three scenario handlers below update the shared state, mirror it into the URL
            // and trigger loadTimeseries() and updateMap() without awaiting either.
            // NOTE(review): they serialise the shared `urlParams`, whereas the function above
            // builds a fresh URLSearchParams from location.search — confirm that site/season/date
            // written there survive a later strategy/sigma/mode change.
            document.getElementById("strategySelect").addEventListener("change", function() {
                strategy = this.value;
                urlParams.set("strategy", strategy);
                history.replaceState({}, "", `?${urlParams}`);
                loadTimeseries(); updateMap();
            });
            document.getElementById("sigmaSelect").addEventListener("change", function() {
                sigma = this.value;
                urlParams.set("sigma", sigma);
                history.replaceState({}, "", `?${urlParams}`);
                loadTimeseries(); updateMap();
            });
            document.getElementById("fusionModeSelect").addEventListener("change", function() {
                fusionMode = this.value;
                urlParams.set("mode", fusionMode);
                history.replaceState({}, "", `?${urlParams}`);
                const ml = document.getElementById("mapLabelFusion");
                if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";
                loadTimeseries(); updateMap();
            });
            // Initial load for the resolved site/season.
            await setSiteSeason(initialSite, initialSeason);
        }
        // On slider input: refresh the date label, then call drawPlots() and updateMap().
        document.getElementById("dateSlider").addEventListener("input", function() {
            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
            drawPlots(); updateMap();
        });
        // Start the page; the promise returned by init() is not awaited.
        init();
    </script>
 </body>
 </html>
--- a/webapp/gap_validation.html
+++ b/webapp/gap_validation.html
@ -1,284 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <meta charset="utf-8">
    <title>Gap validation</title>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1100px; margin: 0 auto; padding: 20px; }
        .selectors { margin-bottom: 18px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { font-size: 22px; margin-top: 0; }
        h2 { font-size: 16px; margin-top: 22px; color: #333; }
        h2:first-of-type { margin-top: 8px; }
        table { border-collapse: collapse; width: 100%; font-size: 12px; margin-bottom: 14px; }
        th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; vertical-align: top; }
        th { background: #f5f5f5; }
        td.num { text-align: right; font-variant-numeric: tabular-nums; }
        td.paths { font-size: 11px; word-break: break-all; color: #444; max-width: 420px; }
        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
                 padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
        .intro code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
        .section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; line-height: 1.45; }
        .empty { color: #666; font-style: italic; }
        .err { color: #a00; }
        details.meta { font-size: 12px; margin-top: 12px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
        details.meta summary { cursor: pointer; font-weight: 600; }
        details.meta pre { margin: 8px 0 0; overflow: auto; font-size: 11px; max-height: 200px; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="nav">
            <a href="index.html">Full</a>
            <a href="preselection.html">Pre-selection</a>
            <a href="prepared.html">Prepared</a>
            <a href="fusion.html">Fusion</a>
            <a href="postprocessed.html">Postprocessed</a>
            <a href="metrics.html">Metrics</a>
            <a href="gap_validation.html" class="active">Gap validation</a>
            <a href="phenology.html">Phenology</a>
        </div>
        <h1 id="pageTitle">Gap validation</h1>
        <div class="selectors">
            <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
        </div>
        <div id="content"></div>
    </div>
    <script>
        // Currently selected site and season; these defaults hold until init() resolves the real selection.
        let siteName = "innsbruck",
            season = "2024";
        // sitename -> list of season keys for which a summary file was found (filled by init()).
        let availableSiteSeasons = {};
        // Query parameters of the current page; load() writes site/season back through history.replaceState.
        const urlParams = new URLSearchParams(location.search);
        /**
         * Check whether a gap-validation summary file exists for a site/season.
         * Uses a HEAD request; any thrown error counts as "not available".
         * @param {string} sn - Site name used as a path segment.
         * @param {string} s - Season used as a path segment.
         * @returns {Promise<boolean>} The response's `ok` flag, or false on error.
         */
        async function probeSummary(sn, s) {
            const headOnly = { method: "HEAD" };
            try {
                const { ok } = await fetch(`data/${sn}/${s}/validation/gap_validation_summary.json`, headOnly);
                return ok;
            } catch {
                return false;
            }
        }
        /**
         * Format a finite number with a fixed number of decimals.
         * @param {*} v - Value to format.
         * @param {number} [d=4] - Number of decimals.
         * @returns {string} `v.toFixed(d)`, or "—" for anything that is not a finite number.
         */
        function fmt(v, d = 4) {
            // Number.isFinite does not coerce, so null, strings, NaN and ±Infinity all fall through to the dash.
            return Number.isFinite(v) ? v.toFixed(d) : "—";
        }
        /**
         * Format a finite number as a rounded integer string.
         * @param {*} v - Value to format.
         * @returns {string} `String(Math.round(v))`, or "—" for anything that is not a finite number.
         */
        function fmtInt(v) {
            // Number.isFinite does not coerce: null, strings, NaN and ±Infinity yield the dash.
            return Number.isFinite(v) ? String(Math.round(v)) : "—";
        }
        /**
         * Build the "Whittaker crossover" section of the page.
         * Picks the crossover entry keyed by `summary.scenario`, falling back to the
         * first entry of `summary.whittaker_crossover`.
         * @param {object} summary - Parsed gap-validation summary.
         * @returns {string} HTML for the section, or "" when no crossover entry exists.
         */
        function crossoverBlock(summary) {
            const byScenario = summary.whittaker_crossover || {};
            const scenarioKey = summary.scenario;
            const crossover = (scenarioKey && byScenario[scenarioKey]) || Object.values(byScenario)[0];
            if (!crossover) return "";
            const firstBelow = crossover.first_gap_days_fusion_nse_below_whittaker ?? "—";
            const pieces = [
                `<h2>Whittaker crossover (NSE<sub>S2</sub>)</h2>`,
                `<p class="section-note">${crossover.whittaker_definition || ""}</p>`,
                `<p class="section-note"><b>First gap length (days)</b> where fusion NSE<sub>S2</sub> &lt; Whittaker NSE<sub>S2</sub> (strict): <b>${firstBelow}</b> (none if fusion never falls below).</p>`,
            ];
            const gapRows = crossover.by_gap || [];
            // The comparison table is only emitted when per-gap rows are present.
            if (gapRows.length) {
                pieces.push(`<table><tr><th>Gap days</th><th class="num">NSE<sub>S2</sub> fusion</th><th class="num">NSE<sub>S2</sub> Whittaker</th></tr>`);
                for (const row of gapRows) {
                    pieces.push(`<tr><td>${row.gap_days}</td><td class="num">${fmt(row.nse_s2_fusion, 3)}</td><td class="num">${fmt(row.nse_s2_whittaker, 3)}</td></tr>`);
                }
                pieces.push(`</table>`);
            }
            return pieces.join("");
        }
        /**
         * Build the "Gap manifest" section listing every manifest entry.
         * @param {object|null} manifest - Parsed gap manifest; only `entries` is read.
         * @returns {string} HTML for the section, or "" when there are no entries.
         */
        function manifestTable(manifest) {
            const entries = manifest?.entries;
            if (!entries?.length) return "";
            // The midpoint rule shown in the note is taken from the first entry.
            const midpointRule = entries[0]?.midpoint_rule || "—";
            const bodyRows = entries.map((entry) => {
                const windowText = `${entry.window_start} → ${entry.window_end}`;
                return `<tr><td>${entry.transition || "—"}</td><td>${entry.gap_days}</td><td>${entry.prediction_date}</td><td>${windowText}</td><td>${entry.withheld_s2_filename || "—"}</td></tr>`;
            });
            return [
                `<h2>Gap manifest</h2>`,
                `<p class="section-note">From <code>data/${siteName}/${season}/validation/gap_manifest.json</code>. Midpoint rule: ${midpointRule}.</p>`,
                `<table><tr><th>Transition</th><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`,
                ...bodyRows,
                `</table>`,
            ].join("");
        }
        /**
         * Build the per-gap spatial metrics table from `summary.results`.
         *
         * Rows carrying an `error` field are rendered as a single wide error cell
         * followed by the fused-gap path; all other rows show the gap / no-gap /
         * Whittaker metrics read from `r.spatial`.
         * @param {Array<object>|null|undefined} results - Result rows of the summary.
         * @returns {string} A `<table>` string, or an "empty" paragraph when there are no rows.
         */
        function resultsTable(results) {
            if (!results?.length) return `<p class="empty">No result rows in summary.</p>`;
            // Error messages are free text; escape them so "<" or "&" cannot break the table markup.
            const escapeHtml = (text) =>
                String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
            const head = `<tr>
                <th>Transition</th><th>Gap</th><th>Prediction</th><th>Withheld REFL</th>
                <th class="num">RMSE<br><span style="font-weight:normal">gap</span></th>
                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">gap</span></th>
                <th class="num">RMSE<br><span style="font-weight:normal">no gap</span></th>
                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">no gap</span></th>
                <th class="num">ΔRMSE</th><th class="num">ΔNSE</th>
                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">Whitt.</span></th>
                <th class="num">n</th>
                <th>Paths / error</th>
            </tr>`;
            const parts = [head];
            for (const r of results) {
                if (r.error) {
                    // The header has 13 columns: 2 leading cells + colspan 10 + 1 path cell,
                    // so the path lines up under "Paths / error".
                    parts.push(
                        `<tr><td>${r.transition ?? "—"}</td><td>${r.gap_days ?? "—"}</td><td colspan="10" class="err">${escapeHtml(r.error)}</td><td class="paths">${r.fused_gap_path || ""}</td></tr>`
                    );
                    continue;
                }
                const g = r.spatial?.gap || {};
                const ng = r.spatial?.no_gap || {};
                const wh = r.spatial?.whittaker || {};
                const dRm = r.spatial?.delta_rmse;
                const dNs = r.spatial?.delta_nse;
                const p = r.paths || {};
                // Only the paths that are actually present are listed, one per line.
                const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl].filter(Boolean).join("<br>");
                parts.push(`<tr>
                    <td>${r.transition || "—"}</td>
                    <td>${r.gap_days}</td>
                    <td>${r.prediction_date || "—"}</td>
                    <td style="font-size:11px">${r.withheld_s2_filename || "—"}</td>
                    <td class="num">${fmt(g.rmse)}</td>
                    <td class="num">${fmt(g.nse_s2, 3)}</td>
                    <td class="num">${fmt(ng.rmse)}</td>
                    <td class="num">${fmt(ng.nse_s2, 3)}</td>
                    <td class="num">${fmt(dRm)}</td>
                    <td class="num">${fmt(dNs, 3)}</td>
                    <td class="num">${fmt(wh.nse_s2, 3)}</td>
                    <td class="num">${fmtInt(g.n_pixels)}</td>
                    <td class="paths">${pathNote}</td>
                </tr>`);
            }
            return `<table>${parts.join("")}</table>`;
        }
        /**
         * Build the collapsible "Run metadata" block (git commit and command line).
         *
         * `command_line` may be an argv array or an already-joined string. Both the
         * commit and the command line are HTML-escaped before insertion.
         * @param {object} summary - Parsed gap-validation summary.
         * @returns {string} A `<details>` string, or "" when there is nothing to show.
         */
        function metaDetails(summary) {
            // Command lines routinely contain "<", ">" or "&"; escape so they render literally.
            const escapeHtml = (text) =>
                String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
            const git = summary.git_commit;
            const rawCmd = summary.command_line;
            let cmdText = "";
            if (Array.isArray(rawCmd)) {
                cmdText = rawCmd.map((x) => String(x)).join(" ");
            } else if (rawCmd) {
                cmdText = String(rawCmd);
            }
            // An empty argv array is treated as "no command" instead of producing an empty block.
            if (!cmdText && !git) return "";
            let h = `<details class="meta"><summary>Run metadata</summary>`;
            if (git) h += `<p>Git: <code>${escapeHtml(git)}</code></p>`;
            if (cmdText) h += `<pre>${escapeHtml(cmdText)}</pre>`;
            h += `</details>`;
            return h;
        }
        /**
         * Render the page body into the `#content` element.
         * Without a summary, an error note with run instructions is shown (plus the
         * manifest table when a manifest with entries is available). Otherwise the
         * intro, spatial metrics, crossover, run metadata and manifest sections are
         * concatenated in that order.
         * @param {object|null} summary - Parsed gap-validation summary, or null.
         * @param {object|null} manifest - Parsed gap manifest, or null.
         * @returns {Promise<void>}
         */
        async function render(summary, manifest) {
            const target = document.getElementById("content");
            if (!summary) {
                target.innerHTML = `<p class="err">Could not load <code>data/${siteName}/${season}/validation/gap_validation_summary.json</code>.</p>
                    <p class="section-note">From <code>processing/</code>: <code>python -m gap_validation.run --site ${siteName} --season ${season} --lat LAT --lon LON</code> (see <code>--help</code>). Serve from <code>processing/</code>: <code>python3 -m http.server 8000</code> → <code>/webapp/gap_validation.html</code> (<code>webapp/data</code> → <code>../data</code>).</p>`;
                if (manifest?.entries) target.innerHTML += manifestTable(manifest);
                return;
            }
            // Values from the summary win over the current selection when present.
            const scenarioLabel = summary.scenario || "—";
            const shownSite = summary.site_name ?? siteName;
            const shownSeason = summary.season ?? season;
            const sections = [
                `<div class="intro">
                Tier-2 withheld S2, spatial GCC vs withheld scene, NSE<sub>S2</sub>, and Whittaker comparison.
                Summary: <code>data/${shownSite}/${shownSeason}/validation/gap_validation_summary.json</code>.
                Scenario in this file: <b>${scenarioLabel}</b> (one run overwrites; re-run CLI for other strategy/σ/mode).
            </div>`,
                `<h2>Spatial metrics (per gap length)</h2>`,
                `<p class="section-note">Reference = GCC from withheld S2 REFL (bilinear to fusion grid). Prediction = fused GCC. ΔRMSE = RMSE<sub>gap</sub> − RMSE<sub>no gap</sub>; ΔNSE = NSE<sub>no gap</sub> − NSE<sub>gap</sub>.</p>`,
                resultsTable(summary.results),
                crossoverBlock(summary),
                metaDetails(summary),
            ];
            if (manifest?.entries) sections.push(manifestTable(manifest));
            target.innerHTML = sections.join("");
        }
        /**
         * Load the summary and manifest of the current site/season, render them,
         * then update the page title and the URL query string.
         * Each file is fetched on its own; a failed or non-OK request yields null.
         * @returns {Promise<void>}
         */
        async function load() {
            // The URL is built at call time so it reflects the current siteName/season.
            const readValidationJson = async (fileName) => {
                try {
                    const response = await fetch(`data/${siteName}/${season}/validation/${fileName}`);
                    return response.ok ? await response.json() : null;
                } catch {
                    return null;
                }
            };
            const summary = await readValidationJson("gap_validation_summary.json");
            const manifest = await readValidationJson("gap_manifest.json");
            await render(summary, manifest);
            // The title prefers the site's description from sites.geojson over the raw site name.
            const feature = window.sitesData?.features?.find((f) => f.properties?.sitename === siteName);
            const titleEl = document.getElementById("pageTitle");
            titleEl.textContent = (feature?.properties?.description || siteName) + " — gap validation — " + season;
            urlParams.set("site", siteName);
            urlParams.set("season", season);
            history.replaceState({}, "", `?${urlParams}`);
        }
        /**
         * Page bootstrap. Loads `data/sites.geojson`, finds the site/season pairs
         * that have a gap-validation summary, fills both selectors, resolves the
         * initial selection from the URL, wires the change handlers and performs
         * the first `load()`.
         *
         * All HEAD probes are issued concurrently rather than one by one, so the
         * start-up time no longer grows with sites × seasons round-trips.
         * @returns {Promise<void>}
         */
        async function init() {
            // A failed or non-OK fetch degrades to an empty feature list.
            try {
                const res = await fetch("data/sites.geojson");
                window.sitesData = res.ok ? await res.json() : { features: [] };
            } catch {
                window.sitesData = { features: [] };
            }
            const features = window.sitesData.features || [];
            // probeSummary never rejects (it resolves false on error), so Promise.all cannot fail here.
            const probed = await Promise.all(
                features
                    .filter((f) => f.properties?.sitename)
                    .map(async (f) => {
                        const sn = f.properties.sitename;
                        const seasonsFromGeo = f.properties.seasons ? Object.keys(f.properties.seasons).sort() : [];
                        const hits = await Promise.all(seasonsFromGeo.map((s) => probeSummary(sn, s)));
                        // Filtering by index keeps the sorted season order.
                        return [sn, seasonsFromGeo.filter((_, i) => hits[i])];
                    })
            );
            for (const [sn, withData] of probed) {
                if (withData.length) availableSiteSeasons[sn] = withData;
            }
            const availableSites = Object.keys(availableSiteSeasons).sort();
            const siteSelect = document.getElementById("siteSelect");
            const seasonSelect = document.getElementById("seasonSelect");
            // Replace the season options and select one of them.
            const fillSeasons = (seasons, selected) => {
                seasonSelect.innerHTML = seasons.map((s) => `<option value="${s}">${s}</option>`).join("");
                seasonSelect.value = selected;
            };
            siteSelect.innerHTML = "";
            // When no site has data, offer "innsbruck" with season "2024" as a placeholder.
            (availableSites.length ? availableSites : ["innsbruck"]).forEach((sn) => {
                const opt = document.createElement("option");
                opt.value = sn;
                opt.textContent = sn;
                siteSelect.appendChild(opt);
                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
            });
            // URL values are honoured only when they name a known site / a season of that site.
            const urlSite = urlParams.get("site");
            const urlSeason = urlParams.get("season");
            const initialSite = urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
            const initialSeasons = availableSiteSeasons[initialSite] || [];
            const initialSeason = urlSeason && initialSeasons.includes(urlSeason) ? urlSeason : initialSeasons[0] || "2024";
            siteSelect.value = initialSite;
            fillSeasons(initialSeasons, initialSeason);
            siteName = initialSite;
            season = initialSeason;
            // Changing the site rebuilds the season list and selects its first season.
            siteSelect.addEventListener("change", function () {
                const seas = availableSiteSeasons[this.value] || [];
                fillSeasons(seas, seas[0] || "2024");
                siteName = this.value;
                season = seasonSelect.value;
                load();
            });
            seasonSelect.addEventListener("change", function () {
                season = this.value;
                load();
            });
            await load();
        }
        // Start the page; the promise returned by init() is not awaited.
        init();
    </script>
 </body>
 </html>
--- a/webapp/index.html
+++ b/webapp/index.html
--- a/webapp/metrics.html
+++ b/webapp/metrics.html
@ -1,367 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <meta charset="utf-8">
    <title>Metrics</title>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1100px; margin: 0 auto; padding: 20px; }
        .selectors { margin-bottom: 20px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { font-size: 22px; }
        h2 { font-size: 16px; margin-top: 24px; color: #333; }
        h2:first-of-type { margin-top: 8px; }
        h3 { font-size: 14px; margin: 14px 0 6px 0; color: #444; font-weight: 600; }
        table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 12px; }
        th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; }
        th { background: #f5f5f5; }
        td.num { text-align: right; font-variant-numeric: tabular-nums; }
        .fusion-block table { margin-bottom: 4px; }
        .fusion-block table + table { margin-top: 12px; }
        .section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
        .section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
                 padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
        .intro-short { margin-bottom: 0; }
        details.definitions { margin-top: 28px; font-size: 13px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
        details.definitions summary { cursor: pointer; font-weight: 600; color: #333; }
        details.definitions ul { margin: 8px 0 0 18px; padding: 0; }
        details.definitions li { margin-bottom: 4px; }
        .scenario-key { font-size: 11px; color: #666; font-weight: normal; }
        .empty { color: #666; font-style: italic; }
        .err { color: #a00; }
        details.how-read {
            font-size: 12px; color: #333; line-height: 1.5; max-width: 820px; margin: 0 0 18px 0;
            padding: 8px 12px 10px; border: 1px solid #ccd; border-radius: 4px; background: #f8fafc;
        }
        details.how-read summary {
            cursor: pointer; font-weight: 600; font-size: 13px; color: #111; margin-bottom: 0;
        }
        details.how-read ol { margin: 10px 0 0; padding-left: 1.35rem; }
        details.how-read li { margin-bottom: 7px; }
        details.how-read li:last-child { margin-bottom: 0; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="nav">
            <a href="index.html">Full</a>
            <a href="preselection.html">Pre-selection</a>
            <a href="prepared.html">Prepared</a>
            <a href="fusion.html">Fusion</a>
            <a href="postprocessed.html">Postprocessed</a>
            <a href="metrics.html" class="active">Metrics</a>
            <a href="gap_validation.html">Gap validation</a>
            <a href="phenology.html">Phenology</a>
        </div>
        <h1 id="siteName">Metrics</h1>
        <div class="selectors">
            <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
        </div>
        <div id="content"></div>
    </div>
    <script>
        /** Shown in the UI; pearson_r, rmse, mae, n_samples remain in metrics.json only. */
        const DISPLAY_METRIC_COLS = ["r_squared", "nrmse", "nse_pc"];
        /** Column header text for each key in DISPLAY_METRIC_COLS. */
        const DISPLAY_METRIC_LABELS = {
            r_squared: "R² vs mean",
            nrmse: "nRMSE",
            nse_pc: "NSE_PC",
        };
        /**
         * Fusion scenarios as [key in the `temporal` object, strategy label, sigma].
         * The ItB rows are derived from these by appending "_itb" to the key.
         */
        const FUSION_BTI_ROWS = [
            ["aggressive_sigma20", "Aggressive", 20],
            ["aggressive_sigma30", "Aggressive", 30],
            ["nonaggressive_sigma20", "Non-aggressive", 20],
            ["nonaggressive_sigma30", "Non-aggressive", 30],
        ];
        /**
         * Read one displayed metric from a metrics object.
         * For "nse_pc" the `nse` field is used when `nse_pc` is null or undefined.
         * @param {object} m - Metrics object.
         * @param {string} c - Column key.
         * @returns {*} The stored value for that column.
         */
        function mv(m, c) {
            if (c !== "nse_pc") return m[c];
            return m.nse_pc ?? m.nse;
        }
        /**
         * Format a metric value with the precision used for its column.
         * @param {string} col - Column key.
         * @param {*} v - Metric value.
         * @returns {string} 3 decimals for r_squared / nse_pc, 4 for nrmse, `fmt(v)` otherwise;
         *                   "—" when `v` is not of type number.
         */
        function fmtMetric(col, v) {
            // typeof null is "object", so this single test also covers null and undefined.
            if (typeof v !== "number") return "—";
            switch (col) {
                case "r_squared":
                case "nse_pc":
                    return v.toFixed(3);
                case "nrmse":
                    return v.toFixed(4);
                default:
                    return fmt(v);
            }
        }
        // Currently selected site and season (initial defaults).
        let siteName = "innsbruck", season = "2024";
        // sitename -> list of season keys; presumably filled by the page's init code — TODO confirm (not visible here).
        let availableSiteSeasons = {};
        // Query parameters of the current page URL.
        const urlParams = new URLSearchParams(location.search);
        /**
         * Check whether `metrics.json` exists for a site/season.
         * Uses a HEAD request; any thrown error counts as "not available".
         * @param {string} sn - Site name used as a path segment.
         * @param {string} s - Season used as a path segment.
         * @returns {Promise<boolean>} The response's `ok` flag, or false on error.
         */
        async function probeMetrics(sn, s) {
            const headOnly = { method: "HEAD" };
            try {
                const { ok } = await fetch(`data/${sn}/${s}/metrics.json`, headOnly);
                return ok;
            } catch {
                return false;
            }
        }
        /**
         * Generic number formatter: integers are printed as-is, other numbers with 4 decimals.
         * @param {*} v - Value to format.
         * @returns {string} Formatted number, or "—" when `v` is not of type number.
         */
        function fmt(v) {
            // typeof null is "object", so null and undefined are both rejected here.
            if (typeof v !== "number") return "—";
            if (Number.isInteger(v)) return String(v);
            return v.toFixed(4);
        }
        /**
         * Extract the mean residual (fused − PhenoCam) of one fusion scenario.
         * @param {object|null|undefined} m - Scenario metrics; reads `residual_vs_phenocam.mean`.
         * @returns {number|null} The mean as a finite number, or null when it is
         *                        missing, null or not numeric.
         */
        function fusionMeanResidual(m) {
            const x = m?.residual_vs_phenocam?.mean;
            // Number(null) is 0, so a JSON null must be rejected before coercion;
            // otherwise "no value" would be reported as a perfect zero bias.
            if (x == null) return null;
            const n = Number(x);
            return Number.isFinite(n) ? n : null;
        }
        /**
         * Build the `<tr>` strings of one fusion table (BtI or ItB).
         * Scenarios missing from `temporal` are skipped.
         * @param {object} temporal - Scenario key -> metrics object.
         * @param {Array<Array>} keysWithLabels - Rows as [scenario key, strategy label, sigma].
         * @param {boolean} includeMeanResid - Whether to append the mean-residual cell.
         * @returns {string[]} One table-row string per available scenario.
         */
        function fusionSubTableRows(temporal, keysWithLabels, includeMeanResid) {
            const rows = [];
            for (const [scenarioKey, strategyLabel, sigmaValue] of keysWithLabels) {
                const metrics = temporal[scenarioKey];
                if (!metrics) continue;
                const bias = fusionMeanResidual(metrics);
                let biasCell = "";
                if (includeMeanResid) {
                    const shown = bias === null ? "—" : bias.toFixed(3);
                    biasCell = `<td class="num">${shown}</td>`;
                }
                const metricCells = DISPLAY_METRIC_COLS
                    .map((c) => `<td class="num">${fmtMetric(c, mv(metrics, c))}</td>`)
                    .join("");
                rows.push(
                    `<tr><td>${strategyLabel}, σ=${sigmaValue} <span class="scenario-key">(${scenarioKey})</span></td>${metricCells}${biasCell}</tr>`
                );
            }
            return rows;
        }
        /**
         * Build the fusion section: one table for BtI scenarios and one for ItB.
         * The mean-residual column is shown in both tables as soon as any scenario
         * provides a mean residual.
         * @param {object} temporal - Scenario key -> metrics object.
         * @returns {string} HTML for the tables, or an "empty" paragraph.
         */
        function fusionTables(temporal) {
            if (!temporal || typeof temporal !== "object") {
                return `<p class="empty">No fusion temporal data</p>`;
            }
            // ItB scenarios share strategy and sigma with BtI; only the key gets an "_itb" suffix.
            const itbRows = FUSION_BTI_ROWS.map(([k, s, sig]) => [`${k}_itb`, s, sig]);
            const showMean = [...FUSION_BTI_ROWS, ...itbRows].some(
                ([key]) => fusionMeanResidual(temporal[key]) !== null
            );
            const btiBody = fusionSubTableRows(temporal, FUSION_BTI_ROWS, showMean);
            const itbBody = fusionSubTableRows(temporal, itbRows, showMean);
            if (!btiBody.length && !itbBody.length) {
                return `<p class="empty">No fusion scenarios in temporal</p>`;
            }
            const metricHeads = DISPLAY_METRIC_COLS.map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`).join("");
            const meanHead = showMean ? `<th class="num">Mean resid.</th>` : "";
            const head = `<tr><th>Setting</th>${metricHeads}${meanHead}</tr>`;
            const blocks = [
                ["Bands-then-Index (BtI)", btiBody],
                ["Index-then-Bands (ItB)", itbBody],
            ]
                .filter(([, body]) => body.length)
                .map(([title, body]) => `<h3>${title}</h3><table>${head}${body.join("")}</table>`);
            return `<div class="fusion-block">${blocks.join("")}</div>`;
        }
        /**
         * Build the baselines table (S2, S3 composite, cloud-screened S2, Whittaker).
         * Returns only the table markup, without a heading; entries that are missing
         * or not objects are left out.
         * @param {object|null|undefined} b - The `baseline` object of metrics.json.
         * @returns {string} A `<table>` string, or "" when no baseline row is available.
         */
        function baselineTable(b) {
            if (!b || typeof b !== "object") return "";
            // Row order: plain S2 first, then the three per-strategy baselines for each strategy.
            const candidates = [["S2 GCC (all acquisitions)", b.s2]];
            for (const strat of ["aggressive", "nonaggressive"]) {
                candidates.push(
                    [`S3 composite GCC (${strat})`, b.s3?.[strat]],
                    [`S2 GCC cloud-screened (${strat})`, b.s2_cloudfree?.[strat]],
                    [`S2 Whittaker λ=400 (${strat})`, b.s2_whittaker_lambda400?.[strat]]
                );
            }
            const rows = candidates
                .filter(([, metrics]) => metrics && typeof metrics === "object")
                .map(([label, metrics]) => {
                    const cells = DISPLAY_METRIC_COLS.map((c) => `<td class="num">${fmtMetric(c, mv(metrics, c))}</td>`).join("");
                    return `<tr><td>${label}</td>${cells}</tr>`;
                });
            if (!rows.length) return "";
            const headCells = DISPLAY_METRIC_COLS.map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`).join("");
            return `<table><tr><th>Baseline</th>${headCells}</tr>${rows.join("")}</table>`;
        }
        /**
         * Format a numeric-like value with 3 decimals.
         * @param {*} v - Number or numeric string.
         * @returns {string} `toFixed(3)` of the value, or "—" when it is missing,
         *                   null or not a finite number.
         */
        function fmtFixed3(v) {
            // Number(null) is 0, so a JSON null must be caught before coercion;
            // otherwise a missing value would be printed as "0.000".
            if (v == null) return "—";
            const n = Number(v);
            return Number.isFinite(n) ? n.toFixed(3) : "—";
        }
        /**
         * Build the "Summaries" section: ΔNSE_PC (σ20 − σ30) per mode and strategy,
         * and the paired BtI vs ItB mean residuals.
         *
         * Null or missing values are shown as "—" and do not count as available
         * deltas, so the "needs both σ20 and σ30" hint appears when nothing was computed.
         * @param {object|null|undefined} d - The derived block of metrics.json.
         * @returns {string} HTML for the section, or "" when there is nothing to show.
         */
        function derivedSection(d) {
            if (!d) return "";
            const dn = d.delta_nse_pc_sigma20_minus_sigma30;
            const paired = d.bti_vs_itb_mean_residual || [];
            if (!dn && !paired.length) return "";
            // Number(null) is 0: reject null/undefined before any numeric coercion.
            const hasValue = (v) => v != null && Number.isFinite(Number(v));
            const cell = (v) => (v == null ? "—" : fmtFixed3(v));
            let h = `<h2>Summaries</h2>`;
            h += `<p class="section-note">Same numbers as Fusion, condensed. First table: which σ fits PhenoCam better (NSE_PC only). Second: mean bias BtI vs ItB.</p>`;
            if (dn) {
                h += `<p class="section-note"><b>ΔNSE_PC</b> = NSE_PC(σ20) − NSE_PC(σ30). <b>+</b> → σ20 better. <b>−</b> → σ30 better.</p>`;
                h += `<table><tr><th>Mode</th><th>Strategy</th><th class="num">ΔNSE_PC</th></tr>`;
                let anyDelta = false;
                for (const mode of ["bti", "itb"]) {
                    for (const strat of ["aggressive", "nonaggressive"]) {
                        const v = dn[mode]?.[strat];
                        if (hasValue(v)) anyDelta = true;
                        h += `<tr><td>${mode.toUpperCase()}</td><td>${strat}</td><td class="num">${cell(v)}</td></tr>`;
                    }
                }
                h += `</table>`;
                if (!anyDelta) {
                    h += `<p class="section-note">ΔNSE_PC needs both σ20 and σ30 fusion rows in <code>temporal</code> (BtI and ItB). Re-run <code>metrics_stats</code>.</p>`;
                }
            }
            if (paired.length) {
                h += `<p class="section-note">Mean(fused − PhenoCam) per row. <b>+</b> / <b>−</b> = average over / under PhenoCam. Closer to <b>0</b> in a column = less bias for that workflow.</p>`;
                h += `<table><tr><th>Strategy</th><th>σ</th><th class="num">Mean residual BtI</th><th class="num">Mean residual ItB</th></tr>`;
                for (const row of paired) {
                    h += `<tr><td>${row.strategy}</td><td>${row.sigma}</td><td class="num">${cell(row.mean_residual_bti)}</td><td class="num">${cell(row.mean_residual_itb)}</td></tr>`;
                }
                h += `</table>`;
            }
            return h;
        }
        /**
         * Static, collapsible "How to read" help block for the metrics page.
         * Takes no input and reads no state.
         * @returns {string} A `<details class="how-read">` HTML string.
         */
        function howToReadBlock() {
            return `<details class="how-read">
                <summary>How to read</summary>
                <ol>
                    <li>All scores are satellite or fusion <b>GCC</b> vs <b>PhenoCam GCC</b> at the site 3×3 window, <b>same calendar days</b> only. Extra stats: <code>metrics.json</code>.</li>
                    <li><b>R² vs mean</b> and <b>NSE_PC</b> are the same value (1 − SS<sub>res</sub>/SS<sub>tot</sub> vs predicting mean PhenoCam each day); not (Pearson <i>r</i>)²; can be negative. Higher = better. <b>nRMSE</b>: lower = better.</li>
                    <li><b>Fusion:</b> same row number in BtI and in ItB = same screening + same σ — compare left/right. Down one block = change screening or σ.</li>
                    <li><b>Mean resid.</b> (if present): mean(fused − PhenoCam). Sign = average bias; use R² vs mean / nRMSE / NSE_PC for overall fit.</li>
                    <li><b>Summaries:</b> ΔNSE_PC = NSE at σ20 minus NSE at σ30 (+ means σ20 wins). Paired table: closer to 0 = less mean bias.</li>
                </ol>
            </details>`;
        }
        /**
         * Static, collapsible "Definitions" glossary for the metrics page.
         * Takes no input and reads no state.
         * @returns {string} A `<details class="definitions">` HTML string.
         */
        function definitionsDetails() {
            return `<details class="definitions">
                <summary>Definitions</summary>
                <ul>
                    <li><b>BtI</b>: fuse reflectance bands, then GCC.</li>
                    <li><b>ItB</b>: GCC on S2 and S3, then fuse GCC.</li>
                    <li><b>Scenario</b>: screening (<code>aggressive</code> / <code>nonaggressive</code>) × σ (20 / 30 days).</li>
                    <li><a href="phenology.html">Phenology</a> — PhenoCam SOS/EOS (TIMESAT).</li>
                    <li><b>R² vs mean</b> — coefficient of determination vs a constant mean(PhenoCam) baseline; JSON key <code>r_squared</code>; duplicates <code>nse_pc</code>. Not (Pearson <i>r</i>)².</li>
                    <li><code>metrics.json</code> — also Pearson <i>r</i>, RMSE, MAE, <code>n_samples</code>.</li>
                </ul>
            </details>`;
        }
        /**
         * Render the metrics report for the current site/season into #content.
         *
         * Sections are appended in a fixed order: intro, "How to read" panel,
         * PhenoCam statistics (only when data.phenocam_stats is set), baselines
         * (only when baselineTable() returns something truthy), fusion tables,
         * derived summaries and the definitions panel.
         *
         * @param {?Object} data Parsed metrics.json; a falsy value shows an error
         *     paragraph instead of the report.
         */
        function render(data) {
            const container = document.getElementById("content");
            if (!data) {
                container.innerHTML = `<p class="err">Could not load metrics.json</p>`;
                return;
            }

            let markup = `<div class="intro intro-short">
                GCC at the 3×3 site window vs PhenoCam. Sections: PhenoCam → baselines → fusion (BtI, then ItB) → summaries.
                <code>data/${siteName}/${season}/metrics.json</code>
            </div>`;
            markup += howToReadBlock();

            const stats = data.phenocam_stats;
            if (stats) {
                // One numeric cell per summary statistic, in header order.
                const statCells = ["mean", "std", "min", "max", "n_samples"]
                    .map((field) => `<td class="num">${fmt(stats[field])}</td>`)
                    .join("");
                markup += `<h2>PhenoCam (ground truth)</h2>`;
                markup += `<p class="section-note">Camera ROI GCC (not compared to itself). Dates / SOS–EOS: <a href="phenology.html">Phenology</a>.</p>`;
                markup += `<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
                markup += `${statCells}</tr></table>`;
            }

            const baselineMarkup = baselineTable(data.baseline);
            if (baselineMarkup) {
                markup += `<h2>Baselines (vs PhenoCam)</h2>`;
                markup += `<p class="section-note">Same columns as fusion (vs PhenoCam). Higher R² vs mean / NSE_PC, lower nRMSE = better. S3 = coarse-only; Whittaker = smoothed S2-only.</p>`;
                markup += baselineMarkup;
            }

            markup += `<h2>Fusion (vs PhenoCam)</h2>`;
            markup += `<p class="section-note">BtI block vs ItB block: same row = same screening + σ. Within a block: four EFAST combinations.</p>`;
            markup += fusionTables(data.temporal || {});
            markup += derivedSection(data.derived);
            markup += definitionsDetails();

            container.innerHTML = markup || `<p class="empty">Empty metrics file</p>`;
        }
        /**
         * Fetch metrics.json for the current siteName/season and render it, then
         * refresh the page heading and mirror the selection into the URL query.
         *
         * Any failure while fetching, parsing or rendering falls back to
         * render(null), which shows the "could not load" message.
         */
        async function load() {
            const metricsUrl = `data/${siteName}/${season}/metrics.json`;
            try {
                const response = await fetch(metricsUrl);
                let payload = null;
                if (response.ok) {
                    payload = await response.json();
                }
                render(payload);
            } catch {
                render(null);
            }

            // Prefer the human-readable description from sites.geojson when available.
            const feature = window.sitesData?.features?.find((f) => f.properties?.sitename === siteName);
            const label = feature?.properties?.description || siteName;
            document.getElementById("siteName").textContent = `${label} — ${season}`;

            urlParams.set("site", siteName);
            urlParams.set("season", season);
            history.replaceState({}, "", `?${urlParams}`);
        }
        /**
         * Bootstrap the metrics page.
         *
         * Loads data/sites.geojson, probes each declared site/season with
         * probeMetrics() to find the ones that have data, fills the site and
         * season selectors, applies ?site= / ?season= from the URL when they are
         * valid, wires the change handlers and performs the first load().
         *
         * When no site has data the selectors fall back to a single "innsbruck"
         * entry with season "2024".
         */
        async function init() {
            try {
                const response = await fetch("data/sites.geojson");
                window.sitesData = response.ok ? await response.json() : { features: [] };
            } catch {
                window.sitesData = { features: [] };
            }

            // Probes run one after another, in geojson order.
            for (const feature of window.sitesData.features || []) {
                const name = feature.properties?.sitename;
                if (!name) continue;
                const declaredSeasons = feature.properties?.seasons
                    ? Object.keys(feature.properties.seasons).sort()
                    : [];
                const confirmedSeasons = [];
                for (const candidate of declaredSeasons) {
                    const hasMetrics = await probeMetrics(name, candidate);
                    if (hasMetrics) confirmedSeasons.push(candidate);
                }
                if (confirmedSeasons.length) availableSiteSeasons[name] = confirmedSeasons;
            }

            const availableSites = Object.keys(availableSiteSeasons);
            const siteSelect = document.getElementById("siteSelect");
            const seasonSelect = document.getElementById("seasonSelect");
            const seasonOptionsHtml = (list) => list.map((s) => `<option value="${s}">${s}</option>`).join("");

            siteSelect.innerHTML = "";
            const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
            for (const name of listedSites) {
                const option = document.createElement("option");
                option.value = name;
                option.textContent = name;
                siteSelect.appendChild(option);
                if (!availableSiteSeasons[name]) availableSiteSeasons[name] = ["2024"];
            }

            // URL parameters win only when they name a known site / season.
            const requestedSite = urlParams.get("site");
            const requestedSeason = urlParams.get("season");
            let initialSite = availableSites[0] || "innsbruck";
            if (requestedSite && availableSiteSeasons[requestedSite]) {
                initialSite = requestedSite;
            }
            const initialSeasons = availableSiteSeasons[initialSite] || [];
            let initialSeason = initialSeasons[0] || "2024";
            if (requestedSeason && initialSeasons.includes(requestedSeason)) {
                initialSeason = requestedSeason;
            }

            siteSelect.value = initialSite;
            seasonSelect.innerHTML = seasonOptionsHtml(initialSeasons);
            seasonSelect.value = initialSeason;
            siteName = initialSite;
            season = initialSeason;

            siteSelect.addEventListener("change", () => {
                const chosenSite = siteSelect.value;
                const seasonsForSite = availableSiteSeasons[chosenSite] || [];
                seasonSelect.innerHTML = seasonOptionsHtml(seasonsForSite);
                seasonSelect.value = seasonsForSite[0] || "2024";
                siteName = chosenSite;
                season = seasonSelect.value;
                load();
            });
            seasonSelect.addEventListener("change", () => {
                season = seasonSelect.value;
                load();
            });

            await load();
        }
        // Start the page; init() is async and the returned promise is not awaited here.
        init();
    </script>
 </body>
 </html>
--- a/webapp/phenology.html
+++ b/webapp/phenology.html
@ -1,146 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <meta charset="utf-8">
    <title>Phenology</title>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 900px; margin: 0 auto; padding: 20px; }
        h1 { font-size: 22px; margin-top: 0; }
        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
                 padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
        table { border-collapse: collapse; width: 100%; font-size: 13px; }
        th, td { border: 1px solid #ccc; padding: 8px 10px; text-align: left; }
        th { background: #f5f5f5; }
        td.num { text-align: center; font-variant-numeric: tabular-nums; }
        td.site { font-weight: 500; }
        a.rowlink { color: #0066cc; text-decoration: none; }
        a.rowlink:hover { text-decoration: underline; }
        .empty { color: #666; }
        .err { color: #a00; }
        .loading { color: #666; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="nav">
            <a href="index.html">Full</a>
            <a href="preselection.html">Pre-selection</a>
            <a href="prepared.html">Prepared</a>
            <a href="fusion.html">Fusion</a>
            <a href="postprocessed.html">Postprocessed</a>
            <a href="metrics.html">Metrics</a>
            <a href="gap_validation.html">Gap validation</a>
            <a href="phenology.html" class="active">Phenology</a>
        </div>
        <h1>PhenoCam phenology (50% amplitude)</h1>
        <p class="intro">
            Green-up and green-down dates from <code>data/&lt;site&gt;/&lt;season&gt;/raw/phenocam/phenocam_phenology.json</code>
            (TIMESAT on PhenoCam GCC). Site/season rows match <code>data/sites.geojson</code>.
            Run <code>python phenology_timesat.py --all</code> or the pipeline to generate missing JSON files.
        </p>
        <p id="status" class="loading">Loading…</p>
        <div id="tableWrap"></div>
    </div>
    <script>
        /**
         * Escape a value for safe insertion into HTML text or attribute values.
         *
         * The value is coerced with String() first. "&" is replaced before the
         * other characters so already-produced entities are not double-escaped.
         * The single quote is escaped as well, so the result is also safe inside
         * single-quoted attributes.
         *
         * @param {*} s Value to escape.
         * @returns {string} The escaped string.
         */
        function escapeHtml(s) {
            return String(s)
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;")
                .replace(/"/g, "&quot;")
                .replace(/'/g, "&#39;");
        }
        /**
         * Produce the HTML content of a date cell.
         *
         * @param {*} v Date value; null, undefined or "" counts as missing.
         * @returns {string} A greyed-out dash placeholder when the value is
         *     missing, otherwise the HTML-escaped value.
         */
        function cellDate(v) {
            const isMissing = v === null || v === undefined || v === "";
            return isMissing ? "<span class='empty'>—</span>" : escapeHtml(v);
        }
        /**
         * Fetch the PhenoCam phenology JSON for one site/season.
         *
         * Never rejects: a non-OK response, a network error or invalid JSON all
         * yield the "not available" result.
         *
         * @param {string} site Site name used in the data path.
         * @param {string} season Season used in the data path.
         * @returns {Promise<{ok: boolean, up: ?string, down: ?string}>} The 50 %
         *     green-up / green-down dates, or nulls when unavailable.
         */
        async function loadPhenologyRow(site, season) {
            const unavailable = () => ({ ok: false, up: null, down: null });
            const url = `data/${site}/${season}/raw/phenocam/phenocam_phenology.json`;
            try {
                const response = await fetch(url);
                if (!response.ok) {
                    return unavailable();
                }
                const payload = await response.json();
                const up = payload.green_up_50pct_date ?? null;
                const down = payload.green_down_50pct_date ?? null;
                return { ok: true, up, down };
            } catch {
                return unavailable();
            }
        }
        /**
         * Build the phenology table.
         *
         * Loads data/sites.geojson, expands every feature into one row per
         * declared season, fetches each row's phenology JSON in parallel and
         * writes the resulting table into #tableWrap. When sites.geojson cannot
         * be loaded, an error is shown in #status and no table is rendered.
         *
         * Every value taken from the data files (site, season, description,
         * dates) and the row link is HTML-escaped before it is placed in the
         * markup.
         */
        async function main() {
            const status = document.getElementById("status");
            const wrap = document.getElementById("tableWrap");
            let features = [];
            try {
                const res = await fetch("data/sites.geojson");
                if (!res.ok) throw new Error("Could not load sites.geojson");
                const g = await res.json();
                features = g.features || [];
            } catch {
                status.className = "err";
                status.textContent = "Failed to load data/sites.geojson.";
                return;
            }
            const rows = [];
            for (const f of features) {
                const site = f.properties && f.properties.sitename;
                if (!site) continue;
                const desc = (f.properties && f.properties.description) || site;
                const seasons = f.properties && f.properties.seasons
                    ? Object.keys(f.properties.seasons).sort()
                    : [];
                for (const season of seasons) {
                    rows.push({ site, season, desc });
                }
            }
            rows.sort((a, b) => a.site.localeCompare(b.site) || a.season.localeCompare(b.season));
            // Promise.all keeps the sorted row order in its result array.
            const results = await Promise.all(
                rows.map((r) =>
                    loadPhenologyRow(r.site, r.season).then((phen) => ({ ...r, ...phen }))
                )
            );
            const head =
                "<thead><tr>" +
                "<th>Site</th><th>Season</th><th>Description</th>" +
                "<th>Green-up</th><th>Green-down</th>" +
                "</tr></thead>";
            const body = results
                .map((r) => {
                    const q = new URLSearchParams();
                    q.set("site", r.site);
                    q.set("season", r.season);
                    // The query string is URL-encoded; escaping it as HTML on top turns
                    // the "&" separators into "&amp;" so the attribute value is well-formed.
                    const viewer = escapeHtml(`index.html?${q.toString()}`);
                    return (
                        "<tr>" +
                        `<td class="site"><a class="rowlink" href="${viewer}">${escapeHtml(r.site)}</a></td>` +
                        `<td class="num">${escapeHtml(r.season)}</td>` +
                        `<td>${escapeHtml(r.desc)}</td>` +
                        `<td class="num">${cellDate(r.up)}</td>` +
                        `<td class="num">${cellDate(r.down)}</td>` +
                        "</tr>"
                    );
                })
                .join("");
            status.textContent = "";
            status.className = "";
            wrap.innerHTML = "<table>" + head + "<tbody>" + body + "</tbody></table>";
        }
        // Build the table on page load; main() is async and its promise is not awaited here.
        main();
    </script>
 </body>
 </html>
--- a/webapp/postprocessed.html
+++ b/webapp/postprocessed.html
@ -1,390 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Postprocessed Viewer</title>
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
    <script src="common.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
        .selectors { margin-bottom: 20px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { margin: 0 0 5px 0; font-size: 22px; }
        .season-row { padding-bottom: 15px; }
        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
        .download-links { margin-left: 10px; font-size: 14px; }
        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
        .download-links a:hover { text-decoration: underline; }
        #dateSlider { width: 100%; margin: 15px 0; }
        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
        #postprocessedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
        .leaflet-image-layer { image-rendering: pixelated; }
        .leaflet-control-attribution { display: none; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header-sticky">
            <div class="nav">
                <a href="index.html">Full</a>
                <a href="preselection.html">Pre-selection</a>
                <a href="prepared.html">Prepared</a>
                <a href="fusion.html">Fusion</a>
                <a href="postprocessed.html" class="active">Postprocessed</a>
                <a href="metrics.html">Metrics</a>
                <a href="gap_validation.html">Gap validation</a>
                <a href="phenology.html">Phenology</a>
            </div>
            <h1 id="siteName">Innsbruck</h1>
            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
            <div class="selectors">
                <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
            <label>Strategy:</label>
            <select id="strategySelect">
                <option value="aggressive">Aggressive</option>
                <option value="nonaggressive">Non-aggressive</option>
            </select>
            <label>Sigma:</label>
            <select id="sigmaSelect">
                <option value="20">σ=20</option>
                <option value="30">σ=30</option>
            </select>
            <label>Source:</label>
            <select id="sourceSelect">
                <option value="s2">S2</option>
                <option value="fusion">Fusion</option>
                <option value="s3">S3</option>
            </select>
            <label>Mode:</label>
            <select id="fusionModeSelect" title="BtI vs ItB processed paths">
                <option value="bti">BtI</option>
                <option value="itb">ItB</option>
            </select>
            </div>
            <input type="range" id="dateSlider" min="0" max="365" value="0">
            <div id="dateDisplay">2024-01-01</div>
        </div>
        <div class="map-label">Postprocessed RGB (closest available)</div>
        <div id="mapDate" class="map-date"></div>
        <div id="postprocessedMap"></div>
        <div id="plots">
            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
        </div>
    </div>
    <script>
        // Coordinate reference systems registered with proj4 for bounds conversion.
        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

        // Current selection (defaults until init() applies the URL parameters).
        let siteName = "innsbruck";
        let season = "2024";
        let strategy = "aggressive";
        let sigma = "20";
        let source = "s2";
        let fusionMode = "bti";

        // Site marker position as [lat, lon] and the first day of the selected season.
        let sitePosition = [47.116171, 11.320308];
        let start = new Date(2024, 0, 1);

        // Site name -> list of seasons that have data; filled by init().
        let availableSiteSeasons = {};

        // Leaflet objects, created in init() / updateMap().
        let postprocessedMap = null;
        let overlay = null;
        let marker = null;

        // Time series currently shown in the plots.
        let ndviTs = [];
        let gccTs = [];
        let bandsTs = [];

        // Band plots: time-series key and line colour.
        const BANDS = [
            { key: "b02", color: "#0066ff" },
            { key: "b03", color: "#00aa00" },
            { key: "b04", color: "#cc0000" },
            { key: "b8a", color: "#9900cc" },
        ];
        const urlParams = new URLSearchParams(location.search);
        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
        // Format a Date as YYYY-MM-DD using its local-time calendar fields.
        const fmtDate = (d) => {
            const twoDigits = (n) => String(n).padStart(2, "0");
            return [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())].join("-");
        };
        // Return the YYYY-MM-DD date that lies `days` calendar days after the
        // season start. The day offset is added to the day-of-month field (the Date
        // constructor normalises overflow) instead of adding days * 86400000 ms:
        // a fixed 24 h step drifts by an hour across a daylight-saving change and
        // can land on the previous calendar day when the season starts in DST.
        const dateFromDays = (days) =>
            fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));
        // Return the number of calendar days from the season start to a
        // YYYY-MM-DD date string (negative when the date is before the start).
        // Both instants are local midnights, so their difference is a whole number
        // of days give or take a one-hour daylight-saving shift; rounding (rather
        // than flooring) keeps dates after a spring-forward from coming out one
        // day short.
        const daysFromDate = (dateStr) => {
            const [y, m, d] = dateStr.split("-").map(Number);
            return Math.round((new Date(y, m - 1, d) - start) / 86400000);
        };
        /**
         * Build the data directory for the current site, season, strategy, sigma
         * and fusion mode. ItB runs live in a directory with an extra "_itb" tag.
         *
         * @returns {string} Relative path without a trailing slash.
         */
        function getProcessedPath() {
            const modeTag = fusionMode === "itb" ? "_itb" : "";
            return `data/${siteName}/${season}/processed_${strategy}${modeTag}_sigma${sigma}`;
        }
        /**
         * Load the NDVI, GCC and band time series for the current selection, then
         * redraw the plots and refresh the download links.
         *
         * A non-OK response yields an empty series for that file; if any request
         * or JSON parse fails, all three series are reset to empty.
         */
        async function loadTimeseries() {
            const root = getProcessedPath();
            const fetchSeries = (kind) =>
                fetch(`${root}/${kind}/${source}/timeseries.json`).then((resp) => (resp.ok ? resp.json() : []));
            try {
                const [ndvi, gcc, bands] = await Promise.all([
                    fetchSeries("ndvi"),
                    fetchSeries("gcc"),
                    fetchSeries("bands"),
                ]);
                ndviTs = ndvi;
                gccTs = gcc;
                bandsTs = bands;
            } catch {
                ndviTs = [];
                gccTs = [];
                bandsTs = [];
            }
            drawPlots();
            updateDownloadLinks();
        }
        /**
         * Draw one time series as a line plot on a canvas, with a vertical cursor
         * at the date currently selected on the slider and the value of the
         * sample nearest to that date.
         *
         * @param {string} canvasId Id of the target <canvas>; a missing element is ignored.
         * @param {Array<Object>} data Samples carrying a `date` and the value under `key`.
         * @param {string} key Property to plot; samples where it is null/undefined are skipped.
         * @param {string} color Stroke colour of the series line.
         */
        function drawPlot(canvasId, data, key, color) {
            const canvas = document.getElementById(canvasId);
            if (!canvas) return;
            const ctx = canvas.getContext("2d");

            // Size the bitmap to the laid-out width; the height is fixed.
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const margin = 30;
            const width = canvas.width;
            const height = canvas.height;
            const innerW = width - margin * 2;
            const innerH = height - margin * 2;

            const samples = data.filter((entry) => entry[key] != null);
            if (samples.length === 0) {
                ctx.clearRect(0, 0, canvas.width, canvas.height);
                ctx.fillStyle = "#999";
                ctx.font = "12px sans-serif";
                ctx.fillText("No data", margin, margin + innerH / 2);
                return;
            }

            // Axis extents; a zero span falls back to 1 to avoid dividing by zero.
            const times = samples.map((entry) => new Date(entry.date).getTime());
            const values = samples.map((entry) => entry[key]);
            const tMin = Math.min(...times);
            const tMax = Math.max(...times);
            const vMin = Math.min(...values);
            const vMax = Math.max(...values);
            const tSpan = tMax - tMin || 1;
            const vSpan = vMax - vMin || 1;
            const xAt = (when) => margin + ((new Date(when).getTime() - tMin) / tSpan) * innerW;
            const yAt = (value) => margin + innerH - ((value - vMin) / vSpan) * innerH;

            ctx.clearRect(0, 0, width, height);

            // Axes.
            ctx.strokeStyle = "#ccc";
            ctx.beginPath();
            ctx.moveTo(margin, margin);
            ctx.lineTo(margin, margin + innerH);
            ctx.lineTo(margin + innerW, margin + innerH);
            ctx.stroke();

            // Min / max value labels.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(vMin.toFixed(3), 2, margin + innerH + 10);
            ctx.fillText(vMax.toFixed(3), 2, margin + 3);

            // Series line.
            ctx.strokeStyle = color;
            ctx.beginPath();
            let penDown = false;
            for (const entry of samples) {
                const px = xAt(entry.date);
                const py = yAt(entry[key]);
                if (penDown) {
                    ctx.lineTo(px, py);
                } else {
                    ctx.moveTo(px, py);
                    penDown = true;
                }
            }
            ctx.stroke();

            // Cursor at the slider date.
            const cursorDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const cursorX = xAt(cursorDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            ctx.beginPath();
            ctx.moveTo(cursorX, margin);
            ctx.lineTo(cursorX, margin + innerH);
            ctx.stroke();

            // Label the sample closest in time to the cursor (first one wins on ties).
            const distanceToCursor = (entry) => Math.abs(new Date(entry.date) - new Date(cursorDate));
            let nearest = samples[0];
            for (let i = 1; i < samples.length; i++) {
                if (distanceToCursor(samples[i]) < distanceToCursor(nearest)) {
                    nearest = samples[i];
                }
            }
            if (nearest) {
                ctx.fillStyle = "#f00";
                ctx.font = "bold 10px sans-serif";
                ctx.fillText(nearest[key].toFixed(3), cursorX + 5, yAt(nearest[key]) - 5);
            }
        }
        /**
         * Redraw every plot: NDVI, GCC, then one plot per entry in BANDS, all
         * from the currently loaded time series.
         */
        function drawPlots() {
            drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
            drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
            for (const band of BANDS) {
                drawPlot(`plot_${band.key}`, bandsTs, band.key, band.color);
            }
        }
        /**
         * Refresh the download links next to the season heading.
         *
         * In ItB mode only the GCC time-series JSON is linked; otherwise the
         * JSON and CSV exports are linked with descriptive download file names.
         * Does nothing when #downloadLinks is missing.
         */
        function updateDownloadLinks() {
            const linkBox = document.getElementById("downloadLinks");
            if (!linkBox) return;
            const processedRoot = getProcessedPath();
            if (fusionMode === "itb") {
                linkBox.innerHTML = `<a href="${processedRoot}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
                return;
            }
            const exportDir = `${processedRoot}/export/${source}`;
            const fileStem = `${siteName}_${season}_postprocessed_${strategy}_sigma${sigma}_${source}`;
            const links = [
                `<a href="${exportDir}/timeseries.json" download="${fileStem}.json">[JSON]</a>`,
                `<a href="${exportDir}/timeseries.csv" download="${fileStem}.csv">[CSV]</a>`,
            ];
            linkBox.innerHTML = links.join("");
        }
        /**
         * Find the GeoTIFF closest in time to a date, within the selected season.
         *
         * Candidates are probed with HEAD requests at growing day offsets
         * (0, -1, +1, -2, +2, ... up to 365), skipping days outside
         * [Jan 1, Dec 31] of the season.
         *
         * All date handling uses local calendar days. Parsing the string with
         * new Date("YYYY-MM-DD") and formatting with toISOString() would work in
         * UTC while the season bounds are local midnights, which drops the first
         * or last day of the season (and can name the wrong day's file) in time
         * zones away from UTC.
         *
         * @param {string} dateStr Target date as YYYY-MM-DD.
         * @returns {Promise<?string>} File name such as "20240615_0.geotiff", or
         *     null when no file exists in the season.
         */
        async function findProcessedFile(dateStr) {
            const [year, month, day] = dateStr.split("-").map(Number);
            const seasonStart = start.getTime();
            const seasonEnd = new Date(parseInt(season, 10), 11, 31).getTime();
            for (let offset = 0; offset <= 365; offset++) {
                for (const dir of offset === 0 ? [0] : [-1, 1]) {
                    // Day overflow/underflow is normalised by the Date constructor.
                    const candidate = new Date(year, month - 1, day + dir * offset);
                    const stamp = candidate.getTime();
                    if (stamp < seasonStart || stamp > seasonEnd) continue;
                    const filename = `${fmtDate(candidate).replace(/-/g, "")}_0.geotiff`;
                    try {
                        const res = await fetch(`${getProcessedPath()}/${source}/${filename}`, { method: "HEAD" });
                        if (res.ok) return filename;
                    } catch {
                        // Best effort: a failed probe just means "try the next day".
                    }
                }
            }
            return null;
        }
        /**
         * Convert a bounding box from a source CRS to Leaflet lat/lng bounds.
         *
         * @param {Array<number>} bbox [minX, minY, maxX, maxY] in `fromCRS`.
         * @param {string} fromCRS proj4 CRS code of the bounding box.
         * @returns {Array<Array<number>>} [[south, west], [north, east]] in EPSG:4326.
         */
        function transformBounds(bbox, fromCRS) {
            // proj4 returns [lon, lat]; Leaflet expects [lat, lon].
            const lowerLeft = proj4(fromCRS, "EPSG:4326", [bbox[0], bbox[1]]);
            const upperRight = proj4(fromCRS, "EPSG:4326", [bbox[2], bbox[3]]);
            const southWest = [lowerLeft[1], lowerLeft[0]];
            const northEast = [upperRight[1], upperRight[0]];
            return [southWest, northEast];
        }
        /**
         * Download a GeoTIFF for the current selection and prepare it for display.
         *
         * @param {string} filename File name such as "20240615_0.geotiff".
         * @returns {Promise<{dataUrl: string, bounds: Array, dateStr: string}>}
         *     Image data URL from geotiffToCanvasDataUrl(), Leaflet bounds in
         *     lat/lng, and the file name without its "_0.geotiff" suffix.
         */
        async function loadGeotiff(filename) {
            const response = await fetch(`${getProcessedPath()}/${source}/${filename}`);
            const raster = await geotiffToCanvasDataUrl(await response.arrayBuffer());
            const { dataUrl, bbox, crsCode } = raster;

            let bounds;
            if (crsCode === "EPSG:4326") {
                // Already geographic: only swap to Leaflet's [lat, lon] order.
                bounds = [[bbox[1], bbox[0]], [bbox[3], bbox[2]]];
            } else {
                bounds = transformBounds(bbox, crsCode);
            }
            return { dataUrl, bounds, dateStr: filename.replace("_0.geotiff", "") };
        }
        /**
         * Show the image closest to the slider date on the map.
         *
         * When no file is found, the map is not ready, or loading fails, the
         * current overlay (if any) is removed and the date caption is cleared.
         */
        async function updateMap() {
            const caption = () => document.getElementById("mapDate");
            const dropOverlay = () => {
                if (overlay) {
                    postprocessedMap.removeLayer(overlay);
                    overlay = null;
                }
                caption().textContent = "";
            };

            const sliderDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const filename = await findProcessedFile(sliderDate);
            if (!filename || !postprocessedMap) {
                dropOverlay();
                return;
            }

            try {
                const image = await loadGeotiff(filename);
                if (overlay) postprocessedMap.removeLayer(overlay);
                overlay = L.imageOverlay(image.dataUrl, image.bounds, { opacity: 0.95 }).addTo(postprocessedMap);
                postprocessedMap.fitBounds(image.bounds);
                // File names carry the date as YYYYMMDD; show it as YYYY-MM-DD.
                const stamp = image.dateStr;
                caption().textContent = `${stamp.slice(0,4)}-${stamp.slice(4,6)}-${stamp.slice(6,8)}`;
            } catch (e) {
                dropOverlay();
            }
        }
        /**
         * Check whether a site/season has data by issuing a HEAD request for its
         * metrics.json.
         *
         * @param {string} sitename Site name used in the data path.
         * @param {string} s Season used in the data path.
         * @returns {Promise<boolean>} The response's `ok` flag; false when the
         *     request itself fails.
         */
        async function probeDataExists(sitename, s) {
            let exists = false;
            try {
                const response = await fetch(`data/${sitename}/${s}/metrics.json`, { method: "HEAD" });
                exists = response.ok;
            } catch {
                exists = false;
            }
            return exists;
        }
        /**
         * Look up a site's GeoJSON feature by its sitename property.
         *
         * @param {string} sn Site name to find.
         * @returns {Object|undefined} The first matching feature, or undefined
         *     when the site list is not loaded or has no match.
         */
        function getSiteBySitename(sn) {
            const features = window.sitesData?.features;
            return features?.find((feature) => feature.properties?.sitename === sn);
        }
        /**
         * Switch the page to another site/season and reload everything.
         *
         * Updates the module-level selection (siteName, season, start), moves the
         * map and marker to the site, refreshes heading and slider range, writes
         * the selection to the URL, applies a ?date= parameter to the slider when
         * present, then reloads the time series and the map image.
         *
         * The URL is updated through the shared `urlParams` object, the same one
         * the strategy/sigma/source/mode handlers write to. Using a separate
         * URLSearchParams here would leave `urlParams` with the old site/season,
         * and the next option change would write those stale values back.
         *
         * @param {string} newSite Site name to show.
         * @param {string} newSeason Season (year) to show.
         */
        async function setSiteSeason(newSite, newSeason) {
            siteName = newSite;
            season = newSeason;
            start = new Date(parseInt(season, 10), 0, 1);
            const site = getSiteBySitename(newSite);
            if (site?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
                const [lon, lat] = site.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            if (postprocessedMap) {
                postprocessedMap.setView(sitePosition, 12);
                if (marker) marker.setLatLng(sitePosition);
            }
            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
            document.getElementById("season").textContent = season;

            const slider = document.getElementById("dateSlider");
            const yearEnd = new Date(parseInt(season, 10), 11, 31);
            slider.max = Math.ceil((yearEnd - start) / 86400000);

            urlParams.set("site", siteName);
            urlParams.set("season", season);
            urlParams.set("mode", fusionMode);
            history.replaceState({}, "", `?${urlParams}`);

            const urlDate = urlParams.get("date");
            if (urlDate) slider.value = daysFromDate(urlDate);
            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value));
            await loadTimeseries();
            await updateMap();
        }
        /**
         * Bootstrap the postprocessed viewer.
         *
         * Loads data/sites.geojson, probes each declared site/season with
         * probeDataExists(), fills the selectors, applies the URL parameters
         * (site, season, strategy, sigma, source, mode), creates the Leaflet map
         * and site marker, wires the change handlers and shows the initial
         * site/season.
         *
         * When no site has data the selectors fall back to a single "innsbruck"
         * entry with season "2024".
         */
        async function init() {
            try {
                const response = await fetch("data/sites.geojson");
                window.sitesData = response.ok ? await response.json() : { features: [] };
            } catch {
                window.sitesData = { features: [] };
            }

            // Probes run one after another, in geojson order.
            for (const feature of window.sitesData.features || []) {
                const name = feature.properties?.sitename;
                if (!name) continue;
                const declaredSeasons = feature.properties?.seasons
                    ? Object.keys(feature.properties.seasons).sort()
                    : [];
                const confirmedSeasons = [];
                for (const candidate of declaredSeasons) {
                    const hasData = await probeDataExists(name, candidate);
                    if (hasData) confirmedSeasons.push(candidate);
                }
                if (confirmedSeasons.length) availableSiteSeasons[name] = confirmedSeasons;
            }

            const byId = (id) => document.getElementById(id);
            const seasonOptionsHtml = (list) => list.map((s) => `<option value="${s}">${s}</option>`).join("");
            const availableSites = Object.keys(availableSiteSeasons);
            const siteSelect = byId("siteSelect");
            const seasonSelect = byId("seasonSelect");

            siteSelect.innerHTML = "";
            const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
            for (const name of listedSites) {
                const option = document.createElement("option");
                option.value = name;
                option.textContent = name;
                siteSelect.appendChild(option);
                if (!availableSiteSeasons[name]) availableSiteSeasons[name] = ["2024"];
            }

            // URL parameters win only when they name a known site / season.
            const requestedSite = urlParams.get("site");
            const requestedSeason = urlParams.get("season");
            let initialSite = availableSites[0] || "innsbruck";
            if (requestedSite && availableSiteSeasons[requestedSite]) {
                initialSite = requestedSite;
            }
            const initialSeasons = availableSiteSeasons[initialSite] || [];
            let initialSeason = initialSeasons[0] || "2024";
            if (requestedSeason && initialSeasons.includes(requestedSeason)) {
                initialSeason = requestedSeason;
            }
            siteSelect.value = initialSite;
            seasonSelect.innerHTML = seasonOptionsHtml(initialSeasons);
            seasonSelect.value = initialSeason;

            // Processing options from the URL, with defaults.
            strategy = urlParams.get("strategy") || "aggressive";
            sigma = urlParams.get("sigma") || "20";
            source = urlParams.get("source") || "s2";
            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
            byId("strategySelect").value = strategy;
            byId("sigmaSelect").value = sigma;
            byId("sourceSelect").value = source;
            byId("fusionModeSelect").value = fusionMode;

            const initialFeature = getSiteBySitename(initialSite);
            if (initialFeature?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
                const [lon, lat] = initialFeature.geometry.coordinates;
                sitePosition = [lat, lon];
            }

            const map = L.map("postprocessedMap", { zoomControl: false });
            map.setView(sitePosition, 12);
            const tiles = L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 });
            postprocessedMap = map.addLayer(tiles);
            const markerIcon = L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] });
            marker = L.marker(sitePosition, { icon: markerIcon }).addTo(postprocessedMap);

            siteSelect.addEventListener("change", () => {
                const chosenSite = siteSelect.value;
                const seasonsForSite = availableSiteSeasons[chosenSite] || [];
                seasonSelect.innerHTML = seasonOptionsHtml(seasonsForSite);
                seasonSelect.value = seasonsForSite[0] || "2024";
                setSiteSeason(chosenSite, seasonSelect.value);
            });
            seasonSelect.addEventListener("change", () => {
                setSiteSeason(siteSelect.value, seasonSelect.value);
            });

            // Each option selector stores its value, mirrors it into the URL and reloads.
            const optionBindings = [
                ["strategySelect", "strategy", (value) => { strategy = value; }],
                ["sigmaSelect", "sigma", (value) => { sigma = value; }],
                ["sourceSelect", "source", (value) => { source = value; }],
                ["fusionModeSelect", "mode", (value) => { fusionMode = value; }],
            ];
            for (const [elementId, paramName, assign] of optionBindings) {
                byId(elementId).addEventListener("change", function () {
                    assign(this.value);
                    urlParams.set(paramName, this.value);
                    history.replaceState({}, "", `?${urlParams}`);
                    loadTimeseries();
                    updateMap();
                });
            }

            await setSiteSeason(initialSite, initialSeason);
        }
        // Moving the date slider updates the date label, the plot cursors and the map image.
        document.getElementById("dateSlider").addEventListener("input", (event) => {
            const dayOffset = parseInt(event.currentTarget.value);
            document.getElementById("dateDisplay").textContent = dateFromDays(dayOffset);
            drawPlots();
            updateMap();
        });
        // Start the page; init() is async and the returned promise is not awaited here.
        init();
    </script>
 </body>
 </html>
--- a/webapp/prepared.html
+++ b/webapp/prepared.html
@ -1,379 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Prepared S2/S3 Viewer</title>
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
    <script src="common.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
        .selectors { margin-bottom: 20px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { margin: 0 0 5px 0; font-size: 22px; }
        .season-row { padding-bottom: 15px; }
        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
        .download-links { margin-left: 10px; font-size: 14px; }
        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
        .download-links a:hover { text-decoration: underline; }
        #dateSlider { width: 100%; margin: 15px 0; }
        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
        #preparedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
        .leaflet-image-layer { image-rendering: pixelated; }
        .leaflet-control-attribution { display: none; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header-sticky">
            <div class="nav">
                <a href="index.html">Full</a>
                <a href="preselection.html">Pre-selection</a>
                <a href="prepared.html" class="active">Prepared</a>
                <a href="fusion.html">Fusion</a>
                <a href="postprocessed.html">Postprocessed</a>
                <a href="metrics.html">Metrics</a>
                <a href="gap_validation.html">Gap validation</a>
                <a href="phenology.html">Phenology</a>
            </div>
            <h1 id="siteName">Innsbruck</h1>
            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
            <div class="selectors">
                <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
            <label>Strategy:</label>
            <select id="strategySelect">
                <option value="aggressive">Aggressive</option>
                <option value="nonaggressive">Non-aggressive</option>
            </select>
            <label>Source:</label>
            <select id="sourceSelect">
                <option value="s2">S2</option>
                <option value="s3">S3</option>
            </select>
            <label>Mode:</label>
            <select id="fusionModeSelect" title="BtI = REFL/composite; ItB = GCC rasters">
                <option value="bti">BtI</option>
                <option value="itb">ItB</option>
            </select>
            </div>
            <input type="range" id="dateSlider" min="0" max="365" value="0">
            <div id="dateDisplay">2024-01-01</div>
        </div>
        <div class="map-label" id="mapLabel">Prepared RGB (closest available)</div>
        <div id="mapDate" class="map-date"></div>
        <div id="preparedMap"></div>
        <div id="plots">
            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
        </div>
    </div>
    <script>
        // Projections registered with proj4: UTM zone 32 (EPSG:32632) and geographic WGS84 (EPSG:4326).
        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
        // Current selection; these defaults hold until init() applies the URL parameters.
        let siteName = "innsbruck";
        let season = "2024";
        let strategy = "aggressive";
        let source = "s2";
        let fusionMode = "bti";
        // Map centre / marker position as [lat, lon].
        let sitePosition = [47.116171, 11.320308];
        // Local midnight of 1 January of the selected season; slider offsets are counted from here.
        let start = new Date(2024, 0, 1);
        // sitename -> list of seasons offered in the season selector.
        let availableSiteSeasons = {};
        // Leaflet objects, created in init().
        let preparedMap = null;
        let overlay = null;
        let marker = null;
        // Timeseries rows backing the plots.
        let ndviTs = [];
        let gccTs = [];
        let bandsTs = [];
        // Band plots: timeseries key (also the canvas id suffix) and line colour.
        const BANDS = [
            { key: "b02", color: "#0066ff" },
            { key: "b03", color: "#00aa00" },
            { key: "b04", color: "#cc0000" },
            { key: "b8a", color: "#9900cc" }
        ];
        // Query parameters of the page URL, shared by the selector handlers.
        const urlParams = new URLSearchParams(location.search);
        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
        // Format a Date as "YYYY-MM-DD" from its local-time calendar fields.
        const fmtDate = (d) => {
            const twoDigits = (n) => String(n).padStart(2, "0");
            return [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())].join("-");
        };
        // Return the "YYYY-MM-DD" date lying `days` calendar days after the season start (`start`).
        // The day offset is applied to the calendar day field rather than as days * 86400000 ms:
        // `start` is a local midnight, so adding fixed 24 h steps lands on 23:00 of the previous
        // day when daylight saving is active at `start` and has ended by the target date.
        const dateFromDays = (days) => fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));
        // Return the number of calendar days between the season start (`start`) and a
        // "YYYY-MM-DD" string, i.e. the slider position for that date.
        // Both operands are local midnights, so their difference is a whole number of days
        // plus or minus the daylight-saving shift. Rounding absorbs that shift; flooring
        // returned one day too few for every date inside the DST period.
        const daysFromDate = (dateStr) => {
            const [y, m, d] = dateStr.split("-").map(Number);
            return Math.round((new Date(y, m - 1, d) - start) / 86400000);
        };
        // Build the data directory of the prepared products for the current site, season,
        // strategy and fusion mode ("itb" products live in a folder with an "_itb" suffix).
        function getPreparedPath() {
            let folder = `prepared_${strategy}`;
            if (fusionMode === "itb") folder += "_itb";
            return `data/${siteName}/${season}/${folder}`;
        }
        // Fetch the NDVI, GCC and band timeseries for the current selection, then redraw
        // the plots and refresh the download links.
        // Each series falls back to an empty list on its own, so a missing or malformed
        // file for one product no longer blanks the other two.
        async function loadTimeseries() {
            const root = getPreparedPath();
            const fetchSeries = async (product) => {
                try {
                    const res = await fetch(`${root}/${product}/${source}/timeseries.json`);
                    if (!res.ok) return [];
                    const rows = await res.json();
                    // drawPlot() calls .filter() on the data, so anything but an array is unusable.
                    return Array.isArray(rows) ? rows : [];
                } catch {
                    return [];
                }
            };
            [ndviTs, gccTs, bandsTs] = await Promise.all(["ndvi", "gcc", "bands"].map(fetchSeries));
            drawPlots();
            updateDownloadLinks();
        }
        // Draw one timeseries on a canvas: axes, min/max labels, the series line, a red
        // cursor at the slider date and the value of the sample closest to that date.
        //   canvasId - id of the target <canvas>; nothing happens if it does not exist
        //   data     - array of rows with a `date` field and the value under `key`
        //   key      - property holding the plotted value; rows where it is null/undefined are skipped
        //   color    - stroke colour of the series line
        function drawPlot(canvasId, data, key, color) {
            const canvas = document.getElementById(canvasId);
            if (!canvas) return;
            const ctx = canvas.getContext("2d");
            // Size the backing store to the displayed width before drawing.
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const margin = 30;
            const width = canvas.width, height = canvas.height;
            const innerW = width - margin * 2, innerH = height - margin * 2;
            const bottom = margin + innerH;
            const samples = data.filter(row => row[key] != null);
            if (samples.length === 0) {
                ctx.clearRect(0, 0, canvas.width, canvas.height);
                ctx.fillStyle = "#999";
                ctx.font = "12px sans-serif";
                ctx.fillText("No data", margin, margin + innerH / 2);
                return;
            }
            const toMs = d => new Date(d).getTime();
            const times = samples.map(row => toMs(row.date));
            const values = samples.map(row => row[key]);
            const tMin = Math.min(...times), tMax = Math.max(...times);
            const vMin = Math.min(...values), vMax = Math.max(...values);
            // A zero span (single sample or constant value) is replaced by 1 to avoid dividing by zero.
            const tSpan = tMax - tMin || 1;
            const vSpan = vMax - vMin || 1;
            const toX = d => margin + ((toMs(d) - tMin) / tSpan) * innerW;
            const toY = v => bottom - ((v - vMin) / vSpan) * innerH;
            ctx.clearRect(0, 0, width, height);
            // Axes.
            ctx.strokeStyle = "#ccc";
            ctx.beginPath();
            ctx.moveTo(margin, margin);
            ctx.lineTo(margin, bottom);
            ctx.lineTo(margin + innerW, bottom);
            ctx.stroke();
            // Value range labels.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(vMin.toFixed(3), 2, bottom + 10);
            ctx.fillText(vMax.toFixed(3), 2, margin + 3);
            // Series line.
            ctx.strokeStyle = color;
            ctx.beginPath();
            samples.forEach((row, idx) => {
                const px = toX(row.date), py = toY(row[key]);
                if (idx === 0) ctx.moveTo(px, py);
                else ctx.lineTo(px, py);
            });
            ctx.stroke();
            // Cursor at the date currently selected on the slider.
            const sliderDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const cursorX = toX(sliderDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            ctx.beginPath();
            ctx.moveTo(cursorX, margin);
            ctx.lineTo(cursorX, bottom);
            ctx.stroke();
            // Sample nearest to the cursor; on ties the earlier row wins.
            const sliderMs = toMs(sliderDate);
            let nearest = samples[0];
            let nearestGap = Math.abs(times[0] - sliderMs);
            for (let i = 1; i < samples.length; i++) {
                const gap = Math.abs(times[i] - sliderMs);
                if (gap < nearestGap) {
                    nearest = samples[i];
                    nearestGap = gap;
                }
            }
            if (nearest) {
                ctx.fillStyle = "#f00";
                ctx.font = "bold 10px sans-serif";
                ctx.fillText(nearest[key].toFixed(3), cursorX + 5, toY(nearest[key]) - 5);
            }
        }
        // Redraw every plot on the page: NDVI, GCC and one plot per entry of BANDS.
        function drawPlots() {
            const indexPlots = [
                ["plot_ndvi", ndviTs, "ndvi", "#2d7a3e"],
                ["plot_gcc", gccTs, "greenness_index", "#00aa00"]
            ];
            for (const plotArgs of indexPlots) drawPlot(...plotArgs);
            for (const { key, color } of BANDS) drawPlot(`plot_${key}`, bandsTs, key, color);
        }
        // Rebuild the download links next to the season heading for the current selection.
        // ItB mode links only the GCC timeseries JSON; otherwise the JSON and CSV exports
        // are linked with a suggested download file name.
        function updateDownloadLinks() {
            const container = document.getElementById("downloadLinks");
            if (!container) return;
            const preparedRoot = getPreparedPath();
            let html;
            if (fusionMode === "itb") {
                html = `<a href="${preparedRoot}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
            } else {
                const exportDir = `${preparedRoot}/export/${source}`;
                const stem = `${siteName}_${season}_prepared_${strategy}_${source}`;
                const jsonLink = `<a href="${exportDir}/timeseries.json" download="${stem}.json">[JSON]</a>`;
                const csvLink = `<a href="${exportDir}/timeseries.csv" download="${stem}.csv">[CSV]</a>`;
                html = jsonLink + csvLink;
            }
            container.innerHTML = html;
        }
        // Find the prepared raster closest in time to `dateStr` ("YYYY-MM-DD") within the
        // selected season. Days are probed outwards from the target (0, -1, +1, -2, +2, ...)
        // with sequential HEAD requests; the first file that exists wins.
        // Returns the file name, or null when nothing is found or `dateStr` is not a valid date.
        //
        // All day arithmetic is done in UTC. Candidate days come from `new Date("YYYY-MM-DD")`
        // and `toISOString()`, which are UTC, so the season bounds must be UTC midnights too.
        // Comparing against local midnights rejected 1 January or 31 December, depending on
        // which side of UTC the browser's time zone lies.
        async function findPreparedFile(dateStr) {
            const DAY_MS = 86400000;
            const targetMs = new Date(dateStr).getTime();
            if (Number.isNaN(targetMs)) return null;
            const year = parseInt(season, 10);
            const seasonStartMs = Date.UTC(year, 0, 1);
            const seasonEndMs = Date.UTC(year, 11, 31);
            // Snapshot the selection once so one search never mixes two sources or modes.
            const dir = `${getPreparedPath()}/${source}`;
            const [prefix, suffix] = source === "s2"
                ? ["S2A_MSIL2A_", fusionMode === "itb" ? "_GCC.tif" : "_REFL.tif"]
                : ["composite_", ".tif"];
            for (let offset = 0; offset <= 365; offset++) {
                const candidates = offset === 0
                    ? [targetMs]
                    : [targetMs - offset * DAY_MS, targetMs + offset * DAY_MS];
                for (const ms of candidates) {
                    if (ms < seasonStartMs || ms > seasonEndMs) continue;
                    const stamp = new Date(ms).toISOString().slice(0, 10).replace(/-/g, "");
                    const filename = `${prefix}${stamp}${suffix}`;
                    try {
                        const res = await fetch(`${dir}/${filename}`, { method: "HEAD" });
                        if (res.ok) return filename;
                    } catch {
                        // Best-effort probe: a failed request only means "try the next day".
                    }
                }
            }
            return null;
        }
        // Convert a [minX, minY, maxX, maxY] bounding box from `fromCRS` into Leaflet-style
        // bounds [[lat, lon], [lat, lon]] by reprojecting its two corners to EPSG:4326.
        function transformBounds(bbox, fromCRS) {
            const corner = (x, y) => {
                const lonLat = proj4(fromCRS, "EPSG:4326", [x, y]);
                return [lonLat[1], lonLat[0]];
            };
            return [corner(bbox[0], bbox[1]), corner(bbox[2], bbox[3])];
        }
        // Download a prepared GeoTIFF and turn it into what the map overlay needs.
        // Resolves to { dataUrl, bounds, dateStr }: the rendered image, its bounds as
        // [[lat, lon], [lat, lon]] and the first 8-digit run found in the file name
        // ("" when the name contains none).
        async function loadGeotiff(filename) {
            const response = await fetch(`${getPreparedPath()}/${source}/${filename}`);
            const raster = await geotiffToCanvasDataUrl(await response.arrayBuffer());
            const { dataUrl, bbox, crsCode } = raster;
            let bounds;
            if (crsCode === "EPSG:4326") {
                // Already geographic: only swap the axis order to [lat, lon].
                bounds = [[bbox[1], bbox[0]], [bbox[3], bbox[2]]];
            } else {
                bounds = transformBounds(bbox, crsCode);
            }
            const stamp = filename.match(/(\d{8})/);
            return { dataUrl, bounds, dateStr: stamp ? stamp[1] : "" };
        }
        // Show the prepared raster closest to the slider date as the map overlay and write
        // its acquisition date under the map. The overlay and date label are cleared when
        // no raster is found or loading fails.
        //
        // This function is invoked on every slider input and every selector change, so
        // several calls can be in flight at once and finish in any order. Each call takes a
        // ticket; a call that is no longer the latest when its awaits resolve drops its
        // result instead of overwriting what a newer call has drawn (or is about to draw).
        async function updateMap() {
            const ticket = (updateMap.lastTicket = (updateMap.lastTicket || 0) + 1);
            const isStale = () => ticket !== updateMap.lastTicket;
            const clearOverlay = () => {
                // Guard on preparedMap as well: this path is also taken before the map exists.
                if (overlay && preparedMap) preparedMap.removeLayer(overlay);
                overlay = null;
                document.getElementById("mapDate").textContent = "";
            };
            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
            const filename = await findPreparedFile(dateStr);
            if (isStale()) return;
            if (!filename || !preparedMap) {
                clearOverlay();
                return;
            }
            try {
                const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
                if (isStale()) return;
                if (overlay) preparedMap.removeLayer(overlay);
                overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(preparedMap);
                preparedMap.fitBounds(bounds);
                document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
            } catch (e) {
                if (isStale()) return;
                clearOverlay();
            }
        }
        // Resolve to true when the S2 pre-selection file of site `sitename`, season `s`
        // answers a HEAD request with an OK status; any request failure counts as false.
        async function probeDataExists(sitename, s) {
            let found = false;
            try {
                const probeUrl = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
                const response = await fetch(probeUrl, { method: "HEAD" });
                found = response.ok;
            } catch {
                // Request failed: keep the "no data" default.
            }
            return found;
        }
        // Look up the GeoJSON feature whose properties.sitename equals `sn` in
        // window.sitesData; returns undefined when the data is not loaded or nothing matches.
        function getSiteBySitename(sn) {
            const features = window.sitesData?.features;
            if (features == null) return undefined;
            return features.find(feature => feature.properties?.sitename === sn);
        }
        // Switch the page to another site/season: update the global selection, recentre the
        // map and marker, refresh the headings and slider range, record the selection in
        // the URL, then reload the timeseries and the map overlay.
        //   newSite   - sitename as used in data paths and in sites.geojson
        //   newSeason - season (year) as a string
        async function setSiteSeason(newSite, newSeason) {
            siteName = newSite;
            season = newSeason;
            start = new Date(parseInt(season, 10), 0, 1);
            const site = getSiteBySitename(newSite);
            if (site?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; the map expects [lat, lon].
                const [lon, lat] = site.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            if (preparedMap) { preparedMap.setView(sitePosition, 12); if (marker) marker.setLatLng(sitePosition); }
            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
            document.getElementById("season").textContent = season;
            const slider = document.getElementById("dateSlider");
            const yearEnd = new Date(parseInt(season, 10), 11, 31);
            slider.max = Math.ceil((yearEnd - start) / 86400000);
            // Write through the shared urlParams rather than a private copy of location.search.
            // The strategy/source/mode handlers serialise urlParams, so a private copy made
            // their next update revert site and season in the URL to the page-load values.
            urlParams.set("site", siteName);
            urlParams.set("season", season);
            urlParams.set("mode", fusionMode);
            history.replaceState({}, "", `?${urlParams}`);
            const urlDate = urlParams.get("date");
            if (urlDate) {
                // Skip a malformed ?date= value instead of assigning NaN to the slider.
                const dayIndex = daysFromDate(urlDate);
                if (Number.isFinite(dayIndex)) slider.value = dayIndex;
            }
            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));
            await loadTimeseries();
            await updateMap();
        }
        // Page start-up: load the site catalogue, find out which site/season combinations
        // have data, fill the selectors from that and from the URL parameters, create the
        // map and wire up the selector handlers, then show the initial site/season.
        async function init() {
            // Any failure loading the catalogue falls back to an empty feature list.
            try {
                const res = await fetch("data/sites.geojson");
                window.sitesData = res.ok ? await res.json() : { features: [] };
            } catch { window.sitesData = { features: [] }; }
            const features = window.sitesData.features || [];
            // Probe all site/season combinations concurrently. The probes are independent
            // HEAD requests, so awaiting them one by one only added up their latencies.
            // Promise.all keeps input order, so seasons stay sorted and later duplicates of
            // a sitename still override earlier ones.
            const probed = await Promise.all(features.map(async (f) => {
                const sn = f.properties?.sitename;
                if (!sn) return null;
                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
                const found = await Promise.all(seasonsFromGeo.map(s => probeDataExists(sn, s)));
                return { sn, withData: seasonsFromGeo.filter((_, i) => found[i]) };
            }));
            for (const entry of probed) {
                if (entry && entry.withData.length) availableSiteSeasons[entry.sn] = entry.withData;
            }
            const availableSites = Object.keys(availableSiteSeasons);
            const siteSelect = document.getElementById("siteSelect");
            const seasonSelect = document.getElementById("seasonSelect");
            const fillSeasonOptions = (seasons) => {
                seasonSelect.innerHTML = seasons.map(s => `<option value="${s}">${s}</option>`).join("");
            };
            const refreshMapLabel = () => {
                const ml = document.getElementById("mapLabel");
                if (ml) ml.textContent = fusionMode === "itb" ? "Prepared GCC grayscale / S3 (closest available)" : "Prepared RGB (closest available)";
            };
            siteSelect.innerHTML = "";
            // With no probed data at all, offer the default site with a default season.
            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
                const opt = document.createElement("option");
                opt.value = sn;
                opt.textContent = sn;
                siteSelect.appendChild(opt);
                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
            });
            // Initial selection: URL parameters when they name known data, else the first entries.
            const urlSite = urlParams.get("site");
            const urlSeason = urlParams.get("season");
            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
            const initialSeasons = availableSiteSeasons[initialSite] || [];
            const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");
            siteSelect.value = initialSite;
            fillSeasonOptions(initialSeasons);
            seasonSelect.value = initialSeason;
            strategy = urlParams.get("strategy") || "aggressive";
            source = urlParams.get("source") || "s2";
            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
            document.getElementById("strategySelect").value = strategy;
            document.getElementById("sourceSelect").value = source;
            document.getElementById("fusionModeSelect").value = fusionMode;
            refreshMapLabel();
            const initSite = getSiteBySitename(initialSite);
            if (initSite?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; the map expects [lat, lon].
                const [lon, lat] = initSite.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            preparedMap = L.map("preparedMap", { zoomControl: false }).setView(sitePosition, 12)
                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
            marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(preparedMap);
            siteSelect.addEventListener("change", function() {
                const sn = this.value;
                const seas = availableSiteSeasons[sn] || [];
                fillSeasonOptions(seas);
                seasonSelect.value = seas[0] || "2024";
                setSiteSeason(sn, seasonSelect.value);
            });
            seasonSelect.addEventListener("change", function() {
                setSiteSeason(siteSelect.value, this.value);
            });
            // Strategy, source and mode selectors share one shape: store the new value,
            // mirror it into the URL, then reload the plots and the map.
            const bindSelect = (id, param, apply) => {
                document.getElementById(id).addEventListener("change", function() {
                    apply(this.value);
                    urlParams.set(param, this.value);
                    history.replaceState({}, "", `?${urlParams}`);
                    loadTimeseries(); updateMap();
                });
            };
            bindSelect("strategySelect", "strategy", v => { strategy = v; });
            bindSelect("sourceSelect", "source", v => { source = v; });
            bindSelect("fusionModeSelect", "mode", v => { fusionMode = v; refreshMapLabel(); });
            await setSiteSeason(initialSite, initialSeason);
        }
        // Scrubbing the date slider refreshes the date label, the plots and the map overlay.
        document.getElementById("dateSlider").addEventListener("input", (event) => {
            const dayOffset = parseInt(event.currentTarget.value);
            document.getElementById("dateDisplay").textContent = dateFromDays(dayOffset);
            drawPlots();
            updateMap();
        });
        // Kick off page initialisation once the slider listener is in place.
        init();
    </script>
 </body>
 </html>
--- a/webapp/preselection.html
+++ b/webapp/preselection.html
@ -1,541 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>S2 Band Reflectance Timeseries</title>
    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
    <style>
        body { margin: 0; font-family: sans-serif; }
        .nav { margin-bottom: 15px; font-size: 14px; }
        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .nav a.active { font-weight: bold; }
        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
        .selectors { margin-bottom: 20px; }
        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
        h1 { margin: 0 0 5px 0; font-size: 22px; }
        .season-row { padding-bottom: 15px; }
        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
        .download-links { margin-left: 10px; font-size: 14px; }
        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
        .download-links a:hover { text-decoration: underline; }
        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        #dateSlider { width: 100%; margin: 15px 0; }
        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
        #s2map { height: 400px; border: 1px solid #ccc; margin-top: 10px; }
        .leaflet-image-layer { image-rendering: pixelated; }
        .leaflet-control-attribution { display: none; }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header-sticky">
            <div class="nav">
                <a href="index.html">Full</a>
                <a href="preselection.html" class="active">Pre-selection</a>
                <a href="prepared.html">Prepared</a>
                <a href="fusion.html">Fusion</a>
                <a href="postprocessed.html">Postprocessed</a>
                <a href="metrics.html">Metrics</a>
                <a href="gap_validation.html">Gap validation</a>
                <a href="phenology.html">Phenology</a>
            </div>
            <h1 id="siteName">Innsbruck</h1>
            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
            <div class="selectors">
            <label>Site:</label>
            <select id="siteSelect"></select>
            <label>Season:</label>
            <select id="seasonSelect"></select>
            <label>Source:</label>
            <select id="sourceSelect">
                <option value="s2">S2</option>
                <option value="s3">S3</option>
            </select>
            <label>Exclusion:</label>
            <select id="exclusionSelect">
                <option value="none">None</option>
                <option value="aggressive">Aggressive</option>
                <option value="nonaggressive">Non-aggressive</option>
            </select>
            </div>
            <input type="range" id="dateSlider" min="0" max="365" value="0">
            <div id="dateDisplay">2024-01-01</div>
        </div>
        <div class="map-label" id="mapLabel">S2 RGB (closest available)</div>
        <div id="s2rgbdate" class="map-date"></div>
        <div id="s2map"></div>
        <div id="bandPlots"></div>
    </div>
    <script>
        // Projections registered with proj4: UTM zone 32 (EPSG:32632) and geographic WGS84 (EPSG:4326).
        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
        // Plotted bands: timeseries key, display label and line colour.
        const BANDS = [
            { key: "b02", label: "B02 (Blue)", color: "#0066ff" },
            { key: "b03", label: "B03 (Green)", color: "#00aa00" },
            { key: "b04", label: "B04 (Red)", color: "#cc0000" },
            { key: "b8a", label: "B8A (NIR)", color: "#9900cc" }
        ];
        // Current selection.
        let siteName = "innsbruck";
        let season = "2024";
        let source = "s2";
        // "none", "aggressive" or "nonaggressive"; read by filteredTimeseries().
        let exclusion = "none";
        let sitePosition = [47.116171, 11.320308];
        // Local midnight of 1 January 2024; dateFromDays()/daysFromDate() count days from here.
        let start = new Date(2024, 0, 1);
        // Timeseries rows backing the band, GCC and NDVI plots.
        let timeseries = [];
        let gccTimeseries = [];
        let ndviTimeseries = [];
        let availableSiteSeasons = {};
        let s2Map = null;
        let s2Overlay = null;
        let s2Marker = null;
        // Query parameters of the page URL.
        const urlParams = new URLSearchParams(location.search);
        // Apply the current exclusion setting to a timeseries.
        // "none" returns `arr` itself; "aggressive" drops rows flagged excluded_aggressive;
        // any other setting drops rows flagged excluded_nonaggressive.
        function filteredTimeseries(arr) {
            switch (exclusion) {
                case "none":
                    return arr;
                case "aggressive":
                    return arr.filter(entry => !entry.excluded_aggressive);
                default:
                    return arr.filter(entry => !entry.excluded_nonaggressive);
            }
        }
        // Format a Date as "YYYY-MM-DD" from its local-time calendar fields.
        function fmtDate(d) {
            const twoDigits = (n) => String(n).padStart(2, "0");
            const parts = [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())];
            return parts.join("-");
        }
        // Return the "YYYY-MM-DD" date lying `days` calendar days after the season start (`start`).
        // The day offset is applied to the calendar day field rather than as days * 86400000 ms:
        // `start` is a local midnight, so adding fixed 24 h steps lands on 23:00 of the previous
        // day when daylight saving is active at `start` and has ended by the target date.
        const dateFromDays = (days) => fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));
        // Return the number of calendar days between the season start (`start`) and a
        // "YYYY-MM-DD" string, i.e. the slider position for that date.
        // Both operands are local midnights, so their difference is a whole number of days
        // plus or minus the daylight-saving shift. Rounding absorbs that shift; flooring
        // returned one day too few for every date inside the DST period.
        const daysFromDate = (dateStr) => {
            const [y, m, d] = dateStr.split("-").map(Number);
            return Math.round((new Date(y, m - 1, d) - start) / 86400000);
        };
        // Plot one reflectance band of the (exclusion-filtered) band timeseries: axes,
        // min/max labels, the series line, a tick per sample on the time axis, a red cursor
        // at the slider date and the value of the sample closest to it.
        //   canvasId  - id of the target <canvas>; nothing happens if it does not exist
        //   bandKey   - property of each timeseries row holding the band value
        //   bandLabel - not used by this function
        //   color     - stroke colour of the series line
        function drawBandPlot(canvasId, bandKey, bandLabel, color) {
            const canvas = document.getElementById(canvasId);
            if (!canvas) return;
            const ctx = canvas.getContext("2d");
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const margin = 30;
            const width = canvas.width, height = canvas.height;
            const innerW = width - margin * 2, innerH = height - margin * 2;
            const baseline = margin + innerH;
            const rows = filteredTimeseries(timeseries).filter(row => row[bandKey] != null);
            if (rows.length === 0) return;
            const toMs = d => new Date(d).getTime();
            const times = rows.map(row => toMs(row.date));
            const readings = rows.map(row => row[bandKey]);
            const tMin = Math.min(...times), tMax = Math.max(...times);
            // A zero span (single sample or constant value) is replaced by 1 to avoid dividing by zero.
            const tSpan = tMax - tMin || 1;
            const vMin = Math.min(...readings), vMax = Math.max(...readings);
            const vSpan = vMax - vMin || 1;
            const toX = d => margin + ((toMs(d) - tMin) / tSpan) * innerW;
            const toY = v => baseline - ((v - vMin) / vSpan) * innerH;
            // Stroke a polyline through [x, y] pairs using the current stroke settings.
            const strokeLine = (points) => {
                ctx.beginPath();
                points.forEach(([px, py], idx) => {
                    if (idx === 0) ctx.moveTo(px, py);
                    else ctx.lineTo(px, py);
                });
                ctx.stroke();
            };
            ctx.clearRect(0, 0, width, height);
            // Axes.
            ctx.strokeStyle = "#ccc";
            strokeLine([[margin, margin], [margin, baseline], [margin + innerW, baseline]]);
            // Value range labels.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(vMin.toFixed(4), 2, baseline + 10);
            ctx.fillText(vMax.toFixed(4), 2, margin + 3);
            // Series line.
            ctx.strokeStyle = color;
            strokeLine(rows.map(row => [toX(row.date), toY(row[bandKey])]));
            // One small tick per sample on the time axis.
            ctx.fillStyle = "#888";
            rows.forEach(row => ctx.fillRect(toX(row.date) - 1, baseline - 1, 2, 2));
            // Cursor at the date currently selected on the slider.
            const sliderDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const cursorX = toX(sliderDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            strokeLine([[cursorX, margin], [cursorX, baseline]]);
            // Sample nearest to the cursor; on ties the earlier row wins.
            const sliderMs = toMs(sliderDate);
            let nearest = rows[0];
            let nearestGap = Math.abs(times[0] - sliderMs);
            for (let i = 1; i < rows.length; i++) {
                const gap = Math.abs(times[i] - sliderMs);
                if (gap < nearestGap) {
                    nearest = rows[i];
                    nearestGap = gap;
                }
            }
            if (nearest) {
                ctx.fillStyle = "#f00";
                ctx.font = "bold 10px sans-serif";
                ctx.fillText(nearest[bandKey].toFixed(4), cursorX + 5, toY(nearest[bandKey]) - 5);
            }
        }
        // Plot the (exclusion-filtered) NDVI timeseries on the "plot_ndvi" canvas: axes,
        // min/max labels, the series line, a tick per sample on the time axis, a red cursor
        // at the slider date and the NDVI of the sample closest to it.
        // Does nothing when the canvas is missing; draws nothing when there are no samples.
        function drawNdviPlot() {
            const canvas = document.getElementById("plot_ndvi");
            if (!canvas) return;
            const ctx = canvas.getContext("2d");
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const pad = 30;
            const fullW = canvas.width, fullH = canvas.height;
            const plotW = fullW - pad * 2;
            const plotH = fullH - pad * 2;
            const axisY = pad + plotH;
            const series = filteredTimeseries(ndviTimeseries).filter(point => point.ndvi != null);
            if (series.length === 0) return;
            const msOf = d => new Date(d).getTime();
            const stamps = series.map(point => msOf(point.date));
            const ndviValues = series.map(point => point.ndvi);
            const firstMs = Math.min(...stamps), lastMs = Math.max(...stamps);
            // A zero span (single sample or constant value) is replaced by 1 to avoid dividing by zero.
            const msSpan = lastMs - firstMs || 1;
            const low = Math.min(...ndviValues), high = Math.max(...ndviValues);
            const valueSpan = high - low || 1;
            const xOf = d => pad + ((msOf(d) - firstMs) / msSpan) * plotW;
            const yOf = v => axisY - ((v - low) / valueSpan) * plotH;
            ctx.clearRect(0, 0, fullW, fullH);
            // Axes.
            ctx.strokeStyle = "#ccc";
            ctx.beginPath();
            ctx.moveTo(pad, pad);
            ctx.lineTo(pad, axisY);
            ctx.lineTo(pad + plotW, axisY);
            ctx.stroke();
            // Value range labels.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(low.toFixed(3), 2, axisY + 10);
            ctx.fillText(high.toFixed(3), 2, pad + 3);
            // Series line: start the path at the first sample, then connect the rest.
            ctx.strokeStyle = "#2d7a3e";
            ctx.beginPath();
            let started = false;
            for (const point of series) {
                const px = xOf(point.date), py = yOf(point.ndvi);
                if (started) {
                    ctx.lineTo(px, py);
                } else {
                    ctx.moveTo(px, py);
                    started = true;
                }
            }
            ctx.stroke();
            // One small tick per sample on the time axis.
            ctx.fillStyle = "#888";
            series.forEach(point => ctx.fillRect(xOf(point.date) - 1, axisY - 1, 2, 2));
            // Cursor at the date currently selected on the slider.
            const sliderDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const cursorX = xOf(sliderDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            ctx.beginPath();
            ctx.moveTo(cursorX, pad);
            ctx.lineTo(cursorX, axisY);
            ctx.stroke();
            // Sample nearest to the cursor; on ties the earlier sample wins.
            const sliderMs = msOf(sliderDate);
            let nearest = series[0];
            let nearestGap = Math.abs(stamps[0] - sliderMs);
            stamps.forEach((ms, idx) => {
                const gap = Math.abs(ms - sliderMs);
                if (idx > 0 && gap < nearestGap) {
                    nearest = series[idx];
                    nearestGap = gap;
                }
            });
            if (nearest) {
                ctx.fillStyle = "#f00";
                ctx.font = "bold 10px sans-serif";
                ctx.fillText(nearest.ndvi.toFixed(3), cursorX + 5, yOf(nearest.ndvi) - 5);
            }
        }
        /**
         * Draw the GCC (greenness index) series on the "plot_gcc" canvas.
         *
         * Reads the rows returned by filteredTimeseries(gccTimeseries), keeps
         * those with a non-null greenness_index, and renders axes, min/max
         * labels, the series line, per-sample tick marks, a red cursor at the
         * date currently selected on "dateSlider", and the value of the sample
         * nearest to that date. Does nothing if the canvas is absent; leaves the
         * (freshly resized) canvas blank if there are no samples.
         */
        function drawGccPlot() {
            const canvas = document.getElementById("plot_gcc");
            if (!canvas) return;
            const ctx = canvas.getContext("2d");
            // Resizing happens before the empty-data check, so the canvas is
            // cleared even when nothing is drawn afterwards.
            canvas.width = canvas.offsetWidth;
            canvas.height = 100;
            const pad = 30;
            const w = canvas.width;
            const h = canvas.height;
            const plotW = w - pad * 2;
            const plotH = h - pad * 2;

            const samples = filteredTimeseries(gccTimeseries).filter((s) => s.greenness_index != null);
            if (samples.length === 0) return;

            // Data extents on both axes; a zero span falls back to 1 to avoid
            // dividing by zero when there is a single sample or a flat series.
            const sampleDates = samples.map((s) => new Date(s.date));
            const sampleValues = samples.map((s) => s.greenness_index);
            const minDate = new Date(Math.min(...sampleDates));
            const maxDate = new Date(Math.max(...sampleDates));
            const dateRange = maxDate - minDate || 1;
            const minVal = Math.min(...sampleValues);
            const maxVal = Math.max(...sampleValues);
            const valRange = maxVal - minVal || 1;

            const toX = (d) => pad + ((new Date(d) - minDate) / dateRange) * plotW;
            const toY = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;
            const baseline = pad + plotH;

            ctx.clearRect(0, 0, w, h);

            // Axes: left edge and bottom edge of the plot area.
            ctx.strokeStyle = "#ccc";
            ctx.beginPath();
            ctx.moveTo(pad, pad);
            ctx.lineTo(pad, baseline);
            ctx.lineTo(pad + plotW, baseline);
            ctx.stroke();

            // Value-range labels in the left margin.
            ctx.fillStyle = "#000";
            ctx.font = "9px sans-serif";
            ctx.fillText(minVal.toFixed(3), 2, baseline + 10);
            ctx.fillText(maxVal.toFixed(3), 2, pad + 3);

            // Series polyline.
            ctx.strokeStyle = "#00aa00";
            ctx.beginPath();
            for (let i = 0; i < samples.length; i++) {
                const px = toX(samples[i].date);
                const py = toY(samples[i].greenness_index);
                if (i === 0) {
                    ctx.moveTo(px, py);
                } else {
                    ctx.lineTo(px, py);
                }
            }
            ctx.stroke();

            // One small tick on the x axis per sample date.
            ctx.fillStyle = "#888";
            samples.forEach((s) => ctx.fillRect(toX(s.date) - 1, baseline - 1, 2, 2));

            // Vertical cursor at the slider's current date.
            const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
            const cursorX = toX(currentDate);
            ctx.strokeStyle = "#f00";
            ctx.lineWidth = 2;
            ctx.beginPath();
            ctx.moveTo(cursorX, pad);
            ctx.lineTo(cursorX, baseline);
            ctx.stroke();

            // Sample nearest in time to the cursor; on ties the earlier entry wins.
            const distanceToCursor = (s) => Math.abs(new Date(s.date) - new Date(currentDate));
            let nearest = samples[0];
            for (let i = 1; i < samples.length; i++) {
                if (distanceToCursor(samples[i]) < distanceToCursor(nearest)) nearest = samples[i];
            }
            if (nearest) {
                ctx.fillStyle = "#f00";
                ctx.font = "bold 10px sans-serif";
                ctx.fillText(nearest.greenness_index.toFixed(3), cursorX + 5, toY(nearest.greenness_index) - 5);
            }
        }
        /**
         * Redraw every plot: NDVI, GCC, then one plot per entry in BANDS
         * (each on the canvas whose id is "plot_" followed by the band key).
         */
        function drawAllPlots() {
            drawNdviPlot();
            drawGccPlot();
            for (const band of BANDS) {
                drawBandPlot(`plot_${band.key}`, band.key, band.label, band.color);
            }
        }
        /**
         * Compute the green chromatic coordinate b03 / (b02 + b03 + b04).
         *
         * @param {object} entry - row carrying numeric b02, b03 and b04 fields.
         * @returns {number|null} the ratio, or null when any band is missing or
         *     not a finite number, or when the band sum is not positive.
         */
        function computeGcc(entry) {
            const { b02, b03, b04 } = entry || {};
            // Require real numbers: a null band would otherwise coerce to 0 and
            // yield a spurious GCC of 0, and string bands would concatenate.
            if (![b02, b03, b04].every(Number.isFinite)) return null;
            const total = b02 + b03 + b04;
            return total > 0 ? b03 / total : null;
        }
        /**
         * Load the preselection time series for the current site, season and
         * source, then rebuild the dependent UI.
         *
         * Reads the source from "sourceSelect" (defaulting to "s2"), fetches
         * `<source>_preselection.json`, and stores the rows in `timeseries`,
         * `ndviTimeseries` and (with a computed greenness_index) `gccTimeseries`.
         * A non-OK response yields empty data; a fetch/parse failure resets all
         * three series to empty arrays. Afterwards the map label, download
         * links, plot canvases and slider maximum are refreshed, the plots are
         * drawn, and the imagery update is started (not awaited).
         */
        async function loadTimeseries() {
            const byId = (id) => document.getElementById(id);
            const rawBase = `data/${siteName}/${season}/raw`;
            source = byId("sourceSelect")?.value || "s2";

            try {
                const response = await fetch(`${rawBase}/preselection/${source}_preselection.json`);
                const rows = response.ok ? await response.json() : [];
                timeseries = rows;
                ndviTimeseries = rows;
                gccTimeseries = rows
                    .map((row) => ({ ...row, greenness_index: computeGcc(row) }))
                    .filter((row) => row.greenness_index != null);
            } catch {
                timeseries = [];
                ndviTimeseries = [];
                gccTimeseries = [];
            }

            const srcLabel = source.toUpperCase();
            byId("mapLabel").textContent = `${srcLabel} RGB (closest available)`;

            // Download links for the raw preselection files.
            const fileBase = `${rawBase}/preselection/${source}_preselection`;
            const downloadBase = `${siteName}_${season}_${source}_preselection`;
            byId("downloadLinks").innerHTML = [
                `<a href="${fileBase}.json" download="${downloadBase}.json" target="_blank">[JSON]</a>`,
                `<a href="${fileBase}.csv" download="${downloadBase}.csv" target="_blank">[CSV]</a>`,
            ].join("");

            // Recreate the plot canvases: NDVI, GCC, then one per band.
            const plotMarkup = (label, canvasId) =>
                `<div class="plot-label">${label}</div><canvas id="${canvasId}" class="plot"></canvas>`;
            byId("bandPlots").innerHTML =
                plotMarkup(`${srcLabel} NDVI`, "plot_ndvi") +
                plotMarkup(`${srcLabel} GCC (Greenness Index)`, "plot_gcc") +
                BANDS.map((band) => plotMarkup(band.label, `plot_${band.key}`)).join("");

            // Slider range runs from `start` to 31 December of the season year.
            const msPerDay = 86400000;
            const yearEnd = new Date(parseInt(season), 11, 31);
            byId("dateSlider").max = Math.ceil((yearEnd - start) / msPerDay);

            drawAllPlots();
            byId("dateDisplay").textContent = dateFromDays(parseInt(byId("dateSlider").value));
            updateS2Imagery();
        }
        /**
         * Check via a HEAD request whether the S2 preselection JSON exists for
         * the given site and season.
         *
         * @param {string} sitename - site directory name under data/.
         * @param {string} s - season directory name.
         * @returns {Promise<boolean>} the response's ok flag, or false if the
         *     request itself fails.
         */
        async function probeDataExists(sitename, s) {
            try {
                const url = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
                const { ok } = await fetch(url, { method: "HEAD" });
                return ok;
            } catch {
                return false;
            }
        }
        /**
         * Look up a site feature in window.sitesData by its properties.sitename.
         *
         * @param {string} sitename - name to match.
         * @returns {object|undefined} the first matching feature, or undefined
         *     when the data is not loaded or nothing matches.
         */
        function getSiteBySitename(sitename) {
            const features = window.sitesData?.features;
            if (features == null) return undefined;
            return features.find((feature) => feature.properties?.sitename === sitename);
        }
        /**
         * Switch the viewer to another site and season.
         *
         * Updates the module state (siteName, season, start, sitePosition),
         * recentres the map and marker, refreshes the header text, writes
         * `site` and `season` into the URL, and reloads the time series. If the
         * URL carries a `date` parameter, the slider is moved to it afterwards
         * and the date label, plots and imagery are refreshed to match.
         *
         * @param {string} newSite - sitename to show.
         * @param {string} newSeason - season (year) to show.
         * @returns {Promise<void>}
         */
        async function setSiteSeason(newSite, newSeason) {
            siteName = newSite;
            season = newSeason;
            start = new Date(parseInt(season, 10), 0, 1);
            const site = getSiteBySitename(newSite);
            if (site?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; the map position is kept as [lat, lon].
                const [lon, lat] = site.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            if (s2Map) {
                s2Map.setView(sitePosition, 12);
                if (s2Marker) s2Marker.setLatLng(sitePosition);
            }
            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
            document.getElementById("season").textContent = season;
            const params = new URLSearchParams(location.search);
            params.set("site", siteName);
            params.set("season", season);
            history.replaceState({}, "", `?${params}`);
            // loadTimeseries() sets the slider maximum, so the URL date can only
            // be applied once it has finished.
            await loadTimeseries();
            const urlDate = params.get("date");
            if (urlDate) {
                const slider = document.getElementById("dateSlider");
                slider.value = daysFromDate(urlDate);
                // loadTimeseries() already rendered for the previous slider value;
                // redraw so the label, plots and imagery follow the new position.
                // Read the value back from the slider so any clamping is honoured.
                document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));
                drawAllPlots();
                updateS2Imagery();
            }
        }
        /**
         * Bootstrap the page.
         *
         * Loads data/sites.geojson into window.sitesData, probes which
         * site/season combinations have data, fills the site and season
         * selectors, applies the `site`, `season`, `source` and `exclusion`
         * URL parameters, creates the Leaflet map and marker, wires the
         * selector change handlers, and finally shows the initial site/season.
         * Falls back to site "innsbruck" and season "2024" when nothing is
         * available.
         *
         * @returns {Promise<void>}
         */
        async function init() {
            try {
                const res = await fetch("data/sites.geojson");
                window.sitesData = res.ok ? await res.json() : { features: [] };
            } catch {
                window.sitesData = { features: [] };
            }

            // Probe all site/season pairs concurrently instead of one HEAD
            // request at a time; Promise.all keeps the results in input order.
            const features = window.sitesData.features || [];
            const probed = await Promise.all(features.map(async (f) => {
                const sn = f.properties?.sitename;
                if (!sn) return null;
                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
                const exists = await Promise.all(seasonsFromGeo.map((s) => probeDataExists(sn, s)));
                return { sn, withData: seasonsFromGeo.filter((_, i) => exists[i]) };
            }));
            for (const entry of probed) {
                if (entry && entry.withData.length) availableSiteSeasons[entry.sn] = entry.withData;
            }

            // Own-property check so URL values such as "constructor" are not
            // mistaken for known sites.
            const hasSite = (sn) => Object.prototype.hasOwnProperty.call(availableSiteSeasons, sn);
            const availableSites = Object.keys(availableSiteSeasons).sort();

            const siteSelect = document.getElementById("siteSelect");
            const seasonSelect = document.getElementById("seasonSelect");
            const sourceSelect = document.getElementById("sourceSelect");
            const exclusionSelect = document.getElementById("exclusionSelect");

            siteSelect.innerHTML = "";
            (availableSites.length ? availableSites : ["innsbruck"]).forEach((sn) => {
                const opt = document.createElement("option");
                opt.value = sn;
                opt.textContent = sn;
                siteSelect.appendChild(opt);
                if (!hasSite(sn)) availableSiteSeasons[sn] = ["2024"];
            });

            // Build season options as DOM nodes so season keys from the GeoJSON
            // are never interpreted as HTML.
            const fillSeasonOptions = (seasons) => {
                seasonSelect.innerHTML = "";
                for (const s of seasons) seasonSelect.appendChild(new Option(s, s));
            };

            // Update one query parameter on top of the URL as it is now.
            // setSiteSeason() rewrites site/season from location.search, so
            // writing the stale `urlParams` snapshot would revert those values.
            const persistUrlParam = (key, value) => {
                urlParams.set(key, value);
                const params = new URLSearchParams(location.search);
                params.set(key, value);
                history.replaceState({}, "", `?${params}`);
            };

            const urlSite = urlParams.get("site");
            const urlSeason = urlParams.get("season");
            const initialSite = (urlSite && hasSite(urlSite)) ? urlSite : (availableSites[0] || "innsbruck");
            const initialSeasons = availableSiteSeasons[initialSite] || [];
            const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");
            siteSelect.value = initialSite;
            fillSeasonOptions(initialSeasons);
            seasonSelect.value = initialSeason;
            sourceSelect.value = urlParams.get("source") || "s2";
            exclusion = urlParams.get("exclusion") || "none";
            exclusionSelect.value = exclusion;

            const initSite = getSiteBySitename(initialSite);
            if (initSite?.geometry?.coordinates) {
                // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
                const [lon, lat] = initSite.geometry.coordinates;
                sitePosition = [lat, lon];
            }
            const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
            s2Map = L.map("s2map", { zoomControl: false }).setView(sitePosition, 12)
                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
            s2Marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(s2Map);

            siteSelect.addEventListener("change", function() {
                const sn = this.value;
                const seas = availableSiteSeasons[sn] || [];
                fillSeasonOptions(seas);
                seasonSelect.value = seas[0] || "2024";
                setSiteSeason(sn, seasonSelect.value);
            });
            seasonSelect.addEventListener("change", function() {
                setSiteSeason(siteSelect.value, this.value);
            });
            sourceSelect.addEventListener("change", async function() {
                source = this.value;
                persistUrlParam("source", source);
                await loadTimeseries();
            });
            exclusionSelect.addEventListener("change", function() {
                exclusion = this.value;
                persistUrlParam("exclusion", exclusion);
                drawAllPlots();
                updateS2Imagery();
            });

            await setSiteSeason(initialSite, initialSeason);
        }
        // Moving the date slider updates the date label, redraws all plots and
        // refreshes the map imagery for the newly selected day.
        document.getElementById("dateSlider").addEventListener("input", (event) => {
            const dayOffset = parseInt(event.currentTarget.value);
            document.getElementById("dateDisplay").textContent = dateFromDays(dayOffset);
            drawAllPlots();
            updateS2Imagery();
        });
        /**
         * Find the image file whose acquisition date is nearest to a date.
         *
         * Only rows of filteredTimeseries(timeseries) that carry a filename are
         * considered; on equal distance the earlier row in the list wins.
         *
         * @param {string} dateStr - target date, parsed with `new Date`.
         * @returns {string|null} the nearest row's filename, or null when no
         *     row has one.
         */
        function closestFilename(dateStr) {
            const target = new Date(dateStr);
            const candidates = filteredTimeseries(timeseries).filter((row) => row.filename);
            if (candidates.length === 0) return null;
            const distance = (row) => Math.abs(new Date(row.date) - target);
            let best = candidates[0];
            for (let i = 1; i < candidates.length; i++) {
                if (distance(candidates[i]) < distance(best)) best = candidates[i];
            }
            return best.filename;
        }
        /**
         * Convert a bounding box from another CRS into lat/lon corner pairs.
         *
         * @param {number[]} bbox - [minX, minY, maxX, maxY] in `fromCRS`.
         * @param {string} fromCRS - proj4 identifier of the source CRS.
         * @returns {number[][]} [[south, west], [north, east]] built from the
         *     transformed lower-left and upper-right corners only.
         */
        function transformBounds(bbox, fromCRS) {
            const toLatLon = (x, y) => {
                const lonLat = proj4(fromCRS, "EPSG:4326", [x, y]);
                return [lonLat[1], lonLat[0]];
            };
            const southWest = toLatLon(bbox[0], bbox[1]);
            const northEast = toLatLon(bbox[2], bbox[3]);
            return [southWest, northEast];
        }
        /**
         * Fetch a GeoTIFF for the current site/season/source and render its
         * first three bands (read as blue, green, red) to an RGB image.
         *
         * Each band is stretched independently to 0-255 between the smallest
         * and largest positive finite value. Pixels that end up 0 in all three
         * channels are made fully transparent.
         *
         * @param {string} filename - file name inside the source's raw folder.
         * @returns {Promise<{dataUrl: string, bounds: number[][]}>} PNG data URL
         *     and [[south, west], [north, east]] bounds in lat/lon.
         * @throws {Error} when the HTTP request fails, the raster has fewer than
         *     three bands, or decoding/reprojection throws.
         */
        async function loadS2Geotiff(filename) {
            const path = `data/${siteName}/${season}/raw/${source}/${filename}`;
            const response = await fetch(path);
            // Fail early with a clear message instead of feeding an error page
            // to the TIFF decoder.
            if (!response.ok) throw new Error(`Failed to fetch ${path}: HTTP ${response.status}`);
            const tiff = await GeoTIFF.fromArrayBuffer(await response.arrayBuffer());
            const image = await tiff.getImage();
            const rasters = await image.readRasters();
            if (rasters.length < 3) throw new Error(`Expected at least 3 bands in ${path}, got ${rasters.length}`);
            const width = image.getWidth(), height = image.getHeight();
            const bbox = image.getBoundingBox();
            // Prefer the projected CRS, then the geographic one; with neither
            // present assume EPSG:4326 rather than building "EPSG:undefined".
            const geoKeys = image.getGeoKeys() || {};
            const crsCode = `EPSG:${geoKeys.ProjectedCSTypeGeoKey || geoKeys.GeographicTypeGeoKey || 4326}`;
            const [blue, green, red] = [0, 1, 2].map(i => Array.from(rasters[i]));
            const normalize = (band) => {
                let lo = Infinity, hi = -Infinity;
                for (const v of band) {
                    if (Number.isFinite(v) && v > 0) {
                        if (v < lo) lo = v;
                        if (v > hi) hi = v;
                    }
                }
                // No usable pixels: return all zeros so the band contributes
                // nothing (and the pixel can become transparent).
                if (lo > hi) return band.map(() => 0);
                const span = hi - lo || 1;
                // NaN maps to 0 so it is treated like no-data instead of
                // rendering as opaque black.
                return band.map(v => Number.isNaN(v) ? 0 : Math.max(0, Math.min(255, ((v - lo) / span) * 255)));
            };
            const [rN, gN, bN] = [red, green, blue].map(normalize);
            const canvas = Object.assign(document.createElement("canvas"), { width, height });
            const ctx = canvas.getContext("2d");
            ctx.imageSmoothingEnabled = false;
            const imgData = ctx.createImageData(width, height);
            for (let i = 0; i < rN.length; i++) {
                const idx = i * 4;
                if (rN[i] === 0 && gN[i] === 0 && bN[i] === 0) {
                    imgData.data[idx + 3] = 0;
                } else {
                    imgData.data[idx] = rN[i];
                    imgData.data[idx + 1] = gN[i];
                    imgData.data[idx + 2] = bN[i];
                    imgData.data[idx + 3] = 255;
                }
            }
            ctx.putImageData(imgData, 0, 0);
            // bbox is [minX, minY, maxX, maxY]; the overlay wants [lat, lon] pairs.
            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
            return { dataUrl: canvas.toDataURL(), bounds };
        }
        /**
         * Show the image closest to the slider's date as an overlay on the map.
         *
         * Picks the file via closestFilename(), loads it with loadS2Geotiff(),
         * replaces the current overlay, fits the map to it, and writes the
         * image date (taken from the leading YYYYMMDD part of the file name)
         * into "s2rgbdate". When no file is available or loading fails, the
         * overlay and date label are cleared. Calls may overlap (the slider
         * fires rapidly); only the most recent call is allowed to touch the map.
         *
         * @returns {Promise<void>}
         */
        async function updateS2Imagery() {
            // Token stored on the function itself: a call that finishes after a
            // newer one has started is stale and must not overwrite its result.
            const requestId = (updateS2Imagery.latestRequest || 0) + 1;
            updateS2Imagery.latestRequest = requestId;
            const isStale = () => updateS2Imagery.latestRequest !== requestId;

            const clearOverlay = () => {
                if (s2Overlay) {
                    if (s2Map) s2Map.removeLayer(s2Overlay);
                    s2Overlay = null;
                }
                document.getElementById("s2rgbdate").textContent = "";
            };

            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
            const filename = closestFilename(dateStr);
            if (!filename || !s2Map) {
                clearOverlay();
                return;
            }
            try {
                const { dataUrl, bounds } = await loadS2Geotiff(filename);
                if (isStale()) return;
                if (s2Overlay) s2Map.removeLayer(s2Overlay);
                s2Overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(s2Map);
                s2Map.fitBounds(bounds);
                const d = filename.split("_")[0];
                document.getElementById("s2rgbdate").textContent = `${d.slice(0,4)}-${d.slice(4,6)}-${d.slice(6,8)}`;
            } catch (e) {
                if (isStale()) return;
                // Best effort: keep the UI usable, but leave a trace for debugging.
                console.warn(`Could not load imagery ${filename}:`, e);
                clearOverlay();
            }
        }
        // Start the page. init() is async; the promise it returns is neither
        // awaited nor given a rejection handler here.
        init();
    </script>
 </body>
 </html>
		`@ -1 +0,0 @@`
			`"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""`