Switching horses.

2026-06-10 14:18:06 +02:00 · 2026-06-10 14:18:06 +02:00 · e3e14027fc
commit e3e14027fc
parent 25cbd97662
51 changed files with 5078 additions and 11678 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,10 +1,9 @@
-# Project data
-data/*
-webapp/data
+# Generated caches and downloads (regenerate via pipeline steps)
+data/

-# Environment
+# Environment and secrets
 .env
-.venv
+.venv/
 venv/
 env/

@ -42,6 +41,3 @@ dist/
 # OS
 .DS_Store
 Thumbs.db
-
-AGENTS.md
-.vibe
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,8 +0,0 @@
-repos:
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.4
-    hooks:
-      - id: ruff
-        args: [--fix]
-      - id: ruff-format
-
--- a/1-phenocam.py
+++ b/1-phenocam.py
@ -0,0 +1,278 @@
+"""Step 1: download worldwide PhenoCam sites for a calendar year.
+
+Inputs (``data/``): none — queries the PhenoCam API.
+
+Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
+
+- ``phenocam/{year}.json`` — site list manifest
+- ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata
+- ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV
+
+CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter).
+
+Next step: :mod:`2-phenocam-screening`.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from datetime import date
+from pathlib import Path
+from typing import Any
+
+import requests
+
+# Put the ``processing`` directory on ``sys.path`` so the project-local
+# imports below resolve.
+# NOTE(review): ``parents[1]`` is the directory *above* the one holding this
+# script, while this diff places the script at the repository root — confirm
+# that ``processing`` really lives one level up.
+PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
+if str(PROCESSING_DIR) not in sys.path:
+    sys.path.insert(0, str(PROCESSING_DIR))
+
+from acquisition_phenocam import PHENOCAM_API  # noqa: E402
+from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date  # noqa: E402
+
+# Default value of ``--evaluation-year``.
+EVALUATION_YEAR = 2025
+# URL requested by ``check_phenocam_host`` as a reachability probe.
+HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1"
+# File-name suffix of the cached per-site one-day summary CSV.
+ONE_DAY_CSV_SUFFIX = "_1day.csv"
+
+
+def check_phenocam_host() -> None:
+    """Fail fast when the PhenoCam API cannot be reached.
+
+    Issues one request against ``HOST_PROBE`` with a 30 second timeout.
+
+    Raises:
+        RuntimeError: if the request fails or answers with an HTTP error
+            status; the original ``requests`` exception is chained.
+    """
+    try:
+        requests.get(HOST_PROBE, timeout=30).raise_for_status()
+    except requests.RequestException as exc:
+        raise RuntimeError(
+            f"PhenoCam API unreachable (phenocam.nau.edu): "
+            f"{exc.__class__.__name__}: {exc}"
+        ) from exc
+
+
+def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
+    """Return whether the archive span ``first``..``last`` touches ``season``.
+
+    Both bounds are parsed with ``_parse_iso_date``; when either parse yields
+    ``None`` the span counts as non-overlapping.
+    """
+    archive_start, archive_end = _parse_iso_date(first), _parse_iso_date(last)
+    if archive_start is None or archive_end is None:
+        return False
+    year_begin, year_close = date(season, 1, 1), date(season, 12, 31)
+    # Two closed intervals overlap unless one lies entirely beyond the other.
+    return not (archive_start > year_close or archive_end < year_begin)
+
+
+def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
+    """Return ``<cache_dir>/phenocam/<evaluation_year>``, the per-site file directory."""
+    return cache_dir.joinpath("phenocam", str(evaluation_year))
+
+
+def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
+    """Return the path of the cached camera + ROI metadata JSON for ``sitename``."""
+    year_dir = sites_dir(cache_dir, evaluation_year)
+    return year_dir / f"{sitename}.json"
+
+
+def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
+    """Return the path of the cached one-day summary CSV for ``sitename``."""
+    year_dir = sites_dir(cache_dir, evaluation_year)
+    return year_dir / f"{sitename}{ONE_DAY_CSV_SUFFIX}"
+
+
+def load_candidate_cameras(
+    evaluation_year: int,
+    *,
+    site_filter: set[str] | None = None,
+    active_only: bool = False,
+    limit: int | None = None,
+) -> list[dict[str, Any]]:
+    """Collect cameras whose archive overlaps ``evaluation_year``.
+
+    Args:
+        evaluation_year: calendar year the archive span must touch.
+        site_filter: when given, only these sitenames are kept.
+        active_only: drop cameras whose ``active`` field is falsy.
+        limit: keep only the first N cameras after sorting by sitename.
+
+    Returns:
+        Shallow copies of the matching camera records, sorted by sitename.
+    """
+    selected: list[dict[str, Any]] = []
+    for camera in _paginate_cameras():
+        if active_only and not camera.get("active"):
+            continue
+        name = str(camera["Sitename"])
+        wanted = site_filter is None or name in site_filter
+        if wanted and _overlaps_year(
+            camera.get("date_first"), camera.get("date_last"), evaluation_year
+        ):
+            selected.append(dict(camera))
+    selected.sort(key=lambda item: str(item["Sitename"]))
+    return selected if limit is None else selected[:limit]
+
+
+def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
+    """Return the first ROI-list record whose ``site`` equals ``site_name``.
+
+    Pages of ``{PHENOCAM_API}/roilists/`` are followed through their ``next``
+    links until a page contains a matching record.
+
+    Returns:
+        A copy of the first matching record, or ``None`` when no page has one.
+
+    Raises:
+        requests.HTTPError: on a non-success HTTP status.
+    """
+    next_url = f"{PHENOCAM_API}/roilists/"
+    # The query is only sent with the first request; ``next`` links are then
+    # requested without extra parameters.
+    query: dict[str, Any] | None = {"site": site_name}
+    while next_url:
+        reply = requests.get(next_url, params=query, timeout=60)
+        reply.raise_for_status()
+        page = reply.json()
+        matches = [
+            entry for entry in page.get("results", []) if entry.get("site") == site_name
+        ]
+        if matches:
+            return dict(matches[0])
+        next_url = page.get("next")
+        query = None
+    return None
+
+
+def download_one_day_csv(csv_url: str, output_path: Path) -> None:
+    """Fetch ``csv_url`` and store the response text at ``output_path`` as UTF-8.
+
+    The parent directory is created only once the request has succeeded.
+
+    Raises:
+        requests.HTTPError: on a non-success HTTP status.
+    """
+    reply = requests.get(csv_url, timeout=60)
+    reply.raise_for_status()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8") as handle:
+        handle.write(reply.text)
+
+
+def download_site(
+    camera: dict[str, Any],
+    evaluation_year: int,
+    cache_dir: Path,
+) -> str:
+    """Download metadata and, when available, the one-day CSV for one camera.
+
+    The metadata JSON (``{"response": {"camera": ..., "roi": ...}}``) is
+    written before the CSV download is attempted. The CSV is fetched only
+    when the ROI record carries a ``one_day_summary`` URL.
+
+    Returns:
+        The camera's sitename.
+    """
+    sitename = str(camera["Sitename"])
+    roi = fetch_roi_record(sitename)
+
+    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
+    metadata_path.parent.mkdir(parents=True, exist_ok=True)
+    document = json.dumps({"response": {"camera": camera, "roi": roi}}, indent=2)
+    metadata_path.write_text(document + "\n", encoding="utf-8")
+
+    if roi:
+        csv_url = roi.get("one_day_summary")
+        if csv_url:
+            download_one_day_csv(
+                csv_url, site_csv_path(cache_dir, evaluation_year, sitename)
+            )
+    return sitename
+
+
+def load_or_download_site(
+    camera: dict[str, Any],
+    evaluation_year: int,
+    cache_dir: Path,
+    *,
+    refresh: bool,
+) -> str:
+    """Reuse cached site files when present, otherwise download them.
+
+    With ``refresh`` set, or when the metadata JSON is missing, the site is
+    downloaded in full. When the JSON exists but the CSV does not, only the
+    CSV is fetched, using the ``one_day_summary`` URL stored in the JSON.
+
+    Returns:
+        The camera's sitename.
+    """
+    sitename = str(camera["Sitename"])
+    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
+    summary_path = site_csv_path(cache_dir, evaluation_year, sitename)
+
+    if refresh or not metadata_path.is_file():
+        return download_site(camera, evaluation_year, cache_dir)
+    if summary_path.is_file():
+        return sitename
+
+    # Metadata is cached but the CSV is not: top up from the stored ROI URL.
+    cached = json.loads(metadata_path.read_text(encoding="utf-8"))
+    cached_roi = cached.get("response", {}).get("roi") or {}
+    csv_url = cached_roi.get("one_day_summary")
+    if csv_url:
+        download_one_day_csv(csv_url, summary_path)
+    return sitename
+
+
+def run_download(
+    *,
+    cache_dir: Path,
+    evaluation_year: int,
+    active_only: bool = False,
+    site_filter: set[str] | None = None,
+    limit: int | None = None,
+    refresh: bool = False,
+) -> list[str]:
+    """Download (or reuse from cache) every candidate site for one year.
+
+    Args:
+        cache_dir: base directory for the per-site files.
+        evaluation_year: calendar year the camera archive must overlap.
+        active_only: restrict candidates to cameras marked active.
+        site_filter: optional set of sitenames to keep.
+        limit: process only the first N candidates.
+        refresh: re-download even when cache files exist.
+
+    Returns:
+        The processed sitenames, sorted.
+
+    Raises:
+        RuntimeError: if the PhenoCam host probe fails.
+    """
+    check_phenocam_host()
+    candidates = load_candidate_cameras(
+        evaluation_year,
+        site_filter=site_filter,
+        active_only=active_only,
+        limit=limit,
+    )
+    total = len(candidates)
+    print(
+        f"[PhenoCam-1] {total} candidate(s) with archive overlap for "
+        f"{evaluation_year}"
+    )
+
+    sitenames: list[str] = []
+    for index, camera in enumerate(candidates, start=1):
+        sitename = str(camera["Sitename"])
+        # Coordinates are only used for the progress line; a camera with
+        # missing or malformed Lat/Lon must not abort the whole download run.
+        try:
+            location = (
+                f"({float(camera.get('Lat')):.4f}, {float(camera.get('Lon')):.4f})"
+            )
+        except (TypeError, ValueError):
+            location = "(coordinates unavailable)"
+        print(f"[PhenoCam-1] ({index}/{total}) {sitename} {location}")
+        sitenames.append(
+            load_or_download_site(
+                camera,
+                evaluation_year,
+                cache_dir,
+                refresh=refresh,
+            )
+        )
+    return sorted(sitenames)
+
+
+def write_manifest(
+    sitenames: list[str],
+    output_path: Path,
+    cache_dir: Path,
+    evaluation_year: int,
+) -> None:
+    """Write the step-1 manifest JSON listing the downloaded sites.
+
+    ``sites_dir`` is stored relative to the manifest's own directory, which is
+    how step 2 resolves it. The manifest may live outside the sites tree (a
+    custom ``--output-json``); the relative path then contains ``..`` parts.
+
+    Args:
+        sitenames: sitenames to record under ``sites``.
+        output_path: manifest file to write; parent directories are created.
+        cache_dir: base cache directory holding ``phenocam/<year>``.
+        evaluation_year: year recorded in the manifest.
+    """
+    import os
+
+    site_root = sites_dir(cache_dir, evaluation_year)
+    try:
+        # ``Path.relative_to`` raises when the manifest is not an ancestor of
+        # the sites directory; ``os.path.relpath`` handles arbitrary layouts.
+        rel_sites_dir = Path(os.path.relpath(site_root, output_path.parent)).as_posix()
+    except ValueError:
+        # No relative path exists (e.g. different drives on Windows).
+        rel_sites_dir = site_root.resolve().as_posix()
+    payload = {
+        "evaluation_year": evaluation_year,
+        "count": len(sitenames),
+        "sites_dir": rel_sites_dir,
+        "sites": sitenames,
+    }
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+    print(f"[PhenoCam-1] Wrote {output_path}")
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Command-line entry point: download the sites, then write the manifest.
+
+    Args:
+        argv: argument list; ``None`` lets ``argparse`` read ``sys.argv``.
+
+    Returns:
+        Process exit code (always 0 when no exception is raised).
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    add = parser.add_argument
+    add(
+        "--cache-dir",
+        type=Path,
+        default=Path("data"),
+        help="Base directory for per-site files and manifest",
+    )
+    add(
+        "--evaluation-year",
+        type=int,
+        default=EVALUATION_YEAR,
+        help=f"Calendar year to download (default: {EVALUATION_YEAR})",
+    )
+    add(
+        "--active-only",
+        action="store_true",
+        help="Restrict candidates to cameras marked active in the API",
+    )
+    add(
+        "--limit",
+        type=int,
+        default=None,
+        help="Process only the first N candidate sites (testing)",
+    )
+    add(
+        "--sites",
+        type=str,
+        default=None,
+        help="Comma-separated sitenames to download (testing)",
+    )
+    add(
+        "--refresh",
+        action="store_true",
+        help="Re-download sites even when cache files exist",
+    )
+    add(
+        "--output-json",
+        type=Path,
+        default=None,
+        help="Manifest output path (default: data/phenocam/{year}.json)",
+    )
+    options = parser.parse_args(argv)
+
+    # An empty or missing ``--sites`` value means "no filter".
+    wanted_sites = (
+        {name.strip() for name in options.sites.split(",") if name.strip()}
+        if options.sites
+        else None
+    )
+
+    downloaded = run_download(
+        cache_dir=options.cache_dir,
+        evaluation_year=options.evaluation_year,
+        active_only=options.active_only,
+        site_filter=wanted_sites,
+        limit=options.limit,
+        refresh=options.refresh,
+    )
+
+    if options.output_json:
+        manifest_path = options.output_json
+    else:
+        manifest_path = (
+            options.cache_dir / "phenocam" / f"{options.evaluation_year}.json"
+        )
+    write_manifest(
+        downloaded, manifest_path, options.cache_dir, options.evaluation_year
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/2-phenocam-screening.py
+++ b/2-phenocam-screening.py
@ -0,0 +1,495 @@
+"""Step 2: PhenoCam GCC + SNR screening on step-1 cache.
+
+Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
+
+- ``phenocam/{year}.json`` — step-1 manifest
+- ``phenocam/{year}/{sitename}.json`` — per-site metadata
+- ``phenocam/{year}/{sitename}_1day.csv`` — GCC timeseries
+
+Outputs (``data/phenocam_screening/``):
+
+- ``{year}.json`` — full per-site results
+- ``{year}.csv`` — flat summary table
+
+CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional; default: all manifest sites).
+
+Next step: :mod:`3-sentinel-data`.
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from datetime import date, datetime
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from scipy.interpolate import UnivariateSpline
+
+# Put the ``processing`` directory on ``sys.path`` so the project-local
+# import below resolves.
+# NOTE(review): ``parents[1]`` is the directory *above* the one holding this
+# script, while this diff places the script at the repository root — confirm
+# that ``processing`` really lives one level up.
+PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
+if str(PROCESSING_DIR) not in sys.path:
+    sys.path.insert(0, str(PROCESSING_DIR))
+
+from acquisition_phenocam import _phenocam_summary_gcc_value  # noqa: E402
+
+# Default minimum number of in-year GCC observations (``--min-gcc-points``).
+MIN_GCC_POINTS = 30
+# Default minimum spline SNR (``--snr-threshold``).
+SNR_THRESHOLD = 2.0
+# Default deduplication radius in metres (``--cluster-radius-m``).
+CLUSTER_RADIUS_M = 500.0
+# Gate names, in the order ``print_summary`` reports them.
+GATE_ORDER = ("phenocam", "snr", "cluster")
+# File-name suffix of the per-site one-day summary CSV.
+ONE_DAY_CSV_SUFFIX = "_1day.csv"
+# Sphere radius in metres used by ``_haversine_m``.
+_EARTH_RADIUS_M = 6371000.0
+
+
+def load_manifest(path: Path) -> dict[str, Any]:
+    """Load the step-1 manifest and check its required keys.
+
+    Raises:
+        ValueError: naming the first of ``evaluation_year``, ``sites_dir``
+            and ``sites`` that is absent from the manifest.
+    """
+    manifest = json.loads(path.read_text(encoding="utf-8"))
+    required = ("evaluation_year", "sites_dir", "sites")
+    key = next((name for name in required if name not in manifest), None)
+    if key is not None:
+        raise ValueError(f"Expected '{key}' in manifest {path}")
+    return manifest
+
+
+def resolve_sites_dir(manifest_path: Path, manifest: dict[str, Any]) -> Path:
+    """Resolve the manifest's ``sites_dir`` entry against the manifest's directory."""
+    manifest_dir = manifest_path.parent
+    return manifest_dir.joinpath(manifest["sites_dir"]).resolve()
+
+
+def load_site_entry(sites_dir: Path, sitename: str) -> dict[str, Any]:
+    """Load one site's metadata JSON and attach the path of its cached CSV.
+
+    The returned dict gains a ``_one_day_csv`` key: the CSV path when that
+    file exists, otherwise ``None``.
+    """
+    metadata_file = sites_dir / f"{sitename}.json"
+    entry = json.loads(metadata_file.read_text(encoding="utf-8"))
+    summary_csv = sites_dir / f"{sitename}{ONE_DAY_CSV_SUFFIX}"
+    if summary_csv.is_file():
+        entry["_one_day_csv"] = summary_csv
+    else:
+        entry["_one_day_csv"] = None
+    return entry
+
+
+def parse_gcc90_series(csv_path: Path, evaluation_year: int) -> list[tuple[str, float]]:
+    """Read the in-year GCC values from a one-day summary CSV.
+
+    Blank lines and lines starting with ``#`` are dropped before parsing.
+    Rows without a parsable ``YYYY-MM-DD`` ``date``, rows outside
+    ``evaluation_year`` and rows for which ``_phenocam_summary_gcc_value``
+    returns ``None`` are skipped.
+
+    Returns:
+        ``(iso_date, gcc)`` pairs sorted by date string.
+    """
+    data_lines: list[str] = []
+    for line in csv_path.read_text(encoding="utf-8").split("\n"):
+        if not line or line.startswith("#"):
+            continue
+        data_lines.append(line)
+
+    reader = csv.DictReader(data_lines)
+    header = reader.fieldnames or ()
+    # Passed through to the value helper when the CSV has no ``gcc_90`` column.
+    use_mean_fallback = "gcc_90" not in header
+
+    first_day, last_day = date(evaluation_year, 1, 1), date(evaluation_year, 12, 31)
+    collected: list[tuple[str, float]] = []
+    for row in reader:
+        raw_date = row.get("date")
+        if not raw_date:
+            continue
+        try:
+            row_date = datetime.strptime(raw_date, "%Y-%m-%d").date()
+        except ValueError:
+            continue
+        if row_date < first_day or row_date > last_day:
+            continue
+        gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
+        if gcc is not None:
+            collected.append((row_date.isoformat(), float(gcc)))
+    return sorted(collected, key=lambda item: item[0])
+
+
+def _months_covered(day_strings: list[str]) -> int:
+    """Count the distinct calendar months among ``YYYY-MM-DD`` day strings."""
+    return len({datetime.strptime(day, "%Y-%m-%d").month for day in day_strings})
+
+
+def _aic_for_spline(x: np.ndarray, y: np.ndarray, spline: UnivariateSpline) -> float:
+    """Score a fitted spline as ``n * log(rss / n) + 2 * edf``.
+
+    ``edf`` is taken as the number of knots plus the number of coefficients of
+    ``spline``. Returns ``math.inf`` when there are fewer than 4 points or the
+    residual sum of squares is not positive.
+    """
+    n_obs = len(y)
+    rss = float(np.sum((y - spline(x)) ** 2))
+    if n_obs < 4 or rss <= 0:
+        return math.inf
+    n_params = spline.get_knots().shape[0] + spline.get_coeffs().shape[0]
+    return n_obs * math.log(rss / n_obs) + 2.0 * float(n_params)
+
+
+def compute_snr_aic_spline(
+    series: list[tuple[str, float]],
+    *,
+    min_points: int = MIN_GCC_POINTS,
+) -> float | None:
+    """Estimate the signal-to-noise ratio of a GCC series with a cubic spline.
+
+    Smoothing splines are fitted over a log-spaced range of smoothing factors
+    and the fit with the lowest ``_aic_for_spline`` score is kept. The SNR is
+    the raw amplitude (max - min) divided by the RMSE of that fit's residuals.
+
+    Args:
+        series: ``(YYYY-MM-DD, gcc)`` pairs; day offsets are measured from the
+            first entry.
+        min_points: minimum series length required to attempt a fit. Defaults
+            to ``MIN_GCC_POINTS`` so existing callers behave as before.
+
+    Returns:
+        The SNR, or ``None`` when the series is shorter than ``min_points``,
+        has fewer than 5 distinct days, has zero variance, no spline could be
+        fitted, or the chosen fit has zero RMSE.
+    """
+    if len(series) < min_points:
+        return None
+
+    dates = [datetime.strptime(day, "%Y-%m-%d").date() for day, _ in series]
+    x = np.array([(d - dates[0]).days for d in dates], dtype=float)
+    y = np.array([value for _, value in series], dtype=float)
+    if len(np.unique(x)) < 5:
+        return None
+
+    y_var = float(np.var(y))
+    if y_var <= 0:
+        return None
+
+    # Smoothing factors scaled by the total sum of squares of the series.
+    candidates = np.logspace(-4, 2, 40) * y_var * len(y)
+    best_spline: UnivariateSpline | None = None
+    best_aic = math.inf
+    for smoothing in candidates:
+        try:
+            spline = UnivariateSpline(x, y, k=3, s=float(smoothing))
+        except Exception:
+            # Deliberate best-effort: a smoothing factor the fitter rejects is
+            # skipped; the remaining candidates are still tried.
+            continue
+        aic = _aic_for_spline(x, y, spline)
+        if aic < best_aic:
+            best_aic = aic
+            best_spline = spline
+
+    if best_spline is None:
+        return None
+
+    residuals = y - best_spline(x)
+    rmse = float(np.sqrt(np.mean(residuals**2)))
+    amplitude = float(np.max(y) - np.min(y))
+    if rmse <= 0:
+        return None
+    return amplitude / rmse
+
+
+def screen_site(
+    site_entry: dict[str, Any],
+    *,
+    evaluation_year: int,
+    min_gcc_points: int,
+    snr_threshold: float,
+) -> dict[str, Any]:
+    """Run the ``phenocam`` and ``snr`` gates for one site.
+
+    Args:
+        site_entry: dict with a ``response`` key (camera + ROI metadata) and an
+            optional ``_one_day_csv`` path, as built by ``load_site_entry``.
+        evaluation_year: year whose GCC observations are screened.
+        min_gcc_points: minimum number of in-year GCC observations. The same
+            value is used as the SNR fit's minimum series length, so lowering
+            it does not turn a passing site into ``snr_undefined``.
+        snr_threshold: minimum SNR required to pass the ``snr`` gate.
+
+    Returns:
+        ``{"response": ..., "calculations": ...}``. ``calculations`` holds the
+        counts, the SNR, ``status`` (``PASS``/``FAIL``), ``failing_gate``,
+        ``passed_gates`` and ``reason``.
+    """
+    response = site_entry["response"]
+    roi = response.get("roi")
+    csv_path = site_entry.get("_one_day_csv")
+    calculations: dict[str, Any] = {
+        "evaluation_year": evaluation_year,
+        "n_gcc_points": 0,
+        "first_gcc_date": None,
+        "last_gcc_date": None,
+        "months_with_gcc": 0,
+        "snr": None,
+        "min_gcc_points": min_gcc_points,
+        "snr_threshold": snr_threshold,
+        "status": "FAIL",
+        "failing_gate": None,
+        "passed_gates": [],
+        "reason": None,
+    }
+    result = {"response": response, "calculations": calculations}
+
+    def _fail(gate: str, reason: str) -> dict[str, Any]:
+        # Record where and why screening stopped; ``status`` is already FAIL.
+        calculations["failing_gate"] = gate
+        calculations["reason"] = reason
+        return result
+
+    if roi is None or not roi.get("one_day_summary") or csv_path is None:
+        return _fail("phenocam", "no_roi")
+
+    series = parse_gcc90_series(csv_path, evaluation_year)
+    calculations["n_gcc_points"] = len(series)
+    if not series:
+        return _fail("phenocam", "no_gcc_in_year")
+
+    day_strings = [day for day, _ in series]
+    calculations["first_gcc_date"] = day_strings[0]
+    calculations["last_gcc_date"] = day_strings[-1]
+    calculations["months_with_gcc"] = _months_covered(day_strings)
+
+    if len(series) < min_gcc_points:
+        return _fail("phenocam", "insufficient_gcc_points")
+
+    calculations["passed_gates"].append("phenocam")
+
+    # Use the caller's point threshold for the fit as well, not the module default.
+    snr = compute_snr_aic_spline(series, min_points=min_gcc_points)
+    calculations["snr"] = snr
+    if snr is None:
+        return _fail("snr", "snr_undefined")
+    if snr < snr_threshold:
+        return _fail("snr", "insufficient_snr")
+
+    calculations["passed_gates"].append("snr")
+    calculations["status"] = "PASS"
+    calculations["failing_gate"] = None
+    calculations["reason"] = None
+    return result
+
+
+def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
+    """Return the great-circle distance in metres between two lat/lon points.
+
+    Inputs are in decimal degrees; the distance is computed with the haversine
+    formula on a sphere of radius ``_EARTH_RADIUS_M``.
+    """
+    p1, p2 = math.radians(lat1), math.radians(lat2)
+    dlat = math.radians(lat2 - lat1)
+    dlon = math.radians(lon2 - lon1)
+    a = math.sin(dlat / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
+    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
+    # points, which would make ``asin`` raise a domain error.
+    a = min(1.0, max(0.0, a))
+    return 2 * _EARTH_RADIUS_M * math.asin(math.sqrt(a))
+
+
+def _site_coords(row: dict[str, Any]) -> tuple[float, float] | None:
+    """Return ``(lat, lon)`` of the row's camera, or ``None`` if either is missing."""
+    camera = row["response"]["camera"]
+    raw = (camera.get("Lat"), camera.get("Lon"))
+    if any(value is None for value in raw):
+        return None
+    return float(raw[0]), float(raw[1])
+
+
+def _cluster_rank(row: dict[str, Any]) -> tuple[int, float]:
+    """Return the sort key ``(n_gcc_points, snr)`` used to pick a cluster winner.
+
+    A missing or falsy ``snr`` counts as ``0.0``.
+    """
+    calculations = row["calculations"]
+    snr = calculations.get("snr")
+    return calculations["n_gcc_points"], float(snr if snr else 0.0)
+
+
+def apply_cluster_gate(results: list[dict[str, Any]], *, radius_m: float) -> int:
+    """Keep one site per group of nearby SNR-passed sites; demote the rest.
+
+    Sites that passed the ``snr`` gate are linked whenever their haversine
+    distance is at most ``radius_m``; linked sites form a cluster (links are
+    transitive). In each cluster the site with the highest ``_cluster_rank``
+    passes the ``cluster`` gate, and every other member is set to ``FAIL``
+    with reason ``nearby_duplicate``. ``results`` is modified in place.
+
+    Args:
+        results: per-site screening results as produced by ``screen_site``.
+        radius_m: linking distance in metres.
+
+    Returns:
+        The number of demoted sites.
+    """
+    # (index into ``results``, lat, lon) for every site taking part.
+    pool: list[tuple[int, float, float]] = []
+    for idx, row in enumerate(results):
+        if "snr" not in row["calculations"]["passed_gates"]:
+            continue
+        coords = _site_coords(row)
+        if coords is None:
+            # Without coordinates the site cannot be compared; it passes the gate.
+            row["calculations"]["passed_gates"].append("cluster")
+            continue
+        pool.append((idx, coords[0], coords[1]))
+
+    # Union-find over positions in ``pool``.
+    n = len(pool)
+    parent = list(range(n))
+
+    def find(x: int) -> int:
+        # Follow parent links to the root, shortening the path on the way.
+        while parent[x] != x:
+            parent[x] = parent[parent[x]]
+            x = parent[x]
+        return x
+
+    def union(a: int, b: int) -> None:
+        ra, rb = find(a), find(b)
+        if ra != rb:
+            parent[rb] = ra
+
+    # Compare every pair once and link those within the radius.
+    for i in range(n):
+        _, lat1, lon1 = pool[i]
+        for j in range(i + 1, n):
+            _, lat2, lon2 = pool[j]
+            if _haversine_m(lat1, lon1, lat2, lon2) <= radius_m:
+                union(i, j)
+
+    # Group pool positions by their root.
+    clusters: dict[int, list[int]] = {}
+    for i in range(n):
+        clusters.setdefault(find(i), []).append(i)
+
+    demoted = 0
+    for members in clusters.values():
+        result_indices = [pool[i][0] for i in members]
+        cluster_size = len(result_indices)
+        # On equal rank ``max`` keeps the first candidate, i.e. the member that
+        # appears earliest in ``results``.
+        winner_idx = max(result_indices, key=lambda idx: _cluster_rank(results[idx]))
+        winner_name = str(results[winner_idx]["response"]["camera"]["Sitename"])
+        for idx in result_indices:
+            calc = results[idx]["calculations"]
+            calc["cluster_size"] = cluster_size
+            if idx == winner_idx:
+                calc["passed_gates"].append("cluster")
+            else:
+                calc["status"] = "FAIL"
+                calc["failing_gate"] = "cluster"
+                calc["reason"] = "nearby_duplicate"
+                calc["cluster_winner"] = winner_name
+                demoted += 1
+    return demoted
+
+
+def run_screening(
+    manifest: dict[str, Any],
+    sites_dir: Path,
+    *,
+    evaluation_year: int,
+    min_gcc_points: int,
+    snr_threshold: float,
+    site_filter: set[str] | None = None,
+) -> list[dict[str, Any]]:
+    """Screen every manifest site, optionally restricted to ``site_filter``.
+
+    Sites are processed in manifest order; one progress line is printed per
+    site.
+
+    Returns:
+        One ``screen_site`` result per processed site, in processing order.
+    """
+    selected = manifest["sites"]
+    if site_filter is not None:
+        selected = [name for name in selected if name in site_filter]
+
+    screened: list[dict[str, Any]] = []
+    for index, sitename in enumerate(selected, start=1):
+        print(f"[PhenoCam-2] ({index}/{len(selected)}) {sitename}")
+        outcome = screen_site(
+            load_site_entry(sites_dir, sitename),
+            evaluation_year=evaluation_year,
+            min_gcc_points=min_gcc_points,
+            snr_threshold=snr_threshold,
+        )
+        screened.append(outcome)
+    return screened
+
+
+def print_summary(results: list[dict[str, Any]], evaluation_year: int) -> None:
+    """Print the pass count, per-gate tallies and a per-site table to stdout.
+
+    The table is ordered by sitename; a missing SNR is shown as an empty cell
+    and a missing gate or reason as ``-``.
+    """
+    all_calcs = [row["calculations"] for row in results]
+    n_passing = sum(1 for calc in all_calcs if calc["status"] == "PASS")
+    gates_label = " + ".join(GATE_ORDER)
+    print(
+        f"\n[PhenoCam-2] Screening for {evaluation_year}: "
+        f"{n_passing}/{len(results)} pass ({gates_label})"
+    )
+
+    for gate in GATE_ORDER:
+        fails = len([calc for calc in all_calcs if calc["failing_gate"] == gate])
+        after = len([calc for calc in all_calcs if gate in calc["passed_gates"]])
+        print(f"  after_{gate}: {after}, fail_at_{gate}: {fails}")
+
+    print("\nPer-site table")
+    print(
+        f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} "
+        f"{'status':>6} gate reason"
+    )
+    print("-" * 72)
+
+    def _by_sitename(item: dict[str, Any]) -> str:
+        return str(item["response"]["camera"]["Sitename"])
+
+    for row in sorted(results, key=_by_sitename):
+        camera = row["response"]["camera"]
+        calc = row["calculations"]
+        if calc["snr"] is not None:
+            snr_text = f"{calc['snr']:.2f}"
+        else:
+            snr_text = ""
+        print(
+            f"{camera['Sitename']:<24} {calc['n_gcc_points']:4d} "
+            f"{calc['months_with_gcc']:3d} {snr_text:>6} "
+            f"{calc['status']:>6} {(calc['failing_gate'] or '-'):<8} "
+            f"{calc['reason'] or '-'}"
+        )
+
+
+def write_screening_json(
+    results: list[dict[str, Any]],
+    output_path: Path,
+    evaluation_year: int,
+) -> None:
+    """Write the full screening results as JSON, sites sorted by sitename.
+
+    The payload also carries the total site count and the number of sites
+    whose status is ``PASS``. Parent directories are created as needed.
+    """
+    qualifying = sum(1 for row in results if row["calculations"]["status"] == "PASS")
+    ordered = sorted(
+        results,
+        key=lambda item: str(item["response"]["camera"]["Sitename"]),
+    )
+    document = json.dumps(
+        {
+            "evaluation_year": evaluation_year,
+            "count": len(results),
+            "qualifying_count": qualifying,
+            "sites": ordered,
+        },
+        indent=2,
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(document + "\n", encoding="utf-8")
+    print(f"[PhenoCam-2] Wrote {output_path}")
+
+
+def write_screening_csv(results: list[dict[str, Any]], output_path: Path) -> None:
+    """Write one flat CSV row per site: camera fields, ROI URL and calculations.
+
+    Columns follow the first row's key order, followed by any keys that only
+    later rows carry (in first-seen order). With no results, only a
+    ``Sitename,status`` header is written.
+    """
+
+    def _flatten(row: dict[str, Any]) -> dict[str, Any]:
+        camera = row["response"]["camera"]
+        metadata = camera.get("sitemetadata") or {}
+        roi = row["response"].get("roi") or {}
+        flat: dict[str, Any] = {
+            "Sitename": camera.get("Sitename"),
+            "Lat": camera.get("Lat"),
+            "Lon": camera.get("Lon"),
+            "site_description": metadata.get("site_description"),
+            "primary_veg_type": metadata.get("primary_veg_type"),
+            "site_type": metadata.get("site_type"),
+            "one_day_summary": roi.get("one_day_summary"),
+        }
+        flat.update(row["calculations"])
+        return flat
+
+    rows = [_flatten(row) for row in results]
+    if rows:
+        # ``dict.fromkeys`` de-duplicates while keeping first-seen key order.
+        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
+    else:
+        fieldnames = ["Sitename", "status"]
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8", newline="") as handle:
+        writer = csv.DictWriter(handle, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+    print(f"[PhenoCam-2] Wrote {output_path}")
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Command-line entry point: screen the step-1 sites and write both reports.
+
+    The step-1 manifest is always read from ``data/phenocam/{year}.json``.
+
+    Args:
+        argv: argument list; ``None`` lets ``argparse`` read ``sys.argv``.
+
+    Returns:
+        Process exit code (always 0 when no exception is raised).
+
+    Raises:
+        SystemExit: when the step-1 manifest does not exist.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    add = parser.add_argument
+    add(
+        "--evaluation-year",
+        type=int,
+        default=2025,
+        help="Evaluation year (default: 2025)",
+    )
+    add(
+        "--sites",
+        type=str,
+        default=None,
+        help="Comma-separated sitenames (default: all sites in step-1 manifest)",
+    )
+    add(
+        "--min-gcc-points",
+        type=int,
+        default=MIN_GCC_POINTS,
+        help=f"Minimum valid gcc_90 observations in-year (default: {MIN_GCC_POINTS})",
+    )
+    add(
+        "--snr-threshold",
+        type=float,
+        default=SNR_THRESHOLD,
+        help=f"Minimum AIC-spline SNR (default: {SNR_THRESHOLD})",
+    )
+    add(
+        "--output-json",
+        type=Path,
+        default=None,
+        help="Screening output (default: data/phenocam_screening/{year}.json)",
+    )
+    add(
+        "--output-csv",
+        type=Path,
+        default=None,
+        help="Flat CSV summary path",
+    )
+    add(
+        "--cluster-radius-m",
+        type=float,
+        default=CLUSTER_RADIUS_M,
+        help=f"Deduplicate SNR-passed sites within this radius (default: {CLUSTER_RADIUS_M})",
+    )
+    add(
+        "--no-cluster",
+        action="store_true",
+        help="Skip nearby-site deduplication gate",
+    )
+    options = parser.parse_args(argv)
+
+    year = options.evaluation_year
+    manifest_path = Path("data") / "phenocam" / f"{year}.json"
+    if not manifest_path.is_file():
+        raise SystemExit(f"Step-1 manifest not found: {manifest_path}")
+
+    # An empty or missing ``--sites`` value means "no filter".
+    wanted_sites = (
+        {name.strip() for name in options.sites.split(",") if name.strip()}
+        if options.sites
+        else None
+    )
+
+    manifest = load_manifest(manifest_path)
+    results = run_screening(
+        manifest,
+        resolve_sites_dir(manifest_path, manifest),
+        evaluation_year=year,
+        min_gcc_points=options.min_gcc_points,
+        snr_threshold=options.snr_threshold,
+        site_filter=wanted_sites,
+    )
+
+    if not options.no_cluster:
+        demoted = apply_cluster_gate(results, radius_m=options.cluster_radius_m)
+        if demoted:
+            print(f"[PhenoCam-2] Cluster dedup: demoted {demoted} nearby duplicate(s)")
+    print_summary(results, year)
+
+    report_dir = Path("data") / "phenocam_screening"
+    json_target = options.output_json or (report_dir / f"{year}.json")
+    csv_target = options.output_csv or (report_dir / f"{year}.csv")
+    write_screening_json(results, json_target, year)
+    write_screening_csv(results, csv_target)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/3-sentinel-data.py
+++ b/3-sentinel-data.py
@ -0,0 +1,805 @@
+"""Step 3: Download S2 and S3 rasters and prepare EFAST inputs.
+
+Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
+
+- ``phenocam_screening/{year}.json`` — step-2 PASS sites (coordinates included)
+
+Outputs (``data/``):
+
+- ``sentinel_data/{year}/{sitename}/raw/s3/*.tif`` — S3 SYN L2 per-date GeoTIFFs
+- ``sentinel_data/{year}/{sitename}/prepared/s2/`` — S2 REFL + DIST_CLOUD GeoTIFFs
+- ``sentinel_data/{year}/{sitename}/prepared/s3/`` — S3 composite GeoTIFFs
+- ``sentinel_data/{year}/{sitename}/data.json`` — run summary
+
+Requires ``CDSE_USER`` / ``CDSE_PASSWORD`` (``uv sync`` installs efast).
+
+CLI:
+
+- ``--evaluation-year`` (default 2025)
+- ``--site`` (optional; default: all step-2 PASS sites)
+
+Prior step: :mod:`2-phenocam-screening`.
+Next step: :mod:`4-fusion`.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import netCDF4
+import numpy as np
+import openeo
+import rasterio
+import requests
+from dotenv import load_dotenv
+from pystac_client import Client
+from rasterio import shutil as rio_shutil
+from rasterio.enums import Resampling
+from rasterio.errors import WindowError
+from rasterio.transform import from_bounds
+from rasterio.vrt import WarpedVRT
+from rasterio.warp import transform_geom
+from rasterio.windows import Window
+from rasterio.windows import from_bounds as window_from_bounds
+from rasterio.windows import transform as window_transform
+from shapely import wkt as shapely_wkt
+from tqdm import tqdm
+
+# ---------------------------------------------------------------------------
+# Public constants — edit here to change pipeline behaviour
+# ---------------------------------------------------------------------------
+
+# Sentinel-2 bands to read; each name must be a key of _BAND_ASSETS below.
+S2_BANDS = ["B02", "B03", "B04"]
+
+# Band names requested from the OpenEO collection and read back as NetCDF
+# variables when splitting the download into per-date GeoTIFFs.
+S3_BANDS = [
+    "Syn_Oa04_reflectance",
+    "Syn_Oa06_reflectance",
+    "Syn_Oa08_reflectance",
+    "Syn_Oa17_reflectance",
+]
+# Band descriptions written into the per-date S3 GeoTIFFs (same order as S3_BANDS).
+S3_BAND_NAMES = ["SDR_Oa04", "SDR_Oa06", "SDR_Oa08", "SDR_Oa17"]
+
+# S2 rasters are zero-padded to multiples of this value, and the S3 target
+# grid is the S2 grid size integer-divided by it.
+RESOLUTION_RATIO = 30
+# Passed to efast's produce_median_composite as mosaic_days / step / sigma_doy / D.
+S3_MOSAIC_DAYS = 100
+S3_COMPOSITE_STEP = 2
+S3_COMPOSITE_SIGMA_DOY = 10
+S3_COMPOSITE_D = 20
+# Passed to efast's smoothing step as std.
+S3_SMOOTHING_STD = 1
+S3_REFLECTANCE_SCALE = 10_000  # OpenEO SYN L2 SDR → 0–1 (EFAST expects < 5)
+
+# ---------------------------------------------------------------------------
+# Internal S2 constants
+# ---------------------------------------------------------------------------
+
+# STAC API root used for the Sentinel-2 L2A search.
+EARTH_SEARCH_URL = "https://earth-search.aws.element84.com/v1"
+
+# Sentinel-2 band name -> asset key looked up on each STAC item.
+_BAND_ASSETS: dict[str, str] = {
+    "B02": "blue",
+    "B03": "green",
+    "B04": "red",
+    "B05": "rededge1",
+    "B06": "rededge2",
+    "B07": "rededge3",
+    "B08": "nir",
+    "B8A": "nir08",
+    "B11": "swir16",
+    "B12": "swir22",
+}
+# Asset key of the scene classification layer used to build the cloud mask.
+_SCL_ASSET = "scl"
+# Half-width (degrees) used to grow a single-point geometry into a bbox.
+_MIN_BBOX_HALF_DEG = 0.008
+
+# ---------------------------------------------------------------------------
+# Internal S3 constants
+# ---------------------------------------------------------------------------
+
+# Token endpoint used for the CDSE password grant.
+CDSE_TOKEN_URL = (
+    "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/"
+    "protocol/openid-connect/token"
+)
+# Backend address handed to openeo.connect().
+OPENEO_URL = "openeo.dataspace.copernicus.eu"
+# Collection id loaded from the OpenEO backend.
+S3_COLLECTION = "SENTINEL3_SYN_L2_SYN"
+
+# Root of all step inputs/outputs, relative to the current working directory.
+DATA_DIR = Path("data")
+# Default for the --evaluation-year CLI option.
+DEFAULT_YEAR = 2025
+
+
+# ---------------------------------------------------------------------------
+# Credentials
+# ---------------------------------------------------------------------------
+
+
+def _cdse_credentials() -> dict[str, str | None]:
+    """Read the CDSE login details from the environment.
+
+    ``load_dotenv()`` runs first so values from a ``.env`` file are visible.
+    Either entry is ``None`` when its variable is not set.
+    """
+    load_dotenv()
+    env_names = {"username": "CDSE_USER", "password": "CDSE_PASSWORD"}
+    return {field: os.getenv(variable) for field, variable in env_names.items()}
+
+
+# ---------------------------------------------------------------------------
+# Screening manifest helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_screening_pass_sites(year: int) -> list[dict[str, Any]]:
+    """Collect the sites that the step-2 screening JSON marks as PASS.
+
+    Returns one dict per site with ``sitename`` (str), ``lat`` and ``lon``
+    (floats). Rows without a site name or with a missing coordinate are
+    dropped. Raises ``FileNotFoundError`` when the manifest for ``year``
+    does not exist.
+    """
+    manifest_path = DATA_DIR / "phenocam_screening" / f"{year}.json"
+    if not manifest_path.is_file():
+        raise FileNotFoundError(
+            f"Step-2 screening manifest not found: {manifest_path}"
+        )
+    document = json.loads(manifest_path.read_text(encoding="utf-8"))
+
+    passed: list[dict[str, Any]] = []
+    for entry in document.get("sites", []):
+        if entry.get("calculations", {}).get("status") != "PASS":
+            continue
+        cam = entry.get("response", {}).get("camera", {})
+        site_name = cam.get("Sitename")
+        site_lat = cam.get("Lat")
+        site_lon = cam.get("Lon")
+        if not site_name or site_lat is None or site_lon is None:
+            continue
+        passed.append(
+            {
+                "sitename": str(site_name),
+                "lat": float(site_lat),
+                "lon": float(site_lon),
+            }
+        )
+    return passed
+
+
+# ---------------------------------------------------------------------------
+# S2: geometry helpers (from s2_cloud_native.py)
+# ---------------------------------------------------------------------------
+
+
+def wkt_to_bbox(geometry_wkt: str) -> list[float]:
+    """Convert a WKT geometry to a ``[west, south, east, north]`` bbox.
+
+    Any axis on which the geometry has zero extent is widened by
+    ``_MIN_BBOX_HALF_DEG`` on both sides, so a point becomes a small square
+    and a horizontal or vertical line still yields a bbox with non-zero area.
+
+    Raises:
+        ValueError: if the geometry is empty (it has no usable bounds).
+    """
+    geom = shapely_wkt.loads(geometry_wkt)
+    if geom.is_empty:
+        raise ValueError(f"Cannot derive a bbox from empty geometry: {geometry_wkt!r}")
+    minx, miny, maxx, maxy = geom.bounds
+    # Pad each degenerate axis independently; a point is degenerate on both.
+    if minx == maxx:
+        minx -= _MIN_BBOX_HALF_DEG
+        maxx += _MIN_BBOX_HALF_DEG
+    if miny == maxy:
+        miny -= _MIN_BBOX_HALF_DEG
+        maxy += _MIN_BBOX_HALF_DEG
+    return [minx, miny, maxx, maxy]
+
+
+def _boa_offset(item: Any) -> int:
+    """Return the additive BOA offset to apply to a STAC item's pixel values.
+
+    The result is 0 when the item is flagged ``earthsearch:boa_offset_applied``
+    or when its processing baseline is below 4.0 (or cannot be parsed);
+    otherwise it is -1000.
+    """
+    props = item.properties
+    if props.get("earthsearch:boa_offset_applied"):
+        return 0
+
+    raw_baseline = (
+        props.get("processing:baseline")
+        or props.get("s2:processing_baseline")
+        or "0"
+    )
+    try:
+        version = float(str(raw_baseline))
+    except ValueError:
+        # An unparseable baseline is treated like an old (pre-4.0) one.
+        return 0
+    if version >= 4.0:
+        return -1000
+    return 0
+
+
+def _window_for_bbox(
+    src: rasterio.io.DatasetReader,
+    bbox_4326: list[float],
+) -> Window | None:
+    """Map an EPSG:4326 bbox onto ``src`` and clip it to the raster extent.
+
+    The bbox corners are reprojected into the dataset CRS and the window is
+    built from their min/max. Returns ``None`` when the intersection with the
+    dataset raises ``WindowError``.
+    """
+    west = bbox_4326[0]
+    south = bbox_4326[1]
+    east = bbox_4326[2]
+    north = bbox_4326[3]
+    ring = [
+        [west, south],
+        [east, south],
+        [east, north],
+        [west, north],
+        [west, south],
+    ]
+    projected = transform_geom(
+        "EPSG:4326",
+        src.crs.to_wkt(),
+        {"type": "Polygon", "coordinates": [ring]},
+    )
+    # The fifth vertex only closes the ring, so the first four are enough.
+    corners = projected["coordinates"][0][:4]
+    left = min(pt[0] for pt in corners)
+    right = max(pt[0] for pt in corners)
+    bottom = min(pt[1] for pt in corners)
+    top = max(pt[1] for pt in corners)
+
+    requested = window_from_bounds(left, bottom, right, top, src.transform)
+    try:
+        clipped = requested.intersection(Window(0, 0, src.width, src.height))
+    except WindowError:
+        return None
+    return clipped
+
+
+def _read_window(
+    href: str,
+    bbox_4326: list[float],
+    out_shape: tuple[int, int] | None = None,
+    resampling: Resampling = Resampling.bilinear,
+) -> tuple[np.ndarray, dict[str, Any]] | None:
+    """Range-read a single-band array for the bbox window from a COG URL.
+
+    Args:
+        href: Path or URL that ``rasterio.open`` can read.
+        bbox_4326: ``[west, south, east, north]`` in EPSG:4326.
+        out_shape: Optional ``(height, width)`` to resample the window to.
+        resampling: Resampling method used when ``out_shape`` is given.
+
+    Returns:
+        ``(data, profile)`` where ``profile`` carries ``crs``, ``transform``,
+        ``height``, ``width`` and ``dtype`` for the returned array, or
+        ``None`` when the bbox does not produce a window on the dataset.
+    """
+    with rasterio.open(href) as src:
+        win = _window_for_bbox(src, bbox_4326)
+        if win is None:
+            return None
+        data = src.read(1, window=win, out_shape=out_shape, resampling=resampling)
+        transform = window_transform(win, src.transform)
+        if out_shape is not None and data.shape[0] and data.shape[1]:
+            # A resampled read changes the pixel size: scale the transform so
+            # the profile still georeferences the returned array correctly.
+            transform = transform * transform.scale(
+                win.width / data.shape[1],
+                win.height / data.shape[0],
+            )
+        profile: dict[str, Any] = {
+            "crs": src.crs,
+            "transform": transform,
+            "height": data.shape[0],
+            "width": data.shape[1],
+            "dtype": src.dtypes[0],
+        }
+    return data, profile
+
+
+def _read_bands(
+    item: Any,
+    bbox: list[float],
+    bands: list[str],
+) -> tuple[list[np.ndarray], dict[str, Any]] | None:
+    """Read every requested band of one STAC item as float32 arrays.
+
+    The first band fixes the reference profile; later bands are read with that
+    band's height/width as ``out_shape``. Returns ``None`` if a band has no
+    known asset on the item, if any window read fails, or if ``bands`` is empty.
+    """
+    arrays: list[np.ndarray] = []
+    first_profile: dict[str, Any] | None = None
+
+    for name in bands:
+        key = _BAND_ASSETS.get(name)
+        if key is None or key not in item.assets:
+            return None
+
+        shape_hint = None
+        if first_profile:
+            shape_hint = (first_profile["height"], first_profile["width"])
+
+        window_read = _read_window(item.assets[key].href, bbox, out_shape=shape_hint)
+        if window_read is None:
+            return None
+
+        pixels, band_profile = window_read
+        if first_profile is None:
+            first_profile = band_profile
+        arrays.append(pixels.astype("float32"))
+
+    if first_profile is None:
+        return None
+    return arrays, first_profile
+
+
+def _cloud_mask(item: Any, bbox: list[float], shape: tuple[int, int]) -> np.ndarray:
+    """Build a boolean mask from the item's SCL asset, resampled to ``shape``.
+
+    A pixel is masked when its SCL value is 0, 3, or greater than 7. When the
+    item has no SCL asset, or the window read yields nothing, an all-``False``
+    mask of ``shape`` is returned.
+    """
+    scl_asset = item.assets.get(_SCL_ASSET)
+    if not scl_asset:
+        return np.zeros(shape, dtype=bool)
+
+    scl_read = _read_window(
+        scl_asset.href,
+        bbox,
+        out_shape=shape,
+        resampling=Resampling.nearest,  # class codes must not be interpolated
+    )
+    if scl_read is None:
+        return np.zeros(shape, dtype=bool)
+
+    classes = scl_read[0]
+    return (classes == 0) | (classes == 3) | (classes > 7)
+
+
+def _pad_to_multiple(arr: np.ndarray, ratio: int) -> np.ndarray:
+    """Grow a (bands, H, W) array with zeros until H and W divide by ``ratio``.
+
+    Padding is appended at the bottom and right edges only. The input object
+    itself is returned when no padding is required.
+    """
+    # -n % ratio is the distance from n up to the next multiple of ratio.
+    extra_rows = -arr.shape[1] % ratio
+    extra_cols = -arr.shape[2] % ratio
+    if not (extra_rows or extra_cols):
+        return arr
+    padding = ((0, 0), (0, extra_rows), (0, extra_cols))
+    return np.pad(arr, padding, constant_values=0)
+
+
+# ---------------------------------------------------------------------------
+# S2: STAC search + download (from s2_cloud_native.py)
+# ---------------------------------------------------------------------------
+
+
+def stac_search_s2(
+    bbox: list[float],
+    start_date: datetime,
+    end_date: datetime,
+) -> list[Any]:
+    """Query Earth Search for Sentinel-2 L2A items that intersect ``bbox``.
+
+    The interval runs from ``start_date`` (with its own time of day) to
+    23:59:59 on ``end_date``. Items are de-duplicated by ``id``.
+    """
+    catalog = Client.open(EARTH_SEARCH_URL)
+    interval_start = start_date.strftime("%Y-%m-%dT%H:%M:%SZ")
+    interval_end = end_date.strftime("%Y-%m-%dT23:59:59Z")
+    query = catalog.search(
+        collections=["sentinel-2-l2a"],
+        bbox=bbox,
+        datetime=f"{interval_start}/{interval_end}",
+        max_items=10_000,
+    )
+
+    unique_items: dict[Any, Any] = {}
+    for found in query.items():
+        unique_items[found.id] = found
+    return list(unique_items.values())
+
+
+def download_s2_window(
+    items: list[Any],
+    bbox: list[float],
+    output_dir: Path,
+    bands: list[str],
+    ratio: int = RESOLUTION_RATIO,
+) -> None:
+    """Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
+
+    Writes ``{item.id}_REFL.tif`` directly — no intermediate raw download.
+    Cloud/shadow pixels (SCL 0, 3, >7) are zeroed. BOA offset is inferred from
+    ``processing:baseline``. Output is zero-padded to multiples of ``ratio``.
+
+    Items whose output file already exists are skipped, so the function can be
+    re-run to resume. Each file is written under a temporary ``.part`` name and
+    renamed once complete, so an interrupted run never leaves a truncated
+    ``*_REFL.tif`` that a later run would mistake for a finished one.
+
+    Args:
+        items: STAC items to read.
+        bbox: ``[west, south, east, north]`` in EPSG:4326.
+        output_dir: Directory for the GeoTIFFs (created if missing).
+        bands: Sentinel-2 band names to stack, in output band order.
+        ratio: Height and width are padded to multiples of this value.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    for item in tqdm(items, unit="granule", desc="S2 COG window read"):
+        out_path = output_dir / f"{item.id}_REFL.tif"
+        if out_path.is_file():
+            continue
+
+        bands_result = _read_bands(item, bbox, bands)
+        if bands_result is None:
+            tqdm.write(f"[S2] Skipping {item.id}: missing asset or no bbox overlap")
+            continue
+        band_arrays, ref_profile = bands_result
+        target_shape = (ref_profile["height"], ref_profile["width"])
+        mask = _cloud_mask(item, bbox, target_shape)
+
+        # DN -> reflectance, then clamp negatives and blank masked pixels.
+        stacked = (np.stack(band_arrays) + _boa_offset(item)) / 10_000.0
+        np.clip(stacked, 0, None, out=stacked)
+        stacked[:, mask] = 0.0
+        stacked = _pad_to_multiple(stacked, ratio)
+
+        out_profile = {
+            "driver": "GTiff",
+            "count": len(bands),
+            "dtype": "float32",
+            "nodata": 0,
+            "crs": ref_profile["crs"],
+            "transform": ref_profile["transform"],
+            "height": stacked.shape[1],
+            "width": stacked.shape[2],
+            "compress": "lzw",
+        }
+        # Write to a sibling temp file, then rename: the final name only ever
+        # appears once the GeoTIFF is fully written and closed.
+        tmp_path = out_path.with_name(out_path.name + ".part")
+        try:
+            with rasterio.open(tmp_path, "w", **out_profile) as dst:
+                dst.write(stacked)
+                for i, band_name in enumerate(bands, 1):
+                    dst.set_band_description(i, band_name)
+            tmp_path.replace(out_path)
+        except BaseException:
+            tmp_path.unlink(missing_ok=True)
+            raise
+
+
+# ---------------------------------------------------------------------------
+# S3: download (from s3_openeo.py)
+# ---------------------------------------------------------------------------
+
+
+def _utm_epsg(bbox: list[float]) -> int:
+    """Return the UTM EPSG code for the centre of a ``[W, S, E, N]`` bbox.
+
+    Northern-hemisphere centres (``lat >= 0``) map to ``326xx`` and southern
+    ones to ``327xx``, where ``xx`` is the UTM zone (1–60). The zone wraps at
+    the antimeridian, so a centre longitude of exactly 180 resolves to zone 1
+    rather than the non-existent zone 61.
+    """
+    lon = (bbox[0] + bbox[2]) / 2
+    lat = (bbox[1] + bbox[3]) / 2
+    # Floor division plus modulo keeps the zone inside 1..60 for any longitude.
+    zone = int((lon + 180) // 6) % 60 + 1
+    return (32600 if lat >= 0 else 32700) + zone
+
+
+def _cdse_token(username: str, password: str) -> str:
+    """Request a CDSE access token with the password grant.
+
+    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the token
+    endpoint answers with an error status.
+    """
+    form = {
+        "grant_type": "password",
+        "username": username,
+        "password": password,
+        "client_id": "cdse-public",
+    }
+    response = requests.post(CDSE_TOKEN_URL, data=form, timeout=30)
+    response.raise_for_status()
+    token_payload = response.json()
+    return token_payload["access_token"]
+
+
+def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
+    """Split an OpenEO NetCDF into per-date GeoTIFFs.
+
+    Output filenames match the ``S3*__YYYYMMDDTHHMMSS.tif`` pattern that
+    ``s3_processing.produce_median_composite`` expects.
+
+    Handles half-pixel cell-centre coordinates, ascending y-axis (flip_y),
+    and fills NetCDF masked values with NaN.
+
+    Args:
+        nc_path: NetCDF file with ``t``, ``x``, ``y`` and one variable per
+            entry of ``S3_BANDS``.
+        output_dir: Existing directory that receives the GeoTIFFs.
+        epsg: EPSG code written as the CRS of every GeoTIFF.
+
+    Returns:
+        Number of GeoTIFFs written (one per time step).
+    """
+    written = 0
+    with netCDF4.Dataset(str(nc_path), "r") as nc:
+        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
+        x_coords = np.asarray(nc.variables["x"][:], dtype=float)
+        y_coords = np.asarray(nc.variables["y"][:], dtype=float)
+
+        # Coordinates are cell centres; extend by half a cell to get the edges.
+        half_x = abs(x_coords[1] - x_coords[0]) / 2 if len(x_coords) > 1 else 0.0
+        half_y = abs(y_coords[1] - y_coords[0]) / 2 if len(y_coords) > 1 else 0.0
+        transform = from_bounds(
+            x_coords.min() - half_x,
+            y_coords.min() - half_y,
+            x_coords.max() + half_x,
+            y_coords.max() + half_y,
+            len(x_coords),
+            len(y_coords),
+        )
+        # The transform is north-up, so rows stored south-to-north are flipped.
+        flip_y = len(y_coords) > 1 and y_coords[0] < y_coords[-1]
+
+        date_counts: dict[str, int] = {}
+        for t_idx, time_val in enumerate(times):
+            date_str = time_val.strftime("%Y%m%d")
+            # n numbers repeated time steps on the same calendar day.
+            n = date_counts.get(date_str, 0)
+            date_counts[date_str] = n + 1
+
+            # Fill each band BEFORE stacking: np.stack/np.concatenate drop the
+            # masks of MaskedArray inputs, which would leave the raw fill
+            # values in the output instead of NaN.
+            filled_bands = [
+                np.ma.filled(
+                    np.ma.asarray(nc.variables[b][t_idx, :, :]).astype("float32"),
+                    fill_value=np.nan,
+                )
+                for b in S3_BANDS
+            ]
+            stacked = np.stack(filled_bands, axis=0) / S3_REFLECTANCE_SCALE
+            if flip_y:
+                stacked = stacked[:, ::-1, :]
+
+            filename = f"S3_{date_str}_{n}__{date_str}T120000.tif"
+            with rasterio.open(
+                output_dir / filename,
+                "w",
+                driver="GTiff",
+                height=len(y_coords),
+                width=len(x_coords),
+                count=len(S3_BANDS),
+                dtype="float32",
+                nodata=float("nan"),
+                crs=f"EPSG:{epsg}",
+                transform=transform,
+                compress="lzw",
+            ) as dst:
+                dst.write(stacked)
+                for i, band_name in enumerate(S3_BAND_NAMES, 1):
+                    dst.set_band_description(i, band_name)
+            written += 1
+
+    return written
+
+
+def download_s3_openeo(
+    start_date: datetime,
+    end_date: datetime,
+    aoi_geometry: str,
+    output_dir: Path,
+    credentials: dict[str, str | None],
+) -> None:
+    """Download S3 SYN L2 SDR for an AOI via CDSE OpenEO, server-side clipped.
+
+    Writes per-date ``S3_{YYYYMMDD}_{n}__{YYYYMMDD}T120000.tif`` files to
+    ``output_dir``, ready for ``s3_processing.produce_median_composite``.
+    Skips if any ``S3*.tif`` files already exist.
+
+    Args:
+        start_date: First day of the temporal extent.
+        end_date: Last day of the temporal extent.
+        aoi_geometry: WKT geometry; its bbox is the spatial extent.
+        output_dir: Directory for the GeoTIFFs (created if missing).
+        credentials: Dict with ``username`` and ``password`` for CDSE.
+
+    Raises:
+        ValueError: if a download is needed but a credential is missing.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    if any(output_dir.glob("S3*.tif")):
+        print("[S3-OEO] Skipping — output_dir already contains S3 GeoTIFFs")
+        return
+
+    # Fail with a clear message instead of sending empty credentials and
+    # surfacing an opaque HTTP error from the token endpoint.
+    username = credentials.get("username")
+    password = credentials.get("password")
+    if not username or not password:
+        raise ValueError(
+            "CDSE credentials missing: set CDSE_USER and CDSE_PASSWORD"
+        )
+
+    bbox = wkt_to_bbox(aoi_geometry)
+    epsg = _utm_epsg(bbox)
+    spatial_extent = {
+        "west": bbox[0],
+        "east": bbox[2],
+        "south": bbox[1],
+        "north": bbox[3],
+    }
+
+    print("[S3-OEO] Authenticating with CDSE...")
+    token = _cdse_token(username, password)
+    conn = openeo.connect(OPENEO_URL)
+    conn.authenticate_oidc_access_token(token)
+
+    start_str = start_date.strftime("%Y-%m-%d")
+    end_str = end_date.strftime("%Y-%m-%d")
+    print(f"[S3-OEO] Loading {S3_COLLECTION} ({start_str} → {end_str})...")
+    datacube = conn.load_collection(
+        S3_COLLECTION,
+        spatial_extent=spatial_extent,
+        temporal_extent=[start_str, end_str],
+        bands=S3_BANDS,
+    ).resample_spatial(projection=epsg)
+
+    nc_path = output_dir / "_s3_syn_l2.nc"
+    print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
+    t0 = time.time()
+    datacube.download(str(nc_path), format="NetCDF")
+    print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
+
+    print("[S3-OEO] Splitting into per-date GeoTIFFs...")
+    try:
+        written = _netcdf_to_geotiffs(nc_path, output_dir, epsg)
+    except BaseException:
+        # The directory held no S3*.tif before this call, so anything there
+        # now is a partial result; left in place it would make the next run
+        # skip the download and work from an incomplete series.
+        for partial in output_dir.glob("S3*.tif"):
+            partial.unlink(missing_ok=True)
+        raise
+    nc_path.unlink(missing_ok=True)
+    print(f"[S3-OEO] {written} GeoTIFFs written to {output_dir}")
+
+
+# ---------------------------------------------------------------------------
+# S2: distance_to_clouds helper
+# ---------------------------------------------------------------------------
+
+
+def _import_distance_to_clouds():
+    """Import and return ``efast.s2_processing.distance_to_clouds`` on demand.
+
+    Raises ``ImportError`` with an install hint when the import fails.
+    """
+    try:
+        from efast.s2_processing import distance_to_clouds
+    except ImportError as exc:
+        message = "efast not found. Install with: uv sync"
+        raise ImportError(message) from exc
+    else:
+        return distance_to_clouds
+
+
+def _rescale_dist_cloud(s2_dir: Path) -> None:
+    """Rewrite ``*DIST_CLOUD.tif`` rasters whose band-1 maximum is at most 1.
+
+    In such a raster every value greater than 0 is replaced by 2.0 and band 1
+    is written back in place as float32; rasters with a larger maximum are
+    left untouched.
+    """
+    for raster_path in s2_dir.glob("*DIST_CLOUD.tif"):
+        with rasterio.open(raster_path) as reader:
+            distances = reader.read(1)
+
+        looks_normalised = float(np.nanmax(distances)) <= 1
+        if not looks_normalised:
+            continue
+
+        patched = np.where(distances > 0, 2.0, distances).astype(np.float32)
+        with rasterio.open(raster_path, "r+") as writer:
+            writer.write(patched, 1)
+
+
+# ---------------------------------------------------------------------------
+# S3: compositing + reprojection helpers (from 4-sentinel-data.py)
+# ---------------------------------------------------------------------------
+
+
+def _import_s3_processing():
+    """Import and return the ``efast.s3_processing`` module on demand.
+
+    Raises ``ImportError`` with an install hint when the import fails.
+    """
+    try:
+        from efast import s3_processing
+    except ImportError as exc:
+        message = "efast not found. Install with: uv sync"
+        raise ImportError(message) from exc
+    else:
+        return s3_processing
+
+
+def _reproject_s3_composites_to_s2_grid(
+    composite_dir: Path,
+    s2_refl_path: Path,
+    s3_out_dir: Path,
+    *,
+    resolution_ratio: int = RESOLUTION_RATIO,
+) -> None:
+    """Warp every ``composite_*.tif`` onto a coarse copy of the S2 grid.
+
+    The target grid has the bounds and CRS of ``s2_refl_path`` with its width
+    and height integer-divided by ``resolution_ratio``. Outputs are written to
+    ``s3_out_dir`` as ``composite_{date}.tif`` with dashes removed from the
+    date part of the input name.
+    """
+    s3_out_dir.mkdir(parents=True, exist_ok=True)
+
+    with rasterio.open(s2_refl_path) as reference:
+        extent = reference.bounds
+        grid_crs = reference.crs
+        cols = reference.width // resolution_ratio
+        rows = reference.height // resolution_ratio
+    grid_transform = rasterio.transform.from_bounds(
+        extent.left,
+        extent.bottom,
+        extent.right,
+        extent.top,
+        cols,
+        rows,
+    )
+
+    # Identical for every composite, so build the warp settings once.
+    warp_kwargs = {
+        "transform": grid_transform,
+        "height": rows,
+        "width": cols,
+        "crs": grid_crs,
+        "resampling": Resampling.cubic,
+    }
+
+    for composite_path in sorted(composite_dir.glob("composite_*.tif")):
+        stamp = composite_path.stem.split("_", 1)[1].replace("-", "")
+        target = s3_out_dir / f"composite_{stamp}.tif"
+        with rasterio.open(composite_path) as source, WarpedVRT(
+            source, **warp_kwargs
+        ) as warped:
+            out_profile = warped.profile.copy()
+            out_profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
+            rio_shutil.copy(warped, target, **out_profile)
+
+
+def _s3_reflectance_scale(raw_s3_dir: Path) -> float:
+    """Decide the multiplier that staging applies to the raw ``S3*.tif`` values.
+
+    Returns ``1 / S3_REFLECTANCE_SCALE`` as soon as one raster has a finite
+    maximum above 5, otherwise ``1.0``.
+    """
+    for raster in raw_s3_dir.glob("S3*.tif"):
+        with rasterio.open(raster) as src:
+            peak = float(np.nanmax(src.read()))
+        if np.isfinite(peak) and peak > 5:
+            return 1.0 / S3_REFLECTANCE_SCALE
+    return 1.0
+
+
+def _stage_s3_for_efast(raw_s3_dir: Path, staging_dir: Path) -> int:
+    """Rebuild ``staging_dir`` with float32 copies of the raw ``S3*.tif`` files.
+
+    Pixel values are multiplied by the factor from ``_s3_reflectance_scale``
+    and band descriptions are carried over. Any existing ``staging_dir`` is
+    removed first. Returns the number of files written.
+    """
+    factor = _s3_reflectance_scale(raw_s3_dir)
+    if staging_dir.exists():
+        shutil.rmtree(staging_dir)
+    staging_dir.mkdir(parents=True)
+
+    copied = 0
+    for source_path in sorted(raw_s3_dir.glob("S3*.tif")):
+        with rasterio.open(source_path) as reader:
+            scaled = reader.read().astype("float32") * factor
+            out_profile = reader.profile.copy()
+            band_labels = reader.descriptions
+        out_profile.update(dtype="float32")
+
+        with rasterio.open(staging_dir / source_path.name, "w", **out_profile) as writer:
+            writer.write(scaled)
+            for band_index, label in enumerate(band_labels, 1):
+                if label:
+                    writer.set_band_description(band_index, label)
+        copied += 1
+
+    if factor != 1.0:
+        print(f"[S3-PREP] Scaled raw SDR by {factor:g} for EFAST compositing")
+    return copied
+
+
+def _prepare_s3(
+    raw_s3_dir: Path,
+    s2_refl_path: Path,
+    s3_out_dir: Path,
+    *,
+    work_dir: Path | None = None,
+) -> None:
+    """Composite, smooth and reformat the raw S3 rasters, then warp to the S2 grid.
+
+    Intermediate products live in ``scaled``/``composites``/``blurred``/
+    ``calibrated`` folders under ``work_dir``; when ``work_dir`` is not given
+    a ``_efast_work`` folder inside ``s3_out_dir`` is used and deleted at the
+    end. Existing ``composite_*.tif`` files in ``s3_out_dir`` are replaced.
+
+    Raises ``ValueError`` when ``raw_s3_dir`` holds no ``S3*.tif`` file.
+    """
+    s3 = _import_s3_processing()
+    scratch = work_dir or (s3_out_dir / "_efast_work")
+    stage_names = ("scaled", "composites", "blurred", "calibrated")
+    stage_dirs = {name: scratch / name for name in stage_names}
+
+    # Start every stage from an empty folder.
+    for folder in stage_dirs.values():
+        if folder.exists():
+            shutil.rmtree(folder)
+        folder.mkdir(parents=True, exist_ok=True)
+
+    if _stage_s3_for_efast(raw_s3_dir, stage_dirs["scaled"]) == 0:
+        raise ValueError(f"No S3*.tif files found in {raw_s3_dir}")
+
+    print(
+        f"[S3-PREP] produce_median_composite: mosaic_days={S3_MOSAIC_DAYS}, "
+        f"step={S3_COMPOSITE_STEP}, sigma_doy={S3_COMPOSITE_SIGMA_DOY}, "
+        f"D={S3_COMPOSITE_D}"
+    )
+    s3.produce_median_composite(
+        stage_dirs["scaled"],
+        stage_dirs["composites"],
+        step=S3_COMPOSITE_STEP,
+        mosaic_days=S3_MOSAIC_DAYS,
+        s3_bands=[1, 2, 3, 4],
+        D=S3_COMPOSITE_D,
+        sigma_doy=S3_COMPOSITE_SIGMA_DOY,
+    )
+    s3.smoothing(
+        stage_dirs["composites"],
+        stage_dirs["blurred"],
+        product="composite",
+        std=S3_SMOOTHING_STD,
+        preserve_nan=False,
+    )
+    s3.reformat_s3(
+        stage_dirs["blurred"],
+        stage_dirs["calibrated"],
+        product="composite",
+        scaling_factor=1,
+    )
+
+    # Drop results of earlier runs so only the fresh composites remain.
+    for stale in s3_out_dir.glob("composite_*.tif"):
+        stale.unlink()
+    _reproject_s3_composites_to_s2_grid(
+        stage_dirs["calibrated"], s2_refl_path, s3_out_dir
+    )
+
+    # Only the implicit scratch folder is ours to delete.
+    if work_dir is None and scratch.exists():
+        shutil.rmtree(scratch)
+
+    n_out = len(list(s3_out_dir.glob("composite_*.tif")))
+    print(f"[S3-PREP] Wrote {n_out} composites")
+
+
+# ---------------------------------------------------------------------------
+# Per-site pipeline
+# ---------------------------------------------------------------------------
+
+
+def process_site(
+    sitename: str,
+    lat: float,
+    lon: float,
+    year: int,
+) -> dict[str, Any]:
+    """Fetch S3 and S2 data for one site and prepare the EFAST inputs.
+
+    Works under ``sentinel_data/{year}/{sitename}`` inside ``DATA_DIR``, covers
+    1 January to 31 December of ``year``, and writes the returned summary dict
+    to ``data.json`` in that folder.
+
+    Raises ``ValueError`` when no ``*_REFL.tif`` exists after the S2 download.
+    """
+    site_root = DATA_DIR / "sentinel_data" / str(year) / sitename
+    s2_prepared = site_root / "prepared" / "s2"
+    s3_raw_dir = site_root / "raw" / "s3"
+    s3_prepared = site_root / "prepared" / "s3"
+    point_wkt = f"POINT ({lon} {lat})"
+    site_bbox = wkt_to_bbox(point_wkt)
+    cdse_login = _cdse_credentials()
+    year_start = datetime(year, 1, 1)
+    year_end = datetime(year, 12, 31)
+
+    # --- Sentinel-3 download -------------------------------------------------
+    print(f"[{sitename}] Downloading S3...")
+    download_s3_openeo(
+        start_date=year_start,
+        end_date=year_end,
+        aoi_geometry=point_wkt,
+        output_dir=s3_raw_dir,
+        credentials=cdse_login,
+    )
+
+    # --- Sentinel-2 search + window download -----------------------------------
+    print(f"[{sitename}] Searching S2 on Earth Search...")
+    granules = stac_search_s2(site_bbox, year_start, year_end)
+    print(f"[{sitename}] {len(granules)} S2 items found — downloading windows...")
+    download_s2_window(granules, site_bbox, s2_prepared, S2_BANDS, RESOLUTION_RATIO)
+
+    # --- Sentinel-2 distance-to-clouds ---------------------------------------
+    print(f"[{sitename}] Computing distance-to-clouds...")
+    compute_cloud_distance = _import_distance_to_clouds()
+    compute_cloud_distance(s2_prepared, ratio=RESOLUTION_RATIO)
+    _rescale_dist_cloud(s2_prepared)
+
+    # --- Sentinel-3 compositing (any REFL raster serves as the grid reference) -
+    reference_refl = next(iter(s2_prepared.glob("*_REFL.tif")), None)
+    if reference_refl is None:
+        raise ValueError(
+            f"No REFL files in {s2_prepared} — S2 download may have failed"
+        )
+    s3_prepared.mkdir(parents=True, exist_ok=True)
+    print(f"[{sitename}] Running S3 compositing pipeline...")
+    _prepare_s3(s3_raw_dir, reference_refl, s3_prepared)
+
+    def _count(folder: Path, pattern: str) -> int:
+        return len(list(folder.glob(pattern)))
+
+    summary = {
+        "sitename": sitename,
+        "evaluation_year": year,
+        "lat": lat,
+        "lon": lon,
+        "s2_refl_count": _count(s2_prepared, "*_REFL.tif"),
+        "s2_dist_cloud_count": _count(s2_prepared, "*_DIST_CLOUD.tif"),
+        "s3_raw_count": _count(s3_raw_dir, "S3*.tif"),
+        "s3_composite_count": _count(s3_prepared, "composite_*.tif"),
+    }
+    site_root.mkdir(parents=True, exist_ok=True)
+    summary_text = json.dumps(summary, indent=2) + "\n"
+    (site_root / "data.json").write_text(summary_text, encoding="utf-8")
+    return summary
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run step 3 for every selected step-2 PASS site.
+
+    Returns 1 when there are no PASS sites or ``--site`` matches none of them,
+    otherwise 0. A failure while processing a site is printed and the loop
+    continues with the next site.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
+    parser.add_argument(
+        "--site",
+        type=str,
+        default=None,
+        help="Single sitename to process (default: all step-2 PASS sites)",
+    )
+    options = parser.parse_args(argv)
+    year = options.evaluation_year
+
+    selected = _load_screening_pass_sites(year)
+    if not selected:
+        print("[Sentinel-3] No PASS sites found in step-2 screening output")
+        return 1
+
+    wanted = options.site
+    if wanted:
+        selected = [entry for entry in selected if entry["sitename"] == wanted]
+        if not selected:
+            print(f"[Sentinel-3] Site '{wanted}' not found in step-2 PASS sites")
+            return 1
+
+    total = len(selected)
+    print(f"[Sentinel-3] Processing {total} site(s)")
+    for position, entry in enumerate(selected, 1):
+        sitename = entry["sitename"]
+        print(f"[Sentinel-3] ({position}/{total}) {sitename}")
+        try:
+            summary = process_site(sitename, entry["lat"], entry["lon"], year)
+            print(
+                f"[Sentinel-3] {sitename} done — "
+                f"{summary['s2_refl_count']} REFL, "
+                f"{summary['s3_composite_count']} composites"
+            )
+        except Exception as exc:
+            # One failing site must not stop the remaining ones.
+            print(f"[Sentinel-3] {sitename} FAILED: {exc}")
+
+    return 0
+
+
+# Script entry point: run the CLI and exit with the status returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/4-fusion.py
+++ b/4-fusion.py
@ -0,0 +1,330 @@
+"""Step 4: Compute GCC and run EFAST BtI + ItB fusion for prepared sites.
+
+Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
+
+- ``sentinel_data/{year}/{sitename}/prepared/s2/`` — ``*_REFL.tif`` + ``*_DIST_CLOUD.tif``
+- ``sentinel_data/{year}/{sitename}/prepared/s3/`` — ``composite_*.tif`` (4-band)
+
+Outputs (``data/``):
+
+- ``sentinel_data/{year}/{sitename}/prepared/s2/*_GCC.tif`` — S2 GCC (in-place)
+- ``sentinel_data/{year}/{sitename}/prepared/gcc_s3/*.tif`` — S3 GCC composites
+- ``fusion/{year}/{sitename}/bti/fusion/REFL_*.tif`` — BtI fused 3-band reflectance (matching S2 B02/B03/B04)
+- ``fusion/{year}/{sitename}/bti/gcc/GCC_*.tif`` — GCC derived from BtI fusion
+- ``fusion/{year}/{sitename}/itb/s2/GCC_*.tif`` — per-acquisition S2 GCC (simplified names)
+- ``fusion/{year}/{sitename}/itb/s3/GCC_*.tif`` — per-composite S3 GCC (simplified names)
+- ``fusion/{year}/{sitename}/itb/fusion/GCC_*.tif`` — ItB fused GCC
+
+Requires ``uv sync`` (efast).
+
+CLI:
+
+- ``--evaluation-year`` (default 2025)
+- ``--site`` (optional; default: all prepared sites under ``sentinel_data/{year}/``)
+
+Prior step: :mod:`3-sentinel-data`.
+"""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import rasterio
+from dateutil import rrule
+
+# ---------------------------------------------------------------------------
+# Public constants
+# ---------------------------------------------------------------------------
+
+# Forwarded to ``efast.fusion`` as ``ratio``
+# (presumably the S3:S2 pixel-size ratio — confirm against the efast docs).
+RESOLUTION_RATIO = 30
+# Spacing in days between fusion dates; the same number of days is also
+# trimmed from both ends of the detected S2 date range (see ``fuse_site``).
+MOSAIC_STEP = 2
+# Forwarded to ``efast.fusion`` as ``max_days``.
+MAX_DAYS = 100
+# Forwarded to ``efast.fusion`` as ``minimum_acquisition_importance``.
+MINIMUM_ACQUISITION_IMPORTANCE = 0
+
+# Root of all pipeline inputs and outputs, relative to the working directory.
+DATA_DIR = Path("data")
+# Default value of ``--evaluation-year``.
+DEFAULT_YEAR = 2025
+
+
+# ---------------------------------------------------------------------------
+# efast import helper
+# ---------------------------------------------------------------------------
+
+
+def _import_efast():
+    """Import ``efast.efast`` lazily and return the module.
+
+    Raises ``ImportError`` with an install hint when the package is missing.
+    """
+    try:
+        import efast.efast as efast_module
+    except ImportError as exc:
+        raise ImportError("efast not found. Install with: uv sync") from exc
+    else:
+        return efast_module
+
+
+# ---------------------------------------------------------------------------
+# GCC computation (from s2_cloud_native.py and s3_openeo.py)
+# ---------------------------------------------------------------------------
+
+
+def compute_gcc_s2(s2_dir: Path, output_dir: Path) -> None:
+    """Compute GCC from S2 REFL files and write ``*_GCC.tif`` to ``output_dir``.
+
+    For every ``*_REFL.tif`` in ``s2_dir`` (bands 1-3 read as B02/B03/B04) a
+    single-band float32 raster ``G / (B + G + R)`` is written under the same
+    name with ``_REFL`` replaced by ``_GCC``.  Pixels whose band sum is zero
+    (cloud-masked) remain zero.  Outputs that already exist are skipped, so
+    the function can be re-run safely.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    for src_path in sorted(s2_dir.glob("*_REFL.tif")):
+        out_path = output_dir / src_path.name.replace("_REFL.tif", "_GCC.tif")
+        if out_path.is_file():
+            continue
+        with rasterio.open(src_path) as src:
+            # Cast before summing: if the REFL raster is integer-typed the
+            # three-band sum could otherwise overflow the source dtype.
+            b, g, r = src.read([1, 2, 3]).astype("float32")
+            profile = src.profile
+        total = b + g + r
+        # The epsilon avoids division by zero; zero-sum pixels are reset below.
+        gcc = g / (total + 1e-10)
+        gcc[total == 0] = 0
+        # GCC is a 0..1 ratio, so the output must be floating point regardless
+        # of the source dtype (same as ``compute_gcc_s3``).
+        profile.update(count=1, dtype="float32")
+        with rasterio.open(out_path, "w", **profile) as dst:
+            dst.write(gcc[np.newaxis].astype("float32"))
+
+
+def compute_gcc_s3(s3_dir: Path, output_dir: Path) -> None:
+    """Derive a single-band GCC GeoTIFF from every S3 composite.
+
+    Each ``composite_*.tif`` in ``s3_dir`` (bands 1-3 read as Oa04/Oa06/Oa08)
+    yields a float32 file of the same name in ``output_dir``.  Pixels whose
+    band sum is NaN stay NaN.  Existing outputs are left untouched.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    composites = sorted(s3_dir.glob("composite_*.tif"))
+    for composite in composites:
+        target = output_dir / composite.name
+        if target.is_file():
+            continue
+        with rasterio.open(composite) as reader:
+            blue = reader.read(1)
+            green = reader.read(2)
+            red = reader.read(3)
+            meta = reader.profile
+        band_sum = blue + green + red
+        ratio = green / (band_sum + 1e-10)
+        ratio[np.isnan(band_sum)] = np.nan
+        meta.update(count=1, dtype="float32")
+        with rasterio.open(target, "w", **meta) as writer:
+            writer.write(ratio[np.newaxis].astype("float32"))
+
+
+def compute_gcc_from_refl(refl_dir: Path, gcc_dir: Path) -> None:
+    """Derive GCC from ``REFL_YYYYMMDD.tif`` files (BtI fusion output).
+
+    For every ``REFL_*.tif`` in ``refl_dir`` (bands 1-3 read as blue, green,
+    red) a single-band float32 ``GCC_*.tif`` with ``G / (B + G + R)`` is
+    written to ``gcc_dir``.  Pixels whose band sum is zero remain zero.
+    Outputs that already exist are skipped.
+    """
+    gcc_dir.mkdir(parents=True, exist_ok=True)
+    for src_path in sorted(refl_dir.glob("REFL_*.tif")):
+        # Only rewrite the leading prefix, never a later "REFL_" in the name.
+        out_path = gcc_dir / src_path.name.replace("REFL_", "GCC_", 1)
+        if out_path.is_file():
+            continue
+        with rasterio.open(src_path) as src:
+            # Cast before summing so an integer-typed source cannot overflow.
+            b, g, r = src.read([1, 2, 3]).astype("float32")
+            profile = src.profile
+        total = b + g + r
+        # The epsilon avoids division by zero; zero-sum pixels are reset below.
+        gcc = g / (total + 1e-10)
+        gcc[total == 0] = 0
+        # GCC is a 0..1 ratio, so force a float output dtype regardless of the
+        # dtype of the fused reflectance (same as ``compute_gcc_s3``).
+        profile.update(count=1, dtype="float32")
+        with rasterio.open(out_path, "w", **profile) as dst:
+            dst.write(gcc[np.newaxis].astype("float32"))
+
+
+# ---------------------------------------------------------------------------
+# Date-range detection
+# ---------------------------------------------------------------------------
+
+
+def _refl_date_range(s2_dir: Path) -> tuple[datetime, datetime] | None:
+    """Find the earliest and latest acquisition date among the REFL files.
+
+    The date is taken from the first eight characters of the third
+    ``_``-separated token of the file stem, as in
+    ``S2A_MSIL2A_20230911T114111_N0509_R025_T29PKT_20230911T153131_REFL.tif``.
+    Files that do not fit this pattern are ignored.  Returns ``None`` when no
+    file yields a date.
+    """
+    stamps: list[datetime] = []
+    for refl_path in s2_dir.glob("*_REFL.tif"):
+        tokens = refl_path.stem.split("_")
+        if len(tokens) < 3:
+            continue
+        try:
+            stamp = datetime.strptime(tokens[2][:8], "%Y%m%d")
+        except ValueError:
+            # Token is not a YYYYMMDD date: skip this file.
+            continue
+        stamps.append(stamp)
+    return (min(stamps), max(stamps)) if stamps else None
+
+
+# ---------------------------------------------------------------------------
+# Per-site fusion
+# ---------------------------------------------------------------------------
+
+
+def fuse_site(sitename: str, year: int) -> dict[str, Any]:
+    """Run GCC computation and EFAST BtI + ItB fusion for one prepared site.
+
+    Reads ``prepared/s2`` and ``prepared/s3`` under
+    ``data/sentinel_data/{year}/{sitename}/`` and writes fusion products to
+    ``data/fusion/{year}/{sitename}/``.  Intermediate rasters (S2 GCC, S3 GCC,
+    3-band S3 composites) are written next to the prepared inputs and reused
+    when they already exist.
+
+    Returns a summary dict with the site name, year, detected date range,
+    number of fusion dates and the count of ``*.tif`` files in each output
+    directory.
+
+    Raises:
+        FileNotFoundError: no ``*_REFL.tif`` or no ``composite_*.tif`` input.
+        ValueError: no acquisition date could be parsed from REFL filenames.
+        ImportError: ``efast`` is not installed.
+    """
+    efast = _import_efast()
+
+    s2_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s2"
+    s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3"
+    gcc_s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "gcc_s3"
+    base = DATA_DIR / "fusion" / str(year) / sitename
+
+    # Fail early with a clear message when either input set is missing.
+    if not s2_dir.is_dir() or not any(s2_dir.glob("*_REFL.tif")):
+        raise FileNotFoundError(f"No REFL files in {s2_dir}")
+    if not s3_dir.is_dir() or not any(s3_dir.glob("composite_*.tif")):
+        raise FileNotFoundError(f"No composite files in {s3_dir}")
+
+    # S2 GCC files are written into the same directory as the REFL inputs.
+    print(f"[{sitename}] Computing S2 GCC (in-place)...")
+    compute_gcc_s2(s2_dir, s2_dir)
+
+    print(f"[{sitename}] Computing S3 GCC...")
+    compute_gcc_s3(s3_dir, gcc_s3_dir)
+
+    date_range = _refl_date_range(s2_dir)
+    if date_range is None:
+        raise ValueError(f"Could not detect date range from REFL filenames in {s2_dir}")
+    start, end = date_range
+    print(f"[{sitename}] Date range: {start.date()} → {end.date()}")
+
+    # One fusion date every MOSAIC_STEP days, starting MOSAIC_STEP days after
+    # the first S2 acquisition and ending MOSAIC_STEP days before the last.
+    # The list is empty when the range is shorter than 2 * MOSAIC_STEP days.
+    fusion_dates = list(
+        rrule.rrule(
+            rrule.DAILY,
+            dtstart=start + timedelta(MOSAIC_STEP),
+            until=end - timedelta(MOSAIC_STEP),
+            interval=MOSAIC_STEP,
+        )
+    )
+
+    # Keyword arguments shared by the ItB and BtI ``efast.fusion`` calls.
+    _fusion_kwargs = dict(
+        ratio=RESOLUTION_RATIO,
+        max_days=MAX_DAYS,
+        minimum_acquisition_importance=MINIMUM_ACQUISITION_IMPORTANCE,
+    )
+
+    # --- ItB: GCC first, then fuse GCC ---
+    itb_s2 = base / "itb" / "s2"
+    itb_s3 = base / "itb" / "s3"
+    itb_fusion = base / "itb" / "fusion"
+    itb_s2.mkdir(parents=True, exist_ok=True)
+    itb_s3.mkdir(parents=True, exist_ok=True)
+    itb_fusion.mkdir(parents=True, exist_ok=True)
+
+    # Copy the GCC inputs under simplified ``GCC_<date>.tif`` names.  These
+    # copies are not passed to ``efast.fusion`` below.  When two S2 products
+    # share an acquisition date only the first (sorted) one is copied.
+    for p in sorted(s2_dir.glob("*_GCC.tif")):
+        dst = itb_s2 / f"GCC_{p.stem.split('_')[2][:8]}.tif"
+        if not dst.exists():
+            shutil.copy2(p, dst)
+    for p in sorted(gcc_s3_dir.glob("composite_*.tif")):
+        dst = itb_s3 / f"GCC_{p.stem.split('_')[1]}.tif"
+        if not dst.exists():
+            shutil.copy2(p, dst)
+
+    print(f"[{sitename}] ItB: fusing GCC over {len(fusion_dates)} dates...")
+    for date in fusion_dates:
+        # NOTE(review): argument order looks like (date, S3 dir, S2 dir, output dir)
+        # — confirm against the efast API.
+        efast.fusion(date, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **_fusion_kwargs)
+
+    # --- BtI: fuse reflectance (3-band, matching S2 B02/B03/B04), then derive GCC ---
+    # S3 composites have 4 bands; strip band 4 (Oa17/NIR) so shapes match S2 REFL.
+    s3_rgb_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3_rgb"
+    s3_rgb_dir.mkdir(parents=True, exist_ok=True)
+    for p in sorted(s3_dir.glob("composite_*.tif")):
+        out = s3_rgb_dir / p.name
+        if not out.exists():
+            with rasterio.open(p) as src:
+                data = src.read([1, 2, 3])
+                profile = src.profile.copy()
+                profile.update(count=3)
+            with rasterio.open(out, "w", **profile) as dst:
+                dst.write(data)
+
+    bti_fusion = base / "bti" / "fusion"
+    bti_gcc = base / "bti" / "gcc"
+    bti_fusion.mkdir(parents=True, exist_ok=True)
+
+    print(f"[{sitename}] BtI: fusing REFL over {len(fusion_dates)} dates...")
+    for date in fusion_dates:
+        efast.fusion(date, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **_fusion_kwargs)
+
+    # ``compute_gcc_from_refl`` creates ``bti_gcc`` itself.
+    print(f"[{sitename}] BtI: deriving GCC from fused REFL...")
+    compute_gcc_from_refl(bti_fusion, bti_gcc)
+
+    return {
+        "sitename": sitename,
+        "evaluation_year": year,
+        "start": start.date().isoformat(),
+        "end": end.date().isoformat(),
+        "fusion_dates": len(fusion_dates),
+        "itb_fusion_files": len(list(itb_fusion.glob("*.tif"))),
+        "bti_fusion_files": len(list(bti_fusion.glob("*.tif"))),
+        "bti_gcc_files": len(list(bti_gcc.glob("*.tif"))),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Site discovery
+# ---------------------------------------------------------------------------
+
+
+def _discover_sites(year: int) -> list[str]:
+    """List, in sorted order, the sites under ``sentinel_data/{year}`` that have
+    at least one ``prepared/s2/*_REFL.tif`` file."""
+    year_root = DATA_DIR / "sentinel_data" / str(year)
+    if not year_root.is_dir():
+        return []
+    names: list[str] = []
+    for entry in year_root.iterdir():
+        if not entry.is_dir():
+            continue
+        if any((entry / "prepared" / "s2").glob("*_REFL.tif")):
+            names.append(entry.name)
+    names.sort()
+    return names
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run step 4 for one site (``--site``) or for every prepared site.
+
+    Returns 1 when no prepared site is found.  Otherwise returns 0; per-site
+    failures are printed and do not change the exit code.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
+    cli.add_argument(
+        "--site",
+        type=str,
+        default=None,
+        help="Single sitename to fuse (default: all prepared sites)",
+    )
+    options = cli.parse_args(argv)
+    year = options.evaluation_year
+
+    # An explicit --site is taken as-is; fuse_site reports missing inputs.
+    targets = [options.site] if options.site else _discover_sites(year)
+    if not targets:
+        print(f"[Fusion] No prepared sites found under data/sentinel_data/{year}/")
+        return 1
+
+    total = len(targets)
+    print(f"[Fusion] Processing {total} site(s)")
+    for position, name in enumerate(targets, start=1):
+        print(f"[Fusion] ({position}/{total}) {name}")
+        try:
+            # One failing site must not abort the remaining ones.
+            result = fuse_site(name, year)
+            print(
+                f"[Fusion] {name} done — "
+                f"{result['fusion_dates']} dates, "
+                f"itb={result['itb_fusion_files']} bti={result['bti_fusion_files']} "
+                f"bti_gcc={result['bti_gcc_files']}"
+            )
+        except Exception as exc:
+            print(f"[Fusion] {name} FAILED: {exc}")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/5-metrics.py
+++ b/5-metrics.py
@ -0,0 +1,695 @@
+"""Step 5: Pre-compute per-site GCC timeseries + raster index for the webapp.
+
+Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
+
+- ``phenocam_screening/{year}.json`` — qualifying sites + metadata
+- ``phenocam/{year}/{site}_1day.csv`` — daily GCC timeseries
+- ``sentinel_data/{year}/{site}/prepared/s2/*_GCC.tif`` — S2 GCC rasters
+- ``sentinel_data/{year}/{site}/prepared/gcc_s3/composite_*.tif`` — S3 GCC rasters
+- ``fusion/{year}/{site}/bti/gcc/GCC_*.tif`` — BtI GCC rasters
+- ``fusion/{year}/{site}/itb/fusion/GCC_*.tif`` — ItB GCC rasters
+
+Outputs (``data/metrics/``):
+
+- ``manifest.json`` — years + per-site metadata
+- ``{year}/{site}/gcc_phenocam.json`` — PhenoCam daily ``gcc_90`` for the evaluation year
+- ``{year}/{site}/gcc_s2.json`` — S2 GCC (center pixel, cloud-free scenes)
+- ``{year}/{site}/gcc_s2_whittaker.json`` — Whittaker-smoothed S2 GCC
+- ``{year}/{site}/gcc_s3.json`` — S3 composite GCC
+- ``{year}/{site}/gcc_s3_smooth.json`` — S3 5-day moving average
+- ``{year}/{site}/gcc_fusion_bti.json`` — BtI fused GCC
+- ``{year}/{site}/gcc_fusion_itb.json`` — ItB fused GCC
+- ``{year}/{site}/phenocam_images.json`` — midday photo URLs for the viewer
+- ``{year}/{site}/rasters_s2_refl.json`` — S2 REFL paths (BtI view)
+- ``{year}/{site}/rasters_s3_composite.json`` — S3 composite paths (BtI view)
+- ``{year}/{site}/rasters_s2_gcc.json`` — S2 GCC paths (ItB view)
+- ``{year}/{site}/rasters_s3_gcc.json`` — S3 GCC paths (ItB view)
+- ``{year}/{site}/rasters_fusion_bti_refl.json`` — BtI fused REFL paths
+- ``{year}/{site}/rasters_fusion_itb_gcc.json`` — ItB fused GCC paths
+- ``{year}/{site}/metrics.json`` — NSE, RMSE, nRMSE, Pearson r vs PhenoCam per series
+- ``{year}/{site}/bands_s2.json`` — S2 center-pixel reflectance (B02, B03, B04) per scene
+- ``{year}/{site}/bands_s3.json`` — S3 center-pixel reflectance (Oa04, Oa06, Oa08, Oa17) per composite
+- ``{year}/{site}/covariates.json`` — spatial CV/std, S2/S3 counts, gap stats
+
+CLI:
+
+- ``--evaluation-year`` (default 2025)
+- ``--site`` (optional; default: all qualifying sites with sentinel data)
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+import datetime
+import numpy as np
+import rasterio
+from rasterio.crs import CRS
+from rasterio.transform import rowcol
+from pyproj import Transformer
+from scipy.stats import pearsonr
+from tqdm import tqdm
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# Root of all pipeline inputs and outputs, relative to the working directory.
+DATA_DIR = Path("data")
+# Default value of ``--evaluation-year``.
+DEFAULT_YEAR = 2025
+
+# GCC smoothing window for the S3 moving average, counted in consecutive
+# series points (composites), not calendar days — see ``_moving_average``.
+S3_SMOOTH_WINDOW = 5
+
+# Whittaker lambda (penalised smoothing strength for S2)
+WHITTAKER_LAMBDA = 400.0
+
+# Half-width in metres for the spatial heterogeneity footprint (~300 m = 1 S3 pixel)
+SPATIAL_CV_HALF_M = 150
+
+# PhenoCam archive image URL pattern
+PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
+
+
+# ---------------------------------------------------------------------------
+# Helpers: raster pixel extraction
+# ---------------------------------------------------------------------------
+
+
+def _read_center_pixel(path: Path, lat: float, lon: float) -> float | None:
+    """Average band 1 over the 3×3 pixel neighbourhood of (lat, lon).
+
+    The WGS84 coordinate is reprojected into the raster CRS and the window is
+    clipped to the raster extent.  Nodata, zero and NaN pixels are ignored.
+    Returns ``None`` when no usable pixel remains or when anything goes wrong
+    while reading.
+    """
+    try:
+        with rasterio.open(path) as dataset:
+            to_raster_crs = Transformer.from_crs(
+                CRS.from_epsg(4326), dataset.crs, always_xy=True
+            )
+            east, north = to_raster_crs.transform(lon, lat)
+            centre_row, centre_col = rowcol(dataset.transform, east, north)
+            top = max(0, centre_row - 1)
+            bottom = min(dataset.height, centre_row + 2)
+            left = max(0, centre_col - 1)
+            right = min(dataset.width, centre_col + 2)
+            patch_window = rasterio.windows.Window(left, top, right - left, bottom - top)
+            patch = dataset.read(1, window=patch_window).astype(float)
+            nodata_value = dataset.nodata
+        if nodata_value is not None:
+            patch = np.where(patch == nodata_value, np.nan, patch)
+        # Zero marks masked pixels, so it is excluded from the mean as well.
+        patch[patch == 0] = np.nan
+        mean_value = np.nanmean(patch)
+        if np.isnan(mean_value):
+            return None
+        return float(mean_value)
+    except Exception:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Helpers: date extraction from filenames
+# ---------------------------------------------------------------------------
+
+
+def _date_from_gcc_tif(path: Path) -> str | None:
+    """Return the first run of eight digits in the file stem (e.g. from
+    ``GCC_YYYYMMDD.tif`` or ``composite_YYYYMMDD.tif``), or ``None``."""
+    found = re.search(r"(\d{8})", path.stem)
+    if found is None:
+        return None
+    return found.group(1)
+
+
+def _date_from_s2_tif(path: Path) -> str | None:
+    """Return YYYYMMDD from the third ``_``-separated token of an S2 product
+    name (``S2X_TTTT_YYYYMMDD…``), or ``None`` if it does not start with
+    eight digits."""
+    tokens = path.stem.split("_")
+    if len(tokens) < 3:
+        return None
+    hit = re.match(r"(\d{8})", tokens[2])
+    if hit:
+        return hit.group(1)
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Helpers: Whittaker smoother (2nd-order differences, dense linear solve)
+# ---------------------------------------------------------------------------
+
+
+def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
+    """Smooth ``values`` with a Whittaker penalised least-squares fit.
+
+    The penalty uses second-order differences weighted by ``lam``.  ``None``
+    entries get zero weight in the fit and are returned as ``None``, so the
+    output only carries smoothed values at observed positions.  Inputs with
+    fewer than four entries, or a singular system, are returned as a copy.
+    """
+    n = len(values)
+    if n < 4:
+        return values[:]
+
+    observed = [v is not None for v in values]
+    weights = np.array([1.0 if seen else 0.0 for seen in observed], dtype=float)
+    filled = np.array([0.0 if v is None else v for v in values], dtype=float)
+
+    # (n-2) x n second-difference operator.
+    second_diff = np.diff(np.eye(n), n=2, axis=0)
+    system = np.diag(weights) + lam * second_diff.T @ second_diff
+    try:
+        fitted = np.linalg.solve(system, weights * filled)
+    except np.linalg.LinAlgError:
+        return values[:]
+
+    return [float(z) if seen else None for z, seen in zip(fitted, observed)]
+
+
+# ---------------------------------------------------------------------------
+# Helpers: PhenoCam CSV parsing
+# ---------------------------------------------------------------------------
+
+
+def _parse_phenocam_csv(
+    csv_path: Path, year: int, site: str
+) -> tuple[list[dict], list[dict]]:
+    """Return (gcc_series, image_list) filtered to ``year``.
+
+    ``gcc_series`` entries: ``{"date": "YYYY-MM-DD", "gcc_90": float}``
+    ``image_list`` entries: ``{"date": "YYYY-MM-DD", "url": str}``
+
+    Lines starting with ``#`` are treated as comments.  Rows whose ``year``
+    column differs from ``year`` are dropped.  A row contributes to
+    ``gcc_series`` only when ``gcc_90`` parses as a float, and to
+    ``image_list`` only when it has both a date and a ``midday_filename``
+    other than ``NA``.  A missing file yields two empty lists.
+    """
+    gcc_series: list[dict] = []
+    image_list: list[dict] = []
+    year_str = str(year)
+
+    if not csv_path.is_file():
+        return gcc_series, image_list
+
+    # Explicit encoding so parsing does not depend on the platform locale.
+    with csv_path.open(encoding="utf-8") as f:
+        lines = [line for line in f if not line.startswith("#")]
+
+    for row in csv.DictReader(lines):
+        if row.get("year") != year_str:
+            continue
+        # DictReader fills columns missing from a short row with None, so
+        # ``row.get(key, "")`` is not enough to guarantee a string.
+        date = row.get("date") or ""
+        gcc_raw = (row.get("gcc_90") or "").strip()
+        if gcc_raw and gcc_raw != "NA":
+            try:
+                gcc_series.append({"date": date, "gcc_90": float(gcc_raw)})
+            except ValueError:
+                # Unparseable value: skip the GCC point but keep the image.
+                pass
+        fn = (row.get("midday_filename") or "").strip()
+        if fn and fn != "NA" and date:
+            month = date[5:7]
+            url = PHENOCAM_IMAGE_URL.format(
+                site=site, year=year_str, month=month, filename=fn
+            )
+            image_list.append({"date": date, "url": url})
+
+    return gcc_series, image_list
+
+
+# ---------------------------------------------------------------------------
+# Helpers: moving average
+# ---------------------------------------------------------------------------
+
+
+def _moving_average(
+    series: list[dict], value_key: str, window: int
+) -> list[dict]:
+    """Centred moving average over neighbouring series points.
+
+    For each point the mean of the non-``None`` values within
+    ``window // 2`` positions on either side is stored under
+    ``value_key + "_smooth"`` (``None`` if every neighbour is ``None``).
+    The window is counted in list positions, not in days.
+    """
+    if not series:
+        return []
+    radius = window // 2
+    values = [point[value_key] for point in series]
+    out_key = value_key + "_smooth"
+    averaged = []
+    for idx, point in enumerate(series):
+        lo = max(0, idx - radius)
+        neighbours = [v for v in values[lo: idx + radius + 1] if v is not None]
+        if neighbours:
+            mean = sum(neighbours) / len(neighbours)
+        else:
+            mean = None
+        averaged.append({"date": point["date"], out_key: mean})
+    return averaged
+
+
+# ---------------------------------------------------------------------------
+# Helpers: validation metrics
+# ---------------------------------------------------------------------------
+
+# Maximum distance in days between a predicted point and its matched reference date.
+MATCH_TOLERANCE_DAYS = 5
+
+
+def compute_metrics(
+    ref: list[dict], ref_key: str,
+    pred: list[dict], pred_key: str,
+) -> dict | None:
+    """Compute NSE, RMSE, nRMSE, Pearson r between pred and ref.
+
+    Each pred point with a non-``None`` value is matched to the nearest ref
+    date; on a tie the first date in sorted order wins.  The pair is kept only
+    when the dates are at most ``MATCH_TOLERANCE_DAYS`` apart.
+
+    Returns ``{"n", "rmse", "nrmse", "nse", "r"}`` with values rounded to four
+    decimals, or ``None`` if fewer than 2 matched pairs exist.  A metric that
+    is undefined (zero reference mean or variance, or a non-finite result such
+    as Pearson r of a constant series) is reported as ``None`` so the result
+    always serialises to valid JSON.
+    """
+    ref_lookup: dict[str, float] = {p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None}
+    if not ref_lookup:
+        return None
+
+    # Parse the reference dates once instead of once per prediction point.
+    ref_dates = sorted(ref_lookup)
+    ref_days = np.array(ref_dates, dtype="datetime64[D]")
+    one_day = np.timedelta64(1, "D")
+
+    obs, sim = [], []
+    for pt in pred:
+        v = pt.get(pred_key)
+        if v is None:
+            continue
+        gaps = np.abs((ref_days - np.datetime64(pt["date"], "D")) / one_day)
+        # argmin returns the first minimum, i.e. the first sorted date on ties.
+        nearest = int(np.argmin(gaps))
+        if gaps[nearest] <= MATCH_TOLERANCE_DAYS:
+            obs.append(ref_lookup[ref_dates[nearest]])
+            sim.append(v)
+
+    if len(obs) < 2:
+        return None
+
+    obs_arr = np.array(obs, dtype=float)
+    sim_arr = np.array(sim, dtype=float)
+    obs_mean = float(obs_arr.mean())
+
+    rmse = float(np.sqrt(np.mean((sim_arr - obs_arr) ** 2)))
+    nrmse = rmse / obs_mean if obs_mean else None
+    ss_res = float(np.sum((obs_arr - sim_arr) ** 2))
+    ss_tot = float(np.sum((obs_arr - obs_mean) ** 2))
+    nse = (1.0 - ss_res / ss_tot) if ss_tot else None
+    r, _ = pearsonr(obs_arr, sim_arr)
+
+    def _r4(v: float | None) -> float | None:
+        # NaN/inf would be written by ``json.dumps`` as bare NaN/Infinity,
+        # which strict JSON parsers reject.
+        if v is None or not np.isfinite(v):
+            return None
+        return round(float(v), 4)
+
+    return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(float(r))}
+
+
+# Band labels in raster band order (first label = band 1); used as the keys of
+# the per-date records produced by ``_read_multiband_center``.
+S2_BAND_NAMES = ["B02", "B03", "B04"]
+S3_BAND_NAMES = ["Oa04", "Oa06", "Oa08", "Oa17"]
+
+
+def _read_multiband_center(
+    path: Path, lat: float, lon: float, band_names: list[str]
+) -> dict[str, float | None]:
+    """Per-band mean of the 3×3 pixel neighbourhood of (lat, lon).
+
+    ``band_names[i]`` labels raster band ``i + 1``.  A label maps to ``None``
+    when the raster has no such band or no valid (non-nodata, non-zero,
+    non-NaN) pixel in the window.  Values are rounded to six decimals.  On any
+    read error every label maps to ``None``.
+    """
+    try:
+        with rasterio.open(path) as dataset:
+            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), dataset.crs, always_xy=True)
+            east, north = to_raster_crs.transform(lon, lat)
+            centre_row, centre_col = rowcol(dataset.transform, east, north)
+            top = max(0, centre_row - 1)
+            bottom = min(dataset.height, centre_row + 2)
+            left = max(0, centre_col - 1)
+            right = min(dataset.width, centre_col + 2)
+            patch_window = rasterio.windows.Window(left, top, right - left, bottom - top)
+            nodata_value = dataset.nodata
+            band_means = {}
+            for band_index, label in enumerate(band_names, start=1):
+                if band_index > dataset.count:
+                    band_means[label] = None
+                    continue
+                patch = dataset.read(band_index, window=patch_window).astype(float)
+                if nodata_value is not None:
+                    patch = np.where(patch == nodata_value, np.nan, patch)
+                patch[patch == 0] = np.nan
+                mean_value = np.nanmean(patch)
+                if np.isnan(mean_value):
+                    band_means[label] = None
+                else:
+                    band_means[label] = round(float(mean_value), 6)
+        return band_means
+    except Exception:
+        return dict.fromkeys(band_names)
+
+
+def _multiband_series(
+    tif_paths: list[Path],
+    date_fn,
+    lat: float,
+    lon: float,
+    band_names: list[str],
+    desc: str,
+) -> list[dict]:
+    """Build a date-sorted list of ``{date, <band>: value, …}`` records.
+
+    ``date_fn`` maps a path to ``YYYYMMDD`` (or ``None`` to skip the file).
+    Rasters where every band is ``None`` at the site location are dropped.
+    """
+    rows = []
+    for tif in tqdm(tif_paths, desc=desc, leave=False):
+        stamp = date_fn(tif)
+        if stamp is None:
+            continue
+        band_values = _read_multiband_center(tif, lat, lon, band_names)
+        if all(v is None for v in band_values.values()):
+            continue
+        iso_date = f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
+        rows.append({"date": iso_date, **band_values})
+    rows.sort(key=lambda rec: rec["date"])
+    return rows
+
+
+# ---------------------------------------------------------------------------
+# Helpers: spatial heterogeneity + observation density
+# ---------------------------------------------------------------------------
+
+
+def _read_footprint_stats(
+    path: Path, lat: float, lon: float, half_m: float = SPATIAL_CV_HALF_M
+) -> tuple[float, float] | tuple[None, None]:
+    """Mean and standard deviation of band 1 in a square window around (lat, lon).
+
+    The window reaches ``half_m`` on each side of the centre pixel, converted
+    to pixels via the raster's x pixel size (assumes CRS units are metres —
+    confirm for non-UTM rasters) and clipped to the raster extent.  Nodata,
+    non-positive and NaN pixels are discarded.  Returns ``(None, None)`` on
+    any error or when fewer than 4 valid pixels remain.
+    """
+    try:
+        with rasterio.open(path) as dataset:
+            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), dataset.crs, always_xy=True)
+            east, north = to_raster_crs.transform(lon, lat)
+            pixel_size = abs(dataset.transform.a)
+            reach = max(1, int(round(half_m / pixel_size)))
+            centre_row, centre_col = rowcol(dataset.transform, east, north)
+            top = max(0, centre_row - reach)
+            bottom = min(dataset.height, centre_row + reach + 1)
+            left = max(0, centre_col - reach)
+            right = min(dataset.width, centre_col + reach + 1)
+            patch_window = rasterio.windows.Window(left, top, right - left, bottom - top)
+            patch = dataset.read(1, window=patch_window).astype(float)
+            nodata_value = dataset.nodata
+        if nodata_value is not None:
+            patch = np.where(patch == nodata_value, np.nan, patch)
+        patch[patch <= 0] = np.nan
+        usable = patch[~np.isnan(patch)]
+        if len(usable) >= 4:
+            return float(np.mean(usable)), float(np.std(usable))
+        return None, None
+    except Exception:
+        return None, None
+
+
+def compute_covariates(
+    s2_gcc_paths: list[Path],
+    s2_series: list[dict],
+    s3_series: list[dict],
+    n_gcc_points: int | None,
+    lat: float,
+    lon: float,
+) -> dict:
+    """Summarise spatial heterogeneity and observation density for one site.
+
+    Spatial statistics are averaged over all S2 GCC rasters with a positive
+    footprint mean.  Gap statistics are the day differences between
+    consecutive ``s2_series`` entries in the order given.
+    """
+    # Per-raster (mean, std) over the footprint, keeping positive means only.
+    footprints = []
+    for tif in s2_gcc_paths:
+        mean, std = _read_footprint_stats(tif, lat, lon)
+        if mean is not None and mean > 0:
+            footprints.append((mean, std))
+
+    if footprints:
+        spatial_gcc_cv = round(float(np.mean([std / mean for mean, std in footprints])), 4)
+        spatial_gcc_std = round(float(np.mean([std for _, std in footprints])), 4)
+    else:
+        spatial_gcc_cv = None
+        spatial_gcc_std = None
+
+    # Day gaps between consecutive S2 scenes.
+    scene_days = [datetime.date.fromisoformat(rec["date"]) for rec in s2_series]
+    s2_mean_gap = None
+    s2_max_gap = None
+    if len(scene_days) >= 2:
+        gaps = [(later - earlier).days for earlier, later in zip(scene_days, scene_days[1:])]
+        s2_mean_gap = round(float(np.mean(gaps)), 1)
+        s2_max_gap = int(max(gaps))
+
+    return {
+        "spatial_gcc_cv": spatial_gcc_cv,
+        "spatial_gcc_std": spatial_gcc_std,
+        "s2_scene_count": len(s2_series),
+        "s2_mean_gap_days": s2_mean_gap,
+        "s2_max_gap_days": s2_max_gap,
+        "s3_composite_count": len(s3_series),
+        "n_gcc_points": n_gcc_points,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Per-site export
+# ---------------------------------------------------------------------------
+
+
+def _write_json(path: Path, data: Any) -> None:
+    """Serialise ``data`` as compact JSON (no whitespace) and write it to ``path``."""
+    payload = json.dumps(data, separators=(",", ":"))
+    path.write_text(payload)
+
+
+def _raster_series(
+    tif_paths: list[Path],
+    date_fn,
+    lat: float,
+    lon: float,
+    desc: str,
+) -> list[dict]:
+    """Build a date-sorted ``[{date, gcc}]`` series from single-band rasters.
+
+    ``date_fn`` maps a path to ``YYYYMMDD`` (or ``None`` to skip the file).
+    Rasters without a usable value at the site location are left out.
+    """
+    points = []
+    for tif in tqdm(tif_paths, desc=desc, leave=False):
+        stamp = date_fn(tif)
+        if stamp is None:
+            continue
+        gcc_value = _read_center_pixel(tif, lat, lon)
+        if gcc_value is None:
+            continue
+        iso_date = f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
+        points.append({"date": iso_date, "gcc": gcc_value})
+    points.sort(key=lambda rec: rec["date"])
+    return points
+
+
+def _raster_index(tif_paths: list[Path], date_fn, rel_root: Path) -> list[dict]:
+    """List ``{date, path}`` entries for the given rasters, sorted by date.
+
+    ``date_fn`` maps a path to its date string (or ``None`` to skip the file).
+    Paths are stored relative to ``rel_root`` where possible and unchanged
+    otherwise.
+    """
+    entries = []
+    for tif in tif_paths:
+        stamp = date_fn(tif)
+        if stamp is None:
+            continue
+        try:
+            shown = tif.relative_to(rel_root)
+        except ValueError:
+            # Not located under rel_root: keep the path as given.
+            shown = tif
+        entries.append({"date": stamp, "path": str(shown)})
+    entries.sort(key=lambda rec: rec["date"])
+    return entries
+
+
+def export_site(
+    site: str,
+    year: int,
+    lat: float,
+    lon: float,
+    out_dir: Path,
+    n_gcc_points: int | None = None,
+) -> bool:
+    """Export timeseries.json and rasters.json for one site. Returns True on success."""
+    sentinel_base = DATA_DIR / "sentinel_data" / str(year) / site / "prepared"
+    fusion_base = DATA_DIR / "fusion" / str(year) / site
+
+    s2_gcc_dir = sentinel_base / "s2"
+    s3_gcc_dir = sentinel_base / "gcc_s3"
+    bti_gcc_dir = fusion_base / "bti" / "gcc"
+    itb_gcc_dir = fusion_base / "itb" / "fusion"
+
+    # Raster slider sources
+    s2_refl_dir = sentinel_base / "s2"
+    s3_comp_dir = sentinel_base / "s3"
+    bti_refl_dir = fusion_base / "bti" / "fusion"
+
+    has_fusion = bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))
+    if not has_fusion:
+        return False
+
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # --- GCC timeseries from rasters ---
+    s2_gcc_paths = sorted(s2_gcc_dir.glob("*_GCC.tif"))
+    s3_gcc_paths = sorted(s3_gcc_dir.glob("composite_*.tif"))
+    bti_paths = sorted(bti_gcc_dir.glob("GCC_*.tif"))
+    itb_paths = sorted(itb_gcc_dir.glob("GCC_*.tif"))
+
+    s2_series = _raster_series(s2_gcc_paths, _date_from_s2_tif, lat, lon, f"{site} S2")
+    s3_series = _raster_series(s3_gcc_paths, _date_from_gcc_tif, lat, lon, f"{site} S3")
+    bti_series = _raster_series(bti_paths, _date_from_gcc_tif, lat, lon, f"{site} BtI")
+    itb_series = _raster_series(itb_paths, _date_from_gcc_tif, lat, lon, f"{site} ItB")
+
+    # Whittaker on S2
+    s2_vals = [p["gcc"] for p in s2_series]
+    s2_smooth_vals = _whittaker_smooth(s2_vals)
+    s2_whittaker = [
+        {"date": p["date"], "gcc": v}
+        for p, v in zip(s2_series, s2_smooth_vals)
+        if v is not None
+    ]
+
+    # S3 5-day moving average
+    s3_smooth = _moving_average(s3_series, "gcc", S3_SMOOTH_WINDOW)
+
+    # PhenoCam CSV
+    csv_path = DATA_DIR / "phenocam" / str(year) / f"{site}_1day.csv"
+    phenocam_series, image_list = _parse_phenocam_csv(csv_path, year, site)
+
+    s3_smooth_series = [
+        {"date": p["date"], "gcc": p["gcc_smooth"]}
+        for p in s3_smooth
+        if p.get("gcc_smooth") is not None
+    ]
+
+    # Band reflectance timeseries (multi-band center-pixel)
+    bands_s2 = _multiband_series(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands")
+    bands_s3 = _multiband_series(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands")
+
+    # --- Per-metric JSON outputs ---
+    _write_json(out_dir / "gcc_phenocam.json", phenocam_series)
+    _write_json(out_dir / "gcc_s2.json", s2_series)
+    _write_json(out_dir / "gcc_s2_whittaker.json", s2_whittaker)
+    _write_json(out_dir / "gcc_s3.json", s3_series)
+    _write_json(out_dir / "gcc_s3_smooth.json", s3_smooth_series)
+    _write_json(out_dir / "gcc_fusion_bti.json", bti_series)
+    _write_json(out_dir / "gcc_fusion_itb.json", itb_series)
+    _write_json(out_dir / "phenocam_images.json", image_list)
+    _write_json(out_dir / "bands_s2.json", bands_s2)
+    _write_json(out_dir / "bands_s3.json", bands_s3)
+
+    # --- Raster index for slider ---
+    rel_root = DATA_DIR.parent  # paths relative to project root
+
+    # Valid-pixel sets: only show S2/S3 rasters where the center pixel had
+    # usable data (non-zero GCC). This excludes cloud-masked / snow-covered
+    # scenes that would render as black or visually nonsensical.
+    s2_valid_dates = {p["date"].replace("-", "") for p in s2_series}
+    s3_valid_dates = {p["date"].replace("-", "") for p in s3_series}
+
+    s2_refl = [r for r in _raster_index(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root)
+               if r["date"] in s2_valid_dates]
+    s3_comp = [r for r in _raster_index(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
+               if r["date"] in s3_valid_dates]
+    s2_gcc = [r for r in _raster_index(sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root)
+              if r["date"] in s2_valid_dates]
+    s3_gcc = [r for r in _raster_index(sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
+              if r["date"] in s3_valid_dates]
+    bti_refl = _raster_index(sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root)
+    itb_gcc = _raster_index(sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root)
+
+    _write_json(out_dir / "rasters_s2_refl.json", s2_refl)
+    _write_json(out_dir / "rasters_s3_composite.json", s3_comp)
+    _write_json(out_dir / "rasters_s2_gcc.json", s2_gcc)
+    _write_json(out_dir / "rasters_s3_gcc.json", s3_gcc)
+    _write_json(out_dir / "rasters_fusion_bti_refl.json", bti_refl)
+    _write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc)
+
+    # --- Site covariates (heterogeneity + observation density) ---
+    _write_json(out_dir / "covariates.json", compute_covariates(
+        s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon
+    ))
+
+    # --- Validation metrics vs PhenoCam gcc_90 ---
+    _write_json(out_dir / "metrics.json", {
+        "bti":          compute_metrics(phenocam_series, "gcc_90", bti_series,       "gcc"),
+        "itb":          compute_metrics(phenocam_series, "gcc_90", itb_series,       "gcc"),
+        "s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker,     "gcc"),
+        "s3_smooth":    compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"),
+        "s2":           compute_metrics(phenocam_series, "gcc_90", s2_series,        "gcc"),
+        "s3":           compute_metrics(phenocam_series, "gcc_90", s3_series,        "gcc"),
+    })
+
+    # Remove legacy bundled outputs if present
+    for legacy in ("timeseries.json", "rasters.json"):
+        (out_dir / legacy).unlink(missing_ok=True)
+    return True
+
+
+# ---------------------------------------------------------------------------
+# Manifest
+# ---------------------------------------------------------------------------
+
+# Display labels for the two-letter vegetation codes that build_manifest()
+# takes from ``primary_veg_type`` / ``roitype`` (or its "UN" default).
+# Codes missing from this table are shown as the raw code, because the lookup
+# is ``VEG_TYPE_LABELS.get(code, code)``.
+VEG_TYPE_LABELS = {
+    "AG": "Agriculture",
+    "DB": "Deciduous broadleaf",
+    "DN": "Deciduous needleleaf",
+    "EB": "Evergreen broadleaf",
+    "EN": "Evergreen needleleaf",
+    "GR": "Grassland",
+    "MX": "Mixed",
+    "SH": "Shrubland",
+    "TN": "Tundra",
+    "UN": "Unknown",
+    "WL": "Wetland",
+    "RF": "Reference",
+}
+
+
+def build_manifest(years: list[int], filter_site: str | None = None) -> dict:
+    """Build the webapp manifest from the step-2 screening results.
+
+    For each year, ``DATA_DIR/phenocam_screening/{year}.json`` is read and every
+    entry whose ``calculations.status`` is ``"PASS"`` and whose camera has a
+    non-empty ``Sitename`` is recorded. Years without a screening file stay
+    listed under ``"years"`` but get no key under ``"sites"``.
+
+    Args:
+        years: Calendar years to include.
+        filter_site: If given (non-empty), keep only the site with this exact
+            name.
+
+    Returns:
+        ``{"years": years, "sites": {"<year>": {"<sitename>": {...}}}}`` where
+        each site record carries ``lat``, ``lon``, ``veg_type``, ``veg_label``,
+        ``description``, ``dominant_species``, ``group``, ``snr``,
+        ``n_gcc_points`` and ``has_fusion``.
+    """
+    manifest: dict[str, Any] = {"years": years, "sites": {}}
+
+    for year in years:
+        screening_path = DATA_DIR / "phenocam_screening" / f"{year}.json"
+        if not screening_path.is_file():
+            continue
+        # Read as UTF-8 explicitly instead of relying on the platform locale.
+        # NOTE(review): assumes step 2 writes ASCII/UTF-8 JSON — confirm.
+        data = json.loads(screening_path.read_text(encoding="utf-8"))
+        fusion_root = DATA_DIR / "fusion" / str(year)
+        sites_meta: dict[str, Any] = {}
+        # ``or {}`` / ``or []`` also cover keys that are present but JSON null,
+        # which ``.get(key, {})`` would pass through as None and crash on.
+        for entry in data.get("sites") or []:
+            calc = entry.get("calculations") or {}
+            if calc.get("status") != "PASS":
+                continue
+            response = entry.get("response") or {}
+            cam = response.get("camera") or {}
+            roi = response.get("roi") or {}
+            site = cam.get("Sitename", "")
+            if not site:
+                continue
+            if filter_site and site != filter_site:
+                continue
+            sm = cam.get("sitemetadata") or {}
+            # Prefer the camera's vegetation type, then the ROI type, then "UN".
+            veg_raw = sm.get("primary_veg_type") or roi.get("roitype") or "UN"
+            # A site counts as fused once at least one BtI GCC raster exists.
+            fusion_dir = fusion_root / site / "bti" / "gcc"
+            has_fusion = fusion_dir.is_dir() and any(fusion_dir.glob("GCC_*.tif"))
+            sites_meta[site] = {
+                "lat": cam.get("Lat"),
+                "lon": cam.get("Lon"),
+                "veg_type": veg_raw,
+                "veg_label": VEG_TYPE_LABELS.get(veg_raw, veg_raw),
+                "description": sm.get("site_description", ""),
+                "dominant_species": sm.get("dominant_species", ""),
+                "group": sm.get("group", ""),
+                "snr": calc.get("snr"),
+                "n_gcc_points": calc.get("n_gcc_points"),
+                "has_fusion": has_fusion,
+            }
+        manifest["sites"][str(year)] = sites_meta
+
+    return manifest
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    """CLI entry point: write ``metrics/manifest.json`` and per-site exports.
+
+    Flags:
+        --evaluation-year: year whose sites are exported (default
+            ``DEFAULT_YEAR``).
+        --site: optional single sitename; limits which site is exported.
+
+    The manifest covers every year that has a ``phenocam_screening/{year}.json``
+    file (falling back to the requested year when none exist). Per-site JSON is
+    written to ``metrics/{year}/{site}/`` for sites of the requested year that
+    have fusion output.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
+    parser.add_argument("--site", type=str, default=None)
+    args = parser.parse_args()
+
+    year = args.evaluation_year
+    filter_site = args.site
+
+    out_base = DATA_DIR / "metrics"
+    out_base.mkdir(parents=True, exist_ok=True)
+
+    # Determine years with screening data
+    screening_dir = DATA_DIR / "phenocam_screening"
+    years = sorted(
+        int(p.stem) for p in screening_dir.glob("*.json") if p.stem.isdigit()
+    )
+    if not years:
+        years = [year]
+
+    print(f"Building manifest for years: {years}")
+    # manifest.json is one shared file for all sites, so it is always built
+    # unfiltered. Passing --site through here would overwrite it with a
+    # single-site manifest and drop every other site.
+    manifest = build_manifest(years)
+
+    # Export per-site data for the requested year; --site narrows only this step.
+    year_sites = manifest["sites"].get(str(year), {})
+    fusion_sites = {
+        s: m
+        for s, m in year_sites.items()
+        if m["has_fusion"] and (not filter_site or s == filter_site)
+    }
+
+    print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}")
+    for site, meta in tqdm(fusion_sites.items(), desc="Sites"):
+        out_dir = out_base / str(year) / site
+        ok = export_site(site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points"))
+        # tqdm.write keeps the progress bar intact; a bare print() garbles it.
+        if ok:
+            tqdm.write(f"  ✓ {site}")
+        else:
+            tqdm.write(f"  ✗ {site} — no fusion data found")
+
+    manifest_path = out_base / "manifest.json"
+    manifest_path.write_text(json.dumps(manifest, separators=(",", ":")), encoding="utf-8")
+    print(f"Manifest written → {manifest_path}")
+
+
+if __name__ == "__main__":
+    main()
--- a/AGENTS.md
+++ b/AGENTS.md
@ -0,0 +1,151 @@
+# AGENTS.md
+
+Worldwide PhenoCam EFAST feasibility screening. Human summary: [`README.md`](README.md).
+
+---
+
+## Layout
+
+| Path | Role |
+|------|------|
+| `1-phenocam.py` | Step 1: download PhenoCam metadata + `one_day_summary` CSV |
+| `2-phenocam-screening.py` | Step 2: PhenoCam + SNR gates on cached CSVs |
+| `3-sentinel-data.py` | Step 3: S2 (Earth Search COG) + S3 (CDSE OpenEO) download + EFAST prep |
+| `4-fusion.py` | Step 4: GCC computation + EFAST BtI/ItB fusion loop |
+| `5-metrics.py` | Step 5: timeseries, covariates, `metrics.json`, webapp manifest |
+| `data/` | Manifests, per-site caches, screening outputs (large; mostly generated) |
+| `webapp/` | Static QA viewer (`make serve` from workspace root) |
+
+Workspace orchestration: [`../AGENTS.md`](../AGENTS.md).
+
+---
+
+## Where to work
+
+| Task | Location |
+|------|----------|
+| PhenoCam bulk download | `1-phenocam.py` |
+| GCC/SNR screening on disk | `2-phenocam-screening.py` |
+| S2/S3 download + EFAST prep | `3-sentinel-data.py` |
+| GCC + fusion | `4-fusion.py` |
+| Metrics + webapp index | `5-metrics.py` |
+| Web QA | `../Makefile` target `serve` → `webapp/index.html` |
+
+---
+
+## Setup
+
+**Preferred (uv):** from `processing/`:
+
+```bash
+uv sync                              # all deps from pyproject.toml (incl. efast)
+```
+
+Run any script as `uv run python <script>.py …`. Python version is pinned in `.python-version` (3.11.10).
+
+Credentials:
+
+- `CDSE_USER` — Copernicus Data Space username
+- `CDSE_PASSWORD` — Copernicus Data Space password
+
+Both are required for the step 3 S3 download (CDSE OpenEO). The step 3 S2 download uses AWS Earth Search (no auth).
+
+---
+
+## CLI convention
+
+Every numbered step script shares two user-facing flags:
+
+| Flag | Default | Role |
+|------|---------|------|
+| `--evaluation-year` | `2025` | Calendar year; input/output paths under `data/` use `{year}` |
+| `--site` | all eligible | Single sitename to limit scope (testing or single-site runs) |
+
+All other tunable parameters (bands, resolution ratio, compositing window, etc.) are public constants at the top of each script. Paths are derived from the year — do not pass manifest paths on the CLI. Each script docstring lists **Inputs** and **Outputs** under `data/`.
+
+Resume behaviour: step 3 skips S3 sites when `raw/s3/S3*.tif` already exist; step 3 skips S2 scenes when `*_REFL.tif` already exists. Step 4 skips GCC/fusion files that already exist. Step 5 overwrites JSON sidecars for processed sites.
+
+Example:
+
+```bash
+uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
+uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
+uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
+```
+
+---
+
+## Workflow
+
+### Stepped pipeline (resumable)
+
+```bash
+uv run python 1-phenocam.py --evaluation-year 2025
+uv run python 2-phenocam-screening.py --evaluation-year 2025
+uv run python 3-sentinel-data.py --evaluation-year 2025
+uv run python 4-fusion.py --evaluation-year 2025
+uv run python 5-metrics.py --evaluation-year 2025
+
+# single site
+uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
+uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
+uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
+```
+
+S3 uses CDSE OpenEO collection `SENTINEL3_SYN_L2_SYN` (bands Oa04/Oa06/Oa08/Oa17). S2 uses AWS Earth Search COG range reads (no auth). No S2↔S3 radiometric harmonisation.
+
+---
+
+## Screening gates
+
+### Step 2 (`2-phenocam-screening.py`)
+
+| Gate | Rule |
+|------|------|
+| `phenocam` | ROI + `one_day_summary` CSV; ≥ `MIN_GCC_POINTS` (30) valid `gcc_90` in evaluation year |
+| `snr` | AIC-selected cubic spline SNR ≥ `SNR_THRESHOLD` (2.0) |
+| `cluster` | SNR-passed sites within 500 m deduplicated; keep highest `n_gcc_points` (SNR tie-break) |
+
+---
+
+## Data layout
+
+**Naming:** `data/` paths follow step script names — `1-phenocam.py` → `phenocam/`, `2-phenocam-screening.py` → `phenocam_screening/`, `3-sentinel-data.py` → `sentinel_data/`, `4-fusion.py` → `fusion/`, `5-metrics.py` → `metrics/`.
+
+```
+data/
+  phenocam/
+    {year}.json                           # step-1 manifest
+    {year}/
+      {sitename}.json                     # camera + ROI API payload
+      {sitename}_1day.csv                 # raw PhenoCam summary CSV
+  phenocam_screening/
+    {year}.json                           # step-2 results
+    {year}.csv
+  sentinel_data/{year}/{sitename}/
+    raw/s3/                               # step 3: S3 SYN L2 per-date GeoTIFFs
+    prepared/s2/                          # step 3: *_REFL.tif, *_DIST_CLOUD.tif, *_GCC.tif
+    prepared/s3/                          # step 3: composite_*.tif
+    prepared/gcc_s3/                      # step 4: single-band GCC composites
+    data.json                             # step-3 run summary
+  fusion/{year}/{sitename}/
+    bti/fusion/REFL_*.tif                 # step 4: BtI fused reflectance
+    bti/gcc/GCC_*.tif                     # step 4: BtI GCC
+    itb/s2/GCC_*.tif                      # step 4: S2 GCC (ItB stack)
+    itb/s3/GCC_*.tif                      # step 4: S3 GCC (ItB stack)
+    itb/fusion/GCC_*.tif                  # step 4: ItB fused GCC
+  metrics/
+    manifest.json                         # step 5: years + site metadata for webapp
+    {year}/{sitename}/
+      gcc_*.json, metrics.json, covariates.json, rasters_*.json, bands_*.json
+```
+
+---
+
+## Module map
+
+| File | Responsibility |
+|------|----------------|
+| `1-phenocam.py` | Paginate PhenoCam API; cache JSON + CSV; write manifest |
+| `2-phenocam-screening.py` | Parse cached CSVs; PhenoCam + SNR gates |
+| `3-sentinel-data.py` | S2 COG range reads (Earth Search); S3 OpenEO download; EFAST REFL/DIST_CLOUD/composites |
+| `4-fusion.py` | GCC from S2 REFL + S3 composites; daily `efast.fusion` BtI + ItB |
+| `5-metrics.py` | PhenoCam-matched GCC series, baselines, fusion metrics, raster index, covariates |
--- a/619
+++ b/619
@ -1,619 +0,0 @@
-GNU AFFERO GENERAL PUBLIC LICENSE
-=================================
-
-Version 3, 19 November 2007
-
-Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-
-                            Preamble
-
-The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
-The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
-When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
-A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate.  Many developers of free software are heartened and
-encouraged by the resulting cooperation.  However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
-The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community.  It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server.  Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
-An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals.  This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
-The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-"This License" refers to version 3 of the GNU Affero General Public License.
-
-"Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-"The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-a) The work must carry prominent notices stating that you modified
-it, and giving a relevant date.
-
-b) The work must carry prominent notices stating that it is
-released under this License and any conditions added under section
-7.  This requirement modifies the requirement in section 4 to
-"keep intact all notices".
-
-c) You must license the entire work, as a whole, under this
-License to anyone who comes into possession of a copy.  This
-License will therefore apply, along with any applicable section 7
-additional terms, to the whole of the work, and all its parts,
-regardless of how they are packaged.  This License gives no
-permission to license the work in any other way, but it does not
-invalidate such permission if you have separately received it.
-
-d) If the work has interactive user interfaces, each must display
-Appropriate Legal Notices; however, if the Program has interactive
-interfaces that do not display Appropriate Legal Notices, your
-work need not make them do so.
-
-A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-a) Convey the object code in, or embodied in, a physical product
-(including a physical distribution medium), accompanied by the
-Corresponding Source fixed on a durable physical medium
-customarily used for software interchange.
-
-b) Convey the object code in, or embodied in, a physical product
-(including a physical distribution medium), accompanied by a
-written offer, valid for at least three years and valid for as
-long as you offer spare parts or customer support for that product
-model, to give anyone who possesses the object code either (1) a
-copy of the Corresponding Source for all the software in the
-product that is covered by this License, on a durable physical
-medium customarily used for software interchange, for a price no
-more than your reasonable cost of physically performing this
-conveying of source, or (2) access to copy the
-Corresponding Source from a network server at no charge.
-
-c) Convey individual copies of the object code with a copy of the
-written offer to provide the Corresponding Source.  This
-alternative is allowed only occasionally and noncommercially, and
-only if you received the object code with such an offer, in accord
-with subsection 6b.
-
-d) Convey the object code by offering access from a designated
-place (gratis or for a charge), and offer equivalent access to the
-Corresponding Source in the same way through the same place at no
-further charge.  You need not require recipients to copy the
-Corresponding Source along with the object code.  If the place to
-copy the object code is a network server, the Corresponding Source
-may be on a different server (operated by you or a third party)
-that supports equivalent copying facilities, provided you maintain
-clear directions next to the object code saying where to find the
-Corresponding Source.  Regardless of what server hosts the
-Corresponding Source, you remain obligated to ensure that it is
-available for as long as needed to satisfy these requirements.
-
-e) Convey the object code using peer-to-peer transmission, provided
-you inform other peers where the object code and Corresponding
-Source of the work are being offered to the general public at no
-charge under subsection 6d.
-
-A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-"Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-"Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-a) Disclaiming warranty or limiting liability differently from the
-terms of sections 15 and 16 of this License; or
-
-b) Requiring preservation of specified reasonable legal notices or
-author attributions in that material or in the Appropriate Legal
-Notices displayed by works containing it; or
-
-c) Prohibiting misrepresentation of the origin of that material, or
-requiring that modified versions of such material be marked in
-reasonable ways as different from the original version; or
-
-d) Limiting the use for publicity purposes of names of licensors or
-authors of the material; or
-
-e) Declining to grant rights under trademark law for use of some
-trade names, trademarks, or service marks; or
-
-f) Requiring indemnification of licensors and authors of that
-material by anyone who conveys the material (or modified versions of
-it) with contractual assumptions of liability to the recipient, for
-any liability that these contractual assumptions directly impose on
-those licensors and authors.
-
-All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Remote Network Interaction; Use with the GNU General Public License.
-
-Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software.  This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
-Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
-  14. Revised Versions of this License.
-
-The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time.  Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
--- a/README.md
+++ b/README.md
@ -1,146 +1,57 @@
-# Satellite Data Fusion Pipeline
+# Worldwide PhenoCam EFAST feasibility screening

-Python pipeline for downloading Sentinel-2 and Sentinel-3 imagery and PhenoCam ground truth, applying NDVI-based cloud pre-selection, fusing sensors with the [EFAST](https://github.com/DHI-GRAS/efast) algorithm, and evaluating fused **Green Chromatic Coordinate (GCC)** time series against PhenoCam `gcc_90`.
+Screen the global [PhenoCam Network](https://phenocam.nau.edu/) for sites where EFAST Sentinel-2 / Sentinel-3 fusion is likely to work, i.e. sites with enough PhenoCam `gcc_90` data, a seasonal signal, and S2/S3 coverage for a calendar year.

-## Features
+Agent-oriented detail: [`AGENTS.md`](AGENTS.md).

- **Acquisition** — S2 L2A (AWS Element84 STAC), S3 OLCI L1B (Copernicus OpenEO), PhenoCam midday images and GCC CSV
- **Pre-selection** — Aggressive and non-aggressive NDVI-based cloud screening (plus dark-scene rejection)
- **Preparation** — Harmonised reflectance/GCC rasters, distance-to-cloud weights, S3 compositing and optional temporal smoothing
- **Fusion** — EFAST under eight scenarios per site (BtI and ItB × two strategies × σ ∈ {20, 30} days)
- **Post-processing** — Crop to valid-data window; NDVI and GCC timeseries at the site
- **Metrics** — Temporal comparison vs PhenoCam (`metrics.json`); optional Tier-2 withheld-S2 gap validation
- **Web viewer** — Static HTML dashboard over pipeline outputs (`webapp/`)
+---

-## Installation
+## Quick start
+
+From `processing/`:

 ```bash
-pip install -r requirements.txt
-pip install git+https://github.com/DHI-GRAS/efast.git   # not on PyPI
+uv sync
+uv run python 1-phenocam.py --evaluation-year 2025
 ```

-Create `.env` with Copernicus Data Space credentials:
+### Stepped pipeline (resumable)

- `CDSE_USER`
- `CDSE_PASSWORD`
-
-Python version is pinned in `.python-version` (use `.venv/` locally).
-
-## Usage
-
-```python
-from run import run_pipeline
-
-run_pipeline(season=2024, site_position=(47.116171, 11.320308), site_name="innsbruck")
-```
-
-`site_position` is always **`(lat, lon)`**. Study sites are listed at the bottom of `run.py`: `innsbruck`, `forthgr`, `pitsalu`, `vindeln2`, `sunflowerjerez1`, `institutekarnobat`.
-
-By default, most stages in `run.py` are **commented out** (metrics-only). Uncomment acquisition → pre-selection → preparation → fusion → post-processing for a full run.
-
-### Pipeline stages
-
-1. Download S2, S3, and PhenoCam
-2. Pre-selection (per-sensor NDVI screening → `raw/preselection/`)
-3. Prepare S2/S3 for each strategy (`prepared_{aggressive|nonaggressive}/` and `_itb/` variants)
-4. EFAST fusion (BtI reflectance and ItB GCC products)
-5. Post-process crops and timeseries (`processed_*_sigma{20,30}/`)
-6. Compute metrics vs PhenoCam → `metrics.json`
-
-### Gap validation (optional)
-
-With prepared data and EFAST installed:
+All steps use `--evaluation-year` (default 2025) and an optional site filter: `--sites` (comma-separated) for step 1, `--site` in the single-site examples below. See each script docstring for inputs/outputs under `data/`.

 ```bash
-# Phenology sidecars (TIMESAT 50 % amplitude)
-python -m phenology_timesat --all
+uv run python 1-phenocam.py --evaluation-year 2025
+uv run python 2-phenocam-screening.py --evaluation-year 2025
+uv run python 3-sentinel-data.py --evaluation-year 2025
+uv run python 4-fusion.py --evaluation-year 2025
+uv run python 5-metrics.py --evaluation-year 2025

-# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up)
-python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \
-  --strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30
-
-# All six sites, best BtI scenario per site
-python -m gap_validation.batch_spatial
-
-# Full-season NSE_PC on gap-degraded stack (slow)
-python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688
-python -m gap_validation.batch_temporal
-
-# TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier)
-python -m gap_validation.phenology_offsets
+# single site
+uv run python 3-sentinel-data.py --evaluation-year 2025 --site innsbruck
+uv run python 4-fusion.py --evaluation-year 2025 --site innsbruck
+uv run python 5-metrics.py --evaluation-year 2025 --site innsbruck
 ```

-Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`.
+In step 3, Sentinel-3 (S3) uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`), while Sentinel-2 (S2) uses AWS Earth Search COG range reads (no auth).

-## Data layout
+---

-```
-data/{site_name}/{season}/
-  raw/
-    s2/                    # {YYYYMMDD}_{n}.geotiff — B02, B03, B04, B8A
-    s3/                    # {YYYYMMDD}_{n}.geotiff — Oa04, Oa06, Oa08, Oa17
-    phenocam/              # JPEGs, GCC JSON, phenology sidecar
-    preselection/          # {s2,s3}_preselection.{json,csv}
-  prepared_{strategy}/
-    s2/                    # REFL + DIST_CLOUD GeoTIFFs
-    s3/                    # composite_{YYYYMMDD}.tif
-    fusion/                # REFL_{YYYYMMDD}.tif (σ≈20)
-    fusion_sigma30/        # REFL (σ=30)
-  prepared_{strategy}_itb/
-    s2/  s3/  fusion/      # GCC products (Index-then-Blend)
-  processed_{strategy}_sigma{20,30}/
-    s2/  s3/  fusion/      # cropped {YYYYMMDD}_0.geotiff
-    gcc/  ndvi/            # timeseries.json per source
-  processed_{strategy}_itb_sigma{20,30}/
-    s2/  s3/  fusion/  gcc/
-  validation/            # gap experiment (when run)
-  metrics.json
-```
+## Outputs (under `data/`)

-Site metadata: `data/sites.geojson` (six thesis sites). `data/coweeta/` is local/legacy and not listed there.
+| Artifact | Step | Role |
+|----------|------|------|
+| `phenocam/{year}.json` | 1 | Site list + `sites_dir` pointer |
+| `phenocam/{year}/{site}.json`, `{site}_1day.csv` | 1 | Raw API + GCC CSV |
+| `phenocam_screening/{year}.json` / `.csv` | 2 | PhenoCam + SNR gate results |
+| `sentinel_data/{year}/{site}/prepared/s2/` | 3 | S2 REFL + DIST_CLOUD GeoTIFFs |
+| `sentinel_data/{year}/{site}/prepared/s3/` | 3 | S3 composite GeoTIFFs |
+| `fusion/{year}/{site}/` | 4 | BtI/ItB fused rasters |
+| `metrics/{year}/{site}/`, `metrics/manifest.json` | 5 | Timeseries JSON, covariates, webapp manifest |

-### File formats
+The 2025 manifest currently lists **739** cameras with archive overlap; most per-site CSV/JSON files are cached under `data/phenocam/2025/`.

-**Sentinel-2** — Multi-band GeoTIFF; bands `[blue, green, red, nir]`; `VIEWING_ZENITH_ANGLE` metadata; filename `{YYYYMMDD}_{increment}.geotiff`.
-
-**Sentinel-3** — Multi-band GeoTIFF; same band order; filename `{YYYYMMDD}_{increment}.geotiff`.
-
-**Prepared S2** — `S2A_MSIL2A_{YYYYMMDD}_REFL.tif` plus `*DIST_CLOUD.tif` (cloud-distance weights for EFAST).
+---

 ## Web viewer

-Static HTML/JS in `webapp/` — no build step. Shared GeoTIFF helpers: `webapp/common.js`. CDN: Leaflet, geotiff.js, proj4. Symlink: `webapp/data` → `../data`.
-
-Serve from the **repository root** (not `webapp/`):
-
-```bash
-python3 -m http.server 8000
-# http://localhost:8000/webapp/index.html
-```
-
-Or from the workspace root: `make serve`.
-
-| Page | Purpose | Primary data paths |
-|------|---------|-------------------|
-| `index.html` | Post-processed maps, NDVI/GCC timeseries, PhenoCam | `processed_{strategy}_sigma{n}/`, `raw/phenocam/` |
-| `preselection.html` | Cloud-screening diagnostics | `raw/preselection/{s2,s3}_preselection.json` |
-| `prepared.html` | Prepared REFL/GCC before crop | `prepared_{strategy}/`, `prepared_{strategy}_itb/` |
-| `fusion.html` | EFAST daily fusion rasters | `prepared_*/fusion/`, `fusion_sigma30/` |
-| `postprocessed.html` | Cropped processed stacks | `processed_*_sigma*/` |
-| `metrics.html` | Tabular `metrics.json` (thesis export source) | `{site}/{season}/metrics.json` under `webapp/data/` |
-| `gap_validation.html` | Withheld-S2 gap experiment | `{site}/{season}/validation/gap_validation_summary.json` |
-| `phenology.html` | TIMESAT on PhenoCam GCC | `raw/phenocam/phenocam_phenology.json` |
-
-Site/season dropdowns use `data/sites.geojson`. Map pages: **BtI | ItB**; scenarios `aggressive` / `nonaggressive`, σ 20 / 30. Keep the shared nav consistent across all eight pages. QA only — thesis tables are exported from the workspace root (`make export` or `../scripts/export_thesis_tables.py`).
-
-## Development
-
-```bash
-ruff check --fix . && ruff format .
-```
-
-Pre-commit hooks: `.pre-commit-config.yaml`.
-
-## License
-
-GNU Affero General Public License v3.0 (AGPL-3.0). See [LICENSE](LICENSE).
+From the workspace root, `make serve` serves `processing/` at [http://localhost:8000/webapp/index.html](http://localhost:8000/webapp/index.html). Requires the output of step 5 (`data/metrics/manifest.json`).
--- a/acquisition_phenocam.py
+++ b/acquisition_phenocam.py
@ -1,282 +0,0 @@
-"""PhenoCam acquisition from PhenoCam Network API."""
-
-import csv
-import json
-import requests
-from pathlib import Path
-from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-PHENOCAM_API = "https://phenocam.nau.edu/api"
-
-
-def _phenocam_summary_gcc_value(row, use_mean_fallback: bool):
-    """Extract daily GCC from a one-day summary row.
-
-    Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth).
-    Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present.
-    With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``.
-    """
-    if not use_mean_fallback:
-        oflag = row.get("outlierflag_gcc_90")
-        if oflag is not None and str(oflag).strip() in ("1", "1.0"):
-            return None
-
-    raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
-    if raw is None:
-        return None
-    text = str(raw).strip()
-    if not text or text.upper() == "NA":
-        return None
-    try:
-        val = float(text)
-    except ValueError:
-        return None
-    if val <= -9998.0:
-        return None
-    return val
-
-
-def _find_start_offset(site_name, start_dt, total_count):
-    """Binary search to find approximate offset for start date."""
-    low, high = 0, total_count - 1
-    limit = 1
-
-    for _ in range(15):
-        mid = (low + high) // 2
-        response = requests.get(
-            f"{PHENOCAM_API}/middayimages/",
-            params={"site": site_name, "limit": limit, "offset": mid},
-            timeout=30,
-        )
-        response.raise_for_status()
-        results = response.json().get("results", [])
-        if not results:
-            break
-
-        mid_date_str = results[0].get("imgdate", "")
-        if not mid_date_str:
-            break
-
-        try:
-            mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
-            if mid_date < start_dt:
-                low = mid + 1
-            else:
-                high = mid
-        except ValueError:
-            break
-
-    return max(0, low - 100)
-
-
-def download_phenocam(season, site_position, site_name, date_range=None):
-    """Wrapper that downloads both phenocam images and GCC time series."""
-    _download_phenocam_images(season, site_position, site_name, date_range)
-    _download_phenocam_gcc(season, site_position, site_name, date_range)
-
-
-def _download_phenocam_images(season, site_position, site_name, date_range=None):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    start_date, end_date = datetime_range.split("/")
-    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
-    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
-    try:
-        response = requests.get(
-            f"{PHENOCAM_API}/middayimages/",
-            params={"site": site_name, "limit": 1},
-            timeout=30,
-        )
-        response.raise_for_status()
-        total_count = response.json().get("count", 0)
-
-        if total_count == 0:
-            print(f"[PhenoCam] No images found for site '{site_name}'")
-            return
-
-        print(
-            f"[PhenoCam] Found {total_count} total images, estimating start offset..."
-        )
-        start_offset = _find_start_offset(site_name, start_dt, total_count)
-
-        url = f"{PHENOCAM_API}/middayimages/"
-        params = {"site": site_name, "offset": start_offset}
-
-        print(f"[PhenoCam] Fetching image list from offset {start_offset}...")
-        images = []
-        page = 1
-        max_pages = 500
-        past_end_date = False
-
-        while url and page <= max_pages and not past_end_date:
-            response = requests.get(url, params=params, timeout=30)
-            response.raise_for_status()
-            data = response.json()
-            results = data.get("results", [])
-
-            if not results:
-                break
-
-            for img in results:
-                img_date_str = img.get("imgdate", "")
-                if not img_date_str:
-                    continue
-                try:
-                    img_date = datetime.strptime(img_date_str, "%Y-%m-%d")
-                    if img_date > end_dt:
-                        past_end_date = True
-                        break
-                    if start_dt <= img_date <= end_dt:
-                        images.append(img)
-                except ValueError:
-                    continue
-
-            if url and not past_end_date:
-                url = data.get("next")
-                params = None
-                page += 1
-                if page % 50 == 0:
-                    print(
-                        f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..."
-                    )
-    except requests.exceptions.HTTPError as e:
-        if e.response.status_code == 404:
-            print(f"[PhenoCam] Site '{site_name}' not found")
-            return
-        raise
-
-    print(f"[PhenoCam] Found {len(images)} images")
-
-    def _download_image(img):
-        date_str = img.get("imgdate", "").replace("-", "")
-        if not date_str:
-            return None
-
-        filepath = output_dir / f"{date_str}.jpg"
-        if filepath.exists():
-            return f"Skipped {date_str}.jpg (exists)"
-
-        img_path = img.get("imgpath")
-        if not img_path:
-            return None
-
-        img_url = f"https://phenocam.nau.edu{img_path}"
-        try:
-            img_response = requests.get(img_url, timeout=30)
-            img_response.raise_for_status()
-            filepath.write_bytes(img_response.content)
-            return f"Saved {date_str}.jpg"
-        except Exception as e:
-            return f"Error downloading {date_str}: {e}"
-
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        futures = [executor.submit(_download_image, img) for img in images]
-        for future in as_completed(futures):
-            result = future.result()
-            if result:
-                print(f"[PhenoCam] {result}")
-
-    print("[PhenoCam] Completed")
-
-
-def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
-    """Fetch greenness-index time series from PhenoCam API. Saves JSON and CSV."""
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_file = Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_gcc.json")
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-
-    start_date, end_date = datetime_range.split("/")
-    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
-    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
-    print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")
-
-    # Get ROIs for site (paginate through results)
-    try:
-        url = f"{PHENOCAM_API}/roilists/"
-        params = {"site": site_name}
-        rois = []
-        while url:
-            r = requests.get(url, params=params, timeout=30)
-            r.raise_for_status()
-            data = r.json()
-            rois.extend(
-                [roi for roi in data.get("results", []) if roi["site"] == site_name]
-            )
-            url = data.get("next")
-            params = None
-            if len(rois) > 0:
-                break
-        if not rois:
-            print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
-            return
-        csv_url = rois[0].get("one_day_summary")
-        if not csv_url:
-            print("[PhenoCam-GI] No CSV data URL found for ROI")
-            return
-    except requests.exceptions.RequestException as e:
-        print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
-        return
-
-    # Fetch CSV data
-    try:
-        csv_r = requests.get(csv_url, timeout=30)
-        csv_r.raise_for_status()
-        lines = [
-            line for line in csv_r.text.split("\n") if line and not line.startswith("#")
-        ]
-        reader = csv.DictReader(lines)
-        fieldnames = reader.fieldnames or ()
-        use_mean_fallback = "gcc_90" not in fieldnames
-        if use_mean_fallback:
-            print(
-                "[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)"
-            )
-
-        timeseries = []
-        for row in reader:
-            try:
-                date_str = row.get("date")
-                if not date_str:
-                    continue
-                date = datetime.strptime(date_str, "%Y-%m-%d")
-                if start_dt <= date <= end_dt:
-                    gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
-                    if gcc is not None:
-                        timeseries.append(
-                            {"date": date.isoformat(), "greenness_index": gcc}
-                        )
-            except (ValueError, KeyError):
-                continue
-    except requests.exceptions.RequestException as e:
-        print(f"[PhenoCam-GI] Error fetching CSV: {e}")
-        return
-
-    timeseries.sort(key=lambda x: x["date"])
-
-    output_dir = output_file.parent
-    json_path = output_dir / "phenocam_gcc.json"
-    csv_path = output_dir / "phenocam_gcc.csv"
-
-    with open(json_path, "w") as f:
-        json.dump(timeseries, f, indent=2)
-
-    with open(csv_path, "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=["date", "greenness_index"])
-        writer.writeheader()
-        writer.writerows(timeseries)
-
-    print(
-        f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)"
-    )
-
-    from phenocam_snr import write_phenocam_snr
-
-    write_phenocam_snr(site_name, season, base=Path("data"))
--- a/acquisition_s2.py
+++ b/acquisition_s2.py
@ -1,190 +0,0 @@
-"""Sentinel-2-MSI acquisition from AWS Element84 Earth Search (STAC catalog)."""
-import numpy as np
-import rasterio
-import xml.etree.ElementTree as ET
-import requests
-from pathlib import Path
-from rasterio.crs import CRS
-from rasterio.warp import Resampling, calculate_default_transform, reproject, transform_geom
-from rasterio.windows import from_bounds, transform as window_transform
-from pystac_client import Client
-
-BBOX_SIZE = 0.011
-TARGET_CRS = CRS.from_epsg(32632)
-
-
-def _get_bbox(lon, lat):
-    half = BBOX_SIZE / 2
-    return [lon - half, lat - half, lon + half, lat + half]
-
-
-def _get_window_for_bbox(src, bbox):
-    bbox_geom = {
-        "type": "Polygon",
-        "coordinates": [
-            [
-                [bbox[0], bbox[1]],
-                [bbox[2], bbox[1]],
-                [bbox[2], bbox[3]],
-                [bbox[0], bbox[3]],
-                [bbox[0], bbox[1]],
-            ]
-        ],
-    }
-    bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom)
-    coords = bbox_transformed["coordinates"][0]
-    x_coords = [c[0] for c in coords[:4]]
-    y_coords = [c[1] for c in coords[:4]]
-    bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)]
-    src_bounds = src.bounds
-    intersect_bbox = [
-        max(bbox_crs[0], src_bounds.left),
-        max(bbox_crs[1], src_bounds.bottom),
-        min(bbox_crs[2], src_bounds.right),
-        min(bbox_crs[3], src_bounds.top),
-    ]
-    return from_bounds(*intersect_bbox, src.transform)
-
-
-def _extract_viewing_angle(item):
-    if "granule_metadata" not in item.assets:
-        return None
-    try:
-        xml_url = item.assets["granule_metadata"].href
-        xml_resp = requests.get(xml_url, timeout=10)
-        xml_resp.raise_for_status()
-        root = ET.fromstring(xml_resp.content)
-        angles = [
-            abs(float(zenith_elem.text))
-            for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle")
-            if (zenith_elem := angle_elem.find("ZENITH_ANGLE")) is not None
-        ]
-        return angles[0] if angles else None
-    except Exception as e:
-        print(f"[S2] Warning: Could not extract viewing angle: {e}")
-        return None
-
-
-def download_s2(season, site_position, site_name, date_range=None):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_dir = Path(f"data/{site_name}/{season}/raw/s2/")
-
-    print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    bbox = _get_bbox(lon, lat)
-    bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    print("[S2] Connecting to STAC catalog...")
-    client = Client.open("https://earth-search.aws.element84.com/v1")
-    search = client.search(
-        collections=["sentinel-2-l2a"],
-        intersects={"type": "Point", "coordinates": [lon, lat]},
-        datetime=datetime_range,
-        max_items=1000,
-    )
-
-    print("[S2] Searching items...")
-    items_by_key = {}
-    for item in search.items():
-        date = item.datetime.strftime("%Y%m%d")
-        parts = item.id.split("_")
-        increment = parts[3] if len(parts) > 3 else "0"
-        key = (date, increment)
-        if key not in items_by_key:
-            items_by_key[key] = item
-
-    print(f"[S2] Found {len(items_by_key)} unique items")
-
-    for (date, increment), item in items_by_key.items():
-        filepath = output_dir / f"{date}_{increment}.geotiff"
-        if filepath.exists():
-            print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
-            continue
-
-        print(f"[S2] Processing {date}_{increment}...")
-        band_data = {}
-        profile = None
-
-        for band_name, asset_name in bands.items():
-            if asset_name not in item.assets:
-                continue
-            asset = item.assets[asset_name]
-            with rasterio.open(asset.href) as src:
-                window = _get_window_for_bbox(src, bbox)
-                if window.height <= 0 or window.width <= 0:
-                    continue
-                data = src.read(window=window)
-                new_transform = window_transform(window, src.transform)
-                if profile is None:
-                    profile = {
-                        "driver": "GTiff",
-                        "height": window.height,
-                        "width": window.width,
-                        "count": len(bands),
-                        "dtype": data.dtype,
-                        "crs": src.crs,
-                        "transform": new_transform,
-                        "compress": "lzw",
-                    }
-                band_idx = list(bands.keys()).index(band_name)
-                band_data[band_idx] = data[0]
-
-        if profile and len(band_data) == len(bands):
-            stacked = np.array([band_data[i] for i in sorted(band_data.keys())])
-            band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
-            viewing_angle = _extract_viewing_angle(item)
-
-            if profile["crs"] != TARGET_CRS:
-                src_transform = profile["transform"]
-                src_height, src_width = profile["height"], profile["width"]
-                left, bottom, right, top = rasterio.transform.array_bounds(
-                    src_height, src_width, src_transform
-                )
-                dst_transform, dst_width, dst_height = calculate_default_transform(
-                    profile["crs"], TARGET_CRS, src_width, src_height,
-                    left=left, bottom=bottom, right=right, top=top,
-                )
-                reprojected = np.empty(
-                    (len(stacked), dst_height, dst_width), dtype=stacked.dtype
-                )
-                for i in range(len(stacked)):
-                    reproject(
-                        source=stacked[i],
-                        destination=reprojected[i],
-                        src_transform=src_transform,
-                        src_crs=profile["crs"],
-                        dst_transform=dst_transform,
-                        dst_crs=TARGET_CRS,
-                        resampling=Resampling.bilinear,
-                    )
-                stacked = reprojected
-                profile.update({
-                    "crs": TARGET_CRS,
-                    "transform": dst_transform,
-                    "width": dst_width,
-                    "height": dst_height,
-                })
-
-            with rasterio.open(filepath, "w", **profile) as dst:
-                for i, data in enumerate(stacked, 1):
-                    dst.write(data, i)
-                    dst.set_band_description(i, band_names[i - 1])
-                tags = {}
-                if viewing_angle is not None:
-                    tags["VIEWING_ZENITH_ANGLE"] = str(viewing_angle)
-                pb = item.properties.get("s2:processing_baseline")
-                if pb is not None:
-                    tags["PROCESSING_BASELINE"] = str(pb)
-                if tags:
-                    dst.update_tags(**tags)
-
-            angle_msg = (
-                f" (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else ""
-            )
-            print(f"[S2] Saved: {filepath}{angle_msg}")
-        else:
-            print(f"[S2] Skipping {date}_{increment} (missing bands)")
-
-    print("[S2] Completed")
--- a/acquisition_s3.py
+++ b/acquisition_s3.py
@ -1,160 +0,0 @@
-"""Sentinel-3-OLCI acquisition from Copernicus Data Space OpenEO API."""
-import os
-import time
-from pathlib import Path
-from datetime import datetime
-from dotenv import load_dotenv
-import openeo
-import requests
-import netCDF4
-import numpy as np
-import rasterio
-from rasterio.transform import from_bounds
-
-load_dotenv()
-
-BBOX_SIZE = 0.016  # Larger than S2 to ensure full coverage including padded pixels
-
-
-def _get_bbox(lon, lat):
-    half = BBOX_SIZE / 2
-    return [lon - half, lat - half, lon + half, lat + half]
-
-
-def _process_netcdf(nc_file, output_dir, bands, openeo_bands):
-    with netCDF4.Dataset(str(nc_file), "r") as nc:
-        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
-        x_coords = nc.variables["x"][:]
-        y_coords = nc.variables["y"][:]
-        band_vars = sorted(
-            [v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
-        )
-        band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
-
-        transform = from_bounds(
-            float(x_coords.min()),
-            float(y_coords.min()),
-            float(x_coords.max()),
-            float(y_coords.max()),
-            len(x_coords),
-            len(y_coords),
-        )
-
-        print(f"[S3] Found {len(times)} time steps")
-        date_counts = {}
-        for t_idx, time_val in enumerate(times):
-            dt = (
-                time_val
-                if isinstance(time_val, datetime)
-                else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
-            )
-            date_str = dt.strftime("%Y%m%d")
-            increment = date_counts.get(date_str, 0)
-            date_counts[date_str] = increment + 1
-
-            band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
-            stacked = np.stack(band_data, axis=0)
-
-            output_path = output_dir / f"{date_str}_{increment}.geotiff"
-            with rasterio.open(
-                output_path,
-                "w",
-                driver="GTiff",
-                height=len(y_coords),
-                width=len(x_coords),
-                count=len(band_data),
-                dtype=stacked.dtype,
-                crs="EPSG:32632",
-                transform=transform,
-                compress="lzw",
-            ) as dst:
-                dst.write(stacked)
-                for i, band_name in enumerate(band_names, 1):
-                    dst.set_band_description(i, band_name)
-            print(f"[S3] Saved: {output_path}")
-
-
-def download_s3(season, site_position, site_name, date_range=None):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_dir = Path(f"data/{site_name}/{season}/raw/s3/")
-
-    print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    bbox = _get_bbox(lon, lat)
-    bands = {
-        "SDR_Oa04": "blue",
-        "SDR_Oa06": "green",
-        "SDR_Oa08": "red",
-        "SDR_Oa17": "nir",
-    }
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    band_map = {
-        "SDR_Oa04": "B04",
-        "SDR_Oa06": "B06",
-        "SDR_Oa08": "B08",
-        "SDR_Oa17": "B17",
-    }
-    openeo_bands = [band_map.get(b, b) for b in bands.keys()]
-
-    start_date, end_date = datetime_range.split("/")
-    spatial_extent = {
-        "west": bbox[0],
-        "east": bbox[2],
-        "south": bbox[1],
-        "north": bbox[3],
-    }
-
-    print("[S3] Authenticating...")
-    token_response = requests.post(
-        "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
-        data={
-            "grant_type": "password",
-            "username": os.getenv("CDSE_USER"),
-            "password": os.getenv("CDSE_PASSWORD"),
-            "client_id": "cdse-public",
-        },
-    )
-    token_response.raise_for_status()
-    tokens = token_response.json()
-    access_token = tokens["access_token"]
-
-    print("[S3] Connecting to OpenEO...")
-    conn = openeo.connect("openeo.dataspace.copernicus.eu")
-    conn.authenticate_oidc_access_token(access_token)
-
-    print("[S3] Loading collection...")
-    datacube = conn.load_collection(
-        "SENTINEL3_OLCI_L1B",
-        spatial_extent=spatial_extent,
-        temporal_extent=[start_date, end_date],
-        bands=openeo_bands,
-    ).resample_spatial(projection=32632)
-
-    output_file = output_dir / "s3_data.nc"
-    print(f"[S3] Downloading NetCDF to {output_file}...")
-    print(f"[S3] Temporal extent: {start_date} to {end_date}")
-    print(f"[S3] Spatial extent: {spatial_extent}")
-    print(f"[S3] Bands: {openeo_bands}")
-    print("[S3] This may take several minutes depending on data volume...")
-
-    start_time = time.time()
-    try:
-        datacube.download(str(output_file), format="NetCDF")
-        elapsed = time.time() - start_time
-        print(f"[S3] Download completed in {elapsed:.1f} seconds")
-    except Exception as e:
-        elapsed = time.time() - start_time
-        print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}")
-        raise
-
-    print("[S3] Processing NetCDF...")
-    process_start = time.time()
-    _process_netcdf(output_file, output_dir, bands, openeo_bands)
-    process_elapsed = time.time() - process_start
-    print(f"[S3] Processing completed in {process_elapsed:.1f} seconds")
-
-    print(f"[S3] Removing temporary NetCDF file...")
-    os.remove(output_file)
-    print("[S3] Completed")
--- a/data/sites.geojson
+++ b/data/sites.geojson
@ -1,132 +0,0 @@
-{
-  "type": "FeatureCollection",
-  "features": [
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          25.0743,
-          35.3045
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2024": {}
-        },
-        "elevation": 68,
-        "description": "FORTH Heraklion Greece",
-        "sitename": "forthgr",
-        "ndvi_selected": true,
-        "vegetation_type": "Agriculture"
-      }
-    },
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          11.320308,
-          47.116171
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2020": {},
-          "2024": {}
-        },
-        "elevation": 972,
-        "description": "Neustift Field Site, Stubai Valley, Tyrol, Austria",
-        "sitename": "innsbruck",
-        "ndvi_selected": true,
-        "vegetation_type": "Grassland"
-      }
-    },
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          24.3688,
-          58.5633
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2024": {}
-        },
-        "elevation": 3,
-        "description": "Abandoned peat extraction area, Estonia",
-        "sitename": "pitsalu",
-        "ndvi_selected": true,
-        "vegetation_type": "Wetland"
-      }
-    },
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          19.7673,
-          64.2437
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2023": {}
-        },
-        "elevation": 224,
-        "description": "SITES Svartberget Research Station, Vindeln, Sweden",
-        "sitename": "vindeln2",
-        "ndvi_selected": true,
-        "vegetation_type": "Deciduous Broadleaf"
-      }
-    },
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          -6.0033,
-          36.7455
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2024": {}
-        },
-        "elevation": 56,
-        "description": "Sun flower plot, Jerez, Spain",
-        "sitename": "sunflowerjerez1",
-        "ndvi_selected": true,
-        "vegetation_type": "Agriculture"
-      }
-    },
-    {
-      "type": "Feature",
-      "geometry": {
-        "type": "Point",
-        "coordinates": [
-          26.9837,
-          42.6558
-        ]
-      },
-      "properties": {
-        "country": "",
-        "seasons": {
-          "2024": {}
-        },
-        "elevation": 262,
-        "description": "Institute of Agriculture in Karnobat (selection fields)",
-        "sitename": "institutekarnobat",
-        "ndvi_selected": true,
-        "vegetation_type": "Agriculture"
-      }
-    }
-  ]
-}
--- a/deploy.sh
+++ b/deploy.sh
@ -1,84 +0,0 @@
-#!/bin/bash
-set -e
-
-MODE="${1:-setup}"
-SERVER="${2:-root@49.12.2.88}"
-APP_DIR="/opt/satellite-fusion"
-DATA_DIR="$APP_DIR/data"
-
-case "$MODE" in
-    setup)
-        echo "Deploying to $SERVER..."
-        TEMP_DIR=$(mktemp -d)
-        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$TEMP_DIR/"
-        cat > "$TEMP_DIR/.env.example" <<EOF
-CDSE_USER=your_username_here
-CDSE_PASSWORD=your_password_here
-EOF
-        ssh $SERVER "mkdir -p $APP_DIR"
-        rsync -av --delete "$TEMP_DIR/" "$SERVER:$APP_DIR/"
-        rm -rf "$TEMP_DIR"
-
-        ssh $SERVER <<ENDSSH
-set -e
-cd $APP_DIR
-
-# Find/install Python 3.11
-if ! command -v python3.11 &> /dev/null; then
-    apt-get update -qq
-    apt-get install -y python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
-        apt-get install -y -t trixie-backports python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
-            apt-get install -y software-properties-common
-            add-apt-repository -y ppa:deadsnakes/ppa 2>/dev/null || true
-            apt-get update -qq
-            apt-get install -y python3.11 python3.11-venv python3.11-dev
-        }
-    }
-fi
-
-# Setup venv
-[ -d venv ] && rm -rf venv
-python3.11 -m venv venv
-source venv/bin/activate
-pip install --upgrade pip -q
-pip install -r requirements.txt -q
-pip install git+https://github.com/DHI-GRAS/efast.git -q
-
-# Setup .env
-[ ! -f .env ] && [ -f .env.example ] && cp .env.example .env
-
-# Setup systemd service
-if [ -f satellite-fusion-web.service ]; then
-    sed "s|/opt/satellite-fusion|$APP_DIR|g" satellite-fusion-web.service | \
-        sed "s|--directory /opt/satellite-fusion|--directory $APP_DIR/webapp|g" > /tmp/satellite-fusion-web.service
-    cp /tmp/satellite-fusion-web.service /etc/systemd/system/
-    systemctl daemon-reload
-fi
-
-# Create data directory and webapp/data symlink
-mkdir -p $DATA_DIR
-ln -sf ../data $APP_DIR/webapp/data
-ENDSSH
-        echo "Setup complete!"
-        ;;
-    
-    upload)
-        echo "Uploading data to $SERVER..."
-        rsync -avh --progress --exclude='*.pyc' --exclude='__pycache__' data/ "$SERVER:$DATA_DIR/"
-        echo "Data upload complete!"
-        ;;
-    
-    code)
-        echo "Uploading code to $SERVER..."
-        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$SERVER:$APP_DIR/"
-        echo "Code upload complete!"
-        ;;
-    
-    *)
-        echo "Usage: $0 {setup|upload|code} [server]"
-        echo "  setup  - Deploy code and setup server (default)"
-        echo "  upload - Upload data directory only"
-        echo "  code   - Upload code files only (no setup)"
-        exit 1
-        ;;
-esac
--- a/fusion.py
+++ b/fusion.py
@ -1,176 +0,0 @@
-"""EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
-
-from datetime import datetime, timedelta
-
-from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
-
-
-def _import_efast():
-    """Lazy import of efast to avoid import errors when not using efast functions."""
-    try:
-        import efast
-
-        return efast
-    except ImportError:
-        raise ImportError(
-            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
-        )
-
-
-def run_efast(
-    season,
-    site_position,
-    site_name,
-    cleaning_strategy="aggressive",
-    sigma=None,
-    date_range=None,
-    *,
-    s2_output_dir=None,
-    s3_output_dir=None,
-    fusion_output_dir=None,
-):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-
-    efast_base_dir = _get_base_dir(season, site_name, cleaning_strategy)
-    s2_output_dir = s2_output_dir or (efast_base_dir / "s2")
-    s3_output_dir = s3_output_dir or (efast_base_dir / "s3")
-    fusion_output_dir = fusion_output_dir or (
-        efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")
-    )
-
-    fusion_output_dir.mkdir(parents=True, exist_ok=True)
-    print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    efast = _import_efast()
-
-    start_str, end_str = datetime_range.split("/")
-    start_date = datetime.strptime(start_str, "%Y-%m-%d")
-    end_date = datetime.strptime(end_str, "%Y-%m-%d")
-
-    current_date = start_date
-    while current_date <= end_date:
-        date_str = current_date.strftime("%Y%m%d")
-        output_file = fusion_output_dir / f"REFL_{date_str}.tif"
-        try:
-            kwargs = {
-                "product": "REFL",
-                "max_days": 30,
-                "date_position": 2,
-                "minimum_acquisition_importance": 0.0,
-                "ratio": RESOLUTION_RATIO,
-            }
-            if sigma is not None:
-                kwargs["sigma"] = sigma
-            efast.fusion(
-                current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
-            )
-            print(
-                f"[EFAST] Saved: {output_file}"
-                if output_file.exists()
-                else f"[EFAST] No output for {date_str} (insufficient nearby data)"
-            )
-        except Exception as e:
-            print(f"[EFAST] Error processing {date_str}: {e}")
-        current_date += timedelta(days=1)
-
-    print("[EFAST] Completed")
-
-
-def run_all_efast_scenarios(
-    season, site_position, site_name, sigma_value=30, date_range=None
-):
-    """Run EFAST fusion for all 4 scenarios. Expects prepared_*/s2 and prepared_*/s3 to exist."""
-    for strategy in ["aggressive", "nonaggressive"]:
-        run_efast(
-            season,
-            site_position,
-            site_name,
-            cleaning_strategy=strategy,
-            sigma=None,
-            date_range=date_range,
-        )
-        run_efast(
-            season,
-            site_position,
-            site_name,
-            cleaning_strategy=strategy,
-            sigma=sigma_value,
-            date_range=date_range,
-        )
-
-
-def run_efast_itb(
-    season,
-    site_position,
-    site_name,
-    cleaning_strategy="aggressive",
-    sigma=None,
-    date_range=None,
-    *,
-    s2_output_dir=None,
-    s3_output_dir=None,
-    fusion_output_dir=None,
-):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    efast_base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
-    s2_output_dir = s2_output_dir or (efast_base_dir / "s2")
-    s3_output_dir = s3_output_dir or (efast_base_dir / "s3")
-    fusion_output_dir = fusion_output_dir or (
-        efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")
-    )
-    fusion_output_dir.mkdir(parents=True, exist_ok=True)
-    print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-    efast = _import_efast()
-    start_str, end_str = datetime_range.split("/")
-    start_date = datetime.strptime(start_str, "%Y-%m-%d")
-    end_date = datetime.strptime(end_str, "%Y-%m-%d")
-    current_date = start_date
-    while current_date <= end_date:
-        date_str = current_date.strftime("%Y%m%d")
-        output_file = fusion_output_dir / f"GCC_{date_str}.tif"
-        try:
-            kwargs = {
-                "product": "GCC",
-                "max_days": 30,
-                "date_position": 2,
-                "minimum_acquisition_importance": 0.0,
-                "ratio": RESOLUTION_RATIO,
-            }
-            if sigma is not None:
-                kwargs["sigma"] = sigma
-            efast.fusion(
-                current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
-            )
-            print(
-                f"[EFAST-ITB] Saved: {output_file}"
-                if output_file.exists()
-                else f"[EFAST-ITB] No output for {date_str}"
-            )
-        except Exception as e:
-            print(f"[EFAST-ITB] Error {date_str}: {e}")
-        current_date += timedelta(days=1)
-    print("[EFAST-ITB] Completed")
-
-
-def run_all_efast_itb_scenarios(
-    season, site_position, site_name, sigma_value=30, date_range=None
-):
-    for strategy in ["aggressive", "nonaggressive"]:
-        run_efast_itb(
-            season,
-            site_position,
-            site_name,
-            cleaning_strategy=strategy,
-            sigma=None,
-            date_range=date_range,
-        )
-        run_efast_itb(
-            season,
-            site_position,
-            site_name,
-            cleaning_strategy=strategy,
-            sigma=sigma_value,
-            date_range=date_range,
-        )
--- a/fusion_phenology.py
+++ b/fusion_phenology.py
@ -1,263 +0,0 @@
-"""
-No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
-
-Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
-directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
-``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
-and day offsets vs.\ PhenoCam ``phenocam_phenology.json``.
-
-Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
-(:mod:`gap_validation.phenology_offsets`).
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import re
-from datetime import datetime
-from pathlib import Path
-
-from metrics_stats import _norm_date_key, load_timeseries
-from phenology_timesat import (
-    _timesat as _timesat_pkg,
-    build_yraw_three_years,
-    iter_sites_seasons_from_sites_geojson,
-    phenocam_phenology_path,
-    run_timesat_phenology_from_yraw,
-)
-
-FUSION_SCENARIO_KEYS: tuple[str, ...] = (
-    "aggressive_sigma20",
-    "aggressive_sigma30",
-    "nonaggressive_sigma20",
-    "nonaggressive_sigma30",
-    "aggressive_sigma20_itb",
-    "aggressive_sigma30_itb",
-    "nonaggressive_sigma20_itb",
-    "nonaggressive_sigma30_itb",
-)
-
-
-def fusion_phenology_path(site_name: str, season: int) -> Path:
-    return Path(f"data/{site_name}/{season}/fusion_phenology.json")
-
-
-def parse_scenario_key(key: str) -> tuple[str, int, str]:
-    """``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode)."""
-    mode = "itb" if key.endswith("_itb") else "bti"
-    base = key.replace("_itb", "")
-    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
-    if not m:
-        raise ValueError(f"Cannot parse scenario key: {key!r}")
-    return m.group(1), int(m.group(2)), mode
-
-
-def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
-    strategy, sigma, mode = parse_scenario_key(scenario_key)
-    if mode == "bti":
-        processed = f"processed_{strategy}_sigma{sigma}"
-    else:
-        processed = f"processed_{strategy}_itb_sigma{sigma}"
-    return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
-
-
-def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
-    """YYYY-MM-DD → GCC from fusion ``timeseries.json``."""
-    raw = load_timeseries(ts_path)
-    out: dict[str, float] = {}
-    for k, v in raw.items():
-        nk = _norm_date_key(k)
-        if nk and v is not None:
-            try:
-                fv = float(v)
-            except (TypeError, ValueError):
-                continue
-            if fv == fv:  # finite
-                out[nk] = fv
-    return out
-
-
-def timesat_transitions_from_by_date(
-    by_date: dict[str, float], season: int
-) -> dict[str, str | float | None]:
-    """Run TIMESAT on fusion GCC; return transition dates for *season*."""
-    if len(by_date) < 10:
-        return {
-            "green_up_50pct_date": None,
-            "green_down_50pct_date": None,
-            "timesat_input": None,
-            "n_values": len(by_date),
-        }
-    y1, y2, y3 = season - 1, season, season + 1
-    yraw, stack_mode = build_yraw_three_years(by_date, y1, y2, y3)
-    out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
-    return {
-        "green_up_50pct_date": out.get("green_up_50pct_date"),
-        "green_down_50pct_date": out.get("green_down_50pct_date"),
-        "timesat_input": stack_mode,
-        "n_values": len(by_date),
-    }
-
-
-def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
-    if not iso_a or not iso_b:
-        return None
-    try:
-        a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
-        b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
-        return abs((a - b).days)
-    except ValueError:
-        return None
-
-
-def _offsets_vs_reference(
-    fused: dict[str, str | float | None], reference: dict
-) -> dict[str, int | None]:
-    ref_up = reference.get("green_up_50pct_date")
-    ref_dn = reference.get("green_down_50pct_date")
-    fup = fused.get("green_up_50pct_date")
-    fdn = fused.get("green_down_50pct_date")
-    return {
-        "abs_day_offset_green_up": _day_offset(fup, ref_up),
-        "abs_day_offset_green_down": _day_offset(fdn, ref_dn),
-    }
-
-
-def compute_fusion_phenology_for_site(
-    site_name: str,
-    season: int,
-    *,
-    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
-) -> dict:
-    ref_path = phenocam_phenology_path(site_name, season)
-    reference = (
-        json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {}
-    )
-    scenarios: dict[str, dict] = {}
-    for key in scenario_keys:
-        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
-        if not ts_path.is_file():
-            scenarios[key] = {
-                "workflow": parse_scenario_key(key)[2],
-                "missing_timeseries": str(ts_path),
-            }
-            continue
-        by_date = fusion_gcc_by_date(ts_path)
-        fused = timesat_transitions_from_by_date(by_date, season)
-        strategy, sigma, mode = parse_scenario_key(key)
-        scenarios[key] = {
-            "workflow": mode,
-            "strategy": strategy,
-            "sigma": sigma,
-            "timeseries_path": str(ts_path),
-            **fused,
-            **_offsets_vs_reference(fused, reference),
-        }
-    return {
-        "site_name": site_name,
-        "season": season,
-        "reference": {
-            "source": str(ref_path) if ref_path.is_file() else None,
-            "green_up_50pct_date": reference.get("green_up_50pct_date"),
-            "green_down_50pct_date": reference.get("green_down_50pct_date"),
-        },
-        "scenarios": scenarios,
-    }
-
-
-def write_fusion_phenology_for_site(
-    site_name: str,
-    season: int,
-    *,
-    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
-) -> Path | None:
-    if _timesat_pkg is None:
-        out = fusion_phenology_path(site_name, season)
-        print(
-            f"[Fusion phenology] Skipped (no timesat); would write {out}. "
-            "pip install timesat"
-        )
-        return None
-    payload = compute_fusion_phenology_for_site(
-        site_name, season, scenario_keys=scenario_keys
-    )
-    out = fusion_phenology_path(site_name, season)
-    out.parent.mkdir(parents=True, exist_ok=True)
-    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-    n_ok = sum(
-        1
-        for s in payload["scenarios"].values()
-        if s.get("green_up_50pct_date") or s.get("green_down_50pct_date")
-    )
-    print(
-        f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
-        f"≥1 transition date)"
-    )
-    return out
-
-
-def write_fusion_phenology_all(
-    *,
-    sites_geojson: str | Path = "data/sites.geojson",
-    seasons: dict[str, int] | None = None,
-) -> int:
-    if seasons:
-        pairs = sorted((s, seasons[s]) for s in seasons.keys())
-    else:
-        pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)
-    n = 0
-    for site, season in pairs:
-        print(f"=== {site} {season} ===")
-        if write_fusion_phenology_for_site(site, season):
-            n += 1
-    print(f"[Fusion phenology] Processed {n} site/season pair(s).")
-    return n
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(
-        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
-    )
-    ap.add_argument("--site", type=str, default=None)
-    ap.add_argument("--season", type=int, default=None)
-    ap.add_argument(
-        "--all",
-        action="store_true",
-        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
-    )
-    ap.add_argument(
-        "--primary-only",
-        action="store_true",
-        help="With --all: only thesis primary seasons per site.",
-    )
-    ap.add_argument(
-        "--sites-geojson",
-        type=Path,
-        default=Path("data/sites.geojson"),
-    )
-    args = ap.parse_args()
-    if _timesat_pkg is None:
-        raise SystemExit("Install timesat: pip install timesat")
-
-    primary = {
-        "forthgr": 2024,
-        "innsbruck": 2024,
-        "pitsalu": 2024,
-        "vindeln2": 2023,
-        "sunflowerjerez1": 2024,
-        "institutekarnobat": 2024,
-    }
-    if args.all:
-        write_fusion_phenology_all(
-            sites_geojson=args.sites_geojson,
-            seasons=primary if args.primary_only else None,
-        )
-        return
-    if not args.site or args.season is None:
-        raise SystemExit("Provide --site and --season, or use --all --primary-only")
-    write_fusion_phenology_for_site(args.site, args.season)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/init.py
+++ b/gap_validation/init.py
@ -1 +0,0 @@
-"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""
--- a/gap_validation/main.py
+++ b/gap_validation/main.py
@ -1,4 +0,0 @@
-from gap_validation.run import main
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/batch_spatial.py
+++ b/gap_validation/batch_spatial.py
@ -1,135 +0,0 @@
-"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI scenario per site)."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import re
-from pathlib import Path
-
-from gap_validation.run import run_validation
-
-# Primary season per site (matches scripts/export_thesis_tables.py).
-PRIMARY_SEASON = {
-    "forthgr": 2024,
-    "innsbruck": 2024,
-    "pitsalu": 2024,
-    "vindeln2": 2023,
-    "sunflowerjerez1": 2024,
-    "institutekarnobat": 2024,
-}
-
-
-def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]:
-    data = json.loads(geojson.read_text(encoding="utf-8"))
-    out: dict[str, tuple[float, float]] = {}
-    for feat in data.get("features", []):
-        props = feat.get("properties") or {}
-        name = props.get("sitename")
-        coords = (feat.get("geometry") or {}).get("coordinates")
-        if not name or not coords or len(coords) < 2:
-            continue
-        lon, lat = float(coords[0]), float(coords[1])
-        out[str(name)] = (lat, lon)
-    return out
-
-
-def _parse_scenario(key: str) -> tuple[str, int | None, str]:
-    """``aggressive_sigma20`` → (strategy, sigma, bti)."""
-    mode = "itb" if key.endswith("_itb") else "bti"
-    base = key.replace("_itb", "")
-    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
-    if not m:
-        raise ValueError(f"Cannot parse scenario key: {key!r}")
-    strategy = m.group(1)
-    sigma = int(m.group(2))
-    return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode
-
-
-def _best_from_metrics(metrics_path: Path, workflow: str) -> str | None:
-    """Best scenario key (max no-gap NSE_PC) for ``workflow`` (``bti`` or ``itb``)."""
-    if workflow not in ("bti", "itb"):
-        raise ValueError(f"workflow must be bti or itb, got {workflow!r}")
-    if not metrics_path.is_file():
-        return None
-    temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {}
-    want_itb = workflow == "itb"
-    best_key, best_nse = None, None
-    for k, v in temporal.items():
-        if k.endswith("_itb") != want_itb or not isinstance(v, dict):
-            continue
-        n = v.get("nse_pc")
-        if isinstance(n, (int, float)) and (best_nse is None or n > best_nse):
-            best_nse = n
-            best_key = k
-    return best_key
-
-
-def _best_bti_from_metrics(metrics_path: Path) -> str | None:
-    return _best_from_metrics(metrics_path, "bti")
-
-
-def _best_itb_from_metrics(metrics_path: Path) -> str | None:
-    return _best_from_metrics(metrics_path, "itb")
-
-
-def _resolve_workflows(workflow: str) -> tuple[str, ...]:
-    return ("bti", "itb") if workflow == "both" else (workflow,)
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).")
-    ap.add_argument("--data-dir", type=Path, default=Path("data"))
-    ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
-    ap.add_argument("--skip-fusion", action="store_true")
-    ap.add_argument("--write-manifest-only", action="store_true")
-    ap.add_argument(
-        "--workflow",
-        choices=["bti", "itb", "both"],
-        default="both",
-        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
-    )
-    ap.add_argument(
-        "--gap-days",
-        type=int,
-        action="append",
-        help="Filter gap lengths (default: all 15 and 30 in manifest).",
-    )
-    args = ap.parse_args()
-    positions = _site_positions(args.sites_geojson)
-    gap_filter = args.gap_days
-    workflows = _resolve_workflows(args.workflow)
-
-    for site, season in sorted(PRIMARY_SEASON.items()):
-        pos = positions.get(site)
-        if not pos:
-            print(f"[skip] No coordinates for {site}")
-            continue
-        metrics_path = args.data_dir / site / str(season) / "metrics.json"
-        for workflow in workflows:
-            scenario_key = _best_from_metrics(metrics_path, workflow)
-            if not scenario_key:
-                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
-                continue
-            strategy, sigma, mode = _parse_scenario(scenario_key)
-            sigma_kw = 30 if sigma == 30 else None
-            print(f"=== {site} {season} {scenario_key} ===")
-            out = run_validation(
-                site,
-                season,
-                pos,
-                strategy,
-                sigma_kw,
-                mode,
-                skip_manifest=False,
-                skip_fusion=args.skip_fusion,
-                write_manifest_only=args.write_manifest_only,
-                gap_days_filter=gap_filter,
-                transition_filter=None,
-                s2_calendar_strategy=strategy,
-            )
-            print(out)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/batch_temporal.py
+++ b/gap_validation/batch_temporal.py
@ -1,65 +0,0 @@
-"""Run full-season gap-degraded NSE_PC for all thesis sites (best BtI scenario)."""
-
-from __future__ import annotations
-
-import argparse
-from pathlib import Path
-
-from gap_validation.batch_spatial import (
-    PRIMARY_SEASON,
-    _best_from_metrics,
-    _parse_scenario,
-    _resolve_workflows,
-    _site_positions,
-)
-from gap_validation.temporal_pc import run_temporal_pc
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).")
-    ap.add_argument("--data-dir", type=Path, default=Path("data"))
-    ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
-    ap.add_argument("--skip-fusion", action="store_true")
-    ap.add_argument(
-        "--workflow",
-        choices=["bti", "itb", "both"],
-        default="both",
-        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
-    )
-    ap.add_argument("--gap-days", type=int, action="append")
-    args = ap.parse_args()
-    positions = _site_positions(args.sites_geojson)
-    workflows = _resolve_workflows(args.workflow)
-
-    for site, season in sorted(PRIMARY_SEASON.items()):
-        pos = positions.get(site)
-        if not pos:
-            print(f"[skip] No coordinates for {site}")
-            continue
-        metrics_path = args.data_dir / site / str(season) / "metrics.json"
-        for workflow in workflows:
-            scenario_key = _best_from_metrics(metrics_path, workflow)
-            if not scenario_key:
-                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
-                continue
-            strategy, sigma, mode = _parse_scenario(scenario_key)
-            sigma_kw = 30 if sigma == 30 else None
-            print(f"=== {site} {season} temporal {scenario_key} ===")
-            out = run_temporal_pc(
-                site,
-                season,
-                pos,
-                strategy,
-                sigma_kw,
-                mode,
-                skip_manifest=False,
-                skip_fusion=args.skip_fusion,
-                gap_days_filter=args.gap_days,
-                transition_filter=None,
-                s2_calendar_strategy=strategy,
-            )
-            print(out)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/calendar.py
+++ b/gap_validation/calendar.py
@ -1,210 +0,0 @@
-"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
-
-from __future__ import annotations
-
-import json
-import re
-from datetime import date, datetime, timedelta
-from pathlib import Path
-
-from phenology_timesat import phenocam_phenology_path
-
-REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
-DEFAULT_GAP_LENGTHS = (15, 30)
-TRANSITIONS = ("green_up", "green_down")
-
-
-def validation_dir(site_name: str, season: int) -> Path:
-    return Path(f"data/{site_name}/{season}/validation")
-
-
-def _parse_iso_date(s, season: int) -> date | None:
-    if not s or not isinstance(s, str):
-        return None
-    try:
-        d = datetime.strptime(s[:10], "%Y-%m-%d").date()
-    except ValueError:
-        return None
-    y0, y1 = date(season, 1, 1), date(season, 12, 31)
-    return d if y0 <= d <= y1 else None
-
-
-def transition_midpoint(
-    site_name: str,
-    season: int,
-    transition: str,
-    phenology_path: Path | None = None,
-) -> date | None:
-    """TIMESAT 50 % amplitude date for ``green_up`` or ``green_down``; None if missing."""
-    if transition not in TRANSITIONS:
-        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
-    path = phenology_path or phenocam_phenology_path(site_name, season)
-    if not path.is_file():
-        return None
-    try:
-        rec = json.loads(path.read_text(encoding="utf-8"))
-    except (OSError, json.JSONDecodeError):
-        return None
-    key = (
-        "green_up_50pct_date"
-        if transition == "green_up"
-        else "green_down_50pct_date"
-    )
-    return _parse_iso_date(rec.get(key), season)
-
-
-def phenology_midpoint(
-    site_name: str, season: int, phenology_path: Path | None = None
-) -> date:
-    """Legacy: green-up if in season, else green-down, else July 1."""
-    for tr in ("green_up", "green_down"):
-        d = transition_midpoint(site_name, season, tr, phenology_path)
-        if d:
-            return d
-    return date(season, 7, 1)
-
-
-def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
-    """[start, end] inclusive, gap_days wide, clamped to calendar year."""
-    half = gap_days // 2
-    start = mid - timedelta(days=half)
-    end = mid + timedelta(days=gap_days - 1 - half)
-    y0, y1 = date(season, 1, 1), date(season, 12, 31)
-    if start < y0:
-        end = min(y1, end + (y0 - start))
-        start = y0
-    if end > y1:
-        start = max(y0, start - (end - y1))
-        end = y1
-    return start, end
-
-
-def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
-    """Return sorted (acquisition_date, filename) for *REFL.tif."""
-    out: list[tuple[date, str]] = []
-    if not prepared_s2.is_dir():
-        return out
-    for p in sorted(prepared_s2.glob("*REFL.tif")):
-        m = REFL_DATE_RE.search(p.name)
-        if not m:
-            continue
-        d = datetime.strptime(m.group(1), "%Y%m%d").date()
-        out.append((d, p.name))
-    out.sort(key=lambda x: x[0])
-    return out
-
-
-def nearest_s2_acquisition(
-    prediction: date, pairs: list[tuple[date, str]]
-) -> tuple[date, str] | None:
-    if not pairs:
-        return None
-    return min(pairs, key=lambda t: abs((t[0] - prediction).days))
-
-
-def build_manifest_entries(
-    site_name: str,
-    season: int,
-    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
-    transitions: tuple[str, ...] = TRANSITIONS,
-    s2_calendar_strategy: str = "aggressive",
-) -> list[dict]:
-    """One entry per (transition, gap_days): phenology midpoint, window, withheld S2."""
-    prepared_s2 = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
-    pairs = list_s2_refl_dates(prepared_s2)
-    entries: list[dict] = []
-    for transition in transitions:
-        mid = transition_midpoint(site_name, season, transition)
-        if mid is None:
-            continue
-        for gap_days in gap_lengths:
-            w0, w1 = centered_window(mid, gap_days, season)
-            prediction = mid
-            ns = nearest_s2_acquisition(prediction, pairs)
-            if ns is None:
-                withheld_date = None
-                withheld_filename = None
-            else:
-                withheld_date, withheld_filename = ns[0].isoformat(), ns[1]
-            entries.append(
-                {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "midpoint_rule": f"{transition}_50pct_date",
-                    "midpoint_date": mid.isoformat(),
-                    "window_start": w0.isoformat(),
-                    "window_end": w1.isoformat(),
-                    "prediction_date": prediction.isoformat(),
-                    "withheld_s2_date": withheld_date,
-                    "withheld_s2_filename": withheld_filename,
-                }
-            )
-    return entries
-
-
-def write_gap_withheld_images(
-    site_name: str,
-    season: int,
-    entries: list[dict],
-) -> Path:
-    """Reproducibility sidecar for withheld scenes and gap placement."""
-    path = validation_dir(site_name, season) / "gap_withheld_images.json"
-    records = []
-    for e in entries:
-        records.append(
-            {
-                "site_name": site_name,
-                "season": season,
-                "transition": e.get("transition"),
-                "gap_days": e.get("gap_days"),
-                "midpoint_date": e.get("midpoint_date"),
-                "window_start": e.get("window_start"),
-                "window_end": e.get("window_end"),
-                "withheld_s2_date": e.get("withheld_s2_date"),
-                "withheld_s2_filename": e.get("withheld_s2_filename"),
-            }
-        )
-    path.write_text(
-        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
-        + "\n",
-        encoding="utf-8",
-    )
-    return path
-
-
-def write_manifest(
-    site_name: str,
-    season: int,
-    site_position: tuple[float, float],
-    s2_calendar_strategy: str = "aggressive",
-    *,
-    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
-    transitions: tuple[str, ...] = TRANSITIONS,
-) -> Path:
-    out_dir = validation_dir(site_name, season)
-    out_dir.mkdir(parents=True, exist_ok=True)
-    entries = build_manifest_entries(
-        site_name,
-        season,
-        gap_lengths=gap_lengths,
-        transitions=transitions,
-        s2_calendar_strategy=s2_calendar_strategy,
-    )
-    path = out_dir / "gap_manifest.json"
-    payload = {
-        "site_name": site_name,
-        "season": season,
-        "site_position_lat_lon": list(site_position),
-        "s2_calendar_strategy": s2_calendar_strategy,
-        "entries": entries,
-    }
-    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-    write_gap_withheld_images(site_name, season, entries)
-    return path
-
-
-def load_manifest(site_name: str, season: int) -> dict:
-    path = validation_dir(site_name, season) / "gap_manifest.json"
-    if not path.is_file():
-        raise FileNotFoundError(f"Missing manifest: {path}")
-    return json.loads(path.read_text(encoding="utf-8"))
--- a/gap_validation/export_rasters.py
+++ b/gap_validation/export_rasters.py
@ -1,438 +0,0 @@
-"""Export 2×4 RGB panels for Tier-A gap validation (thesis appendix).
-
-Crops follow the same fusion-valid bounding box as ``postprocessing.process_cropped``
-and the webapp (``processed_*`` / ``common.js``), anchored on gap-degraded fusion at the
-prediction date; S2 and S3 are read from prepared stacks on that shared window.
-"""
-
-from __future__ import annotations
-
-import json
-import re
-from datetime import date, datetime
-from pathlib import Path
-
-import matplotlib.pyplot as plt
-import numpy as np
-import rasterio
-from rasterio import windows
-from rasterio.transform import rowcol
-from rasterio.warp import Resampling, reproject
-
-from gap_validation.s2_mask_dir import acquisition_yyyymmdd_in_window, yyyymmdd_from_iso
-
-REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
-S3_COMPOSITE_RE = re.compile(r"composite_(\d{8})\.tif$")
-TRANSITIONS = ("green_up", "green_down")
-COL_TITLES = ("Withheld S2", "Gap fusion", "S3 composite", "Nearest S2")
-ROW_LABELS = {"green_up": "Green-up", "green_down": "Green-down"}
-VALID_REFL_THRESHOLD = 0.001
-NODATA_RGB = (0.15, 0.15, 0.15)
-
-
-def _parse_bti_scenario(scenario: str) -> tuple[str, int]:
-    m = re.match(r"^(aggressive|nonaggressive)_sigma(20|30)$", scenario)
-    if not m:
-        raise ValueError(f"expected BtI scenario key, got {scenario!r}")
-    return m.group(1), int(m.group(2))
-
-
-def _prepared_base(data_dir: Path, site: str, season: int, strategy: str) -> Path:
-    return data_dir / site / str(season) / f"prepared_{strategy}"
-
-
-def _s2_strategy_fallbacks(strategy: str, manifest: dict) -> tuple[str, ...]:
-    """Prepared trees to try for S2 REFL (best-BtI first, then manifest calendar)."""
-    order: list[str] = []
-    for s in (strategy, manifest.get("s2_calendar_strategy")):
-        if isinstance(s, str) and s and s not in order:
-            order.append(s)
-    for s in ("aggressive", "nonaggressive"):
-        if s not in order:
-            order.append(s)
-    return tuple(order)
-
-
-def _find_prepared_s2_refl(
-    data_dir: Path,
-    site: str,
-    season: int,
-    filename: str,
-    strategies: tuple[str, ...],
-) -> Path | None:
-    for strat in strategies:
-        p = _prepared_base(data_dir, site, season, strat) / "s2" / filename
-        if p.is_file():
-            return p
-    return None
-
-
-def _gap_spatial_fusion_dir(
-    data_dir: Path,
-    site: str,
-    season: int,
-    gap_days: int,
-    transition: str,
-    strategy: str,
-    sigma: int,
-) -> Path:
-    return (
-        data_dir
-        / site
-        / str(season)
-        / "validation"
-        / "fusion"
-        / f"gap_{gap_days}_{transition}"
-        / f"{strategy}_sigma{sigma}_bti"
-    )
-
-
-def _iso_to_date(iso_d: str) -> date:
-    return datetime.strptime(iso_d[:10], "%Y-%m-%d").date()
-
-
-def _exclude_ymds(entry: dict) -> set[str]:
-    withheld_fn = entry.get("withheld_s2_filename") or ""
-    m = REFL_DATE_RE.search(withheld_fn)
-    return {m.group(1)} if m else set()
-
-
-def nearest_stack_s2(
-    prepared_s2_dir: Path,
-    prediction_iso: str,
-    *,
-    exclude_ymds: set[str],
-) -> Path | None:
-    if not prepared_s2_dir.is_dir():
-        return None
-    target = _iso_to_date(prediction_iso)
-    best_path: Path | None = None
-    best_delta: int | None = None
-    for p in prepared_s2_dir.glob("S2A_MSIL2A_*_REFL.tif"):
-        m = REFL_DATE_RE.search(p.name)
-        if not m or m.group(1) in exclude_ymds:
-            continue
-        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
-        if best_delta is None or delta < best_delta:
-            best_delta = delta
-            best_path = p
-    return best_path
-
-
-def nearest_s3_composite(prepared_s3_dir: Path, prediction_iso: str) -> Path | None:
-    if not prepared_s3_dir.is_dir():
-        return None
-    target = _iso_to_date(prediction_iso)
-    best_path: Path | None = None
-    best_delta: int | None = None
-    for p in prepared_s3_dir.glob("composite_*.tif"):
-        m = S3_COMPOSITE_RE.search(p.name)
-        if not m:
-            continue
-        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
-        if best_delta is None or delta < best_delta:
-            best_delta = delta
-            best_path = p
-    return best_path
-
-
-def _crop_window_from_fusion(fusion_path: Path) -> dict | None:
-    """Fusion-valid crop (``postprocessing.process_cropped``) on the full prepared grid."""
-    if not fusion_path.is_file():
-        return None
-    with rasterio.open(fusion_path) as src:
-        data = src.read()
-        valid = np.isfinite(data) & (data > VALID_REFL_THRESHOLD)
-        rows = np.any(valid, axis=(0, 2))
-        cols = np.any(valid, axis=(0, 1))
-        row_idx = np.where(rows)[0]
-        col_idx = np.where(cols)[0]
-        if len(row_idx) == 0 or len(col_idx) == 0:
-            return None
-        r0, r1 = int(row_idx[0]), int(row_idx[-1])
-        c0, c1 = int(col_idx[0]), int(col_idx[-1])
-        w, h = c1 - c0 + 1, r1 - r0 + 1
-        win = windows.Window(c0, r0, w, h)
-        return {
-            "window": win,
-            "crop_transform": windows.transform(win, src.transform),
-            "full_transform": src.transform,
-            "crs": src.crs,
-            "profile": src.profile.copy(),
-        }
-
-
-def _read_bgr_prepared_s2(prepared_refl: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
-    if not prepared_refl.is_file():
-        return None
-    with rasterio.open(prepared_refl) as src:
-        if src.count < 3:
-            return None
-        b, g, r = src.read(indexes=(1, 2, 3), window=crop["window"])
-        return b.astype(np.float64), g.astype(np.float64), r.astype(np.float64)
-
-
-def _read_bgr_gap_fusion(fusion_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
-    if not fusion_path.is_file():
-        return None
-    with rasterio.open(fusion_path) as src:
-        if src.count < 3:
-            return None
-        b, g, r = src.read(indexes=(1, 2, 3), window=crop["window"])
-        return b.astype(np.float64), g.astype(np.float64), r.astype(np.float64)
-
-
-def _read_bgr_prepared_s3(s3_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
-    """Resample S3 composite to the fusion grid, then crop (matches ``process_cropped``)."""
-    if not s3_path.is_file():
-        return None
-    with rasterio.open(s3_path) as src:
-        if src.count < 3:
-            return None
-        temp_profile = crop["profile"].copy()
-        temp_profile.update({"dtype": "float32", "count": src.count})
-        bands: list[np.ndarray] = []
-        with rasterio.MemoryFile() as memfile:
-            with memfile.open(**temp_profile) as resampled:
-                for i in range(1, src.count + 1):
-                    reproject(
-                        source=rasterio.band(src, i),
-                        destination=rasterio.band(resampled, i),
-                        src_transform=src.transform,
-                        src_crs=src.crs,
-                        dst_transform=crop["full_transform"],
-                        dst_crs=crop["crs"],
-                        resampling=Resampling.nearest,
-                    )
-                b, g, r = resampled.read(
-                    indexes=(1, 2, 3), window=crop["window"]
-                )
-                bands = [
-                    b.astype(np.float64),
-                    g.astype(np.float64),
-                    r.astype(np.float64),
-                ]
-        return bands[0], bands[1], bands[2]
-
-
-def _refl_valid(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
-    return (
-        np.isfinite(blue)
-        & np.isfinite(green)
-        & np.isfinite(red)
-        & (blue > VALID_REFL_THRESHOLD)
-        & (green > VALID_REFL_THRESHOLD)
-        & (red > VALID_REFL_THRESHOLD)
-    )
-
-
-def _panel_stretch_limits(
-    blue: np.ndarray, green: np.ndarray, red: np.ndarray, valid: np.ndarray
-) -> tuple[float, float]:
-    """Per-panel 2--98 % stretch on positive reflectance (webapp ``common.js`` style)."""
-    if not valid.any():
-        return 0.0, 1.0
-    vals = np.concatenate([red[valid], green[valid], blue[valid]])
-    lo, hi = np.percentile(vals, (2, 98))
-    if hi <= lo:
-        return 0.0, 1.0
-    return float(lo), float(hi)
-
-
-def _bgr_to_rgba(
-    blue: np.ndarray,
-    green: np.ndarray,
-    red: np.ndarray,
-    *,
-    valid: np.ndarray,
-    vmin: float,
-    vmax: float,
-) -> np.ndarray:
-    rgba = np.zeros((*blue.shape, 4), dtype=np.float32)
-    rgba[..., 3] = 1.0
-    rgba[~valid, 0] = NODATA_RGB[0]
-    rgba[~valid, 1] = NODATA_RGB[1]
-    rgba[~valid, 2] = NODATA_RGB[2]
-    span = vmax - vmin or 1.0
-    for band, idx in ((red, 0), (green, 1), (blue, 2)):
-        norm = np.clip((band - vmin) / span, 0.0, 1.0)
-        rgba[..., idx] = np.where(valid, norm, rgba[..., idx])
-    return rgba
-
-
-def _phenocam_pixel_cropped(
-    crop: dict, site_position_lat_lon: tuple[float, float]
-) -> tuple[int, int] | None:
-    lat, lon = site_position_lat_lon
-    try:
-        r, c = rowcol(
-            crop["crop_transform"], [lon], [lat], op=crop["crs"]
-        )
-        return int(r[0]), int(c[0])
-    except Exception:
-        return None
-
-
-def _resolve_row_paths(
-    data_dir: Path,
-    site: str,
-    season: int,
-    entry: dict,
-    strategy: str,
-    sigma: int,
-    *,
-    gap_days: int,
-    manifest: dict,
-) -> tuple[Path, Path, Path, Path] | None:
-    pred_ymd = yyyymmdd_from_iso(entry["prediction_date"])
-    transition = entry["transition"]
-    prep = _prepared_base(data_dir, site, season, strategy)
-    s2_strats = _s2_strategy_fallbacks(strategy, manifest)
-    withheld_fn = entry.get("withheld_s2_filename")
-    if not withheld_fn:
-        return None
-    withheld = _find_prepared_s2_refl(
-        data_dir, site, season, withheld_fn, s2_strats
-    )
-    fusion = (
-        _gap_spatial_fusion_dir(data_dir, site, season, gap_days, transition, strategy, sigma)
-        / f"REFL_{pred_ymd}.tif"
-    )
-    s3_exact = prep / "s3" / f"composite_{pred_ymd}.tif"
-    s3 = (
-        s3_exact
-        if s3_exact.is_file()
-        else nearest_s3_composite(prep / "s3", entry["prediction_date"])
-    )
-    w0 = _iso_to_date(entry["window_start"])
-    w1 = _iso_to_date(entry["window_end"])
-    nearest: Path | None = None
-    for strat in s2_strats:
-        prep_s2 = _prepared_base(data_dir, site, season, strat) / "s2"
-        window_ymds = acquisition_yyyymmdd_in_window(prep_s2, w0, w1)
-        exclude = window_ymds | _exclude_ymds(entry)
-        nearest = nearest_stack_s2(
-            prep_s2, entry["prediction_date"], exclude_ymds=exclude
-        )
-        if nearest is not None:
-            break
-    if withheld is None or not fusion.is_file() or s3 is None or nearest is None:
-        return None
-    return withheld, fusion, s3, nearest
-
-
-def build_site_panel(
-    site: str,
-    season: int,
-    data_dir: Path,
-    out_png: Path,
-    *,
-    best_bti_scenario: str,
-    site_label: str,
-    site_position_lat_lon: tuple[float, float] | None = None,
-    gap_days: int = 30,
-) -> bool:
-    """Build 2×4 RGB figure; return False if manifest or any transition row is incomplete."""
-    manifest_path = data_dir / site / str(season) / "validation" / "gap_manifest.json"
-    if not manifest_path.is_file():
-        return False
-    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
-    strategy, sigma = _parse_bti_scenario(best_bti_scenario)
-    rows: list[tuple[str, dict, tuple[Path, Path, Path, Path]]] = []
-    for transition in TRANSITIONS:
-        entry = next(
-            (
-                e
-                for e in manifest["entries"]
-                if e.get("gap_days") == gap_days and e.get("transition") == transition
-            ),
-            None,
-        )
-        if not entry:
-            continue
-        paths = _resolve_row_paths(
-            data_dir,
-            site,
-            season,
-            entry,
-            strategy,
-            sigma,
-            gap_days=gap_days,
-            manifest=manifest,
-        )
-        if paths is None:
-            continue
-        rows.append((transition, entry, paths))
-
-    if not rows:
-        return False
-
-    readers = (
-        _read_bgr_prepared_s2,
-        _read_bgr_gap_fusion,
-        _read_bgr_prepared_s3,
-        _read_bgr_prepared_s2,
-    )
-
-    fig, axes = plt.subplots(
-        len(rows),
-        4,
-        figsize=(12.0, 2.8 * len(rows)),
-        squeeze=False,
-        constrained_layout=True,
-    )
-    for row_idx, (transition, entry, paths) in enumerate(rows):
-        row_title = ROW_LABELS.get(transition, transition)
-        crop = _crop_window_from_fusion(paths[1])
-        if crop is None:
-            for ax in axes[row_idx]:
-                ax.set_visible(False)
-            continue
-
-        layers: list[tuple[np.ndarray, np.ndarray, np.ndarray]] = []
-        for path, read_fn in zip(paths, readers, strict=True):
-            bgr = read_fn(path, crop)
-            if bgr is None:
-                layers = []
-                break
-            layers.append(bgr)
-        if len(layers) != 4:
-            for ax in axes[row_idx]:
-                ax.set_visible(False)
-            continue
-
-        mark: tuple[int, int] | None = None
-        if site_position_lat_lon:
-            mark = _phenocam_pixel_cropped(crop, site_position_lat_lon)
-
-        for col_idx, (col_title, bgr) in enumerate(zip(COL_TITLES, layers, strict=True)):
-            ax = axes[row_idx, col_idx]
-            blue, green, red = bgr
-            valid = _refl_valid(blue, green, red)
-            vmin, vmax = _panel_stretch_limits(blue, green, red, valid)
-            rgba = _bgr_to_rgba(
-                blue, green, red, valid=valid, vmin=vmin, vmax=vmax
-            )
-            ax.imshow(rgba, origin="upper", aspect="equal", interpolation="nearest")
-            h, w = rgba.shape[:2]
-            if col_idx == 0 and mark and 0 <= mark[0] < h and 0 <= mark[1] < w:
-                ax.plot(
-                    mark[1],
-                    mark[0],
-                    "+",
-                    color="red",
-                    markersize=8,
-                    markeredgewidth=1.2,
-                )
-            if row_idx == 0:
-                ax.set_title(col_title, fontsize=9)
-            if col_idx == 0:
-                ax.set_ylabel(row_title, fontsize=9)
-            ax.set_xticks([])
-            ax.set_yticks([])
-
-    fig.suptitle(f"{site_label} ({season})", fontsize=10)
-    out_png.parent.mkdir(parents=True, exist_ok=True)
-    fig.savefig(out_png, dpi=150)
-    plt.close(fig)
-    return True
--- a/gap_validation/fusion_masked.py
+++ b/gap_validation/fusion_masked.py
@ -1,200 +0,0 @@
-"""EFAST with symlinked S2 dir (gap window omitted); outputs under validation/."""
-
-from __future__ import annotations
-
-from datetime import datetime
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
-from fusion import run_efast, run_efast_itb
-from preparation import _get_base_dir, _get_itb_base_dir
-
-from gap_validation.s2_mask_dir import (
-    acquisition_yyyymmdd_in_window,
-    assert_no_leakage,
-    build_masked_s2_dir_bti,
-    build_masked_s2_dir_itb,
-)
-
-
-def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
-    return _get_base_dir(season, site_name, strategy) / "s3"
-
-
-def validation_fusion_dir(
-    site_name: str,
-    season: int,
-    gap_days: int,
-    transition: str,
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-) -> Path:
-    """``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``."""
-    sig = 30 if sigma == 30 else 20
-    return (
-        Path(f"data/{site_name}/{season}/validation")
-        / "fusion"
-        / f"gap_{gap_days}_{transition}"
-        / f"{strategy}_sigma{sig}_{mode}"
-    )
-
-
-def excluded_acquisition_days(
-    prepared_s2: Path,
-    window_start_iso: str,
-    window_end_iso: str,
-    withheld_yyyymmdd: str,
-) -> set[str]:
-    """Union of gap-window S2 days and the withheld validation acquisition."""
-    w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
-    w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
-    excluded = acquisition_yyyymmdd_in_window(prepared_s2, w0, w1)
-    excluded.add(withheld_yyyymmdd)
-    return excluded
-
-
-def run_masked_fusion_one_date(
-    season: int,
-    site_position: tuple[float, float],
-    site_name: str,
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-    prediction_date_iso: str,
-    window_start_iso: str,
-    window_end_iso: str,
-    withheld_yyyymmdd: str,
-    fusion_output_dir: Path,
-) -> Path:
-    """Build temp masked S2 dir, run EFAST for ``prediction_date_iso`` only."""
-    fusion_output_dir.mkdir(parents=True, exist_ok=True)
-    date_range = f"{prediction_date_iso[:10]}/{prediction_date_iso[:10]}"
-
-    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
-        tmp_s2 = Path(tmp) / "s2"
-        if mode == "bti":
-            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
-            excl = excluded_acquisition_days(
-                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
-            )
-            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
-            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
-            run_efast(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-                date_range=date_range,
-                s2_output_dir=tmp_s2,
-                s3_output_dir=prepared_s3_dir(season, site_name, strategy),
-                fusion_output_dir=fusion_output_dir,
-            )
-        elif mode == "itb":
-            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
-            excl = excluded_acquisition_days(
-                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
-            )
-            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
-            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
-            run_efast_itb(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-                date_range=date_range,
-                s2_output_dir=tmp_s2,
-                s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
-                fusion_output_dir=fusion_output_dir,
-            )
-        else:
-            raise ValueError(f"mode must be bti or itb, got {mode!r}")
-
-    return fusion_output_dir
-
-
-def run_masked_fusion_season(
-    season: int,
-    site_position: tuple[float, float],
-    site_name: str,
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-    window_start_iso: str,
-    window_end_iso: str,
-    withheld_yyyymmdd: str,
-    fusion_output_dir: Path,
-) -> Path:
-    """Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier)."""
-    fusion_output_dir.mkdir(parents=True, exist_ok=True)
-    date_range = f"{season}-01-01/{season}-12-31"
-
-    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
-        tmp_s2 = Path(tmp) / "s2"
-        if mode == "bti":
-            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
-            excl = excluded_acquisition_days(
-                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
-            )
-            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
-            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
-            run_efast(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-                date_range=date_range,
-                s2_output_dir=tmp_s2,
-                s3_output_dir=prepared_s3_dir(season, site_name, strategy),
-                fusion_output_dir=fusion_output_dir,
-            )
-        else:
-            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
-            excl = excluded_acquisition_days(
-                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
-            )
-            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
-            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
-            run_efast_itb(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-                date_range=date_range,
-                s2_output_dir=tmp_s2,
-                s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
-                fusion_output_dir=fusion_output_dir,
-            )
-
-    return fusion_output_dir
-
-
-def production_fusion_path(
-    season: int,
-    site_name: str,
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-    yyyymmdd: str,
-) -> Path:
-    """Single-date fused raster from the normal prepared tree (no-gap baseline)."""
-    if mode == "bti":
-        base = _get_base_dir(season, site_name, strategy)
-        sub = f"fusion_sigma{sigma}" if sigma else "fusion"
-        return base / sub / f"REFL_{yyyymmdd}.tif"
-    base = _get_itb_base_dir(season, site_name, strategy)
-    sub = f"fusion_sigma{sigma}" if sigma else "fusion"
-    return base / sub / f"GCC_{yyyymmdd}.tif"
-
-
-def withheld_s2_refl_path(
-    season: int, site_name: str, strategy: str, withheld_filename: str | None
-) -> Path | None:
-    if not withheld_filename:
-        return None
-    p = _get_base_dir(season, site_name, strategy) / "s2" / withheld_filename
-    return p if p.is_file() else None
--- a/gap_validation/phenology_offsets.py
+++ b/gap_validation/phenology_offsets.py
@ -1,163 +0,0 @@
-"""TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference."""
-
-from __future__ import annotations
-
-import argparse
-import json
-from datetime import datetime
-from pathlib import Path
-
-from fusion_phenology import timesat_transitions_from_by_date
-from phenology_timesat import phenocam_phenology_path
-
-from gap_validation.batch_spatial import (
-    PRIMARY_SEASON,
-    _best_from_metrics,
-    _parse_scenario,
-    _resolve_workflows,
-    _site_positions,
-)
-from gap_validation.calendar import load_manifest, validation_dir
-from gap_validation.temporal_pc import _fusion_gcc_timeseries
-
-
-def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
-    if not iso_a or not iso_b:
-        return None
-    try:
-        a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
-        b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
-        return abs((a - b).days)
-    except ValueError:
-        return None
-
-
-def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
-    out = timesat_transitions_from_by_date(by_date, season)
-    return {
-        "green_up": out.get("green_up_50pct_date"),
-        "green_down": out.get("green_down_50pct_date"),
-    }
-
-
-def _temporal_fusion_dir(
-    site: str, season: int, gap_days: int, transition: str, scenario_key: str
-) -> Path:
-    strategy, sigma, mode = _parse_scenario(scenario_key)
-    sig = 30 if sigma == 30 else 20
-    return (
-        validation_dir(site, season)
-        / "temporal"
-        / f"gap_{gap_days}_{transition}"
-        / f"{strategy}_sigma{sig}_{mode}"
-    )
-
-
-def compute_offsets_for_site(
-    site: str,
-    season: int,
-    site_position: tuple[float, float],
-    *,
-    workflow: str = "bti",
-    gap_days_list: tuple[int, ...] = (15, 30),
-) -> list[dict]:
-    base = Path(f"data/{site}/{season}")
-    metrics_path = base / "metrics.json"
-    scenario_key = _best_from_metrics(metrics_path, workflow)
-    if not scenario_key:
-        return []
-    ref_path = phenocam_phenology_path(site, season)
-    reference = (
-        json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {}
-    )
-    manifest = load_manifest(site, season)
-    rows: list[dict] = []
-    for entry in manifest["entries"]:
-        gd = entry.get("gap_days")
-        tr = entry.get("transition")
-        if gd not in gap_days_list or tr not in ("green_up", "green_down"):
-            continue
-        fusion_dir = _temporal_fusion_dir(site, season, gd, tr, scenario_key)
-        if not fusion_dir.is_dir():
-            continue
-        _, _, mode = _parse_scenario(scenario_key)
-        ts = _fusion_gcc_timeseries(fusion_dir, site_position, mode)
-        if len(ts) < 10:
-            continue
-        fused = _timesat_transitions(ts, season)
-        ref_key = (
-            "green_up_50pct_date"
-            if tr == "green_up"
-            else "green_down_50pct_date"
-        )
-        ref_date = reference.get(ref_key)
-        fused_date = fused.get("green_up" if tr == "green_up" else "green_down")
-        rows.append(
-            {
-                "site_name": site,
-                "season": season,
-                "transition": tr,
-                "gap_days": gd,
-                "scenario": scenario_key,
-                "reference_date": ref_date,
-                "fused_date": fused_date,
-                "abs_day_offset": _day_offset(fused_date, ref_date),
-                "window_start": entry.get("window_start"),
-                "window_end": entry.get("window_end"),
-            }
-        )
-    return rows
-
-
-def write_phenology_offsets(
-    site: str,
-    season: int,
-    site_position: tuple[float, float],
-    *,
-    workflow: str = "bti",
-    gap_days_list: tuple[int, ...] = (15, 30),
-) -> Path:
-    rows = compute_offsets_for_site(
-        site, season, site_position, workflow=workflow, gap_days_list=gap_days_list
-    )
-    vdir = validation_dir(site, season)
-    payload = {
-        "site_name": site,
-        "season": season,
-        "workflow": workflow,
-        "records": rows,
-    }
-    out = vdir / f"gap_phenology_offsets_{workflow}.json"
-    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-    if workflow == "bti":
-        # Legacy alias for backward-compatible readers.
-        (vdir / "gap_phenology_offsets.json").write_text(
-            json.dumps(payload, indent=2) + "\n", encoding="utf-8"
-        )
-    return out
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.")
-    ap.add_argument("--data-dir", type=Path, default=Path("data"))
-    ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
-    ap.add_argument(
-        "--workflow",
-        choices=["bti", "itb", "both"],
-        default="both",
-        help="Fusion workflow(s) (default: both best BtI and best ItB).",
-    )
-    args = ap.parse_args()
-    positions = _site_positions(args.sites_geojson)
-    workflows = _resolve_workflows(args.workflow)
-    for site, season in sorted(PRIMARY_SEASON.items()):
-        pos = positions.get(site)
-        if not pos:
-            continue
-        for workflow in workflows:
-            p = write_phenology_offsets(site, season, pos, workflow=workflow)
-            print(p)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/run.py
+++ b/gap_validation/run.py
@ -1,352 +0,0 @@
-"""Tier-2 gap validation CLI: manifest, masked EFAST, spatial ``nse_s2``, Whittaker crossover."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import subprocess
-import sys
-from datetime import datetime
-from pathlib import Path
-
-from gap_validation.calendar import (
-    DEFAULT_GAP_LENGTHS,
-    TRANSITIONS,
-    load_manifest,
-    validation_dir,
-    write_manifest,
-)
-from gap_validation.fusion_masked import (
-    production_fusion_path,
-    run_masked_fusion_one_date,
-    validation_fusion_dir,
-    withheld_s2_refl_path,
-)
-from gap_validation.spatial_metrics import evaluate_gap_vs_withheld
-from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
-
-
-def _ymd_from_iso(iso_d: str) -> str:
-    return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
-
-
-def _yyyymmdd_from_withheld_filename(fn: str) -> str | None:
-    for part in fn.replace(".tif", "").split("_"):
-        if len(part) == 8 and part.isdigit():
-            return part
-    return None
-
-
-def _withheld_iso(entry: dict) -> str | None:
-    d = entry.get("withheld_s2_date")
-    if isinstance(d, str) and len(d) >= 10:
-        return d[:10]
-    fn = entry.get("withheld_s2_filename")
-    if not fn or not isinstance(fn, str):
-        return None
-    ymd = _yyyymmdd_from_withheld_filename(fn)
-    if not ymd:
-        return None
-    return datetime.strptime(ymd, "%Y%m%d").date().isoformat()
-
-
-def _fused_file(fusion_dir: Path, mode: str, ymd: str) -> Path:
-    stem = "REFL" if mode == "bti" else "GCC"
-    return fusion_dir / f"{stem}_{ymd}.tif"
-
-
-def _scenario_key(strategy: str, sigma: int | None, mode: str) -> str:
-    sig = 30 if sigma == 30 else 20
-    return f"{strategy}_sigma{sig}_{mode}"
-
-
-def _git_rev() -> str | None:
-    try:
-        return subprocess.check_output(
-            ["git", "rev-parse", "HEAD"],
-            cwd=Path(__file__).resolve().parent.parent,
-            text=True,
-        ).strip()
-    except (OSError, subprocess.CalledProcessError):
-        return None
-
-
-def _filter_entries(
-    entries: list[dict],
-    gap_days_filter: list[int] | None,
-    transition_filter: list[str] | None,
-) -> list[dict]:
-    out = entries
-    if gap_days_filter:
-        out = [e for e in out if e.get("gap_days") in gap_days_filter]
-    if transition_filter:
-        out = [e for e in out if e.get("transition") in transition_filter]
-    return out
-
-
-def run_validation(
-    site_name: str,
-    season: int,
-    site_position: tuple[float, float],
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-    *,
-    skip_manifest: bool,
-    skip_fusion: bool,
-    write_manifest_only: bool,
-    gap_days_filter: list[int] | None,
-    transition_filter: list[str] | None,
-    s2_calendar_strategy: str,
-    manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
-    manifest_transitions: tuple[str, ...] = TRANSITIONS,
-) -> Path:
-    base = Path(f"data/{site_name}/{season}")
-    vdir = validation_dir(site_name, season)
-    vdir.mkdir(parents=True, exist_ok=True)
-
-    if not skip_manifest:
-        write_manifest(
-            site_name,
-            season,
-            site_position,
-            s2_calendar_strategy=s2_calendar_strategy,
-            gap_lengths=manifest_gap_lengths,
-            transitions=manifest_transitions,
-        )
-    if write_manifest_only:
-        return vdir / "gap_manifest.json"
-
-    manifest = load_manifest(site_name, season)
-    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
-
-    results: list[dict] = []
-    for entry in entries:
-        gap_days = entry["gap_days"]
-        transition = entry.get("transition", "green_up")
-        pred = entry["prediction_date"]
-        w0 = entry["window_start"]
-        w1 = entry["window_end"]
-        fn = entry.get("withheld_s2_filename")
-        if not fn:
-            results.append(
-                {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "error": "no_withheld_s2_filename",
-                    "entry": entry,
-                }
-            )
-            continue
-        ymd = _ymd_from_iso(pred)
-        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
-        if not wh_ymd:
-            results.append(
-                {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "error": "could_not_parse_withheld_yyyymmdd",
-                    "withheld_s2_filename": fn,
-                }
-            )
-            continue
-        withheld_iso = (
-            _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
-        )
-
-        fusion_out = validation_fusion_dir(
-            site_name, season, gap_days, transition, strategy, sigma, mode
-        )
-        if not skip_fusion:
-            try:
-                run_masked_fusion_one_date(
-                    season,
-                    site_position,
-                    site_name,
-                    strategy,
-                    sigma,
-                    mode,
-                    pred,
-                    w0,
-                    w1,
-                    wh_ymd,
-                    fusion_out,
-                )
-            except RuntimeError as e:
-                results.append(
-                    {
-                        "transition": transition,
-                        "gap_days": gap_days,
-                        "error": str(e),
-                        "entry": entry,
-                    }
-                )
-                continue
-
-        fused_gap = _fused_file(fusion_out, mode, ymd)
-        prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd)
-        wh_path = withheld_s2_refl_path(season, site_name, strategy, fn)
-        if wh_path is None or not fused_gap.is_file():
-            results.append(
-                {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "prediction_date": pred,
-                    "withheld_s2_filename": fn,
-                    "scenario": {
-                        "strategy": strategy,
-                        "sigma": 30 if sigma == 30 else 20,
-                        "mode": mode,
-                    },
-                    "error": "missing_withheld_refl_or_fused_gap",
-                    "fused_gap_path": str(fused_gap),
-                }
-            )
-            continue
-
-        spatial = evaluate_gap_vs_withheld(
-            wh_path,
-            fused_gap,
-            prod if prod.is_file() else None,
-            mode,
-            whittaker_context=(base, strategy, pred, withheld_iso, w0, w1),
-        )
-        fusion_nse = (spatial.get("gap") or {}).get("nse_s2")
-        wh_nse = (spatial.get("whittaker") or {}).get("nse_s2")
-        results.append(
-            {
-                "transition": transition,
-                "gap_days": gap_days,
-                "prediction_date": pred,
-                "window_start": w0,
-                "window_end": w1,
-                "withheld_s2_filename": fn,
-                "scenario": {
-                    "strategy": strategy,
-                    "sigma": 30 if sigma == 30 else 20,
-                    "mode": mode,
-                },
-                "paths": {
-                    "fused_gap": str(fused_gap),
-                    "fused_no_gap": str(prod) if prod.is_file() else None,
-                    "withheld_s2_refl": str(wh_path),
-                },
-                "spatial": spatial,
-                "whittaker_crossover_row": {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "nse_s2_fusion": fusion_nse,
-                    "nse_s2_whittaker": wh_nse,
-                },
-            }
-        )
-
-    scenario = _scenario_key(strategy, sigma, mode)
-    crossover_rows = [
-        r["whittaker_crossover_row"]
-        for r in results
-        if isinstance(r.get("whittaker_crossover_row"), dict)
-    ]
-    summary = {
-        "site_name": site_name,
-        "season": season,
-        "scenario": scenario,
-        "command_line": sys.argv,
-        "git_commit": _git_rev(),
-        "manifest": str(vdir / "gap_manifest.json"),
-        "gap_withheld_images": str(vdir / "gap_withheld_images.json"),
-        "results": results,
-        "whittaker_crossover": {
-            scenario: {
-                "metric": "nse_s2_spatial_vs_withheld_s2_gcc",
-                "whittaker_definition": (
-                    "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
-                    "all S2 dates in the gap window and the withheld acquisition removed; "
-                    "prediction is a spatially constant field at smoothed GCC(prediction_date)."
-                ),
-                "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
-                    crossover_rows,
-                    fusion_key="nse_s2_fusion",
-                    whittaker_key="nse_s2_whittaker",
-                ),
-                "by_gap": crossover_rows,
-            }
-        },
-    }
-    out_path = vdir / f"gap_validation_summary_{mode}.json"
-    out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
-    if mode == "bti":
-        # Legacy alias for backward-compatible readers (webapp, older scripts).
-        (vdir / "gap_validation_summary.json").write_text(
-            json.dumps(summary, indent=2) + "\n", encoding="utf-8"
-        )
-    return out_path
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(
-        description="Tier-2 withheld-S2 gap validation (outputs under data/.../validation/)."
-    )
-    ap.add_argument("--site", required=True)
-    ap.add_argument("--season", type=int, required=True)
-    ap.add_argument("--lat", type=float, required=True)
-    ap.add_argument("--lon", type=float, required=True)
-    ap.add_argument(
-        "--strategy", default="aggressive", choices=["aggressive", "nonaggressive"]
-    )
-    ap.add_argument("--sigma", type=int, default=20, choices=[20, 30])
-    ap.add_argument("--mode", default="bti", choices=["bti", "itb"])
-    ap.add_argument(
-        "--gap-days",
-        type=int,
-        action="append",
-        metavar="N",
-        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
-    )
-    ap.add_argument(
-        "--transition",
-        choices=list(TRANSITIONS),
-        action="append",
-        help="Restrict to transition(s); repeatable (default: all in manifest).",
-    )
-    ap.add_argument("--skip-manifest", action="store_true")
-    ap.add_argument(
-        "--skip-fusion",
-        action="store_true",
-        help="Reuse existing validation fusion rasters.",
-    )
-    ap.add_argument(
-        "--write-manifest-only",
-        action="store_true",
-        help="Write gap_manifest.json + gap_withheld_images.json and exit.",
-    )
-    ap.add_argument(
-        "--s2-calendar-strategy",
-        default="aggressive",
-        choices=["aggressive", "nonaggressive"],
-        help="Which prepared_*/s2 tree is used to pick nearest S2 for withholding.",
-    )
-    args = ap.parse_args()
-    sigma_kw = 30 if args.sigma == 30 else None
-    site_position = (args.lat, args.lon)
-    gap_filter = args.gap_days if args.gap_days else None
-    trans_filter = args.transition if args.transition else None
-    out = run_validation(
-        args.site,
-        args.season,
-        site_position,
-        args.strategy,
-        sigma_kw,
-        args.mode,
-        skip_manifest=args.skip_manifest,
-        skip_fusion=args.skip_fusion,
-        write_manifest_only=args.write_manifest_only,
-        gap_days_filter=gap_filter,
-        transition_filter=trans_filter,
-        s2_calendar_strategy=args.s2_calendar_strategy,
-    )
-    print(out)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/s2_mask_dir.py
+++ b/gap_validation/s2_mask_dir.py
@ -1,91 +0,0 @@
-"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
-
-from __future__ import annotations
-
-import re
-from datetime import date, datetime
-from pathlib import Path
-
-# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
-S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
-
-
-def yyyymmdd_in_name(name: str) -> str | None:
-    m = S2_PREP_DATE_RE.search(name)
-    return m.group(1) if m else None
-
-
-def yyyymmdd_from_iso(iso_d: str) -> str:
-    return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
-
-
-def acquisition_yyyymmdd_in_window(
-    prepared_s2: Path, window_start: date, window_end: date
-) -> set[str]:
-    """All S2 acquisition days (from REFL filenames) inside [window_start, window_end]."""
-    out: set[str] = set()
-    if not prepared_s2.is_dir():
-        return out
-    for p in prepared_s2.glob("*REFL.tif"):
-        m = re.search(r"S2A_MSIL2A_(\d{8})_REFL\.tif$", p.name)
-        if not m:
-            continue
-        d = datetime.strptime(m.group(1), "%Y%m%d").date()
-        if window_start <= d <= window_end:
-            out.add(m.group(1))
-    return out
-
-
-def build_masked_s2_dir(
-    prepared_s2: Path,
-    excluded_yyyymmdd: set[str],
-    dest: Path,
-    patterns: tuple[str, ...],
-) -> int:
-    """Symlink all files matching ``patterns`` except excluded acquisition days."""
-    dest.mkdir(parents=True, exist_ok=True)
-    n = 0
-    for pattern in patterns:
-        for src in sorted(prepared_s2.glob(pattern)):
-            if not src.is_file() and not src.is_symlink():
-                continue
-            y = yyyymmdd_in_name(src.name)
-            if y and y in excluded_yyyymmdd:
-                continue
-            link = dest / src.name
-            if link.exists() or link.is_symlink():
-                link.unlink()
-            link.symlink_to(src.resolve())
-            n += 1
-    return n
-
-
-def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
-    """Fail if the withheld validation acquisition is present in the fusion input dir."""
-    for p in masked_s2_dir.iterdir():
-        y = yyyymmdd_in_name(p.name)
-        if y == withheld_yyyymmdd:
-            raise RuntimeError(
-                f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
-                f"found in masked S2 dir {masked_s2_dir}"
-            )
-
-
-def build_masked_s2_dir_bti(
-    prepared_s2: Path,
-    excluded_yyyymmdd: set[str],
-    dest: Path,
-) -> int:
-    return build_masked_s2_dir(
-        prepared_s2, excluded_yyyymmdd, dest, ("*REFL.tif", "*DIST_CLOUD.tif")
-    )
-
-
-def build_masked_s2_dir_itb(
-    prepared_s2: Path,
-    excluded_yyyymmdd: set[str],
-    dest: Path,
-) -> int:
-    return build_masked_s2_dir(
-        prepared_s2, excluded_yyyymmdd, dest, ("*GCC.tif", "*DIST_CLOUD.tif")
-    )
--- a/gap_validation/spatial_metrics.py
+++ b/gap_validation/spatial_metrics.py
@ -1,234 +0,0 @@
-"""Per-pixel GCC vs withheld S2; NSE (nse_s2); no-gap baseline; deltas."""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-import numpy as np
-import rasterio
-from rasterio.warp import reproject, Resampling
-from scipy.stats import pearsonr
-
-# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
-VALID_REFL_THRESHOLD = 0.001
-GCC_DENOM_EPS = 1e-3
-MAX_REPORTED_NSE_S2 = 20.0
-
-
-def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
-    t = red.astype(np.float64) + green.astype(np.float64) + blue.astype(np.float64)
-    out = np.full_like(blue, np.nan, dtype=np.float64)
-    m = (
-        np.isfinite(t)
-        & (t >= GCC_DENOM_EPS)
-        & np.isfinite(blue)
-        & np.isfinite(green)
-        & np.isfinite(red)
-        & (blue > GCC_DENOM_EPS)
-        & (green > GCC_DENOM_EPS)
-        & (red > GCC_DENOM_EPS)
-    )
-    out[m] = green[m].astype(np.float64) / t[m]
-    return out.astype(np.float32)
-
-
-def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
-    """Pixels with strictly positive blue, green, red (BtI REFL); None if not applicable."""
-    with rasterio.open(fusion_path) as src:
-        if src.count < 3:
-            return None
-        stacks = src.read(indexes=[1, 2, 3]).astype(np.float32)
-    return np.isfinite(stacks).all(axis=0) & (stacks > GCC_DENOM_EPS).all(axis=0)
-
-
-def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
-    """Fused GCC: BtI from 4-band REFL or ItB single-band GCC."""
-    with rasterio.open(fusion_path) as src:
-        if src.count >= 4:
-            b = src.read(1).astype(np.float32)
-            g = src.read(2).astype(np.float32)
-            r = src.read(3).astype(np.float32)
-            gcc = _gcc_from_rgb(b, g, r)
-        else:
-            gcc = src.read(1).astype(np.float32)
-        prof = src.profile.copy()
-    return gcc, prof
-
-
-def warp_refl_bands_to_grid(
-    refl_path: Path,
-    height: int,
-    width: int,
-    transform,
-    crs,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Resample S2 REFL blue/green/red to fusion grid (bilinear)."""
-    with rasterio.open(refl_path) as src:
-        b = np.empty((height, width), dtype=np.float32)
-        g = np.empty((height, width), dtype=np.float32)
-        r = np.empty((height, width), dtype=np.float32)
-        for i, dst in enumerate((b, g, r), start=1):
-            reproject(
-                source=rasterio.band(src, i),
-                destination=dst,
-                src_transform=src.transform,
-                src_crs=src.crs,
-                dst_transform=transform,
-                dst_crs=crs,
-                resampling=Resampling.bilinear,
-            )
-    return b, g, r
-
-
-def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
-    """Valid pixels: BtI uses REFL-style mask; ItB uses single-band GCC (postprocessing ItB)."""
-    with rasterio.open(fusion_path) as src:
-        if mode == "itb" or src.count < 4:
-            d = src.read(1).astype(np.float32)
-            return np.isfinite(d) & (d > VALID_REFL_THRESHOLD)
-        stacks = src.read().astype(np.float32)
-        with np.errstate(all="ignore"):
-            mx = np.nanmax(stacks, axis=0)
-        ok = np.isfinite(stacks).all(axis=0) & np.isfinite(mx) & (
-            mx > VALID_REFL_THRESHOLD
-        )
-        return ok
-
-
-def spatial_scores(
-    y_true_gcc: np.ndarray,
-    y_pred_gcc: np.ndarray,
-    mask: np.ndarray,
-) -> dict:
-    """RMSE, MAE, mean bias, Pearson r, nse_s2 (Nash–Sutcliffe vs spatial truth)."""
-    yt = y_true_gcc[mask].astype(np.float64).ravel()
-    yp = y_pred_gcc[mask].astype(np.float64).ravel()
-    n = int(yt.size)
-    if n < 2:
-        return {"n_pixels": n}
-    mean_t = float(np.mean(yt))
-    rmse = float(np.sqrt(np.mean((yt - yp) ** 2)))
-    mae = float(np.mean(np.abs(yt - yp)))
-    bias = float(np.mean(yp - yt))
-    den = float(np.sum((yt - mean_t) ** 2))
-    nse_s2 = None
-    if den > 0:
-        raw = float(1.0 - np.sum((yt - yp) ** 2) / den)
-        if abs(raw) <= MAX_REPORTED_NSE_S2:
-            nse_s2 = raw
-    r = None
-    if np.std(yt) > 0 and np.std(yp) > 0:
-        r = float(pearsonr(yt, yp)[0])
-    return {
-        "n_pixels": n,
-        "rmse": rmse,
-        "mae": mae,
-        "mean_bias": bias,
-        "pearson_r": r,
-        "nse_s2": nse_s2,
-    }
-
-
-def withheld_gcc_on_fusion_grid(
-    withheld_refl_path: Path, fused_path: Path
-) -> tuple[np.ndarray, np.ndarray, dict]:
-    """``y_true`` GCC (withheld S2) and ``y_pred`` GCC from ``fused_path``, same grid."""
-    yp, prof = read_fused_gcc(fused_path)
-    h, w = yp.shape
-    b, g, r = warp_refl_bands_to_grid(
-        withheld_refl_path, h, w, prof["transform"], prof["crs"]
-    )
-    yt = _gcc_from_rgb(b, g, r)
-    return yt, yp, prof
-
-
-def mask_gap_whittaker(
-    yt: np.ndarray,
-    y_gap: np.ndarray,
-    fused_gap_path: Path,
-    mode: str,
-) -> np.ndarray:
-    """Mask for gap fusion and Whittaker vs withheld S2 (does not require no-gap fusion)."""
-    m = (
-        valid_mask_fused(fused_gap_path, mode)
-        & np.isfinite(yt)
-        & np.isfinite(y_gap)
-        & (yt > VALID_REFL_THRESHOLD)
-        & (yt <= 1.0)
-        & (y_gap > VALID_REFL_THRESHOLD)
-        & (y_gap <= 1.0)
-    )
-    pos = _positive_bgr_mask(fused_gap_path)
-    if pos is not None:
-        m &= pos
-    return m
-
-
-def common_valid_mask(
-    yt: np.ndarray,
-    y_gap: np.ndarray,
-    y_nogap: np.ndarray | None,
-    fused_gap_path: Path,
-    mode: str,
-) -> np.ndarray:
-    """Mask including no-gap fusion when computing gap-vs-no-gap deltas (internal QA)."""
-    m = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
-    if y_nogap is not None:
-        m &= (
-            np.isfinite(y_nogap)
-            & (y_nogap > VALID_REFL_THRESHOLD)
-            & (y_nogap <= 1.0)
-        )
-    return m
-
-
-def evaluate_gap_vs_withheld(
-    withheld_refl_path: Path,
-    fused_gap_path: Path,
-    fused_nogap_path: Path | None,
-    mode: str,
-    *,
-    whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
-) -> dict:
-    """Spatial metrics for gap and no-gap; optional Whittaker constant-field vs withheld S2.
-
-    ``delta_rmse`` / ``delta_nse`` compare gap vs no-gap fusion on a shared mask (QA only;
-    ``delta_nse`` = NSE_no_gap − NSE_gap, not exported to thesis tables).
-    """
-    yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)
-    y_nogap = None
-    if fused_nogap_path is not None and fused_nogap_path.is_file():
-        y_nogap, _ = read_fused_gcc(fused_nogap_path)
-    mask_gw = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
-    out: dict = {"gap": spatial_scores(yt, y_gap, mask_gw)}
-    if y_nogap is not None:
-        mask_full = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
-        out["no_gap"] = spatial_scores(yt, y_nogap, mask_full)
-        g, ng = out["gap"], out["no_gap"]
-        if g.get("rmse") is not None and ng.get("rmse") is not None:
-            out["delta_rmse"] = float(g["rmse"] - ng["rmse"])
-        if g.get("nse_s2") is not None and ng.get("nse_s2") is not None:
-            out["delta_nse"] = float(ng["nse_s2"] - g["nse_s2"])
-    if whittaker_context is not None:
-        from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series
-
-        base, strategy, prediction_iso, withheld_iso, w0, w1 = whittaker_context
-        wgcc = whittaker_gcc_on_gap_masked_series(
-            base,
-            strategy,
-            prediction_iso,
-            withheld_iso,
-            window_start_iso=w0,
-            window_end_iso=w1,
-        )
-        if wgcc is not None:
-            out["whittaker"] = constant_field_scores(yt, float(wgcc), mask_gw)
-    return out
-
-
-def constant_field_scores(
-    y_true_gcc: np.ndarray, scalar: float, mask: np.ndarray
-) -> dict:
-    """NSE / RMSE when prediction is a spatially constant Whittaker value (same mask as fusion)."""
-    yp = np.full_like(y_true_gcc, scalar, dtype=np.float32)
-    return spatial_scores(y_true_gcc, yp, mask)
--- a/gap_validation/temporal_pc.py
+++ b/gap_validation/temporal_pc.py
@ -1,293 +0,0 @@
-"""Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation)."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import re
-from datetime import datetime
-from pathlib import Path
-
-from metrics_indices import _get_gcc_from_original
-from metrics_stats import (
-    WHITTAKER_LAMBDA_DAYS_SQ,
-    _norm_date_key,
-    _s2_gcc_series_from_preselection,
-    _whittaker_smooth_dict,
-    calculate_temporal_metrics,
-    load_timeseries,
-)
-
-from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest
-from gap_validation.fusion_masked import run_masked_fusion_season
-from gap_validation.run import (
-    _filter_entries,
-    _scenario_key,
-    _withheld_iso,
-    _yyyymmdd_from_withheld_filename,
-)
-from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
-
-
-def _fusion_gcc_timeseries(
-    fusion_dir: Path, site_position: tuple[float, float], mode: str
-) -> dict[str, float]:
-    """3×3 mean GCC at site from fused REFL/GCC rasters in ``fusion_dir``."""
-    pattern = "REFL_*.tif" if mode == "bti" else "GCC_*.tif"
-    out: dict[str, float] = {}
-    for p in sorted(fusion_dir.glob(pattern)):
-        m = re.search(r"_(\d{8})\.tif$", p.name)
-        if not m:
-            continue
-        d = datetime.strptime(m.group(1), "%Y%m%d").date().isoformat()
-        gcc = _get_gcc_from_original(p, site_position)
-        if gcc is not None:
-            out[d] = float(gcc)
-    return out
-
-
-def whittaker_timeseries_gap_degraded(
-    base: Path,
-    strategy: str,
-    window_start_iso: str,
-    window_end_iso: str,
-    withheld_iso: str,
-    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
-) -> dict[str, float]:
-    """Daily Whittaker GCC on S2 preselection with gap window + withheld day removed."""
-    all_gcc, flags = _s2_gcc_series_from_preselection(base)
-    if not all_gcc:
-        return {}
-    idx = 0 if strategy == "aggressive" else 1
-    w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
-    w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
-    wh_k = _norm_date_key(withheld_iso)
-
-    def in_window(dk: str) -> bool:
-        try:
-            d = datetime.strptime(dk[:10], "%Y-%m-%d").date()
-        except ValueError:
-            return False
-        return w0 <= d <= w1
-
-    kept = sorted(
-        (d, g)
-        for d, g in all_gcc.items()
-        if d in flags
-        and not flags[d][idx]
-        and _norm_date_key(d) != wh_k
-        and not in_window(_norm_date_key(d) or "")
-    )
-    if len(kept) < 2:
-        return {}
-    obs_d, obs_v = zip(*kept)
-    return _whittaker_smooth_dict(obs_d, obs_v, lam)
-
-
-def run_temporal_pc(
-    site_name: str,
-    season: int,
-    site_position: tuple[float, float],
-    strategy: str,
-    sigma: int | None,
-    mode: str,
-    *,
-    skip_manifest: bool,
-    skip_fusion: bool,
-    gap_days_filter: list[int] | None,
-    transition_filter: list[str] | None,
-    s2_calendar_strategy: str,
-) -> Path:
-    """Run full-season gap fusion + NSE_PC; write ``gap_metrics.json``."""
-    base = Path(f"data/{site_name}/{season}")
-    vdir = validation_dir(site_name, season)
-    vdir.mkdir(parents=True, exist_ok=True)
-
-    if not skip_manifest:
-        write_manifest(
-            site_name,
-            season,
-            site_position,
-            s2_calendar_strategy=s2_calendar_strategy,
-        )
-
-    manifest = load_manifest(site_name, season)
-    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
-    phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json"
-    phenocam_ts = load_timeseries(phenocam_ts_path)
-
-    nogap_metrics_path = base / "metrics.json"
-    nogap_nse: dict[str, float | None] = {}
-    if nogap_metrics_path.is_file():
-        m = json.loads(nogap_metrics_path.read_text(encoding="utf-8"))
-        sk = _scenario_key(strategy, sigma, mode)
-        block = (m.get("temporal") or {}).get(sk) or {}
-        nogap_nse["nse_pc"] = block.get("nse_pc")
-
-    results: list[dict] = []
-    crossover_rows: list[dict] = []
-
-    for entry in entries:
-        transition = entry.get("transition", "green_up")
-        gap_days = entry["gap_days"]
-        pred = entry["prediction_date"]
-        w0, w1 = entry["window_start"], entry["window_end"]
-        fn = entry.get("withheld_s2_filename")
-        if not fn:
-            results.append(
-                {"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"}
-            )
-            continue
-        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
-        if not wh_ymd:
-            results.append(
-                {
-                    "transition": transition,
-                    "gap_days": gap_days,
-                    "error": "bad_withheld_filename",
-                }
-            )
-            continue
-        withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
-
-        temporal_dir = (
-            vdir / "temporal" / f"gap_{gap_days}_{transition}" / _scenario_key(strategy, sigma, mode)
-        )
-        if not skip_fusion:
-            try:
-                run_masked_fusion_season(
-                    season,
-                    site_position,
-                    site_name,
-                    strategy,
-                    sigma,
-                    mode,
-                    w0,
-                    w1,
-                    wh_ymd,
-                    temporal_dir,
-                )
-            except RuntimeError as e:
-                results.append(
-                    {
-                        "transition": transition,
-                        "gap_days": gap_days,
-                        "error": str(e),
-                    }
-                )
-                continue
-            fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
-        else:
-            fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
-
-        fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
-        wh_ts = whittaker_timeseries_gap_degraded(
-            base, strategy, w0, w1, withheld_iso
-        )
-        wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts)
-
-        row: dict = {
-            "transition": transition,
-            "gap_days": gap_days,
-            "prediction_date": pred,
-            "window_start": w0,
-            "window_end": w1,
-            "withheld_s2_filename": fn,
-            "temporal": {
-                "fused": fused_metrics,
-                "whittaker": wh_metrics,
-            },
-            "fusion_dir": str(temporal_dir),
-        }
-        if fused_metrics and nogap_nse.get("nse_pc") is not None:
-            g_rmse = fused_metrics.get("rmse")
-            ng_rmse = None
-            if nogap_metrics_path.is_file():
-                sk = _scenario_key(strategy, sigma, mode)
-                ng_rmse = (
-                    (json.loads(nogap_metrics_path.read_text()).get("temporal") or {})
-                    .get(sk, {})
-                    .get("rmse")
-                )
-            n_g = fused_metrics.get("nse_pc")
-            n_ng = nogap_nse["nse_pc"]
-            if g_rmse is not None and ng_rmse is not None:
-                row["delta_rmse"] = float(g_rmse - ng_rmse)
-            if n_g is not None and n_ng is not None:
-                row["delta_nse"] = float(n_ng - n_g)
-
-        fn_pc = (fused_metrics or {}).get("nse_pc")
-        wh_pc = (wh_metrics or {}).get("nse_pc")
-        row["utility_crossover_row"] = {
-            "transition": transition,
-            "gap_days": gap_days,
-            "nse_pc_fusion": fn_pc,
-            "nse_pc_whittaker": wh_pc,
-        }
-        crossover_rows.append(row["utility_crossover_row"])
-        results.append(row)
-
-    scenario = _scenario_key(strategy, sigma, mode)
-    payload = {
-        "site_name": site_name,
-        "season": season,
-        "scenario": scenario,
-        "tier": "temporal_nse_pc",
-        "manifest": str(vdir / "gap_manifest.json"),
-        "results": results,
-        "utility_crossover": {
-            scenario: {
-                "metric": "nse_pc_vs_phenocam_gcc90",
-                "first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker(
-                    crossover_rows,
-                    fusion_key="nse_pc_fusion",
-                    whittaker_key="nse_pc_whittaker",
-                ),
-                "by_gap": crossover_rows,
-            }
-        },
-    }
-    out_path = vdir / f"gap_metrics_{mode}.json"
-    out_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-    if mode == "bti":
-        # Legacy alias for backward-compatible readers.
-        (vdir / "gap_metrics.json").write_text(
-            json.dumps(payload, indent=2) + "\n", encoding="utf-8"
-        )
-    return out_path
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.")
-    ap.add_argument("--site", required=True)
-    ap.add_argument("--season", type=int, required=True)
-    ap.add_argument("--lat", type=float, required=True)
-    ap.add_argument("--lon", type=float, required=True)
-    ap.add_argument("--strategy", default="aggressive")
-    ap.add_argument("--sigma", type=int, default=20, choices=[20, 30])
-    ap.add_argument("--mode", default="bti", choices=["bti", "itb"])
-    ap.add_argument("--gap-days", type=int, action="append")
-    ap.add_argument("--transition", choices=list(TRANSITIONS), action="append")
-    ap.add_argument("--skip-manifest", action="store_true")
-    ap.add_argument("--skip-fusion", action="store_true")
-    ap.add_argument("--s2-calendar-strategy", default="aggressive")
-    args = ap.parse_args()
-    sigma_kw = 30 if args.sigma == 30 else None
-    out = run_temporal_pc(
-        args.site,
-        args.season,
-        (args.lat, args.lon),
-        args.strategy,
-        sigma_kw,
-        args.mode,
-        skip_manifest=args.skip_manifest,
-        skip_fusion=args.skip_fusion,
-        gap_days_filter=args.gap_days,
-        transition_filter=args.transition,
-        s2_calendar_strategy=args.s2_calendar_strategy,
-    )
-    print(out)
-
-
-if __name__ == "__main__":
-    main()
--- a/gap_validation/whittaker_compare.py
+++ b/gap_validation/whittaker_compare.py
@ -1,81 +0,0 @@
-"""Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
-
-from __future__ import annotations
-
-from datetime import date, datetime
-from pathlib import Path
-
-from metrics_stats import (
-    WHITTAKER_LAMBDA_DAYS_SQ,
-    _norm_date_key,
-    _s2_gcc_series_from_preselection,
-    _whittaker_smooth_dict,
-)
-
-
-def _date_in_window(dk: str, start: date, end: date) -> bool:
-    try:
-        d = datetime.strptime(dk[:10], "%Y-%m-%d").date()
-    except ValueError:
-        return False
-    return start <= d <= end
-
-
-def whittaker_gcc_on_gap_masked_series(
-    base: Path,
-    strategy: str,
-    prediction_iso: str,
-    withheld_iso: str,
-    *,
-    window_start_iso: str | None = None,
-    window_end_iso: str | None = None,
-    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
-) -> float | None:
-    """Whittaker on cloud-screened S2 GCC excluding gap-window dates and withheld day."""
-    pred_k = _norm_date_key(prediction_iso)
-    wh_k = _norm_date_key(withheld_iso)
-    if not pred_k or not wh_k:
-        return None
-    w0 = w1 = None
-    if window_start_iso and window_end_iso:
-        w0 = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
-        w1 = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
-    all_gcc, flags = _s2_gcc_series_from_preselection(base)
-    if not all_gcc:
-        return None
-    idx = 0 if strategy == "aggressive" else 1
-    kept = []
-    for d, g in all_gcc.items():
-        if d not in flags or flags[d][idx]:
-            continue
-        dk = _norm_date_key(d)
-        if not dk or dk == wh_k:
-            continue
-        if w0 is not None and w1 is not None and _date_in_window(dk, w0, w1):
-            continue
-        kept.append((d, g))
-    kept.sort(key=lambda t: t[0])
-    if len(kept) < 2:
-        return None
-    obs_d, obs_v = zip(*kept)
-    smooth = _whittaker_smooth_dict(obs_d, obs_v, lam)
-    return smooth.get(pred_k)
-
-
-def first_gap_where_fusion_below_whittaker(
-    rows: list[dict],
-    *,
-    fusion_key: str = "nse_s2",
-    whittaker_key: str = "nse_s2",
-) -> int | None:
-    """Smallest ``gap_days`` where fusion[metric] < whittaker[metric] (strict)."""
-    eligible = [
-        r
-        for r in rows
-        if r.get(fusion_key) is not None and r.get(whittaker_key) is not None
-    ]
-    eligible.sort(key=lambda r: (r.get("transition") or "", r["gap_days"]))
-    for r in eligible:
-        if r[fusion_key] < r[whittaker_key]:
-            return int(r["gap_days"])
-    return None
--- a/metrics_indices.py
+++ b/metrics_indices.py
@ -1,689 +0,0 @@
-"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
-
-import json
-import numpy as np
-import rasterio
-from rasterio.warp import transform as transform_coords
-from pathlib import Path
-from datetime import datetime
-
-from preselection import _sample_3x3
-
-RED_BAND = 3
-NIR_BAND = 4
-BLUE_BAND = 1
-GREEN_BAND = 2
-
-
-def _calculate_and_write_ndvi(input_file, output_file):
-    with rasterio.open(input_file) as src:
-        red = src.read(RED_BAND).astype(np.float32)
-        nir = src.read(NIR_BAND).astype(np.float32)
-
-        mask = (red > 0) & (nir > 0)
-        ndvi = np.zeros_like(red, dtype=np.float32)
-        ndvi[mask] = (nir[mask] - red[mask]) / (nir[mask] + red[mask])
-
-        profile = src.profile.copy()
-        profile.update(
-            {
-                "count": 1,
-                "dtype": "float32",
-                "nodata": 0,
-                "compress": "lzw",
-            }
-        )
-
-        with rasterio.open(output_file, "w", **profile) as dst:
-            dst.write(ndvi, 1)
-            dst.set_band_description(1, "NDVI")
-
-
-def _get_ndvi_value(ndvi_file, site_position):
-    try:
-        with rasterio.open(ndvi_file) as src:
-            lon, lat = site_position[1], site_position[0]
-            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-
-            # Check if point is within bounds
-            if not (
-                src.bounds.left <= x[0] <= src.bounds.right
-                and src.bounds.bottom <= y[0] <= src.bounds.top
-            ):
-                return None  # Point is outside raster bounds
-
-            samples = list(src.sample([(x[0], y[0])]))
-            if samples:
-                value = float(samples[0][0])
-                # Check if it's actually nodata (using raster's nodata value)
-                if src.nodata is not None and value == src.nodata:
-                    return None  # This is nodata, not a valid 0 value
-                if np.isnan(value):
-                    return None  # NaN is invalid
-                # 0 is a valid NDVI value (no vegetation), so return it
-                return value
-    except Exception as e:
-        print(f"Error sampling {ndvi_file.name}: {e}")
-        pass
-    return None
-
-
-def _create_timeseries_for_dir(
-    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
-):
-    print(f"[NDVI-{source_name}] Creating timeseries.json...")
-    timeseries = []
-
-    for input_file in sorted(input_dir.glob(pattern)):
-        if "DIST_CLOUD" in input_file.name:
-            continue
-
-        filename = input_file.name
-        parts = filename.replace(".geotiff", "").replace(".tif", "").split("_")
-        date_str = None
-
-        for part in parts:
-            if len(part) == 8 and part.isdigit():
-                date_str = part
-                break
-
-        if date_str:
-            try:
-                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
-            except ValueError:
-                date = date_str
-        else:
-            date_str = parts[0]
-            date = date_str
-            print(
-                f"[NDVI-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
-            )
-
-        ndvi_value, band_means = _sample_3x3(input_file, site_position)
-        blue_mean = band_means.get("b02") if band_means else None
-        if ndvi_value is None:
-            print(
-                f"[NDVI-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
-            )
-
-        entry = {"date": date, "filename": filename, "ndvi": ndvi_value}
-        if blue_mean is not None:
-            entry["blue"] = blue_mean
-        timeseries.append(entry)
-
-    timeseries.sort(key=lambda x: x["date"])
-    output_dir.mkdir(parents=True, exist_ok=True)
-    timeseries_file = output_dir / "timeseries.json"
-    with open(timeseries_file, "w") as f:
-        json.dump(timeseries, f, indent=2)
-
-    print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
-
-
-def _process_ndvi_files(
-    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
-):
-    output_dir.mkdir(parents=True, exist_ok=True)
-    print(f"[NDVI-{source_name}] Processing {input_dir}...")
-
-    geotiff_files = sorted(input_dir.glob(pattern))
-    if not geotiff_files:
-        print(f"[NDVI-{source_name}] No files found")
-        return
-
-    for geotiff_file in geotiff_files:
-        # Skip DIST_CLOUD files silently (single-band distance-to-clouds, not suitable for NDVI)
-        if "DIST_CLOUD" in geotiff_file.name:
-            continue
-
-        # Check if file has enough bands (need at least 4 for RED and NIR)
-        try:
-            with rasterio.open(geotiff_file) as src:
-                if src.count < 4:
-                    print(
-                        f"[NDVI-{source_name}] Skipping {geotiff_file.name} (only {src.count} band(s), need 4+)"
-                    )
-                    continue
-        except Exception as e:
-            print(
-                f"[NDVI-{source_name}] Skipping {geotiff_file.name} (error reading: {e})"
-            )
-            continue
-
-        output_file = output_dir / (
-            output_namer(geotiff_file) if output_namer else geotiff_file.name
-        )
-
-        _calculate_and_write_ndvi(geotiff_file, output_file)
-        print(f"[NDVI-{source_name}] Saved: {output_file}")
-
-
-def generate_ndvi_raw(season, site_position, site_name):
-    # No longer creating NDVI GeoTIFF files, only timeseries
-    pass
-
-
-def _get_output_name_prepared(geotiff_file):
-    if geotiff_file.suffix == ".tif":
-        if "REFL" in geotiff_file.stem:
-            # For S2: S2A_MSIL2A_20240101_REFL -> date is at index [2]
-            # For S3: composite_20240101.tif -> date is at index [1] after removing .tif
-            parts = geotiff_file.stem.split("_")
-            if len(parts) >= 3 and parts[0].startswith("S2"):
-                # S2 format: S2A_MSIL2A_YYYYMMDD_REFL
-                date_str = parts[2]
-            elif len(parts) >= 2 and parts[0] == "composite":
-                # S3 format: composite_YYYYMMDD
-                date_str = parts[1]
-            else:
-                # Fallback: try index [1] for other formats
-                date_str = parts[1] if len(parts) > 1 else parts[0]
-            return f"{date_str}_ndvi.geotiff"
-        return geotiff_file.name.replace(".tif", ".geotiff")
-    return geotiff_file.name
-
-
-def _fusion_namer(f):
-    date_str = f.stem.split("_")[1]
-    return f"{date_str}_ndvi.geotiff"
-
-
-def generate_ndvi_post_process(season, site_position, site_name):
-    # No longer creating NDVI GeoTIFF files, only timeseries
-    pass
-
-
-def create_ndvi_timeseries_post_process(season, site_position, site_name):
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [20, 30]:
-            processed_dir = f"processed_{strategy}_sigma{sigma}"
-            for source in ["s2", "s3"]:
-                input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
-                output_dir = Path(
-                    f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/"
-                )
-                _create_timeseries_for_dir(
-                    input_dir,
-                    output_dir,
-                    site_position,
-                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
-                )
-            input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
-            output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/")
-            _create_timeseries_for_dir(
-                input_dir,
-                output_dir,
-                site_position,
-                f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
-            )
-
-
-def _calculate_and_write_gcc(input_file, output_file):
-    with rasterio.open(input_file) as src:
-        blue = src.read(BLUE_BAND).astype(np.float32)
-        green = src.read(GREEN_BAND).astype(np.float32)
-        red = src.read(RED_BAND).astype(np.float32)
-
-        total = red + green + blue
-        mask = total > 0
-        gcc = np.zeros_like(green, dtype=np.float32)
-        gcc[mask] = green[mask] / total[mask]
-
-        profile = src.profile.copy()
-        profile.update(
-            {
-                "count": 1,
-                "dtype": "float32",
-                "nodata": 0,
-                "compress": "lzw",
-            }
-        )
-
-        with rasterio.open(output_file, "w", **profile) as dst:
-            dst.write(gcc, 1)
-            dst.set_band_description(1, "GCC")
-
-
-def _get_gcc_value(gcc_file, site_position):
-    try:
-        with rasterio.open(gcc_file) as src:
-            lon, lat = site_position[1], site_position[0]
-            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-
-            if not (
-                src.bounds.left <= x[0] <= src.bounds.right
-                and src.bounds.bottom <= y[0] <= src.bounds.top
-            ):
-                return None
-
-            samples = list(src.sample([(x[0], y[0])]))
-            if samples:
-                value = float(samples[0][0])
-                if src.nodata is not None and value == src.nodata:
-                    return None
-                if np.isnan(value):
-                    return None
-                return value
-    except Exception as e:
-        print(f"Error sampling {gcc_file.name}: {e}")
-        pass
-    return None
-
-
-def _get_gcc_from_original(input_file, site_position):
-    """Calculate GCC directly from original file without creating GeoTIFF."""
-    try:
-        with rasterio.open(input_file) as src:
-            if src.count == 1:
-                g = src.read(1).astype(np.float32)
-                lon, lat = site_position[1], site_position[0]
-                x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-                if not (
-                    src.bounds.left <= x[0] <= src.bounds.right
-                    and src.bounds.bottom <= y[0] <= src.bounds.top
-                ):
-                    return None
-                row, col = src.index(x[0], y[0])
-                if row < 0 or row >= src.height or col < 0 or col >= src.width:
-                    return None
-                r0, r1 = max(0, row - 1), min(src.height, row + 2)
-                c0, c1 = max(0, col - 1), min(src.width, col + 2)
-                win = g[r0:r1, c0:c1]
-                mask = np.isfinite(win) & (win > 0)
-                if not np.any(mask):
-                    return None
-                return float(np.mean(win[mask]))
-            if src.count < 3:
-                return None
-
-            blue = src.read(BLUE_BAND).astype(np.float32)
-            green = src.read(GREEN_BAND).astype(np.float32)
-            red = src.read(RED_BAND).astype(np.float32)
-
-            lon, lat = site_position[1], site_position[0]
-            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-
-            if not (
-                src.bounds.left <= x[0] <= src.bounds.right
-                and src.bounds.bottom <= y[0] <= src.bounds.top
-            ):
-                return None
-
-            row, col = src.index(x[0], y[0])
-            if row < 0 or row >= src.height or col < 0 or col >= src.width:
-                return None
-
-            # Extract 3x3 window with boundary handling
-            r0, r1 = max(0, row - 1), min(src.height, row + 2)
-            c0, c1 = max(0, col - 1), min(src.width, col + 2)
-            blue_window = blue[r0:r1, c0:c1]
-            green_window = green[r0:r1, c0:c1]
-            red_window = red[r0:r1, c0:c1]
-
-            # Calculate GCC for each pixel in window
-            total = red_window + green_window + blue_window
-            mask = (
-                (total > 0)
-                & ~np.isnan(total)
-                & (blue_window >= 0)
-                & (green_window >= 0)
-                & (red_window >= 0)
-            )
-            if not np.any(mask):
-                negative_pixels = np.sum(
-                    (blue_window < 0) | (green_window < 0) | (red_window < 0)
-                )
-                if negative_pixels > 0:
-                    print(
-                        f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)"
-                    )
-                return None
-
-            gcc_window = np.zeros_like(green_window, dtype=np.float32)
-            gcc_window[mask] = green_window[mask] / total[mask]
-
-            # Return mean of valid GCC values
-            valid_gcc = gcc_window[mask]
-            return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None
-    except Exception:
-        return None
-
-
-def _create_gcc_timeseries_for_dir(
-    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
-):
-    print(f"[GCC-{source_name}] Creating timeseries.json...")
-    timeseries = []
-
-    for input_file in sorted(input_dir.glob(pattern)):
-        if "DIST_CLOUD" in input_file.name:
-            continue
-
-        filename = input_file.name
-        parts = filename.replace(".geotiff", "").replace(".tif", "").split("_")
-        date_str = None
-
-        for part in parts:
-            if len(part) == 8 and part.isdigit():
-                date_str = part
-                break
-
-        if date_str:
-            try:
-                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
-            except ValueError:
-                date = date_str
-        else:
-            date_str = parts[0]
-            date = date_str
-            print(
-                f"[GCC-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
-            )
-
-        gcc_value = _get_gcc_from_original(input_file, site_position)
-        if gcc_value is None:
-            print(
-                f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
-            )
-
-        timeseries.append(
-            {"date": date, "filename": filename, "greenness_index": gcc_value}
-        )
-
-    timeseries.sort(key=lambda x: x["date"])
-    output_dir.mkdir(parents=True, exist_ok=True)
-    timeseries_file = output_dir / "timeseries.json"
-    with open(timeseries_file, "w") as f:
-        json.dump(timeseries, f, indent=2)
-
-    print(f"[GCC-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
-
-
-def _process_gcc_files(
-    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
-):
-    output_dir.mkdir(parents=True, exist_ok=True)
-    print(f"[GCC-{source_name}] Processing {input_dir}...")
-
-    geotiff_files = sorted(input_dir.glob(pattern))
-    if not geotiff_files:
-        print(f"[GCC-{source_name}] No files found")
-        return
-
-    for geotiff_file in geotiff_files:
-        if "DIST_CLOUD" in geotiff_file.name:
-            continue
-
-        try:
-            with rasterio.open(geotiff_file) as src:
-                if src.count < 3:
-                    print(
-                        f"[GCC-{source_name}] Skipping {geotiff_file.name} (only {src.count} band(s), need 3+)"
-                    )
-                    continue
-        except Exception as e:
-            print(
-                f"[GCC-{source_name}] Skipping {geotiff_file.name} (error reading: {e})"
-            )
-            continue
-
-        output_file = output_dir / (
-            output_namer(geotiff_file) if output_namer else geotiff_file.name
-        )
-
-        _calculate_and_write_gcc(geotiff_file, output_file)
-        print(f"[GCC-{source_name}] Saved: {output_file}")
-
-
-def generate_gcc_post_process(season, site_position, site_name):
-    # No longer creating GCC GeoTIFF files, only timeseries
-    pass
-
-
-def create_gcc_timeseries_post_process(season, site_position, site_name):
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [20, 30]:
-            processed_dir = f"processed_{strategy}_sigma{sigma}"
-            for source in ["s2", "s3"]:
-                input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
-                output_dir = Path(
-                    f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/"
-                )
-                _create_gcc_timeseries_for_dir(
-                    input_dir,
-                    output_dir,
-                    site_position,
-                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
-                )
-            input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
-            output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/")
-            _create_gcc_timeseries_for_dir(
-                input_dir,
-                output_dir,
-                site_position,
-                f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
-            )
-            itb_dir = f"processed_{strategy}_itb_sigma{sigma}"
-            base_itb = Path(f"data/{site_name}/{season}/{itb_dir}")
-            if not base_itb.exists():
-                continue
-            for source in ["s2", "s3"]:
-                inp, out = base_itb / source, base_itb / "gcc" / source
-                _create_gcc_timeseries_for_dir(
-                    inp,
-                    out,
-                    site_position,
-                    f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}",
-                )
-            _create_gcc_timeseries_for_dir(
-                base_itb / "fusion",
-                base_itb / "gcc" / "fusion",
-                site_position,
-                f"POST-ITB-FUSION-{strategy}-σ{sigma}",
-            )
-
-
-def _get_bands_from_original(input_file, site_position):
-    """Extract mean B02, B03, B04, B8A from 3x3 window at site. Returns dict or None."""
-    try:
-        with rasterio.open(input_file) as src:
-            if src.count < 4:
-                return None
-            lon, lat = site_position[1], site_position[0]
-            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-            if not (
-                src.bounds.left <= x[0] <= src.bounds.right
-                and src.bounds.bottom <= y[0] <= src.bounds.top
-            ):
-                return None
-            row, col = src.index(x[0], y[0])
-            r0, r1 = max(0, row - 1), min(src.height, row + 2)
-            c0, c1 = max(0, col - 1), min(src.width, col + 2)
-            bands = [
-                src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32)
-                for i in range(4)
-            ]
-            mask = ~np.any([np.isnan(b) for b in bands], axis=0)
-            mask &= np.all([b > 0 for b in bands], axis=0)
-            if not np.any(mask):
-                return None
-            return {
-                "b02": float(np.mean(bands[0][mask])),
-                "b03": float(np.mean(bands[1][mask])),
-                "b04": float(np.mean(bands[2][mask])),
-                "b8a": float(np.mean(bands[3][mask])),
-            }
-    except Exception:
-        return None
-
-
-def _create_bands_timeseries_for_dir(
-    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
-):
-    print(f"[BANDS-{source_name}] Creating timeseries.json...")
-    timeseries = []
-    for f in sorted(input_dir.glob(pattern)):
-        if "DIST_CLOUD" in f.name:
-            continue
-        parts = f.name.replace(".geotiff", "").replace(".tif", "").split("_")
-        date_str = next((p for p in parts if len(p) == 8 and p.isdigit()), None)
-        if not date_str:
-            continue
-        date = datetime.strptime(date_str, "%Y%m%d").isoformat()
-        bands = _get_bands_from_original(f, site_position)
-        timeseries.append({"date": date, "filename": f.name, **(bands or {})})
-    timeseries.sort(key=lambda x: x["date"])
-    output_dir.mkdir(parents=True, exist_ok=True)
-    (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
-    print(
-        f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
-    )
-
-
-def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
-    """Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv."""
-
-    def load(p):
-        p = Path(p)
-        if not p.exists():
-            return []
-        try:
-            return json.loads((p / "timeseries.json").read_text())
-        except Exception:
-            return []
-
-    ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)}
-    gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)}
-    bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)}
-    keys = sorted(set(ndvi) | set(gcc) | set(bands))
-    merged = []
-    for k in keys:
-        r = {"date": k, "filename": ""}
-        for d in [ndvi.get(k, {}), gcc.get(k, {}), bands.get(k, {})]:
-            r.update({x: d[x] for x in d if x not in ("date",)})
-        merged.append(r)
-    export_dir.mkdir(parents=True, exist_ok=True)
-    (export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))
-    cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]
-
-    def esc(v):
-        s = str(v) if v is not None else ""
-        return f'"{s}"' if "," in s or '"' in s else s
-
-    rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
-    (export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
-    print(
-        f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
-    )
-
-
-def create_prepared_fusion_timeseries(season, site_position, site_name):
-    """Generate NDVI, GCC, and band timeseries for prepared S2/S3 and fusion outputs."""
-    for strategy in ["aggressive", "nonaggressive"]:
-        base = Path(f"data/{site_name}/{season}/prepared_{strategy}")
-        for source in ["s2", "s3"]:
-            inp = base / source
-            if inp.exists():
-                _create_timeseries_for_dir(
-                    inp,
-                    base / "ndvi" / source,
-                    site_position,
-                    f"PREPARED-{source.upper()}-{strategy}",
-                    "*.tif",
-                )
-                _create_gcc_timeseries_for_dir(
-                    inp,
-                    base / "gcc" / source,
-                    site_position,
-                    f"PREPARED-{source.upper()}-{strategy}",
-                    "*.tif",
-                )
-                _create_bands_timeseries_for_dir(
-                    inp,
-                    base / "bands" / source,
-                    site_position,
-                    f"PREPARED-{source.upper()}-{strategy}",
-                    "*.tif",
-                )
-                _write_export(
-                    base / "ndvi" / source,
-                    base / "gcc" / source,
-                    base / "bands" / source,
-                    base / "export" / source,
-                )
-        for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
-            inp = base / fusion_sub
-            if inp.exists():
-                _create_timeseries_for_dir(
-                    inp,
-                    base / "ndvi" / fusion_sub,
-                    site_position,
-                    f"FUSION-{strategy}-σ{sig or 20}",
-                    "*.tif",
-                )
-                _create_gcc_timeseries_for_dir(
-                    inp,
-                    base / "gcc" / fusion_sub,
-                    site_position,
-                    f"FUSION-{strategy}-σ{sig or 20}",
-                    "*.tif",
-                )
-                _create_bands_timeseries_for_dir(
-                    inp,
-                    base / "bands" / fusion_sub,
-                    site_position,
-                    f"FUSION-{strategy}-σ{sig or 20}",
-                    "*.tif",
-                )
-                _write_export(
-                    base / "ndvi" / fusion_sub,
-                    base / "gcc" / fusion_sub,
-                    base / "bands" / fusion_sub,
-                    base / "export" / fusion_sub,
-                )
-        itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb")
-        if not itb.exists():
-            continue
-        for source in ["s2", "s3"]:
-            inp = itb / source
-            if inp.exists():
-                _create_gcc_timeseries_for_dir(
-                    inp,
-                    itb / "gcc" / source,
-                    site_position,
-                    f"PREPARED-ITB-{source.upper()}-{strategy}",
-                    "*.tif",
-                )
-        for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
-            inp = itb / fusion_sub
-            if inp.exists():
-                _create_gcc_timeseries_for_dir(
-                    inp,
-                    itb / "gcc" / fusion_sub,
-                    site_position,
-                    f"FUSION-ITB-{strategy}-σ{sig or 20}",
-                    "*.tif",
-                )
-
-
-def create_bands_timeseries_post_process(season, site_position, site_name):
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [20, 30]:
-            processed_dir = f"processed_{strategy}_sigma{sigma}"
-            base = Path(f"data/{site_name}/{season}/{processed_dir}")
-            for source in ["s2", "s3", "fusion"]:
-                inp, out = base / source, base / "bands" / source
-                if inp.exists():
-                    _create_bands_timeseries_for_dir(
-                        inp,
-                        out,
-                        site_position,
-                        f"POST-{source.upper()}-{strategy}-σ{sigma}",
-                        "*.geotiff",
-                    )
-                    _write_export(
-                        base / "ndvi" / source,
-                        base / "gcc" / source,
-                        base / "bands" / source,
-                        base / "export" / source,
-                    )
--- a/metrics_stats.py
+++ b/metrics_stats.py
@ -1,529 +0,0 @@
-"""Metrics and statistics: temporal metrics and PhenoCam stats."""
-
-import json
-import numpy as np
-from pathlib import Path
-from datetime import datetime, timedelta
-from scipy import sparse
-from scipy.sparse.linalg import spsolve
-from scipy.stats import pearsonr
-
-WHITTAKER_LAMBDA_DAYS_SQ = 400.0
-
-
-def _norm_date_key(s):
-    if s is None:
-        return None
-    t = str(s).strip()
-    return t.split("T")[0][:10] if "T" in t else t[:10]
-
-
-def load_timeseries(filepath):
-    """Load JSON timeseries and return dict mapping date -> value."""
-    if not Path(filepath).exists():
-        return {}
-    with open(filepath) as f:
-        data = json.load(f)
-    return {item["date"]: item.get("greenness_index") for item in data}
-
-
-def match_dates(fusion_ts, phenocam_ts):
-    """Match dates between timeseries, return aligned numpy arrays (filter None values)."""
-
-    def _bundle(m):
-        out = {}
-        for k, v in m.items():
-            nk = _norm_date_key(k)
-            if nk and nk not in out:
-                out[nk] = v
-        return out
-
-    fa, pa = _bundle(fusion_ts), _bundle(phenocam_ts)
-    common_dates = set(fa) & set(pa)
-    fusion_vals = []
-    phenocam_vals = []
-    dates = []
-
-    for date in sorted(common_dates):
-        fusion_val = fa[date]
-        phenocam_val = pa[date]
-        if fusion_val is not None and phenocam_val is not None:
-            fusion_vals.append(fusion_val)
-            phenocam_vals.append(phenocam_val)
-            dates.append(date)
-
-    return np.array(fusion_vals), np.array(phenocam_vals), dates
-
-
-def pearson_correlation(y_true, y_pred):
-    """Calculate Pearson correlation coefficient r."""
-    if len(y_true) < 2 or np.std(y_true) == 0 or np.std(y_pred) == 0:
-        return None
-    r, _ = pearsonr(y_true, y_pred)
-    return float(r)
-
-
-def r_squared(y_true, y_pred):
-    """Generalized R² vs predicting mean(y_true); can be negative. Same formula as ``nse`` with the same arguments; not Pearson r squared."""
-    if len(y_true) < 2 or np.std(y_true) == 0:
-        return None
-    ss_res = np.sum((y_true - y_pred) ** 2)
-    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
-    if ss_tot == 0:
-        return None
-    return float(1 - (ss_res / ss_tot))
-
-
-def rmse(y_true, y_pred):
-    """Calculate Root Mean Square Error."""
-    if len(y_true) == 0:
-        return None
-    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))
-
-
-def mae(y_true, y_pred):
-    """Calculate Mean Absolute Error."""
-    if len(y_true) == 0:
-        return None
-    return float(np.mean(np.abs(y_true - y_pred)))
-
-
-def nrmse(y_true, y_pred):
-    """Calculate normalized RMSE (RMSE / mean(y_true))."""
-    if len(y_true) == 0:
-        return None
-    mean_val = np.mean(y_true)
-    if mean_val == 0:
-        return None
-    rmse_val = rmse(y_true, y_pred)
-    return float(rmse_val / mean_val) if rmse_val is not None else None
-
-
-def nse(y_true, y_pred):
-    """Calculate Nash-Sutcliffe Efficiency."""
-    if len(y_true) < 2:
-        return None
-    numerator = np.sum((y_true - y_pred) ** 2)
-    denominator = np.sum((y_true - np.mean(y_true)) ** 2)
-    if denominator == 0:
-        return None
-    return float(1 - (numerator / denominator))
-
-
-def residual_vs_phenocam(fusion_ts, phenocam_ts):
-    """Stats of (fused_GCC − PhenoCam_GCC) on matched dates; None if too few points.
-
-    Mean: positive → fusion systematically above PhenoCam; negative → below; ~0 → unbiased mean.
-    Compare BtI vs ItB means at same strategy/σ (``derived.bti_vs_itb_mean_residual``): closer to 0 → less mean bias vs PhenoCam.
-    """
-    yf, yp, _dates = match_dates(fusion_ts, phenocam_ts)
-    if len(yf) < 2:
-        return None
-    r = yf - yp
-    return {
-        "mean": float(np.mean(r)),
-        "std": float(np.std(r)),
-        "mae": float(np.mean(np.abs(r))),
-        "rmse": float(np.sqrt(np.mean(r**2))),
-        "n_samples": int(len(r)),
-    }
-
-
-def calculate_temporal_metrics(fusion_ts, phenocam_ts):
-    """Temporal metrics vs PhenoCam (nse_pc; nse is the same value)."""
-    fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts)
-
-    if len(fusion_vals) < 2:
-        return None
-
-    n_pc = nse(phenocam_vals, fusion_vals)
-    metrics = {
-        "pearson_r": pearson_correlation(phenocam_vals, fusion_vals),
-        "r_squared": r_squared(phenocam_vals, fusion_vals),
-        "rmse": rmse(phenocam_vals, fusion_vals),
-        "mae": mae(phenocam_vals, fusion_vals),
-        "nrmse": nrmse(phenocam_vals, fusion_vals),
-        "nse_pc": n_pc,
-        "nse": n_pc,
-        "n_samples": len(fusion_vals),
-        "date_range": {"start": dates[0], "end": dates[-1]} if dates else None,
-    }
-    rv = residual_vs_phenocam(fusion_ts, phenocam_ts)
-    if rv:
-        metrics["residual_vs_phenocam"] = rv
-    return metrics
-
-
-def derived_tier1(temporal: dict) -> dict:
-    """ΔNSE_PC (σ20 − σ30) and paired BtI vs ItB mean residual; needs temporal fusion keys.
-
-    ΔNSE_PC > 0 → NSE_PC higher at σ=20 than σ=30 (tighter EFAST temporal kernel wins).
-    ΔNSE_PC < 0 → σ=30 wins (broader smoothing matches PhenoCam better).
-    """
-    d_nse = {"bti": {}, "itb": {}}
-    for strategy in ("aggressive", "nonaggressive"):
-        for mode, suf in (("bti", ""), ("itb", "_itb")):
-            k20 = f"{strategy}_sigma20{suf}"
-            k30 = f"{strategy}_sigma30{suf}"
-            n20 = (temporal.get(k20) or {}).get("nse_pc")
-            n30 = (temporal.get(k30) or {}).get("nse_pc")
-            if isinstance(n20, (int, float)) and isinstance(n30, (int, float)):
-                d_nse[mode][strategy] = float(n20 - n30)
-            else:
-                d_nse[mode][strategy] = None
-
-    paired = []
-    for strategy in ("aggressive", "nonaggressive"):
-        for sig in (20, 30):
-            kb, ki = f"{strategy}_sigma{sig}", f"{strategy}_sigma{sig}_itb"
-            mb = (temporal.get(kb) or {}).get("residual_vs_phenocam", {}).get("mean")
-            mi = (temporal.get(ki) or {}).get("residual_vs_phenocam", {}).get("mean")
-            paired.append(
-                {
-                    "strategy": strategy,
-                    "sigma": sig,
-                    "mean_residual_bti": float(mb)
-                    if isinstance(mb, (int, float))
-                    else None,
-                    "mean_residual_itb": float(mi)
-                    if isinstance(mi, (int, float))
-                    else None,
-                }
-            )
-    return {
-        "delta_nse_pc_sigma20_minus_sigma30": d_nse,
-        "bti_vs_itb_mean_residual": paired,
-    }
-
-
-MATCHED_PAIR_CONFIGS = (
-    "aggressive_sigma20",
-    "aggressive_sigma30",
-    "nonaggressive_sigma20",
-    "nonaggressive_sigma30",
-)
-
-
-def derived_matched_pair_workflow(temporal: dict) -> dict:
-    """Per-config BtI vs ItB NSE_PC/RMSE pairs and site-level consistency flags."""
-    per_config = []
-    nse_deltas: list[float] = []
-    nse_bti_wins_count = 0
-    residual_bti_wins_count = 0
-
-    for config in MATCHED_PAIR_CONFIGS:
-        kb = config
-        ki = f"{config}_itb"
-        tb = temporal.get(kb) or {}
-        ti = temporal.get(ki) or {}
-        nse_bti = tb.get("nse_pc")
-        nse_itb = ti.get("nse_pc")
-        rmse_bti = tb.get("rmse")
-        rmse_itb = ti.get("rmse")
-        mb = (tb.get("residual_vs_phenocam") or {}).get("mean")
-        mi = (ti.get("residual_vs_phenocam") or {}).get("mean")
-
-        delta_nse = None
-        delta_rmse = None
-        bti_wins = None
-        residual_bti_wins = None
-
-        if isinstance(nse_bti, (int, float)) and isinstance(nse_itb, (int, float)):
-            delta_nse = float(nse_bti) - float(nse_itb)
-            bti_wins = delta_nse > 0
-            nse_deltas.append(delta_nse)
-            if bti_wins:
-                nse_bti_wins_count += 1
-
-        if isinstance(rmse_bti, (int, float)) and isinstance(rmse_itb, (int, float)):
-            delta_rmse = float(rmse_bti) - float(rmse_itb)
-
-        if isinstance(mb, (int, float)) and isinstance(mi, (int, float)):
-            if float(mb) > float(mi):
-                residual_bti_wins_count += 1
-                residual_bti_wins = True
-            elif float(mb) < float(mi):
-                residual_bti_wins = False
-            else:
-                residual_bti_wins = None
-
-        per_config.append(
-            {
-                "config": config,
-                "nse_pc_bti": float(nse_bti) if isinstance(nse_bti, (int, float)) else None,
-                "nse_pc_itb": float(nse_itb) if isinstance(nse_itb, (int, float)) else None,
-                "rmse_bti": float(rmse_bti) if isinstance(rmse_bti, (int, float)) else None,
-                "rmse_itb": float(rmse_itb) if isinstance(rmse_itb, (int, float)) else None,
-                "delta_nse_bti_minus_itb": delta_nse,
-                "delta_rmse_bti_minus_itb": delta_rmse,
-                "bti_wins": bti_wins,
-                "residual_bti_wins": residual_bti_wins,
-            }
-        )
-
-    mean_delta_nse = (
-        float(sum(nse_deltas) / len(nse_deltas)) if nse_deltas else None
-    )
-    return {
-        "per_config": per_config,
-        "consistency": nse_bti_wins_count,
-        "nse_bti_wins_count": nse_bti_wins_count,
-        "residual_bti_wins_count": residual_bti_wins_count,
-        "residual_nse_mismatch": residual_bti_wins_count != nse_bti_wins_count,
-        "mean_delta_nse": mean_delta_nse,
-    }
-
-
-def calculate_phenocam_stats(phenocam_ts):
-    """Calculate phenocam summary statistics."""
-    values = [v for v in phenocam_ts.values() if v is not None]
-    if len(values) == 0:
-        return None
-
-    vals = np.array(values)
-    return {
-        "mean": float(np.mean(vals)),
-        "std": float(np.std(vals)),
-        "min": float(np.min(vals)),
-        "max": float(np.max(vals)),
-        "n_samples": len(vals),
-    }
-
-
-def _s2_gcc_series_from_preselection(base: Path):
-    """Build the raw S2 GCC series from s2_preselection.json.
-
-    Uses the 3x3 site-window band means stored per raw S2 acquisition and
-    computes GCC = b03 / (b02 + b03 + b04). Scale cancels, so DN vs
-    reflectance is irrelevant. Returns (all_gcc, flags) where all_gcc maps
-    YYYY-MM-DD -> gcc for every row with a positive band sum, and flags maps
-    the same date key -> (excluded_aggressive, excluded_nonaggressive).
-    """
-    path = base / "raw" / "preselection" / "s2_preselection.json"
-    if not path.exists():
-        return {}, {}
-    with open(path) as f:
-        rows = json.load(f)
-    all_gcc: dict = {}
-    flags: dict = {}
-    for e in rows:
-        nk = _norm_date_key(e.get("date"))
-        if not nk:
-            continue
-        try:
-            b02 = float(e.get("b02"))
-            b03 = float(e.get("b03"))
-            b04 = float(e.get("b04"))
-        except (TypeError, ValueError):
-            continue
-        total = b02 + b03 + b04
-        if not np.isfinite(total) or total <= 0:
-            continue
-        gcc = b03 / total
-        if not np.isfinite(gcc):
-            continue
-        if nk in all_gcc:
-            continue
-        all_gcc[nk] = float(gcc)
-        flags[nk] = (
-            bool(e.get("excluded_aggressive")),
-            bool(e.get("excluded_nonaggressive")),
-        )
-    return all_gcc, flags
-
-
-def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
-    """Daily Whittaker (weights 1 at obs); returns {YYYY-MM-DD: z}."""
-    pairs = [
-        (_norm_date_key(d), float(v))
-        for d, v in zip(obs_dates, obs_values)
-        if v is not None and _norm_date_key(d)
-    ]
-    if len(pairs) < 2:
-        return {}
-    days = sorted({p[0] for p in pairs})
-    t0 = datetime.strptime(days[0], "%Y-%m-%d").date()
-    t1 = datetime.strptime(days[-1], "%Y-%m-%d").date()
-    n = (t1 - t0).days + 1
-    if n < n_min:
-        return {}
-
-    w = np.zeros(n)
-    y = np.zeros(n)
-    for dk, val in pairs:
-        i = (datetime.strptime(dk, "%Y-%m-%d").date() - t0).days
-        if 0 <= i < n:
-            w[i] = 1.0
-            y[i] = val
-
-    D = sparse.diags(
-        [1.0, -2.0, 1.0], [0, 1, 2], shape=(n - 2, n), format="csc", dtype=np.float64
-    )
-    H = D.T @ D
-    Wm = sparse.diags(w.astype(np.float64), format="csc")
-    z = spsolve(Wm + lam * H, w * y)
-    out = {}
-    for i in range(n):
-        out[(t0 + timedelta(days=i)).isoformat()] = float(z[i])
-    return out
-
-
-def calculate_all_metrics(season, site_name, site_position):
-    """Calculate metrics for all 4 scenarios and save to JSON."""
-    del site_position
-    results = {"temporal": {}}
-    base = Path(f"data/{site_name}/{season}")
-
-    # Load phenocam timeseries once (same for all scenarios)
-    phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json"
-    phenocam_ts = load_timeseries(phenocam_ts_path)
-
-    if not phenocam_ts:
-        print("[METRICS] Warning: No phenocam data found")
-        return results
-
-    # Calculate phenocam stats
-    phenocam_stats = calculate_phenocam_stats(phenocam_ts)
-    if phenocam_stats:
-        results["phenocam_stats"] = phenocam_stats
-
-    from phenocam_snr import compute_snr, load_phenocam_snr, write_phenocam_snr
-
-    snr_info = load_phenocam_snr(site_name, season, base=Path("data"))
-    if not snr_info:
-        write_phenocam_snr(
-            site_name, season, base=Path("data"), metrics=results, fetch_if_missing=True
-        )
-        snr_info = load_phenocam_snr(site_name, season, base=Path("data"))
-    if not snr_info:
-        snr_info = compute_snr(
-            site_name, season, base=Path("data"), metrics=results, fetch_if_missing=True
-        )
-    if snr_info.get("snr") is not None:
-        results["phenocam_snr"] = {
-            "amplitude": snr_info.get("amplitude"),
-            "spline_rmse_gcc90": snr_info.get("spline_rmse_gcc90"),
-            "snr": snr_info.get("snr"),
-        }
-
-    baseline = {}
-    all_gcc, flags = _s2_gcc_series_from_preselection(base)
-    if all_gcc:
-        m0 = calculate_temporal_metrics(all_gcc, phenocam_ts)
-        if m0:
-            baseline["s2"] = m0
-        for strategy, flag_idx in (("aggressive", 0), ("nonaggressive", 1)):
-            kept_items = sorted(
-                (
-                    (d, g)
-                    for d, g in all_gcc.items()
-                    if d in flags and not flags[d][flag_idx]
-                ),
-                key=lambda x: x[0],
-            )
-            if not kept_items:
-                continue
-            kept_ts = dict(kept_items)
-            mcf = calculate_temporal_metrics(kept_ts, phenocam_ts)
-            if mcf:
-                baseline.setdefault("s2_cloudfree", {})[strategy] = mcf
-            obs_d, obs_v = zip(*kept_items)
-            smooth = _whittaker_smooth_dict(obs_d, obs_v, WHITTAKER_LAMBDA_DAYS_SQ)
-            if smooth:
-                mw = calculate_temporal_metrics(smooth, phenocam_ts)
-                if mw:
-                    baseline.setdefault("s2_whittaker_lambda400", {})[strategy] = mw
-
-    for strategy in ("aggressive", "nonaggressive"):
-        p = base / f"processed_{strategy}_sigma20" / "gcc" / "s3" / "timeseries.json"
-        if not p.exists():
-            continue
-        s3_ts = load_timeseries(p)
-        if s3_ts:
-            m3 = calculate_temporal_metrics(s3_ts, phenocam_ts)
-            if m3:
-                baseline.setdefault("s3", {})[strategy] = m3
-
-    if baseline:
-        results["baseline"] = baseline
-
-    # Calculate fusion metrics for each scenario
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [20, 30]:
-            scenario_name = f"{strategy}_sigma{sigma}"
-            print(f"[METRICS] Calculating metrics for {scenario_name}...")
-
-            processed_dir = f"processed_{strategy}_sigma{sigma}"
-
-            # Load fusion timeseries
-            fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
-            fusion_ts = load_timeseries(fusion_ts_path)
-
-            if not fusion_ts:
-                print(
-                    f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping"
-                )
-                continue
-
-            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
-            if temporal_metrics:
-                results["temporal"][scenario_name] = temporal_metrics
-
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [20, 30]:
-            scenario_name = f"{strategy}_sigma{sigma}_itb"
-            processed_dir = f"processed_{strategy}_itb_sigma{sigma}"
-            fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
-            fusion_ts = load_timeseries(fusion_ts_path)
-            if not fusion_ts:
-                print(
-                    f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping"
-                )
-                continue
-            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
-            if temporal_metrics:
-                results["temporal"][scenario_name] = temporal_metrics
-
-    if results["temporal"]:
-        derived = derived_tier1(results["temporal"])
-        derived["matched_pair_workflow"] = derived_matched_pair_workflow(
-            results["temporal"]
-        )
-        results["derived"] = derived
-
-    # Save results
-    output_path = Path(f"data/{site_name}/{season}/metrics.json")
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(output_path, "w") as f:
-        json.dump(results, f, indent=2)
-    print(f"[METRICS] Saved results to {output_path}")
-
-    return results
-
-
-def main():
-    """Standalone script entry point."""
-    import sys
-
-    if len(sys.argv) < 4:
-        print("Usage: metrics_stats.py <season> <site_name> <lat> <lon>")
-        print("Example: metrics_stats.py 2024 innsbruck 47.116171 11.320308")
-        sys.exit(1)
-
-    season = int(sys.argv[1])
-    site_name = sys.argv[2]
-    site_position = (float(sys.argv[3]), float(sys.argv[4]))
-
-    results = calculate_all_metrics(season, site_name, site_position)
-
-    # Save results
-    output_path = Path(f"data/{site_name}/{season}/metrics.json")
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(output_path, "w") as f:
-        json.dump(results, f, indent=2)
-
-    print(f"[METRICS] Saved results to {output_path}")
-
-
-if __name__ == "__main__":
-    main()
--- a/phenocam_snr.py
+++ b/phenocam_snr.py
@ -1,328 +0,0 @@
-"""PhenoCam signal-to-noise ratio for aggregate utility eligibility (Richardson et al., 2018)."""
-
-from __future__ import annotations
-
-import json
-import re
-from pathlib import Path
-
-import requests
-
-PHENOCAM_API = "https://phenocam.nau.edu/api"
-SPLINE_RMSE_RE = re.compile(
-    r"^\s*#\s*Spline\s+RMSE\s+gcc_90\s*:\s*([0-9.eE+-]+)\s*$",
-    re.IGNORECASE,
-)
-
-PRIMARY_SEASON: dict[str, int] = {
-    "forthgr": 2024,
-    "innsbruck": 2024,
-    "pitsalu": 2024,
-    "vindeln2": 2023,
-    "sunflowerjerez1": 2024,
-    "institutekarnobat": 2024,
-}
-
-# PhenoCam ROI type codes for archive URLs (first ROI used by acquisition when multiple exist).
-SITE_ROITYPE: dict[str, str] = {
-    "forthgr": "AG",
-    "innsbruck": "GR",
-    "pitsalu": "WL",
-    "vindeln2": "MX",
-    "sunflowerjerez1": "AG",
-    "institutekarnobat": "AG",
-}
-
-PHENOCAM_ARCHIVE = "https://phenocam.nau.edu/data/archive"
-
-
-def phenocam_snr_path(site_name: str, season: int, base: Path | None = None) -> Path:
-    root = base or Path("data")
-    return root / site_name / str(season) / "raw" / "phenocam" / "phenocam_snr.json"
-
-
-def parse_spline_rmse_gcc90(text: str) -> float | None:
-    """Parse ``# Spline RMSE gcc_90: <value>`` from transition-dates CSV header."""
-    for line in text.splitlines():
-        m = SPLINE_RMSE_RE.match(line)
-        if m:
-            try:
-                return float(m.group(1))
-            except ValueError:
-                return None
-    return None
-
-
-def transition_dates_archive_url(site_name: str, roitype: str, seq: int = 1000) -> str:
-    return (
-        f"{PHENOCAM_ARCHIVE}/{site_name}/ROI/"
-        f"{site_name}_{roitype}_{seq}_1day_transition_dates.csv"
-    )
-
-
-def transition_dates_url(site_name: str) -> str | None:
-    """Return ``one_day_transition_dates`` URL for the site's primary ROI."""
-    roitype = SITE_ROITYPE.get(site_name)
-    if roitype:
-        for seq in (1000, 2000, 1001):
-            url = transition_dates_archive_url(site_name, roitype, seq)
-            try:
-                r = requests.head(url, timeout=15, allow_redirects=True)
-                if r.status_code == 200:
-                    return url
-            except requests.RequestException:
-                continue
-    try:
-        url = f"{PHENOCAM_API}/roilists/"
-        params: dict | None = {"site": site_name}
-        while url:
-            r = requests.get(url, params=params, timeout=30)
-            r.raise_for_status()
-            data = r.json()
-            for roi in data.get("results", []):
-                if roi.get("site") == site_name:
-                    td = roi.get("one_day_transition_dates")
-                    if td:
-                        return td
-            url = data.get("next")
-            params = None
-    except requests.RequestException:
-        pass
-    return None
-
-
-def fetch_spline_rmse_from_archive(site_name: str) -> float | None:
-    """Fetch spline RMSE via PhenoCam archive URL (fast path)."""
-    roitype = SITE_ROITYPE.get(site_name)
-    if not roitype:
-        return None
-    for seq in (1000, 2000, 1001):
-        url = transition_dates_archive_url(site_name, roitype, seq)
-        try:
-            r = requests.get(url, timeout=20)
-            if r.status_code != 200:
-                continue
-            rmse = parse_spline_rmse_gcc90(r.text)
-            if rmse is not None:
-                return rmse
-        except requests.RequestException:
-            continue
-    return None
-
-
-def fetch_spline_rmse_gcc90(site_name: str) -> float | None:
-    """Download transition-dates file header and return spline RMSE for gcc_90."""
-    rmse = fetch_spline_rmse_from_archive(site_name)
-    if rmse is not None:
-        return rmse
-    td_url = transition_dates_url(site_name)
-    if not td_url:
-        return None
-    try:
-        r = requests.get(td_url, timeout=30)
-        r.raise_for_status()
-        return parse_spline_rmse_gcc90(r.text)
-    except requests.RequestException:
-        return None
-
-
-def season_amplitude(
-    site_name: str,
-    season: int,
-    *,
-    base: Path | None = None,
-    metrics: dict | None = None,
-) -> float | None:
-    """Seasonal amplitude max(gcc_90) - min(gcc_90) over the evaluation season."""
-    if metrics:
-        ps = metrics.get("phenocam_stats") or {}
-        mn, mx = ps.get("min"), ps.get("max")
-        if isinstance(mn, (int, float)) and isinstance(mx, (int, float)):
-            return float(mx - mn)
-
-    root = base or Path("data")
-    p = root / site_name / str(season) / "raw" / "phenocam" / "phenocam_gcc.json"
-    if not p.is_file():
-        return None
-    data = json.loads(p.read_text(encoding="utf-8"))
-    if isinstance(data, list):
-        vals = [
-            it.get("greenness_index")
-            for it in data
-            if isinstance(it.get("greenness_index"), (int, float))
-        ]
-    elif isinstance(data, dict):
-        vals = [v for v in data.values() if isinstance(v, (int, float))]
-    else:
-        return None
-    if not vals:
-        return None
-    return float(max(vals) - min(vals))
-
-
-def compute_snr(
-    site_name: str,
-    season: int,
-    *,
-    base: Path | None = None,
-    metrics: dict | None = None,
-    spline_rmse: float | None = None,
-    fetch_if_missing: bool = True,
-) -> dict:
-    """Return amplitude, spline RMSE, and SNR; may fetch RMSE from PhenoCam API."""
-    root = base or Path("data")
-    amp = season_amplitude(site_name, season, base=root, metrics=metrics)
-    rmse = spline_rmse
-    if rmse is None:
-        sidecar = phenocam_snr_path(site_name, season, root)
-        if sidecar.is_file():
-            cached = json.loads(sidecar.read_text(encoding="utf-8"))
-            rmse = cached.get("spline_rmse_gcc90")
-        elif fetch_if_missing:
-            rmse = fetch_spline_rmse_gcc90(site_name)
-    snr = None
-    if isinstance(amp, (int, float)) and isinstance(rmse, (int, float)) and rmse > 0:
-        snr = float(amp) / float(rmse)
-    return {
-        "site": site_name,
-        "season": season,
-        "amplitude": amp,
-        "spline_rmse_gcc90": rmse,
-        "snr": snr,
-    }
-
-
-def write_phenocam_snr(
-    site_name: str,
-    season: int,
-    *,
-    base: Path | None = None,
-    metrics: dict | None = None,
-    fetch_if_missing: bool = True,
-) -> Path | None:
-    """Compute SNR and write ``phenocam_snr.json``; returns path or None on failure."""
-    root = base or Path("data")
-    info = compute_snr(
-        site_name,
-        season,
-        base=root,
-        metrics=metrics,
-        fetch_if_missing=fetch_if_missing,
-    )
-    if info.get("spline_rmse_gcc90") is None:
-        print(
-            f"[PhenoCam-SNR] Warning: no spline RMSE for {site_name} {season}; "
-            "skipping phenocam_snr.json"
-        )
-        return None
-    out = phenocam_snr_path(site_name, season, root)
-    out.parent.mkdir(parents=True, exist_ok=True)
-    td_url = transition_dates_url(site_name)
-    payload = {
-        "site": site_name,
-        "season": season,
-        "amplitude": info.get("amplitude"),
-        "spline_rmse_gcc90": info.get("spline_rmse_gcc90"),
-        "snr": info.get("snr"),
-        "source": "phenocam_1day_transition_dates_header",
-        "transition_dates_url": td_url,
-        "roitype": SITE_ROITYPE.get(site_name),
-    }
-    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-    print(f"[PhenoCam-SNR] Saved: {out} (SNR={info.get('snr')})")
-    return out
-
-
-def load_phenocam_snr(
-    site_name: str, season: int, *, base: Path | None = None
-) -> dict | None:
-    """Load cached SNR sidecar if present."""
-    p = phenocam_snr_path(site_name, season, base)
-    if not p.is_file():
-        return None
-    return json.loads(p.read_text(encoding="utf-8"))
-
-
-def suggest_snr_threshold(snrs: list[float]) -> tuple[float, str]:
-    """
-    Choose eligibility threshold from cross-site SNR distribution.
-
-    Returns (threshold, rationale). Uses a distribution-based split only when it
-    separates a low-SNR group (max below 2) from a high-SNR group (min at or above 2).
-    Otherwise defaults to SNR >= 2.
-    """
-    if not snrs:
-        return 2.0, "default SNR >= 2 (no site SNR values available)"
-    sorted_snrs = sorted(snrs)
-    if len(sorted_snrs) == 1:
-        return 2.0, "default SNR >= 2 (single site only)"
-    if all(s >= 2.0 for s in sorted_snrs):
-        return 2.0, "default SNR >= 2 (all sites exceed 2; no low-SNR exclusion group)"
-
-    for i in range(1, len(sorted_snrs)):
-        low, high = sorted_snrs[:i], sorted_snrs[i:]
-        if not low or not high:
-            continue
-        gap = high[0] - low[-1]
-        if gap >= 0.5 and low[-1] < 2.0 <= high[0]:
-            threshold = (low[-1] + high[0]) / 2.0
-            return (
-                round(threshold, 3),
-                f"gap between {low[-1]:.3f} and {high[0]:.3f} straddles SNR=2 "
-                f"(midpoint {threshold:.3f})",
-            )
-    return 2.0, "default SNR >= 2 (no clear low/high cluster separation)"
-
-
-def report_all_sites(
-    *,
-    base: Path | None = None,
-    sites: dict[str, int] | None = None,
-    fetch_if_missing: bool = True,
-) -> list[dict]:
-    """Compute SNR for all primary-season sites; print table and return rows."""
-    root = base or Path("data")
-    site_seasons = sites or PRIMARY_SEASON
-    rows: list[dict] = []
-    for site in sorted(site_seasons.keys()):
-        season = site_seasons[site]
-        metrics_path = root / site / str(season) / "metrics.json"
-        metrics = None
-        if metrics_path.is_file():
-            metrics = json.loads(metrics_path.read_text(encoding="utf-8"))
-        info = compute_snr(
-            site,
-            season,
-            base=root,
-            metrics=metrics,
-            fetch_if_missing=fetch_if_missing,
-        )
-        rows.append(info)
-
-    print(f"{'site':<20} {'season':>6} {'amplitude':>10} {'rmse_spl':>10} {'SNR':>8}")
-    print("-" * 58)
-    for r in rows:
-        amp = r.get("amplitude")
-        rmse = r.get("spline_rmse_gcc90")
-        snr = r.get("snr")
-        print(
-            f"{r['site']:<20} {r['season']:>6} "
-            f"{amp if amp is not None else '---':>10} "
-            f"{rmse if rmse is not None else '---':>10} "
-            f"{snr if snr is not None else '---':>8}"
-        )
-
-    valid_snrs = [r["snr"] for r in rows if isinstance(r.get("snr"), (int, float))]
-    threshold, rationale = suggest_snr_threshold(valid_snrs)
-    print(f"\nSuggested threshold: SNR >= {threshold} ({rationale})")
-    for r in rows:
-        snr = r.get("snr")
-        if isinstance(snr, (int, float)):
-            r["eligible_at_2"] = snr >= 2.0
-            r["eligible_at_3"] = snr >= 3.0
-            r["eligible_at_suggested"] = snr >= threshold
-    return rows
-
-
-if __name__ == "__main__":
-    report_all_sites()
--- a/phenology_timesat.py
+++ b/phenology_timesat.py
@ -1,738 +0,0 @@
-"""
-PhenoCam GCC: green-up and green-down (50 % of seasonal amplitude) via TIMESAT.
-
-Reads ``data/.../raw/phenocam/phenocam_gcc.json`` (or any path) and uses the
-``timesat`` package (``timesat.tsfprocess``) with the same seasonal-threshold
-meaning as the TIMESAT GUI: *startmethod* 1, *p_startcutoff* (0.5, 0.5) = 50 % of
-the **per-season** amplitude above the local base. See the TIMESAT manual,
-section 4.3 and row 37–38 (season start method = seasonal amplitude).
-
-**License:** the ``timesat`` PyPI wheel is under the TIMESAT Research License
-(non-commercial research; see package metadata on PyPI).
-
-PhenoCam time series: single-year acquisition writes
-  ``phenocam_gcc.json`` (and ``phenocam_gcc.csv``). The three-year series used
-  for TIMESAT is stored separately as ``phenocam_gcc_3y.json`` in the same
-  folder (created on first use from the one-day summary API, then reused).
-
-Importable: ``write_phenocam_phenology_for_site`` is called from ``run.py``;
-the CLI entry point remains optional for ad-hoc runs.
-
-**Saving results:** use ``-o path.json`` or ``--sidecar`` to write a JSON file
-(see ``--help``). Sidecar mode writes ``phenocam_phenology.json`` (two dates
-only) next to ``phenocam_gcc.json``.
-
-``run_pipeline`` in ``run.py`` writes the same ``phenocam_phenology.json`` by
-default when ``timesat`` is installed. GCC for TIMESAT uses ``phenocam_gcc_3y.json``
-if present, otherwise the PhenoCam API for that site (listed in
-``data/sites.geojson``; not a site list from the API). One-year
-``phenocam_gcc.json`` on disk can still fill gaps when merged.
-
-Use ``python phenology_timesat.py --all`` to batch every
-``(sitename, season)`` from ``data/sites.geojson`` (``properties.sitename`` and
-``properties.seasons``).
-"""
-
-from __future__ import annotations
-
-import argparse
-import csv
-import json
-import sys
-from datetime import datetime, timedelta
-from pathlib import Path
-
-import numpy as np
-import requests
-
-PHENOCAM_API = "https://phenocam.nau.edu/api"
-
-try:
-    import timesat as _timesat
-except ImportError:
-    _timesat = None
-
-NODATA = -9999.0
-
-
-def load_phenocam_gcc(path: Path) -> dict[str, float]:
-    """Return map YYYY-MM-DD -> greenness index from PhenoCam JSON list."""
-    with open(path) as f:
-        rows = json.load(f)
-    out: dict[str, float] = {}
-    for row in rows:
-        d = str(row.get("date", ""))[:10]
-        v = row.get("greenness_index")
-        if d and v is not None and np.isfinite(v):
-            out[d] = float(v)
-    return out
-
-
-def _gcc_from_summary_row(row: dict, use_mean_fallback: bool) -> float | None:
-    """Extract daily GCC from a one-day summary row (same rules as acquisition)."""
-    if not use_mean_fallback:
-        oflag = row.get("outlierflag_gcc_90")
-        if oflag is not None and str(oflag).strip() in ("1", "1.0"):
-            return None
-
-    raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
-    if raw is None:
-        return None
-    text = str(raw).strip()
-    if not text or text.upper() == "NA":
-        return None
-    try:
-        val = float(text)
-    except ValueError:
-        return None
-    if val <= -9998.0:
-        return None
-    return val
-
-
-def _phenocam_one_day_summary_csv_url(site_name: str) -> str | None:
-    """Return URL of the one-day summary CSV for *site_name*, or None on failure."""
-    try:
-        url = f"{PHENOCAM_API}/roilists/"
-        params: dict | None = {"site": site_name}
-        rois: list[dict] = []
-        while url:
-            r = requests.get(url, params=params, timeout=30)
-            r.raise_for_status()
-            data = r.json()
-            rois.extend(
-                [roi for roi in data.get("results", []) if roi["site"] == site_name]
-            )
-            url = data.get("next")
-            params = None
-            if rois:
-                break
-        if not rois:
-            return None
-        return rois[0].get("one_day_summary") or None
-    except requests.RequestException:
-        return None
-
-
-def _parse_phenocam_gcc_from_csv_text(
-    text: str, start_date: str, end_date: str
-) -> dict[str, float]:
-    """Map YYYY-MM-DD -> gcc for rows in [start_date, end_date] inclusive."""
-    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
-    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-    lines = [line for line in text.split("\n") if line and not line.startswith("#")]
-    reader = csv.DictReader(lines)
-    fieldnames = reader.fieldnames or ()
-    use_mean_fallback = "gcc_90" not in fieldnames
-    out: dict[str, float] = {}
-    for row in reader:
-        try:
-            date_str = row.get("date")
-            if not date_str:
-                continue
-            date = datetime.strptime(date_str, "%Y-%m-%d")
-            if not (start_dt <= date <= end_dt):
-                continue
-            gcc = _gcc_from_summary_row(row, use_mean_fallback)
-            if gcc is not None:
-                out[date.date().isoformat()] = gcc
-        except (ValueError, KeyError):
-            continue
-    return out
-
-
-def save_phenocam_gcc_json(path: Path, by_date: dict[str, float]) -> None:
-    """Write the same list-of-objects format as :func:`acquisition_phenocam` GCC JSON."""
-    rows = [
-        {"date": d, "greenness_index": v}
-        for d, v in sorted(by_date.items(), key=lambda x: x[0])
-    ]
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with open(path, "w", encoding="utf-8") as f:
-        json.dump(rows, f, indent=2)
-        f.write("\n")
-
-
-def fetch_phenocam_gcc_three_years_separately(
-    site_name: str, season: int
-) -> dict[str, float]:
-    """
-    Download PhenoCam one-day summary GCC for three **calendar** years
-    (``season-1`` … ``season+1``), independently of :mod:`acquisition_phenocam`.
-
-    Uses one HTTP GET of the full summary CSV, then **three** per-year
-    extractions (same logic as the acquisition CSV filter, three date windows).
-    """
-    out: dict[str, float] = {}
-    csv_url = _phenocam_one_day_summary_csv_url(site_name)
-    if not csv_url:
-        print(
-            f"[PhenoCam phenology] No PhenoCam one-day summary URL for site {site_name!r}"
-        )
-        return out
-    try:
-        csv_r = requests.get(csv_url, timeout=30)
-        csv_r.raise_for_status()
-    except requests.RequestException as e:
-        print(f"[PhenoCam phenology] API CSV fetch failed: {e}")
-        return out
-    text = csv_r.text
-    for y in (season - 1, season, season + 1):
-        part = _parse_phenocam_gcc_from_csv_text(text, f"{y}-01-01", f"{y}-12-31")
-        out.update(part)
-    return out
-
-
-def load_or_fetch_phenocam_gcc_3y(
-    site_name: str, season: int, gcc_3y_path: Path
-) -> dict[str, float]:
-    """
-    Use ``phenocam_gcc_3y.json`` on disk if it exists and parses; else fetch
-    three years from the PhenoCam one-day summary for *site_name* and save to
-    *gcc_3y_path*.
-    """
-    if gcc_3y_path.is_file():
-        try:
-            cached = load_phenocam_gcc(gcc_3y_path)
-        except (OSError, json.JSONDecodeError):
-            cached = {}
-        if cached:
-            print(f"[PhenoCam phenology] Using {gcc_3y_path} ({len(cached)} values)")
-            return cached
-    out = fetch_phenocam_gcc_three_years_separately(site_name, season)
-    if not out:
-        return {}
-    save_phenocam_gcc_json(gcc_3y_path, out)
-    print(
-        f"[PhenoCam phenology] Fetched and wrote {gcc_3y_path} "
-        f"({len(out)} values for {season - 1}–{season + 1})"
-    )
-    return out
-
-
-def resolve_phenocam_gcc_for_timesat(
-    site_name: str, season: int, gcc_path: Path
-) -> dict[str, float]:
-    """
-    Load three-year series from ``phenocam_gcc_3y.json`` (or fetch once and
-    save there), merge with one-year ``gcc_path`` if present; three-year values
-    win on duplicate dates.
-    """
-    gcc_3y = gcc_path.parent / "phenocam_gcc_3y.json"
-    by_3y = load_or_fetch_phenocam_gcc_3y(site_name, season, gcc_3y)
-    by_1y: dict[str, float] = {}
-    if gcc_path.is_file():
-        try:
-            by_1y = load_phenocam_gcc(gcc_path)
-        except (OSError, json.JSONDecodeError):
-            pass
-    if by_3y:
-        return {**by_1y, **by_3y}
-    return by_1y
-
-
-def _day_count(calendar_year: int) -> int:
-    a = datetime(calendar_year, 1, 1)
-    b = datetime(calendar_year + 1, 1, 1)
-    return (b - a).days
-
-
-def daily_profile_for_year(by_date: dict[str, float], calendar_year: int) -> np.ndarray:
-    """
-    One value per day (length 365 or 366 for leap years). Gaps are filled by
-    linear interpolation in time along the year; if only one valid point exists,
-    that value is used for the whole year.
-    """
-    n = _day_count(calendar_year)
-    raw = np.full(n, np.nan, dtype=np.float64)
-    for d in range(1, n + 1):
-        dt = datetime(calendar_year, 1, 1) + timedelta(days=d - 1)
-        key = dt.strftime("%Y-%m-%d")
-        if key in by_date:
-            raw[d - 1] = by_date[key]
-    valid = np.isfinite(raw) & (raw > 0.0)
-    if not np.any(valid):
-        raise ValueError(f"No valid GCC in JSON for calendar year {calendar_year}")
-    if np.sum(valid) == 1:
-        v = float(raw[valid][0])
-        return np.full(n, v, dtype=np.float32)
-    idx = np.arange(n, dtype=np.float64)
-    raw = np.interp(idx, idx[valid], raw[valid])
-    return raw.astype(np.float32)
-
-
-def _gcc_profile_365_for_timesat(profile: np.ndarray) -> np.ndarray:
-    """TIMESAT uses 365 days per season; drop Dec 31 on leap years."""
-    p = np.asarray(profile, dtype=np.float32).ravel()
-    if p.size == 366:
-        return p[:365]
-    if p.size == 365:
-        return p
-    raise ValueError(f"expected 365 or 366 daily values, got {p.size}")
-
-
-def yyyydoy_to_iso(v: float) -> str:
-    x = int(round(float(v)))
-    y = x // 1000
-    doy = x - y * 1000
-    d = datetime(y, 1, 1) + timedelta(days=doy - 1)
-    return d.date().isoformat()
-
-
-def build_yraw_three_years(
-    by_date: dict[str, float], y1: int, y2: int, y3: int
-) -> tuple[np.ndarray, str]:
-    """
-    Stack three calendar years of daily GCC (365 pts/year) for TIMESAT.
-
-    If each of *y1*, *y2*, *y3* has at least one valid GCC in *by_date* (after
-    per-year gap filling), returns their concatenation — **three real years**.
-
-    If any of those years cannot be built (e.g. single-year download only),
-    falls back to **replicating** the profile for *y2* three times (legacy
-    TIMESAT workaround).
-    """
-    try:
-        p1 = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y1))
-        p2 = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y2))
-        p3 = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y3))
-        yraw = np.concatenate([p1, p2, p3]).astype(np.float32, copy=False)
-        return yraw, "three_independent_years"
-    except ValueError:
-        p2 = _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, y2))
-        yraw = np.tile(p2, 3)
-        return yraw, "single_year_replicated"
-
-
-def run_timesat_phenology_from_yraw(
-    yraw: np.ndarray,
-    years_triplet: tuple[int, int, int],
-    *,
-    start_cutoff: tuple[float, float] = (0.5, 0.5),
-    smooth_window: float = 2.0,
-    p_ignoreday: int = 366,
-) -> dict[str, str | float | None]:
-    """
-    Run TIMESAT on a length ``365 * 3`` daily VI stack and calendar *years_triplet*
-    (YYYY, YYYY, YYYY) for the time vector. Middle year in the triplet is the
-    season whose SOS/EOS we report.
-    """
-    yraw = np.asarray(yraw, dtype=np.float32).ravel()
-    y1, y2, y3 = years_triplet
-    nyear = 3
-    npt = 365 * nyear
-    if yraw.size != npt:
-        raise ValueError(f"yraw must have length {npt}, got {yraw.size}")
-    tlist: list[int] = []
-    for y in (y1, y2, y3):
-        t0 = datetime(y, 1, 1)
-        for d in range(365):
-            tlist.append(int((t0 + timedelta(days=d)).strftime("%Y%j")))
-    tv = np.array(tlist, dtype=np.int32)
-    if len(tv) != npt:
-        raise RuntimeError("internal: length mismatch")
-
-    vi = np.asfortranarray(yraw.reshape(1, 1, -1))
-    qa = np.asfortranarray(np.ones((1, 1, npt), dtype=np.float32))
-    lc = np.ones((1, 1), dtype=np.uint8)
-    landuse = np.ones(255, dtype=np.uint8)
-    p_out = np.arange(1, npt + 1, dtype=np.int32)
-    p_ylu = np.asfortranarray(np.array([0.0, 1.0], dtype=np.float64))
-    ci = 0
-    p_fitmethod = np.zeros(255, dtype=np.int32)
-    p_fitmethod[ci] = 1
-    p_smooth = np.zeros(255, dtype=np.float64)
-    p_smooth[ci] = float(smooth_window)
-    p_nenvi = np.zeros(255, dtype=np.int32)
-    p_nenvi[ci] = 1
-    p_wfact = np.zeros(255, dtype=np.float64)
-    p_wfact[ci] = 1.0
-    p_startmethod = np.zeros(255, dtype=np.int32)
-    p_startmethod[ci] = 1
-    p_startcutoff = np.zeros((255, 2), dtype=np.float64, order="F")
-    p_startcutoff[ci, :] = np.array(
-        [start_cutoff[0], start_cutoff[1]], dtype=np.float64
-    )
-    p_low = np.zeros(255, dtype=np.float64)
-    p_fillbase = np.zeros(255, dtype=np.int32)
-    p_seasonmethod = np.zeros(255, dtype=np.int32)
-    p_seasonmethod[ci] = 1
-    p_seapar = np.zeros(255, dtype=np.float64)
-    p_seapar[ci] = 1.0
-
-    if _timesat is None:
-        raise ImportError("Install the 'timesat' package: pip install timesat")
-    vpp, _vppqa, nseason, yfit, _yfitqa, _seasonfit, _tseq = _timesat.tsfprocess(
-        nyear,
-        vi,
-        qa,
-        tv,
-        lc,
-        1,
-        landuse,
-        p_out,
-        p_ignoreday,
-        p_ylu,
-        0,
-        p_fitmethod,
-        p_smooth,
-        NODATA,
-        45,
-        0,
-        p_nenvi,
-        p_wfact,
-        p_startmethod,
-        p_startcutoff,
-        p_low,
-        p_fillbase,
-        1,
-        p_seasonmethod,
-        p_seapar,
-        1,
-        1,
-        1,
-        npt,
-        len(p_out),
-    )
-    a = vpp[0, 0, :]
-    # three growing-season rows at indices 0, 13*2, 13*4 in the raw vector
-    middle_block = 2
-    off = 13 * middle_block
-    sosd = a[off + 0] if a.size > off + 0 else np.nan
-    sosv = a[off + 1] if a.size > off + 1 else np.nan
-    eosd = a[off + 3] if a.size > off + 3 else np.nan
-    eosv = a[off + 4] if a.size > off + 4 else np.nan
-    yfit_max = float(np.max(yfit)) if yfit.size else float("nan")
-
-    def pick(x: float) -> str | None:
-        if not np.isfinite(x) or x < 1.0e5 or x < 0:
-            return None
-        try:
-            return yyyydoy_to_iso(x)
-        except (OverflowError, ValueError):
-            return None
-
-    return {
-        "reference_calendar_year": y2,
-        "green_up_50pct_date": pick(sosd),
-        "green_up_50pct_fitted_gcc": float(sosv) if np.isfinite(sosv) else None,
-        "green_down_50pct_date": pick(eosd),
-        "green_down_50pct_fitted_gcc": float(eosv) if np.isfinite(eosv) else None,
-        "nseason": nseason[0, 0].tolist() if nseason.ndim >= 2 else [],
-        "yfit_max": yfit_max,
-    }
-
-
-def run_timesat_phenology(
-    daily_profile: np.ndarray,
-    years_triplet: tuple[int, int, int],
-    *,
-    start_cutoff: tuple[float, float] = (0.5, 0.5),
-    smooth_window: float = 2.0,
-    p_ignoreday: int = 366,
-) -> dict[str, str | float | None]:
-    """
-    Back-compat: run TIMESAT on one year’s 365(–366) profile **replicated** three times.
-    Prefer :func:`build_yraw_three_years` + :func:`run_timesat_phenology_from_yraw`.
-    """
-    prof = np.asarray(daily_profile, dtype=np.float32).ravel()
-    if len(prof) not in (365, 366):
-        raise ValueError("daily_profile must have length 365 or 366")
-    if len(prof) == 366:
-        prof = prof[:365]
-    yraw = np.tile(prof, 3)
-    return run_timesat_phenology_from_yraw(
-        yraw,
-        years_triplet,
-        start_cutoff=start_cutoff,
-        smooth_window=smooth_window,
-        p_ignoreday=p_ignoreday,
-    )
-
-
-def phenocam_gcc_path(site_name: str, season: int) -> Path:
-    return Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_gcc.json")
-
-
-def phenocam_gcc_3y_path(site_name: str, season: int) -> Path:
-    return Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_gcc_3y.json")
-
-
-def iter_sites_seasons_with_phenocam(
-    data_root: str | Path = "data",
-) -> list[tuple[str, int]]:
-    """``(site_name, season)`` for every ``phenocam_gcc.json`` under *data_root* (legacy)."""
-    root = Path(data_root)
-    if not root.is_dir():
-        return []
-    out: list[tuple[str, int]] = []
-    seen: set[tuple[str, int]] = set()
-    for p in sorted(root.glob("*/*/raw/phenocam/phenocam_gcc.json")):
-        rel = p.relative_to(root)
-        site, season_s = rel.parts[0], rel.parts[1]
-        if not season_s.isdigit():
-            continue
-        season = int(season_s)
-        key = (site, season)
-        if key not in seen:
-            seen.add(key)
-            out.append(key)
-    return out
-
-
-def iter_sites_seasons_from_sites_geojson(
-    path: str | Path = "data/sites.geojson",
-) -> list[tuple[str, int]]:
-    """
-    ``(sitename, season)`` from a GeoJSON FeatureCollection: each feature’s
-    ``properties.sitename`` and each key in ``properties.seasons`` (4-digit year).
-    """
-    path = Path(path)
-    if not path.is_file():
-        return []
-    with open(path, encoding="utf-8") as f:
-        fc = json.load(f)
-    out: list[tuple[str, int]] = []
-    for feat in fc.get("features", []):
-        props = feat.get("properties") or {}
-        name = props.get("sitename")
-        seasons = props.get("seasons")
-        if not name or not isinstance(seasons, dict):
-            continue
-        for skey in sorted(seasons.keys()):
-            if skey.isdigit() and len(skey) == 4:
-                out.append((str(name), int(skey)))
-    return out
-
-
-def write_phenocam_phenology_all(
-    *,
-    sites_geojson: str | Path | None = None,
-    data_root: str | Path = "data",
-    smooth_window: float = 2.0,
-    p_ignoreday: int = 366,
-) -> int:
-    """
-    Run :func:`write_phenocam_phenology_for_site` for every ``(site, season)`` in
-    *sites_geojson* (default: :file:`<data_root>/sites.geojson`), not a glob over
-    ``data/``.
-    """
-    geo = Path(
-        sites_geojson
-        if sites_geojson is not None
-        else Path(data_root) / "sites.geojson"
-    )
-    pairs = iter_sites_seasons_from_sites_geojson(geo)
-    if not pairs and geo.is_file():
-        print(
-            f"[PhenoCam phenology] No (sitename, season) entries in {geo} "
-            "(check properties.sitename and properties.seasons)."
-        )
-    elif not pairs:
-        print(f"[PhenoCam phenology] Missing or empty sites file: {geo}")
-    n = 0
-    for site, season in pairs:
-        print(f"=== {site} {season} ===")
-        write_phenocam_phenology_for_site(
-            site, season, smooth_window=smooth_window, p_ignoreday=p_ignoreday
-        )
-        n += 1
-    print(f"[PhenoCam phenology] Processed {n} site/season pair(s) from {geo}.")
-    return n
-
-
-def phenocam_phenology_path(site_name: str, season: int) -> Path:
-    return Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_phenology.json")
-
-
-def write_phenocam_phenology_for_site(
-    site_name: str,
-    season: int,
-    *,
-    smooth_window: float = 2.0,
-    p_ignoreday: int = 366,
-) -> None:
-    """
-    If ``timesat`` is installed, build GCC from ``phenocam_gcc_3y.json`` (or fetch
-    three years once and save there), with optional one-year ``phenocam_gcc.json``,
-    then write
-    ``phenocam_phenology.json`` in the same directory with
-    ``green_up_50pct_date`` and ``green_down_50pct_date`` (ISO dates or null).
-    """
-    if _timesat is None:
-        out = phenocam_phenology_path(site_name, season)
-        print(
-            f"[PhenoCam phenology] Skipped (no timesat); would write {out}. "
-            "pip install timesat"
-        )
-        return
-    gcc = phenocam_gcc_path(site_name, season)
-    try:
-        by_date = resolve_phenocam_gcc_for_timesat(site_name, season, gcc)
-    except OSError as e:
-        print(f"[PhenoCam phenology] Skipped: {e}")
-        return
-    if not by_date:
-        g3 = gcc.parent / "phenocam_gcc_3y.json"
-        print(
-            f"[PhenoCam phenology] No GCC ({gcc} and no data in {g3} after API); "
-            f"skipping {phenocam_phenology_path(site_name, season).name}."
-        )
-        return
-    try:
-        yraw, stack_mode = build_yraw_three_years(
-            by_date, season - 1, season, season + 1
-        )
-    except (OSError, ValueError) as e:
-        print(f"[PhenoCam phenology] Skipped: {e}")
-        return
-    out = run_timesat_phenology_from_yraw(
-        yraw,
-        (season - 1, season, season + 1),
-        smooth_window=smooth_window,
-        p_ignoreday=p_ignoreday,
-    )
-    record = {
-        "green_up_50pct_date": out.get("green_up_50pct_date"),
-        "green_down_50pct_date": out.get("green_down_50pct_date"),
-    }
-    out_path = phenocam_phenology_path(site_name, season)
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(out_path, "w", encoding="utf-8") as f:
-        json.dump(record, f, indent=2)
-        f.write("\n")
-    gup, gdn = record["green_up_50pct_date"], record["green_down_50pct_date"]
-    print(
-        f"[PhenoCam phenology] Wrote {out_path} (green-up {gup!r}, green-down {gdn!r}; "
-        f"TIMESAT input={stack_mode})"
-    )
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(
-        description="TIMESAT 50 % seasonal-amplitude green-up / green-down for PhenoCam GCC JSON."
-    )
-    ap.add_argument(
-        "--all",
-        action="store_true",
-        help="Write phenocam for every (sitename, season) in the sites GeoJSON (see --sites-geojson).",
-    )
-    ap.add_argument(
-        "--data-root",
-        type=Path,
-        default=Path("data"),
-        help="Resolves default --sites-geojson to <data-root>/sites.geojson.",
-    )
-    ap.add_argument(
-        "--sites-geojson",
-        type=Path,
-        default=None,
-        help="For --all: path to data/sites.geojson (default: <data-root>/sites.geojson).",
-    )
-    ap.add_argument(
-        "gcc_json",
-        type=Path,
-        nargs="?",
-        default=Path("data/innsbruck/2024/raw/phenocam/phenocam_gcc.json"),
-        help="Path to phenocam_gcc.json (default: Innsbruck 2024 if present).",
-    )
-    ap.add_argument(
-        "--season",
-        type=int,
-        default=None,
-        help="Calendar year to build the daily GCC profile (default: infer from file path .../<year>/...).",
-    )
-    ap.add_argument(
-        "--savitzky-hw",
-        type=float,
-        default=2.0,
-        help="Half-width for fitmethod 1 (Savitzky–Golay); default 2.",
-    )
-    ap.add_argument(
-        "--p-ignoreday",
-        type=int,
-        default=366,
-        help="TIMESAT p_ignoreday (default 366).",
-    )
-    ap.add_argument(
-        "-o",
-        "--output",
-        type=Path,
-        default=None,
-        help="Write results to this JSON file (same schema as stdout, plus metadata).",
-    )
-    ap.add_argument(
-        "--sidecar",
-        action="store_true",
-        help="Save two-date JSON next to input as phenocam_phenology.json (implies -o).",
-    )
-    args = ap.parse_args()
-    if _timesat is None:
-        raise SystemExit(
-            "The 'timesat' package is required. Install with: pip install timesat"
-        )
-    if args.all:
-        write_phenocam_phenology_all(
-            sites_geojson=args.sites_geojson,
-            data_root=args.data_root,
-            smooth_window=args.savitzky_hw,
-            p_ignoreday=args.p_ignoreday,
-        )
-        return
-    path: Path = args.gcc_json
-    if not path.is_file():
-        raise SystemExit(f"Not a file: {path}")
-
-    season = args.season
-    if season is None:
-        for part in path.parts:
-            if part.isdigit() and len(part) == 4:
-                season = int(part)
-                break
-        if season is None:
-            season = datetime.now().year
-
-    by_date = load_phenocam_gcc(path)
-    yraw, stack_mode = build_yraw_three_years(by_date, season - 1, season, season + 1)
-    out = run_timesat_phenology_from_yraw(
-        yraw,
-        (season - 1, season, season + 1),
-        smooth_window=args.savitzky_hw,
-        p_ignoreday=args.p_ignoreday,
-    )
-    payload = {
-        **out,
-        "source_gcc_json": str(path.resolve()),
-        "profile_year": season,
-        "timesat_input": stack_mode,
-        "method": "TIMESAT tsfprocess; startmethod=1; p_startcutoff=[0.5,0.5] (50% seasonal amplitude)",
-    }
-    out_path = args.output
-    if args.sidecar:
-        out_path = path.parent / "phenocam_phenology.json"
-    if out_path is not None:
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-        to_write = (
-            {
-                "green_up_50pct_date": out.get("green_up_50pct_date"),
-                "green_down_50pct_date": out.get("green_down_50pct_date"),
-            }
-            if args.sidecar
-            else payload
-        )
-        with open(out_path, "w", encoding="utf-8") as f:
-            json.dump(to_write, f, indent=2)
-            f.write("\n")
-        print(f"Wrote {out_path}", file=sys.stderr)
-    print(json.dumps(payload, indent=2))
-    gup = out.get("green_up_50pct_date")
-    gdn = out.get("green_down_50pct_date")
-    if gup and gdn:
-        print(
-            f"Green-up (50 %): {gup}  |  Green-down (50 %): {gdn}  "
-            f"(profile year {season}, TIMESAT reference year {out['reference_calendar_year']})"
-        )
-
-
-if __name__ == "__main__":
-    main()
--- a/postprocessing.py
+++ b/postprocessing.py
@ -1,268 +0,0 @@
-"""Post-processing: crop fusion/S2/S3 to valid pixels."""
-
-from pathlib import Path
-import numpy as np
-import rasterio
-from rasterio import windows
-from rasterio.warp import reproject, Resampling
-
-
-def process_cropped(
-    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
-):
-    """Crop fusion to valid data, then crop S2/S3 to match."""
-    base = Path(f"data/{site_name}/{season}")
-    prepared = base / f"prepared_{cleaning_strategy}"
-    processed_dir = (
-        f"processed_{cleaning_strategy}_sigma{sigma}"
-        if sigma
-        else f"processed_{cleaning_strategy}_sigma20"
-    )
-    processed = base / processed_dir
-
-    s2_prep = prepared / "s2"
-    s3_prep = prepared / "s3"
-    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
-
-    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
-        output_dir.mkdir(parents=True, exist_ok=True)
-
-    print(
-        f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
-    )
-
-    # Crop fusion to valid data and get dimensions
-    fusion_dims = {}
-    for fusion_file in fusion_prep.glob("REFL_*.tif"):
-        date_str = fusion_file.stem.split("_")[1]
-        with rasterio.open(fusion_file) as src:
-            data = src.read()
-            valid = ~np.isnan(data) & (data > 0.001)
-            rows = np.any(valid, axis=(0, 2))
-            cols = np.any(valid, axis=(0, 1))
-            row_idx = np.where(rows)[0]
-            col_idx = np.where(cols)[0]
-            if len(row_idx) == 0 or len(col_idx) == 0:
-                print(f"[PROCESS] Skipping {fusion_file.name} (no valid pixels)")
-                continue
-            r0, r1 = row_idx[0], row_idx[-1]
-            c0, c1 = col_idx[0], col_idx[-1]
-            w, h = c1 - c0 + 1, r1 - r0 + 1
-            window = windows.Window(c0, r0, w, h)
-            data_crop = src.read(window=window)
-            transform = rasterio.windows.transform(window, src.transform)
-            p = src.profile.copy()
-            p.update({"width": w, "height": h, "transform": transform})
-            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
-            with rasterio.open(output_file, "w", **p) as dst:
-                dst.write(data_crop)
-            fusion_dims[date_str] = (
-                c0,
-                r0,
-                w,
-                h,
-                transform,
-                src.transform,
-                src.crs,
-                src.profile,
-            )
-        print(f"[PROCESS] Cropped fusion: {output_file}")
-
-    # Crop S2 and S3 to fusion size
-    for date_str, (
-        c0,
-        r0,
-        w,
-        h,
-        transform,
-        fusion_transform,
-        crs,
-        fusion_profile,
-    ) in fusion_dims.items():
-        window = windows.Window(c0, r0, w, h)
-        # S2
-        for s2_file in s2_prep.glob("*REFL.tif"):
-            if s2_file.stem.split("_")[2] == date_str:
-                output_file = processed / "s2" / f"{date_str}_0.geotiff"
-                with rasterio.open(s2_file) as src:
-                    data = src.read(window=window)
-                    p2 = src.profile.copy()
-                    p2.update(
-                        {"width": w, "height": h, "transform": transform, "crs": crs}
-                    )
-                    with rasterio.open(output_file, "w", **p2) as dst:
-                        dst.write(data)
-                print(f"[PROCESS] Cropped: {output_file}")
-        # S3: resample to fusion pixel size, then crop
-        s3_file = s3_prep / f"composite_{date_str}.tif"
-        if s3_file.exists():
-            output_file = processed / "s3" / f"{date_str}_0.geotiff"
-            with rasterio.open(s3_file) as src:
-                # Resample to fusion pixel size
-                temp_profile = fusion_profile.copy()
-                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
-                with rasterio.MemoryFile() as memfile:
-                    with memfile.open(**temp_profile) as resampled:
-                        for i in range(1, src.count + 1):
-                            reproject(
-                                source=rasterio.band(src, i),
-                                destination=rasterio.band(resampled, i),
-                                src_transform=src.transform,
-                                src_crs=src.crs,
-                                dst_transform=fusion_transform,
-                                dst_crs=crs,
-                                resampling=Resampling.nearest,
-                            )
-                        # Crop using same window
-                        data = resampled.read(window=window)
-                        p2 = resampled.profile.copy()
-                        p2.update({"width": w, "height": h, "transform": transform})
-                        with rasterio.open(output_file, "w", **p2) as dst:
-                            dst.write(data)
-            print(f"[PROCESS] Cropped: {output_file}")
-
-    print("[PROCESS] Completed")
-
-
-def process_cropped_itb(
-    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
-):
-    base = Path(f"data/{site_name}/{season}")
-    prepared = base / f"prepared_{cleaning_strategy}_itb"
-    processed_dir = (
-        f"processed_{cleaning_strategy}_itb_sigma{sigma}"
-        if sigma
-        else f"processed_{cleaning_strategy}_itb_sigma20"
-    )
-    processed = base / processed_dir
-    s2_prep = prepared / "s2"
-    s3_prep = prepared / "s3"
-    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
-    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
-        output_dir.mkdir(parents=True, exist_ok=True)
-    print(
-        f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
-    )
-    fusion_dims = {}
-    for fusion_file in fusion_prep.glob("GCC_*.tif"):
-        date_str = fusion_file.stem.split("_")[1]
-        with rasterio.open(fusion_file) as src:
-            data = src.read()
-            valid = ~np.isnan(data) & (data > 0.001)
-            rows = np.any(valid, axis=(0, 2))
-            cols = np.any(valid, axis=(0, 1))
-            row_idx = np.where(rows)[0]
-            col_idx = np.where(cols)[0]
-            if len(row_idx) == 0 or len(col_idx) == 0:
-                print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)")
-                continue
-            r0, r1 = row_idx[0], row_idx[-1]
-            c0, c1 = col_idx[0], col_idx[-1]
-            w, h = c1 - c0 + 1, r1 - r0 + 1
-            window = windows.Window(c0, r0, w, h)
-            data_crop = src.read(window=window)
-            transform = rasterio.windows.transform(window, src.transform)
-            p = src.profile.copy()
-            p.update({"width": w, "height": h, "transform": transform})
-            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
-            with rasterio.open(output_file, "w", **p) as dst:
-                dst.write(data_crop)
-            fusion_dims[date_str] = (
-                c0,
-                r0,
-                w,
-                h,
-                transform,
-                src.transform,
-                src.crs,
-                src.profile,
-            )
-        print(f"[PROCESS-ITB] Cropped fusion: {output_file}")
-    for date_str, (
-        c0,
-        r0,
-        w,
-        h,
-        transform,
-        fusion_transform,
-        crs,
-        fusion_profile,
-    ) in fusion_dims.items():
-        window = windows.Window(c0, r0, w, h)
-        for s2_file in s2_prep.glob("*GCC.tif"):
-            parts = s2_file.stem.split("_")
-            if len(parts) > 2 and parts[2] == date_str:
-                output_file = processed / "s2" / f"{date_str}_0.geotiff"
-                with rasterio.open(s2_file) as src:
-                    data = src.read(window=window)
-                    p2 = src.profile.copy()
-                    p2.update(
-                        {"width": w, "height": h, "transform": transform, "crs": crs}
-                    )
-                    with rasterio.open(output_file, "w", **p2) as dst:
-                        dst.write(data)
-                print(f"[PROCESS-ITB] Cropped: {output_file}")
-                break
-        s3_file = s3_prep / f"composite_{date_str}.tif"
-        if s3_file.exists():
-            output_file = processed / "s3" / f"{date_str}_0.geotiff"
-            with rasterio.open(s3_file) as src:
-                temp_profile = fusion_profile.copy()
-                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
-                with rasterio.MemoryFile() as memfile:
-                    with memfile.open(**temp_profile) as resampled:
-                        for i in range(1, src.count + 1):
-                            reproject(
-                                source=rasterio.band(src, i),
-                                destination=rasterio.band(resampled, i),
-                                src_transform=src.transform,
-                                src_crs=src.crs,
-                                dst_transform=fusion_transform,
-                                dst_crs=crs,
-                                resampling=Resampling.nearest,
-                            )
-                        data = resampled.read(window=window)
-                        p2 = resampled.profile.copy()
-                        p2.update({"width": w, "height": h, "transform": transform})
-                        with rasterio.open(output_file, "w", **p2) as dst:
-                            dst.write(data)
-            print(f"[PROCESS-ITB] Cropped: {output_file}")
-    print("[PROCESS-ITB] Completed")
-
-
-def post_process_all_itb_scenarios(season, site_position, site_name):
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [None, 30]:
-            process_cropped_itb(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-            )
-
-
-def post_process_all_scenarios(season, site_position, site_name):
-    """Crop fusion/S2/S3 to valid pixels for all 4 scenarios."""
-    for strategy in ["aggressive", "nonaggressive"]:
-        for sigma in [None, 30]:
-            process_cropped(
-                season,
-                site_position,
-                site_name,
-                cleaning_strategy=strategy,
-                sigma=sigma,
-            )
-
-
-def post_process_timeseries(season, site_position, site_name):
-    """Generate NDVI, GCC, and S2 bands timeseries for all 4 scenarios."""
-    from metrics_indices import (
-        create_ndvi_timeseries_post_process,
-        create_gcc_timeseries_post_process,
-        create_bands_timeseries_post_process,
-    )
-
-    create_ndvi_timeseries_post_process(season, site_position, site_name)
-    create_gcc_timeseries_post_process(season, site_position, site_name)
-    create_bands_timeseries_post_process(season, site_position, site_name)
--- a/preparation.py
+++ b/preparation.py
@ -1,364 +0,0 @@
-"""Data preparation: S2/S3 preprocessing for fusion."""
-
-import json
-import shutil
-from pathlib import Path
-from collections import defaultdict
-import numpy as np
-import rasterio
-from rasterio.warp import Resampling
-from rasterio.vrt import WarpedVRT
-from rasterio import shutil as rio_shutil
-
-RESOLUTION_RATIO = 21
-# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
-S3_MOVING_AVERAGE_WINDOW_DAYS = 5
-
-
-def _apply_s3_temporal_moving_average(s3_dir, window):
-    """In-place smoothing of composite_*.tif along calendar order; nodata 0 → NaN for averaging."""
-    if window <= 1:
-        return
-    paths = sorted(s3_dir.glob("composite_*.tif"), key=lambda p: p.stem.split("_")[1])
-    if not paths:
-        return
-    k = (window - 1) // 2
-    arrs = []
-    profiles = []
-    for p in paths:
-        with rasterio.open(p) as src:
-            d = src.read().astype(np.float32)
-            d[d == 0] = np.nan
-            arrs.append(d)
-            profiles.append(src.profile.copy())
-    stack = np.stack(arrs, axis=0)
-    t, _, _, _ = stack.shape
-    out = np.empty_like(stack)
-    for i in range(t):
-        lo, hi = max(0, i - k), min(t, i + k + 1)
-        out[i] = np.nanmean(stack[lo:hi], axis=0)
-    out = np.nan_to_num(out, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
-    for p, prof, slc in zip(paths, profiles, out):
-        prof.update({"dtype": "float32", "nodata": 0})
-        with rasterio.open(p, "w", **prof) as dst:
-            dst.write(slc)
-    print(f"[S3-PREP] Applied {window}-day centred MA ({t} composites)")
-
-
-def _import_distance_to_clouds():
-    """Lazy import of efast.distance_to_clouds."""
-    try:
-        from efast.s2_processing import distance_to_clouds
-
-        return distance_to_clouds
-    except ImportError:
-        raise ImportError(
-            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
-        )
-
-
-def _load_excluded(season, site_name, cleaning_strategy):
-    """Load excluded filenames from NDVI timeseries (excluded_aggressive / excluded_nonaggressive)."""
-    base = Path(f"data/{site_name}/{season}/raw/preselection")
-    key = f"excluded_{cleaning_strategy}"
-    clouds = {"s2": set(), "s3": set()}
-    for source in ["s2", "s3"]:
-        ts_file = base / f"{source}_preselection.json"
-        if ts_file.exists():
-            data = json.loads(ts_file.read_text())
-            clouds[source] = {e["filename"] for e in data if e.get(key)}
-    return clouds
-
-
-def _get_base_dir(season, site_name, cleaning_strategy):
-    return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/")
-
-
-def _get_itb_base_dir(season, site_name, cleaning_strategy):
-    return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb")
-
-
-def _compute_gcc_from_refl_array(blue, green, red):
-    total = (
-        blue.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
-    )
-    mask = (total > 0) & np.isfinite(total)
-    gcc = np.zeros_like(green, dtype=np.float32)
-    gcc[mask] = green[mask].astype(np.float32) / total[mask]
-    return gcc
-
-
-def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
-    dst_s2_dir.mkdir(parents=True, exist_ok=True)
-    for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
-        dst = dst_s2_dir / src.name
-        if dst.is_symlink() or dst.exists():
-            dst.unlink(missing_ok=True)
-        try:
-            dst.symlink_to(src.resolve())
-        except OSError:
-            shutil.copy2(src, dst)
-
-
-def prepare_s2_gcc_for_itb(
-    season, site_position, site_name, cleaning_strategy="aggressive"
-):
-    base = _get_base_dir(season, site_name, cleaning_strategy)
-    itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
-    s2_prep = base / "s2"
-    itb_s2.mkdir(parents=True, exist_ok=True)
-    for refl in sorted(s2_prep.glob("*REFL.tif")):
-        out = itb_s2 / refl.name.replace("_REFL.tif", "_GCC.tif")
-        if out.exists():
-            continue
-        with rasterio.open(refl) as src:
-            if src.count < 4:
-                continue
-            b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4))
-            gcc = _compute_gcc_from_refl_array(b, g, r)
-            profile = src.profile.copy()
-            profile.update({"count": 1, "dtype": "float32", "nodata": 0})
-            with rasterio.open(out, "w", **profile) as dst:
-                dst.write(gcc, 1)
-        print(f"[S2-ITB] Saved {out.name}")
-    _link_dist_cloud_from_prepared(s2_prep, itb_s2)
-
-
-def prepare_s3_gcc_for_itb(
-    season, site_position, site_name, cleaning_strategy="aggressive"
-):
-    base = _get_base_dir(season, site_name, cleaning_strategy)
-    itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
-    itb_s3.mkdir(parents=True, exist_ok=True)
-    for comp in sorted((base / "s3").glob("composite_*.tif")):
-        out = itb_s3 / comp.name
-        if out.exists():
-            continue
-        with rasterio.open(comp) as src:
-            if src.count < 4:
-                continue
-            b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4))
-            gcc = _compute_gcc_from_refl_array(b, g, r)
-            profile = src.profile.copy()
-            profile.update({"count": 1, "dtype": "float32", "nodata": 0})
-            with rasterio.open(out, "w", **profile) as dst:
-                dst.write(gcc, 1)
-        print(f"[S3-ITB] Saved {out.name}")
-
-
-def _reproject_raster_to_target(
-    src_path,
-    dst_path,
-    target_bounds,
-    target_crs,
-    width,
-    height,
-    resampling=Resampling.bilinear,
-):
-    dst_transform = rasterio.transform.from_bounds(
-        target_bounds.left,
-        target_bounds.bottom,
-        target_bounds.right,
-        target_bounds.top,
-        width,
-        height,
-    )
-    with rasterio.open(src_path) as src:
-        vrt_options = {
-            "transform": dst_transform,
-            "height": height,
-            "width": width,
-            "crs": target_crs,
-            "resampling": resampling,
-        }
-        with WarpedVRT(src, **vrt_options) as vrt:
-            profile = vrt.profile.copy()
-            profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
-            rio_shutil.copy(vrt, dst_path, **profile)
-
-
-def _rescale_dist_cloud_for_small_roi(s2_output_dir):
-    """Rescale DIST_CLOUD when max distance ≤1 so EFAST fusion gets valid weights.
-
-    EFAST uses wo_i = (distance - 1) / D; values ≤1 yield zero/NaN weights. In small
-    ROIs (e.g. PhenoCam sites, 7×4 LR grid), distance_transform_edt never exceeds 1.
-    Scale non-zero values to ≥2 so fusion can produce non-NaN output.
-    """
-    for dc_path in s2_output_dir.glob("*DIST_CLOUD.tif"):
-        with rasterio.open(dc_path, "r") as src:
-            d = src.read(1)
-        d_max = float(np.nanmax(d))
-        if d_max <= 1:
-            # Map (0, 1] -> (0, 2] so (d-1)/15 gives positive weight
-            d_scaled = np.where(d > 0, 2.0, d).astype(np.float32)
-            with rasterio.open(dc_path, "r+") as dst:
-                dst.write(d_scaled, 1)
-            print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})")
-
-
-def prepare_s2(
-    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
-):
-    lat, lon = site_position
-    s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
-    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
-    s2_output_dir = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
-
-    clouds = _load_excluded(season, site_name, cleaning_strategy)
-    s2_output_dir.mkdir(parents=True, exist_ok=True)
-
-    print(
-        f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
-    )
-
-    s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
-    if not s3_files:
-        raise ValueError("No non-cloud S3 files found for reference bounds")
-
-    with rasterio.open(s3_files[0]) as s3_ref:
-        target_bounds = s3_ref.bounds
-        target_crs = s3_ref.crs
-        s2_width = s3_ref.width * RESOLUTION_RATIO
-        s2_height = s3_ref.height * RESOLUTION_RATIO
-
-    for s2_file in sorted(s2_dir.glob("*.geotiff")):
-        if s2_file.name in clouds["s2"]:
-            print(
-                f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
-            )
-            continue
-        date_str = s2_file.name.split("_")[0]
-        refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
-        if refl_dst.exists():
-            print(f"[S2-PREP] Skipping {s2_file.name} (exists)")
-            continue
-
-        print(f"[S2-PREP] Processing {s2_file.name}...")
-        temp_normalized = s2_output_dir / f"temp_{s2_file.name}"
-        with rasterio.open(s2_file) as src:
-            data = src.read().astype("float32") / 10000.0
-            profile = src.profile.copy()
-            profile.update({"dtype": "float32", "nodata": 0})
-            with rasterio.open(temp_normalized, "w", **profile) as dst:
-                dst.write(data)
-
-        _reproject_raster_to_target(
-            temp_normalized, refl_dst, target_bounds, target_crs, s2_width, s2_height
-        )
-        temp_normalized.unlink()
-        print(f"[S2-PREP] Saved: {refl_dst}")
-
-    print("[S2-PREP] Computing distance-to-clouds...")
-    distance_to_clouds = _import_distance_to_clouds()
-    distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
-    _rescale_dist_cloud_for_small_roi(s2_output_dir)
-    print("[S2-PREP] Completed")
-
-
-def prepare_s3(
-    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
-):
-    lat, lon = site_position
-    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
-    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
-    s2_prepared_dir = base_dir / "s2"
-    s3_preprocessed_dir = base_dir / "s3"
-
-    clouds = _load_excluded(season, site_name, cleaning_strategy)
-    s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
-
-    print(
-        f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
-    )
-
-    s3_by_date = defaultdict(list)
-    for s3_file in s3_dir.glob("*.geotiff"):
-        if s3_file.name not in clouds["s3"]:
-            s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
-        else:
-            print(
-                f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
-            )
-
-    print(
-        f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
-    )
-
-    temp_composite_dir = s3_preprocessed_dir / "temp_composites"
-    if temp_composite_dir.exists():
-        shutil.rmtree(temp_composite_dir)
-    temp_composite_dir.mkdir()
-
-    for date_str, s3_files in sorted(s3_by_date.items()):
-        composite_path = temp_composite_dir / f"composite_{date_str}.tif"
-        if len(s3_files) == 1:
-            shutil.copy(s3_files[0], composite_path)
-            print(f"[S3-PREP] Composite {date_str}: 1 acquisition")
-        else:
-            s3_stack = []
-            for s3_file in s3_files:
-                with rasterio.open(s3_file) as src:
-                    data = src.read()
-                    data[:, np.abs(np.nanmean(data, axis=0)) >= 5] = np.nan
-                    s3_stack.append(data)
-            composite = np.nanmean(np.array(s3_stack), axis=0).astype("float32")
-            with rasterio.open(s3_files[0]) as src:
-                profile = src.profile.copy()
-                profile.update({"count": composite.shape[0], "dtype": "float32"})
-            with rasterio.open(composite_path, "w", **profile) as dst:
-                dst.write(composite)
-            print(
-                f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
-            )
-
-    # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds
-    # This ensures fusion covers the same area as S2 and dimensions match
-    sen2_ref_paths = list(s2_prepared_dir.glob("*REFL.tif"))
-    if len(sen2_ref_paths) == 0:
-        raise ValueError(f"No REFL files found in {s2_prepared_dir}")
-
-    # Get bounds from REFL file (full coverage, matches S2)
-    # Use integer division to match distance_to_clouds logic exactly
-    with rasterio.open(sen2_ref_paths[0]) as s2_ref:
-        target_bounds = s2_ref.bounds
-        target_crs = s2_ref.crs
-        # Use integer division matching distance_to_clouds: s2_height // ratio, s2_width // ratio
-        width = s2_ref.width // RESOLUTION_RATIO
-        height = s2_ref.height // RESOLUTION_RATIO
-        s3_transform = rasterio.transform.from_bounds(
-            target_bounds.left,
-            target_bounds.bottom,
-            target_bounds.right,
-            target_bounds.top,
-            width,
-            height,
-        )
-
-    print(
-        f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)..."
-    )
-
-    # Reproject each S3 composite to match S2 REFL bounds
-    sen3_paths = sorted(temp_composite_dir.glob("*.tif"))
-    for sen3_path in sen3_paths:
-        vrt_options = {
-            "transform": s3_transform,
-            "height": height,
-            "width": width,
-            "crs": target_crs,
-            "resampling": Resampling.cubic,
-        }
-        with rasterio.open(sen3_path) as s3_src:
-            with WarpedVRT(s3_src, **vrt_options) as vrt:
-                name = sen3_path.name
-                outfile = s3_preprocessed_dir / name
-                profile = vrt.profile.copy()
-                profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
-                rio_shutil.copy(vrt, outfile, **profile)
-        print(f"[S3-PREP] Saved: {outfile}")
-
-    _apply_s3_temporal_moving_average(
-        s3_preprocessed_dir, S3_MOVING_AVERAGE_WINDOW_DAYS
-    )
-    shutil.rmtree(temp_composite_dir)
-    print("[S3-PREP] Completed")
--- a/preselection.py
+++ b/preselection.py
@ -1,142 +0,0 @@
-"""Pre-selection: self-contained NDVI timeseries with cloud/dark-imagery exclusion markers."""
-import csv
-import json
-import numpy as np
-import rasterio
-from rasterio.warp import transform as transform_coords
-from pathlib import Path
-from datetime import datetime
-
-WINDOW_DAYS = 14
-MIN_WINDOW_SIZE = 3
-THRESHOLDS = {"aggressive": {"threshold": 0.3, "delta": 0.15}, "nonaggressive": {"threshold": 0.2, "delta": 0.25}}
-# S2 uses reflectance * 10000, S3 uses 0-1
-BLUE_MIN = {"s2": 100, "s3": 0.01}
-
-GREEN_BAND = 2
-RED_BAND = 3
-NIR_BAND = 4
-BLUE_BAND = 1
-BAND_KEYS = ["b02", "b03", "b04", "b8a"]
-
-
-def _sample_3x3(input_file, site_position):
-    """Sample mean NDVI and all four bands (3x3 window) at site. Returns (ndvi, {b02,b03,b04,b8a}) or (None, None)."""
-    try:
-        with rasterio.open(input_file) as src:
-            if src.count < 4:
-                return None, None
-            bands = [src.read(i).astype(np.float32) for i in range(1, 5)]
-            lon, lat = site_position[1], site_position[0]
-            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
-            if not (
-                src.bounds.left <= x[0] <= src.bounds.right
-                and src.bounds.bottom <= y[0] <= src.bounds.top
-            ):
-                return None, None
-            row, col = src.index(x[0], y[0])
-            if row < 0 or row >= src.height or col < 0 or col >= src.width:
-                return None, None
-            r0, r1 = max(0, row - 1), min(src.height, row + 2)
-            c0, c1 = max(0, col - 1), min(src.width, col + 2)
-            windows = [b[r0:r1, c0:c1] for b in bands]
-            red_w, nir_w = windows[RED_BAND - 1], windows[NIR_BAND - 1]
-            mask = (red_w > 0) & (nir_w > 0) & ~np.isnan(red_w) & ~np.isnan(nir_w)
-            if not np.any(mask):
-                return None, None
-            ndvi = float(np.mean((nir_w[mask] - red_w[mask]) / (nir_w[mask] + red_w[mask])))
-            band_means = {k: round(float(np.mean(w[mask])), 6) for k, w in zip(BAND_KEYS, windows)}
-            return ndvi, band_means
-    except Exception:
-        return None, None
-
-
-def _extract_date(filename):
-    for part in filename.replace(".geotiff", "").split("_"):
-        if len(part) == 8 and part.isdigit():
-            return part, datetime.strptime(part, "%Y%m%d").isoformat()
-    return None, None
-
-
-def _is_excluded(entry, entries, strategy, source="s2"):
-    """True if entry is excluded by strategy (NDVI threshold/delta or dark blue)."""
-    th = THRESHOLDS[strategy]
-    if entry.get("ndvi") is None:
-        return True
-    blue_min = BLUE_MIN.get(source, BLUE_MIN["s2"])
-    if entry.get("b02") is not None and entry["b02"] < blue_min:
-        return True
-    entry_date = datetime.fromisoformat(entry["date"].replace("Z", "+00:00"))
-    window_ndvi = []
-    for e in entries:
-        if e.get("ndvi") is None:
-            continue
-        d = datetime.fromisoformat(e["date"].replace("Z", "+00:00"))
-        if abs((d - entry_date).days) <= WINDOW_DAYS:
-            window_ndvi.append(e["ndvi"])
-    if len(window_ndvi) < MIN_WINDOW_SIZE:
-        return False
-    threshold = max(window_ndvi) - th["delta"]
-    return entry["ndvi"] < threshold and entry["ndvi"] < th["threshold"]
-
-
-def create_timeseries(season, site_position, site_name):
-    """Build NDVI timeseries (3x3 window) for raw S2/S3, with exclusion markers for both strategies."""
-    lat, lon = site_position
-    base = Path(f"data/{site_name}/{season}")
-
-    print(f"[PRESELECT] Creating NDVI timeseries: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    for source in ["s2", "s3"]:
-        input_dir = base / "raw" / source
-        out_dir = base / "raw" / "preselection"
-        out_dir.mkdir(parents=True, exist_ok=True)
-        output_file = out_dir / f"{source}_preselection.json"
-
-        if not input_dir.exists():
-            print(f"[PRESELECT] Skipping {source}: {input_dir} not found")
-            continue
-
-        timeseries = []
-        for f in sorted(input_dir.glob("*.geotiff")):
-            if "DIST_CLOUD" in f.name:
-                continue
-            date_str, date_iso = _extract_date(f.name)
-            if not date_str:
-                continue
-            ndvi, band_means = _sample_3x3(f, site_position)
-            entry = {"filename": f.name, "date": date_iso, "ndvi": ndvi}
-            if band_means:
-                entry.update(band_means)
-            timeseries.append(entry)
-
-        timeseries.sort(key=lambda e: e["date"])
-        for e in timeseries:
-            e["excluded_aggressive"] = _is_excluded(e, timeseries, "aggressive", source)
-            e["excluded_nonaggressive"] = _is_excluded(e, timeseries, "nonaggressive", source)
-
-        with open(output_file, "w") as out:
-            json.dump(timeseries, out, indent=2)
-
-        csv_file = out_dir / f"{source}_preselection.csv"
-        fieldnames = ["filename", "date", "ndvi"] + BAND_KEYS + ["excluded_aggressive", "excluded_nonaggressive"]
-        with open(csv_file, "w", newline="") as out:
-            w = csv.DictWriter(out, fieldnames=fieldnames, extrasaction="ignore")
-            w.writeheader()
-            for e in timeseries:
-                w.writerow({k: e.get(k) for k in fieldnames})
-
-        n_excl_agg = sum(1 for e in timeseries if e["excluded_aggressive"])
-        n_excl_non = sum(1 for e in timeseries if e["excluded_nonaggressive"])
-        print(f"[PRESELECT] Saved {output_file} + {csv_file.name}: {len(timeseries)} entries ({n_excl_agg} aggressive, {n_excl_non} nonaggressive excluded)")
-
-    print("[PRESELECT] Completed")
-
-
-# Backward compatibility
-def detect_clouds(season, site_position, site_name, cleaning_strategy="aggressive"):
-    """Create timeseries with exclusion markers. Strategy is read from timeseries when preparing."""
-    create_timeseries(season, site_position, site_name)
-
-
-preselect = create_timeseries
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,2 +1,31 @@
+[project]
+name = "worldwide"
+version = "0.1.0"
+description = "Worldwide PhenoCam EFAST feasibility screening"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "efast @ git+https://github.com/DHI-GRAS/efast.git",
+    "netCDF4",
+    "numpy",
+    "openeo",
+    "pystac-client",
+    "python-dateutil",
+    "python-dotenv",
+    "rasterio",
+    "requests",
+    "scipy",
+    "shapely",
+    "tqdm",
+]
+
+[dependency-groups]
+dev = [
+    "ruff",
+]
+
 [tool.ruff.lint.per-file-ignores]
-"run.py" = ["F401"]
+"1-phenocam.py" = ["E402"]
+"2-phenocam-screening.py" = ["E402"]
+"3-sentinel-data.py" = ["E402"]
+"4-fusion.py" = ["E402"]
--- a/requirements.txt
+++ b/requirements.txt
@ -1,12 +0,0 @@
-pystac-client
-rasterio
-openeo
-python-dotenv
-netCDF4
-numpy
-timesat
-requests
-scipy
-matplotlib
-ruff
-pre-commit
--- a/run.py
+++ b/run.py
@ -1,87 +0,0 @@
-"""Pipeline entry point.
-
-Active snippet below only **regenerates metrics.json** (temporal, baseline,
-`derived`, `residual_vs_phenocam`). Requires existing post-processed GCC
-timeseries under `data/{site}/{season}/processed_*`.
-
-Un-comment imports and steps below for acquisition → fusion → post-process.
-"""
-
-# from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
-# from postprocessing import (
-#     post_process_all_scenarios,
-#     post_process_all_itb_scenarios,
-#     post_process_timeseries,
-# )
-# from acquisition_s2 import download_s2
-# from acquisition_s3 import download_s3
-# from acquisition_phenocam import download_phenocam
-# from preselection import create_timeseries
-# from preparation import (
-#     prepare_s2,
-#     prepare_s3,
-#     prepare_s2_gcc_for_itb,
-#     prepare_s3_gcc_for_itb,
-# )
-# from metrics_indices import create_prepared_fusion_timeseries
-from metrics_stats import calculate_all_metrics
-
-# from phenology_timesat import write_phenocam_phenology_for_site
-
-
-def run_pipeline(season, site_position, site_name):
-    """Run pipeline (metrics-only by default; see module docstring)."""
-    try:
-        # print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}")
-        # download_s2(season, site_position, site_name)
-        # download_s3(season, site_position, site_name)
-        # download_phenocam(season, site_position, site_name)
-
-        # print(f"PhenoCam phenology (50 % amplitude): {site_name}, {season}")
-        # write_phenocam_phenology_for_site(site_name, season)
-
-        # print(f"Creating preselection timeseries: {site_name}, {season}")
-        # create_timeseries(season, site_position, site_name)
-
-        # print(f"Preparing S2 and S3 for fusion: {site_name}, {season}")
-        # for strategy in ["aggressive", "nonaggressive"]:
-        #     prepare_s2(season, site_position, site_name, cleaning_strategy=strategy)
-        #     prepare_s3(season, site_position, site_name, cleaning_strategy=strategy)
-
-        # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
-        # run_all_efast_scenarios(season, site_position, site_name)
-
-        # print(f"Index-then-Blend (ItB): {site_name}, {season}")
-        # for strategy in ["aggressive", "nonaggressive"]:
-        #     prepare_s2_gcc_for_itb(
-        #         season, site_position, site_name, cleaning_strategy=strategy
-        #     )
-        #     prepare_s3_gcc_for_itb(
-        #         season, site_position, site_name, cleaning_strategy=strategy
-        #     )
-        # run_all_efast_itb_scenarios(season, site_position, site_name)
-        # post_process_all_itb_scenarios(season, site_position, site_name)
-
-        # print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
-        # create_prepared_fusion_timeseries(season, site_position, site_name)
-
-        # print(f"Post-processing (crop): {site_name}, {season}")
-        # post_process_all_scenarios(season, site_position, site_name)
-        # post_process_timeseries(season, site_position, site_name)
-
-        print(f"Calculating metrics: {site_name}, {season}")
-        calculate_all_metrics(season, site_name, site_position)
-
-    except Exception as e:
-        print(f"Error: {e}")
-        raise
-
-
-if __name__ == "__main__":
-    run_pipeline(2024, (47.116171, 11.320308), "innsbruck")
-    run_pipeline(2024, (35.3045, 25.0743), "forthgr")
-    run_pipeline(2020, (47.116171, 11.320308), "innsbruck")
-    run_pipeline(2024, (58.5633, 24.3688), "pitsalu")
-    run_pipeline(2023, (64.2437, 19.7673), "vindeln2")
-    run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1")
-    run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat")
--- a/satellite-fusion-web.service
+++ b/satellite-fusion-web.service
@ -1,16 +0,0 @@
-[Unit]
-Description=Satellite Fusion Pipeline Web Server
-After=network.target
-
-[Service]
-Type=simple
-User=root
-WorkingDirectory=/opt/satellite-fusion/webapp
-Environment="PATH=/opt/satellite-fusion/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
-ExecStart=/opt/satellite-fusion/venv/bin/python3 -m http.server 8000 --directory /opt/satellite-fusion/webapp
-Restart=always
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
-
--- a/suitability_screening.py
+++ b/suitability_screening.py
@ -1,634 +0,0 @@
-#!/usr/bin/env python3
-"""Compute per-site suitability indicators from existing pipeline outputs.
-
-The script is intentionally schema-tolerant: it prints one site's discovered JSON
-structure first, then uses a small set of common field-name conventions to compute
-SNR, S2 archive density, and S2-S3 GCC coherence.
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import math
-import re
-from collections.abc import Iterable
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-import pandas as pd
-from scipy.interpolate import UnivariateSpline
-from scipy.stats import pearsonr
-
-
-OUTPUT_NAME = "suitability_screening.json"
-SNR_THRESHOLD = 2.0
-MATCH_TOLERANCE_DAYS = 2
-
-
-def load_json(path: Path) -> Any | None:
-    if not path.is_file():
-        return None
-    try:
-        with path.open("r", encoding="utf-8") as f:
-            return json.load(f)
-    except (json.JSONDecodeError, OSError) as exc:
-        print(f"[WARN] Could not read JSON {path}: {exc}")
-        return None
-
-
-def jsonable_float(value: Any) -> float | None:
-    if isinstance(value, bool):
-        return None
-    try:
-        out = float(value)
-    except (TypeError, ValueError):
-        return None
-    if not math.isfinite(out):
-        return None
-    return out
-
-
-def parse_date(value: Any) -> pd.Timestamp | None:
-    if value is None:
-        return None
-    if isinstance(value, pd.Timestamp):
-        return value.normalize()
-    text = str(value).strip()
-    if not text:
-        return None
-    match = re.search(r"(?<!\d)(\d{8})(?!\d)", text)
-    if match:
-        try:
-            return pd.to_datetime(match.group(1), format="%Y%m%d").normalize()
-        except (TypeError, ValueError):
-            pass
-    try:
-        ts = pd.to_datetime(text, errors="coerce")
-    except (TypeError, ValueError):
-        return None
-    if pd.isna(ts):
-        return None
-    if getattr(ts, "tzinfo", None) is not None:
-        ts = ts.tz_convert(None)
-    return pd.Timestamp(ts).normalize()
-
-
-def compact(value: Any, *, max_text: int = 220) -> Any:
-    """Return a short representation suitable for discovery logging."""
-    if isinstance(value, dict):
-        return {k: compact(v, max_text=max_text) for k, v in list(value.items())[:12]}
-    if isinstance(value, list):
-        return [compact(v, max_text=max_text) for v in value[:2]]
-    text = repr(value)
-    if len(text) > max_text:
-        return text[: max_text - 3] + "..."
-    return value
-
-
-def top_keys(data: Any) -> list[str]:
-    if isinstance(data, dict):
-        return list(data.keys())
-    if isinstance(data, list) and data and isinstance(data[0], dict):
-        keys: set[str] = set()
-        for entry in data[:5]:
-            keys.update(entry.keys())
-        return sorted(keys)
-    return []
-
-
-def normalize_records(data: Any) -> list[dict[str, Any]]:
-    """Convert common JSON shapes into a list of record dictionaries."""
-    if data is None:
-        return []
-    if isinstance(data, list):
-        records = []
-        for item in data:
-            if isinstance(item, dict):
-                records.append(dict(item))
-            else:
-                records.append({"value": item})
-        return records
-    if not isinstance(data, dict):
-        return [{"value": data}]
-
-    for key in ("timeseries", "time_series", "data", "entries", "results", "records"):
-        value = data.get(key)
-        if isinstance(value, list):
-            return normalize_records(value)
-
-    # Dict keyed by date or filename.
-    if data and all(not isinstance(v, (list, tuple)) for v in data.values()):
-        records = []
-        for key, value in data.items():
-            if isinstance(value, dict):
-                record = dict(value)
-                record.setdefault("date", key)
-            else:
-                record = {"date": key, "value": value}
-            records.append(record)
-        return records
-
-    return [dict(data)]
-
-
-def first_records(data: Any, count: int = 2) -> list[Any]:
-    records = normalize_records(data)
-    return records[:count]
-
-
-def recursive_snr_candidates(data: Any, prefix: str = "") -> list[tuple[str, Any]]:
-    found: list[tuple[str, Any]] = []
-    if isinstance(data, dict):
-        for key, value in data.items():
-            path = f"{prefix}.{key}" if prefix else str(key)
-            if "snr" in str(key).lower():
-                found.append((path, value))
-            found.extend(recursive_snr_candidates(value, path))
-    elif isinstance(data, list):
-        for i, value in enumerate(data[:10]):
-            found.extend(recursive_snr_candidates(value, f"{prefix}[{i}]"))
-    return found
-
-
-def find_numeric_snr(data: Any) -> float | None:
-    candidates = recursive_snr_candidates(data)
-    # Prefer exact leaf keys named "snr"; fall back to any numeric snr-containing key.
-    candidates.sort(key=lambda kv: 0 if kv[0].split(".")[-1].lower() == "snr" else 1)
-    for _, value in candidates:
-        numeric = jsonable_float(value)
-        if numeric is not None:
-            return numeric
-        if isinstance(value, dict):
-            nested = value.get("snr")
-            numeric = jsonable_float(nested)
-            if numeric is not None:
-                return numeric
-    return None
-
-
-def find_site_roots(base_dir: Path) -> list[tuple[str, Path]]:
-    """Find direct site roots, plus the repo's common site/year layout."""
-    roots: list[tuple[str, Path]] = []
-    if not base_dir.is_dir():
-        return roots
-
-    def looks_like_site_root(path: Path) -> bool:
-        return any(
-            (
-                (path / "metrics.json").exists(),
-                (path / "raw" / "preselection").exists(),
-                (path / "phenocam").exists(),
-                (path / "raw" / "phenocam").exists(),
-            )
-        )
-
-    for child in sorted(p for p in base_dir.iterdir() if p.is_dir()):
-        if looks_like_site_root(child):
-            roots.append((child.name, child))
-            continue
-        for grandchild in sorted(p for p in child.iterdir() if p.is_dir()):
-            if looks_like_site_root(grandchild):
-                name = child.name if grandchild.name.isdigit() else f"{child.name}_{grandchild.name}"
-                roots.append((name, grandchild))
-
-    return roots
-
-
-def find_s2_preselection(site_root: Path) -> Path | None:
-    candidates = [
-        site_root / "raw" / "preselection" / "s2_preselection.json",
-        site_root / "preselection" / "s2_preselection.json",
-    ]
-    return next((p for p in candidates if p.is_file()), None)
-
-
-def find_s3_timeseries(site_root: Path) -> Path | None:
-    candidates = [
-        site_root / "processed_aggressive_sigma20" / "gcc" / "s3" / "timeseries.json",
-        site_root / "processed_aggressive_itb_sigma20" / "gcc" / "s3" / "timeseries.json",
-    ]
-    for candidate in candidates:
-        if candidate.is_file():
-            return candidate
-    matches = sorted(site_root.glob("processed*aggressive*sigma20*/gcc/s3/timeseries.json"))
-    return matches[0] if matches else None
-
-
-def find_metrics(site_root: Path) -> Path | None:
-    path = site_root / "metrics.json"
-    return path if path.is_file() else None
-
-
-def find_phenocam(site_root: Path) -> Path | None:
-    candidates = [
-        site_root / "phenocam" / "gcc_90.json",
-        site_root / "phenocam" / "phenocam_gcc.json",
-        site_root / "raw" / "phenocam" / "gcc_90.json",
-        site_root / "raw" / "phenocam" / "phenocam_gcc.json",
-    ]
-    for candidate in candidates:
-        if candidate.is_file():
-            return candidate
-    patterns = [
-        "phenocam/*gcc*90*.json",
-        "phenocam/*gcc*.json",
-        "raw/phenocam/*gcc*90*.json",
-        "raw/phenocam/*gcc*.json",
-        "raw/phenocam/*.json",
-    ]
-    for pattern in patterns:
-        matches = sorted(site_root.glob(pattern))
-        if matches:
-            return matches[0]
-    return None
-
-
-def print_structure(label: str, path: Path | None) -> None:
-    print(f"\n[{label}]")
-    if path is None:
-        print("missing")
-        return
-    data = load_json(path)
-    print(f"path: {path}")
-    print(f"type: {type(data).__name__}")
-    print(f"keys: {top_keys(data)}")
-    records = [] if label == "metrics.json" else first_records(data, 2)
-    if records:
-        print(f"first {len(records)} entr{'y' if len(records) == 1 else 'ies'}:")
-        print(json.dumps(compact(records), indent=2, default=str))
-    if label == "metrics.json":
-        snr = recursive_snr_candidates(data)
-        phenocam_keys = []
-        if isinstance(data, dict):
-            for key, value in data.items():
-                if "phenocam" in str(key).lower():
-                    phenocam_keys.append((key, top_keys(value)))
-        print(f"phenocam-like keys: {phenocam_keys}")
-        print(f"snr-like keys: {[(path, compact(value)) for path, value in snr]}")
-
-
-def run_discovery(site_name: str, site_root: Path) -> None:
-    print("\n=== Discovery mode ===")
-    print(f"Using site: {site_name} ({site_root})")
-    print_structure("s2_preselection.json", find_s2_preselection(site_root))
-    print_structure("S3 timeseries.json", find_s3_timeseries(site_root))
-    print_structure("metrics.json", find_metrics(site_root))
-    print_structure("PhenoCam gcc_90 file", find_phenocam(site_root))
-    print("\n=== Computing indicators ===")
-
-
-def choose_discovery_site(site_roots: list[tuple[str, Path]]) -> tuple[str, Path]:
-    def score(item: tuple[str, Path]) -> int:
-        _, root = item
-        return sum(
-            int(path is not None)
-            for path in (
-                find_s2_preselection(root),
-                find_s3_timeseries(root),
-                find_metrics(root),
-                find_phenocam(root),
-            )
-        )
-
-    return max(site_roots, key=score)
-
-
-def truthy_status(value: Any, *, field_name: str | None = None) -> bool | None:
-    if isinstance(value, bool):
-        if field_name and any(word in field_name.lower() for word in ("reject", "exclude")):
-            return not value
-        return value
-    if value is None:
-        return True
-    if isinstance(value, (int, float)) and not isinstance(value, bool):
-        if field_name and any(word in field_name.lower() for word in ("reject", "exclude")):
-            return not bool(value)
-        return bool(value)
-    text = str(value).strip().lower()
-    if text in {"", "none", "null", "nan", "ok", "pass", "passed", "keep", "kept", "valid", "selected"}:
-        return True
-    if text in {
-        "fail",
-        "failed",
-        "false",
-        "reject",
-        "rejected",
-        "exclude",
-        "excluded",
-        "invalid",
-        "cloud",
-        "cloudy",
-        "dark",
-        "bad",
-    }:
-        return False
-    if field_name and any(word in field_name.lower() for word in ("reason", "status")):
-        return False
-    return None
-
-
-def acquisition_passes(entry: dict[str, Any], strategy: str) -> bool:
-    strategy_aliases = {
-        strategy,
-        strategy.replace("nonaggressive", "non_aggressive"),
-        strategy.replace("nonaggressive", "non-aggressive"),
-    }
-    negative_prefixes = ("excluded", "exclude", "rejected", "reject")
-    positive_prefixes = ("passed", "pass", "keep", "kept", "valid", "selected")
-
-    for alias in strategy_aliases:
-        for prefix in negative_prefixes:
-            key = f"{prefix}_{alias}"
-            if key in entry:
-                return not bool(entry[key])
-        for prefix in positive_prefixes:
-            key = f"{prefix}_{alias}"
-            if key in entry:
-                return bool(entry[key])
-
-    for alias in strategy_aliases:
-        nested = entry.get(alias)
-        if isinstance(nested, dict):
-            for key, value in nested.items():
-                passed = truthy_status(value, field_name=key)
-                if passed is not None:
-                    return passed
-        elif nested is not None:
-            passed = truthy_status(nested, field_name=alias)
-            if passed is not None:
-                return passed
-
-    # Generic status fields.
-    for key in (*negative_prefixes, *positive_prefixes, "status", "strategy", "reason", "rejection_reason"):
-        if key in entry:
-            passed = truthy_status(entry[key], field_name=key)
-            if passed is not None:
-                return passed
-
-    # Dict keyed by date with a scalar rejection reason.
-    if "value" in entry and len(entry) <= 3:
-        passed = truthy_status(entry.get("value"), field_name="value")
-        if passed is not None:
-            return passed
-
-    # Existing pipeline entries with band means and no rejection marker are usable.
-    return True
-
-
-def band_value(entry: dict[str, Any], names: Iterable[str]) -> float | None:
-    lowered = {str(k).lower(): v for k, v in entry.items()}
-    for name in names:
-        if name.lower() in lowered:
-            value = jsonable_float(lowered[name.lower()])
-            if value is not None:
-                return value
-    for container_key in ("bands", "band_means", "reflectance", "reflectances", "means", "window_means"):
-        container = entry.get(container_key)
-        if isinstance(container, dict):
-            value = band_value(container, names)
-            if value is not None:
-                return value
-    return None
-
-
-def entry_date(entry: dict[str, Any]) -> pd.Timestamp | None:
-    for key in ("date", "datetime", "time", "timestamp", "acquisition_date"):
-        if key in entry:
-            date = parse_date(entry[key])
-            if date is not None:
-                return date
-    for key in ("filename", "file", "path", "name"):
-        if key in entry:
-            date = parse_date(entry[key])
-            if date is not None:
-                return date
-    return None
-
-
-def s2_gcc_series(s2_data: Any) -> pd.DataFrame:
-    rows = []
-    for entry in normalize_records(s2_data):
-        if not isinstance(entry, dict) or not acquisition_passes(entry, "aggressive"):
-            continue
-        date = entry_date(entry)
-        blue = band_value(entry, ("b02", "blue", "B02", "band_1", "band1"))
-        green = band_value(entry, ("b03", "green", "B03", "band_2", "band2"))
-        red = band_value(entry, ("b04", "red", "B04", "band_3", "band3"))
-        if date is None or blue is None or green is None or red is None:
-            continue
-        denom = blue + green + red
-        if denom <= 0:
-            continue
-        rows.append({"date": date, "s2_gcc": green / denom})
-    if not rows:
-        return pd.DataFrame(columns=["date", "s2_gcc"])
-    return pd.DataFrame(rows).groupby("date", as_index=False)["s2_gcc"].mean().sort_values("date")
-
-
-def value_from_record(entry: dict[str, Any], preferred: Iterable[str]) -> float | None:
-    lowered = {str(k).lower(): v for k, v in entry.items()}
-    for name in preferred:
-        value = jsonable_float(lowered.get(name.lower()))
-        if value is not None:
-            return value
-    for key, value in lowered.items():
-        if any(token in key for token in ("gcc", "greenness")):
-            numeric = jsonable_float(value)
-            if numeric is not None:
-                return numeric
-    return None
-
-
-def gcc_timeseries(data: Any, value_name: str) -> pd.DataFrame:
-    rows = []
-    for entry in normalize_records(data):
-        if not isinstance(entry, dict):
-            continue
-        date = entry_date(entry)
-        value = value_from_record(
-            entry,
-            ("greenness_index", "gcc_90", "gcc", "value", "mean", "site_value"),
-        )
-        if date is not None and value is not None:
-            rows.append({"date": date, value_name: value})
-    if not rows:
-        return pd.DataFrame(columns=["date", value_name])
-    return pd.DataFrame(rows).groupby("date", as_index=False)[value_name].mean().sort_values("date")
-
-
-def compute_archive_density(s2_data: Any | None) -> tuple[int | None, int | None]:
-    if s2_data is None:
-        return None, None
-    records = [entry for entry in normalize_records(s2_data) if isinstance(entry, dict)]
-    if not records:
-        return None, None
-    aggressive = sum(1 for entry in records if acquisition_passes(entry, "aggressive"))
-    nonaggressive = sum(1 for entry in records if acquisition_passes(entry, "nonaggressive"))
-    return aggressive, nonaggressive
-
-
-def compute_coherence(s2_data: Any | None, s3_data: Any | None) -> tuple[int | None, float | None, float | None]:
-    if s2_data is None or s3_data is None:
-        return None, None, None
-    s2 = s2_gcc_series(s2_data)
-    s3 = gcc_timeseries(s3_data, "s3_gcc")
-    if s2.empty or s3.empty:
-        return 0, None, None
-
-    matched = pd.merge_asof(
-        s2.sort_values("date"),
-        s3.sort_values("date"),
-        on="date",
-        direction="nearest",
-        tolerance=pd.Timedelta(days=MATCH_TOLERANCE_DAYS),
-    ).dropna(subset=["s2_gcc", "s3_gcc"])
-    n = int(len(matched))
-    if n < 2:
-        return n, None, None
-    r, p_value = pearsonr(matched["s2_gcc"].to_numpy(), matched["s3_gcc"].to_numpy())
-    return n, jsonable_float(r), jsonable_float(p_value)
-
-
-def phenocam_series(data: Any | None) -> pd.DataFrame:
-    if data is None:
-        return pd.DataFrame(columns=["date", "gcc"])
-    rows = []
-    for entry in normalize_records(data):
-        if isinstance(entry, dict):
-            date = entry_date(entry)
-            value = value_from_record(
-                entry,
-                ("gcc_90", "greenness_index", "gcc", "gcc_mean", "value"),
-            )
-        else:
-            date = None
-            value = jsonable_float(entry)
-        if date is not None and value is not None:
-            rows.append({"date": date, "gcc": value})
-    if not rows:
-        return pd.DataFrame(columns=["date", "gcc"])
-    return pd.DataFrame(rows).groupby("date", as_index=False)["gcc"].mean().sort_values("date")
-
-
-def compute_snr_from_phenocam(phenocam_data: Any | None) -> float | None:
-    series = phenocam_series(phenocam_data)
-    if len(series) < 5:
-        return None
-    x = (series["date"] - series["date"].min()).dt.days.to_numpy(dtype=float)
-    y = series["gcc"].to_numpy(dtype=float)
-    if len(np.unique(x)) < 5:
-        return None
-    try:
-        spline = UnivariateSpline(x, y, k=3)
-        residual = y - spline(x)
-    except Exception as exc:
-        print(f"[WARN] Could not fit PhenoCam smoothing spline: {exc}")
-        return None
-    rmse = float(np.sqrt(np.mean(residual**2)))
-    amplitude = float(np.max(y) - np.min(y))
-    if rmse <= 0:
-        return None
-    return amplitude / rmse
-
-
-def compute_snr(metrics_data: Any | None, phenocam_data: Any | None) -> float | None:
-    from_metrics = find_numeric_snr(metrics_data)
-    if from_metrics is not None:
-        return from_metrics
-    return compute_snr_from_phenocam(phenocam_data)
-
-
-def compute_site(site_root: Path) -> dict[str, Any]:
-    s2_data = load_json(find_s2_preselection(site_root) or Path("__missing__"))
-    s3_data = load_json(find_s3_timeseries(site_root) or Path("__missing__"))
-    metrics_data = load_json(find_metrics(site_root) or Path("__missing__"))
-    phenocam_data = load_json(find_phenocam(site_root) or Path("__missing__"))
-
-    snr = compute_snr(metrics_data, phenocam_data)
-    n_s2_aggressive, n_s2_nonaggressive = compute_archive_density(s2_data)
-    n_matched, pearson_r, p_value = compute_coherence(s2_data, s3_data)
-
-    return {
-        "snr": snr,
-        "snr_pass": None if snr is None else snr >= SNR_THRESHOLD,
-        "n_s2_aggressive": n_s2_aggressive,
-        "n_s2_nonaggressive": n_s2_nonaggressive,
-        "coherence_n_matched": n_matched,
-        "coherence_pearson_r": pearson_r,
-        "coherence_p_value": p_value,
-    }
-
-
-def print_summary(results: dict[str, dict[str, Any]]) -> None:
-    print("\nSuitability summary")
-    if not results:
-        print("(no sites found)")
-        return
-
-    columns = [
-        ("site", "site"),
-        ("snr", "snr"),
-        ("snr_pass", "pass"),
-        ("n_s2_aggressive", "n_s2_agg"),
-        ("n_s2_nonaggressive", "n_s2_nonagg"),
-        ("coherence_n_matched", "n_match"),
-        ("coherence_pearson_r", "pearson_r"),
-        ("coherence_p_value", "p_value"),
-    ]
-
-    def fmt(value: Any, key: str) -> str:
-        if value is None:
-            return "null"
-        if key.startswith("n_") or key == "coherence_n_matched":
-            return str(int(value))
-        if isinstance(value, bool):
-            return "true" if value else "false"
-        if isinstance(value, (int, float)):
-            return f"{float(value):.4g}"
-        return str(value)
-
-    rows = []
-    for site, values in results.items():
-        rows.append([site, *[fmt(values.get(key), key) for key, _ in columns[1:]]])
-    widths = [
-        max(len(header), *(len(row[i]) for row in rows))
-        for i, (_, header) in enumerate(columns)
-    ]
-    header = "  ".join(header.ljust(widths[i]) for i, (_, header) in enumerate(columns))
-    print(header)
-    print("  ".join("-" * width for width in widths))
-    for row in rows:
-        print("  ".join(row[i].ljust(widths[i]) for i in range(len(columns))))
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--base-dir",
-        required=True,
-        type=Path,
-        help="Pipeline output root containing one subdirectory per site.",
-    )
-    args = parser.parse_args()
-
-    base_dir = args.base_dir.expanduser().resolve()
-    site_roots = find_site_roots(base_dir)
-    if site_roots:
-        run_discovery(*choose_discovery_site(site_roots))
-    else:
-        print(f"[WARN] No site directories found under {base_dir}")
-
-    results = {site_name: compute_site(site_root) for site_name, site_root in site_roots}
-    output_path = base_dir / OUTPUT_NAME
-    with output_path.open("w", encoding="utf-8") as f:
-        json.dump(results, f, indent=2, allow_nan=False)
-        f.write("\n")
-    print_summary(results)
-    print(f"\nWrote {output_path}")
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
--- a/uv.lock
+++ b/uv.lock
--- a/webapp/fusion.html
+++ b/webapp/fusion.html
@ -1,397 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <title>Fusion Viewer</title>
-    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
-    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
-    <script src="common.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
-        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
-        .selectors { margin-bottom: 20px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { margin: 0 0 5px 0; font-size: 22px; }
-        .season-row { padding-bottom: 15px; }
-        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
-        .download-links { margin-left: 10px; font-size: 14px; }
-        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
-        .download-links a:hover { text-decoration: underline; }
-        #dateSlider { width: 100%; margin: 15px 0; }
-        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
-        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
-        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
-        #fusionMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
-        .leaflet-image-layer { image-rendering: pixelated; }
-        .leaflet-control-attribution { display: none; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="header-sticky">
-            <div class="nav">
-                <a href="index.html">Full</a>
-                <a href="preselection.html">Pre-selection</a>
-                <a href="prepared.html">Prepared</a>
-                <a href="fusion.html" class="active">Fusion</a>
-                <a href="postprocessed.html">Postprocessed</a>
-                <a href="metrics.html">Metrics</a>
-                <a href="gap_validation.html">Gap validation</a>
-                <a href="phenology.html">Phenology</a>
-            </div>
-            <h1 id="siteName">Innsbruck</h1>
-            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
-            <div class="selectors">
-                <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-            <label>Strategy:</label>
-            <select id="strategySelect">
-                <option value="aggressive">Aggressive</option>
-                <option value="nonaggressive">Non-aggressive</option>
-            </select>
-            <label>Sigma:</label>
-            <select id="sigmaSelect">
-                <option value="20">σ=20</option>
-                <option value="30">σ=30</option>
-            </select>
-            <label>Mode:</label>
-            <select id="fusionModeSelect" title="BtI = reflectance fusion; ItB = GCC fusion">
-                <option value="bti">BtI (REFL)</option>
-                <option value="itb">ItB (GCC)</option>
-            </select>
-            </div>
-            <input type="range" id="dateSlider" min="0" max="365" value="0">
-            <div id="dateDisplay">2024-01-01</div>
-        </div>
-        <div class="map-label" id="mapLabelFusion">Fusion RGB (closest available)</div>
-        <div id="mapDate" class="map-date"></div>
-        <div id="fusionMap"></div>
-        <div id="plots">
-            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
-            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
-            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
-            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
-            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
-            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
-        </div>
-    </div>
-    <script>
-        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
-        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
-
-        let siteName = "innsbruck", season = "2024";
-        let strategy = "aggressive", sigma = "20", fusionMode = "bti";
-        let sitePosition = [47.116171, 11.320308];
-        let start = new Date(2024, 0, 1);
-        let availableSiteSeasons = {};
-        let fusionMap = null, overlay = null, marker = null;
-        let ndviTs = [], gccTs = [], bandsTs = [];
-        const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
-        const urlParams = new URLSearchParams(location.search);
-        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
-
-        const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
-        const dateFromDays = (days) => fmtDate(new Date(start.getTime() + days * 86400000));
-        const daysFromDate = (dateStr) => {
-            const [y, m, d] = dateStr.split("-").map(Number);
-            return Math.floor((new Date(y, m - 1, d) - start) / 86400000);
-        };
-
-        function getPreparedBase() {
-            return fusionMode === "itb" ? `prepared_${strategy}_itb` : `prepared_${strategy}`;
-        }
-
-        function getFusionDir() {
-            const sub = sigma === "30" ? "fusion_sigma30" : "fusion";
-            return `data/${siteName}/${season}/${getPreparedBase()}/${sub}`;
-        }
-
-        function getFusionTimeseriesDir() {
-            return sigma === "30" ? "fusion_sigma30" : "fusion";
-        }
-
-        async function loadTimeseries() {
-            const sub = getFusionTimeseriesDir();
-            const base = `data/${siteName}/${season}/${getPreparedBase()}`;
-            try {
-                if (fusionMode === "itb") {
-                    const g = await fetch(`${base}/gcc/${sub}/timeseries.json`).then((r) => (r.ok ? r.json() : []));
-                    ndviTs = [];
-                    gccTs = g;
-                    bandsTs = [];
-                } else {
-                    const [n, g, b] = await Promise.all([
-                        fetch(`${base}/ndvi/${sub}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                        fetch(`${base}/gcc/${sub}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                        fetch(`${base}/bands/${sub}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                    ]);
-                    ndviTs = n;
-                    gccTs = g;
-                    bandsTs = b;
-                }
-            } catch {
-                ndviTs = [];
-                gccTs = [];
-                bandsTs = [];
-            }
-            drawPlots();
-            updateDownloadLinks();
-        }
-
-        function drawPlot(canvasId, data, key, color) {
-            const canvas = document.getElementById(canvasId);
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-            const pts = data.filter(t => t[key] != null);
-            if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
-            const dates = pts.map(t => new Date(t.date));
-            const vals = pts.map(t => t[key]);
-            const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
-            const minV = Math.min(...vals), maxV = Math.max(...vals);
-            const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
-            const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
-            const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
-            ctx.fillText(maxV.toFixed(3), 2, pad + 3);
-            ctx.strokeStyle = color;
-            ctx.beginPath();
-            pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
-            ctx.stroke();
-            const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(curDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
-            const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
-            if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
-        }
-
-        function drawPlots() {
-            drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
-            drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
-            BANDS.forEach(b => drawPlot(`plot_${b.key}`, bandsTs, b.key, b.color));
-        }
-
-        function updateDownloadLinks() {
-            const el = document.getElementById("downloadLinks");
-            if (!el) return;
-            const sub = getFusionTimeseriesDir();
-            const prep = `data/${siteName}/${season}/${getPreparedBase()}`;
-            if (fusionMode === "itb") {
-                el.innerHTML = `<a href="${prep}/gcc/${sub}/timeseries.json">[GCC JSON]</a>`;
-                return;
-            }
-            const base = `${prep}/export/${sub}`;
-            const name = `${siteName}_${season}_fusion_${strategy}_${sub}`;
-            el.innerHTML = `<a href="${base}/timeseries.json" download="${name}.json">[JSON]</a><a href="${base}/timeseries.csv" download="${name}.csv">[CSV]</a>`;
-        }
-
-        async function findFusionFile(dateStr) {
-            const target = new Date(dateStr);
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            const seasonStart = start.getTime();
-            const seasonEnd = yearEnd.getTime();
-            for (let offset = 0; offset <= 365; offset++) {
-                for (const dir of offset === 0 ? [0] : [-1, 1]) {
-                    const d = new Date(target.getTime() + dir * offset * 86400000);
-                    if (d.getTime() < seasonStart || d.getTime() > seasonEnd) continue;
-                    const ds = d.toISOString().split("T")[0].replace(/-/g, "");
-                    const filename = (fusionMode === "itb" ? "GCC_" : "REFL_") + `${ds}.tif`;
-                    try {
-                        const res = await fetch(`${getFusionDir()}/${filename}`, { method: "HEAD" });
-                        if (res.ok) return filename;
-                    } catch {}
-                }
-            }
-            return null;
-        }
-
-        function transformBounds(bbox, fromCRS) {
-            const sw = proj4(fromCRS, "EPSG:4326", [bbox[0], bbox[1]]);
-            const ne = proj4(fromCRS, "EPSG:4326", [bbox[2], bbox[3]]);
-            return [[sw[1], sw[0]], [ne[1], ne[0]]];
-        }
-
-        async function loadGeotiff(filename) {
-            const path = `${getFusionDir()}/${filename}`;
-            const buf = await (await fetch(path)).arrayBuffer();
-            const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
-            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
-            const dateStr = filename.replace(/^(REFL|GCC)_/, "").replace(".tif", "");
-            return { dataUrl, bounds, dateStr };
-        }
-
-        async function updateMap() {
-            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const filename = await findFusionFile(dateStr);
-            if (!filename || !fusionMap) {
-                if (overlay) { fusionMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-                return;
-            }
-            try {
-                const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
-                if (overlay) fusionMap.removeLayer(overlay);
-                overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(fusionMap);
-                fusionMap.fitBounds(bounds);
-                document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
-            } catch (e) {
-                if (overlay) { fusionMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-            }
-        }
-
-        async function probeDataExists(sitename, s) {
-            try {
-                const res = await fetch(`data/${sitename}/${s}/raw/preselection/s2_preselection.json`, { method: "HEAD" });
-                return res.ok;
-            } catch { return false; }
-        }
-
-        function getSiteBySitename(sn) {
-            return window.sitesData?.features?.find(f => f.properties?.sitename === sn);
-        }
-
-        async function setSiteSeason(newSite, newSeason) {
-            siteName = newSite;
-            season = newSeason;
-            start = new Date(parseInt(season), 0, 1);
-            const site = getSiteBySitename(newSite);
-            if (site?.geometry?.coordinates) {
-                const [lon, lat] = site.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            if (fusionMap) { fusionMap.setView(sitePosition, 12); if (marker) marker.setLatLng(sitePosition); }
-            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
-            document.getElementById("season").textContent = season;
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            document.getElementById("dateSlider").max = Math.ceil((yearEnd - start) / 86400000);
-            const params = new URLSearchParams(location.search);
-            params.set("site", siteName);
-            params.set("season", season);
-            params.set("mode", fusionMode);
-            history.replaceState({}, "", `?${params}`);
-            const urlDate = params.get("date");
-            if (urlDate) document.getElementById("dateSlider").value = daysFromDate(urlDate);
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            await loadTimeseries();
-            await updateMap();
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch { window.sitesData = { features: [] }; }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeDataExists(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
-            const initialSeason = (urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)) ? urlSeason : ((availableSiteSeasons[initialSite] || [])[0] || "2024");
-
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || []).map(s =>
-                `<option value="${s}">${s}</option>`
-            ).join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            strategy = urlParams.get("strategy") || "aggressive";
-            sigma = urlParams.get("sigma") || "20";
-            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
-            document.getElementById("strategySelect").value = strategy;
-            document.getElementById("sigmaSelect").value = sigma;
-            document.getElementById("fusionModeSelect").value = fusionMode;
-            const ml = document.getElementById("mapLabelFusion");
-            if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";
-
-            const initSite = getSiteBySitename(initialSite);
-            if (initSite?.geometry?.coordinates) {
-                const [lon, lat] = initSite.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            fusionMap = L.map("fusionMap", { zoomControl: false }).setView(sitePosition, 12)
-                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
-            marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(fusionMap);
-
-            siteSelect.addEventListener("change", function() {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map(s => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                setSiteSeason(sn, document.getElementById("seasonSelect").value);
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function() {
-                setSiteSeason(siteSelect.value, this.value);
-            });
-            document.getElementById("strategySelect").addEventListener("change", function() {
-                strategy = this.value;
-                urlParams.set("strategy", strategy);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("sigmaSelect").addEventListener("change", function() {
-                sigma = this.value;
-                urlParams.set("sigma", sigma);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("fusionModeSelect").addEventListener("change", function() {
-                fusionMode = this.value;
-                urlParams.set("mode", fusionMode);
-                history.replaceState({}, "", `?${urlParams}`);
-                const ml = document.getElementById("mapLabelFusion");
-                if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";
-                loadTimeseries(); updateMap();
-            });
-
-            await setSiteSeason(initialSite, initialSeason);
-        }
-
-        document.getElementById("dateSlider").addEventListener("input", function() {
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
-            drawPlots(); updateMap();
-        });
-
-        init();
-    </script>
-</body>
-</html>
--- a/webapp/gap_validation.html
+++ b/webapp/gap_validation.html
@ -1,284 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <title>Gap validation</title>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1100px; margin: 0 auto; padding: 20px; }
-        .selectors { margin-bottom: 18px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { font-size: 22px; margin-top: 0; }
-        h2 { font-size: 16px; margin-top: 22px; color: #333; }
-        h2:first-of-type { margin-top: 8px; }
-        table { border-collapse: collapse; width: 100%; font-size: 12px; margin-bottom: 14px; }
-        th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; vertical-align: top; }
-        th { background: #f5f5f5; }
-        td.num { text-align: right; font-variant-numeric: tabular-nums; }
-        td.paths { font-size: 11px; word-break: break-all; color: #444; max-width: 420px; }
-        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
-                 padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
-        .intro code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
-        .section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; line-height: 1.45; }
-        .empty { color: #666; font-style: italic; }
-        .err { color: #a00; }
-        details.meta { font-size: 12px; margin-top: 12px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
-        details.meta summary { cursor: pointer; font-weight: 600; }
-        details.meta pre { margin: 8px 0 0; overflow: auto; font-size: 11px; max-height: 200px; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="nav">
-            <a href="index.html">Full</a>
-            <a href="preselection.html">Pre-selection</a>
-            <a href="prepared.html">Prepared</a>
-            <a href="fusion.html">Fusion</a>
-            <a href="postprocessed.html">Postprocessed</a>
-            <a href="metrics.html">Metrics</a>
-            <a href="gap_validation.html" class="active">Gap validation</a>
-            <a href="phenology.html">Phenology</a>
-        </div>
-        <h1 id="pageTitle">Gap validation</h1>
-        <div class="selectors">
-            <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-        </div>
-        <div id="content"></div>
-    </div>
-    <script>
-        let siteName = "innsbruck",
-            season = "2024";
-        let availableSiteSeasons = {};
-        const urlParams = new URLSearchParams(location.search);
-
-        async function probeSummary(sn, s) {
-            try {
-                const res = await fetch(`data/${sn}/${s}/validation/gap_validation_summary.json`, {
-                    method: "HEAD",
-                });
-                return res.ok;
-            } catch {
-                return false;
-            }
-        }
-
-        function fmt(v, d = 4) {
-            if (v == null || typeof v !== "number" || !Number.isFinite(v)) return "—";
-            return v.toFixed(d);
-        }
-
-        function fmtInt(v) {
-            if (v == null || typeof v !== "number" || !Number.isFinite(v)) return "—";
-            return String(Math.round(v));
-        }
-
-        function crossoverBlock(summary) {
-            const scen = summary.scenario;
-            const wcRoot = summary.whittaker_crossover || {};
-            const wc = (scen && wcRoot[scen]) || Object.values(wcRoot)[0];
-            if (!wc) return "";
-            const first = wc.first_gap_days_fusion_nse_below_whittaker;
-            const def = wc.whittaker_definition || "";
-            let h = `<h2>Whittaker crossover (NSE<sub>S2</sub>)</h2>`;
-            h += `<p class="section-note">${def}</p>`;
-            h += `<p class="section-note"><b>First gap length (days)</b> where fusion NSE<sub>S2</sub> &lt; Whittaker NSE<sub>S2</sub> (strict): <b>${first != null ? first : "—"}</b> (none if fusion never falls below).</p>`;
-            const rows = wc.by_gap || [];
-            if (rows.length) {
-                h += `<table><tr><th>Gap days</th><th class="num">NSE<sub>S2</sub> fusion</th><th class="num">NSE<sub>S2</sub> Whittaker</th></tr>`;
-                for (const r of rows) {
-                    h += `<tr><td>${r.gap_days}</td><td class="num">${fmt(r.nse_s2_fusion, 3)}</td><td class="num">${fmt(r.nse_s2_whittaker, 3)}</td></tr>`;
-                }
-                h += `</table>`;
-            }
-            return h;
-        }
-
-        function manifestTable(manifest) {
-            if (!manifest?.entries?.length) return "";
-            let h = `<h2>Gap manifest</h2>`;
-            h += `<p class="section-note">From <code>data/${siteName}/${season}/validation/gap_manifest.json</code>. Midpoint rule: ${manifest.entries[0]?.midpoint_rule || "—"}.</p>`;
-            h += `<table><tr><th>Transition</th><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`;
-            for (const e of manifest.entries) {
-                const w = `${e.window_start} → ${e.window_end}`;
-                h += `<tr><td>${e.transition || "—"}</td><td>${e.gap_days}</td><td>${e.prediction_date}</td><td>${w}</td><td>${e.withheld_s2_filename || "—"}</td></tr>`;
-            }
-            h += `</table>`;
-            return h;
-        }
-
-        function resultsTable(results) {
-            if (!results?.length) return `<p class="empty">No result rows in summary.</p>`;
-            const head = `<tr>
-                <th>Transition</th><th>Gap</th><th>Prediction</th><th>Withheld REFL</th>
-                <th class="num">RMSE<br><span style="font-weight:normal">gap</span></th>
-                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">gap</span></th>
-                <th class="num">RMSE<br><span style="font-weight:normal">no gap</span></th>
-                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">no gap</span></th>
-                <th class="num">ΔRMSE</th><th class="num">ΔNSE</th>
-                <th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">Whitt.</span></th>
-                <th class="num">n</th>
-                <th>Paths / error</th>
-            </tr>`;
-            const parts = [head];
-            for (const r of results) {
-                if (r.error) {
-                    parts.push(
-                        `<tr><td>${r.transition ?? "—"}</td><td>${r.gap_days ?? "—"}</td><td colspan="9" class="err">${r.error}</td><td class="paths">${r.fused_gap_path || ""}</td></tr>`
-                    );
-                    continue;
-                }
-                const g = r.spatial?.gap || {};
-                const ng = r.spatial?.no_gap || {};
-                const wh = r.spatial?.whittaker || {};
-                const dRm = r.spatial?.delta_rmse;
-                const dNs = r.spatial?.delta_nse;
-                const p = r.paths || {};
-                const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl].filter(Boolean).join("<br>");
-                parts.push(`<tr>
-                    <td>${r.transition || "—"}</td>
-                    <td>${r.gap_days}</td>
-                    <td>${r.prediction_date || "—"}</td>
-                    <td style="font-size:11px">${r.withheld_s2_filename || "—"}</td>
-                    <td class="num">${fmt(g.rmse)}</td>
-                    <td class="num">${fmt(g.nse_s2, 3)}</td>
-                    <td class="num">${fmt(ng.rmse)}</td>
-                    <td class="num">${fmt(ng.nse_s2, 3)}</td>
-                    <td class="num">${fmt(dRm)}</td>
-                    <td class="num">${fmt(dNs, 3)}</td>
-                    <td class="num">${fmt(wh.nse_s2, 3)}</td>
-                    <td class="num">${fmtInt(g.n_pixels)}</td>
-                    <td class="paths">${pathNote}</td>
-                </tr>`);
-            }
-            return `<table>${parts.join("")}</table>`;
-        }
-
-        function metaDetails(summary) {
-            const cmd = summary.command_line;
-            const git = summary.git_commit;
-            if (!cmd && !git) return "";
-            let h = `<details class="meta"><summary>Run metadata</summary>`;
-            if (git) h += `<p>Git: <code>${git}</code></p>`;
-            if (cmd?.length) h += `<pre>${cmd.map((x) => String(x)).join(" ")}</pre>`;
-            h += `</details>`;
-            return h;
-        }
-
-        async function render(summary, manifest) {
-            const el = document.getElementById("content");
-            if (!summary) {
-                el.innerHTML = `<p class="err">Could not load <code>data/${siteName}/${season}/validation/gap_validation_summary.json</code>.</p>
-                    <p class="section-note">From <code>processing/</code>: <code>python -m gap_validation.run --site ${siteName} --season ${season} --lat LAT --lon LON</code> (see <code>--help</code>). Serve from <code>processing/</code>: <code>python3 -m http.server 8000</code> → <code>/webapp/gap_validation.html</code> (<code>webapp/data</code> → <code>../data</code>).</p>`;
-                if (manifest?.entries) el.innerHTML += manifestTable(manifest);
-                return;
-            }
-            const scen = summary.scenario || "—";
-            const sn = summary.site_name ?? siteName;
-            const se = summary.season ?? season;
-            let html = `<div class="intro">
-                Tier-2 withheld S2, spatial GCC vs withheld scene, NSE<sub>S2</sub>, and Whittaker comparison.
-                Summary: <code>data/${sn}/${se}/validation/gap_validation_summary.json</code>.
-                Scenario in this file: <b>${scen}</b> (one run overwrites; re-run CLI for other strategy/σ/mode).
-            </div>`;
-            html += `<h2>Spatial metrics (per gap length)</h2>`;
-            html += `<p class="section-note">Reference = GCC from withheld S2 REFL (bilinear to fusion grid). Prediction = fused GCC. ΔRMSE = RMSE<sub>gap</sub> − RMSE<sub>no gap</sub>; ΔNSE = NSE<sub>no gap</sub> − NSE<sub>gap</sub>.</p>`;
-            html += resultsTable(summary.results);
-            html += crossoverBlock(summary);
-            html += metaDetails(summary);
-            if (manifest?.entries) html += manifestTable(manifest);
-            el.innerHTML = html;
-        }
-
-        async function load() {
-            let summary = null,
-                manifest = null;
-            try {
-                const r1 = await fetch(`data/${siteName}/${season}/validation/gap_validation_summary.json`);
-                summary = r1.ok ? await r1.json() : null;
-            } catch {
-                summary = null;
-            }
-            try {
-                const r2 = await fetch(`data/${siteName}/${season}/validation/gap_manifest.json`);
-                manifest = r2.ok ? await r2.json() : null;
-            } catch {
-                manifest = null;
-            }
-            await render(summary, manifest);
-            const site = window.sitesData?.features?.find((f) => f.properties?.sitename === siteName);
-            document.getElementById("pageTitle").textContent =
-                (site?.properties?.description || siteName) + " — gap validation — " + season;
-            urlParams.set("site", siteName);
-            urlParams.set("season", season);
-            history.replaceState({}, "", `?${urlParams}`);
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch {
-                window.sitesData = { features: [] };
-            }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeSummary(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
-            const initialSeason =
-                urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)
-                    ? urlSeason
-                    : (availableSiteSeasons[initialSite] || [])[0] || "2024";
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || [])
-                .map((s) => `<option value="${s}">${s}</option>`)
-                .join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            siteName = initialSite;
-            season = initialSeason;
-
-            siteSelect.addEventListener("change", function () {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map((s) => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                siteName = sn;
-                season = document.getElementById("seasonSelect").value;
-                load();
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function () {
-                season = this.value;
-                load();
-            });
-            await load();
-        }
-        init();
-    </script>
-</body>
-</html>
--- a/webapp/index.html
+++ b/webapp/index.html
--- a/webapp/metrics.html
+++ b/webapp/metrics.html
@ -1,367 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <title>Metrics</title>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1100px; margin: 0 auto; padding: 20px; }
-        .selectors { margin-bottom: 20px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { font-size: 22px; }
-        h2 { font-size: 16px; margin-top: 24px; color: #333; }
-        h2:first-of-type { margin-top: 8px; }
-        h3 { font-size: 14px; margin: 14px 0 6px 0; color: #444; font-weight: 600; }
-        table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 12px; }
-        th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; }
-        th { background: #f5f5f5; }
-        td.num { text-align: right; font-variant-numeric: tabular-nums; }
-        .fusion-block table { margin-bottom: 4px; }
-        .fusion-block table + table { margin-top: 12px; }
-        .section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
-        .section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
-        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
-                 padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
-        .intro-short { margin-bottom: 0; }
-        details.definitions { margin-top: 28px; font-size: 13px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
-        details.definitions summary { cursor: pointer; font-weight: 600; color: #333; }
-        details.definitions ul { margin: 8px 0 0 18px; padding: 0; }
-        details.definitions li { margin-bottom: 4px; }
-        .scenario-key { font-size: 11px; color: #666; font-weight: normal; }
-        .empty { color: #666; font-style: italic; }
-        .err { color: #a00; }
-        details.how-read {
-            font-size: 12px; color: #333; line-height: 1.5; max-width: 820px; margin: 0 0 18px 0;
-            padding: 8px 12px 10px; border: 1px solid #ccd; border-radius: 4px; background: #f8fafc;
-        }
-        details.how-read summary {
-            cursor: pointer; font-weight: 600; font-size: 13px; color: #111; margin-bottom: 0;
-        }
-        details.how-read ol { margin: 10px 0 0; padding-left: 1.35rem; }
-        details.how-read li { margin-bottom: 7px; }
-        details.how-read li:last-child { margin-bottom: 0; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="nav">
-            <a href="index.html">Full</a>
-            <a href="preselection.html">Pre-selection</a>
-            <a href="prepared.html">Prepared</a>
-            <a href="fusion.html">Fusion</a>
-            <a href="postprocessed.html">Postprocessed</a>
-            <a href="metrics.html" class="active">Metrics</a>
-            <a href="gap_validation.html">Gap validation</a>
-            <a href="phenology.html">Phenology</a>
-        </div>
-        <h1 id="siteName">Metrics</h1>
-        <div class="selectors">
-            <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-        </div>
-        <div id="content"></div>
-    </div>
-    <script>
-        /** Shown in the UI; pearson_r, rmse, mae, n_samples remain in metrics.json only. */
-        const DISPLAY_METRIC_COLS = ["r_squared", "nrmse", "nse_pc"];
-        const DISPLAY_METRIC_LABELS = {
-            r_squared: "R² vs mean",
-            nrmse: "nRMSE",
-            nse_pc: "NSE_PC",
-        };
-
-        const FUSION_BTI_ROWS = [
-            ["aggressive_sigma20", "Aggressive", 20],
-            ["aggressive_sigma30", "Aggressive", 30],
-            ["nonaggressive_sigma20", "Non-aggressive", 20],
-            ["nonaggressive_sigma30", "Non-aggressive", 30],
-        ];
-
-        function mv(m, c) {
-            return c === "nse_pc" ? (m.nse_pc ?? m.nse) : m[c];
-        }
-        function fmtMetric(col, v) {
-            if (v == null || typeof v !== "number") return "—";
-            if (col === "r_squared" || col === "nse_pc") return v.toFixed(3);
-            if (col === "nrmse") return v.toFixed(4);
-            return fmt(v);
-        }
-        let siteName = "innsbruck", season = "2024";
-        let availableSiteSeasons = {};
-        const urlParams = new URLSearchParams(location.search);
-
-        async function probeMetrics(sn, s) {
-            try {
-                const res = await fetch(`data/${sn}/${s}/metrics.json`, { method: "HEAD" });
-                return res.ok;
-            } catch { return false; }
-        }
-
-        function fmt(v) {
-            if (v == null || typeof v !== "number") return "—";
-            return Number.isInteger(v) ? String(v) : v.toFixed(4);
-        }
-
-        function fusionMeanResidual(m) {
-            const x = m?.residual_vs_phenocam?.mean;
-            const n = Number(x);
-            return Number.isFinite(n) ? n : null;
-        }
-
-        function fusionSubTableRows(temporal, keysWithLabels, includeMeanResid) {
-            const parts = [];
-            for (const [key, stratLabel, sig] of keysWithLabels) {
-                const m = temporal[key];
-                if (!m) continue;
-                const mr = fusionMeanResidual(m);
-                const meanCell = includeMeanResid
-                    ? `<td class="num">${mr !== null ? mr.toFixed(3) : "—"}</td>`
-                    : "";
-                parts.push(
-                    `<tr><td>${stratLabel}, σ=${sig} <span class="scenario-key">(${key})</span></td>${DISPLAY_METRIC_COLS.map((c) => `<td class="num">${fmtMetric(c, mv(m, c))}</td>`).join("")}${meanCell}</tr>`
-                );
-            }
-            return parts;
-        }
-
-        function fusionTables(temporal) {
-            if (!temporal || typeof temporal !== "object") {
-                return `<p class="empty">No fusion temporal data</p>`;
-            }
-            const itbRows = FUSION_BTI_ROWS.map(([k, s, sig]) => [`${k}_itb`, s, sig]);
-            const allKeys = [...FUSION_BTI_ROWS.map((r) => r[0]), ...itbRows.map((r) => r[0])];
-            let showMean = false;
-            for (const k of allKeys) {
-                if (fusionMeanResidual(temporal[k]) !== null) {
-                    showMean = true;
-                    break;
-                }
-            }
-            const btiBody = fusionSubTableRows(temporal, FUSION_BTI_ROWS, showMean);
-            const itbBody = fusionSubTableRows(temporal, itbRows, showMean);
-            if (!btiBody.length && !itbBody.length) {
-                return `<p class="empty">No fusion scenarios in temporal</p>`;
-            }
-            const meanTh = showMean ? `<th class="num">Mean resid.</th>` : "";
-            const head = `<tr><th>Setting</th>${DISPLAY_METRIC_COLS.map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`).join("")}${meanTh}</tr>`;
-
-            let h = `<div class="fusion-block">`;
-            if (btiBody.length) {
-                h += `<h3>Bands-then-Index (BtI)</h3>`;
-                h += `<table>${head}${btiBody.join("")}</table>`;
-            }
-            if (itbBody.length) {
-                h += `<h3>Index-then-Bands (ItB)</h3>`;
-                h += `<table>${head}${itbBody.join("")}</table>`;
-            }
-            h += `</div>`;
-            return h;
-        }
-
-        /** Returns only &lt;table&gt;…&lt;/table&gt; or empty string (no heading). */
-        function baselineTable(b) {
-            if (!b || typeof b !== "object") return "";
-            const rows = [];
-            const pushRow = (label, m) => {
-                if (!m || typeof m !== "object") return;
-                rows.push(
-                    `<tr><td>${label}</td>${DISPLAY_METRIC_COLS.map((c) => `<td class="num">${fmtMetric(c, mv(m, c))}</td>`).join("")}</tr>`
-                );
-            };
-            pushRow("S2 GCC (all acquisitions)", b.s2);
-            for (const strat of ["aggressive", "nonaggressive"]) {
-                pushRow(`S3 composite GCC (${strat})`, b.s3?.[strat]);
-                pushRow(`S2 GCC cloud-screened (${strat})`, b.s2_cloudfree?.[strat]);
-                pushRow(`S2 Whittaker λ=400 (${strat})`, b.s2_whittaker_lambda400?.[strat]);
-            }
-            if (!rows.length) return "";
-            const head = `<tr><th>Baseline</th>${DISPLAY_METRIC_COLS.map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`).join("")}</tr>`;
-            return `<table>${head}${rows.join("")}</table>`;
-        }
-
-        function fmtFixed3(v) {
-            const n = Number(v);
-            return Number.isFinite(n) ? n.toFixed(3) : "—";
-        }
-
-        function derivedSection(d) {
-            if (!d) return "";
-            const dn = d.delta_nse_pc_sigma20_minus_sigma30;
-            const paired = d.bti_vs_itb_mean_residual || [];
-            if (!dn && !paired.length) return "";
-
-            let h = `<h2>Summaries</h2>`;
-            h += `<p class="section-note">Same numbers as Fusion, condensed. First table: which σ fits PhenoCam better (NSE_PC only). Second: mean bias BtI vs ItB.</p>`;
-            if (dn) {
-                h += `<p class="section-note"><b>ΔNSE_PC</b> = NSE_PC(σ20) − NSE_PC(σ30). <b>+</b> → σ20 better. <b>−</b> → σ30 better.</p>`;
-                h += `<table><tr><th>Mode</th><th>Strategy</th><th class="num">ΔNSE_PC</th></tr>`;
-                let anyDelta = false;
-                for (const mode of ["bti", "itb"]) {
-                    for (const strat of ["aggressive", "nonaggressive"]) {
-                        const v = dn[mode]?.[strat];
-                        if (Number.isFinite(Number(v))) anyDelta = true;
-                        h += `<tr><td>${mode.toUpperCase()}</td><td>${strat}</td><td class="num">${fmtFixed3(v)}</td></tr>`;
-                    }
-                }
-                h += `</table>`;
-                if (!anyDelta) {
-                    h += `<p class="section-note">ΔNSE_PC needs both σ20 and σ30 fusion rows in <code>temporal</code> (BtI and ItB). Re-run <code>metrics_stats</code>.</p>`;
-                }
-            }
-            if (paired.length) {
-                h += `<p class="section-note">Mean(fused − PhenoCam) per row. <b>+</b> / <b>−</b> = average over / under PhenoCam. Closer to <b>0</b> in a column = less bias for that workflow.</p>`;
-                h += `<table><tr><th>Strategy</th><th>σ</th><th class="num">Mean residual BtI</th><th class="num">Mean residual ItB</th></tr>`;
-                for (const row of paired) {
-                    h += `<tr><td>${row.strategy}</td><td>${row.sigma}</td><td class="num">${fmtFixed3(row.mean_residual_bti)}</td><td class="num">${fmtFixed3(row.mean_residual_itb)}</td></tr>`;
-                }
-                h += `</table>`;
-            }
-            return h;
-        }
-
-        function howToReadBlock() {
-            return `<details class="how-read">
-                <summary>How to read</summary>
-                <ol>
-                    <li>All scores are satellite or fusion <b>GCC</b> vs <b>PhenoCam GCC</b> at the site 3×3 window, <b>same calendar days</b> only. Extra stats: <code>metrics.json</code>.</li>
-                    <li><b>R² vs mean</b> and <b>NSE_PC</b> are the same value (1 − SS<sub>res</sub>/SS<sub>tot</sub> vs predicting mean PhenoCam each day); not (Pearson <i>r</i>)²; can be negative. Higher = better. <b>nRMSE</b>: lower = better.</li>
-                    <li><b>Fusion:</b> same row number in BtI and in ItB = same screening + same σ — compare left/right. Down one block = change screening or σ.</li>
-                    <li><b>Mean resid.</b> (if present): mean(fused − PhenoCam). Sign = average bias; use R² vs mean / nRMSE / NSE_PC for overall fit.</li>
-                    <li><b>Summaries:</b> ΔNSE_PC = NSE at σ20 minus NSE at σ30 (+ means σ20 wins). Paired table: closer to 0 = less mean bias.</li>
-                </ol>
-            </details>`;
-        }
-
-        function definitionsDetails() {
-            return `<details class="definitions">
-                <summary>Definitions</summary>
-                <ul>
-                    <li><b>BtI</b>: fuse reflectance bands, then GCC.</li>
-                    <li><b>ItB</b>: GCC on S2 and S3, then fuse GCC.</li>
-                    <li><b>Scenario</b>: screening (<code>aggressive</code> / <code>nonaggressive</code>) × σ (20 / 30 days).</li>
-                    <li><a href="phenology.html">Phenology</a> — PhenoCam SOS/EOS (TIMESAT).</li>
-                    <li><b>R² vs mean</b> — coefficient of determination vs a constant mean(PhenoCam) baseline; JSON key <code>r_squared</code>; duplicates <code>nse_pc</code>. Not (Pearson <i>r</i>)².</li>
-                    <li><code>metrics.json</code> — also Pearson <i>r</i>, RMSE, MAE, <code>n_samples</code>.</li>
-                </ul>
-            </details>`;
-        }
-
-        function render(data) {
-            const el = document.getElementById("content");
-            if (!data) {
-                el.innerHTML = `<p class="err">Could not load metrics.json</p>`;
-                return;
-            }
-            let html = "";
-            html += `<div class="intro intro-short">
-                GCC at the 3×3 site window vs PhenoCam. Sections: PhenoCam → baselines → fusion (BtI, then ItB) → summaries.
-                <code>data/${siteName}/${season}/metrics.json</code>
-            </div>`;
-            html += howToReadBlock();
-
-            if (data.phenocam_stats) {
-                html += `<h2>PhenoCam (ground truth)</h2>`;
-                html += `<p class="section-note">Camera ROI GCC (not compared to itself). Dates / SOS–EOS: <a href="phenology.html">Phenology</a>.</p>`;
-                html += `<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
-                const p = data.phenocam_stats;
-                html += `<td class="num">${fmt(p.mean)}</td><td class="num">${fmt(p.std)}</td><td class="num">${fmt(p.min)}</td><td class="num">${fmt(p.max)}</td><td class="num">${fmt(p.n_samples)}</td></tr></table>`;
-            }
-
-            const baselineTbl = baselineTable(data.baseline);
-            if (baselineTbl) {
-                html += `<h2>Baselines (vs PhenoCam)</h2>`;
-                html += `<p class="section-note">Same columns as fusion (vs PhenoCam). Higher R² vs mean / NSE_PC, lower nRMSE = better. S3 = coarse-only; Whittaker = smoothed S2-only.</p>`;
-                html += baselineTbl;
-            }
-
-            html += `<h2>Fusion (vs PhenoCam)</h2>`;
-            html += `<p class="section-note">BtI block vs ItB block: same row = same screening + σ. Within a block: four EFAST combinations.</p>`;
-            html += fusionTables(data.temporal || {});
-
-            html += derivedSection(data.derived);
-
-            html += definitionsDetails();
-
-            el.innerHTML = html || `<p class="empty">Empty metrics file</p>`;
-        }
-
-        async function load() {
-            try {
-                const res = await fetch(`data/${siteName}/${season}/metrics.json`);
-                render(res.ok ? await res.json() : null);
-            } catch {
-                render(null);
-            }
-            const site = window.sitesData?.features?.find((f) => f.properties?.sitename === siteName);
-            document.getElementById("siteName").textContent = (site?.properties?.description || siteName) + " — " + season;
-            urlParams.set("site", siteName);
-            urlParams.set("season", season);
-            history.replaceState({}, "", `?${urlParams}`);
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch { window.sitesData = { features: [] }; }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeMetrics(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
-            const initialSeason =
-                urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)
-                    ? urlSeason
-                    : (availableSiteSeasons[initialSite] || [])[0] || "2024";
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || [])
-                .map((s) => `<option value="${s}">${s}</option>`)
-                .join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            siteName = initialSite;
-            season = initialSeason;
-
-            siteSelect.addEventListener("change", function () {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map((s) => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                siteName = sn;
-                season = document.getElementById("seasonSelect").value;
-                load();
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function () {
-                season = this.value;
-                load();
-            });
-            await load();
-        }
-        init();
-    </script>
-</body>
-</html>
--- a/webapp/phenology.html
+++ b/webapp/phenology.html
@ -1,146 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <title>Phenology</title>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 900px; margin: 0 auto; padding: 20px; }
-        h1 { font-size: 22px; margin-top: 0; }
-        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
-                 padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
-        table { border-collapse: collapse; width: 100%; font-size: 13px; }
-        th, td { border: 1px solid #ccc; padding: 8px 10px; text-align: left; }
-        th { background: #f5f5f5; }
-        td.num { text-align: center; font-variant-numeric: tabular-nums; }
-        td.site { font-weight: 500; }
-        a.rowlink { color: #0066cc; text-decoration: none; }
-        a.rowlink:hover { text-decoration: underline; }
-        .empty { color: #666; }
-        .err { color: #a00; }
-        .loading { color: #666; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="nav">
-            <a href="index.html">Full</a>
-            <a href="preselection.html">Pre-selection</a>
-            <a href="prepared.html">Prepared</a>
-            <a href="fusion.html">Fusion</a>
-            <a href="postprocessed.html">Postprocessed</a>
-            <a href="metrics.html">Metrics</a>
-            <a href="gap_validation.html">Gap validation</a>
-            <a href="phenology.html" class="active">Phenology</a>
-        </div>
-        <h1>PhenoCam phenology (50% amplitude)</h1>
-        <p class="intro">
-            Green-up and green-down dates from <code>data/&lt;site&gt;/&lt;season&gt;/raw/phenocam/phenocam_phenology.json</code>
-            (TIMESAT on PhenoCam GCC). Site/season rows match <code>data/sites.geojson</code>.
-            Run <code>python phenology_timesat.py --all</code> or the pipeline to generate missing JSON files.
-        </p>
-        <p id="status" class="loading">Loading…</p>
-        <div id="tableWrap"></div>
-    </div>
-    <script>
-        function escapeHtml(s) {
-            return String(s)
-                .replace(/&/g, "&amp;")
-                .replace(/</g, "&lt;")
-                .replace(/>/g, "&gt;")
-                .replace(/"/g, "&quot;");
-        }
-
-        function cellDate(v) {
-            if (v == null || v === "") return "<span class='empty'>—</span>";
-            return escapeHtml(v);
-        }
-
-        async function loadPhenologyRow(site, season) {
-            const path = `data/${site}/${season}/raw/phenocam/phenocam_phenology.json`;
-            try {
-                const res = await fetch(path);
-                if (!res.ok) return { ok: false, up: null, down: null };
-                const j = await res.json();
-                return {
-                    ok: true,
-                    up: j.green_up_50pct_date ?? null,
-                    down: j.green_down_50pct_date ?? null
-                };
-            } catch {
-                return { ok: false, up: null, down: null };
-            }
-        }
-
-        async function main() {
-            const status = document.getElementById("status");
-            const wrap = document.getElementById("tableWrap");
-            let features = [];
-            try {
-                const res = await fetch("data/sites.geojson");
-                if (!res.ok) throw new Error("Could not load sites.geojson");
-                const g = await res.json();
-                features = g.features || [];
-            } catch (e) {
-                status.textContent = "";
-                status.className = "err";
-                status.textContent = "Failed to load data/sites.geojson.";
-                return;
-            }
-
-            const rows = [];
-            for (const f of features) {
-                const site = f.properties && f.properties.sitename;
-                if (!site) continue;
-                const desc = (f.properties && f.properties.description) || site;
-                const seasons = f.properties && f.properties.seasons
-                    ? Object.keys(f.properties.seasons).sort()
-                    : [];
-                for (const season of seasons) {
-                    rows.push({ site, season, desc });
-                }
-            }
-            rows.sort((a, b) => a.site.localeCompare(b.site) || a.season.localeCompare(b.season));
-
-            const results = await Promise.all(
-                rows.map((r) =>
-                    loadPhenologyRow(r.site, r.season).then((phen) => ({ ...r, ...phen }))
-                )
-            );
-
-            const head =
-                "<thead><tr>" +
-                "<th>Site</th><th>Season</th><th>Description</th>" +
-                "<th>Green-up</th><th>Green-down</th>" +
-                "</tr></thead>";
-            const body = results
-                .map((r) => {
-                    const q = new URLSearchParams();
-                    q.set("site", r.site);
-                    q.set("season", r.season);
-                    const viewer = `index.html?${q.toString()}`;
-                    return (
-                        "<tr>" +
-                        `<td class="site"><a class="rowlink" href="${viewer}">${escapeHtml(r.site)}</a></td>` +
-                        `<td class="num">${r.season}</td>` +
-                        `<td>${escapeHtml(r.desc)}</td>` +
-                        `<td class="num">${cellDate(r.up)}</td>` +
-                        `<td class="num">${cellDate(r.down)}</td>` +
-                        "</tr>"
-                    );
-                })
-                .join("");
-
-            status.textContent = "";
-            status.className = "";
-            wrap.innerHTML = "<table>" + head + "<tbody>" + body + "</tbody></table>";
-        }
-
-        main();
-    </script>
-</body>
-</html>
--- a/webapp/postprocessed.html
+++ b/webapp/postprocessed.html
@ -1,390 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <title>Postprocessed Viewer</title>
-    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
-    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
-    <script src="common.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
-        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
-        .selectors { margin-bottom: 20px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { margin: 0 0 5px 0; font-size: 22px; }
-        .season-row { padding-bottom: 15px; }
-        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
-        .download-links { margin-left: 10px; font-size: 14px; }
-        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
-        .download-links a:hover { text-decoration: underline; }
-        #dateSlider { width: 100%; margin: 15px 0; }
-        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
-        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
-        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
-        #postprocessedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
-        .leaflet-image-layer { image-rendering: pixelated; }
-        .leaflet-control-attribution { display: none; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="header-sticky">
-            <div class="nav">
-                <a href="index.html">Full</a>
-                <a href="preselection.html">Pre-selection</a>
-                <a href="prepared.html">Prepared</a>
-                <a href="fusion.html">Fusion</a>
-                <a href="postprocessed.html" class="active">Postprocessed</a>
-                <a href="metrics.html">Metrics</a>
-                <a href="gap_validation.html">Gap validation</a>
-                <a href="phenology.html">Phenology</a>
-            </div>
-            <h1 id="siteName">Innsbruck</h1>
-            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
-            <div class="selectors">
-                <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-            <label>Strategy:</label>
-            <select id="strategySelect">
-                <option value="aggressive">Aggressive</option>
-                <option value="nonaggressive">Non-aggressive</option>
-            </select>
-            <label>Sigma:</label>
-            <select id="sigmaSelect">
-                <option value="20">σ=20</option>
-                <option value="30">σ=30</option>
-            </select>
-            <label>Source:</label>
-            <select id="sourceSelect">
-                <option value="s2">S2</option>
-                <option value="fusion">Fusion</option>
-                <option value="s3">S3</option>
-            </select>
-            <label>Mode:</label>
-            <select id="fusionModeSelect" title="BtI vs ItB processed paths">
-                <option value="bti">BtI</option>
-                <option value="itb">ItB</option>
-            </select>
-            </div>
-            <input type="range" id="dateSlider" min="0" max="365" value="0">
-            <div id="dateDisplay">2024-01-01</div>
-        </div>
-        <div class="map-label">Postprocessed RGB (closest available)</div>
-        <div id="mapDate" class="map-date"></div>
-        <div id="postprocessedMap"></div>
-        <div id="plots">
-            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
-            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
-            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
-            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
-            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
-            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
-        </div>
-    </div>
-    <script>
-        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
-        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
-
-        let siteName = "innsbruck", season = "2024";
-        let strategy = "aggressive", sigma = "20", source = "s2", fusionMode = "bti";
-        let sitePosition = [47.116171, 11.320308];
-        let start = new Date(2024, 0, 1);
-        let availableSiteSeasons = {};
-        let postprocessedMap = null, overlay = null, marker = null;
-        let ndviTs = [], gccTs = [], bandsTs = [];
-        const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
-        const urlParams = new URLSearchParams(location.search);
-        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
-
-        const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
-        const dateFromDays = (days) => fmtDate(new Date(start.getTime() + days * 86400000));
-        const daysFromDate = (dateStr) => {
-            const [y, m, d] = dateStr.split("-").map(Number);
-            return Math.floor((new Date(y, m - 1, d) - start) / 86400000);
-        };
-
-        function getProcessedPath() {
-            const mid = fusionMode === "itb" ? `processed_${strategy}_itb_sigma${sigma}` : `processed_${strategy}_sigma${sigma}`;
-            return `data/${siteName}/${season}/${mid}`;
-        }
-
-        async function loadTimeseries() {
-            const base = getProcessedPath();
-            try {
-                const [n, g, b] = await Promise.all([
-                    fetch(`${base}/ndvi/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                    fetch(`${base}/gcc/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                    fetch(`${base}/bands/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : [])),
-                ]);
-                ndviTs = n;
-                gccTs = g;
-                bandsTs = b;
-            } catch {
-                ndviTs = [];
-                gccTs = [];
-                bandsTs = [];
-            }
-            drawPlots();
-            updateDownloadLinks();
-        }
-
-        function drawPlot(canvasId, data, key, color) {
-            const canvas = document.getElementById(canvasId);
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-            const pts = data.filter(t => t[key] != null);
-            if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
-            const dates = pts.map(t => new Date(t.date));
-            const vals = pts.map(t => t[key]);
-            const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
-            const minV = Math.min(...vals), maxV = Math.max(...vals);
-            const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
-            const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
-            const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
-            ctx.fillText(maxV.toFixed(3), 2, pad + 3);
-            ctx.strokeStyle = color;
-            ctx.beginPath();
-            pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
-            ctx.stroke();
-            const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(curDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
-            const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
-            if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
-        }
-
-        function drawPlots() {
-            drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
-            drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
-            BANDS.forEach(b => drawPlot(`plot_${b.key}`, bandsTs, b.key, b.color));
-        }
-
-        function updateDownloadLinks() {
-            const el = document.getElementById("downloadLinks");
-            if (!el) return;
-            const root = getProcessedPath();
-            if (fusionMode === "itb") {
-                el.innerHTML = `<a href="${root}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
-                return;
-            }
-            const base = `${root}/export/${source}`;
-            const name = `${siteName}_${season}_postprocessed_${strategy}_sigma${sigma}_${source}`;
-            el.innerHTML = `<a href="${base}/timeseries.json" download="${name}.json">[JSON]</a><a href="${base}/timeseries.csv" download="${name}.csv">[CSV]</a>`;
-        }
-
-        async function findProcessedFile(dateStr) {
-            const target = new Date(dateStr);
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            const seasonStart = start.getTime();
-            const seasonEnd = yearEnd.getTime();
-            for (let offset = 0; offset <= 365; offset++) {
-                for (const dir of offset === 0 ? [0] : [-1, 1]) {
-                    const d = new Date(target.getTime() + dir * offset * 86400000);
-                    if (d.getTime() < seasonStart || d.getTime() > seasonEnd) continue;
-                    const ds = d.toISOString().split("T")[0].replace(/-/g, "");
-                    const filename = `${ds}_0.geotiff`;
-                    try {
-                        const res = await fetch(`${getProcessedPath()}/${source}/${filename}`, { method: "HEAD" });
-                        if (res.ok) return filename;
-                    } catch {}
-                }
-            }
-            return null;
-        }
-
-        function transformBounds(bbox, fromCRS) {
-            const sw = proj4(fromCRS, "EPSG:4326", [bbox[0], bbox[1]]);
-            const ne = proj4(fromCRS, "EPSG:4326", [bbox[2], bbox[3]]);
-            return [[sw[1], sw[0]], [ne[1], ne[0]]];
-        }
-
-        async function loadGeotiff(filename) {
-            const path = `${getProcessedPath()}/${source}/${filename}`;
-            const buf = await (await fetch(path)).arrayBuffer();
-            const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
-            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
-            const dateStr = filename.replace("_0.geotiff", "");
-            return { dataUrl, bounds, dateStr };
-        }
-
-        async function updateMap() {
-            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const filename = await findProcessedFile(dateStr);
-            if (!filename || !postprocessedMap) {
-                if (overlay) { postprocessedMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-                return;
-            }
-            try {
-                const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
-                if (overlay) postprocessedMap.removeLayer(overlay);
-                overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(postprocessedMap);
-                postprocessedMap.fitBounds(bounds);
-                document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
-            } catch (e) {
-                if (overlay) { postprocessedMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-            }
-        }
-
-        async function probeDataExists(sitename, s) {
-            try {
-                const res = await fetch(`data/${sitename}/${s}/metrics.json`, { method: "HEAD" });
-                return res.ok;
-            } catch { return false; }
-        }
-
-        function getSiteBySitename(sn) {
-            return window.sitesData?.features?.find(f => f.properties?.sitename === sn);
-        }
-
-        async function setSiteSeason(newSite, newSeason) {
-            siteName = newSite;
-            season = newSeason;
-            start = new Date(parseInt(season), 0, 1);
-            const site = getSiteBySitename(newSite);
-            if (site?.geometry?.coordinates) {
-                const [lon, lat] = site.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            if (postprocessedMap) { postprocessedMap.setView(sitePosition, 12); if (marker) marker.setLatLng(sitePosition); }
-            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
-            document.getElementById("season").textContent = season;
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            document.getElementById("dateSlider").max = Math.ceil((yearEnd - start) / 86400000);
-            const params = new URLSearchParams(location.search);
-            params.set("site", siteName);
-            params.set("season", season);
-            params.set("mode", fusionMode);
-            history.replaceState({}, "", `?${params}`);
-            const urlDate = params.get("date");
-            if (urlDate) document.getElementById("dateSlider").value = daysFromDate(urlDate);
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            await loadTimeseries();
-            await updateMap();
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch { window.sitesData = { features: [] }; }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeDataExists(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
-            const initialSeason = (urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)) ? urlSeason : ((availableSiteSeasons[initialSite] || [])[0] || "2024");
-
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || []).map(s =>
-                `<option value="${s}">${s}</option>`
-            ).join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            strategy = urlParams.get("strategy") || "aggressive";
-            sigma = urlParams.get("sigma") || "20";
-            source = urlParams.get("source") || "s2";
-            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
-            document.getElementById("strategySelect").value = strategy;
-            document.getElementById("sigmaSelect").value = sigma;
-            document.getElementById("sourceSelect").value = source;
-            document.getElementById("fusionModeSelect").value = fusionMode;
-
-            const initSite = getSiteBySitename(initialSite);
-            if (initSite?.geometry?.coordinates) {
-                const [lon, lat] = initSite.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            postprocessedMap = L.map("postprocessedMap", { zoomControl: false }).setView(sitePosition, 12)
-                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
-            marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(postprocessedMap);
-
-            siteSelect.addEventListener("change", function() {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map(s => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                setSiteSeason(sn, document.getElementById("seasonSelect").value);
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function() {
-                setSiteSeason(siteSelect.value, this.value);
-            });
-            document.getElementById("strategySelect").addEventListener("change", function() {
-                strategy = this.value;
-                urlParams.set("strategy", strategy);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("sigmaSelect").addEventListener("change", function() {
-                sigma = this.value;
-                urlParams.set("sigma", sigma);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("sourceSelect").addEventListener("change", function() {
-                source = this.value;
-                urlParams.set("source", source);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("fusionModeSelect").addEventListener("change", function() {
-                fusionMode = this.value;
-                urlParams.set("mode", fusionMode);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-
-            await setSiteSeason(initialSite, initialSeason);
-        }
-
-        document.getElementById("dateSlider").addEventListener("input", function() {
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
-            drawPlots(); updateMap();
-        });
-
-        init();
-    </script>
-</body>
-</html>
--- a/webapp/prepared.html
+++ b/webapp/prepared.html
@ -1,379 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <title>Prepared S2/S3 Viewer</title>
-    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
-    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
-    <script src="common.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
-        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
-        .selectors { margin-bottom: 20px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { margin: 0 0 5px 0; font-size: 22px; }
-        .season-row { padding-bottom: 15px; }
-        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
-        .download-links { margin-left: 10px; font-size: 14px; }
-        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
-        .download-links a:hover { text-decoration: underline; }
-        #dateSlider { width: 100%; margin: 15px 0; }
-        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
-        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
-        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
-        #preparedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
-        .leaflet-image-layer { image-rendering: pixelated; }
-        .leaflet-control-attribution { display: none; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="header-sticky">
-            <div class="nav">
-                <a href="index.html">Full</a>
-                <a href="preselection.html">Pre-selection</a>
-                <a href="prepared.html" class="active">Prepared</a>
-                <a href="fusion.html">Fusion</a>
-                <a href="postprocessed.html">Postprocessed</a>
-                <a href="metrics.html">Metrics</a>
-                <a href="gap_validation.html">Gap validation</a>
-                <a href="phenology.html">Phenology</a>
-            </div>
-            <h1 id="siteName">Innsbruck</h1>
-            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
-            <div class="selectors">
-                <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-            <label>Strategy:</label>
-            <select id="strategySelect">
-                <option value="aggressive">Aggressive</option>
-                <option value="nonaggressive">Non-aggressive</option>
-            </select>
-            <label>Source:</label>
-            <select id="sourceSelect">
-                <option value="s2">S2</option>
-                <option value="s3">S3</option>
-            </select>
-            <label>Mode:</label>
-            <select id="fusionModeSelect" title="BtI = REFL/composite; ItB = GCC rasters">
-                <option value="bti">BtI</option>
-                <option value="itb">ItB</option>
-            </select>
-            </div>
-            <input type="range" id="dateSlider" min="0" max="365" value="0">
-            <div id="dateDisplay">2024-01-01</div>
-        </div>
-        <div class="map-label" id="mapLabel">Prepared RGB (closest available)</div>
-        <div id="mapDate" class="map-date"></div>
-        <div id="preparedMap"></div>
-        <div id="plots">
-            <div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
-            <div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
-            <div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
-            <div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
-            <div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
-            <div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
-        </div>
-    </div>
-    <script>
-        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
-        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
-
-        let siteName = "innsbruck", season = "2024";
-        let strategy = "aggressive", source = "s2", fusionMode = "bti";
-        let sitePosition = [47.116171, 11.320308];
-        let start = new Date(2024, 0, 1);
-        let availableSiteSeasons = {};
-        let preparedMap = null, overlay = null, marker = null;
-        let ndviTs = [], gccTs = [], bandsTs = [];
-        const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
-        const urlParams = new URLSearchParams(location.search);
-        const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
-
-        const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
-        const dateFromDays = (days) => fmtDate(new Date(start.getTime() + days * 86400000));
-        const daysFromDate = (dateStr) => {
-            const [y, m, d] = dateStr.split("-").map(Number);
-            return Math.floor((new Date(y, m - 1, d) - start) / 86400000);
-        };
-
-        function getPreparedPath() {
-            const mid = fusionMode === "itb" ? `prepared_${strategy}_itb` : `prepared_${strategy}`;
-            return `data/${siteName}/${season}/${mid}`;
-        }
-
-        async function loadTimeseries() {
-            try {
-                const [n, g, b] = await Promise.all([
-                    fetch(`${getPreparedPath()}/ndvi/${source}/timeseries.json`).then(r => r.ok ? r.json() : []),
-                    fetch(`${getPreparedPath()}/gcc/${source}/timeseries.json`).then(r => r.ok ? r.json() : []),
-                    fetch(`${getPreparedPath()}/bands/${source}/timeseries.json`).then(r => r.ok ? r.json() : [])
-                ]);
-                ndviTs = n; gccTs = g; bandsTs = b;
-            } catch { ndviTs = []; gccTs = []; bandsTs = []; }
-            drawPlots();
-            updateDownloadLinks();
-        }
-
-        function drawPlot(canvasId, data, key, color) {
-            const canvas = document.getElementById(canvasId);
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-            const pts = data.filter(t => t[key] != null);
-            if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
-            const dates = pts.map(t => new Date(t.date));
-            const vals = pts.map(t => t[key]);
-            const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
-            const minV = Math.min(...vals), maxV = Math.max(...vals);
-            const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
-            const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
-            const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
-            ctx.fillText(maxV.toFixed(3), 2, pad + 3);
-            ctx.strokeStyle = color;
-            ctx.beginPath();
-            pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
-            ctx.stroke();
-            const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(curDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
-            const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
-            if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
-        }
-
-        function drawPlots() {
-            drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
-            drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
-            BANDS.forEach(b => drawPlot(`plot_${b.key}`, bandsTs, b.key, b.color));
-        }
-
-        function updateDownloadLinks() {
-            const el = document.getElementById("downloadLinks");
-            if (!el) return;
-            const root = getPreparedPath();
-            if (fusionMode === "itb") {
-                el.innerHTML = `<a href="${root}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
-                return;
-            }
-            const base = `${root}/export/${source}`;
-            const name = `${siteName}_${season}_prepared_${strategy}_${source}`;
-            el.innerHTML = `<a href="${base}/timeseries.json" download="${name}.json">[JSON]</a><a href="${base}/timeseries.csv" download="${name}.csv">[CSV]</a>`;
-        }
-
-        async function findPreparedFile(dateStr) {
-            const target = new Date(dateStr);
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            const seasonStart = start.getTime();
-            const seasonEnd = yearEnd.getTime();
-            for (let offset = 0; offset <= 365; offset++) {
-                for (const dir of offset === 0 ? [0] : [-1, 1]) {
-                    const d = new Date(target.getTime() + dir * offset * 86400000);
-                    if (d.getTime() < seasonStart || d.getTime() > seasonEnd) continue;
-                    const ds = d.toISOString().split("T")[0].replace(/-/g, "");
-                    const filename =
-                        source === "s2"
-                            ? fusionMode === "itb"
-                                ? `S2A_MSIL2A_${ds}_GCC.tif`
-                                : `S2A_MSIL2A_${ds}_REFL.tif`
-                            : `composite_${ds}.tif`;
-                    try {
-                        const res = await fetch(`${getPreparedPath()}/${source}/${filename}`, { method: "HEAD" });
-                        if (res.ok) return filename;
-                    } catch {}
-                }
-            }
-            return null;
-        }
-
-        function transformBounds(bbox, fromCRS) {
-            const sw = proj4(fromCRS, "EPSG:4326", [bbox[0], bbox[1]]);
-            const ne = proj4(fromCRS, "EPSG:4326", [bbox[2], bbox[3]]);
-            return [[sw[1], sw[0]], [ne[1], ne[0]]];
-        }
-
-        async function loadGeotiff(filename) {
-            const path = `${getPreparedPath()}/${source}/${filename}`;
-            const buf = await (await fetch(path)).arrayBuffer();
-            const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
-            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
-            const m = filename.match(/(\d{8})/);
-            const dateStr = m ? m[1] : "";
-            return { dataUrl, bounds, dateStr };
-        }
-
-        async function updateMap() {
-            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const filename = await findPreparedFile(dateStr);
-            if (!filename || !preparedMap) {
-                if (overlay) { preparedMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-                return;
-            }
-            try {
-                const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
-                if (overlay) preparedMap.removeLayer(overlay);
-                overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(preparedMap);
-                preparedMap.fitBounds(bounds);
-                document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
-            } catch (e) {
-                if (overlay) { preparedMap.removeLayer(overlay); overlay = null; }
-                document.getElementById("mapDate").textContent = "";
-            }
-        }
-
-        async function probeDataExists(sitename, s) {
-            try {
-                const res = await fetch(`data/${sitename}/${s}/raw/preselection/s2_preselection.json`, { method: "HEAD" });
-                return res.ok;
-            } catch { return false; }
-        }
-
-        function getSiteBySitename(sn) {
-            return window.sitesData?.features?.find(f => f.properties?.sitename === sn);
-        }
-
-        async function setSiteSeason(newSite, newSeason) {
-            siteName = newSite;
-            season = newSeason;
-            start = new Date(parseInt(season), 0, 1);
-            const site = getSiteBySitename(newSite);
-            if (site?.geometry?.coordinates) {
-                const [lon, lat] = site.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            if (preparedMap) { preparedMap.setView(sitePosition, 12); if (marker) marker.setLatLng(sitePosition); }
-            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
-            document.getElementById("season").textContent = season;
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            document.getElementById("dateSlider").max = Math.ceil((yearEnd - start) / 86400000);
-            const params = new URLSearchParams(location.search);
-            params.set("site", siteName);
-            params.set("season", season);
-            params.set("mode", fusionMode);
-            history.replaceState({}, "", `?${params}`);
-            const urlDate = params.get("date");
-            if (urlDate) document.getElementById("dateSlider").value = daysFromDate(urlDate);
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            await loadTimeseries();
-            await updateMap();
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch { window.sitesData = { features: [] }; }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeDataExists(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
-            const initialSeason = (urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)) ? urlSeason : ((availableSiteSeasons[initialSite] || [])[0] || "2024");
-
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || []).map(s =>
-                `<option value="${s}">${s}</option>`
-            ).join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            strategy = urlParams.get("strategy") || "aggressive";
-            source = urlParams.get("source") || "s2";
-            fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
-            document.getElementById("strategySelect").value = strategy;
-            document.getElementById("sourceSelect").value = source;
-            document.getElementById("fusionModeSelect").value = fusionMode;
-            const ml = document.getElementById("mapLabel");
-            if (ml) ml.textContent = fusionMode === "itb" ? "Prepared GCC grayscale / S3 (closest available)" : "Prepared RGB (closest available)";
-
-            const initSite = getSiteBySitename(initialSite);
-            if (initSite?.geometry?.coordinates) {
-                const [lon, lat] = initSite.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            preparedMap = L.map("preparedMap", { zoomControl: false }).setView(sitePosition, 12)
-                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
-            marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(preparedMap);
-
-            siteSelect.addEventListener("change", function() {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map(s => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                setSiteSeason(sn, document.getElementById("seasonSelect").value);
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function() {
-                setSiteSeason(siteSelect.value, this.value);
-            });
-            document.getElementById("strategySelect").addEventListener("change", function() {
-                strategy = this.value;
-                urlParams.set("strategy", strategy);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("sourceSelect").addEventListener("change", function() {
-                source = this.value;
-                urlParams.set("source", source);
-                history.replaceState({}, "", `?${urlParams}`);
-                loadTimeseries(); updateMap();
-            });
-            document.getElementById("fusionModeSelect").addEventListener("change", function() {
-                fusionMode = this.value;
-                urlParams.set("mode", fusionMode);
-                history.replaceState({}, "", `?${urlParams}`);
-                const ml = document.getElementById("mapLabel");
-                if (ml) ml.textContent = fusionMode === "itb" ? "Prepared GCC grayscale / S3 (closest available)" : "Prepared RGB (closest available)";
-                loadTimeseries(); updateMap();
-            });
-
-            await setSiteSeason(initialSite, initialSeason);
-        }
-
-        document.getElementById("dateSlider").addEventListener("input", function() {
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
-            drawPlots(); updateMap();
-        });
-
-        init();
-    </script>
-</body>
-</html>
--- a/webapp/preselection.html
+++ b/webapp/preselection.html
@ -1,541 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <title>S2 Band Reflectance Timeseries</title>
-    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
-    <script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
-    <style>
-        body { margin: 0; font-family: sans-serif; }
-        .nav { margin-bottom: 15px; font-size: 14px; }
-        .nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
-        .nav a:hover { text-decoration: underline; }
-        .nav a.active { font-weight: bold; }
-        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
-        .header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
-        .selectors { margin-bottom: 20px; }
-        .selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
-        h1 { margin: 0 0 5px 0; font-size: 22px; }
-        .season-row { padding-bottom: 15px; }
-        h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
-        .download-links { margin-left: 10px; font-size: 14px; }
-        .download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
-        .download-links a:hover { text-decoration: underline; }
-        .plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
-        .plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        #dateSlider { width: 100%; margin: 15px 0; }
-        #dateDisplay { text-align: center; font-size: 14px; color: #666; }
-        .map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
-        .map-date { font-size: 11px; margin-top: 3px; color: #999; }
-        #s2map { height: 400px; border: 1px solid #ccc; margin-top: 10px; }
-        .leaflet-image-layer { image-rendering: pixelated; }
-        .leaflet-control-attribution { display: none; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="header-sticky">
-            <div class="nav">
-                <a href="index.html">Full</a>
-                <a href="preselection.html" class="active">Pre-selection</a>
-                <a href="prepared.html">Prepared</a>
-                <a href="fusion.html">Fusion</a>
-                <a href="postprocessed.html">Postprocessed</a>
-                <a href="metrics.html">Metrics</a>
-                <a href="gap_validation.html">Gap validation</a>
-                <a href="phenology.html">Phenology</a>
-            </div>
-            <h1 id="siteName">Innsbruck</h1>
-            <div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
-            <div class="selectors">
-            <label>Site:</label>
-            <select id="siteSelect"></select>
-            <label>Season:</label>
-            <select id="seasonSelect"></select>
-            <label>Source:</label>
-            <select id="sourceSelect">
-                <option value="s2">S2</option>
-                <option value="s3">S3</option>
-            </select>
-            <label>Exclusion:</label>
-            <select id="exclusionSelect">
-                <option value="none">None</option>
-                <option value="aggressive">Aggressive</option>
-                <option value="nonaggressive">Non-aggressive</option>
-            </select>
-            </div>
-            <input type="range" id="dateSlider" min="0" max="365" value="0">
-            <div id="dateDisplay">2024-01-01</div>
-        </div>
-        <div class="map-label" id="mapLabel">S2 RGB (closest available)</div>
-        <div id="s2rgbdate" class="map-date"></div>
-        <div id="s2map"></div>
-        <div id="bandPlots"></div>
-    </div>
-    <script>
-        proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
-        proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
-
-        const BANDS = [
-            { key: "b02", label: "B02 (Blue)", color: "#0066ff" },
-            { key: "b03", label: "B03 (Green)", color: "#00aa00" },
-            { key: "b04", label: "B04 (Red)", color: "#cc0000" },
-            { key: "b8a", label: "B8A (NIR)", color: "#9900cc" }
-        ];
-        let siteName = "innsbruck", season = "2024";
-        let source = "s2";
-        let exclusion = "none";
-        let sitePosition = [47.116171, 11.320308];
-        let start = new Date(2024, 0, 1);
-        let timeseries = [];
-        let gccTimeseries = [];
-        let ndviTimeseries = [];
-        let availableSiteSeasons = {};
-        let s2Map = null, s2Overlay = null, s2Marker = null;
-
-        const urlParams = new URLSearchParams(location.search);
-
-        function filteredTimeseries(arr) {
-            if (exclusion === "none") return arr;
-            const key = exclusion === "aggressive" ? "excluded_aggressive" : "excluded_nonaggressive";
-            return arr.filter(t => !t[key]);
-        }
-
-        function fmtDate(d) {
-            return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
-        }
-        const dateFromDays = (days) => fmtDate(new Date(start.getTime() + days * 86400000));
-        const daysFromDate = (dateStr) => {
-            const [y, m, d] = dateStr.split("-").map(Number);
-            return Math.floor((new Date(y, m - 1, d) - start) / 86400000);
-        };
-
-        function drawBandPlot(canvasId, bandKey, bandLabel, color) {
-            const canvas = document.getElementById(canvasId);
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-
-            const data = filteredTimeseries(timeseries).filter(t => t[bandKey] != null);
-            if (!data.length) return;
-
-            const dates = data.map(t => new Date(t.date));
-            const values = data.map(t => t[bandKey]);
-            const minDate = new Date(Math.min(...dates)), maxDate = new Date(Math.max(...dates));
-            const dateRange = maxDate - minDate || 1;
-            const minVal = Math.min(...values), maxVal = Math.max(...values);
-            const valRange = maxVal - minVal || 1;
-
-            const x = (d) => pad + ((new Date(d) - minDate) / dateRange) * plotW;
-            const y = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;
-
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath();
-            ctx.moveTo(pad, pad);
-            ctx.lineTo(pad, pad + plotH);
-            ctx.lineTo(pad + plotW, pad + plotH);
-            ctx.stroke();
-
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minVal.toFixed(4), 2, pad + plotH + 10);
-            ctx.fillText(maxVal.toFixed(4), 2, pad + 3);
-
-            ctx.strokeStyle = color;
-            ctx.beginPath();
-            data.forEach((t, i) => {
-                const px = x(t.date), py = y(t[bandKey]);
-                i === 0 ? ctx.moveTo(px, py) : ctx.lineTo(px, py);
-            });
-            ctx.stroke();
-
-            ctx.fillStyle = "#888";
-            const axisY = pad + plotH;
-            for (const t of data) ctx.fillRect(x(t.date) - 1, axisY - 1, 2, 2);
-
-            const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(currentDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath();
-            ctx.moveTo(xPos, pad);
-            ctx.lineTo(xPos, pad + plotH);
-            ctx.stroke();
-
-            const closest = data.reduce((c, t) =>
-                Math.abs(new Date(t.date) - new Date(currentDate)) < Math.abs(new Date(c.date) - new Date(currentDate)) ? t : c
-            );
-            if (closest) {
-                ctx.fillStyle = "#f00";
-                ctx.font = "bold 10px sans-serif";
-                ctx.fillText(closest[bandKey].toFixed(4), xPos + 5, y(closest[bandKey]) - 5);
-            }
-        }
-
-        function drawNdviPlot() {
-            const canvas = document.getElementById("plot_ndvi");
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-            const data = filteredTimeseries(ndviTimeseries).filter(t => t.ndvi != null);
-            if (!data.length) return;
-
-            const dates = data.map(t => new Date(t.date));
-            const values = data.map(t => t.ndvi);
-            const minDate = new Date(Math.min(...dates)), maxDate = new Date(Math.max(...dates));
-            const dateRange = maxDate - minDate || 1;
-            const minVal = Math.min(...values), maxVal = Math.max(...values);
-            const valRange = maxVal - minVal || 1;
-            const x = (d) => pad + ((new Date(d) - minDate) / dateRange) * plotW;
-            const y = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;
-
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath();
-            ctx.moveTo(pad, pad);
-            ctx.lineTo(pad, pad + plotH);
-            ctx.lineTo(pad + plotW, pad + plotH);
-            ctx.stroke();
-
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minVal.toFixed(3), 2, pad + plotH + 10);
-            ctx.fillText(maxVal.toFixed(3), 2, pad + 3);
-
-            ctx.strokeStyle = "#2d7a3e";
-            ctx.beginPath();
-            data.forEach((t, i) => {
-                const px = x(t.date), py = y(t.ndvi);
-                i === 0 ? ctx.moveTo(px, py) : ctx.lineTo(px, py);
-            });
-            ctx.stroke();
-
-            ctx.fillStyle = "#888";
-            const axisY = pad + plotH;
-            for (const t of data) ctx.fillRect(x(t.date) - 1, axisY - 1, 2, 2);
-
-            const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(currentDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath();
-            ctx.moveTo(xPos, pad);
-            ctx.lineTo(xPos, pad + plotH);
-            ctx.stroke();
-
-            const closest = data.reduce((c, t) =>
-                Math.abs(new Date(t.date) - new Date(currentDate)) < Math.abs(new Date(c.date) - new Date(currentDate)) ? t : c
-            );
-            if (closest) {
-                ctx.fillStyle = "#f00";
-                ctx.font = "bold 10px sans-serif";
-                ctx.fillText(closest.ndvi.toFixed(3), xPos + 5, y(closest.ndvi) - 5);
-            }
-        }
-
-        function drawGccPlot() {
-            const canvas = document.getElementById("plot_gcc");
-            if (!canvas) return;
-            const ctx = canvas.getContext("2d");
-            canvas.width = canvas.offsetWidth;
-            canvas.height = 100;
-            const w = canvas.width, h = canvas.height, pad = 30;
-            const plotW = w - pad * 2, plotH = h - pad * 2;
-            const data = filteredTimeseries(gccTimeseries).filter(t => t.greenness_index != null);
-            if (!data.length) return;
-
-            const dates = data.map(t => new Date(t.date));
-            const values = data.map(t => t.greenness_index);
-            const minDate = new Date(Math.min(...dates)), maxDate = new Date(Math.max(...dates));
-            const dateRange = maxDate - minDate || 1;
-            const minVal = Math.min(...values), maxVal = Math.max(...values);
-            const valRange = maxVal - minVal || 1;
-            const x = (d) => pad + ((new Date(d) - minDate) / dateRange) * plotW;
-            const y = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;
-
-            ctx.clearRect(0, 0, w, h);
-            ctx.strokeStyle = "#ccc";
-            ctx.beginPath();
-            ctx.moveTo(pad, pad);
-            ctx.lineTo(pad, pad + plotH);
-            ctx.lineTo(pad + plotW, pad + plotH);
-            ctx.stroke();
-
-            ctx.fillStyle = "#000";
-            ctx.font = "9px sans-serif";
-            ctx.fillText(minVal.toFixed(3), 2, pad + plotH + 10);
-            ctx.fillText(maxVal.toFixed(3), 2, pad + 3);
-
-            ctx.strokeStyle = "#00aa00";
-            ctx.beginPath();
-            data.forEach((t, i) => {
-                const px = x(t.date), py = y(t.greenness_index);
-                i === 0 ? ctx.moveTo(px, py) : ctx.lineTo(px, py);
-            });
-            ctx.stroke();
-
-            ctx.fillStyle = "#888";
-            const axisY = pad + plotH;
-            for (const t of data) ctx.fillRect(x(t.date) - 1, axisY - 1, 2, 2);
-
-            const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const xPos = x(currentDate);
-            ctx.strokeStyle = "#f00";
-            ctx.lineWidth = 2;
-            ctx.beginPath();
-            ctx.moveTo(xPos, pad);
-            ctx.lineTo(xPos, pad + plotH);
-            ctx.stroke();
-
-            const closest = data.reduce((c, t) =>
-                Math.abs(new Date(t.date) - new Date(currentDate)) < Math.abs(new Date(c.date) - new Date(currentDate)) ? t : c
-            );
-            if (closest) {
-                ctx.fillStyle = "#f00";
-                ctx.font = "bold 10px sans-serif";
-                ctx.fillText(closest.greenness_index.toFixed(3), xPos + 5, y(closest.greenness_index) - 5);
-            }
-        }
-
-        function drawAllPlots() {
-            drawNdviPlot();
-            drawGccPlot();
-            BANDS.forEach(b => drawBandPlot(`plot_${b.key}`, b.key, b.label, b.color));
-        }
-
-        function computeGcc(entry) {
-            const b = entry.b02 + entry.b03 + entry.b04;
-            return b > 0 ? entry.b03 / b : null;
-        }
-
-        async function loadTimeseries() {
-            const rawBase = `data/${siteName}/${season}/raw`;
-            const src = document.getElementById("sourceSelect")?.value || "s2";
-            source = src;
-            try {
-                const preselectionRes = await fetch(`${rawBase}/preselection/${source}_preselection.json`);
-                const preselection = preselectionRes.ok ? await preselectionRes.json() : [];
-                timeseries = preselection;
-                ndviTimeseries = preselection;
-                gccTimeseries = preselection.map(t => ({ ...t, greenness_index: computeGcc(t) })).filter(t => t.greenness_index != null);
-            } catch {
-                timeseries = [];
-                ndviTimeseries = [];
-                gccTimeseries = [];
-            }
-            const srcLabel = source.toUpperCase();
-            document.getElementById("mapLabel").textContent = `${srcLabel} RGB (closest available)`;
-            const jsonUrl = `${rawBase}/preselection/${source}_preselection.json`;
-            const csvUrl = `${rawBase}/preselection/${source}_preselection.csv`;
-            document.getElementById("downloadLinks").innerHTML =
-                `<a href="${jsonUrl}" download="${siteName}_${season}_${source}_preselection.json" target="_blank">[JSON]</a>` +
-                `<a href="${csvUrl}" download="${siteName}_${season}_${source}_preselection.csv" target="_blank">[CSV]</a>`;
-            document.getElementById("bandPlots").innerHTML =
-                `<div class="plot-label">${srcLabel} NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>` +
-                `<div class="plot-label">${srcLabel} GCC (Greenness Index)</div><canvas id="plot_gcc" class="plot"></canvas>` +
-                BANDS.map(b => `<div class="plot-label">${b.label}</div><canvas id="plot_${b.key}" class="plot"></canvas>`).join("");
-            const yearEnd = new Date(parseInt(season), 11, 31);
-            document.getElementById("dateSlider").max = Math.ceil((yearEnd - start) / 86400000);
-            drawAllPlots();
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            updateS2Imagery();
-        }
-
-        async function probeDataExists(sitename, s) {
-            try {
-                const res = await fetch(`data/${sitename}/${s}/raw/preselection/s2_preselection.json`, { method: "HEAD" });
-                return res.ok;
-            } catch { return false; }
-        }
-
-        function getSiteBySitename(sitename) {
-            return window.sitesData?.features?.find(f => f.properties?.sitename === sitename);
-        }
-
-        async function setSiteSeason(newSite, newSeason) {
-            siteName = newSite;
-            season = newSeason;
-            start = new Date(parseInt(season), 0, 1);
-            const site = getSiteBySitename(newSite);
-            if (site?.geometry?.coordinates) {
-                const [lon, lat] = site.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            if (s2Map) { s2Map.setView(sitePosition, 12); if (s2Marker) s2Marker.setLatLng(sitePosition); }
-            document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
-            document.getElementById("season").textContent = season;
-            const params = new URLSearchParams(location.search);
-            params.set("site", siteName);
-            params.set("season", season);
-            history.replaceState({}, "", `?${params}`);
-            await loadTimeseries();
-            const urlDate = params.get("date");
-            if (urlDate) document.getElementById("dateSlider").value = daysFromDate(urlDate);
-        }
-
-        async function init() {
-            try {
-                const res = await fetch("data/sites.geojson");
-                window.sitesData = res.ok ? await res.json() : { features: [] };
-            } catch {
-                window.sitesData = { features: [] };
-            }
-            const features = window.sitesData.features || [];
-            for (const f of features) {
-                const sn = f.properties?.sitename;
-                if (!sn) continue;
-                const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
-                const withData = [];
-                for (const s of seasonsFromGeo) {
-                    if (await probeDataExists(sn, s)) withData.push(s);
-                }
-                if (withData.length) availableSiteSeasons[sn] = withData;
-            }
-            const availableSites = Object.keys(availableSiteSeasons);
-            const siteSelect = document.getElementById("siteSelect");
-            siteSelect.innerHTML = "";
-            (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
-                const opt = document.createElement("option");
-                opt.value = sn;
-                opt.textContent = sn;
-                siteSelect.appendChild(opt);
-                if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
-            });
-
-            const urlSite = urlParams.get("site");
-            const urlSeason = urlParams.get("season");
-            const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
-            const initialSeason = (urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)) ? urlSeason : ((availableSiteSeasons[initialSite] || [])[0] || "2024");
-
-            siteSelect.value = initialSite;
-            document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || []).map(s =>
-                `<option value="${s}">${s}</option>`
-            ).join("");
-            document.getElementById("seasonSelect").value = initialSeason;
-            document.getElementById("sourceSelect").value = urlParams.get("source") || "s2";
-            exclusion = urlParams.get("exclusion") || "none";
-            document.getElementById("exclusionSelect").value = exclusion;
-
-            const initSite = getSiteBySitename(initialSite);
-            if (initSite?.geometry?.coordinates) {
-                const [lon, lat] = initSite.geometry.coordinates;
-                sitePosition = [lat, lon];
-            }
-            const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
-            s2Map = L.map("s2map", { zoomControl: false }).setView(sitePosition, 12)
-                .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
-            s2Marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(s2Map);
-
-            siteSelect.addEventListener("change", function() {
-                const sn = this.value;
-                const seas = availableSiteSeasons[sn] || [];
-                document.getElementById("seasonSelect").innerHTML = seas.map(s => `<option value="${s}">${s}</option>`).join("");
-                document.getElementById("seasonSelect").value = seas[0] || "2024";
-                setSiteSeason(sn, document.getElementById("seasonSelect").value);
-            });
-            document.getElementById("seasonSelect").addEventListener("change", function() {
-                setSiteSeason(siteSelect.value, this.value);
-            });
-            document.getElementById("sourceSelect").addEventListener("change", async function() {
-                source = this.value;
-                urlParams.set("source", source);
-                history.replaceState({}, "", `?${urlParams}`);
-                await loadTimeseries();
-            });
-            document.getElementById("exclusionSelect").addEventListener("change", function() {
-                exclusion = this.value;
-                urlParams.set("exclusion", exclusion);
-                history.replaceState({}, "", `?${urlParams}`);
-                drawAllPlots();
-                updateS2Imagery();
-            });
-
-            await setSiteSeason(initialSite, initialSeason);
-        }
-
-        document.getElementById("dateSlider").addEventListener("input", function() {
-            document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
-            drawAllPlots();
-            updateS2Imagery();
-        });
-
-        function closestFilename(dateStr) {
-            const target = new Date(dateStr);
-            const withData = filteredTimeseries(timeseries).filter(t => t.filename);
-            if (!withData.length) return null;
-            const closest = withData.reduce((c, t) =>
-                Math.abs(new Date(t.date) - target) < Math.abs(new Date(c.date) - target) ? t : c
-            );
-            return closest.filename;
-        }
-
-        function transformBounds(bbox, fromCRS) {
-            const sw = proj4(fromCRS, "EPSG:4326", [bbox[0], bbox[1]]);
-            const ne = proj4(fromCRS, "EPSG:4326", [bbox[2], bbox[3]]);
-            return [[sw[1], sw[0]], [ne[1], ne[0]]];
-        }
-
-        async function loadS2Geotiff(filename) {
-            const path = `data/${siteName}/${season}/raw/${source}/${filename}`;
-            const tiff = await GeoTIFF.fromArrayBuffer(await (await fetch(path)).arrayBuffer());
-            const image = await tiff.getImage();
-            const rasters = await image.readRasters();
-            const width = image.getWidth(), height = image.getHeight();
-            const bbox = image.getBoundingBox();
-            const geoKeys = image.getGeoKeys();
-            const crsCode = geoKeys.ProjectedCSTypeGeoKey ? `EPSG:${geoKeys.ProjectedCSTypeGeoKey}` :
-                (geoKeys.GeographicTypeGeoKey !== 4326 ? `EPSG:${geoKeys.GeographicTypeGeoKey}` : "EPSG:4326");
-            const [blue, green, red] = [0, 1, 2].map(i => Array.from(rasters[i]));
-            const normalize = (arr) => {
-                let min = Infinity, max = -Infinity;
-                for (const v of arr) if (!isNaN(v) && v > 0) { min = Math.min(min, v); max = Math.max(max, v); }
-                return arr.map(v => Math.max(0, Math.min(255, ((v - min) / (max - min || 1)) * 255)));
-            };
-            const [rN, gN, bN] = [red, green, blue].map(normalize);
-            const canvas = Object.assign(document.createElement("canvas"), { width, height });
-            const ctx = canvas.getContext("2d");
-            ctx.imageSmoothingEnabled = false;
-            const imgData = ctx.createImageData(width, height);
-            for (let i = 0; i < rN.length; i++) {
-                const idx = i * 4;
-                if (rN[i] === 0 && gN[i] === 0 && bN[i] === 0) imgData.data[idx + 3] = 0;
-                else { imgData.data[idx] = rN[i]; imgData.data[idx + 1] = gN[i]; imgData.data[idx + 2] = bN[i]; imgData.data[idx + 3] = 255; }
-            }
-            ctx.putImageData(imgData, 0, 0);
-            const bounds = crsCode === "EPSG:4326" ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]] : transformBounds(bbox, crsCode);
-            return { dataUrl: canvas.toDataURL(), bounds };
-        }
-
-        async function updateS2Imagery() {
-            const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
-            const filename = closestFilename(dateStr);
-            if (!filename || !s2Map) {
-                if (s2Overlay) { s2Map.removeLayer(s2Overlay); s2Overlay = null; }
-                document.getElementById("s2rgbdate").textContent = "";
-                return;
-            }
-            try {
-                const { dataUrl, bounds } = await loadS2Geotiff(filename);
-                if (s2Overlay) s2Map.removeLayer(s2Overlay);
-                s2Overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(s2Map);
-                s2Map.fitBounds(bounds);
-                const d = filename.split("_")[0];
-                document.getElementById("s2rgbdate").textContent = `${d.slice(0,4)}-${d.slice(4,6)}-${d.slice(6,8)}`;
-            } catch (e) {
-                if (s2Overlay) { s2Map.removeLayer(s2Overlay); s2Overlay = null; }
-                document.getElementById("s2rgbdate").textContent = "";
-            }
-        }
-
-        init();
-    </script>
-</body>
-</html>
				`@ -1 +0,0 @@`
				`"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""`