Switching horses.

This commit is contained in:
Felix Delattre 2026-06-10 14:18:06 +02:00
parent 25cbd97662
commit e3e14027fc
51 changed files with 5078 additions and 11678 deletions

12
.gitignore vendored
View file

@ -1,10 +1,9 @@
# Project data # Generated caches and downloads (regenerate via pipeline steps)
data/* data/
webapp/data
# Environment # Environment and secrets
.env .env
.venv .venv/
venv/ venv/
env/ env/
@ -42,6 +41,3 @@ dist/
# OS # OS
.DS_Store .DS_Store
Thumbs.db Thumbs.db
AGENTS.md
.vibe

View file

@ -1,8 +0,0 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.4
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

278
1-phenocam.py Normal file
View file

@ -0,0 +1,278 @@
"""Step 1: download worldwide PhenoCam sites for a calendar year.
Inputs (``data/``): none; this step queries the PhenoCam API.
Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
- ``phenocam/{year}.json`` site list manifest
- ``phenocam/{year}/{sitename}.json`` camera + ROI metadata
- ``phenocam/{year}/{sitename}_1day.csv`` ``one_day_summary`` GCC CSV
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter).
Next step: :mod:`2-phenocam-screening`.
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import date
from pathlib import Path
from typing import Any
import requests
# Put the shared ``processing`` helpers on ``sys.path`` so the imports below
# resolve when this file is run directly as a script.
# NOTE(review): ``parents[1]`` is the directory *above* the one containing this
# file -- confirm ``processing/`` really lives there and not beside this script.
PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
if str(PROCESSING_DIR) not in sys.path:
    sys.path.insert(0, str(PROCESSING_DIR))
from acquisition_phenocam import PHENOCAM_API  # noqa: E402
# NOTE(review): the module name says "all_europe" while this step claims
# worldwide coverage -- confirm ``_paginate_cameras`` applies no regional filter.
from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date  # noqa: E402

EVALUATION_YEAR = 2025  # default value of ``--evaluation-year``
HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1"  # URL requested by check_phenocam_host()
ONE_DAY_CSV_SUFFIX = "_1day.csv"  # filename suffix of the cached per-site one_day_summary CSV
def check_phenocam_host() -> None:
    """Probe the PhenoCam API once and fail fast when it cannot be reached.

    Raises:
        RuntimeError: if the probe request fails or returns an HTTP error
            status; the underlying ``requests`` exception is chained as cause.
    """
    try:
        requests.get(HOST_PROBE, timeout=30).raise_for_status()
    except requests.RequestException as exc:
        detail = f"{exc.__class__.__name__}: {exc}"
        message = f"PhenoCam API unreachable (phenocam.nau.edu): {detail}"
        raise RuntimeError(message) from exc
def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
    """Return True when the span ``first``..``last`` touches calendar year ``season``.

    Both endpoints are parsed with ``_parse_iso_date``; if either parse yields
    ``None`` the span is treated as non-overlapping.
    """
    archive_start = _parse_iso_date(first)
    archive_end = _parse_iso_date(last)
    if archive_start is None or archive_end is None:
        return False
    starts_after_year = archive_start > date(season, 12, 31)
    if starts_after_year:
        return False
    return archive_end >= date(season, 1, 1)
def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
    """Return the per-site cache directory ``<cache_dir>/phenocam/<evaluation_year>``."""
    return cache_dir.joinpath("phenocam", str(evaluation_year))
def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the cached camera + ROI metadata JSON for ``sitename``."""
    year_dir = sites_dir(cache_dir, evaluation_year)
    filename = f"{sitename}.json"
    return year_dir / filename
def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the cached one-day summary CSV for ``sitename``."""
    year_dir = sites_dir(cache_dir, evaluation_year)
    filename = f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    return year_dir / filename
def load_candidate_cameras(
    evaluation_year: int,
    *,
    site_filter: set[str] | None = None,
    active_only: bool = False,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Collect cameras whose archive overlaps ``evaluation_year``.

    Args:
        evaluation_year: calendar year the ``date_first``..``date_last`` span
            of a camera must overlap.
        site_filter: when given, only cameras whose ``Sitename`` is in the set.
        active_only: when true, drop cameras without a truthy ``active`` field.
        limit: when given, keep only the first ``limit`` cameras *after* sorting.

    Returns:
        Shallow copies of the matching camera records, sorted by ``Sitename``.
    """

    def _is_candidate(camera: dict[str, Any]) -> bool:
        if active_only and not camera.get("active"):
            return False
        sitename = str(camera["Sitename"])
        if site_filter is not None and sitename not in site_filter:
            return False
        return _overlaps_year(
            camera.get("date_first"), camera.get("date_last"), evaluation_year
        )

    selected = sorted(
        (dict(camera) for camera in _paginate_cameras() if _is_candidate(camera)),
        key=lambda item: str(item["Sitename"]),
    )
    return selected if limit is None else selected[:limit]
def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
    """Return the first ROI record the API lists for ``site_name``, if any.

    Pages through ``{PHENOCAM_API}/roilists/`` following the ``next`` links and
    stops at the first page holding a record whose ``site`` equals
    ``site_name``. The ``site`` query parameter is only sent with the first
    request; follow-up requests use the ``next`` URL as returned.

    Returns:
        A copy of the first matching record, or ``None`` when no page has one.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    next_url = f"{PHENOCAM_API}/roilists/"
    query: dict[str, Any] | None = {"site": site_name}
    while next_url:
        response = requests.get(next_url, params=query, timeout=60)
        response.raise_for_status()
        page = response.json()
        for item in page.get("results", []):
            if item.get("site") == site_name:
                return dict(item)
        next_url = page.get("next")
        query = None
    return None
def download_one_day_csv(csv_url: str, output_path: Path) -> None:
    """Download ``csv_url`` and store the response text at ``output_path`` (UTF-8).

    Parent directories are created as needed.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    reply = requests.get(csv_url, timeout=60)
    reply.raise_for_status()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    csv_text = reply.text
    with output_path.open("w", encoding="utf-8") as handle:
        handle.write(csv_text)
def download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
) -> str:
    """Fetch ROI metadata (and the one-day CSV, if advertised) for one camera.

    The metadata JSON is written before the CSV download is attempted, so a
    failed CSV download still leaves the JSON in the cache.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    roi = fetch_roi_record(sitename)

    json_path = site_json_path(cache_dir, evaluation_year, sitename)
    json_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"response": {"camera": camera, "roi": roi}}, indent=2)
    json_path.write_text(document + "\n", encoding="utf-8")

    if roi:
        csv_url = roi.get("one_day_summary")
        if csv_url:
            csv_path = site_csv_path(cache_dir, evaluation_year, sitename)
            download_one_day_csv(csv_url, csv_path)
    return sitename
def load_or_download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
    *,
    refresh: bool,
) -> str:
    """Ensure one camera's files are cached, downloading only what is missing.

    With ``refresh`` set, or when the metadata JSON is absent, the site is
    downloaded from scratch. Otherwise the cached JSON is kept, and a missing
    CSV is re-fetched from the ``one_day_summary`` URL stored in that JSON.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    json_path = site_json_path(cache_dir, evaluation_year, sitename)
    csv_path = site_csv_path(cache_dir, evaluation_year, sitename)

    if refresh or not json_path.is_file():
        return download_site(camera, evaluation_year, cache_dir)
    if csv_path.is_file():
        return sitename

    # JSON is cached but the CSV is not: retry only the CSV download.
    cached = json.loads(json_path.read_text(encoding="utf-8"))
    cached_roi = cached.get("response", {}).get("roi") or {}
    csv_url = cached_roi.get("one_day_summary")
    if csv_url:
        download_one_day_csv(csv_url, csv_path)
    return sitename
def _format_coordinate(value: Any) -> str:
    """Format a latitude/longitude for progress output; ``n/a`` if unusable."""
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError):
        return "n/a"


def run_download(
    *,
    cache_dir: Path,
    evaluation_year: int,
    active_only: bool = False,
    site_filter: set[str] | None = None,
    limit: int | None = None,
    refresh: bool = False,
) -> list[str]:
    """Download (or reuse from cache) every candidate site for ``evaluation_year``.

    Args:
        cache_dir: base directory for the per-site cache files.
        evaluation_year: calendar year a camera archive must overlap.
        active_only: forwarded to ``load_candidate_cameras``.
        site_filter: forwarded to ``load_candidate_cameras``.
        limit: forwarded to ``load_candidate_cameras``.
        refresh: re-download sites even when cache files exist.

    Returns:
        The processed sitenames, sorted.

    Raises:
        RuntimeError: if the PhenoCam host probe fails.
    """
    check_phenocam_host()
    candidates = load_candidate_cameras(
        evaluation_year,
        site_filter=site_filter,
        active_only=active_only,
        limit=limit,
    )
    total = len(candidates)
    print(
        f"[PhenoCam-1] {total} candidate(s) with archive overlap for "
        f"{evaluation_year}"
    )
    sitenames: list[str] = []
    for index, camera in enumerate(candidates, start=1):
        sitename = str(camera["Sitename"])
        # Coordinates are shown for information only; a camera with a missing
        # or malformed Lat/Lon must not abort the whole download run.
        lat_text = _format_coordinate(camera.get("Lat"))
        lon_text = _format_coordinate(camera.get("Lon"))
        print(f"[PhenoCam-1] ({index}/{total}) {sitename} ({lat_text}, {lon_text})")
        sitenames.append(
            load_or_download_site(
                camera,
                evaluation_year,
                cache_dir,
                refresh=refresh,
            )
        )
    return sorted(sitenames)
def write_manifest(
    sitenames: list[str],
    output_path: Path,
    cache_dir: Path,
    evaluation_year: int,
) -> None:
    """Write the step-1 manifest JSON listing the downloaded sites.

    ``sites_dir`` is stored relative to the manifest's own directory, so the
    reader can resolve it by joining it onto the manifest's parent.

    Args:
        sitenames: sitenames to list under ``sites``.
        output_path: manifest file to write; parent directories are created.
        cache_dir: base cache directory the per-site files live under.
        evaluation_year: year recorded in the manifest and used for the site
            directory.
    """
    import os  # local import: this module's header does not import ``os``

    site_cache_dir = sites_dir(cache_dir, evaluation_year)
    try:
        # ``Path.relative_to`` raises ValueError unless the manifest sits in an
        # ancestor of the site directory, which made a custom ``--output-json``
        # location crash after all downloads were done. ``relpath`` can walk up
        # with ``..`` components instead.
        rel_sites_dir = Path(os.path.relpath(site_cache_dir, start=output_path.parent))
    except ValueError:
        # No relative path exists (e.g. different drives on Windows); fall back
        # to an absolute path, which still joins correctly on the reader side.
        rel_sites_dir = site_cache_dir.resolve()
    payload = {
        "evaluation_year": evaluation_year,
        "count": len(sitenames),
        "sites_dir": rel_sites_dir.as_posix(),
        "sites": sitenames,
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-1] Wrote {output_path}")
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, download the sites and write the manifest.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code, always 0 when no exception is raised.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add_option = parser.add_argument
    add_option(
        "--cache-dir",
        type=Path,
        default=Path("data"),
        help="Base directory for per-site files and manifest",
    )
    add_option(
        "--evaluation-year",
        type=int,
        default=EVALUATION_YEAR,
        help=f"Calendar year to download (default: {EVALUATION_YEAR})",
    )
    add_option(
        "--active-only",
        action="store_true",
        help="Restrict candidates to cameras marked active in the API",
    )
    add_option(
        "--limit",
        type=int,
        default=None,
        help="Process only the first N candidate sites (testing)",
    )
    add_option(
        "--sites",
        type=str,
        default=None,
        help="Comma-separated sitenames to download (testing)",
    )
    add_option(
        "--refresh",
        action="store_true",
        help="Re-download sites even when cache files exist",
    )
    add_option(
        "--output-json",
        type=Path,
        default=None,
        help="Manifest output path (default: data/phenocam/{year}.json)",
    )
    options = parser.parse_args(argv)

    # Empty entries (e.g. a trailing comma) are dropped from the site filter.
    wanted_sites = (
        {name.strip() for name in options.sites.split(",") if name.strip()}
        if options.sites
        else None
    )
    downloaded = run_download(
        cache_dir=options.cache_dir,
        evaluation_year=options.evaluation_year,
        active_only=options.active_only,
        site_filter=wanted_sites,
        limit=options.limit,
        refresh=options.refresh,
    )

    default_manifest = options.cache_dir / "phenocam" / f"{options.evaluation_year}.json"
    manifest_path = options.output_json or default_manifest
    write_manifest(downloaded, manifest_path, options.cache_dir, options.evaluation_year)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

495
2-phenocam-screening.py Normal file
View file

@ -0,0 +1,495 @@
"""Step 2: PhenoCam GCC + SNR screening on step-1 cache.
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
- ``phenocam/{year}.json`` step-1 manifest
- ``phenocam/{year}/{sitename}.json`` per-site metadata
- ``phenocam/{year}/{sitename}_1day.csv`` GCC timeseries
Outputs (``data/phenocam_screening/``):
- ``{year}.json`` full per-site results
- ``{year}.csv`` flat summary table
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional; default: all manifest sites).
Next step: :mod:`3-sentinel-data`.
"""
from __future__ import annotations
import argparse
import csv
import json
import math
import sys
from datetime import date, datetime
from pathlib import Path
from typing import Any
import numpy as np
from scipy.interpolate import UnivariateSpline
# Put the shared ``processing`` helpers on ``sys.path`` so the import below
# resolves when this file is run directly as a script.
# NOTE(review): ``parents[1]`` is the directory *above* the one containing this
# file -- confirm ``processing/`` really lives there and not beside this script.
PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
if str(PROCESSING_DIR) not in sys.path:
    sys.path.insert(0, str(PROCESSING_DIR))
from acquisition_phenocam import _phenocam_summary_gcc_value  # noqa: E402

MIN_GCC_POINTS = 30  # default for ``--min-gcc-points``; also the floor inside compute_snr_aic_spline
SNR_THRESHOLD = 2.0  # default for ``--snr-threshold``
CLUSTER_RADIUS_M = 500.0  # default for ``--cluster-radius-m`` (metres)
GATE_ORDER = ("phenocam", "snr", "cluster")  # gate names in the order print_summary reports them
ONE_DAY_CSV_SUFFIX = "_1day.csv"  # suffix load_site_entry appends to the sitename to find the CSV
_EARTH_RADIUS_M = 6371000.0  # sphere radius in metres used by _haversine_m
def load_manifest(path: Path) -> dict[str, Any]:
    """Read the step-1 manifest and verify its required keys are present.

    Raises:
        ValueError: naming the first of ``evaluation_year``, ``sites_dir`` and
            ``sites`` that is missing from the manifest.
    """
    manifest = json.loads(path.read_text(encoding="utf-8"))
    required = ("evaluation_year", "sites_dir", "sites")
    missing = next((key for key in required if key not in manifest), None)
    if missing is not None:
        raise ValueError(f"Expected '{missing}' in manifest {path}")
    return manifest
def resolve_sites_dir(manifest_path: Path, manifest: dict[str, Any]) -> Path:
    """Resolve the manifest's ``sites_dir`` against the manifest's own directory."""
    manifest_dir = manifest_path.parent
    return manifest_dir.joinpath(manifest["sites_dir"]).resolve()
def load_site_entry(sites_dir: Path, sitename: str) -> dict[str, Any]:
    """Load one site's cached metadata JSON and attach its CSV location.

    The returned dict gains a ``_one_day_csv`` key holding the CSV path when
    that file exists, otherwise ``None``.
    """
    metadata_path = sites_dir / f"{sitename}.json"
    entry = json.loads(metadata_path.read_text(encoding="utf-8"))
    candidate_csv = sites_dir / f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    if candidate_csv.is_file():
        entry["_one_day_csv"] = candidate_csv
    else:
        entry["_one_day_csv"] = None
    return entry
def parse_gcc90_series(csv_path: Path, evaluation_year: int) -> list[tuple[str, float]]:
    """Extract the in-year GCC observations from a one-day summary CSV.

    Blank lines and lines starting with ``#`` are dropped before CSV parsing.
    Rows are skipped when the ``date`` field is empty or not ``YYYY-MM-DD``,
    when the date lies outside ``evaluation_year``, or when
    ``_phenocam_summary_gcc_value`` returns ``None`` for the row.

    Returns:
        ``(iso_date, gcc)`` tuples sorted by date string.
    """
    raw_text = csv_path.read_text(encoding="utf-8")
    data_lines: list[str] = []
    for line in raw_text.split("\n"):
        if not line or line.startswith("#"):
            continue
        data_lines.append(line)

    reader = csv.DictReader(data_lines)
    header = reader.fieldnames or ()
    # Passed through to the helper whenever the file has no ``gcc_90`` column.
    use_mean_fallback = "gcc_90" not in header
    window_start = date(evaluation_year, 1, 1)
    window_end = date(evaluation_year, 12, 31)

    observations: list[tuple[str, float]] = []
    for record in reader:
        raw_day = record.get("date")
        if not raw_day:
            continue
        try:
            day = datetime.strptime(raw_day, "%Y-%m-%d").date()
        except ValueError:
            continue
        if day < window_start or day > window_end:
            continue
        value = _phenocam_summary_gcc_value(record, use_mean_fallback)
        if value is None:
            continue
        observations.append((day.isoformat(), float(value)))
    return sorted(observations, key=lambda item: item[0])
def _months_covered(day_strings: list[str]) -> int:
months: set[int] = set()
for day in day_strings:
months.add(datetime.strptime(day, "%Y-%m-%d").month)
return len(months)
def _aic_for_spline(x: np.ndarray, y: np.ndarray, spline: UnivariateSpline) -> float:
residuals = y - spline(x)
rss = float(np.sum(residuals**2))
n = len(y)
if rss <= 0 or n < 4:
return math.inf
edf = float(spline.get_knots().shape[0] + spline.get_coeffs().shape[0])
return n * math.log(rss / n) + 2.0 * edf
def compute_snr_aic_spline(
    series: list[tuple[str, float]],
    min_points: int | None = None,
) -> float | None:
    """Return amplitude / RMSE of the AIC-selected smoothing spline, if defined.

    A cubic ``UnivariateSpline`` is fitted for 40 log-spaced smoothing factors
    (scaled by the variance of the values and the number of observations); the
    fit with the lowest ``_aic_for_spline`` score is kept. The result is
    ``(max(y) - min(y)) / rmse`` where ``rmse`` is that fit's root-mean-square
    residual.

    Args:
        series: ``(YYYY-MM-DD, value)`` tuples. NOTE(review): the code takes
            ``series[0]`` as the time origin, so it presumably expects the
            input sorted by date -- confirm against callers.
        min_points: minimum number of observations required. ``None`` (the
            default) keeps the previous behaviour of using the module-level
            ``MIN_GCC_POINTS``; callers that accept a user-supplied minimum can
            pass it here so both checks agree.

    Returns:
        The ratio, or ``None`` when there are too few observations, fewer than
        5 distinct days, zero variance, no usable fit, or a zero RMSE.
    """
    if min_points is None:
        min_points = MIN_GCC_POINTS
    if len(series) < min_points:
        return None
    dates = [datetime.strptime(day, "%Y-%m-%d").date() for day, _ in series]
    x = np.array([(d - dates[0]).days for d in dates], dtype=float)
    y = np.array([value for _, value in series], dtype=float)
    if len(np.unique(x)) < 5:
        return None
    y_var = float(np.var(y))
    if y_var <= 0:
        return None
    candidates = np.logspace(-4, 2, 40) * y_var * len(y)
    best_spline: UnivariateSpline | None = None
    best_aic = math.inf
    for smoothing in candidates:
        try:
            spline = UnivariateSpline(x, y, k=3, s=float(smoothing))
        except Exception:
            # Deliberately best-effort: a smoothing factor the fitter rejects
            # is skipped so the remaining candidates can still be evaluated.
            continue
        aic = _aic_for_spline(x, y, spline)
        if aic < best_aic:
            best_aic = aic
            best_spline = spline
    if best_spline is None:
        return None
    residuals = y - best_spline(x)
    rmse = float(np.sqrt(np.mean(residuals**2)))
    amplitude = float(np.max(y) - np.min(y))
    if rmse <= 0:
        return None
    return amplitude / rmse
def screen_site(
    site_entry: dict[str, Any],
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
) -> dict[str, Any]:
    """Run the ``phenocam`` and ``snr`` gates for one site.

    Args:
        site_entry: dict with a ``response`` mapping (its ``roi`` entry is
            read) and an optional ``_one_day_csv`` path.
        evaluation_year: year whose observations are screened.
        min_gcc_points: minimum number of in-year observations.
        snr_threshold: minimum SNR required to pass the ``snr`` gate.

    Returns:
        ``{"response": ..., "calculations": ...}``; ``calculations`` records
        counts, dates, the SNR, ``status`` (``PASS``/``FAIL``), the failing
        gate and reason, and the list of gates passed.

    Note:
        ``compute_snr_aic_spline`` is called with the series only, so its own
        minimum (``MIN_GCC_POINTS``) still applies; a ``min_gcc_points`` below
        that value makes such sites fail the ``snr`` gate as ``snr_undefined``.
    """
    response = site_entry["response"]
    roi = response.get("roi")
    csv_path = site_entry.get("_one_day_csv")
    calculations: dict[str, Any] = {
        "evaluation_year": evaluation_year,
        "n_gcc_points": 0,
        "first_gcc_date": None,
        "last_gcc_date": None,
        "months_with_gcc": 0,
        "snr": None,
        "min_gcc_points": min_gcc_points,
        "snr_threshold": snr_threshold,
        "status": "FAIL",
        "failing_gate": None,
        "passed_gates": [],
        "reason": None,
    }
    outcome = {"response": response, "calculations": calculations}

    def _fail(gate: str, reason: str) -> dict[str, Any]:
        calculations["failing_gate"] = gate
        calculations["reason"] = reason
        return outcome

    # Gate 1 (phenocam): a usable ROI with a downloaded CSV and enough points.
    has_summary = roi is not None and bool(roi.get("one_day_summary"))
    if not has_summary or csv_path is None:
        return _fail("phenocam", "no_roi")

    series = parse_gcc90_series(csv_path, evaluation_year)
    n_points = len(series)
    calculations["n_gcc_points"] = n_points
    if n_points == 0:
        return _fail("phenocam", "no_gcc_in_year")

    day_strings = [day for day, _ in series]
    calculations["first_gcc_date"] = day_strings[0]
    calculations["last_gcc_date"] = day_strings[-1]
    calculations["months_with_gcc"] = _months_covered(day_strings)
    if n_points < min_gcc_points:
        return _fail("phenocam", "insufficient_gcc_points")
    calculations["passed_gates"].append("phenocam")

    # Gate 2 (snr): the SNR must be defined and not below the threshold.
    snr = compute_snr_aic_spline(series)
    calculations["snr"] = snr
    if snr is None:
        return _fail("snr", "snr_undefined")
    if snr < snr_threshold:
        return _fail("snr", "insufficient_snr")
    calculations["passed_gates"].append("snr")

    calculations["status"] = "PASS"
    calculations["failing_gate"] = None
    calculations["reason"] = None
    return outcome
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in metres between two points.

    Coordinates are in decimal degrees; the Earth is modelled as a sphere of
    radius ``_EARTH_RADIUS_M``.
    """
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dlon / 2) ** 2
    # Rounding can push ``a`` marginally above 1 for (near-)antipodal points,
    # which would make ``asin`` raise ValueError; clamp to the valid domain.
    a = min(1.0, a)
    return 2 * _EARTH_RADIUS_M * math.asin(math.sqrt(a))
def _site_coords(row: dict[str, Any]) -> tuple[float, float] | None:
camera = row["response"]["camera"]
lat, lon = camera.get("Lat"), camera.get("Lon")
if lat is None or lon is None:
return None
return float(lat), float(lon)
def _cluster_rank(row: dict[str, Any]) -> tuple[int, float]:
calc = row["calculations"]
return calc["n_gcc_points"], float(calc.get("snr") or 0.0)
def apply_cluster_gate(results: list[dict[str, Any]], *, radius_m: float) -> int:
    """Keep one site per group of nearby SNR-passed sites; demote the others.

    Only rows whose ``passed_gates`` contain ``"snr"`` take part. Rows without
    coordinates pass the ``cluster`` gate unconditionally. The remaining rows
    are linked whenever two of them lie within ``radius_m`` of each other, and
    linked rows form one cluster (links are transitive). Within a cluster the
    row with the highest ``_cluster_rank`` wins; every other member is set to
    ``FAIL`` with gate ``cluster`` and reason ``nearby_duplicate``.

    ``results`` is modified in place.

    Returns:
        The number of demoted rows.
    """
    # Each candidate is (index into ``results``, lat, lon).
    candidates: list[tuple[int, float, float]] = []
    for result_idx, row in enumerate(results):
        gates = row["calculations"]["passed_gates"]
        if "snr" not in gates:
            continue
        coords = _site_coords(row)
        if coords is None:
            gates.append("cluster")
            continue
        candidates.append((result_idx, coords[0], coords[1]))

    count = len(candidates)
    root_of = list(range(count))

    def find(node: int) -> int:
        # Walk to the representative, shortening the chain on the way.
        while root_of[node] != node:
            root_of[node] = root_of[root_of[node]]
            node = root_of[node]
        return node

    def union(a: int, b: int) -> None:
        root_a, root_b = find(a), find(b)
        if root_a != root_b:
            root_of[root_b] = root_a

    for i, (_, lat_a, lon_a) in enumerate(candidates):
        for j in range(i + 1, count):
            _, lat_b, lon_b = candidates[j]
            if _haversine_m(lat_a, lon_a, lat_b, lon_b) <= radius_m:
                union(i, j)

    groups: dict[int, list[int]] = {}
    for i in range(count):
        groups.setdefault(find(i), []).append(i)

    demoted = 0
    for members in groups.values():
        member_indices = [candidates[i][0] for i in members]
        group_size = len(member_indices)
        winner_idx = max(member_indices, key=lambda idx: _cluster_rank(results[idx]))
        winner_name = str(results[winner_idx]["response"]["camera"]["Sitename"])
        for idx in member_indices:
            calc = results[idx]["calculations"]
            calc["cluster_size"] = group_size
            if idx == winner_idx:
                calc["passed_gates"].append("cluster")
                continue
            calc["status"] = "FAIL"
            calc["failing_gate"] = "cluster"
            calc["reason"] = "nearby_duplicate"
            calc["cluster_winner"] = winner_name
            demoted += 1
    return demoted
def run_screening(
    manifest: dict[str, Any],
    sites_dir: Path,
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
    site_filter: set[str] | None = None,
) -> list[dict[str, Any]]:
    """Screen every manifest site (optionally filtered) and collect the results.

    Args:
        manifest: step-1 manifest; its ``sites`` list gives names and order.
        sites_dir: directory holding the per-site cache files.
        evaluation_year: forwarded to ``screen_site``.
        min_gcc_points: forwarded to ``screen_site``.
        snr_threshold: forwarded to ``screen_site``.
        site_filter: when given, only manifest sites contained in this set.

    Returns:
        One ``screen_site`` result per processed site, in manifest order.
    """
    names = manifest["sites"]
    if site_filter is not None:
        names = [name for name in names if name in site_filter]
    total = len(names)

    screened: list[dict[str, Any]] = []
    for position, sitename in enumerate(names, start=1):
        print(f"[PhenoCam-2] ({position}/{total}) {sitename}")
        entry = load_site_entry(sites_dir, sitename)
        outcome = screen_site(
            entry,
            evaluation_year=evaluation_year,
            min_gcc_points=min_gcc_points,
            snr_threshold=snr_threshold,
        )
        screened.append(outcome)
    return screened
def print_summary(results: list[dict[str, Any]], evaluation_year: int) -> None:
    """Print the pass count, per-gate tallies and a per-site table to stdout.

    For each gate in ``GATE_ORDER`` the tally shows how many rows list the gate
    in ``passed_gates`` and how many have it as ``failing_gate``. The table is
    sorted by sitename.
    """
    n_passing = sum(1 for row in results if row["calculations"]["status"] == "PASS")
    gates_label = " + ".join(GATE_ORDER)
    print(
        f"\n[PhenoCam-2] Screening for {evaluation_year}: "
        f"{n_passing}/{len(results)} pass ({gates_label})"
    )

    for gate in GATE_ORDER:
        fails = 0
        after = 0
        for row in results:
            calculations = row["calculations"]
            if calculations["failing_gate"] == gate:
                fails += 1
            if gate in calculations["passed_gates"]:
                after += 1
        print(f" after_{gate}: {after}, fail_at_{gate}: {fails}")

    print("\nPer-site table")
    print(
        f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} "
        f"{'status':>6} gate reason"
    )
    print("-" * 72)

    def _sitename_key(item: dict[str, Any]) -> str:
        return str(item["response"]["camera"]["Sitename"])

    for row in sorted(results, key=_sitename_key):
        camera = row["response"]["camera"]
        calc = row["calculations"]
        if calc["snr"] is not None:
            snr_text = f"{calc['snr']:.2f}"
        else:
            snr_text = ""
        print(
            f"{camera['Sitename']:<24} {calc['n_gcc_points']:4d} "
            f"{calc['months_with_gcc']:3d} {snr_text:>6} "
            f"{calc['status']:>6} {(calc['failing_gate'] or '-'):<8} "
            f"{calc['reason'] or '-'}"
        )
def write_screening_json(
    results: list[dict[str, Any]],
    output_path: Path,
    evaluation_year: int,
) -> None:
    """Write the full screening results as indented JSON.

    The document holds the evaluation year, the total and ``PASS`` counts, and
    all result rows sorted by sitename. Parent directories are created.
    """
    qualifying = [row for row in results if row["calculations"]["status"] == "PASS"]
    ordered = sorted(
        results,
        key=lambda item: str(item["response"]["camera"]["Sitename"]),
    )
    payload = {
        "evaluation_year": evaluation_year,
        "count": len(results),
        "qualifying_count": len(qualifying),
        "sites": ordered,
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps(payload, indent=2) + "\n"
    output_path.write_text(document, encoding="utf-8")
    print(f"[PhenoCam-2] Wrote {output_path}")
def write_screening_csv(results: list[dict[str, Any]], output_path: Path) -> None:
    """Write a flat one-row-per-site CSV summary of the screening results.

    Each row combines selected camera / site-metadata / ROI fields with every
    key of the row's ``calculations``. Columns appear in first-seen order
    across all rows, so keys present only on some rows become extra columns
    that stay empty elsewhere. With no results only a
    ``Sitename,status`` header is written.
    """

    def _flatten(row: dict[str, Any]) -> dict[str, Any]:
        camera = row["response"]["camera"]
        metadata = camera.get("sitemetadata") or {}
        roi = row["response"].get("roi") or {}
        flat: dict[str, Any] = {
            "Sitename": camera.get("Sitename"),
            "Lat": camera.get("Lat"),
            "Lon": camera.get("Lon"),
            "site_description": metadata.get("site_description"),
            "primary_veg_type": metadata.get("primary_veg_type"),
            "site_type": metadata.get("site_type"),
            "one_day_summary": roi.get("one_day_summary"),
        }
        flat.update(row["calculations"])
        return flat

    flat_rows = [_flatten(row) for row in results]
    if flat_rows:
        # dict.fromkeys de-duplicates while keeping first-seen column order.
        fieldnames = list(dict.fromkeys(key for flat in flat_rows for key in flat))
    else:
        fieldnames = ["Sitename", "status"]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(flat_rows)
    print(f"[PhenoCam-2] Wrote {output_path}")
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, screen the step-1 sites and write both outputs.

    The step-1 manifest is always read from ``data/phenocam/{year}.json``
    relative to the current working directory.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code, always 0 when no exception is raised.

    Raises:
        SystemExit: if the step-1 manifest file does not exist.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add_option = parser.add_argument
    add_option(
        "--evaluation-year",
        type=int,
        default=2025,
        help="Evaluation year (default: 2025)",
    )
    add_option(
        "--sites",
        type=str,
        default=None,
        help="Comma-separated sitenames (default: all sites in step-1 manifest)",
    )
    add_option(
        "--min-gcc-points",
        type=int,
        default=MIN_GCC_POINTS,
        help=f"Minimum valid gcc_90 observations in-year (default: {MIN_GCC_POINTS})",
    )
    add_option(
        "--snr-threshold",
        type=float,
        default=SNR_THRESHOLD,
        help=f"Minimum AIC-spline SNR (default: {SNR_THRESHOLD})",
    )
    add_option(
        "--output-json",
        type=Path,
        default=None,
        help="Screening output (default: data/phenocam_screening/{year}.json)",
    )
    add_option(
        "--output-csv",
        type=Path,
        default=None,
        help="Flat CSV summary path",
    )
    add_option(
        "--cluster-radius-m",
        type=float,
        default=CLUSTER_RADIUS_M,
        help=f"Deduplicate SNR-passed sites within this radius (default: {CLUSTER_RADIUS_M})",
    )
    add_option(
        "--no-cluster",
        action="store_true",
        help="Skip nearby-site deduplication gate",
    )
    options = parser.parse_args(argv)

    year = options.evaluation_year
    manifest_path = Path("data") / "phenocam" / f"{year}.json"
    if not manifest_path.is_file():
        raise SystemExit(f"Step-1 manifest not found: {manifest_path}")

    # Empty entries (e.g. a trailing comma) are dropped from the site filter.
    wanted_sites = (
        {name.strip() for name in options.sites.split(",") if name.strip()}
        if options.sites
        else None
    )

    manifest = load_manifest(manifest_path)
    results = run_screening(
        manifest,
        resolve_sites_dir(manifest_path, manifest),
        evaluation_year=year,
        min_gcc_points=options.min_gcc_points,
        snr_threshold=options.snr_threshold,
        site_filter=wanted_sites,
    )

    if not options.no_cluster:
        demoted = apply_cluster_gate(results, radius_m=options.cluster_radius_m)
        if demoted:
            print(f"[PhenoCam-2] Cluster dedup: demoted {demoted} nearby duplicate(s)")
    print_summary(results, year)

    output_dir = Path("data") / "phenocam_screening"
    json_target = options.output_json or (output_dir / f"{year}.json")
    write_screening_json(results, json_target, year)
    csv_target = options.output_csv or (output_dir / f"{year}.csv")
    write_screening_csv(results, csv_target)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

805
3-sentinel-data.py Normal file
View file

@ -0,0 +1,805 @@
"""Step 3: Download S2 and S3 rasters and prepare EFAST inputs.
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
- ``phenocam_screening/{year}.json`` step-2 PASS sites (coordinates included)
Outputs (``data/``):
- ``sentinel_data/{year}/{sitename}/raw/s3/*.tif`` S3 SYN L2 per-date GeoTIFFs
- ``sentinel_data/{year}/{sitename}/prepared/s2/`` S2 REFL + DIST_CLOUD GeoTIFFs
- ``sentinel_data/{year}/{sitename}/prepared/s3/`` S3 composite GeoTIFFs
- ``sentinel_data/{year}/{sitename}/data.json`` run summary
Requires ``CDSE_USER`` / ``CDSE_PASSWORD`` (``uv sync`` installs efast).
CLI:
- ``--evaluation-year`` (default 2025)
- ``--site`` (optional; default: all step-2 PASS sites)
Prior step: :mod:`2-phenocam-screening`.
Next step: :mod:`4-fusion`.
"""
from __future__ import annotations
import argparse
import json
import os
import shutil
import time
from datetime import datetime
from pathlib import Path
from typing import Any
import netCDF4
import numpy as np
import openeo
import rasterio
import requests
from dotenv import load_dotenv
from pystac_client import Client
from rasterio import shutil as rio_shutil
from rasterio.enums import Resampling
from rasterio.errors import WindowError
from rasterio.transform import from_bounds
from rasterio.vrt import WarpedVRT
from rasterio.warp import transform_geom
from rasterio.windows import Window
from rasterio.windows import from_bounds as window_from_bounds
from rasterio.windows import transform as window_transform
from shapely import wkt as shapely_wkt
from tqdm import tqdm
# ---------------------------------------------------------------------------
# Public constants — edit here to change pipeline behaviour
# ---------------------------------------------------------------------------
# Sentinel-2 band IDs; each must be a key of ``_BAND_ASSETS`` below.
S2_BANDS = ["B02", "B03", "B04"]
# NOTE(review): S3_BANDS and S3_BAND_NAMES look like parallel lists (same
# length, same Oa04/06/08/17 order) -- confirm they are paired by index.
S3_BANDS = [
    "Syn_Oa04_reflectance",
    "Syn_Oa06_reflectance",
    "Syn_Oa08_reflectance",
    "Syn_Oa17_reflectance",
]
S3_BAND_NAMES = ["SDR_Oa04", "SDR_Oa06", "SDR_Oa08", "SDR_Oa17"]
RESOLUTION_RATIO = 30  # default ``ratio`` of download_s2_window (output padded to multiples of it)
S3_MOSAIC_DAYS = 100
S3_COMPOSITE_STEP = 2
S3_COMPOSITE_SIGMA_DOY = 10
S3_COMPOSITE_D = 20
S3_SMOOTHING_STD = 1
S3_REFLECTANCE_SCALE = 10_000  # OpenEO SYN L2 SDR → 0–1 (EFAST expects < 5)
# ---------------------------------------------------------------------------
# Internal S2 constants
# ---------------------------------------------------------------------------
EARTH_SEARCH_URL = "https://earth-search.aws.element84.com/v1"  # STAC API opened by stac_search_s2
# Sentinel-2 band ID -> asset key looked up in ``item.assets`` by _read_bands.
_BAND_ASSETS: dict[str, str] = {
    "B02": "blue",
    "B03": "green",
    "B04": "red",
    "B05": "rededge1",
    "B06": "rededge2",
    "B07": "rededge3",
    "B08": "nir",
    "B8A": "nir08",
    "B11": "swir16",
    "B12": "swir22",
}
_SCL_ASSET = "scl"  # asset key of the scene-classification layer read by _cloud_mask
_MIN_BBOX_HALF_DEG = 0.008  # half-width (degrees) wkt_to_bbox adds around a point geometry
# ---------------------------------------------------------------------------
# Internal S3 constants
# ---------------------------------------------------------------------------
CDSE_TOKEN_URL = (
    "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/"
    "protocol/openid-connect/token"
)
OPENEO_URL = "openeo.dataspace.copernicus.eu"
S3_COLLECTION = "SENTINEL3_SYN_L2_SYN"
DATA_DIR = Path("data")  # base data directory, relative to the current working directory
DEFAULT_YEAR = 2025
# ---------------------------------------------------------------------------
# Credentials
# ---------------------------------------------------------------------------
def _cdse_credentials() -> dict[str, str | None]:
    """Return the CDSE login read from ``CDSE_USER`` / ``CDSE_PASSWORD``.

    ``load_dotenv()`` is called first; a variable that is not set yields
    ``None`` for the corresponding key.
    """
    load_dotenv()
    username = os.getenv("CDSE_USER")
    password = os.getenv("CDSE_PASSWORD")
    return {"username": username, "password": password}
# ---------------------------------------------------------------------------
# Screening manifest helpers
# ---------------------------------------------------------------------------
def _load_screening_pass_sites(year: int) -> list[dict[str, Any]]:
    """Return the PASS sites recorded in the step-2 screening JSON for ``year``.

    Each entry has ``sitename``, ``lat`` and ``lon`` keys. Rows lacking a
    sitename or either coordinate are left out.

    Raises:
        FileNotFoundError: if ``phenocam_screening/{year}.json`` is missing
            under ``DATA_DIR``.
    """
    path = DATA_DIR / "phenocam_screening" / f"{year}.json"
    if not path.is_file():
        raise FileNotFoundError(f"Step-2 screening manifest not found: {path}")
    payload = json.loads(path.read_text(encoding="utf-8"))

    passing: list[dict[str, Any]] = []
    for row in payload.get("sites", []):
        if row.get("calculations", {}).get("status") != "PASS":
            continue
        camera = row.get("response", {}).get("camera", {})
        name = camera.get("Sitename")
        lat = camera.get("Lat")
        lon = camera.get("Lon")
        if not name or lat is None or lon is None:
            continue
        passing.append({"sitename": str(name), "lat": float(lat), "lon": float(lon)})
    return passing
# ---------------------------------------------------------------------------
# S2: geometry helpers (from s2_cloud_native.py)
# ---------------------------------------------------------------------------
def wkt_to_bbox(geometry_wkt: str) -> list[float]:
    """Convert a WKT geometry to a ``[west, south, east, north]`` bbox.

    A geometry whose bounds collapse to a single point is grown by
    ``_MIN_BBOX_HALF_DEG`` in every direction; other geometries keep their
    exact bounds.
    """
    west, south, east, north = shapely_wkt.loads(geometry_wkt).bounds
    is_point = west == east and south == north
    if is_point:
        pad = _MIN_BBOX_HALF_DEG
        west -= pad
        east += pad
        south -= pad
        north += pad
    return [west, south, east, north]
def _boa_offset(item: Any) -> int:
"""Return the BOA additive offset for a STAC item.
Processing baseline >= 04.00 applies a -1000 offset; earlier baselines use 0.
"""
if item.properties.get("earthsearch:boa_offset_applied"):
return 0
baseline_str = str(
item.properties.get("processing:baseline")
or item.properties.get("s2:processing_baseline")
or "0"
)
try:
baseline = float(baseline_str)
except ValueError:
baseline = 0.0
return -1000 if baseline >= 4.0 else 0
def _window_for_bbox(
    src: rasterio.io.DatasetReader,
    bbox_4326: list[float],
) -> Window | None:
    """Return the rasterio Window for an EPSG:4326 bbox clipped to src bounds.

    The bbox corners are reprojected into the dataset CRS and the window is
    built from the min/max of the reprojected corners.

    Returns:
        The clipped window, or ``None`` when intersecting with the dataset
        extent raises ``WindowError``.
    """
    west, south = bbox_4326[0], bbox_4326[1]
    east, north = bbox_4326[2], bbox_4326[3]
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    bbox_geom = {"type": "Polygon", "coordinates": [ring]}
    projected = transform_geom("EPSG:4326", src.crs.to_wkt(), bbox_geom)

    # Drop the ring's closing vertex: four corners are enough for min/max.
    corners = projected["coordinates"][0][:4]
    xs = [corner[0] for corner in corners]
    ys = [corner[1] for corner in corners]
    window = window_from_bounds(min(xs), min(ys), max(xs), max(ys), src.transform)
    try:
        return window.intersection(Window(0, 0, src.width, src.height))
    except WindowError:
        return None
def _read_window(
    href: str,
    bbox_4326: list[float],
    out_shape: tuple[int, int] | None = None,
    resampling: Resampling = Resampling.bilinear,
) -> tuple[np.ndarray, dict[str, Any]] | None:
    """Read band 1 of the raster at ``href`` for the bbox window.

    Args:
        href: path or URL passed to ``rasterio.open``.
        bbox_4326: ``[west, south, east, north]`` in EPSG:4326.
        out_shape: optional ``(height, width)`` the window is resampled to.
        resampling: resampling method used by the read.

    Returns:
        ``(data, profile)`` or ``None`` when ``_window_for_bbox`` finds no
        window. ``profile`` carries ``crs``, ``transform``, ``height``,
        ``width`` and ``dtype``; its ``transform`` is derived from the window
        and the source transform only, so it is not rescaled for ``out_shape``.
    """
    with rasterio.open(href) as src:
        window = _window_for_bbox(src, bbox_4326)
        if window is None:
            return None
        pixels = src.read(1, window=window, out_shape=out_shape, resampling=resampling)
        n_rows, n_cols = pixels.shape[0], pixels.shape[1]
        profile: dict[str, Any] = {
            "crs": src.crs,
            "transform": window_transform(window, src.transform),
            "height": n_rows,
            "width": n_cols,
            "dtype": src.dtypes[0],
        }
        return pixels, profile
def _read_bands(
item: Any,
bbox: list[float],
bands: list[str],
) -> tuple[list[np.ndarray], dict[str, Any]] | None:
"""Range-read all requested bands for one STAC item."""
band_arrays: list[np.ndarray] = []
ref_profile: dict[str, Any] | None = None
for band_name in bands:
asset_key = _BAND_ASSETS.get(band_name)
if asset_key is None or asset_key not in item.assets:
return None
ref_shape = (
(ref_profile["height"], ref_profile["width"]) if ref_profile else None
)
result = _read_window(item.assets[asset_key].href, bbox, out_shape=ref_shape)
if result is None:
return None
data, profile = result
if ref_profile is None:
ref_profile = profile
band_arrays.append(data.astype("float32"))
return (band_arrays, ref_profile) if ref_profile is not None else None
def _cloud_mask(item: Any, bbox: list[float], shape: tuple[int, int]) -> np.ndarray:
    """Return a boolean mask built from the item's SCL band.

    True marks SCL values 0, 3 and anything greater than 7. The SCL window is
    read with nearest-neighbour resampling at ``shape``. When the item has no
    SCL asset, or the read finds no window, an all-False mask of ``shape`` is
    returned, i.e. nothing is masked.
    """
    scl_asset = item.assets.get(_SCL_ASSET)
    if not scl_asset:
        return np.zeros(shape, dtype=bool)
    window_read = _read_window(
        scl_asset.href, bbox, out_shape=shape, resampling=Resampling.nearest
    )
    if window_read is None:
        return np.zeros(shape, dtype=bool)
    scl_data = window_read[0]
    return (scl_data == 0) | (scl_data == 3) | (scl_data > 7)
def _pad_to_multiple(arr: np.ndarray, ratio: int) -> np.ndarray:
"""Zero-pad (bands, H, W) so H and W are multiples of ``ratio``."""
pad_h = (ratio - arr.shape[1] % ratio) % ratio
pad_w = (ratio - arr.shape[2] % ratio) % ratio
if pad_h or pad_w:
arr = np.pad(arr, ((0, 0), (0, pad_h), (0, pad_w)), constant_values=0)
return arr
# ---------------------------------------------------------------------------
# S2: STAC search + download (from s2_cloud_native.py)
# ---------------------------------------------------------------------------
def stac_search_s2(
    bbox: list[float],
    start_date: datetime,
    end_date: datetime,
) -> list[Any]:
    """Search Earth Search for ``sentinel-2-l2a`` items intersecting a bbox.

    The interval runs from ``start_date`` (with its own time of day) to
    23:59:59 on ``end_date``. At most 10,000 items are requested, and items
    sharing an ``id`` are returned once.
    """
    client = Client.open(EARTH_SEARCH_URL)
    interval_start = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
    interval_end = end_date.strftime('%Y-%m-%dT23:59:59Z')
    search = client.search(
        collections=["sentinel-2-l2a"],
        bbox=bbox,
        datetime=f"{interval_start}/{interval_end}",
        max_items=10_000,
    )
    unique_items: dict[Any, Any] = {}
    for item in search.items():
        # A repeated id keeps its first position but takes the latest item.
        unique_items[item.id] = item
    return list(unique_items.values())
def download_s2_window(
    items: list[Any],
    bbox: list[float],
    output_dir: Path,
    bands: list[str],
    ratio: int = RESOLUTION_RATIO,
) -> None:
    """Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.

    For every item a ``{item.id}_REFL.tif`` is written straight into
    ``output_dir`` (no intermediate raw download).  Items whose output file
    already exists are skipped, which makes the function resumable.

    Processing per item:

    * the requested ``bands`` are read for ``bbox`` via ``_read_bands``;
    * the offset from ``_boa_offset`` is added and the result divided by 10 000;
    * negative values are clipped to 0 and pixels flagged by ``_cloud_mask``
      are set to 0 (which is also the declared nodata value);
    * the stack is zero-padded so height and width are multiples of ``ratio``.

    The GeoTIFF is first written under a temporary ``.part`` name and renamed
    only after the write succeeded.  Without this, an interrupted run would
    leave a truncated ``*_REFL.tif`` that the ``is_file()`` resume check would
    treat as complete forever after.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for item in tqdm(items, unit="granule", desc="S2 COG window read"):
        out_path = output_dir / f"{item.id}_REFL.tif"
        if out_path.is_file():
            continue

        bands_result = _read_bands(item, bbox, bands)
        if bands_result is None:
            tqdm.write(f"[S2] Skipping {item.id}: missing asset or no bbox overlap")
            continue
        band_arrays, ref_profile = bands_result

        target_shape = (ref_profile["height"], ref_profile["width"])
        mask = _cloud_mask(item, bbox, target_shape)

        stacked = (np.stack(band_arrays) + _boa_offset(item)) / 10_000.0
        np.clip(stacked, 0, None, out=stacked)
        stacked[:, mask] = 0.0
        stacked = _pad_to_multiple(stacked, ratio)

        out_profile = {
            "driver": "GTiff",
            "count": len(bands),
            "dtype": "float32",
            "nodata": 0,
            "crs": ref_profile["crs"],
            "transform": ref_profile["transform"],
            "height": stacked.shape[1],
            "width": stacked.shape[2],
            "compress": "lzw",
        }

        # The temp name deliberately does not end in ".tif" so that no
        # "*.tif" glob can pick up a half-written file.
        tmp_path = out_path.with_name(out_path.name + ".part")
        try:
            with rasterio.open(tmp_path, "w", **out_profile) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(bands, 1):
                    dst.set_band_description(i, band_name)
            tmp_path.replace(out_path)
        finally:
            # No-op after a successful rename; removes the partial file otherwise.
            tmp_path.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# S3: download (from s3_openeo.py)
# ---------------------------------------------------------------------------
def _utm_epsg(bbox: list[float]) -> int:
    """Return the UTM EPSG code for the centre of a ``[W, S, E, N]`` bbox.

    The code is ``326zz`` for a centre latitude >= 0 and ``327zz`` otherwise,
    where ``zz`` is the 6-degree longitude zone of the bbox centre.

    The zone is clamped to 1..60: a centre longitude of exactly 180 degrees
    would otherwise produce zone 61, which is not a valid EPSG code.
    """
    lon = (bbox[0] + bbox[2]) / 2
    lat = (bbox[1] + bbox[3]) / 2
    zone = int((lon + 180) / 6) + 1
    zone = min(max(zone, 1), 60)
    return 32600 + zone if lat >= 0 else 32700 + zone
def _cdse_token(username: str, password: str) -> str:
    """Request a CDSE bearer token using the password grant.

    Posts the credentials to ``CDSE_TOKEN_URL`` with a 30 s timeout, raises
    for any HTTP error status, and returns the ``access_token`` field of the
    JSON response.
    """
    form = {
        "grant_type": "password",
        "username": username,
        "password": password,
        "client_id": "cdse-public",
    }
    response = requests.post(CDSE_TOKEN_URL, data=form, timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]
def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
    """Split an OpenEO NetCDF into per-timestep GeoTIFFs and return their count.

    One file named ``S3_{YYYYMMDD}_{n}__{YYYYMMDD}T120000.tif`` is written per
    time step, where ``n`` counts repeated time steps on the same date.  The
    name follows the ``S3*__YYYYMMDDTHHMMSS.tif`` pattern that
    ``s3_processing.produce_median_composite`` is stated to expect.

    Details:

    * ``x``/``y`` are treated as cell centres, so the raster bounds are
      extended by half a pixel on each side;
    * if ``y`` is ascending, rows are flipped so the raster is north-up;
    * masked NetCDF values are written as NaN, and values are divided by
      ``S3_REFLECTANCE_SCALE``.

    Each band is converted to a plain NaN-filled array *before* stacking.
    ``np.stack`` on masked arrays does not preserve the input masks, so
    filling after stacking would leave the raw fill values in the output.
    """
    written = 0
    with netCDF4.Dataset(str(nc_path), "r") as nc:
        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
        x_coords = np.asarray(nc.variables["x"][:], dtype=float)
        y_coords = np.asarray(nc.variables["y"][:], dtype=float)

        # Coordinates are cell centres: widen the extent by half a pixel.
        half_x = abs(x_coords[1] - x_coords[0]) / 2 if len(x_coords) > 1 else 0.0
        half_y = abs(y_coords[1] - y_coords[0]) / 2 if len(y_coords) > 1 else 0.0
        transform = from_bounds(
            x_coords.min() - half_x,
            y_coords.min() - half_y,
            x_coords.max() + half_x,
            y_coords.max() + half_y,
            len(x_coords),
            len(y_coords),
        )
        # The transform above is north-up; ascending y needs a row flip.
        flip_y = len(y_coords) > 1 and y_coords[0] < y_coords[-1]

        date_counts: dict[str, int] = {}
        for t_idx, time_val in enumerate(times):
            date_str = time_val.strftime("%Y%m%d")
            n = date_counts.get(date_str, 0)
            date_counts[date_str] = n + 1

            # Cast to float first (NaN cannot fill an integer array), then
            # replace masked cells with NaN while the mask is still attached.
            band_layers = [
                np.ma.filled(
                    np.ma.asarray(nc.variables[b][t_idx, :, :]).astype("float32"),
                    np.nan,
                )
                for b in S3_BANDS
            ]
            stacked = np.stack(band_layers, axis=0) / S3_REFLECTANCE_SCALE
            if flip_y:
                stacked = stacked[:, ::-1, :]

            filename = f"S3_{date_str}_{n}__{date_str}T120000.tif"
            with rasterio.open(
                output_dir / filename,
                "w",
                driver="GTiff",
                height=len(y_coords),
                width=len(x_coords),
                count=len(S3_BANDS),
                dtype="float32",
                nodata=float("nan"),
                crs=f"EPSG:{epsg}",
                transform=transform,
                compress="lzw",
            ) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(S3_BAND_NAMES, 1):
                    dst.set_band_description(i, band_name)
            written += 1
    return written
def download_s3_openeo(
    start_date: datetime,
    end_date: datetime,
    aoi_geometry: str,
    output_dir: Path,
    credentials: dict[str, str | None],
) -> None:
    """Fetch S3 SYN L2 SDR for an AOI through CDSE OpenEO (server-side clip).

    The datacube is downloaded as a single NetCDF into ``output_dir``, split
    into per-date ``S3_{YYYYMMDD}_{n}__{YYYYMMDD}T120000.tif`` files, and the
    NetCDF is then removed.  Nothing is done when ``output_dir`` already holds
    any ``S3*.tif`` file.

    ``credentials`` must provide ``"username"`` and ``"password"`` for the
    CDSE token request.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    already_present = any(output_dir.glob("S3*.tif"))
    if already_present:
        print("[S3-OEO] Skipping — output_dir already contains S3 GeoTIFFs")
        return

    bbox = wkt_to_bbox(aoi_geometry)
    epsg = _utm_epsg(bbox)
    spatial_extent = {
        "west": bbox[0],
        "east": bbox[2],
        "south": bbox[1],
        "north": bbox[3],
    }

    print("[S3-OEO] Authenticating with CDSE...")
    user = credentials["username"]
    secret = credentials["password"]
    token = _cdse_token(user, secret)  # type: ignore[arg-type]
    connection = openeo.connect(OPENEO_URL)
    connection.authenticate_oidc_access_token(token)

    start_str = start_date.strftime("%Y-%m-%d")
    end_str = end_date.strftime("%Y-%m-%d")
    print(f"[S3-OEO] Loading {S3_COLLECTION} ({start_str}{end_str})...")
    collection = connection.load_collection(
        S3_COLLECTION,
        spatial_extent=spatial_extent,
        temporal_extent=[start_str, end_str],
        bands=S3_BANDS,
    )
    # Resample to the UTM projection chosen for the bbox centre.
    datacube = collection.resample_spatial(projection=epsg)

    nc_path = output_dir / "_s3_syn_l2.nc"
    print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
    started = time.time()
    datacube.download(str(nc_path), format="NetCDF")
    print(f"[S3-OEO] Download completed in {time.time() - started:.1f}s")

    print("[S3-OEO] Splitting into per-date GeoTIFFs...")
    written = _netcdf_to_geotiffs(nc_path, output_dir, epsg)
    nc_path.unlink(missing_ok=True)
    print(f"[S3-OEO] {written} GeoTIFFs written to {output_dir}")
# ---------------------------------------------------------------------------
# S2: distance_to_clouds helper
# ---------------------------------------------------------------------------
def _import_distance_to_clouds():
    """Lazily import and return ``efast.s2_processing.distance_to_clouds``.

    Raises ``ImportError`` with an installation hint when ``efast`` (or the
    requested name) cannot be imported; the original error is chained.
    """
    try:
        from efast.s2_processing import distance_to_clouds
    except ImportError as exc:
        raise ImportError("efast not found. Install with: uv sync") from exc
    else:
        return distance_to_clouds
def _rescale_dist_cloud(s2_dir: Path) -> None:
    """Rewrite DIST_CLOUD rasters whose values look normalised to [0, 1].

    For every ``*DIST_CLOUD.tif`` in ``s2_dir`` whose band-1 maximum
    (ignoring NaN) is <= 1, all values > 0 are replaced by ``2.0`` and the
    band is written back in place as float32.  Other files are left untouched.
    """
    for tif in s2_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(tif) as reader:
            dist = reader.read(1)
            if not float(np.nanmax(dist)) <= 1:
                continue
            patched = np.where(dist > 0, 2.0, dist).astype(np.float32)
            # Re-open the same file in update mode to overwrite band 1.
            with rasterio.open(tif, "r+") as writer:
                writer.write(patched, 1)
# ---------------------------------------------------------------------------
# S3: compositing + reprojection helpers (from 4-sentinel-data.py)
# ---------------------------------------------------------------------------
def _import_s3_processing():
    """Lazily import and return the ``efast.s3_processing`` module.

    Raises ``ImportError`` with an installation hint when the import fails;
    the original error is chained as the cause.
    """
    try:
        from efast import s3_processing
    except ImportError as exc:
        raise ImportError("efast not found. Install with: uv sync") from exc
    else:
        return s3_processing
def _reproject_s3_composites_to_s2_grid(
    composite_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    resolution_ratio: int = RESOLUTION_RATIO,
) -> None:
    """Warp every S3 composite onto the S2 extent at low (S3-like) resolution.

    The target grid shares the bounds and CRS of ``s2_refl_path``; its width
    and height are the S2 dimensions integer-divided by ``resolution_ratio``.
    Each ``composite_*.tif`` in ``composite_dir`` is resampled with cubic
    convolution and written to ``s3_out_dir`` as ``composite_{date}.tif``
    (dashes removed from the date part) with float32 dtype and nodata 0.
    """
    s3_out_dir.mkdir(parents=True, exist_ok=True)

    with rasterio.open(s2_refl_path) as s2_ref:
        bounds = s2_ref.bounds
        target_crs = s2_ref.crs
        width = s2_ref.width // resolution_ratio
        height = s2_ref.height // resolution_ratio

    target_transform = rasterio.transform.from_bounds(
        bounds.left, bounds.bottom, bounds.right, bounds.top, width, height
    )
    # Identical for every composite, so it is built once.
    warp_options = {
        "transform": target_transform,
        "height": height,
        "width": width,
        "crs": target_crs,
        "resampling": Resampling.cubic,
    }

    for composite in sorted(composite_dir.glob("composite_*.tif")):
        # Everything after the first underscore is the date; drop its dashes.
        date_part = composite.stem.partition("_")[2].replace("-", "")
        destination = s3_out_dir / f"composite_{date_part}.tif"
        with rasterio.open(composite) as s3_src, WarpedVRT(s3_src, **warp_options) as vrt:
            out_profile = vrt.profile.copy()
            out_profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
            rio_shutil.copy(vrt, destination, **out_profile)
def _s3_reflectance_scale(raw_s3_dir: Path) -> float:
    """Return the multiplier that brings raw S3 values into reflectance range.

    Every ``S3*.tif`` in ``raw_s3_dir`` is read in full.  As soon as one file
    has a finite maximum above 5, the data is taken to be unscaled and
    ``1 / S3_REFLECTANCE_SCALE`` is returned; otherwise the result is ``1.0``.
    """
    for tif in raw_s3_dir.glob("S3*.tif"):
        with rasterio.open(tif) as src:
            peak = float(np.nanmax(src.read()))
        if not np.isfinite(peak):
            continue
        if peak > 5:
            return 1.0 / S3_REFLECTANCE_SCALE
    return 1.0
def _stage_s3_for_efast(raw_s3_dir: Path, staging_dir: Path) -> int:
    """Copy the raw ``S3*.tif`` files into a fresh staging directory.

    ``staging_dir`` is wiped and recreated.  Each file is rewritten as float32
    after multiplication by the factor from ``_s3_reflectance_scale`` and
    keeps its non-empty band descriptions.  Returns the number of files staged.
    """
    scale = _s3_reflectance_scale(raw_s3_dir)

    if staging_dir.exists():
        shutil.rmtree(staging_dir)
    staging_dir.mkdir(parents=True)

    staged = 0
    for raw_tif in sorted(raw_s3_dir.glob("S3*.tif")):
        with rasterio.open(raw_tif) as src:
            pixels = src.read().astype("float32") * scale
            out_profile = src.profile.copy()
            out_profile.update(dtype="float32")
            band_labels = src.descriptions

        with rasterio.open(staging_dir / raw_tif.name, "w", **out_profile) as dst:
            dst.write(pixels)
            for band_no, label in enumerate(band_labels, 1):
                if label:
                    dst.set_band_description(band_no, label)
        staged += 1

    if scale != 1.0:
        print(f"[S3-PREP] Scaled raw SDR by {scale:g} for EFAST compositing")
    return staged
def _prepare_s3(
    raw_s3_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    work_dir: Path | None = None,
) -> None:
    """Run the EFAST S3 chain and reproject its result onto the S2 grid.

    Steps, each with its own scratch directory under the work directory:
    stage (scaled copies) -> median composite -> smoothing -> reformat.
    The reformatted composites are then reprojected into ``s3_out_dir``,
    replacing any ``composite_*.tif`` already there.

    When ``work_dir`` is not given, ``s3_out_dir / "_efast_work"`` is used and
    deleted afterwards; a caller-supplied ``work_dir`` is kept.

    Raises ``ValueError`` when ``raw_s3_dir`` contains no ``S3*.tif`` files.
    """
    s3 = _import_s3_processing()

    base = work_dir or (s3_out_dir / "_efast_work")
    staging = base / "scaled"
    composites = base / "composites"
    blurred = base / "blurred"
    calibrated = base / "calibrated"

    # Start every run from empty scratch directories.
    for scratch in (staging, composites, blurred, calibrated):
        if scratch.exists():
            shutil.rmtree(scratch)
        scratch.mkdir(parents=True, exist_ok=True)

    n_staged = _stage_s3_for_efast(raw_s3_dir, staging)
    if n_staged == 0:
        raise ValueError(f"No S3*.tif files found in {raw_s3_dir}")

    print(
        f"[S3-PREP] produce_median_composite: mosaic_days={S3_MOSAIC_DAYS}, "
        f"step={S3_COMPOSITE_STEP}, sigma_doy={S3_COMPOSITE_SIGMA_DOY}, "
        f"D={S3_COMPOSITE_D}"
    )
    s3.produce_median_composite(
        staging,
        composites,
        step=S3_COMPOSITE_STEP,
        mosaic_days=S3_MOSAIC_DAYS,
        s3_bands=[1, 2, 3, 4],
        D=S3_COMPOSITE_D,
        sigma_doy=S3_COMPOSITE_SIGMA_DOY,
    )
    s3.smoothing(
        composites,
        blurred,
        product="composite",
        std=S3_SMOOTHING_STD,
        preserve_nan=False,
    )
    s3.reformat_s3(blurred, calibrated, product="composite", scaling_factor=1)

    # Remove outputs of earlier runs before writing the new set.
    for stale in s3_out_dir.glob("composite_*.tif"):
        stale.unlink()
    _reproject_s3_composites_to_s2_grid(calibrated, s2_refl_path, s3_out_dir)

    if work_dir is None and base.exists():
        shutil.rmtree(base)

    n_out = sum(1 for _ in s3_out_dir.glob("composite_*.tif"))
    print(f"[S3-PREP] Wrote {n_out} composites")
# ---------------------------------------------------------------------------
# Per-site pipeline
# ---------------------------------------------------------------------------
def process_site(
    sitename: str,
    lat: float,
    lon: float,
    year: int,
) -> dict[str, Any]:
    """Download S2 + S3 for one site, prepare both for EFAST, and summarise.

    All data lands under ``DATA_DIR/sentinel_data/{year}/{sitename}``:
    raw S3 in ``raw/s3``, prepared rasters in ``prepared/s2`` and
    ``prepared/s3``.  The whole calendar ``year`` is requested.  The returned
    summary (site identity plus file counts) is also written to ``data.json``
    in the site directory.

    Raises ``ValueError`` when no ``*_REFL.tif`` exists after the S2 download.
    """
    site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
    prepared_dir = site_dir / "prepared"
    s2_out = prepared_dir / "s2"
    s3_raw = site_dir / "raw" / "s3"
    s3_out = prepared_dir / "s3"

    aoi_wkt = f"POINT ({lon} {lat})"
    bbox = wkt_to_bbox(aoi_wkt)
    creds = _cdse_credentials()

    # --- S3 download ---
    print(f"[{sitename}] Downloading S3...")
    download_s3_openeo(
        start_date=datetime(year, 1, 1),
        end_date=datetime(year, 12, 31),
        aoi_geometry=aoi_wkt,
        output_dir=s3_raw,
        credentials=creds,
    )

    # --- S2 download ---
    print(f"[{sitename}] Searching S2 on Earth Search...")
    year_start = datetime(year, 1, 1)
    year_end = datetime(year, 12, 31)
    items = stac_search_s2(bbox, year_start, year_end)
    print(f"[{sitename}] {len(items)} S2 items found — downloading windows...")
    download_s2_window(items, bbox, s2_out, S2_BANDS, RESOLUTION_RATIO)

    # --- S2 distance-to-clouds ---
    print(f"[{sitename}] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_out, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud(s2_out)

    # --- S3 compositing (any REFL file serves as the grid reference) ---
    s2_refl_path = next(iter(s2_out.glob("*_REFL.tif")), None)
    if s2_refl_path is None:
        raise ValueError(f"No REFL files in {s2_out} — S2 download may have failed")
    s3_out.mkdir(parents=True, exist_ok=True)
    print(f"[{sitename}] Running S3 compositing pipeline...")
    _prepare_s3(s3_raw, s2_refl_path, s3_out)

    def _count(folder: Path, pattern: str) -> int:
        return sum(1 for _ in folder.glob(pattern))

    summary = {
        "sitename": sitename,
        "evaluation_year": year,
        "lat": lat,
        "lon": lon,
        "s2_refl_count": _count(s2_out, "*_REFL.tif"),
        "s2_dist_cloud_count": _count(s2_out, "*_DIST_CLOUD.tif"),
        "s3_raw_count": _count(s3_raw, "S3*.tif"),
        "s3_composite_count": _count(s3_out, "composite_*.tif"),
    }
    site_dir.mkdir(parents=True, exist_ok=True)
    summary_text = json.dumps(summary, indent=2) + "\n"
    (site_dir / "data.json").write_text(summary_text, encoding="utf-8")
    return summary
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: process every step-2 PASS site (or just ``--site``).

    Returns 1 when there is nothing to process (no PASS sites, or the
    requested site is not among them) and 0 otherwise.  A failure in one site
    is reported on stdout and does not stop the remaining sites.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to process (default: all step-2 PASS sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year

    candidates = _load_screening_pass_sites(year)
    if not candidates:
        print("[Sentinel-3] No PASS sites found in step-2 screening output")
        return 1

    if args.site:
        candidates = [entry for entry in candidates if entry["sitename"] == args.site]
        if not candidates:
            print(f"[Sentinel-3] Site '{args.site}' not found in step-2 PASS sites")
            return 1

    print(f"[Sentinel-3] Processing {len(candidates)} site(s)")
    for position, entry in enumerate(candidates, 1):
        sitename = entry["sitename"]
        print(f"[Sentinel-3] ({position}/{len(candidates)}) {sitename}")
        try:
            summary = process_site(sitename, entry["lat"], entry["lon"], year)
            print(
                f"[Sentinel-3] {sitename} done — "
                f"{summary['s2_refl_count']} REFL, "
                f"{summary['s3_composite_count']} composites"
            )
        except Exception as exc:
            # Best effort: report and carry on with the next site.
            print(f"[Sentinel-3] {sitename} FAILED: {exc}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

330
4-fusion.py Normal file
View file

@ -0,0 +1,330 @@
"""Step 4: Compute GCC and run EFAST BtI + ItB fusion for prepared sites.
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
- ``sentinel_data/{year}/{sitename}/prepared/s2/`` ``*_REFL.tif`` + ``*_DIST_CLOUD.tif``
- ``sentinel_data/{year}/{sitename}/prepared/s3/`` ``composite_*.tif`` (4-band)
Outputs (``data/``):
- ``sentinel_data/{year}/{sitename}/prepared/s2/*_GCC.tif`` S2 GCC (in-place)
- ``sentinel_data/{year}/{sitename}/prepared/gcc_s3/*.tif`` S3 GCC composites
- ``fusion/{year}/{sitename}/bti/fusion/REFL_*.tif`` BtI fused 4-band reflectance
- ``fusion/{year}/{sitename}/bti/gcc/GCC_*.tif`` GCC derived from BtI fusion
- ``fusion/{year}/{sitename}/itb/s2/GCC_*.tif`` per-acquisition S2 GCC (simplified names)
- ``fusion/{year}/{sitename}/itb/s3/GCC_*.tif`` per-composite S3 GCC (simplified names)
- ``fusion/{year}/{sitename}/itb/fusion/GCC_*.tif`` ItB fused GCC
Requires ``uv sync`` (efast).
CLI:
- ``--evaluation-year`` (default 2025)
- ``--site`` (optional; default: all prepared sites under ``sentinel_data/{year}/``)
Prior step: :mod:`3-sentinel-data`.
"""
from __future__ import annotations
import argparse
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
import numpy as np
import rasterio
from dateutil import rrule
# ---------------------------------------------------------------------------
# Public constants
# ---------------------------------------------------------------------------
# Forwarded to ``efast.fusion`` as ``ratio``.
RESOLUTION_RATIO = 30
# Spacing in days between fusion dates; the same number of days is also
# trimmed from both ends of the detected S2 date range.
MOSAIC_STEP = 2
# Forwarded to ``efast.fusion`` as ``max_days``.
MAX_DAYS = 100
# Forwarded to ``efast.fusion`` as ``minimum_acquisition_importance``.
MINIMUM_ACQUISITION_IMPORTANCE = 0
# Relative path: resolved against the current working directory at run time.
DATA_DIR = Path("data")
# Default for the ``--evaluation-year`` CLI option.
DEFAULT_YEAR = 2025
# ---------------------------------------------------------------------------
# efast import helper
# ---------------------------------------------------------------------------
def _import_efast():
    """Lazily import and return the ``efast.efast`` module.

    Raises ``ImportError`` with an installation hint when the import fails;
    the original error is chained as the cause.
    """
    try:
        import efast.efast as efast_module
    except ImportError as exc:
        raise ImportError("efast not found. Install with: uv sync") from exc
    else:
        return efast_module
# ---------------------------------------------------------------------------
# GCC computation (from s2_cloud_native.py and s3_openeo.py)
# ---------------------------------------------------------------------------
def compute_gcc_s2(s2_dir: Path, output_dir: Path) -> None:
    """Write a single-band ``*_GCC.tif`` next to every S2 ``*_REFL.tif``.

    GCC is band 2 divided by the sum of bands 1-3 (band order B02/B03/B04).
    Pixels whose three-band sum is exactly zero (cloud-masked) are written
    as zero.  Existing output files are left untouched.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for refl_tif in sorted(s2_dir.glob("*_REFL.tif")):
        gcc_tif = output_dir / refl_tif.name.replace("_REFL.tif", "_GCC.tif")
        if gcc_tif.is_file():
            continue

        with rasterio.open(refl_tif) as src:
            blue = src.read(1)
            green = src.read(2)
            red = src.read(3)
            out_profile = src.profile

        brightness = blue + green + red
        # The small epsilon avoids division by zero; true zeros are reset below.
        ratio = green / (brightness + 1e-10)
        ratio[brightness == 0] = 0

        out_profile.update(count=1)
        with rasterio.open(gcc_tif, "w", **out_profile) as dst:
            dst.write(ratio[np.newaxis].astype("float32"))
def compute_gcc_s3(s3_dir: Path, output_dir: Path) -> None:
    """Write a single-band GCC GeoTIFF for every S3 ``composite_*.tif``.

    GCC is band 2 divided by the sum of bands 1-3 (band order
    Oa04/Oa06/Oa08/Oa17; the fourth band is not used).  Pixels whose sum is
    NaN are written as NaN.  Output files keep the input file name and are
    float32; existing outputs are left untouched.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for composite in sorted(s3_dir.glob("composite_*.tif")):
        gcc_tif = output_dir / composite.name
        if gcc_tif.is_file():
            continue

        with rasterio.open(composite) as src:
            blue = src.read(1)
            green = src.read(2)
            red = src.read(3)
            out_profile = src.profile

        brightness = blue + green + red
        ratio = green / (brightness + 1e-10)
        ratio[np.isnan(brightness)] = np.nan

        out_profile.update(count=1, dtype="float32")
        with rasterio.open(gcc_tif, "w", **out_profile) as dst:
            dst.write(ratio[np.newaxis].astype("float32"))
def compute_gcc_from_refl(refl_dir: Path, gcc_dir: Path) -> None:
    """Write ``GCC_*.tif`` for every fused ``REFL_*.tif`` (BtI output).

    GCC is band 2 divided by the sum of bands 1-3.  Pixels whose sum is
    exactly zero are written as zero.  The output name is the input name with
    ``REFL_`` replaced by ``GCC_``; existing outputs are left untouched.
    """
    gcc_dir.mkdir(parents=True, exist_ok=True)
    for refl_tif in sorted(refl_dir.glob("REFL_*.tif")):
        gcc_tif = gcc_dir / refl_tif.name.replace("REFL_", "GCC_")
        if gcc_tif.is_file():
            continue

        with rasterio.open(refl_tif) as src:
            blue = src.read(1)
            green = src.read(2)
            red = src.read(3)
            out_profile = src.profile

        brightness = blue + green + red
        ratio = green / (brightness + 1e-10)
        ratio[brightness == 0] = 0

        out_profile.update(count=1)
        with rasterio.open(gcc_tif, "w", **out_profile) as dst:
            dst.write(ratio[np.newaxis].astype("float32"))
# ---------------------------------------------------------------------------
# Date-range detection
# ---------------------------------------------------------------------------
def _refl_date_range(s2_dir: Path) -> tuple[datetime, datetime] | None:
    """Return the earliest and latest acquisition date among the REFL files.

    The date is taken from the first eight characters of the third
    ``_``-separated field of each ``*_REFL.tif`` stem, e.g.
    ``S2A_MSIL2A_20230911T114111_N0509_R025_T29PKT_20230911T153131_REFL.tif``.
    Files with fewer than three fields or an unparsable date are ignored.
    Returns ``None`` when no file yields a date.
    """
    stamps: list[datetime] = []
    for tif in s2_dir.glob("*_REFL.tif"):
        fields = tif.stem.split("_")
        if len(fields) < 3:
            continue
        try:
            stamps.append(datetime.strptime(fields[2][:8], "%Y%m%d"))
        except ValueError:
            continue
    return (min(stamps), max(stamps)) if stamps else None
# ---------------------------------------------------------------------------
# Per-site fusion
# ---------------------------------------------------------------------------
def fuse_site(sitename: str, year: int) -> dict[str, Any]:
    """Compute GCC and run both EFAST fusion variants for one prepared site.

    * ItB ("index then blend"): GCC is computed for S2 and S3 first, then the
      GCC rasters are fused.
    * BtI ("blend then index"): three-band reflectance is fused first, then
      GCC is derived from the fused reflectance.

    Fusion dates run every ``MOSAIC_STEP`` days, starting ``MOSAIC_STEP`` days
    after the first S2 acquisition and ending ``MOSAIC_STEP`` days before the
    last one.

    Raises ``FileNotFoundError`` when prepared S2 REFL or S3 composite files
    are missing, and ``ValueError`` when no date can be parsed from the REFL
    file names.  Returns a summary dict with the date range and file counts.
    """
    efast = _import_efast()

    prepared = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared"
    s2_dir = prepared / "s2"
    s3_dir = prepared / "s3"
    gcc_s3_dir = prepared / "gcc_s3"
    base = DATA_DIR / "fusion" / str(year) / sitename

    if not s2_dir.is_dir() or not any(s2_dir.glob("*_REFL.tif")):
        raise FileNotFoundError(f"No REFL files in {s2_dir}")
    if not s3_dir.is_dir() or not any(s3_dir.glob("composite_*.tif")):
        raise FileNotFoundError(f"No composite files in {s3_dir}")

    print(f"[{sitename}] Computing S2 GCC (in-place)...")
    compute_gcc_s2(s2_dir, s2_dir)
    print(f"[{sitename}] Computing S3 GCC...")
    compute_gcc_s3(s3_dir, gcc_s3_dir)

    date_range = _refl_date_range(s2_dir)
    if date_range is None:
        raise ValueError(f"Could not detect date range from REFL filenames in {s2_dir}")
    start, end = date_range
    print(f"[{sitename}] Date range: {start.date()}{end.date()}")

    margin = timedelta(MOSAIC_STEP)
    fusion_dates = list(
        rrule.rrule(
            rrule.DAILY,
            dtstart=start + margin,
            until=end - margin,
            interval=MOSAIC_STEP,
        )
    )
    fusion_options = {
        "ratio": RESOLUTION_RATIO,
        "max_days": MAX_DAYS,
        "minimum_acquisition_importance": MINIMUM_ACQUISITION_IMPORTANCE,
    }

    # --- ItB: GCC first, then fuse GCC ---
    itb_root = base / "itb"
    itb_s2 = itb_root / "s2"
    itb_s3 = itb_root / "s3"
    itb_fusion = itb_root / "fusion"
    for folder in (itb_s2, itb_s3, itb_fusion):
        folder.mkdir(parents=True, exist_ok=True)

    # Copies with simplified names (GCC_<date>.tif); existing copies are kept.
    for gcc_tif in sorted(s2_dir.glob("*_GCC.tif")):
        target = itb_s2 / f"GCC_{gcc_tif.stem.split('_')[2][:8]}.tif"
        if not target.exists():
            shutil.copy2(gcc_tif, target)
    for gcc_tif in sorted(gcc_s3_dir.glob("composite_*.tif")):
        target = itb_s3 / f"GCC_{gcc_tif.stem.split('_')[1]}.tif"
        if not target.exists():
            shutil.copy2(gcc_tif, target)

    print(f"[{sitename}] ItB: fusing GCC over {len(fusion_dates)} dates...")
    for when in fusion_dates:
        efast.fusion(when, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **fusion_options)

    # --- BtI: fuse reflectance (3-band, matching S2 B02/B03/B04), then derive GCC ---
    # S3 composites have 4 bands; strip band 4 (Oa17/NIR) so shapes match S2 REFL.
    s3_rgb_dir = prepared / "s3_rgb"
    s3_rgb_dir.mkdir(parents=True, exist_ok=True)
    for composite in sorted(s3_dir.glob("composite_*.tif")):
        rgb_tif = s3_rgb_dir / composite.name
        if rgb_tif.exists():
            continue
        with rasterio.open(composite) as src:
            rgb = src.read([1, 2, 3])
            rgb_profile = src.profile.copy()
        rgb_profile.update(count=3)
        with rasterio.open(rgb_tif, "w", **rgb_profile) as out:
            out.write(rgb)

    bti_fusion = base / "bti" / "fusion"
    bti_gcc = base / "bti" / "gcc"
    bti_fusion.mkdir(parents=True, exist_ok=True)

    print(f"[{sitename}] BtI: fusing REFL over {len(fusion_dates)} dates...")
    for when in fusion_dates:
        efast.fusion(when, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **fusion_options)

    print(f"[{sitename}] BtI: deriving GCC from fused REFL...")
    compute_gcc_from_refl(bti_fusion, bti_gcc)

    def _count_tifs(folder: Path) -> int:
        return sum(1 for _ in folder.glob("*.tif"))

    return {
        "sitename": sitename,
        "evaluation_year": year,
        "start": start.date().isoformat(),
        "end": end.date().isoformat(),
        "fusion_dates": len(fusion_dates),
        "itb_fusion_files": _count_tifs(itb_fusion),
        "bti_fusion_files": _count_tifs(bti_fusion),
        "bti_gcc_files": _count_tifs(bti_gcc),
    }
# ---------------------------------------------------------------------------
# Site discovery
# ---------------------------------------------------------------------------
def _discover_sites(year: int) -> list[str]:
    """List, sorted by name, the sites with prepared S2 REFL files for ``year``.

    A site qualifies when ``DATA_DIR/sentinel_data/{year}/{site}/prepared/s2``
    contains at least one ``*_REFL.tif``.  Returns an empty list when the
    year directory does not exist.
    """
    year_dir = DATA_DIR / "sentinel_data" / str(year)
    if not year_dir.is_dir():
        return []

    found: list[str] = []
    for entry in year_dir.iterdir():
        if not entry.is_dir():
            continue
        if any((entry / "prepared" / "s2").glob("*_REFL.tif")):
            found.append(entry.name)
    found.sort()
    return found
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: fuse ``--site`` or every prepared site of the year.

    Returns 1 when no site is available and 0 otherwise.  A failure in one
    site is reported on stdout and does not stop the remaining sites.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to fuse (default: all prepared sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year

    sites = [args.site] if args.site else _discover_sites(year)
    if not sites:
        print(f"[Fusion] No prepared sites found under data/sentinel_data/{year}/")
        return 1

    total = len(sites)
    print(f"[Fusion] Processing {total} site(s)")
    for position, sitename in enumerate(sites, 1):
        print(f"[Fusion] ({position}/{total}) {sitename}")
        try:
            summary = fuse_site(sitename, year)
            print(
                f"[Fusion] {sitename} done — "
                f"{summary['fusion_dates']} dates, "
                f"itb={summary['itb_fusion_files']} bti={summary['bti_fusion_files']} "
                f"bti_gcc={summary['bti_gcc_files']}"
            )
        except Exception as exc:
            # Best effort: report and carry on with the next site.
            print(f"[Fusion] {sitename} FAILED: {exc}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

695
5-metrics.py Normal file
View file

@ -0,0 +1,695 @@
"""Step 5: Pre-compute per-site GCC timeseries + raster index for the webapp.
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
- ``phenocam_screening/{year}.json`` qualifying sites + metadata
- ``phenocam/{year}/{site}_1day.csv`` daily GCC timeseries
- ``sentinel_data/{year}/{site}/prepared/s2/*_GCC.tif`` S2 GCC rasters
- ``sentinel_data/{year}/{site}/prepared/gcc_s3/composite_*.tif`` S3 GCC rasters
- ``fusion/{year}/{site}/bti/gcc/GCC_*.tif`` BtI GCC rasters
- ``fusion/{year}/{site}/itb/fusion/GCC_*.tif`` ItB GCC rasters
Outputs (``data/metrics/``):
- ``manifest.json`` years + per-site metadata
- ``{year}/{site}/gcc_phenocam.json`` PhenoCam ``gcc_90`` at matched dates
- ``{year}/{site}/gcc_s2.json`` S2 GCC (center pixel, cloud-free scenes)
- ``{year}/{site}/gcc_s2_whittaker.json`` Whittaker-smoothed S2 GCC
- ``{year}/{site}/gcc_s3.json`` S3 composite GCC
- ``{year}/{site}/gcc_s3_smooth.json`` S3 5-day moving average
- ``{year}/{site}/gcc_fusion_bti.json`` BtI fused GCC
- ``{year}/{site}/gcc_fusion_itb.json`` ItB fused GCC
- ``{year}/{site}/phenocam_images.json`` midday photo URLs for the viewer
- ``{year}/{site}/rasters_s2_refl.json`` S2 REFL paths (BtI view)
- ``{year}/{site}/rasters_s3_composite.json`` S3 composite paths (BtI view)
- ``{year}/{site}/rasters_s2_gcc.json`` S2 GCC paths (ItB view)
- ``{year}/{site}/rasters_s3_gcc.json`` S3 GCC paths (ItB view)
- ``{year}/{site}/rasters_fusion_bti_refl.json`` BtI fused REFL paths
- ``{year}/{site}/rasters_fusion_itb_gcc.json`` ItB fused GCC paths
- ``{year}/{site}/metrics.json`` NSE, RMSE, nRMSE, Pearson r vs PhenoCam per series
- ``{year}/{site}/bands_s2.json`` S2 center-pixel reflectance (B02, B03, B04) per scene
- ``{year}/{site}/bands_s3.json`` S3 center-pixel reflectance (Oa04, Oa06, Oa08, Oa17) per composite
- ``{year}/{site}/covariates.json`` spatial CV/std, S2/S3 counts, gap stats
CLI:
- ``--evaluation-year`` (default 2025)
- ``--site`` (optional; default: all qualifying sites with sentinel data)
"""
from __future__ import annotations
import argparse
import csv
import json
import re
from pathlib import Path
from typing import Any
import datetime
import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.transform import rowcol
from pyproj import Transformer
from scipy.stats import pearsonr
from tqdm import tqdm
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Relative path: resolved against the current working directory at run time.
DATA_DIR = Path("data")
# Matches the documented default of the ``--evaluation-year`` CLI option.
DEFAULT_YEAR = 2025
# GCC smoothing window for S3 moving average (days)
S3_SMOOTH_WINDOW = 5
# Whittaker lambda (penalised smoothing strength for S2);
# default ``lam`` of ``_whittaker_smooth``.
WHITTAKER_LAMBDA = 400.0
# Half-width in metres for the spatial heterogeneity footprint (~300 m = 1 S3 pixel)
SPATIAL_CV_HALF_M = 150
# PhenoCam archive image URL pattern; the placeholders are filled per CSV row
# in ``_parse_phenocam_csv``.
PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
# ---------------------------------------------------------------------------
# Helpers: raster pixel extraction
# ---------------------------------------------------------------------------
def _read_center_pixel(path: Path, lat: float, lon: float) -> float | None:
    """Return the mean of the (up to) 3×3 pixel patch around ``(lat, lon)``.

    The WGS84 coordinate is transformed into the raster CRS, the patch is
    clipped to the raster extent, and nodata, zero and NaN pixels are
    excluded from the mean.  Returns ``None`` when no valid pixel remains or
    when anything goes wrong while reading (best effort by design).
    """
    try:
        with rasterio.open(path) as src:
            to_raster_crs = Transformer.from_crs(
                CRS.from_epsg(4326), src.crs, always_xy=True
            )
            x, y = to_raster_crs.transform(lon, lat)
            row, col = rowcol(src.transform, x, y)

            # One pixel on each side of the centre, clipped to the raster.
            top = max(0, row - 1)
            bottom = min(src.height, row + 2)
            left = max(0, col - 1)
            right = min(src.width, col + 2)
            window = rasterio.windows.Window(left, top, right - left, bottom - top)

            patch = src.read(1, window=window).astype(float)
            nodata = src.nodata

        if nodata is not None:
            patch = np.where(patch == nodata, np.nan, patch)
        patch[patch == 0] = np.nan
        mean = np.nanmean(patch)
        if np.isnan(mean):
            return None
        return float(mean)
    except Exception:
        return None
# ---------------------------------------------------------------------------
# Helpers: date extraction from filenames
# ---------------------------------------------------------------------------
def _date_from_gcc_tif(path: Path) -> str | None:
    """Return the first run of eight digits in the file stem, or ``None``.

    Intended for names such as ``GCC_YYYYMMDD.tif`` and
    ``composite_YYYYMMDD.tif``.
    """
    found = re.search(r"(\d{8})", path.stem)
    if found is None:
        return None
    return found.group(1)
def _date_from_s2_tif(path: Path) -> str | None:
    """Return ``YYYYMMDD`` from an S2 product name ``S2X_TTTT_YYYYMMDD_…``.

    The date must be the leading eight digits of the third ``_``-separated
    field of the stem; otherwise ``None`` is returned.
    """
    fields = path.stem.split("_")
    if len(fields) < 3:
        return None
    found = re.match(r"(\d{8})", fields[2])
    return found.group(1) if found else None
# ---------------------------------------------------------------------------
# Helpers: Whittaker smoother (2nd-order differences, dense linear solve)
# ---------------------------------------------------------------------------
def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
    """Smooth a series with a Whittaker penalised least-squares fit.

    The penalty is ``lam`` times the squared second-order differences.
    ``None`` entries get zero weight, so they do not influence the fit; they
    are returned as ``None`` again, so only observed positions carry a
    smoothed value.  Series shorter than four points, or a system that cannot
    be solved, yield an unmodified copy of the input.
    """
    n = len(values)
    if n < 4:
        return values[:]

    observed = [v is not None for v in values]
    y = np.array([0.0 if v is None else v for v in values], dtype=float)
    w = np.array(observed, dtype=float)  # weight 1 where observed, else 0

    # (n-2) x n second-difference operator.
    second_diff = np.diff(np.eye(n), n=2, axis=0)
    system = np.diag(w) + lam * second_diff.T @ second_diff
    try:
        z = np.linalg.solve(system, w * y)
    except np.linalg.LinAlgError:
        return values[:]

    return [float(fitted) if seen else None for fitted, seen in zip(z, observed)]
# ---------------------------------------------------------------------------
# Helpers: PhenoCam CSV parsing
# ---------------------------------------------------------------------------
def _parse_phenocam_csv(
    csv_path: Path, year: int, site: str
) -> tuple[list[dict], list[dict]]:
    """Return ``(gcc_series, image_list)`` for the rows belonging to ``year``.

    ``gcc_series`` entries: ``{"date": "YYYY-MM-DD", "gcc_90": float}``
    ``image_list`` entries: ``{"date": "YYYY-MM-DD", "url": str}``

    Lines starting with ``#`` are skipped before CSV parsing.  A ``gcc_90``
    value that is empty, ``NA`` or not a number contributes no series entry;
    a ``midday_filename`` that is empty or ``NA`` contributes no image entry.
    Both lists are empty when ``csv_path`` does not exist.

    Short (e.g. truncated) rows are tolerated: ``csv.DictReader`` reports
    their missing fields as ``None``, which is treated like an empty string
    here instead of raising ``AttributeError``.
    """
    gcc_series: list[dict] = []
    image_list: list[dict] = []
    year_str = str(year)
    if not csv_path.is_file():
        return gcc_series, image_list

    with csv_path.open() as f:
        reader = csv.DictReader(line for line in f if not line.startswith("#"))
        for row in reader:
            if row.get("year") != year_str:
                continue
            # ``or ""`` covers both a missing column and a short row (None).
            date = row.get("date") or ""

            gcc_raw = row.get("gcc_90")
            if gcc_raw and gcc_raw != "NA":
                try:
                    gcc_series.append({"date": date, "gcc_90": float(gcc_raw)})
                except ValueError:
                    # Non-numeric value: the day simply has no GCC entry.
                    pass

            fn = (row.get("midday_filename") or "").strip()
            if fn and fn != "NA" and date:
                # ``date`` is "YYYY-MM-DD"; characters 5-6 are the month.
                month = date[5:7]
                url = PHENOCAM_IMAGE_URL.format(
                    site=site, year=year_str, month=month, filename=fn
                )
                image_list.append({"date": date, "url": url})
    return gcc_series, image_list
# ---------------------------------------------------------------------------
# Helpers: moving average
# ---------------------------------------------------------------------------
def _moving_average(
    series: list[dict], value_key: str, window: int
) -> list[dict]:
    """Smooth ``series[value_key]`` with a centred moving average.

    Each output point averages the non-``None`` values found ``window // 2``
    positions either side of it (the window is clipped at both ends of the
    series). The result is a new list of ``{"date", value_key + "_smooth"}``
    dicts; the smoothed value is ``None`` when the window holds no values.
    """
    if not series:
        return []

    reach = window // 2
    raw = [point[value_key] for point in series]
    out_key = value_key + "_smooth"

    averaged: list[dict] = []
    for idx, point in enumerate(series):
        neighbours = [v for v in raw[max(0, idx - reach): idx + reach + 1] if v is not None]
        mean = sum(neighbours) / len(neighbours) if neighbours else None
        averaged.append({"date": point["date"], out_key: mean})
    return averaged
# ---------------------------------------------------------------------------
# Helpers: validation metrics
# ---------------------------------------------------------------------------
MATCH_TOLERANCE_DAYS = 5


def compute_metrics(
    ref: list[dict], ref_key: str,
    pred: list[dict], pred_key: str,
) -> dict | None:
    """Compute NSE, RMSE, nRMSE and Pearson r between ``pred`` and ``ref``.

    Each pred point is matched to the nearest ref date within
    ``MATCH_TOLERANCE_DAYS`` (the earlier ref date wins a tie).

    Args:
        ref: Reference points, each with a ``"date"`` (ISO ``YYYY-MM-DD``)
            and ``ref_key``. Points with a missing value or an empty date
            are ignored.
        ref_key: Key of the reference value in each ``ref`` dict.
        pred: Predicted points with ``"date"`` and ``pred_key``; points whose
            value is ``None`` are skipped.
        pred_key: Key of the predicted value in each ``pred`` dict.

    Returns:
        ``{"n", "rmse", "nrmse", "nse", "r"}`` rounded to 4 decimals, or
        ``None`` if fewer than 2 matched pairs exist. A metric that is
        undefined (zero reference mean/variance, constant series) or
        non-finite is reported as ``None`` so the dict always serialises to
        valid JSON.
    """
    # An empty date string parses to NaT, which would poison every distance
    # comparison below, so such reference rows are dropped up front.
    ref_lookup: dict[str, float] = {
        p["date"]: p[ref_key]
        for p in ref
        if p.get(ref_key) is not None and p["date"]
    }
    if not ref_lookup:
        return None

    ref_dates = sorted(ref_lookup)
    # Parse the reference dates once instead of once per pred point.
    ref_days = np.array(ref_dates, dtype="datetime64[D]")
    one_day = np.timedelta64(1, "D")

    obs, sim = [], []
    for pt in pred:
        v = pt.get(pred_key)
        if v is None:
            continue
        gaps = np.abs((ref_days - np.datetime64(pt["date"], "D")) / one_day)
        # argmin returns the first minimum, i.e. the earlier date on a tie.
        idx = int(np.argmin(gaps))
        if gaps[idx] <= MATCH_TOLERANCE_DAYS:
            obs.append(ref_lookup[ref_dates[idx]])
            sim.append(v)
    if len(obs) < 2:
        return None

    obs_arr = np.array(obs)
    sim_arr = np.array(sim)
    obs_mean = obs_arr.mean()
    rmse = float(np.sqrt(np.mean((sim_arr - obs_arr) ** 2)))
    nrmse = float(rmse / obs_mean) if obs_mean else None
    ss_res = float(np.sum((obs_arr - sim_arr) ** 2))
    ss_tot = float(np.sum((obs_arr - obs_mean) ** 2))
    nse = (1.0 - ss_res / ss_tot) if ss_tot else None

    # Pearson r is undefined when either series is constant; pearsonr would
    # return NaN (and warn), so report None instead.
    if np.all(obs_arr == obs_arr[0]) or np.all(sim_arr == sim_arr[0]):
        r = None
    else:
        r = float(pearsonr(obs_arr, sim_arr)[0])

    def _r4(v: float | None) -> float | None:
        # NaN/inf would be emitted by json.dumps as invalid JSON tokens.
        if v is None or not np.isfinite(v):
            return None
        return round(float(v), 4)

    return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(r)}
S2_BAND_NAMES = ["B02", "B03", "B04"]
S3_BAND_NAMES = ["Oa04", "Oa06", "Oa08", "Oa17"]


def _read_multiband_center(
    path: Path, lat: float, lon: float, band_names: list[str]
) -> dict[str, float | None]:
    """Average each band over the 3×3 pixel neighbourhood around (lat, lon).

    ``band_names[k]`` labels raster band ``k + 1``. Pixels equal to the
    raster's nodata value or to zero are ignored. A band that is absent from
    the raster, or has no usable pixel in the window, maps to ``None``;
    otherwise the mean is rounded to 6 decimals.

    Any exception (unreadable file, reprojection failure, ...) is swallowed
    and reported as ``None`` for every band.
    """
    try:
        with rasterio.open(path) as src:
            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
            x, y = to_raster_crs.transform(lon, lat)
            row, col = rowcol(src.transform, x, y)

            # One pixel either side of the centre, clipped to the raster extent.
            top, bottom = max(0, row - 1), min(src.height, row + 2)
            left, right = max(0, col - 1), min(src.width, col + 2)
            window = rasterio.windows.Window(left, top, right - left, bottom - top)
            nodata = src.nodata

            means: dict[str, float | None] = {}
            for band_index, band in enumerate(band_names, 1):
                if band_index > src.count:
                    means[band] = None
                    continue
                pixels = src.read(band_index, window=window).astype(float)
                if nodata is not None:
                    pixels = np.where(pixels == nodata, np.nan, pixels)
                pixels[pixels == 0] = np.nan
                average = np.nanmean(pixels)
                if np.isnan(average):
                    means[band] = None
                else:
                    means[band] = round(float(average), 6)
            return means
    except Exception:
        # Best effort: a scene that cannot be read contributes no values.
        return dict.fromkeys(band_names)
def _multiband_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    band_names: list[str],
    desc: str,
) -> list[dict]:
    """Build a date-sorted series of per-band centre values.

    ``date_fn(path)`` must return a ``YYYYMMDD`` string, or ``None`` to skip
    the raster. Rasters for which every band reads as ``None`` are omitted.
    Returns ``[{"date": "YYYY-MM-DD", band1: ..., band2: ..., …}]``; ``desc``
    labels the progress bar.
    """
    rows: list[dict] = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        band_values = _read_multiband_center(tif, lat, lon, band_names)
        if all(value is None for value in band_values.values()):
            continue
        iso_day = f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
        rows.append({"date": iso_day, **band_values})
    rows.sort(key=lambda row: row["date"])
    return rows
# ---------------------------------------------------------------------------
# Helpers: spatial heterogeneity + observation density
# ---------------------------------------------------------------------------
def _read_footprint_stats(
    path: Path, lat: float, lon: float, half_m: float = SPATIAL_CV_HALF_M
) -> tuple[float, float] | tuple[None, None]:
    """Return (mean, std) of band 1 over a square window centred on (lat, lon).

    The window extends ``half_m`` either side of the centre, converted to
    pixels using the raster's x pixel size (at least one pixel) and clipped
    to the raster extent. Nodata and non-positive pixels are ignored.

    Returns ``(None, None)`` on any error or when fewer than 4 valid pixels
    remain.
    """
    try:
        with rasterio.open(path) as src:
            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
            x, y = to_raster_crs.transform(lon, lat)
            # Pixel size in CRS units; assumed to be metres (projected CRS such as UTM).
            pixel_size = abs(src.transform.a)
            reach = max(1, int(round(half_m / pixel_size)))
            row, col = rowcol(src.transform, x, y)
            top, bottom = max(0, row - reach), min(src.height, row + reach + 1)
            left, right = max(0, col - reach), min(src.width, col + reach + 1)
            window = rasterio.windows.Window(left, top, right - left, bottom - top)
            pixels = src.read(1, window=window).astype(float)
            nodata = src.nodata
            if nodata is not None:
                pixels = np.where(pixels == nodata, np.nan, pixels)
            pixels[pixels <= 0] = np.nan
            usable = pixels[~np.isnan(pixels)]
            if len(usable) >= 4:
                return float(np.mean(usable)), float(np.std(usable))
            return None, None
    except Exception:
        # Best effort: an unreadable scene simply contributes no statistics.
        return None, None
def compute_covariates(
    s2_gcc_paths: list[Path],
    s2_series: list[dict],
    s3_series: list[dict],
    n_gcc_points: int | None,
    lat: float,
    lon: float,
) -> dict:
    """Summarise spatial heterogeneity and observation density for one site.

    Spatial terms average the per-scene footprint statistics returned by
    ``_read_footprint_stats`` over ``s2_gcc_paths`` (scenes without a positive
    mean are ignored). Temporal terms are the mean and maximum number of days
    between consecutive ``s2_series`` dates, in the order given. Values that
    cannot be computed are ``None``.
    """
    # (mean, std) of GCC per scene around the site.
    footprint: list[tuple[float, float]] = []
    for tif in s2_gcc_paths:
        mean, std = _read_footprint_stats(tif, lat, lon)
        if mean is not None and mean > 0:
            footprint.append((mean, std))

    if footprint:
        spatial_gcc_cv = round(float(np.mean([std / mean for mean, std in footprint])), 4)
        spatial_gcc_std = round(float(np.mean([std for _, std in footprint])), 4)
    else:
        spatial_gcc_cv = None
        spatial_gcc_std = None

    # Days between consecutive S2 observations.
    days = [datetime.date.fromisoformat(point["date"]) for point in s2_series]
    s2_mean_gap = None
    s2_max_gap = None
    if len(days) >= 2:
        gaps = [(later - earlier).days for earlier, later in zip(days, days[1:])]
        s2_mean_gap = round(float(np.mean(gaps)), 1)
        s2_max_gap = int(max(gaps))

    return {
        "spatial_gcc_cv": spatial_gcc_cv,
        "spatial_gcc_std": spatial_gcc_std,
        "s2_scene_count": len(s2_series),
        "s2_mean_gap_days": s2_mean_gap,
        "s2_max_gap_days": s2_max_gap,
        "s3_composite_count": len(s3_series),
        "n_gcc_points": n_gcc_points,
    }
# ---------------------------------------------------------------------------
# Per-site export
# ---------------------------------------------------------------------------
def _write_json(path: Path, data: Any) -> None:
    """Serialise ``data`` as compact JSON (no spaces after separators) into ``path``."""
    payload = json.dumps(data, separators=(",", ":"))
    path.write_text(payload)
def _raster_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    desc: str,
) -> list[dict]:
    """Build a date-sorted GCC series from the centre pixel of each raster.

    ``date_fn(path)`` must return a ``YYYYMMDD`` string, or ``None`` to skip
    the raster. Rasters whose centre value reads as ``None`` are omitted.
    Returns ``[{"date": "YYYY-MM-DD", "gcc": value}]``; ``desc`` labels the
    progress bar.
    """
    points: list[dict] = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        centre = _read_center_pixel(tif, lat, lon)
        if centre is None:
            continue
        iso_day = f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
        points.append({"date": iso_day, "gcc": centre})
    points.sort(key=lambda point: point["date"])
    return points
def _raster_index(tif_paths: list[Path], date_fn, rel_root: Path) -> list[dict]:
    """List rasters as ``[{"date", "path"}]`` sorted by date.

    ``date`` is whatever ``date_fn(path)`` returns (rasters for which it
    returns ``None`` are skipped). ``path`` is expressed relative to
    ``rel_root`` when the raster lives under it, otherwise unchanged.
    """

    def _shown_path(tif: Path) -> str:
        try:
            return str(tif.relative_to(rel_root))
        except ValueError:
            # Not located under rel_root: keep the path as given.
            return str(tif)

    entries: list[dict] = []
    for tif in tif_paths:
        stamp = date_fn(tif)
        if stamp is not None:
            entries.append({"date": stamp, "path": _shown_path(tif)})
    entries.sort(key=lambda entry: entry["date"])
    return entries
def export_site(
    site: str,
    year: int,
    lat: float,
    lon: float,
    out_dir: Path,
    n_gcc_points: int | None = None,
) -> bool:
    """Write the per-site JSON sidecars for one site and year into ``out_dir``.

    Files written:

    - ``gcc_*.json``: GCC series for PhenoCam, S2 (raw and Whittaker), S3
      (raw and moving average) and both fusion variants (BtI, ItB)
    - ``phenocam_images.json``, ``bands_s2.json``, ``bands_s3.json``
    - ``rasters_*.json``: raster indexes for the slider
    - ``covariates.json`` and ``metrics.json``

    Legacy bundled outputs (``timeseries.json``, ``rasters.json``) are deleted
    if present.

    Args:
        site: Site name, used in the ``DATA_DIR`` paths and progress labels.
        year: Evaluation year, used in the ``DATA_DIR`` paths.
        lat: Site latitude passed to the raster readers.
        lon: Site longitude passed to the raster readers.
        out_dir: Destination directory; created if missing.
        n_gcc_points: Passed through unchanged into ``covariates.json``.

    Returns:
        ``False`` (writing nothing) when the site has no BtI ``GCC_*.tif``
        fusion output, ``True`` otherwise.
    """
    sentinel_base = DATA_DIR / "sentinel_data" / str(year) / site / "prepared"
    fusion_base = DATA_DIR / "fusion" / str(year) / site

    s2_dir = sentinel_base / "s2"  # holds both *_GCC.tif and *_REFL.tif
    s3_gcc_dir = sentinel_base / "gcc_s3"
    s3_comp_dir = sentinel_base / "s3"
    bti_gcc_dir = fusion_base / "bti" / "gcc"
    bti_refl_dir = fusion_base / "bti" / "fusion"
    itb_gcc_dir = fusion_base / "itb" / "fusion"

    has_fusion = bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))
    if not has_fusion:
        return False

    out_dir.mkdir(parents=True, exist_ok=True)

    # Scan every input directory once; the lists feed both the timeseries
    # and the raster indexes below.
    s2_gcc_paths = sorted(s2_dir.glob("*_GCC.tif"))
    s2_refl_paths = sorted(s2_dir.glob("*_REFL.tif"))
    s3_gcc_paths = sorted(s3_gcc_dir.glob("composite_*.tif"))
    s3_comp_paths = sorted(s3_comp_dir.glob("composite_*.tif"))
    bti_paths = sorted(bti_gcc_dir.glob("GCC_*.tif"))
    bti_refl_paths = sorted(bti_refl_dir.glob("REFL_*.tif"))
    itb_paths = sorted(itb_gcc_dir.glob("GCC_*.tif"))

    # --- GCC timeseries from rasters ---
    s2_series = _raster_series(s2_gcc_paths, _date_from_s2_tif, lat, lon, f"{site} S2")
    s3_series = _raster_series(s3_gcc_paths, _date_from_gcc_tif, lat, lon, f"{site} S3")
    bti_series = _raster_series(bti_paths, _date_from_gcc_tif, lat, lon, f"{site} BtI")
    itb_series = _raster_series(itb_paths, _date_from_gcc_tif, lat, lon, f"{site} ItB")

    # Whittaker on S2
    s2_smooth_vals = _whittaker_smooth([p["gcc"] for p in s2_series])
    s2_whittaker = [
        {"date": p["date"], "gcc": v}
        for p, v in zip(s2_series, s2_smooth_vals)
        if v is not None
    ]

    # S3 moving average (window = S3_SMOOTH_WINDOW)
    s3_smooth = _moving_average(s3_series, "gcc", S3_SMOOTH_WINDOW)
    s3_smooth_series = [
        {"date": p["date"], "gcc": p["gcc_smooth"]}
        for p in s3_smooth
        if p.get("gcc_smooth") is not None
    ]

    # PhenoCam CSV
    csv_path = DATA_DIR / "phenocam" / str(year) / f"{site}_1day.csv"
    phenocam_series, image_list = _parse_phenocam_csv(csv_path, year, site)

    # Band reflectance timeseries (multi-band center-pixel)
    bands_s2 = _multiband_series(
        s2_refl_paths, _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands"
    )
    bands_s3 = _multiband_series(
        s3_comp_paths, _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands"
    )

    # --- Per-metric JSON outputs ---
    _write_json(out_dir / "gcc_phenocam.json", phenocam_series)
    _write_json(out_dir / "gcc_s2.json", s2_series)
    _write_json(out_dir / "gcc_s2_whittaker.json", s2_whittaker)
    _write_json(out_dir / "gcc_s3.json", s3_series)
    _write_json(out_dir / "gcc_s3_smooth.json", s3_smooth_series)
    _write_json(out_dir / "gcc_fusion_bti.json", bti_series)
    _write_json(out_dir / "gcc_fusion_itb.json", itb_series)
    _write_json(out_dir / "phenocam_images.json", image_list)
    _write_json(out_dir / "bands_s2.json", bands_s2)
    _write_json(out_dir / "bands_s3.json", bands_s3)

    # --- Raster index for slider ---
    rel_root = DATA_DIR.parent  # paths relative to project root

    # Valid-pixel sets: only show S2/S3 rasters where the center pixel had
    # usable data (non-zero GCC). This excludes cloud-masked / snow-covered
    # scenes that would render as black or visually nonsensical.
    # The index carries YYYYMMDD dates, the series carry YYYY-MM-DD.
    s2_valid_dates = {p["date"].replace("-", "") for p in s2_series}
    s3_valid_dates = {p["date"].replace("-", "") for p in s3_series}

    s2_refl = [
        r for r in _raster_index(s2_refl_paths, _date_from_s2_tif, rel_root)
        if r["date"] in s2_valid_dates
    ]
    s3_comp = [
        r for r in _raster_index(s3_comp_paths, _date_from_gcc_tif, rel_root)
        if r["date"] in s3_valid_dates
    ]
    s2_gcc = [
        r for r in _raster_index(s2_gcc_paths, _date_from_s2_tif, rel_root)
        if r["date"] in s2_valid_dates
    ]
    s3_gcc = [
        r for r in _raster_index(s3_gcc_paths, _date_from_gcc_tif, rel_root)
        if r["date"] in s3_valid_dates
    ]
    bti_refl = _raster_index(bti_refl_paths, _date_from_gcc_tif, rel_root)
    itb_gcc = _raster_index(itb_paths, _date_from_gcc_tif, rel_root)

    _write_json(out_dir / "rasters_s2_refl.json", s2_refl)
    _write_json(out_dir / "rasters_s3_composite.json", s3_comp)
    _write_json(out_dir / "rasters_s2_gcc.json", s2_gcc)
    _write_json(out_dir / "rasters_s3_gcc.json", s3_gcc)
    _write_json(out_dir / "rasters_fusion_bti_refl.json", bti_refl)
    _write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc)

    # --- Site covariates (heterogeneity + observation density) ---
    _write_json(out_dir / "covariates.json", compute_covariates(
        s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon
    ))

    # --- Validation metrics vs PhenoCam gcc_90 ---
    _write_json(out_dir / "metrics.json", {
        "bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"),
        "itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"),
        "s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker, "gcc"),
        "s3_smooth": compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"),
        "s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"),
        "s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"),
    })

    # Remove legacy bundled outputs if present
    for legacy in ("timeseries.json", "rasters.json"):
        (out_dir / legacy).unlink(missing_ok=True)

    return True
# ---------------------------------------------------------------------------
# Manifest
# ---------------------------------------------------------------------------
VEG_TYPE_LABELS = {
    "AG": "Agriculture",
    "DB": "Deciduous broadleaf",
    "DN": "Deciduous needleleaf",
    "EB": "Evergreen broadleaf",
    "EN": "Evergreen needleleaf",
    "GR": "Grassland",
    "MX": "Mixed",
    "SH": "Shrubland",
    "TN": "Tundra",
    "UN": "Unknown",
    "WL": "Wetland",
    "RF": "Reference",
}


def build_manifest(years: list[int], filter_site: str | None = None) -> dict:
    """Collect metadata for every screening-passed site, grouped by year.

    For each year with a ``phenocam_screening/{year}.json`` file under
    ``DATA_DIR``, sites whose ``calculations.status`` is ``"PASS"`` are
    recorded (restricted to ``filter_site`` when given). Years without a
    screening file get no entry under ``"sites"``.

    Returns:
        ``{"years": years, "sites": {"<year>": {sitename: metadata}}}`` where
        ``metadata["has_fusion"]`` tells whether BtI ``GCC_*.tif`` rasters
        exist for that site and year.
    """
    manifest: dict[str, Any] = {"years": years, "sites": {}}

    for year in years:
        screening_path = DATA_DIR / "phenocam_screening" / f"{year}.json"
        if not screening_path.is_file():
            continue
        screening = json.loads(screening_path.read_text())

        per_site: dict[str, Any] = {}
        for entry in screening.get("sites", []):
            calc = entry.get("calculations", {})
            if calc.get("status") != "PASS":
                continue

            response = entry.get("response", {})
            cam = response.get("camera", {})
            roi = response.get("roi", {})

            site = cam.get("Sitename", "")
            if not site or (filter_site and site != filter_site):
                continue

            site_meta = cam.get("sitemetadata", {})
            # Fall back from the site's vegetation type to the ROI type, then "UN".
            veg_code = site_meta.get("primary_veg_type") or roi.get("roitype") or "UN"

            bti_gcc_dir = DATA_DIR / "fusion" / str(year) / site / "bti" / "gcc"
            fused = bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))

            per_site[site] = {
                "lat": cam.get("Lat"),
                "lon": cam.get("Lon"),
                "veg_type": veg_code,
                "veg_label": VEG_TYPE_LABELS.get(veg_code, veg_code),
                "description": site_meta.get("site_description", ""),
                "dominant_species": site_meta.get("dominant_species", ""),
                "group": site_meta.get("group", ""),
                "snr": calc.get("snr"),
                "n_gcc_points": calc.get("n_gcc_points"),
                "has_fusion": fused,
            }

        manifest["sites"][str(year)] = per_site

    return manifest
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main() -> None:
    """CLI entry point: export per-site metrics and write the manifest.

    Flags: ``--evaluation-year`` (default ``DEFAULT_YEAR``) selects the year
    whose sites are exported; ``--site`` limits the run to one sitename.
    The manifest covers every year that has a screening JSON (falling back
    to the evaluation year when none exist) and is written to
    ``DATA_DIR / "metrics" / "manifest.json"``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument("--site", type=str, default=None)
    args = parser.parse_args()
    year, filter_site = args.evaluation_year, args.site

    out_base = DATA_DIR / "metrics"
    out_base.mkdir(parents=True, exist_ok=True)

    # Determine years with screening data
    screening_dir = DATA_DIR / "phenocam_screening"
    years = sorted(
        int(p.stem) for p in screening_dir.glob("*.json") if p.stem.isdigit()
    ) or [year]

    print(f"Building manifest for years: {years}")
    manifest = build_manifest(years, filter_site)

    # Export per-site data for the requested year
    fusion_sites = {
        name: meta
        for name, meta in manifest["sites"].get(str(year), {}).items()
        if meta["has_fusion"] and (not filter_site or name == filter_site)
    }

    print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}")
    for site, meta in tqdm(fusion_sites.items(), desc="Sites"):
        exported = export_site(
            site, year, meta["lat"], meta["lon"], out_base / str(year) / site,
            meta.get("n_gcc_points"),
        )
        message = f"{site}" if exported else f"{site} — no fusion data found"
        print(message)

    manifest_path = out_base / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, separators=(",", ":")))
    print(f"Manifest written → {manifest_path}")


if __name__ == "__main__":
    main()

151
AGENTS.md Normal file
View file

@ -0,0 +1,151 @@
# AGENTS.md
Worldwide PhenoCam EFAST feasibility screening. Human summary: [`README.md`](README.md).
---
## Layout
| Path | Role |
|------|------|
| `1-phenocam.py` | Step 1: download PhenoCam metadata + `one_day_summary` CSV |
| `2-phenocam-screening.py` | Step 2: PhenoCam + SNR gates on cached CSVs |
| `3-sentinel-data.py` | Step 3: S2 (Earth Search COG) + S3 (CDSE OpenEO) download + EFAST prep |
| `4-fusion.py` | Step 4: GCC computation + EFAST BtI/ItB fusion loop |
| `5-metrics.py` | Step 5: timeseries, covariates, `metrics.json`, webapp manifest |
| `data/` | Manifests, per-site caches, screening outputs (large; mostly generated) |
| `webapp/` | Static QA viewer (`make serve` from workspace root) |
Workspace orchestration: [`../AGENTS.md`](../AGENTS.md).
---
## Where to work
| Task | Location |
|------|----------|
| PhenoCam bulk download | `1-phenocam.py` |
| GCC/SNR screening on disk | `2-phenocam-screening.py` |
| S2/S3 download + EFAST prep | `3-sentinel-data.py` |
| GCC + fusion | `4-fusion.py` |
| Metrics + webapp index | `5-metrics.py` |
| Web QA | `../Makefile` target `serve` → `webapp/index.html` |
---
## Setup
**Preferred (uv):** from `processing/`:
```bash
uv sync # all deps from pyproject.toml (incl. efast)
```
Run any script as `uv run python <script>.py …`. Python version is pinned in `.python-version` (3.11.10).
- `CDSE_USER` — Copernicus Data Space username
- `CDSE_PASSWORD` — Copernicus Data Space password
Required for step 3 S3 download (CDSE OpenEO). Step 3 S2 download uses AWS Earth Search (no auth).
---
## CLI convention
Every numbered step script shares two user-facing flags:
| Flag | Default | Role |
|------|---------|------|
| `--evaluation-year` | `2025` | Calendar year; input/output paths under `data/` use `{year}` |
| `--site` | all eligible | Single sitename to limit scope (testing or single-site runs) |
All other tunable parameters (bands, resolution ratio, compositing window, etc.) are public constants at the top of each script. Paths are derived from the year — do not pass manifest paths on the CLI. Each script docstring lists **Inputs** and **Outputs** under `data/`.
Resume behaviour: step 3 skips S3 sites when `raw/s3/S3*.tif` already exist; step 3 skips S2 scenes when `*_REFL.tif` already exists. Step 4 skips GCC/fusion files that already exist. Step 5 overwrites JSON sidecars for processed sites.
Example:
```bash
uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
```
---
## Workflow
### Stepped pipeline (resumable)
```bash
uv run python 1-phenocam.py --evaluation-year 2025
uv run python 2-phenocam-screening.py --evaluation-year 2025
uv run python 3-sentinel-data.py --evaluation-year 2025
uv run python 4-fusion.py --evaluation-year 2025
uv run python 5-metrics.py --evaluation-year 2025
# single site
uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
```
S3 uses CDSE OpenEO collection `SENTINEL3_SYN_L2_SYN` (bands Oa04/Oa06/Oa08/Oa17). S2 uses AWS Earth Search COG range reads (no auth). No S2↔S3 radiometric harmonisation.
---
## Screening gates
### Step 2 (`2-phenocam-screening.py`)
| Gate | Rule |
|------|------|
| `phenocam` | ROI + `one_day_summary` CSV; ≥ `MIN_GCC_POINTS` (30) valid `gcc_90` in evaluation year |
| `snr` | AIC-selected cubic spline SNR ≥ `SNR_THRESHOLD` (2.0) |
| `cluster` | SNR-passed sites within 500 m deduplicated; keep highest `n_gcc_points` (SNR tie-break) |
---
## Data layout
**Naming:** `data/` paths follow step script names — `1-phenocam.py` → `phenocam/`, `2-phenocam-screening.py` → `phenocam_screening/`, `3-sentinel-data.py` → `sentinel_data/`, `4-fusion.py` → `fusion/`, `5-metrics.py` → `metrics/`.
```
data/
phenocam/
{year}.json # step-1 manifest
{year}/
{sitename}.json # camera + ROI API payload
{sitename}_1day.csv # raw PhenoCam summary CSV
phenocam_screening/
{year}.json # step-2 results
{year}.csv
sentinel_data/{year}/{sitename}/
raw/s3/ # step 3: S3 SYN L2 per-date GeoTIFFs
prepared/s2/ # step 3: *_REFL.tif, *_DIST_CLOUD.tif, *_GCC.tif
prepared/s3/ # step 3: composite_*.tif
prepared/gcc_s3/ # step 4: single-band GCC composites
data.json # step-3 run summary
fusion/{year}/{sitename}/
bti/fusion/REFL_*.tif # step 4: BtI fused reflectance
bti/gcc/GCC_*.tif # step 4: BtI GCC
itb/s2/GCC_*.tif # step 4: S2 GCC (ItB stack)
itb/s3/GCC_*.tif # step 4: S3 GCC (ItB stack)
itb/fusion/GCC_*.tif # step 4: ItB fused GCC
metrics/
manifest.json # step 5: years + site metadata for webapp
{year}/{sitename}/
gcc_*.json, metrics.json, covariates.json, rasters_*.json, bands_*.json
```
---
## Module map
| File | Responsibility |
|------|----------------|
| `1-phenocam.py` | Paginate PhenoCam API; cache JSON + CSV; write manifest |
| `2-phenocam-screening.py` | Parse cached CSVs; PhenoCam + SNR gates |
| `3-sentinel-data.py` | S2 COG range reads (Earth Search); S3 OpenEO download; EFAST REFL/DIST_CLOUD/composites |
| `4-fusion.py` | GCC from S2 REFL + S3 composites; daily `efast.fusion` BtI + ItB |
| `5-metrics.py` | PhenoCam-matched GCC series, baselines, fusion metrics, raster index, covariates |

619
LICENSE
View file

@ -1,619 +0,0 @@
GNU AFFERO GENERAL PUBLIC LICENSE
=================================
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

159
README.md
View file

@ -1,146 +1,57 @@
# Satellite Data Fusion Pipeline # Worldwide PhenoCam EFAST feasibility screening
Python pipeline for downloading Sentinel-2 and Sentinel-3 imagery and PhenoCam ground truth, applying NDVI-based cloud pre-selection, fusing sensors with the [EFAST](https://github.com/DHI-GRAS/efast) algorithm, and evaluating fused **Green Chromatic Coordinate (GCC)** time series against PhenoCam `gcc_90`. Screen the global [PhenoCam Network](https://phenocam.nau.edu/) for sites where EFAST Sentinel-2 / Sentinel-3 fusion is likely to work: enough PhenoCam `gcc_90`, seasonal signal, and S2/S3 coverage for a calendar year.
## Features Agent-oriented detail: [`AGENTS.md`](AGENTS.md).
- **Acquisition** — S2 L2A (AWS Element84 STAC), S3 OLCI L1B (Copernicus OpenEO), PhenoCam midday images and GCC CSV ---
- **Pre-selection** — Aggressive and non-aggressive NDVI-based cloud screening (plus dark-scene rejection)
- **Preparation** — Harmonised reflectance/GCC rasters, distance-to-cloud weights, S3 compositing and optional temporal smoothing
- **Fusion** — EFAST under eight scenarios per site (BtI and ItB × two strategies × σ ∈ {20, 30} days)
- **Post-processing** — Crop to valid-data window; NDVI and GCC timeseries at the site
- **Metrics** — Temporal comparison vs PhenoCam (`metrics.json`); optional Tier-2 withheld-S2 gap validation
- **Web viewer** — Static HTML dashboard over pipeline outputs (`webapp/`)
## Installation ## Quick start
From `processing/`:
```bash ```bash
pip install -r requirements.txt uv sync
pip install git+https://github.com/DHI-GRAS/efast.git # not on PyPI uv run python 1-phenocam.py --evaluation-year 2025
``` ```
Create `.env` with Copernicus Data Space credentials: ### Stepped pipeline (resumable)
- `CDSE_USER` All steps use `--evaluation-year` (default 2025) and optional `--site`. See each script docstring for inputs/outputs under `data/`.
- `CDSE_PASSWORD`
Python version is pinned in `.python-version` (use `.venv/` locally).
## Usage
```python
from run import run_pipeline
run_pipeline(season=2024, site_position=(47.116171, 11.320308), site_name="innsbruck")
```
`site_position` is always **`(lat, lon)`**. Study sites are listed at the bottom of `run.py`: `innsbruck`, `forthgr`, `pitsalu`, `vindeln2`, `sunflowerjerez1`, `institutekarnobat`.
By default, most stages in `run.py` are **commented out** (metrics-only). Uncomment acquisition → pre-selection → preparation → fusion → post-processing for a full run.
### Pipeline stages
1. Download S2, S3, and PhenoCam
2. Pre-selection (per-sensor NDVI screening → `raw/preselection/`)
3. Prepare S2/S3 for each strategy (`prepared_{aggressive|nonaggressive}/` and `_itb/` variants)
4. EFAST fusion (BtI reflectance and ItB GCC products)
5. Post-process crops and timeseries (`processed_*_sigma{20,30}/`)
6. Compute metrics vs PhenoCam → `metrics.json`
### Gap validation (optional)
With prepared data and EFAST installed:
```bash ```bash
# Phenology sidecars (TIMESAT 50 % amplitude) uv run python 1-phenocam.py --evaluation-year 2025
python -m phenology_timesat --all uv run python 2-phenocam-screening.py --evaluation-year 2025
uv run python 3-sentinel-data.py --evaluation-year 2025
uv run python 4-fusion.py --evaluation-year 2025
uv run python 5-metrics.py --evaluation-year 2025
# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up) # single site
python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \ uv run python 3-sentinel-data.py --evaluation-year 2025 --site innsbruck
--strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30 uv run python 4-fusion.py --evaluation-year 2025 --site innsbruck
uv run python 5-metrics.py --evaluation-year 2025 --site innsbruck
# All six sites, best BtI scenario per site
python -m gap_validation.batch_spatial
# Full-season NSE_PC on gap-degraded stack (slow)
python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688
python -m gap_validation.batch_temporal
# TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier)
python -m gap_validation.phenology_offsets
``` ```
Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`. Step 3 S3 uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`); S2 uses AWS Earth Search COG range reads (no auth).
## Data layout ---
``` ## Outputs (under `data/`)
data/{site_name}/{season}/
raw/
s2/ # {YYYYMMDD}_{n}.geotiff — B02, B03, B04, B8A
s3/ # {YYYYMMDD}_{n}.geotiff — Oa04, Oa06, Oa08, Oa17
phenocam/ # JPEGs, GCC JSON, phenology sidecar
preselection/ # {s2,s3}_preselection.{json,csv}
prepared_{strategy}/
s2/ # REFL + DIST_CLOUD GeoTIFFs
s3/ # composite_{YYYYMMDD}.tif
fusion/ # REFL_{YYYYMMDD}.tif (σ≈20)
fusion_sigma30/ # REFL (σ=30)
prepared_{strategy}_itb/
s2/ s3/ fusion/ # GCC products (Index-then-Blend)
processed_{strategy}_sigma{20,30}/
s2/ s3/ fusion/ # cropped {YYYYMMDD}_0.geotiff
gcc/ ndvi/ # timeseries.json per source
processed_{strategy}_itb_sigma{20,30}/
s2/ s3/ fusion/ gcc/
validation/ # gap experiment (when run)
metrics.json
```
Site metadata: `data/sites.geojson` (six thesis sites). `data/coweeta/` is local/legacy and not listed there. | Artifact | Step | Role |
|----------|------|------|
| `phenocam/{year}.json` | 1 | Site list + `sites_dir` pointer |
| `phenocam/{year}/{site}.json`, `{site}_1day.csv` | 1 | Raw API + GCC CSV |
| `phenocam_screening/{year}.json` / `.csv` | 2 | PhenoCam + SNR gate results |
| `sentinel_data/{year}/{site}/prepared/s2/` | 3 | S2 REFL + DIST_CLOUD GeoTIFFs |
| `sentinel_data/{year}/{site}/prepared/s3/` | 3 | S3 composite GeoTIFFs |
| `fusion/{year}/{site}/` | 4 | BtI/ItB fused rasters |
| `metrics/{year}/{site}/`, `metrics/manifest.json` | 5 | Timeseries JSON, covariates, webapp manifest |
### File formats The 2025 manifest currently lists **739** cameras with archive overlap; most per-site CSV/JSON files are cached under `data/phenocam/2025/`.
**Sentinel-2** — Multi-band GeoTIFF; bands `[blue, green, red, nir]`; `VIEWING_ZENITH_ANGLE` metadata; filename `{YYYYMMDD}_{increment}.geotiff`. ---
**Sentinel-3** — Multi-band GeoTIFF; same band order; filename `{YYYYMMDD}_{increment}.geotiff`.
**Prepared S2** — `S2A_MSIL2A_{YYYYMMDD}_REFL.tif` plus `*DIST_CLOUD.tif` (cloud-distance weights for EFAST).
## Web viewer ## Web viewer
Static HTML/JS in `webapp/` — no build step. Shared GeoTIFF helpers: `webapp/common.js`. CDN: Leaflet, geotiff.js, proj4. Symlink: `webapp/data` → `../data`. From the workspace root, `make serve` serves `processing/` at [http://localhost:8000/webapp/index.html](http://localhost:8000/webapp/index.html). Requires step 5 (`data/metrics/manifest.json`).
Serve from the **repository root** (not `webapp/`):
```bash
python3 -m http.server 8000
# http://localhost:8000/webapp/index.html
```
Or from the workspace root: `make serve`.
| Page | Purpose | Primary data paths |
|------|---------|-------------------|
| `index.html` | Post-processed maps, NDVI/GCC timeseries, PhenoCam | `processed_{strategy}_sigma{n}/`, `raw/phenocam/` |
| `preselection.html` | Cloud-screening diagnostics | `raw/preselection/{s2,s3}_preselection.json` |
| `prepared.html` | Prepared REFL/GCC before crop | `prepared_{strategy}/`, `prepared_{strategy}_itb/` |
| `fusion.html` | EFAST daily fusion rasters | `prepared_*/fusion/`, `fusion_sigma30/` |
| `postprocessed.html` | Cropped processed stacks | `processed_*_sigma*/` |
| `metrics.html` | Tabular `metrics.json` (thesis export source) | `{site}/{season}/metrics.json` under `webapp/data/` |
| `gap_validation.html` | Withheld-S2 gap experiment | `{site}/{season}/validation/gap_validation_summary.json` |
| `phenology.html` | TIMESAT on PhenoCam GCC | `raw/phenocam/phenocam_phenology.json` |
Site/season dropdowns use `data/sites.geojson`. Map pages: **BtI | ItB**; scenarios `aggressive` / `nonaggressive`, σ 20 / 30. Keep the shared nav consistent across all eight pages. QA only — thesis tables are exported from the workspace root (`make export` or `../scripts/export_thesis_tables.py`).
## Development
```bash
ruff check --fix . && ruff format .
```
Pre-commit hooks: `.pre-commit-config.yaml`.
## License
GNU Affero General Public License v3.0 (AGPL-3.0). See [LICENSE](LICENSE).

View file

@ -1,282 +0,0 @@
"""PhenoCam acquisition from PhenoCam Network API."""
import csv
import json
import requests
from pathlib import Path
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
PHENOCAM_API = "https://phenocam.nau.edu/api"
def _phenocam_summary_gcc_value(row, use_mean_fallback: bool):
"""Extract daily GCC from a one-day summary row.
Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth).
Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present.
With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``.
"""
if not use_mean_fallback:
oflag = row.get("outlierflag_gcc_90")
if oflag is not None and str(oflag).strip() in ("1", "1.0"):
return None
raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
if raw is None:
return None
text = str(raw).strip()
if not text or text.upper() == "NA":
return None
try:
val = float(text)
except ValueError:
return None
if val <= -9998.0:
return None
return val
def _find_start_offset(site_name, start_dt, total_count):
    """Binary-search the midday-image listing for an approximate start offset.

    Probes single records of ``{PHENOCAM_API}/middayimages/`` and compares
    their ``imgdate`` with ``start_dt``.
    NOTE(review): this only works if the API lists images in ascending date
    order -- confirm against the PhenoCam API.

    Args:
        site_name: PhenoCam site name sent as the ``site`` query parameter.
        start_dt: ``datetime`` the listing should start from.
        total_count: Total number of records the API reports for the site.

    Returns:
        A non-negative offset: the search position minus a 100-record margin,
        so the caller starts slightly before ``start_dt``.

    Raises:
        requests.exceptions.HTTPError: If a probe request returns an error status.
    """
    low, high = 0, total_count - 1
    for _ in range(15):  # hard cap on the number of HTTP probes
        if low > high:
            # Nothing left to search (also covers total_count == 0, which
            # would otherwise send a request with a negative offset).
            break
        # When the interval is a single index, one more probe decides
        # whether `low` moves past it; any probe after that would repeat
        # the same request with the same outcome.
        final_probe = low == high
        mid = (low + high) // 2
        response = requests.get(
            f"{PHENOCAM_API}/middayimages/",
            params={"site": site_name, "limit": 1, "offset": mid},
            timeout=30,
        )
        response.raise_for_status()
        results = response.json().get("results", [])
        if not results:
            break
        mid_date_str = results[0].get("imgdate", "")
        if not mid_date_str:
            break
        try:
            mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
        except ValueError:
            # Unparseable date: give up refining and use the current bound.
            break
        if mid_date < start_dt:
            low = mid + 1
        else:
            high = mid
        if final_probe:
            break
    return max(0, low - 100)
def download_phenocam(season, site_position, site_name, date_range=None):
    """Run both PhenoCam acquisition steps for a site: images first, then GCC.

    All four arguments are forwarded unchanged to each step.
    """
    for acquire in (_download_phenocam_images, _download_phenocam_gcc):
        acquire(season, site_position, site_name, date_range)
def _download_phenocam_images(season, site_position, site_name, date_range=None):
    """Download the PhenoCam midday JPEGs of one site for a date window.

    Images are saved as ``data/{site_name}/{season}/raw/phenocam/{YYYYMMDD}.jpg``
    (relative to the current working directory); files that already exist are
    skipped. A site with no images, or an unknown site (HTTP 404), is reported
    and ends the call early. Failures of individual image downloads are
    printed and do not abort the remaining downloads.

    Args:
        season: Year; used for the default window (1 Jan to 31 Dec) and the
            output directory.
        site_position: ``(lat, lon)`` pair; only used in the progress message.
        site_name: PhenoCam site name sent as the ``site`` query parameter.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` window that overrides
            the season default.

    Raises:
        requests.exceptions.HTTPError: For non-404 HTTP errors while listing images.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    start_date, end_date = datetime_range.split("/")
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")

    try:
        response = requests.get(
            f"{PHENOCAM_API}/middayimages/",
            params={"site": site_name, "limit": 1},
            timeout=30,
        )
        response.raise_for_status()
        total_count = response.json().get("count", 0)
        if total_count == 0:
            print(f"[PhenoCam] No images found for site '{site_name}'")
            return
        print(
            f"[PhenoCam] Found {total_count} total images, estimating start offset..."
        )
        start_offset = _find_start_offset(site_name, start_dt, total_count)

        url = f"{PHENOCAM_API}/middayimages/"
        params = {"site": site_name, "offset": start_offset}
        print(f"[PhenoCam] Fetching image list from offset {start_offset}...")

        images = []
        page = 1
        max_pages = 500
        past_end_date = False
        while url and page <= max_pages and not past_end_date:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            if not results:
                break
            for img in results:
                img_date_str = img.get("imgdate", "")
                if not img_date_str:
                    continue
                try:
                    img_date = datetime.strptime(img_date_str, "%Y-%m-%d")
                except ValueError:
                    continue
                if img_date > end_dt:
                    # First image after the window: stop paging altogether.
                    past_end_date = True
                    break
                if img_date >= start_dt:
                    images.append(img)
            if not past_end_date:
                url = data.get("next")
                # The "next" URL is requested as-is, without extra params.
                params = None
                page += 1
                if page % 50 == 0:
                    print(
                        f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..."
                    )
        if url and not past_end_date and page > max_pages:
            # The page cap was hit with more pages pending; say so instead of
            # silently returning a truncated image list.
            print(
                f"[PhenoCam] Warning: stopped after {max_pages} pages; image list may be incomplete"
            )
    except requests.exceptions.HTTPError as e:
        if e.response is not None and e.response.status_code == 404:
            print(f"[PhenoCam] Site '{site_name}' not found")
            return
        raise

    print(f"[PhenoCam] Found {len(images)} images")

    def _download_image(img):
        """Fetch one image record to ``output_dir``; return a status line or None."""
        date_str = img.get("imgdate", "").replace("-", "")
        if not date_str:
            return None
        filepath = output_dir / f"{date_str}.jpg"
        if filepath.exists():
            return f"Skipped {date_str}.jpg (exists)"
        img_path = img.get("imgpath")
        if not img_path:
            return None
        img_url = f"https://phenocam.nau.edu{img_path}"
        try:
            img_response = requests.get(img_url, timeout=30)
            img_response.raise_for_status()
            # Write to a temporary name and rename afterwards, so an
            # interrupted write never leaves a truncated .jpg that later
            # runs would skip as already downloaded.
            partial_path = filepath.with_name(filepath.name + ".part")
            partial_path.write_bytes(img_response.content)
            partial_path.replace(filepath)
            return f"Saved {date_str}.jpg"
        except Exception as e:
            # Deliberately best-effort: one failed image is reported and the
            # remaining downloads continue.
            return f"Error downloading {date_str}: {e}"

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(_download_image, img) for img in images]
        for future in as_completed(futures):
            result = future.result()
            if result:
                print(f"[PhenoCam] {result}")
    print("[PhenoCam] Completed")
def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
    """Fetch the greenness-index time series for a PhenoCam site; save JSON and CSV.

    Pages through the ``roilists`` endpoint until at least one ROI for
    *site_name* is found, downloads that ROI's ``one_day_summary`` CSV, keeps
    the rows whose date lies inside the requested range and writes
    ``phenocam_gcc.json`` and ``phenocam_gcc.csv`` under
    ``data/{site_name}/{season}/raw/phenocam/``. Afterwards it calls
    ``phenocam_snr.write_phenocam_snr`` for the same site and season.

    Args:
        season: Year used for the output path and for the default date range.
        site_position: Not used by this function.
        site_name: PhenoCam site name to query.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string; defaults to
            the whole calendar year of *season*.

    Returns:
        None. Request failures, a missing ROI or a missing CSV URL are reported
        on stdout and end the function before anything is written.
    """
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam")
    output_dir.mkdir(parents=True, exist_ok=True)
    start_date, end_date = datetime_range.split("/")
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")

    # Get ROIs for site (paginate until the first page that yields a match).
    try:
        url = f"{PHENOCAM_API}/roilists/"
        params = {"site": site_name}
        rois = []
        while url:
            r = requests.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            # .get(): an ROI record without a "site" field is skipped instead of
            # raising a KeyError that the handler below would not catch.
            rois.extend(
                roi for roi in data.get("results", []) if roi.get("site") == site_name
            )
            url = data.get("next")
            # The "next" URL already carries the query string.
            params = None
            if rois:
                break
        if not rois:
            print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
            return
        csv_url = rois[0].get("one_day_summary")
        if not csv_url:
            print("[PhenoCam-GI] No CSV data URL found for ROI")
            return
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
        return

    # Fetch CSV data
    try:
        csv_r = requests.get(csv_url, timeout=30)
        csv_r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching CSV: {e}")
        return

    # Drop blank lines and the '#'-prefixed header comments before parsing.
    lines = [
        line for line in csv_r.text.splitlines() if line and not line.startswith("#")
    ]
    reader = csv.DictReader(lines)
    fieldnames = reader.fieldnames or ()
    use_mean_fallback = "gcc_90" not in fieldnames
    if use_mean_fallback:
        print(
            "[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)"
        )
    timeseries = []
    for row in reader:
        try:
            date_str = row.get("date")
            if not date_str:
                continue
            day = datetime.strptime(date_str, "%Y-%m-%d")
            if start_dt <= day <= end_dt:
                gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
                if gcc is not None:
                    timeseries.append(
                        {"date": day.isoformat(), "greenness_index": gcc}
                    )
        except (ValueError, KeyError):
            # Malformed rows are skipped; the rest of the series is still usable.
            continue

    timeseries.sort(key=lambda entry: entry["date"])
    json_path = output_dir / "phenocam_gcc.json"
    csv_path = output_dir / "phenocam_gcc.csv"
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(timeseries, f, indent=2)
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["date", "greenness_index"])
        writer.writeheader()
        writer.writerows(timeseries)
    print(
        f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)"
    )
    # Imported here, as in the original, so the module loads without phenocam_snr.
    from phenocam_snr import write_phenocam_snr

    write_phenocam_snr(site_name, season, base=Path("data"))

View file

@ -1,190 +0,0 @@
"""Sentinel-2-MSI acquisition from AWS Element84 Earth Search (STAC catalog)."""
import numpy as np
import rasterio
import xml.etree.ElementTree as ET
import requests
from pathlib import Path
from rasterio.crs import CRS
from rasterio.warp import Resampling, calculate_default_transform, reproject, transform_geom
from rasterio.windows import from_bounds, transform as window_transform
from pystac_client import Client
BBOX_SIZE = 0.011
TARGET_CRS = CRS.from_epsg(32632)
def _get_bbox(lon, lat):
    """Return ``[min_lon, min_lat, max_lon, max_lat]`` for a square of side ``BBOX_SIZE`` centred on the point."""
    radius = BBOX_SIZE / 2
    west, east = lon - radius, lon + radius
    south, north = lat - radius, lat + radius
    return [west, south, east, north]
def _get_window_for_bbox(src, bbox):
    """Return the read window of *src* that covers *bbox*, clipped to the dataset.

    *bbox* is ``[min_x, min_y, max_x, max_y]`` in EPSG:4326. Its corner ring is
    transformed into ``src.crs``; the axis-aligned envelope of the four
    transformed corners is intersected with ``src.bounds`` and converted to a
    window with ``from_bounds``.
    """
    west, south, east, north = bbox[0], bbox[1], bbox[2], bbox[3]
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    polygon = {"type": "Polygon", "coordinates": [ring]}
    projected = transform_geom("EPSG:4326", src.crs, polygon)
    # Only the four distinct corners matter; the closing vertex repeats the first.
    corners = projected["coordinates"][0][:4]
    xs = [corner[0] for corner in corners]
    ys = [corner[1] for corner in corners]
    extent = src.bounds
    clipped_left = max(min(xs), extent.left)
    clipped_bottom = max(min(ys), extent.bottom)
    clipped_right = min(max(xs), extent.right)
    clipped_top = min(max(ys), extent.top)
    return from_bounds(
        clipped_left, clipped_bottom, clipped_right, clipped_top, src.transform
    )
def _extract_viewing_angle(item):
    """Return the absolute zenith angle from the item's granule metadata, or ``None``.

    Downloads the ``granule_metadata`` asset, parses it as XML and returns the
    absolute ``ZENITH_ANGLE`` of the first ``Mean_Viewing_Incidence_Angle``
    element that has one. Any failure (network, parsing, conversion) is
    reported as a warning on stdout and yields ``None``.
    """
    if "granule_metadata" not in item.assets:
        return None
    try:
        metadata_url = item.assets["granule_metadata"].href
        response = requests.get(metadata_url, timeout=10)
        response.raise_for_status()
        document = ET.fromstring(response.content)
        zenith_angles = []
        for angle_node in document.findall(".//Mean_Viewing_Incidence_Angle"):
            zenith_node = angle_node.find("ZENITH_ANGLE")
            if zenith_node is not None:
                zenith_angles.append(abs(float(zenith_node.text)))
        if not zenith_angles:
            return None
        return zenith_angles[0]
    except Exception as e:
        print(f"[S2] Warning: Could not extract viewing angle: {e}")
        return None
def download_s2(season, site_position, site_name, date_range=None):
    """Download Sentinel-2 L2A crops around a site and save one GeoTIFF per acquisition.

    Searches the Earth Search STAC catalogue for items intersecting the site
    point, keeps the first item per ``(date, id-part[3])`` key, reads the four
    configured bands windowed to the site bounding box, reprojects to
    ``TARGET_CRS`` when needed and writes
    ``data/{site_name}/{season}/raw/s2/{date}_{increment}.geotiff``.
    Existing output files are skipped.

    Args:
        season: Year used for the output path and the default date range.
        site_position: ``(lat, lon)`` of the site.
        site_name: Site identifier used in the output path.
        date_range: Optional ``"start/end"`` datetime range passed to the STAC
            search; defaults to the whole calendar year of *season*.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/s2/")
    print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    bbox = _get_bbox(lon, lat)
    # Band label -> STAC asset key.
    # NOTE(review): in Earth Search v1 the "nir" asset appears to be B08 rather
    # than B8A — confirm that the "B8A" label is intended.
    bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
    band_labels = list(bands)
    output_dir.mkdir(parents=True, exist_ok=True)
    print("[S2] Connecting to STAC catalog...")
    client = Client.open("https://earth-search.aws.element84.com/v1")
    search = client.search(
        collections=["sentinel-2-l2a"],
        intersects={"type": "Point", "coordinates": [lon, lat]},
        datetime=datetime_range,
        max_items=1000,
    )
    print("[S2] Searching items...")
    items_by_key = {}
    for item in search.items():
        date = item.datetime.strftime("%Y%m%d")
        parts = item.id.split("_")
        increment = parts[3] if len(parts) > 3 else "0"
        # First item seen for a (date, increment) key wins.
        items_by_key.setdefault((date, increment), item)
    print(f"[S2] Found {len(items_by_key)} unique items")

    for (date, increment), item in items_by_key.items():
        filepath = output_dir / f"{date}_{increment}.geotiff"
        if filepath.exists():
            print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
            continue
        print(f"[S2] Processing {date}_{increment}...")
        band_data = {}
        profile = None
        for band_idx, asset_name in enumerate(bands.values()):
            if asset_name not in item.assets:
                continue
            asset = item.assets[asset_name]
            with rasterio.open(asset.href) as src:
                window = _get_window_for_bbox(src, bbox)
                if window.height <= 0 or window.width <= 0:
                    continue
                data = src.read(window=window)
                new_transform = window_transform(window, src.transform)
                if profile is None:
                    # The first readable band defines the output grid.
                    profile = {
                        "driver": "GTiff",
                        "height": window.height,
                        "width": window.width,
                        "count": len(bands),
                        "dtype": data.dtype,
                        "crs": src.crs,
                        "transform": new_transform,
                        "compress": "lzw",
                    }
                band_data[band_idx] = data[0]

        if not (profile and len(band_data) == len(bands)):
            print(f"[S2] Skipping {date}_{increment} (missing bands)")
            continue

        # All bands are present here, so indices 0..n-1 follow ``bands`` order.
        stacked = np.array([band_data[i] for i in range(len(bands))])
        viewing_angle = _extract_viewing_angle(item)
        if profile["crs"] != TARGET_CRS:
            src_transform = profile["transform"]
            src_height, src_width = profile["height"], profile["width"]
            left, bottom, right, top = rasterio.transform.array_bounds(
                src_height, src_width, src_transform
            )
            dst_transform, dst_width, dst_height = calculate_default_transform(
                profile["crs"], TARGET_CRS, src_width, src_height,
                left=left, bottom=bottom, right=right, top=top,
            )
            reprojected = np.empty(
                (len(stacked), dst_height, dst_width), dtype=stacked.dtype
            )
            for i in range(len(stacked)):
                reproject(
                    source=stacked[i],
                    destination=reprojected[i],
                    src_transform=src_transform,
                    src_crs=profile["crs"],
                    dst_transform=dst_transform,
                    dst_crs=TARGET_CRS,
                    resampling=Resampling.bilinear,
                )
            stacked = reprojected
            profile.update({
                "crs": TARGET_CRS,
                "transform": dst_transform,
                "width": dst_width,
                "height": dst_height,
            })

        with rasterio.open(filepath, "w", **profile) as dst:
            for i, layer in enumerate(stacked, 1):
                dst.write(layer, i)
                dst.set_band_description(i, band_labels[i - 1])
            tags = {}
            if viewing_angle is not None:
                tags["VIEWING_ZENITH_ANGLE"] = str(viewing_angle)
            pb = item.properties.get("s2:processing_baseline")
            if pb is not None:
                tags["PROCESSING_BASELINE"] = str(pb)
            if tags:
                dst.update_tags(**tags)

        # ``is not None`` so that a valid angle of exactly 0.0 is still reported,
        # matching the tag-writing condition above.
        angle_msg = (
            f" (viewing angle: {viewing_angle:.2f}°)"
            if viewing_angle is not None
            else ""
        )
        print(f"[S2] Saved: {filepath}{angle_msg}")
    print("[S2] Completed")

View file

@ -1,160 +0,0 @@
"""Sentinel-3-OLCI acquisition from Copernicus Data Space OpenEO API."""
import os
import time
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
import openeo
import requests
import netCDF4
import numpy as np
import rasterio
from rasterio.transform import from_bounds
load_dotenv()
BBOX_SIZE = 0.016 # Larger than S2 to ensure full coverage including padded pixels
def _get_bbox(lon, lat):
    """Return ``[min_lon, min_lat, max_lon, max_lat]``: a ``BBOX_SIZE`` square centred on ``(lon, lat)``."""
    half_side = BBOX_SIZE / 2
    lower_left = [lon - half_side, lat - half_side]
    upper_right = [lon + half_side, lat + half_side]
    return lower_left + upper_right
def _process_netcdf(nc_file, output_dir, bands, openeo_bands):
    """Split a multi-date NetCDF cube into one GeoTIFF per time step.

    Reads the ``t``, ``x`` and ``y`` variables plus every variable named ``B``
    followed by digits, then writes ``{YYYYMMDD}_{n}.geotiff`` into
    *output_dir* for each time step, where ``n`` counts repeated dates from 0.
    Band descriptions are the keys of *bands* at the position where the
    variable name occurs in *openeo_bands*.
    """
    with netCDF4.Dataset(str(nc_file), "r") as nc:
        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
        x_coords = nc.variables["x"][:]
        y_coords = nc.variables["y"][:]
        band_vars = sorted(
            name
            for name in nc.variables.keys()
            if name.startswith("B") and name[1:].isdigit()
        )
        source_names = list(bands.keys())
        band_names = [source_names[openeo_bands.index(var)] for var in band_vars]
        n_cols, n_rows = len(x_coords), len(y_coords)
        # NOTE(review): bounds are the min/max coordinate values; if x/y hold cell
        # centres the extent is half a pixel short on each side — confirm.
        transform = from_bounds(
            float(x_coords.min()),
            float(y_coords.min()),
            float(x_coords.max()),
            float(y_coords.max()),
            n_cols,
            n_rows,
        )
        print(f"[S3] Found {len(times)} time steps")
        seen_per_date = {}
        for t_idx, time_val in enumerate(times):
            if isinstance(time_val, datetime):
                dt = time_val
            else:
                dt = netCDF4.num2date(
                    nc.variables["t"][t_idx], nc.variables["t"].units
                )
            date_str = dt.strftime("%Y%m%d")
            # Several acquisitions on one day get increasing suffixes 0, 1, 2, ...
            increment = seen_per_date.setdefault(date_str, 0)
            seen_per_date[date_str] += 1
            layers = [nc.variables[var][t_idx, :, :] for var in band_vars]
            stacked = np.stack(layers, axis=0)
            output_path = output_dir / f"{date_str}_{increment}.geotiff"
            profile = {
                "driver": "GTiff",
                "height": n_rows,
                "width": n_cols,
                "count": len(layers),
                "dtype": stacked.dtype,
                "crs": "EPSG:32632",
                "transform": transform,
                "compress": "lzw",
            }
            with rasterio.open(output_path, "w", **profile) as dst:
                dst.write(stacked)
                for band_no, label in enumerate(band_names, 1):
                    dst.set_band_description(band_no, label)
            print(f"[S3] Saved: {output_path}")
def download_s3(season, site_position, site_name, date_range=None):
    """Download Sentinel-3 OLCI data for a site through OpenEO and split it into GeoTIFFs.

    Obtains an access token from the Copernicus Data Space identity service
    using ``CDSE_USER`` / ``CDSE_PASSWORD`` from the environment, downloads the
    requested bands as a single NetCDF into
    ``data/{site_name}/{season}/raw/s3/s3_data.nc``, converts it with
    ``_process_netcdf`` and removes the NetCDF afterwards.

    Args:
        season: Year used for the output path and the default date range.
        site_position: ``(lat, lon)`` of the site.
        site_name: Site identifier used in the output path.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"``; defaults to the whole
            calendar year of *season*.

    Raises:
        requests.HTTPError: if the token request is rejected.
        Exception: whatever the OpenEO download raises is logged and re-raised.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    bbox = _get_bbox(lon, lat)
    bands = {
        "SDR_Oa04": "blue",
        "SDR_Oa06": "green",
        "SDR_Oa08": "red",
        "SDR_Oa17": "nir",
    }
    output_dir.mkdir(parents=True, exist_ok=True)
    # Local band key -> band name requested from the OpenEO collection.
    band_map = {
        "SDR_Oa04": "B04",
        "SDR_Oa06": "B06",
        "SDR_Oa08": "B08",
        "SDR_Oa17": "B17",
    }
    openeo_bands = [band_map.get(b, b) for b in bands]
    start_date, end_date = datetime_range.split("/")
    spatial_extent = {
        "west": bbox[0],
        "east": bbox[2],
        "south": bbox[1],
        "north": bbox[3],
    }
    print("[S3] Authenticating...")
    token_response = requests.post(
        "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
        data={
            "grant_type": "password",
            "username": os.getenv("CDSE_USER"),
            "password": os.getenv("CDSE_PASSWORD"),
            "client_id": "cdse-public",
        },
        # Without a timeout an unresponsive identity server blocks the run forever.
        timeout=30,
    )
    token_response.raise_for_status()
    tokens = token_response.json()
    access_token = tokens["access_token"]
    print("[S3] Connecting to OpenEO...")
    conn = openeo.connect("openeo.dataspace.copernicus.eu")
    conn.authenticate_oidc_access_token(access_token)
    print("[S3] Loading collection...")
    datacube = conn.load_collection(
        "SENTINEL3_OLCI_L1B",
        spatial_extent=spatial_extent,
        temporal_extent=[start_date, end_date],
        bands=openeo_bands,
    ).resample_spatial(projection=32632)
    output_file = output_dir / "s3_data.nc"
    print(f"[S3] Downloading NetCDF to {output_file}...")
    print(f"[S3] Temporal extent: {start_date} to {end_date}")
    print(f"[S3] Spatial extent: {spatial_extent}")
    print(f"[S3] Bands: {openeo_bands}")
    print("[S3] This may take several minutes depending on data volume...")
    start_time = time.time()
    try:
        datacube.download(str(output_file), format="NetCDF")
        elapsed = time.time() - start_time
        print(f"[S3] Download completed in {elapsed:.1f} seconds")
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}")
        raise
    print("[S3] Processing NetCDF...")
    process_start = time.time()
    _process_netcdf(output_file, output_dir, bands, openeo_bands)
    process_elapsed = time.time() - process_start
    print(f"[S3] Processing completed in {process_elapsed:.1f} seconds")
    print("[S3] Removing temporary NetCDF file...")
    os.remove(output_file)
    print("[S3] Completed")

View file

@ -1,132 +0,0 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
25.0743,
35.3045
]
},
"properties": {
"country": "",
"seasons": {
"2024": {}
},
"elevation": 68,
"description": "FORTH Heraklion Greece",
"sitename": "forthgr",
"ndvi_selected": true,
"vegetation_type": "Agriculture"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
11.320308,
47.116171
]
},
"properties": {
"country": "",
"seasons": {
"2020": {},
"2024": {}
},
"elevation": 972,
"description": "Neustift Field Site, Stubai Valley, Tyrol, Austria",
"sitename": "innsbruck",
"ndvi_selected": true,
"vegetation_type": "Grassland"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
24.3688,
58.5633
]
},
"properties": {
"country": "",
"seasons": {
"2024": {}
},
"elevation": 3,
"description": "Abandoned peat extraction area, Estonia",
"sitename": "pitsalu",
"ndvi_selected": true,
"vegetation_type": "Wetland"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
19.7673,
64.2437
]
},
"properties": {
"country": "",
"seasons": {
"2023": {}
},
"elevation": 224,
"description": "SITES Svartberget Research Station, Vindeln, Sweden",
"sitename": "vindeln2",
"ndvi_selected": true,
"vegetation_type": "Deciduous Broadleaf"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
-6.0033,
36.7455
]
},
"properties": {
"country": "",
"seasons": {
"2024": {}
},
"elevation": 56,
"description": "Sun flower plot, Jerez, Spain",
"sitename": "sunflowerjerez1",
"ndvi_selected": true,
"vegetation_type": "Agriculture"
}
},
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
26.9837,
42.6558
]
},
"properties": {
"country": "",
"seasons": {
"2024": {}
},
"elevation": 262,
"description": "Institute of Agriculture in Karnobat (selection fields)",
"sitename": "institutekarnobat",
"ndvi_selected": true,
"vegetation_type": "Agriculture"
}
}
]
}

View file

@ -1,84 +0,0 @@
#!/bin/bash
# Deployment helper.
#
# Usage: <script> {setup|upload|code} [server]
#   setup  - copy the code to the server and run the remote setup (default mode)
#   upload - rsync the local data/ directory to the server's data directory
#   code   - rsync the code only, without running the remote setup
#
# Abort on the first failing command.
set -e
# Positional arguments with defaults: mode, then SSH target.
MODE="${1:-setup}"
SERVER="${2:-root@49.12.2.88}"
# Remote install location and the data directory inside it.
APP_DIR="/opt/satellite-fusion"
DATA_DIR="$APP_DIR/data"
case "$MODE" in
setup)
echo "Deploying to $SERVER..."
# Stage a clean copy (no caches, .git, data/ or .env) in a temp dir so that
# the --delete sync below mirrors exactly this staged tree.
TEMP_DIR=$(mktemp -d)
rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$TEMP_DIR/"
# Ship a credentials template; the remote setup copies it to .env if none exists.
cat > "$TEMP_DIR/.env.example" <<EOF
CDSE_USER=your_username_here
CDSE_PASSWORD=your_password_here
EOF
ssh $SERVER "mkdir -p $APP_DIR"
rsync -av --delete "$TEMP_DIR/" "$SERVER:$APP_DIR/"
rm -rf "$TEMP_DIR"
# Remote setup. The heredoc delimiter is unquoted, so $APP_DIR and $DATA_DIR
# are expanded locally before the script text is sent to the server.
ssh $SERVER <<ENDSSH
set -e
cd $APP_DIR
# Find/install Python 3.11
if ! command -v python3.11 &> /dev/null; then
apt-get update -qq
apt-get install -y python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
apt-get install -y -t trixie-backports python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
apt-get install -y software-properties-common
add-apt-repository -y ppa:deadsnakes/ppa 2>/dev/null || true
apt-get update -qq
apt-get install -y python3.11 python3.11-venv python3.11-dev
}
}
fi
# Setup venv
[ -d venv ] && rm -rf venv
python3.11 -m venv venv
source venv/bin/activate
pip install --upgrade pip -q
pip install -r requirements.txt -q
pip install git+https://github.com/DHI-GRAS/efast.git -q
# Setup .env
[ ! -f .env ] && [ -f .env.example ] && cp .env.example .env
# Setup systemd service
if [ -f satellite-fusion-web.service ]; then
sed "s|/opt/satellite-fusion|$APP_DIR|g" satellite-fusion-web.service | \
sed "s|--directory /opt/satellite-fusion|--directory $APP_DIR/webapp|g" > /tmp/satellite-fusion-web.service
cp /tmp/satellite-fusion-web.service /etc/systemd/system/
systemctl daemon-reload
fi
# Create data directory and webapp/data symlink
mkdir -p $DATA_DIR
ln -sf ../data $APP_DIR/webapp/data
ENDSSH
echo "Setup complete!"
;;
upload)
# Data only: sync the local data/ directory into the remote data directory.
echo "Uploading data to $SERVER..."
rsync -avh --progress --exclude='*.pyc' --exclude='__pycache__' data/ "$SERVER:$DATA_DIR/"
echo "Data upload complete!"
;;
code)
# Code only: same excludes as setup, but no --delete and no remote setup steps.
echo "Uploading code to $SERVER..."
rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$SERVER:$APP_DIR/"
echo "Code upload complete!"
;;
*)
# Unknown mode: print usage and fail.
echo "Usage: $0 {setup|upload|code} [server]"
echo " setup - Deploy code and setup server (default)"
echo " upload - Upload data directory only"
echo " code - Upload code files only (no setup)"
exit 1
;;
esac

176
fusion.py
View file

@ -1,176 +0,0 @@
"""EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
from datetime import datetime, timedelta
from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
def _import_efast():
    """Import and return the optional ``efast`` package on first use.

    Deferring the import keeps this module importable when ``efast`` is not
    installed and none of the fusion functions are called.

    Raises:
        ImportError: if ``efast`` cannot be imported. The message carries the
            install command and the original error is chained as ``__cause__``.
    """
    try:
        import efast
    except ImportError as exc:
        # Chain the cause so a broken transitive dependency stays visible.
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        ) from exc
    return efast
def run_efast(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
):
    """Run EFAST reflectance (``REFL``) fusion for every day in the date range.

    Input and output directories default to ``s2``, ``s3`` and ``fusion`` (or
    ``fusion_sigma{sigma}``) below the directory returned by ``_get_base_dir``.
    Each day is fused independently; a failure on one day is printed and the
    loop moves on to the next day.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    s2_output_dir = s2_output_dir or (base_dir / "s2")
    s3_output_dir = s3_output_dir or (base_dir / "s3")
    if not fusion_output_dir:
        fusion_output_dir = base_dir / (
            f"fusion_sigma{sigma}" if sigma else "fusion"
        )
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()

    first_text, last_text = datetime_range.split("/")
    first_day = datetime.strptime(first_text, "%Y-%m-%d")
    last_day = datetime.strptime(last_text, "%Y-%m-%d")

    # Options are identical for every day, so build them once.
    fusion_options = {
        "product": "REFL",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        fusion_options["sigma"] = sigma

    for day_index in range((last_day - first_day).days + 1):
        current_date = first_day + timedelta(days=day_index)
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"REFL_{date_str}.tif"
        try:
            efast.fusion(
                current_date,
                s3_output_dir,
                s2_output_dir,
                fusion_output_dir,
                **fusion_options,
            )
            if output_file.exists():
                print(f"[EFAST] Saved: {output_file}")
            else:
                print(f"[EFAST] No output for {date_str} (insufficient nearby data)")
        except Exception as e:
            print(f"[EFAST] Error processing {date_str}: {e}")
    print("[EFAST] Completed")
def run_all_efast_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run EFAST fusion for all 4 scenarios. Expects prepared_*/s2 and prepared_*/s3 to exist.

    The scenarios are both cleaning strategies, each first without an explicit
    sigma and then with ``sigma_value``.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for scenario_sigma in (None, sigma_value):
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )
def run_efast_itb(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
):
    """Run EFAST fusion of the ``GCC`` product for every day in the date range.

    Same day-by-day loop as the reflectance run, but directories default to
    ``s2``, ``s3`` and ``fusion`` (or ``fusion_sigma{sigma}``) below the
    directory returned by ``_get_itb_base_dir`` and the expected output file
    is ``GCC_{date}.tif``. Per-day failures are printed and skipped.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
    s2_output_dir = s2_output_dir or (base_dir / "s2")
    s3_output_dir = s3_output_dir or (base_dir / "s3")
    if not fusion_output_dir:
        fusion_output_dir = base_dir / (
            f"fusion_sigma{sigma}" if sigma else "fusion"
        )
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()

    first_text, last_text = datetime_range.split("/")
    first_day = datetime.strptime(first_text, "%Y-%m-%d")
    last_day = datetime.strptime(last_text, "%Y-%m-%d")

    # Options do not change from day to day.
    fusion_options = {
        "product": "GCC",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        fusion_options["sigma"] = sigma

    for day_index in range((last_day - first_day).days + 1):
        current_date = first_day + timedelta(days=day_index)
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"GCC_{date_str}.tif"
        try:
            efast.fusion(
                current_date,
                s3_output_dir,
                s2_output_dir,
                fusion_output_dir,
                **fusion_options,
            )
            if output_file.exists():
                print(f"[EFAST-ITB] Saved: {output_file}")
            else:
                print(f"[EFAST-ITB] No output for {date_str}")
        except Exception as e:
            print(f"[EFAST-ITB] Error {date_str}: {e}")
    print("[EFAST-ITB] Completed")
def run_all_efast_itb_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run ``run_efast_itb`` for both cleaning strategies, without sigma and with ``sigma_value``."""
    for strategy in ("aggressive", "nonaggressive"):
        for scenario_sigma in (None, sigma_value):
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )

View file

@ -1,263 +0,0 @@
"""
No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
and day offsets vs. PhenoCam ``phenocam_phenology.json``.
Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
(:mod:`gap_validation.phenology_offsets`).
"""
from __future__ import annotations
import argparse
import json
import re
from datetime import datetime
from pathlib import Path
from metrics_stats import _norm_date_key, load_timeseries
from phenology_timesat import (
_timesat as _timesat_pkg,
build_yraw_three_years,
iter_sites_seasons_from_sites_geojson,
phenocam_phenology_path,
run_timesat_phenology_from_yraw,
)
FUSION_SCENARIO_KEYS: tuple[str, ...] = (
"aggressive_sigma20",
"aggressive_sigma30",
"nonaggressive_sigma20",
"nonaggressive_sigma30",
"aggressive_sigma20_itb",
"aggressive_sigma30_itb",
"nonaggressive_sigma20_itb",
"nonaggressive_sigma30_itb",
)
def fusion_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path of the fusion phenology JSON for a site and season."""
    relative = f"data/{site_name}/{season}/fusion_phenology.json"
    return Path(relative)
def parse_scenario_key(key: str) -> tuple[str, int, str]:
    """``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode).

    *mode* is ``"itb"`` when the key ends with ``_itb`` and ``"bti"`` otherwise.

    Raises:
        ValueError: if the key, after removing one trailing ``_itb``, is not
            ``{aggressive|nonaggressive}_sigma{digits}``.
    """
    suffix = "_itb"
    if key.endswith(suffix):
        mode = "itb"
        # Strip only the trailing marker. ``str.replace`` would also remove an
        # ``_itb`` in the middle of the key and let malformed keys through.
        base = key[: -len(suffix)]
    else:
        mode = "bti"
        base = key
    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
    if not m:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    return m.group(1), int(m.group(2)), mode
def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
    """Return the path of the fusion GCC ``timeseries.json`` for one scenario.

    The scenario directory is ``processed_{strategy}_sigma{sigma}`` for the
    ``bti`` mode and ``processed_{strategy}_itb_sigma{sigma}`` otherwise.
    """
    strategy, sigma, mode = parse_scenario_key(scenario_key)
    mode_infix = "" if mode == "bti" else "_itb"
    scenario_dir = f"processed_{strategy}{mode_infix}_sigma{sigma}"
    return Path(f"data/{site_name}/{season}/{scenario_dir}/gcc/fusion/timeseries.json")
def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
    """YYYY-MM-DD → GCC from fusion ``timeseries.json``.

    Entries are dropped when the date key does not normalise, the value is
    ``None`` or not convertible to ``float``, or the value is not finite
    (NaN or ±inf).
    """
    import math  # local import: this block cannot edit the module import list

    raw = load_timeseries(ts_path)
    out: dict[str, float] = {}
    for k, v in raw.items():
        nk = _norm_date_key(k)
        if not nk or v is None:
            continue
        try:
            fv = float(v)
        except (TypeError, ValueError):
            continue
        # The previous ``fv == fv`` test only rejected NaN, although it was
        # commented as a finiteness check; infinities are excluded now too.
        if math.isfinite(fv):
            out[nk] = fv
    return out
def timesat_transitions_from_by_date(
    by_date: dict[str, float], season: int
) -> dict[str, str | float | None]:
    """Run TIMESAT on fusion GCC; return transition dates for *season*.

    With fewer than 10 values TIMESAT is not run and both dates and
    ``timesat_input`` are ``None``. Otherwise the series is stacked over the
    years ``season - 1``, ``season`` and ``season + 1`` before fitting.
    ``n_values`` is the number of entries in *by_date* in both cases.
    """
    green_up = green_down = stack_mode = None
    if len(by_date) >= 10:
        years = (season - 1, season, season + 1)
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
        fit = run_timesat_phenology_from_yraw(yraw, years)
        green_up = fit.get("green_up_50pct_date")
        green_down = fit.get("green_down_50pct_date")
    return {
        "green_up_50pct_date": green_up,
        "green_down_50pct_date": green_down,
        "timesat_input": stack_mode,
        "n_values": len(by_date),
    }
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
    """Return the absolute number of days between two ISO dates, or ``None``.

    Only the first ten characters (``YYYY-MM-DD``) of each string are parsed.
    ``None`` is returned when either argument is empty/``None`` or does not
    parse as a date.
    """
    if not (iso_a and iso_b):
        return None
    try:
        first, second = (
            datetime.strptime(text[:10], "%Y-%m-%d").date()
            for text in (iso_a, iso_b)
        )
    except ValueError:
        return None
    return abs((first - second).days)
def _offsets_vs_reference(
    fused: dict[str, str | float | None], reference: dict
) -> dict[str, int | None]:
    """Return absolute day offsets of the fused green-up/green-down dates vs. the reference.

    Each offset is ``None`` when ``_day_offset`` cannot compute it, e.g. when a
    date is missing on either side.
    """
    green_up_offset = _day_offset(
        fused.get("green_up_50pct_date"), reference.get("green_up_50pct_date")
    )
    green_down_offset = _day_offset(
        fused.get("green_down_50pct_date"), reference.get("green_down_50pct_date")
    )
    return {
        "abs_day_offset_green_up": green_up_offset,
        "abs_day_offset_green_down": green_down_offset,
    }
def compute_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> dict:
    """Compute TIMESAT transition dates for each fusion scenario of one site/season.

    The reference dates are read from the PhenoCam phenology JSON when that
    file exists. A scenario whose ``timeseries.json`` is missing is recorded
    with its workflow and the missing path only; every other scenario gets its
    parsed key parts, the TIMESAT result and the offsets vs. the reference.
    """
    ref_path = phenocam_phenology_path(site_name, season)
    if ref_path.is_file():
        reference = json.loads(ref_path.read_text(encoding="utf-8"))
    else:
        reference = {}

    scenarios: dict[str, dict] = {}
    for key in scenario_keys:
        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
        if not ts_path.is_file():
            scenarios[key] = {
                "workflow": parse_scenario_key(key)[2],
                "missing_timeseries": str(ts_path),
            }
            continue
        fused = timesat_transitions_from_by_date(fusion_gcc_by_date(ts_path), season)
        strategy, sigma, mode = parse_scenario_key(key)
        entry = {
            "workflow": mode,
            "strategy": strategy,
            "sigma": sigma,
            "timeseries_path": str(ts_path),
        }
        entry.update(fused)
        entry.update(_offsets_vs_reference(fused, reference))
        scenarios[key] = entry

    reference_summary = {
        "source": str(ref_path) if ref_path.is_file() else None,
        "green_up_50pct_date": reference.get("green_up_50pct_date"),
        "green_down_50pct_date": reference.get("green_down_50pct_date"),
    }
    return {
        "site_name": site_name,
        "season": season,
        "reference": reference_summary,
        "scenarios": scenarios,
    }
def write_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> Path | None:
    """Compute and write ``fusion_phenology.json`` for one site/season.

    Returns the written path, or ``None`` (after printing a notice) when the
    ``timesat`` package is unavailable.
    """
    out = fusion_phenology_path(site_name, season)
    if _timesat_pkg is None:
        print(
            f"[Fusion phenology] Skipped (no timesat); would write {out}. "
            "pip install timesat"
        )
        return None

    payload = compute_fusion_phenology_for_site(
        site_name, season, scenario_keys=scenario_keys
    )
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")

    # Count scenarios for which at least one transition date was found.
    with_dates = [
        entry
        for entry in payload["scenarios"].values()
        if entry.get("green_up_50pct_date") or entry.get("green_down_50pct_date")
    ]
    n_ok = len(with_dates)
    print(
        f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
        f"≥1 transition date)"
    )
    return out
def write_fusion_phenology_all(
    *,
    sites_geojson: str | Path = "data/sites.geojson",
    seasons: dict[str, int] | None = None,
) -> int:
    """Write fusion phenology for many site/season pairs; return how many were written.

    When *seasons* is a non-empty mapping its ``site -> season`` pairs are
    processed in sorted order and *sites_geojson* is not read; otherwise the
    pairs come from ``iter_sites_seasons_from_sites_geojson``.
    """
    if seasons:
        pairs = sorted(seasons.items())
    else:
        pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)

    written = 0
    for site, season in pairs:
        print(f"=== {site} {season} ===")
        result = write_fusion_phenology_for_site(site, season)
        if result:
            written += 1
    print(f"[Fusion phenology] Processed {written} site/season pair(s).")
    return written
def main() -> None:
    """Command-line entry point: fusion phenology for one site/season or for all sites.

    Exits with a message when ``timesat`` is not installed, or when neither
    ``--all`` nor both ``--site`` and ``--season`` are given.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
    )
    parser.add_argument("--site", type=str, default=None)
    parser.add_argument("--season", type=int, default=None)
    parser.add_argument(
        "--all",
        action="store_true",
        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
    )
    parser.add_argument(
        "--primary-only",
        action="store_true",
        help="With --all: only thesis primary seasons per site.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=Path("data/sites.geojson"),
    )
    args = parser.parse_args()

    if _timesat_pkg is None:
        raise SystemExit("Install timesat: pip install timesat")

    # Primary season per site, used with --all --primary-only.
    primary = {
        "forthgr": 2024,
        "innsbruck": 2024,
        "pitsalu": 2024,
        "vindeln2": 2023,
        "sunflowerjerez1": 2024,
        "institutekarnobat": 2024,
    }

    if not args.all:
        if not args.site or args.season is None:
            raise SystemExit("Provide --site and --season, or use --all --primary-only")
        write_fusion_phenology_for_site(args.site, args.season)
        return

    selected_seasons = primary if args.primary_only else None
    write_fusion_phenology_all(
        sites_geojson=args.sites_geojson,
        seasons=selected_seasons,
    )
if __name__ == "__main__":
main()

View file

@ -1 +0,0 @@
"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""

View file

@ -1,4 +0,0 @@
from gap_validation.run import main
if __name__ == "__main__":
main()

View file

@ -1,135 +0,0 @@
"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI scenario per site)."""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from gap_validation.run import run_validation
# Primary season per site (matches scripts/export_thesis_tables.py).
PRIMARY_SEASON = {
"forthgr": 2024,
"innsbruck": 2024,
"pitsalu": 2024,
"vindeln2": 2023,
"sunflowerjerez1": 2024,
"institutekarnobat": 2024,
}
def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]:
    """Map each ``sitename`` in a GeoJSON file to ``(lat, lon)``.

    Features without a site name or with fewer than two coordinates are
    skipped. GeoJSON stores ``[lon, lat]``; the returned tuples are swapped to
    ``(lat, lon)``.
    """
    collection = json.loads(geojson.read_text(encoding="utf-8"))
    positions: dict[str, tuple[float, float]] = {}
    for feature in collection.get("features", []):
        properties = feature.get("properties") or {}
        geometry = feature.get("geometry") or {}
        site = properties.get("sitename")
        coordinates = geometry.get("coordinates")
        if site and coordinates and len(coordinates) >= 2:
            longitude = float(coordinates[0])
            latitude = float(coordinates[1])
            positions[str(site)] = (latitude, longitude)
    return positions
def _parse_scenario(key: str) -> tuple[str, int | None, str]:
    """``aggressive_sigma20`` → (strategy, sigma, mode).

    *mode* is ``"itb"`` when the key ends with ``_itb`` and ``"bti"``
    otherwise. A sigma of 20 is returned as ``None``; any other sigma is
    returned as an ``int``.

    Raises:
        ValueError: if the key, after removing one trailing ``_itb``, is not
            ``{aggressive|nonaggressive}_sigma{digits}``.
    """
    suffix = "_itb"
    if key.endswith(suffix):
        mode = "itb"
        # Strip only the trailing marker. ``str.replace`` would also remove an
        # ``_itb`` in the middle of the key and let malformed keys through.
        base = key[: -len(suffix)]
    else:
        mode = "bti"
        base = key
    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
    if not m:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    strategy = m.group(1)
    sigma = int(m.group(2))
    # Same result as the earlier nested conditional: only 20 maps to None.
    return strategy, (None if sigma == 20 else sigma), mode
def _best_from_metrics(metrics_path: Path, workflow: str) -> str | None:
    """Best scenario key (max no-gap NSE_PC) for ``workflow`` (``bti`` or ``itb``).

    Reads the ``temporal`` mapping of the metrics JSON and considers only keys
    whose ``_itb`` suffix matches *workflow* and whose value is a dict with a
    numeric, non-NaN ``nse_pc``. Ties keep the first key encountered.

    Returns:
        The winning scenario key, or ``None`` when the file is missing or no
        scenario qualifies.

    Raises:
        ValueError: if *workflow* is neither ``"bti"`` nor ``"itb"``.
    """
    import math  # local import: this block cannot edit the module import list

    if workflow not in ("bti", "itb"):
        raise ValueError(f"workflow must be bti or itb, got {workflow!r}")
    if not metrics_path.is_file():
        return None
    temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {}
    want_itb = workflow == "itb"
    best_key, best_nse = None, None
    for key, entry in temporal.items():
        if key.endswith("_itb") != want_itb or not isinstance(entry, dict):
            continue
        nse = entry.get("nse_pc")
        if not isinstance(nse, (int, float)):
            continue
        # A NaN seen first would be stored as "best" and never replaced, since
        # every ``n > nan`` comparison is False.
        if isinstance(nse, float) and math.isnan(nse):
            continue
        if best_nse is None or nse > best_nse:
            best_key, best_nse = key, nse
    return best_key
def _best_bti_from_metrics(metrics_path: Path) -> str | None:
    """Return the best ``bti`` scenario key from the metrics file, if any."""
    return _best_from_metrics(metrics_path, workflow="bti")
def _best_itb_from_metrics(metrics_path: Path) -> str | None:
    """Return the best ``itb`` scenario key from the metrics file, if any."""
    return _best_from_metrics(metrics_path, workflow="itb")
def _resolve_workflows(workflow: str) -> tuple[str, ...]:
    """Expand ``"both"`` to ``("bti", "itb")``; wrap any other value in a 1-tuple."""
    if workflow == "both":
        return ("bti", "itb")
    return (workflow,)
def main() -> None:
    """CLI entry point: run spatial gap validation for every primary-season site.

    For each site in ``PRIMARY_SEASON`` and each selected workflow, the best
    scenario is read from ``metrics.json`` and handed to ``run_validation``.
    Sites without coordinates and workflows without metrics are skipped with a
    message.
    """
    parser = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument("--write-manifest-only", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        help="Filter gap lengths (default: all 15 and 30 in manifest).",
    )
    cli = parser.parse_args()

    site_coords = _site_positions(cli.sites_geojson)
    selected_workflows = _resolve_workflows(cli.workflow)

    for site, season in sorted(PRIMARY_SEASON.items()):
        coords = site_coords.get(site)
        if not coords:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_file = cli.data_dir / site / str(season) / "metrics.json"
        for workflow in selected_workflows:
            scenario_key = _best_from_metrics(metrics_file, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            print(f"=== {site} {season} {scenario_key} ===")
            result = run_validation(
                site,
                season,
                coords,
                strategy,
                # Only sigma 30 is passed explicitly; anything else uses None.
                30 if sigma == 30 else None,
                mode,
                skip_manifest=False,
                skip_fusion=cli.skip_fusion,
                write_manifest_only=cli.write_manifest_only,
                gap_days_filter=cli.gap_days,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)


if __name__ == "__main__":
    main()

View file

@ -1,65 +0,0 @@
"""Run full-season gap-degraded NSE_PC for all thesis sites (best BtI scenario)."""
from __future__ import annotations
import argparse
from pathlib import Path
from gap_validation.batch_spatial import (
PRIMARY_SEASON,
_best_from_metrics,
_parse_scenario,
_resolve_workflows,
_site_positions,
)
from gap_validation.temporal_pc import run_temporal_pc
def main() -> None:
    """CLI entry point: run full-season temporal gap validation for every site.

    For each site in ``PRIMARY_SEASON`` and each selected workflow, the best
    scenario is read from ``metrics.json`` and handed to ``run_temporal_pc``.
    """
    parser = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument("--gap-days", type=int, action="append")
    cli = parser.parse_args()

    site_coords = _site_positions(cli.sites_geojson)
    selected_workflows = _resolve_workflows(cli.workflow)

    for site, season in sorted(PRIMARY_SEASON.items()):
        coords = site_coords.get(site)
        if not coords:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_file = cli.data_dir / site / str(season) / "metrics.json"
        for workflow in selected_workflows:
            scenario_key = _best_from_metrics(metrics_file, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            print(f"=== {site} {season} temporal {scenario_key} ===")
            result = run_temporal_pc(
                site,
                season,
                coords,
                strategy,
                # Only sigma 30 is passed explicitly; anything else uses None.
                30 if sigma == 30 else None,
                mode,
                skip_manifest=False,
                skip_fusion=cli.skip_fusion,
                gap_days_filter=cli.gap_days,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)


if __name__ == "__main__":
    main()

View file

@ -1,210 +0,0 @@
"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
from __future__ import annotations
import json
import re
from datetime import date, datetime, timedelta
from pathlib import Path
from phenology_timesat import phenocam_phenology_path
# Matches names ending in ``S2A_MSIL2A_<8 digits>_REFL.tif``; group 1 is the
# 8-digit acquisition date (parsed elsewhere in this module with ``%Y%m%d``).
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
# Default gap widths in days for which manifest entries are generated.
DEFAULT_GAP_LENGTHS = (15, 30)
# Phenological transitions a gap window can be centred on.
TRANSITIONS = ("green_up", "green_down")
def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative ``data/<site>/<season>/validation`` directory path."""
    relative = f"data/{site_name}/{season}/validation"
    return Path(relative)
def _parse_iso_date(s, season: int) -> date | None:
if not s or not isinstance(s, str):
return None
try:
d = datetime.strptime(s[:10], "%Y-%m-%d").date()
except ValueError:
return None
y0, y1 = date(season, 1, 1), date(season, 12, 31)
return d if y0 <= d <= y1 else None
def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
) -> date | None:
    """Return the 50 % amplitude date stored for ``transition``, if available.

    The date is read from the ``green_up_50pct_date`` / ``green_down_50pct_date``
    field of the phenology JSON (``phenology_path`` or the default PhenoCam
    path). ``None`` is returned when the file is missing or unreadable, or when
    the stored date is absent, invalid, or outside ``season``.

    Raises:
        ValueError: if ``transition`` is not one of ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    source = phenology_path or phenocam_phenology_path(site_name, season)
    if not source.is_file():
        return None
    try:
        record = json.loads(source.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    if transition == "green_up":
        field = "green_up_50pct_date"
    else:
        field = "green_down_50pct_date"
    return _parse_iso_date(record.get(field), season)
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Legacy midpoint: green-up date, else green-down date, else 1 July."""
    for transition in ("green_up", "green_down"):
        if found := transition_midpoint(site_name, season, transition, phenology_path):
            return found
    return date(season, 7, 1)
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """Return an inclusive ``gap_days``-wide window around ``mid``.

    ``gap_days // 2`` days precede ``mid`` and the remainder follow it. A window
    that crosses a calendar-year boundary of ``season`` is shifted back inside
    the year and clamped at the opposite boundary.
    """
    first_day, last_day = date(season, 1, 1), date(season, 12, 31)
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    lo = mid - timedelta(days=days_before)
    hi = mid + timedelta(days=days_after)
    if lo < first_day:
        # Push the overshoot at the start of the year onto the end.
        hi = min(last_day, hi + (first_day - lo))
        lo = first_day
    if hi > last_day:
        # Push the overshoot at the end of the year onto the start.
        lo = max(first_day, lo - (hi - last_day))
        hi = last_day
    return lo, hi
def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """List ``(acquisition_date, filename)`` for ``*REFL.tif`` files, sorted by date.

    Files whose name does not match ``REFL_DATE_RE`` are ignored. A missing
    directory yields an empty list.
    """
    if not prepared_s2.is_dir():
        return []
    found: list[tuple[date, str]] = []
    for tif in sorted(prepared_s2.glob("*REFL.tif")):
        hit = REFL_DATE_RE.search(tif.name)
        if hit is None:
            continue
        acquired = datetime.strptime(hit.group(1), "%Y%m%d").date()
        found.append((acquired, tif.name))
    return sorted(found, key=lambda pair: pair[0])
def nearest_s2_acquisition(
prediction: date, pairs: list[tuple[date, str]]
) -> tuple[date, str] | None:
if not pairs:
return None
return min(pairs, key=lambda t: abs((t[0] - prediction).days))
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """Build one manifest entry per ``(transition, gap_days)`` combination.

    Each entry records the transition midpoint, the gap window centred on it,
    the prediction date (the midpoint itself) and the S2 acquisition nearest to
    that date, which is the scene to withhold. Transitions without a midpoint
    are skipped.
    """
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    acquisitions = list_s2_refl_dates(s2_dir)
    manifest_rows: list[dict] = []
    for transition in transitions:
        mid = transition_midpoint(site_name, season, transition)
        if mid is None:
            continue
        for gap_days in gap_lengths:
            window_start, window_end = centered_window(mid, gap_days, season)
            closest = nearest_s2_acquisition(mid, acquisitions)
            withheld_date = closest[0].isoformat() if closest is not None else None
            withheld_filename = closest[1] if closest is not None else None
            manifest_rows.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "midpoint_rule": f"{transition}_50pct_date",
                    "midpoint_date": mid.isoformat(),
                    "window_start": window_start.isoformat(),
                    "window_end": window_end.isoformat(),
                    "prediction_date": mid.isoformat(),
                    "withheld_s2_date": withheld_date,
                    "withheld_s2_filename": withheld_filename,
                }
            )
    return manifest_rows
def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
) -> Path:
    """Write the reproducibility sidecar listing withheld scenes and gap windows.

    The file is ``gap_withheld_images.json`` inside the site's validation
    directory. It holds one record per manifest entry, carrying the transition,
    gap length, midpoint, window and withheld S2 scene.

    Returns:
        Path of the written JSON file.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # Create the directory here so a standalone call does not depend on a
    # caller having created it first.
    path.parent.mkdir(parents=True, exist_ok=True)
    copied_fields = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{field: e.get(field) for field in copied_fields},
        }
        for e in entries
    ]
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
    *,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Write ``gap_manifest.json`` and the withheld-image sidecar for a site.

    The validation directory is created if needed.

    Returns:
        Path of the written manifest.
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)
    entries = build_manifest_entries(
        site_name,
        season,
        gap_lengths=gap_lengths,
        transitions=transitions,
        s2_calendar_strategy=s2_calendar_strategy,
    )
    manifest_file = target_dir / "gap_manifest.json"
    document = json.dumps(
        {
            "site_name": site_name,
            "season": season,
            "site_position_lat_lon": list(site_position),
            "s2_calendar_strategy": s2_calendar_strategy,
            "entries": entries,
        },
        indent=2,
    )
    manifest_file.write_text(document + "\n", encoding="utf-8")
    write_gap_withheld_images(site_name, season, entries)
    return manifest_file
def load_manifest(site_name: str, season: int) -> dict:
    """Load ``gap_manifest.json`` from the site's validation directory.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
    """
    path = validation_dir(site_name, season) / "gap_manifest.json"
    if path.is_file():
        return json.loads(path.read_text(encoding="utf-8"))
    raise FileNotFoundError(f"Missing manifest: {path}")

View file

@ -1,438 +0,0 @@
"""Export 2×4 RGB panels for Tier-A gap validation (thesis appendix).
Crops follow the same fusion-valid bounding box as ``postprocessing.process_cropped``
and the webapp (``processed_*`` / ``common.js``), anchored on gap-degraded fusion at the
prediction date; S2 and S3 are read from prepared stacks on that shared window.
"""
from __future__ import annotations
import json
import re
from datetime import date, datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from rasterio import windows
from rasterio.transform import rowcol
from rasterio.warp import Resampling, reproject
from gap_validation.s2_mask_dir import acquisition_yyyymmdd_in_window, yyyymmdd_from_iso
# Matches names ending in ``S2A_MSIL2A_<8 digits>_REFL.tif``; group 1 is the date.
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
# Matches names ending in ``composite_<8 digits>.tif``; group 1 is the date.
S3_COMPOSITE_RE = re.compile(r"composite_(\d{8})\.tif$")
# Transitions rendered as figure rows, in row order.
TRANSITIONS = ("green_up", "green_down")
# Column titles, in the same order as the four layers read for each row.
COL_TITLES = ("Withheld S2", "Gap fusion", "S3 composite", "Nearest S2")
# Y-axis label shown on the first column of each transition row.
ROW_LABELS = {"green_up": "Green-up", "green_down": "Green-down"}
# Pixels must be strictly above this value to count as valid data.
VALID_REFL_THRESHOLD = 0.001
# RGB colour painted on pixels that are not valid.
NODATA_RGB = (0.15, 0.15, 0.15)
def _parse_bti_scenario(scenario: str) -> tuple[str, int]:
m = re.match(r"^(aggressive|nonaggressive)_sigma(20|30)$", scenario)
if not m:
raise ValueError(f"expected BtI scenario key, got {scenario!r}")
return m.group(1), int(m.group(2))
def _prepared_base(data_dir: Path, site: str, season: int, strategy: str) -> Path:
return data_dir / site / str(season) / f"prepared_{strategy}"
def _s2_strategy_fallbacks(strategy: str, manifest: dict) -> tuple[str, ...]:
"""Prepared trees to try for S2 REFL (best-BtI first, then manifest calendar)."""
order: list[str] = []
for s in (strategy, manifest.get("s2_calendar_strategy")):
if isinstance(s, str) and s and s not in order:
order.append(s)
for s in ("aggressive", "nonaggressive"):
if s not in order:
order.append(s)
return tuple(order)
def _find_prepared_s2_refl(
data_dir: Path,
site: str,
season: int,
filename: str,
strategies: tuple[str, ...],
) -> Path | None:
for strat in strategies:
p = _prepared_base(data_dir, site, season, strat) / "s2" / filename
if p.is_file():
return p
return None
def _gap_spatial_fusion_dir(
data_dir: Path,
site: str,
season: int,
gap_days: int,
transition: str,
strategy: str,
sigma: int,
) -> Path:
return (
data_dir
/ site
/ str(season)
/ "validation"
/ "fusion"
/ f"gap_{gap_days}_{transition}"
/ f"{strategy}_sigma{sigma}_bti"
)
def _iso_to_date(iso_d: str) -> date:
return datetime.strptime(iso_d[:10], "%Y-%m-%d").date()
def _exclude_ymds(entry: dict) -> set[str]:
    """Return the withheld scene's ``YYYYMMDD`` as a one-element set, or an empty set."""
    hit = REFL_DATE_RE.search(entry.get("withheld_s2_filename") or "")
    if hit is None:
        return set()
    return {hit.group(1)}
def nearest_stack_s2(
    prepared_s2_dir: Path,
    prediction_iso: str,
    *,
    exclude_ymds: set[str],
) -> Path | None:
    """Return the S2 REFL file closest in time to ``prediction_iso``.

    Files whose ``YYYYMMDD`` is in ``exclude_ymds`` are not considered. Returns
    ``None`` when the directory is missing or no candidate remains.
    """
    if not prepared_s2_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    winner: Path | None = None
    winner_gap: int | None = None
    for tif in prepared_s2_dir.glob("S2A_MSIL2A_*_REFL.tif"):
        hit = REFL_DATE_RE.search(tif.name)
        if hit is None:
            continue
        ymd = hit.group(1)
        if ymd in exclude_ymds:
            continue
        gap = abs((datetime.strptime(ymd, "%Y%m%d").date() - target).days)
        if winner_gap is None or gap < winner_gap:
            winner, winner_gap = tif, gap
    return winner
def nearest_s3_composite(prepared_s3_dir: Path, prediction_iso: str) -> Path | None:
    """Return the ``composite_<YYYYMMDD>.tif`` closest in time to ``prediction_iso``.

    Returns ``None`` when the directory is missing or holds no matching file.
    """
    if not prepared_s3_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    winner: Path | None = None
    winner_gap: int | None = None
    for tif in prepared_s3_dir.glob("composite_*.tif"):
        hit = S3_COMPOSITE_RE.search(tif.name)
        if hit is None:
            continue
        gap = abs((datetime.strptime(hit.group(1), "%Y%m%d").date() - target).days)
        if winner_gap is None or gap < winner_gap:
            winner, winner_gap = tif, gap
    return winner
def _crop_window_from_fusion(fusion_path: Path) -> dict | None:
    """Compute the bounding window of valid pixels in a fusion raster.

    A pixel is valid when it is finite and above ``VALID_REFL_THRESHOLD``; a row
    or column is kept if any band has a valid pixel in it.

    Returns:
        A dict with ``window``, ``crop_transform``, ``full_transform``, ``crs``
        and ``profile``, or ``None`` if the file is missing or has no valid pixel.
    """
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as src:
        # Assumes read() returns (bands, rows, cols), as the axis reductions
        # below imply.
        stack = src.read()
        usable = np.isfinite(stack) & (stack > VALID_REFL_THRESHOLD)
        valid_rows = np.flatnonzero(usable.any(axis=(0, 2)))
        valid_cols = np.flatnonzero(usable.any(axis=(0, 1)))
        if valid_rows.size == 0 or valid_cols.size == 0:
            return None
        top, bottom = int(valid_rows[0]), int(valid_rows[-1])
        left, right = int(valid_cols[0]), int(valid_cols[-1])
        win = windows.Window(left, top, right - left + 1, bottom - top + 1)
        return {
            "window": win,
            "crop_transform": windows.transform(win, src.transform),
            "full_transform": src.transform,
            "crs": src.crs,
            "profile": src.profile.copy(),
        }
def _read_bgr_prepared_s2(prepared_refl: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Read bands 1-3 of a prepared S2 raster on ``crop["window"]`` as float64.

    The bands are returned in file order and treated as (blue, green, red).
    Returns ``None`` if the file is missing or has fewer than three bands.
    """
    if not prepared_refl.is_file():
        return None
    with rasterio.open(prepared_refl) as src:
        if src.count < 3:
            return None
        first_three = src.read(indexes=(1, 2, 3), window=crop["window"])
    blue, green, red = first_three
    return blue.astype(np.float64), green.astype(np.float64), red.astype(np.float64)
def _read_bgr_gap_fusion(fusion_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Read bands 1-3 of a gap-fusion raster on ``crop["window"]`` as float64.

    The bands are returned in file order and treated as (blue, green, red).
    Returns ``None`` if the file is missing or has fewer than three bands.
    """
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as src:
        if src.count < 3:
            return None
        first_three = src.read(indexes=(1, 2, 3), window=crop["window"])
    blue, green, red = first_three
    return blue.astype(np.float64), green.astype(np.float64), red.astype(np.float64)
def _read_bgr_prepared_s3(s3_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Resample an S3 composite onto the fusion grid and read bands 1-3 on the crop.

    Every band is reprojected with nearest-neighbour resampling into an
    in-memory raster built from ``crop["profile"]``. The crop window is then
    read from that raster and returned as float64 (blue, green, red).
    Returns ``None`` if the file is missing or has fewer than three bands.
    """
    if not s3_path.is_file():
        return None
    with rasterio.open(s3_path) as src:
        if src.count < 3:
            return None
        grid_profile = crop["profile"].copy()
        grid_profile.update({"dtype": "float32", "count": src.count})
        with rasterio.MemoryFile() as memfile, memfile.open(**grid_profile) as resampled:
            for band_no in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, band_no),
                    destination=rasterio.band(resampled, band_no),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=crop["full_transform"],
                    dst_crs=crop["crs"],
                    resampling=Resampling.nearest,
                )
            blue, green, red = resampled.read(indexes=(1, 2, 3), window=crop["window"])
    return blue.astype(np.float64), green.astype(np.float64), red.astype(np.float64)
def _refl_valid(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Boolean mask of pixels that are finite and above the threshold in all three bands."""
    all_finite = np.isfinite(blue) & np.isfinite(green) & np.isfinite(red)
    all_above = (
        (blue > VALID_REFL_THRESHOLD)
        & (green > VALID_REFL_THRESHOLD)
        & (red > VALID_REFL_THRESHOLD)
    )
    return all_finite & all_above
def _panel_stretch_limits(
blue: np.ndarray, green: np.ndarray, red: np.ndarray, valid: np.ndarray
) -> tuple[float, float]:
"""Per-panel 2--98 % stretch on positive reflectance (webapp ``common.js`` style)."""
if not valid.any():
return 0.0, 1.0
vals = np.concatenate([red[valid], green[valid], blue[valid]])
lo, hi = np.percentile(vals, (2, 98))
if hi <= lo:
return 0.0, 1.0
return float(lo), float(hi)
def _bgr_to_rgba(
    blue: np.ndarray,
    green: np.ndarray,
    red: np.ndarray,
    *,
    valid: np.ndarray,
    vmin: float,
    vmax: float,
) -> np.ndarray:
    """Build an opaque float32 RGBA image from three bands.

    Valid pixels are linearly stretched from ``[vmin, vmax]`` to ``[0, 1]`` and
    clipped. Invalid pixels are painted with ``NODATA_RGB``.
    """
    image = np.zeros((*blue.shape, 4), dtype=np.float32)
    image[..., 3] = 1.0
    # Guard against a zero-width stretch.
    span = (vmax - vmin) or 1.0
    for channel, band in enumerate((red, green, blue)):
        stretched = np.clip((band - vmin) / span, 0.0, 1.0)
        image[..., channel] = np.where(valid, stretched, NODATA_RGB[channel])
    return image
def _phenocam_pixel_cropped(
    crop: dict, site_position_lat_lon: tuple[float, float]
) -> tuple[int, int] | None:
    """Locate the site in the cropped raster and return its ``(row, col)``.

    The position is reprojected from geographic lon/lat into ``crop["crs"]`` and
    then converted with ``crop["crop_transform"]``. The result may lie outside
    the crop; callers must bounds-check it.

    Returns:
        ``(row, col)`` in crop pixel coordinates, or ``None`` if the conversion
        fails. The marker is best-effort and must not break figure export.
    """
    # Local import: this name is not among the module's existing rasterio imports.
    from rasterio.warp import transform as warp_transform

    lat, lon = site_position_lat_lon
    try:
        xs, ys = [lon], [lat]
        if crop["crs"] is not None:
            # NOTE(review): assumes the site position is WGS84 lon/lat
            # (EPSG:4326) — confirm against the source of the coordinates.
            xs, ys = warp_transform("EPSG:4326", crop["crs"], xs, ys)
        # The CRS was previously passed as ``op``, which is rowcol's rounding
        # callable. That call always failed and the marker was never drawn.
        r, c = rowcol(crop["crop_transform"], xs, ys)
        return int(r[0]), int(c[0])
    except Exception:
        return None
def _resolve_row_paths(
    data_dir: Path,
    site: str,
    season: int,
    entry: dict,
    strategy: str,
    sigma: int,
    *,
    gap_days: int,
    manifest: dict,
) -> tuple[Path, Path, Path, Path] | None:
    """Resolve the four rasters for one figure row.

    Returns ``(withheld S2, gap fusion, S3 composite, nearest S2)``, or ``None``
    if any of them is unavailable. The nearest S2 scene excludes every
    acquisition inside the gap window as well as the withheld scene itself.
    """
    prediction_iso = entry["prediction_date"]
    pred_ymd = yyyymmdd_from_iso(prediction_iso)
    transition = entry["transition"]
    prepared_root = _prepared_base(data_dir, site, season, strategy)
    search_order = _s2_strategy_fallbacks(strategy, manifest)

    withheld_name = entry.get("withheld_s2_filename")
    if not withheld_name:
        return None
    withheld_path = _find_prepared_s2_refl(data_dir, site, season, withheld_name, search_order)

    fusion_dir = _gap_spatial_fusion_dir(
        data_dir, site, season, gap_days, transition, strategy, sigma
    )
    fusion_path = fusion_dir / f"REFL_{pred_ymd}.tif"

    # Prefer the composite dated exactly on the prediction day.
    s3_dir = prepared_root / "s3"
    s3_path: Path | None = s3_dir / f"composite_{pred_ymd}.tif"
    if not s3_path.is_file():
        s3_path = nearest_s3_composite(s3_dir, prediction_iso)

    window_start = _iso_to_date(entry["window_start"])
    window_end = _iso_to_date(entry["window_end"])
    neighbour: Path | None = None
    for strat in search_order:
        s2_dir = _prepared_base(data_dir, site, season, strat) / "s2"
        blocked = acquisition_yyyymmdd_in_window(s2_dir, window_start, window_end)
        blocked = blocked | _exclude_ymds(entry)
        neighbour = nearest_stack_s2(s2_dir, prediction_iso, exclude_ymds=blocked)
        if neighbour is not None:
            break

    if withheld_path is None or not fusion_path.is_file():
        return None
    if s3_path is None or neighbour is None:
        return None
    return withheld_path, fusion_path, s3_path, neighbour
def _select_panel_rows(
    manifest: dict,
    data_dir: Path,
    site: str,
    season: int,
    strategy: str,
    sigma: int,
    gap_days: int,
) -> list[tuple[str, dict, tuple[Path, Path, Path, Path]]]:
    """Collect ``(transition, entry, paths)`` for transitions whose inputs all resolve."""
    rows: list[tuple[str, dict, tuple[Path, Path, Path, Path]]] = []
    for transition in TRANSITIONS:
        entry = next(
            (
                e
                for e in manifest["entries"]
                if e.get("gap_days") == gap_days and e.get("transition") == transition
            ),
            None,
        )
        if not entry:
            continue
        paths = _resolve_row_paths(
            data_dir,
            site,
            season,
            entry,
            strategy,
            sigma,
            gap_days=gap_days,
            manifest=manifest,
        )
        if paths is None:
            continue
        rows.append((transition, entry, paths))
    return rows


def build_site_panel(
    site: str,
    season: int,
    data_dir: Path,
    out_png: Path,
    *,
    best_bti_scenario: str,
    site_label: str,
    site_position_lat_lon: tuple[float, float] | None = None,
    gap_days: int = 30,
) -> bool:
    """Export an N×4 RGB panel figure (one row per resolvable transition).

    Columns follow ``COL_TITLES``. Each panel gets its own contrast stretch. The
    site position, if given, is marked with a red ``+`` on the first column.

    Returns:
        ``False`` when the manifest is missing or no transition row can be
        resolved; ``True`` once the PNG has been written. A row whose rasters
        cannot be read is left blank in the figure rather than failing the export.

    Raises:
        ValueError: if ``best_bti_scenario`` is not a BtI scenario key.
    """
    manifest_path = data_dir / site / str(season) / "validation" / "gap_manifest.json"
    if not manifest_path.is_file():
        return False
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    strategy, sigma = _parse_bti_scenario(best_bti_scenario)
    rows = _select_panel_rows(manifest, data_dir, site, season, strategy, sigma, gap_days)
    if not rows:
        return False
    # Reader order matches the order of paths returned by _resolve_row_paths.
    readers = (
        _read_bgr_prepared_s2,
        _read_bgr_gap_fusion,
        _read_bgr_prepared_s3,
        _read_bgr_prepared_s2,
    )
    fig, axes = plt.subplots(
        len(rows),
        4,
        figsize=(12.0, 2.8 * len(rows)),
        squeeze=False,
        constrained_layout=True,
    )
    # Close the figure even when reading or saving raises, so it does not
    # accumulate in pyplot's figure registry across sites.
    try:
        for row_idx, (transition, entry, paths) in enumerate(rows):
            row_title = ROW_LABELS.get(transition, transition)
            # The crop is anchored on the gap-fusion raster (second path).
            crop = _crop_window_from_fusion(paths[1])
            if crop is None:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue
            layers: list[tuple[np.ndarray, np.ndarray, np.ndarray]] = []
            for path, read_fn in zip(paths, readers, strict=True):
                bgr = read_fn(path, crop)
                if bgr is None:
                    layers = []
                    break
                layers.append(bgr)
            if len(layers) != 4:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue
            mark: tuple[int, int] | None = None
            if site_position_lat_lon:
                mark = _phenocam_pixel_cropped(crop, site_position_lat_lon)
            for col_idx, (col_title, bgr) in enumerate(zip(COL_TITLES, layers, strict=True)):
                ax = axes[row_idx, col_idx]
                blue, green, red = bgr
                valid = _refl_valid(blue, green, red)
                vmin, vmax = _panel_stretch_limits(blue, green, red, valid)
                rgba = _bgr_to_rgba(
                    blue, green, red, valid=valid, vmin=vmin, vmax=vmax
                )
                ax.imshow(rgba, origin="upper", aspect="equal", interpolation="nearest")
                h, w = rgba.shape[:2]
                if col_idx == 0 and mark and 0 <= mark[0] < h and 0 <= mark[1] < w:
                    ax.plot(
                        mark[1],
                        mark[0],
                        "+",
                        color="red",
                        markersize=8,
                        markeredgewidth=1.2,
                    )
                if row_idx == 0:
                    ax.set_title(col_title, fontsize=9)
                if col_idx == 0:
                    ax.set_ylabel(row_title, fontsize=9)
                ax.set_xticks([])
                ax.set_yticks([])
        fig.suptitle(f"{site_label} ({season})", fontsize=10)
        out_png.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_png, dpi=150)
    finally:
        plt.close(fig)
    return True

View file

@ -1,200 +0,0 @@
"""EFAST with symlinked S2 dir (gap window omitted); outputs under validation/."""
from __future__ import annotations
from datetime import datetime
from pathlib import Path
from tempfile import TemporaryDirectory
from fusion import run_efast, run_efast_itb
from preparation import _get_base_dir, _get_itb_base_dir
from gap_validation.s2_mask_dir import (
acquisition_yyyymmdd_in_window,
assert_no_leakage,
build_masked_s2_dir_bti,
build_masked_s2_dir_itb,
)
def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
    """Return the ``s3`` sub-directory of the BtI prepared tree for this site."""
    prepared_root = _get_base_dir(season, site_name, strategy)
    return prepared_root / "s3"
def validation_fusion_dir(
site_name: str,
season: int,
gap_days: int,
transition: str,
strategy: str,
sigma: int | None,
mode: str,
) -> Path:
"""``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``."""
sig = 30 if sigma == 30 else 20
return (
Path(f"data/{site_name}/{season}/validation")
/ "fusion"
/ f"gap_{gap_days}_{transition}"
/ f"{strategy}_sigma{sig}_{mode}"
)
def excluded_acquisition_days(
    prepared_s2: Path,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
) -> set[str]:
    """Return the S2 days inside the gap window plus the withheld acquisition day."""

    def _as_date(iso_text: str):
        return datetime.strptime(iso_text[:10], "%Y-%m-%d").date()

    days = acquisition_yyyymmdd_in_window(
        prepared_s2, _as_date(window_start_iso), _as_date(window_end_iso)
    )
    days.add(withheld_yyyymmdd)
    return days
def run_masked_fusion_one_date(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    prediction_date_iso: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
) -> Path:
    """Run EFAST for a single prediction date on a gap-degraded S2 stack.

    A temporary S2 directory is built without the gap-window acquisitions and
    the withheld scene, checked for leakage, and passed to the BtI or ItB
    fusion run.

    Returns:
        ``fusion_output_dir`` (created if needed).

    Raises:
        ValueError: if ``mode`` is neither ``"bti"`` nor ``"itb"``.
    """
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    prediction_day = prediction_date_iso[:10]
    date_range = f"{prediction_day}/{prediction_day}"
    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        # Pick the per-workflow building blocks once, then run one shared sequence.
        if mode == "bti":
            base_dir_fn, mask_fn, efast_fn = _get_base_dir, build_masked_s2_dir_bti, run_efast
        elif mode == "itb":
            base_dir_fn, mask_fn, efast_fn = (
                _get_itb_base_dir,
                build_masked_s2_dir_itb,
                run_efast_itb,
            )
        else:
            raise ValueError(f"mode must be bti or itb, got {mode!r}")
        prep_s2 = base_dir_fn(season, site_name, strategy) / "s2"
        excluded = excluded_acquisition_days(
            prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
        )
        mask_fn(prep_s2, excluded, tmp_s2)
        assert_no_leakage(withheld_yyyymmdd, tmp_s2)
        efast_fn(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
            date_range=date_range,
            s2_output_dir=tmp_s2,
            s3_output_dir=base_dir_fn(season, site_name, strategy) / "s3",
            fusion_output_dir=fusion_output_dir,
        )
    return fusion_output_dir
def run_masked_fusion_season(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
) -> Path:
    """Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier).

    A temporary S2 directory is built without the gap-window acquisitions and
    the withheld scene, checked for leakage, and fused for 1 January to
    31 December of ``season``.

    Returns:
        ``fusion_output_dir`` (created if needed).

    Raises:
        ValueError: if ``mode`` is neither ``"bti"`` nor ``"itb"``.
    """
    # Reject unknown modes up front. An unrecognised mode used to fall through
    # to the ItB branch silently, unlike run_masked_fusion_one_date.
    if mode not in ("bti", "itb"):
        raise ValueError(f"mode must be bti or itb, got {mode!r}")
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    date_range = f"{season}-01-01/{season}-12-31"
    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        if mode == "bti":
            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
            excl = excluded_acquisition_days(
                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
            )
            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=prepared_s3_dir(season, site_name, strategy),
                fusion_output_dir=fusion_output_dir,
            )
        else:
            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
            excl = excluded_acquisition_days(
                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
            )
            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
                fusion_output_dir=fusion_output_dir,
            )
    return fusion_output_dir
def production_fusion_path(
    season: int,
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    yyyymmdd: str,
) -> Path:
    """Path of the no-gap fused raster for one date in the normal prepared tree.

    BtI outputs are ``REFL_<date>.tif`` under the BtI base dir; any other mode
    resolves to ``GCC_<date>.tif`` under the ItB base dir. The sub-folder is
    ``fusion_sigma<sigma>`` when ``sigma`` is truthy, else ``fusion``.
    """
    subfolder = f"fusion_sigma{sigma}" if sigma else "fusion"
    if mode == "bti":
        return _get_base_dir(season, site_name, strategy) / subfolder / f"REFL_{yyyymmdd}.tif"
    return _get_itb_base_dir(season, site_name, strategy) / subfolder / f"GCC_{yyyymmdd}.tif"
def withheld_s2_refl_path(
    season: int, site_name: str, strategy: str, withheld_filename: str | None
) -> Path | None:
    """Return the withheld S2 file in the BtI prepared tree, or ``None`` if absent."""
    if not withheld_filename:
        return None
    candidate = _get_base_dir(season, site_name, strategy) / "s2" / withheld_filename
    if candidate.is_file():
        return candidate
    return None

View file

@ -1,163 +0,0 @@
"""TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference."""
from __future__ import annotations
import argparse
import json
from datetime import datetime
from pathlib import Path
from fusion_phenology import timesat_transitions_from_by_date
from phenology_timesat import phenocam_phenology_path
from gap_validation.batch_spatial import (
PRIMARY_SEASON,
_best_from_metrics,
_parse_scenario,
_resolve_workflows,
_site_positions,
)
from gap_validation.calendar import load_manifest, validation_dir
from gap_validation.temporal_pc import _fusion_gcc_timeseries
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
if not iso_a or not iso_b:
return None
try:
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
return abs((a - b).days)
except ValueError:
return None
def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
    """Run the TIMESAT transition helper and keep only the two 50 % dates."""
    detected = timesat_transitions_from_by_date(by_date, season)
    return {
        transition: detected.get(f"{transition}_50pct_date")
        for transition in ("green_up", "green_down")
    }
def _temporal_fusion_dir(
    site: str, season: int, gap_days: int, transition: str, scenario_key: str
) -> Path:
    """Return ``validation/temporal/gap_<n>_<transition>/<strategy>_sigma<20|30>_<mode>``."""
    strategy, sigma, mode = _parse_scenario(scenario_key)
    # Any sigma other than 30 is labelled 20 in the folder name.
    sigma_label = 20 if sigma != 30 else 30
    gap_folder = f"gap_{gap_days}_{transition}"
    scenario_folder = f"{strategy}_sigma{sigma_label}_{mode}"
    return validation_dir(site, season) / "temporal" / gap_folder / scenario_folder
def compute_offsets_for_site(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
) -> list[dict]:
    """Compare transition dates from gap-degraded fusion with the PhenoCam reference.

    For the best scenario of ``workflow``, every manifest entry with a matching
    gap length and an existing temporal fusion directory yields one record.
    The record holds the reference date, the fused date and their absolute day
    offset. Series with fewer than 10 points are skipped. Returns an empty list
    when no scenario is available.
    """
    season_dir = Path(f"data/{site}/{season}")
    scenario_key = _best_from_metrics(season_dir / "metrics.json", workflow)
    if not scenario_key:
        return []
    ref_path = phenocam_phenology_path(site, season)
    if ref_path.is_file():
        reference = json.loads(ref_path.read_text(encoding="utf-8"))
    else:
        reference = {}
    manifest = load_manifest(site, season)
    records: list[dict] = []
    for entry in manifest["entries"]:
        gap_days = entry.get("gap_days")
        transition = entry.get("transition")
        if gap_days not in gap_days_list:
            continue
        if transition not in ("green_up", "green_down"):
            continue
        fusion_dir = _temporal_fusion_dir(site, season, gap_days, transition, scenario_key)
        if not fusion_dir.is_dir():
            continue
        mode = _parse_scenario(scenario_key)[2]
        series = _fusion_gcc_timeseries(fusion_dir, site_position, mode)
        if len(series) < 10:
            continue
        fused_dates = _timesat_transitions(series, season)
        # ``transition`` is one of the two names checked above, so it indexes
        # both the reference record and the fused result directly.
        ref_date = reference.get(f"{transition}_50pct_date")
        fused_date = fused_dates.get(transition)
        records.append(
            {
                "site_name": site,
                "season": season,
                "transition": transition,
                "gap_days": gap_days,
                "scenario": scenario_key,
                "reference_date": ref_date,
                "fused_date": fused_date,
                "abs_day_offset": _day_offset(fused_date, ref_date),
                "window_start": entry.get("window_start"),
                "window_end": entry.get("window_end"),
            }
        )
    return records
def write_phenology_offsets(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
) -> Path:
    """Compute offsets for a site and write ``gap_phenology_offsets_<workflow>.json``.

    For the ``bti`` workflow the same payload is also written to the legacy
    file ``gap_phenology_offsets.json``.

    Returns:
        Path of the workflow-specific JSON file.
    """
    rows = compute_offsets_for_site(
        site, season, site_position, workflow=workflow, gap_days_list=gap_days_list
    )
    vdir = validation_dir(site, season)
    # compute_offsets_for_site can return [] before touching the validation
    # dir, so make sure the dir exists before writing into it.
    vdir.mkdir(parents=True, exist_ok=True)
    payload = {
        "site_name": site,
        "season": season,
        "workflow": workflow,
        "records": rows,
    }
    text = json.dumps(payload, indent=2) + "\n"
    out = vdir / f"gap_phenology_offsets_{workflow}.json"
    out.write_text(text, encoding="utf-8")
    if workflow == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_phenology_offsets.json").write_text(text, encoding="utf-8")
    return out
def main() -> None:
    """CLI entry point: write phenology-offset JSON files for every primary-season site.

    Sites without coordinates, and site/workflow pairs whose inputs are missing
    (for example no gap manifest), are reported and skipped so the remaining
    sites are still processed.
    """
    ap = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.")
    # NOTE(review): --data-dir is parsed but not used anywhere in this function.
    ap.add_argument("--data-dir", type=Path, default=Path("data"))
    ap.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    ap.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) (default: both best BtI and best ItB).",
    )
    args = ap.parse_args()
    positions = _site_positions(args.sites_geojson)
    workflows = _resolve_workflows(args.workflow)
    for site, season in sorted(PRIMARY_SEASON.items()):
        pos = positions.get(site)
        if not pos:
            print(f"[skip] No coordinates for {site}")
            continue
        for workflow in workflows:
            try:
                p = write_phenology_offsets(site, season, pos, workflow=workflow)
            except FileNotFoundError as exc:
                # load_manifest raises this when a site has no gap manifest;
                # one missing site must not abort the rest of the batch.
                print(f"[skip] {site} {season} {workflow}: {exc}")
                continue
            print(p)


if __name__ == "__main__":
    main()

View file

@ -1,352 +0,0 @@
"""Tier-2 gap validation CLI: manifest, masked EFAST, spatial ``nse_s2``, Whittaker crossover."""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from gap_validation.calendar import (
DEFAULT_GAP_LENGTHS,
TRANSITIONS,
load_manifest,
validation_dir,
write_manifest,
)
from gap_validation.fusion_masked import (
production_fusion_path,
run_masked_fusion_one_date,
validation_fusion_dir,
withheld_s2_refl_path,
)
from gap_validation.spatial_metrics import evaluate_gap_vs_withheld
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
def _ymd_from_iso(iso_d: str) -> str:
return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
def _yyyymmdd_from_withheld_filename(fn: str) -> str | None:
for part in fn.replace(".tif", "").split("_"):
if len(part) == 8 and part.isdigit():
return part
return None
def _withheld_iso(entry: dict) -> str | None:
d = entry.get("withheld_s2_date")
if isinstance(d, str) and len(d) >= 10:
return d[:10]
fn = entry.get("withheld_s2_filename")
if not fn or not isinstance(fn, str):
return None
ymd = _yyyymmdd_from_withheld_filename(fn)
if not ymd:
return None
return datetime.strptime(ymd, "%Y%m%d").date().isoformat()
def _fused_file(fusion_dir: Path, mode: str, ymd: str) -> Path:
stem = "REFL" if mode == "bti" else "GCC"
return fusion_dir / f"{stem}_{ymd}.tif"
def _scenario_key(strategy: str, sigma: int | None, mode: str) -> str:
sig = 30 if sigma == 30 else 20
return f"{strategy}_sigma{sig}_{mode}"
def _git_rev() -> str | None:
try:
return subprocess.check_output(
["git", "rev-parse", "HEAD"],
cwd=Path(__file__).resolve().parent.parent,
text=True,
).strip()
except (OSError, subprocess.CalledProcessError):
return None
def _filter_entries(
entries: list[dict],
gap_days_filter: list[int] | None,
transition_filter: list[str] | None,
) -> list[dict]:
out = entries
if gap_days_filter:
out = [e for e in out if e.get("gap_days") in gap_days_filter]
if transition_filter:
out = [e for e in out if e.get("transition") in transition_filter]
return out
def run_validation(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    write_manifest_only: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
    manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    manifest_transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Run the withheld-S2 gap validation for one site/season/scenario.

    For every (filtered) manifest entry this optionally runs the masked
    fusion for the prediction date, scores the fused raster against the
    withheld S2 acquisition, and collects a Whittaker comparison row. All
    results are written to ``gap_validation_summary_{mode}.json`` in the
    validation directory; for ``mode == "bti"`` the same content is also
    written to ``gap_validation_summary.json``.

    Args:
        site_name: Site identifier; used in ``data/{site_name}/{season}``.
        season: Season (year) of the run.
        site_position: Passed through to ``write_manifest`` and
            ``run_masked_fusion_one_date`` (the CLI builds it as (lat, lon)).
        strategy: Cloud-screening strategy label of the scenario.
        sigma: ``30`` selects the sigma-30 scenario; anything else is
            reported as sigma 20.
        mode: ``"bti"`` or ``"itb"`` fusion workflow.
        skip_manifest: Do not (re)write the gap manifest.
        skip_fusion: Reuse fusion rasters already on disk.
        write_manifest_only: Return right after the manifest step.
        gap_days_filter: Optional restriction on ``gap_days``.
        transition_filter: Optional restriction on ``transition``.
        s2_calendar_strategy: Forwarded to ``write_manifest``.
        manifest_gap_lengths: Forwarded to ``write_manifest``.
        manifest_transitions: Forwarded to ``write_manifest``.

    Returns:
        Path of ``gap_manifest.json`` when ``write_manifest_only`` is set,
        otherwise the path of the per-mode summary JSON.
    """
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)
    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
            gap_lengths=manifest_gap_lengths,
            transitions=manifest_transitions,
        )
    if write_manifest_only:
        return vdir / "gap_manifest.json"
    manifest = load_manifest(site_name, season)
    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
    results: list[dict] = []
    for entry in entries:
        gap_days = entry["gap_days"]
        transition = entry.get("transition", "green_up")
        pred = entry["prediction_date"]
        w0 = entry["window_start"]
        w1 = entry["window_end"]
        fn = entry.get("withheld_s2_filename")
        # Entries without a withheld acquisition cannot be scored; record why.
        if not fn:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "no_withheld_s2_filename",
                    "entry": entry,
                }
            )
            continue
        ymd = _ymd_from_iso(pred)
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "could_not_parse_withheld_yyyymmdd",
                    "withheld_s2_filename": fn,
                }
            )
            continue
        # Prefer the manifest's explicit date; fall back to the filename day.
        withheld_iso = (
            _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
        )
        fusion_out = validation_fusion_dir(
            site_name, season, gap_days, transition, strategy, sigma, mode
        )
        if not skip_fusion:
            try:
                run_masked_fusion_one_date(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    pred,
                    w0,
                    w1,
                    wh_ymd,
                    fusion_out,
                )
            except RuntimeError as e:
                # A failed fusion is recorded and the remaining entries still run.
                results.append(
                    {
                        "transition": transition,
                        "gap_days": gap_days,
                        "error": str(e),
                        "entry": entry,
                    }
                )
                continue
        fused_gap = _fused_file(fusion_out, mode, ymd)
        prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd)
        wh_path = withheld_s2_refl_path(season, site_name, strategy, fn)
        if wh_path is None or not fused_gap.is_file():
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "prediction_date": pred,
                    "withheld_s2_filename": fn,
                    "scenario": {
                        "strategy": strategy,
                        "sigma": 30 if sigma == 30 else 20,
                        "mode": mode,
                    },
                    "error": "missing_withheld_refl_or_fused_gap",
                    "fused_gap_path": str(fused_gap),
                }
            )
            continue
        # The no-gap (production) raster is optional; without it only gap and
        # Whittaker scores are produced.
        spatial = evaluate_gap_vs_withheld(
            wh_path,
            fused_gap,
            prod if prod.is_file() else None,
            mode,
            whittaker_context=(base, strategy, pred, withheld_iso, w0, w1),
        )
        fusion_nse = (spatial.get("gap") or {}).get("nse_s2")
        wh_nse = (spatial.get("whittaker") or {}).get("nse_s2")
        results.append(
            {
                "transition": transition,
                "gap_days": gap_days,
                "prediction_date": pred,
                "window_start": w0,
                "window_end": w1,
                "withheld_s2_filename": fn,
                "scenario": {
                    "strategy": strategy,
                    "sigma": 30 if sigma == 30 else 20,
                    "mode": mode,
                },
                "paths": {
                    "fused_gap": str(fused_gap),
                    "fused_no_gap": str(prod) if prod.is_file() else None,
                    "withheld_s2_refl": str(wh_path),
                },
                "spatial": spatial,
                "whittaker_crossover_row": {
                    "transition": transition,
                    "gap_days": gap_days,
                    "nse_s2_fusion": fusion_nse,
                    "nse_s2_whittaker": wh_nse,
                },
            }
        )
    scenario = _scenario_key(strategy, sigma, mode)
    # Only successfully scored entries carry a crossover row.
    crossover_rows = [
        r["whittaker_crossover_row"]
        for r in results
        if isinstance(r.get("whittaker_crossover_row"), dict)
    ]
    summary = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "command_line": sys.argv,
        "git_commit": _git_rev(),
        "manifest": str(vdir / "gap_manifest.json"),
        "gap_withheld_images": str(vdir / "gap_withheld_images.json"),
        "results": results,
        "whittaker_crossover": {
            scenario: {
                "metric": "nse_s2_spatial_vs_withheld_s2_gcc",
                "whittaker_definition": (
                    "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
                    "all S2 dates in the gap window and the withheld acquisition removed; "
                    "prediction is a spatially constant field at smoothed GCC(prediction_date)."
                ),
                "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_s2_fusion",
                    whittaker_key="nse_s2_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    out_path = vdir / f"gap_validation_summary_{mode}.json"
    out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers (webapp, older scripts).
        (vdir / "gap_validation_summary.json").write_text(
            json.dumps(summary, indent=2) + "\n", encoding="utf-8"
        )
    return out_path
def main() -> None:
    """CLI entry point: parse arguments, run the gap validation, print the output path."""
    strategy_choices = ["aggressive", "nonaggressive"]
    parser = argparse.ArgumentParser(
        description="Tier-2 withheld-S2 gap validation (outputs under data/.../validation/)."
    )
    parser.add_argument("--site", required=True)
    parser.add_argument("--season", type=int, required=True)
    parser.add_argument("--lat", type=float, required=True)
    parser.add_argument("--lon", type=float, required=True)
    parser.add_argument("--strategy", default="aggressive", choices=strategy_choices)
    parser.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    parser.add_argument("--mode", default="bti", choices=["bti", "itb"])
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        metavar="N",
        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
    )
    parser.add_argument(
        "--transition",
        choices=list(TRANSITIONS),
        action="append",
        help="Restrict to transition(s); repeatable (default: all in manifest).",
    )
    parser.add_argument("--skip-manifest", action="store_true")
    parser.add_argument(
        "--skip-fusion",
        action="store_true",
        help="Reuse existing validation fusion rasters.",
    )
    parser.add_argument(
        "--write-manifest-only",
        action="store_true",
        help="Write gap_manifest.json + gap_withheld_images.json and exit.",
    )
    parser.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=list(strategy_choices),
        help="Which prepared_*/s2 tree is used to pick nearest S2 for withholding.",
    )
    args = parser.parse_args()

    # Downstream code encodes the default sigma (20) as None.
    if args.sigma == 30:
        sigma_kw = 30
    else:
        sigma_kw = None

    summary_path = run_validation(
        args.site,
        args.season,
        (args.lat, args.lon),
        args.strategy,
        sigma_kw,
        args.mode,
        skip_manifest=args.skip_manifest,
        skip_fusion=args.skip_fusion,
        write_manifest_only=args.write_manifest_only,
        gap_days_filter=args.gap_days or None,
        transition_filter=args.transition or None,
        s2_calendar_strategy=args.s2_calendar_strategy,
    )
    print(summary_path)
if __name__ == "__main__":
main()

View file

@ -1,91 +0,0 @@
"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
from __future__ import annotations
import re
from datetime import date, datetime
from pathlib import Path
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
def yyyymmdd_in_name(name: str) -> str | None:
    """Return the acquisition day captured by ``S2_PREP_DATE_RE`` in ``name``, else ``None``."""
    found = S2_PREP_DATE_RE.search(name)
    if found is None:
        return None
    return found.group(1)
def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert the ``YYYY-MM-DD`` prefix of ``iso_d`` to compact ``YYYYMMDD``.

    Raises ``ValueError`` when the first ten characters are not a valid date.
    """
    parsed = datetime.strptime(iso_d[:10], "%Y-%m-%d")
    return f"{parsed:%Y%m%d}"
def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
) -> set[str]:
    """All S2 acquisition days (from REFL filenames) inside [window_start, window_end].

    Files are expected to be named ``S2<unit>_MSIL2A_<YYYYMMDD>_REFL.tif``.
    Every Sentinel-2 unit letter (``S2A``, ``S2B``, ...) is accepted so that
    no acquisition inside the gap window is missed when building the
    exclusion set. Names whose 8-digit token is not a real calendar date are
    ignored.

    Args:
        prepared_s2: Directory holding the prepared S2 rasters.
        window_start: First day of the window (inclusive).
        window_end: Last day of the window (inclusive).

    Returns:
        The ``YYYYMMDD`` strings of matching acquisitions; empty when the
        directory does not exist.
    """
    days: set[str] = set()
    if not prepared_s2.is_dir():
        return days
    # Compiled once; the unit letter is a wildcard instead of a literal "A".
    name_re = re.compile(r"S2[A-Z]_MSIL2A_(\d{8})_REFL\.tif$")
    for path in prepared_s2.glob("*REFL.tif"):
        found = name_re.search(path.name)
        if not found:
            continue
        try:
            day = datetime.strptime(found.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that are not a date (e.g. month 13): not an acquisition.
            continue
        if window_start <= day <= window_end:
            days.add(found.group(1))
    return days
def build_masked_s2_dir(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
    patterns: tuple[str, ...],
) -> int:
    """Symlink files matching ``patterns`` into ``dest``, skipping excluded days.

    ``dest`` is created if needed. A file is skipped when the day parsed from
    its name is in ``excluded_yyyymmdd``; files without a parseable day are
    always linked. An existing entry of the same name in ``dest`` is replaced.

    Returns:
        Number of symlinks created.
    """
    dest.mkdir(parents=True, exist_ok=True)
    linked = 0
    for glob_pattern in patterns:
        candidates = sorted(prepared_s2.glob(glob_pattern))
        for source in candidates:
            if not (source.is_file() or source.is_symlink()):
                continue
            day = yyyymmdd_in_name(source.name)
            if day and day in excluded_yyyymmdd:
                continue
            target = dest / source.name
            # is_symlink() also catches dangling links, which exists() misses.
            if target.exists() or target.is_symlink():
                target.unlink()
            target.symlink_to(source.resolve())
            linked += 1
    return linked
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Raise ``RuntimeError`` if the withheld acquisition day appears in ``masked_s2_dir``.

    Each directory entry's name is parsed for its acquisition day; the first
    entry matching ``withheld_yyyymmdd`` triggers the error.
    """
    for entry in masked_s2_dir.iterdir():
        if yyyymmdd_in_name(entry.name) != withheld_yyyymmdd:
            continue
        raise RuntimeError(
            f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
            f"found in masked S2 dir {masked_s2_dir}"
        )
def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """BtI variant of ``build_masked_s2_dir``: links REFL and DIST_CLOUD rasters."""
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, bti_patterns)
def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """ItB variant of ``build_masked_s2_dir``: links GCC and DIST_CLOUD rasters."""
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, itb_patterns)

View file

@ -1,234 +0,0 @@
"""Per-pixel GCC vs withheld S2; NSE (nse_s2); no-gap baseline; deltas."""
from __future__ import annotations
from pathlib import Path
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling
from scipy.stats import pearsonr
# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
VALID_REFL_THRESHOLD = 0.001
GCC_DENOM_EPS = 1e-3
MAX_REPORTED_NSE_S2 = 20.0
def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Green chromatic coordinate ``green / (red + green + blue)`` as float32.

    The ratio is computed in float64. Pixels are NaN unless the band sum is
    finite and at least ``GCC_DENOM_EPS`` and every band is finite and
    strictly greater than ``GCC_DENOM_EPS``.
    """
    green64 = green.astype(np.float64)
    total = red.astype(np.float64) + green64 + blue.astype(np.float64)
    # Band tests run on the input arrays (input dtype), not on the float64 copies.
    usable = np.logical_and.reduce(
        [
            np.isfinite(total),
            total >= GCC_DENOM_EPS,
            np.isfinite(blue),
            np.isfinite(green),
            np.isfinite(red),
            blue > GCC_DENOM_EPS,
            green > GCC_DENOM_EPS,
            red > GCC_DENOM_EPS,
        ]
    )
    gcc = np.full_like(blue, np.nan, dtype=np.float64)
    gcc[usable] = green64[usable] / total[usable]
    return gcc.astype(np.float32)
def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
    """Mask of pixels whose first three bands are finite and above ``GCC_DENOM_EPS``.

    Returns ``None`` when the raster has fewer than three bands.
    """
    with rasterio.open(fusion_path) as src:
        if src.count < 3:
            return None
        bgr = src.read(indexes=[1, 2, 3]).astype(np.float32)
    all_finite = np.isfinite(bgr).all(axis=0)
    all_positive = (bgr > GCC_DENOM_EPS).all(axis=0)
    return all_finite & all_positive
def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
    """Read fused GCC and a copy of the raster profile.

    Rasters with four or more bands are treated as reflectance and GCC is
    derived from bands 1-3 (blue, green, red); otherwise band 1 is returned
    as GCC directly.
    """
    with rasterio.open(fusion_path) as src:
        profile = src.profile.copy()
        if src.count < 4:
            return src.read(1).astype(np.float32), profile
        blue, green, red = (src.read(i).astype(np.float32) for i in (1, 2, 3))
    return _gcc_from_rgb(blue, green, red), profile
def warp_refl_bands_to_grid(
    refl_path: Path,
    height: int,
    width: int,
    transform,
    crs,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Reproject bands 1-3 of ``refl_path`` onto the target grid (bilinear).

    Args:
        refl_path: Source reflectance raster.
        height: Row count of the target grid.
        width: Column count of the target grid.
        transform: Affine transform of the target grid.
        crs: Coordinate reference system of the target grid.

    Returns:
        Three float32 arrays of shape ``(height, width)`` for bands 1, 2, 3
        (used by callers as blue, green, red).
    """
    grid_shape = (height, width)
    warped: list[np.ndarray] = []
    with rasterio.open(refl_path) as src:
        for band_index in (1, 2, 3):
            target = np.empty(grid_shape, dtype=np.float32)
            reproject(
                source=rasterio.band(src, band_index),
                destination=target,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=crs,
                resampling=Resampling.bilinear,
            )
            warped.append(target)
    blue, green, red = warped
    return blue, green, red
def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
    """Valid-pixel mask of a fused raster.

    For ``mode == "itb"`` or rasters with fewer than four bands, band 1 must
    be finite and above ``VALID_REFL_THRESHOLD``. Otherwise every band must be
    finite and the per-pixel band maximum must exceed the threshold.
    """
    with rasterio.open(fusion_path) as src:
        single_band = mode == "itb" or src.count < 4
        if single_band:
            data = src.read(1).astype(np.float32)
        else:
            data = src.read().astype(np.float32)
    if single_band:
        return np.isfinite(data) & (data > VALID_REFL_THRESHOLD)
    with np.errstate(all="ignore"):
        band_max = np.nanmax(data, axis=0)
    return (
        np.isfinite(data).all(axis=0)
        & np.isfinite(band_max)
        & (band_max > VALID_REFL_THRESHOLD)
    )
def spatial_scores(
    y_true_gcc: np.ndarray,
    y_pred_gcc: np.ndarray,
    mask: np.ndarray,
) -> dict:
    """RMSE, MAE, mean bias, Pearson r and ``nse_s2`` (Nash-Sutcliffe) over ``mask``.

    With fewer than two masked pixels only ``n_pixels`` is returned.
    ``nse_s2`` is ``None`` when the truth has zero variance or when its
    magnitude exceeds ``MAX_REPORTED_NSE_S2``; ``pearson_r`` is ``None`` when
    either series is constant. Mean bias is prediction minus truth.
    """
    truth = y_true_gcc[mask].astype(np.float64).ravel()
    predicted = y_pred_gcc[mask].astype(np.float64).ravel()
    count = int(truth.size)
    if count < 2:
        return {"n_pixels": count}

    truth_mean = float(np.mean(truth))
    scores: dict = {
        "n_pixels": count,
        "rmse": float(np.sqrt(np.mean((truth - predicted) ** 2))),
        "mae": float(np.mean(np.abs(truth - predicted))),
        "mean_bias": float(np.mean(predicted - truth)),
        "pearson_r": None,
        "nse_s2": None,
    }

    variance_sum = float(np.sum((truth - truth_mean) ** 2))
    if variance_sum > 0:
        efficiency = float(1.0 - np.sum((truth - predicted) ** 2) / variance_sum)
        # Extreme values are suppressed rather than reported.
        if abs(efficiency) <= MAX_REPORTED_NSE_S2:
            scores["nse_s2"] = efficiency

    if np.std(truth) > 0 and np.std(predicted) > 0:
        scores["pearson_r"] = float(pearsonr(truth, predicted)[0])
    return scores
def withheld_gcc_on_fusion_grid(
    withheld_refl_path: Path, fused_path: Path
) -> tuple[np.ndarray, np.ndarray, dict]:
    """Return (truth GCC, predicted GCC, fused profile) on the fused raster's grid.

    The withheld S2 reflectance is warped to the grid of ``fused_path`` before
    its GCC is computed.
    """
    predicted, profile = read_fused_gcc(fused_path)
    rows, cols = predicted.shape
    blue, green, red = warp_refl_bands_to_grid(
        withheld_refl_path, rows, cols, profile["transform"], profile["crs"]
    )
    truth = _gcc_from_rgb(blue, green, red)
    return truth, predicted, profile
def mask_gap_whittaker(
    yt: np.ndarray,
    y_gap: np.ndarray,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Pixel mask shared by the gap-fusion and Whittaker scores.

    A pixel is kept when the fused raster is valid there and both truth and
    gap GCC are finite and within ``(VALID_REFL_THRESHOLD, 1.0]``. When the
    fused raster has at least three bands, those bands must also be positive.
    """

    def _plausible_gcc(values: np.ndarray) -> np.ndarray:
        return (
            np.isfinite(values)
            & (values > VALID_REFL_THRESHOLD)
            & (values <= 1.0)
        )

    keep = (
        valid_mask_fused(fused_gap_path, mode)
        & _plausible_gcc(yt)
        & _plausible_gcc(y_gap)
    )
    positive = _positive_bgr_mask(fused_gap_path)
    if positive is None:
        return keep
    return keep & positive
def common_valid_mask(
    yt: np.ndarray,
    y_gap: np.ndarray,
    y_nogap: np.ndarray | None,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Gap/Whittaker mask, further restricted to plausible no-gap GCC when given.

    Used for gap-vs-no-gap deltas so both fusions are scored on the same
    pixels. With ``y_nogap=None`` this equals ``mask_gap_whittaker``.
    """
    shared = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
    if y_nogap is None:
        return shared
    nogap_ok = (
        np.isfinite(y_nogap)
        & (y_nogap > VALID_REFL_THRESHOLD)
        & (y_nogap <= 1.0)
    )
    shared &= nogap_ok
    return shared
def evaluate_gap_vs_withheld(
    withheld_refl_path: Path,
    fused_gap_path: Path,
    fused_nogap_path: Path | None,
    mode: str,
    *,
    whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
) -> dict:
    """Score gap fusion (and optionally no-gap fusion / Whittaker) against withheld S2.

    The result always has ``"gap"``. ``"no_gap"`` plus ``delta_rmse``
    (gap - no-gap) and ``delta_nse`` (NSE_no_gap - NSE_gap) are added when a
    no-gap raster exists; the deltas are QA values on the shared mask.
    ``"whittaker"`` is added when ``whittaker_context`` -- ``(base, strategy,
    prediction_iso, withheld_iso, window_start, window_end)`` -- is given and
    a smoothed value is available for the prediction date.
    """
    truth, pred_gap, _profile = withheld_gcc_on_fusion_grid(
        withheld_refl_path, fused_gap_path
    )
    pred_nogap = None
    if fused_nogap_path is not None and fused_nogap_path.is_file():
        pred_nogap = read_fused_gcc(fused_nogap_path)[0]

    gap_mask = mask_gap_whittaker(truth, pred_gap, fused_gap_path, mode)
    report: dict = {"gap": spatial_scores(truth, pred_gap, gap_mask)}

    if pred_nogap is not None:
        shared_mask = common_valid_mask(
            truth, pred_gap, pred_nogap, fused_gap_path, mode
        )
        report["no_gap"] = spatial_scores(truth, pred_nogap, shared_mask)
        gap_scores = report["gap"]
        nogap_scores = report["no_gap"]
        gap_rmse, nogap_rmse = gap_scores.get("rmse"), nogap_scores.get("rmse")
        if gap_rmse is not None and nogap_rmse is not None:
            report["delta_rmse"] = float(gap_scores["rmse"] - nogap_scores["rmse"])
        gap_nse, nogap_nse = gap_scores.get("nse_s2"), nogap_scores.get("nse_s2")
        if gap_nse is not None and nogap_nse is not None:
            report["delta_nse"] = float(nogap_scores["nse_s2"] - gap_scores["nse_s2"])

    if whittaker_context is None:
        return report

    from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series

    base, strategy, prediction_iso, withheld_iso, w0, w1 = whittaker_context
    smoothed = whittaker_gcc_on_gap_masked_series(
        base,
        strategy,
        prediction_iso,
        withheld_iso,
        window_start_iso=w0,
        window_end_iso=w1,
    )
    if smoothed is not None:
        # Whittaker is scored on the gap mask so it is comparable with "gap".
        report["whittaker"] = constant_field_scores(truth, float(smoothed), gap_mask)
    return report
def constant_field_scores(
    y_true_gcc: np.ndarray, scalar: float, mask: np.ndarray
) -> dict:
    """Score a spatially constant prediction ``scalar`` against ``y_true_gcc`` over ``mask``.

    The constant field is float32 and shaped like the truth array; the scores
    are those of ``spatial_scores``.
    """
    constant_prediction = np.full_like(y_true_gcc, scalar, dtype=np.float32)
    return spatial_scores(y_true_gcc, constant_prediction, mask)

View file

@ -1,293 +0,0 @@
"""Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation)."""
from __future__ import annotations
import argparse
import json
import re
from datetime import datetime
from pathlib import Path
from metrics_indices import _get_gcc_from_original
from metrics_stats import (
WHITTAKER_LAMBDA_DAYS_SQ,
_norm_date_key,
_s2_gcc_series_from_preselection,
_whittaker_smooth_dict,
calculate_temporal_metrics,
load_timeseries,
)
from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest
from gap_validation.fusion_masked import run_masked_fusion_season
from gap_validation.run import (
_filter_entries,
_scenario_key,
_withheld_iso,
_yyyymmdd_from_withheld_filename,
)
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
def _fusion_gcc_timeseries(
fusion_dir: Path, site_position: tuple[float, float], mode: str
) -> dict[str, float]:
"""3×3 mean GCC at site from fused REFL/GCC rasters in ``fusion_dir``."""
pattern = "REFL_*.tif" if mode == "bti" else "GCC_*.tif"
out: dict[str, float] = {}
for p in sorted(fusion_dir.glob(pattern)):
m = re.search(r"_(\d{8})\.tif$", p.name)
if not m:
continue
d = datetime.strptime(m.group(1), "%Y%m%d").date().isoformat()
gcc = _get_gcc_from_original(p, site_position)
if gcc is not None:
out[d] = float(gcc)
return out
def whittaker_timeseries_gap_degraded(
    base: Path,
    strategy: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_iso: str,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> dict[str, float]:
    """Whittaker-smoothed S2 GCC with the gap window and withheld day removed.

    Observations come from ``_s2_gcc_series_from_preselection(base)``. A date
    is kept only when it has a flag entry, the flag for ``strategy`` (index 0
    for ``"aggressive"``, 1 otherwise) is falsy, it is not the withheld day,
    and it lies outside ``[window_start_iso, window_end_iso]``. Returns ``{}``
    when fewer than two observations remain.
    """
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return {}

    flag_index = 0 if strategy == "aggressive" else 1
    gap_start = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
    gap_end = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
    withheld_key = _norm_date_key(withheld_iso)

    def _inside_gap(date_key: str) -> bool:
        # Unparseable keys are treated as outside the window (i.e. kept).
        try:
            day = datetime.strptime(date_key[:10], "%Y-%m-%d").date()
        except ValueError:
            return False
        return gap_start <= day <= gap_end

    observations = []
    for raw_date, gcc in all_gcc.items():
        if raw_date not in flags or flags[raw_date][flag_index]:
            continue
        key = _norm_date_key(raw_date)
        if key == withheld_key:
            continue
        if _inside_gap(key or ""):
            continue
        observations.append((raw_date, gcc))
    observations.sort()

    if len(observations) < 2:
        return {}
    obs_dates, obs_values = zip(*observations)
    return _whittaker_smooth_dict(obs_dates, obs_values, lam)
def run_temporal_pc(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
) -> Path:
    """Run full-season gap fusion + NSE_PC; write ``gap_metrics_{mode}.json``.

    For each (filtered) manifest entry the season is optionally re-fused with
    the gap window masked, the fused GCC series at the site is scored against
    the PhenoCam series, and the same is done for a gap-degraded Whittaker
    series. When ``data/{site}/{season}/metrics.json`` holds a no-gap
    ``nse_pc`` for the scenario, ``delta_rmse`` (gap - no-gap) and
    ``delta_nse`` (no-gap - gap) are added.

    Args:
        site_name: Site identifier; used in ``data/{site_name}/{season}``.
        season: Season (year) of the run.
        site_position: Forwarded to manifest writing, fusion and sampling.
        strategy: Cloud-screening strategy label of the scenario.
        sigma: ``30`` selects the sigma-30 scenario; anything else maps to 20.
        mode: ``"bti"`` or ``"itb"``.
        skip_manifest: Do not (re)write the gap manifest.
        skip_fusion: Reuse fusion rasters already on disk.
        gap_days_filter: Optional restriction on ``gap_days``.
        transition_filter: Optional restriction on ``transition``.
        s2_calendar_strategy: Forwarded to ``write_manifest``.

    Returns:
        Path of the per-mode metrics JSON. For ``mode == "bti"`` the same
        content is also written to ``gap_metrics.json``.
    """
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)
    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
        )
    manifest = load_manifest(site_name, season)
    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
    phenocam_ts = load_timeseries(base / "raw" / "phenocam" / "phenocam_gcc.json")
    scenario = _scenario_key(strategy, sigma, mode)

    # No-gap baseline: parsed once up front instead of once per manifest entry.
    nogap_nse_pc = None
    nogap_rmse = None
    nogap_metrics_path = base / "metrics.json"
    if nogap_metrics_path.is_file():
        nogap_all = json.loads(nogap_metrics_path.read_text(encoding="utf-8"))
        nogap_block = (nogap_all.get("temporal") or {}).get(scenario) or {}
        nogap_nse_pc = nogap_block.get("nse_pc")
        nogap_rmse = nogap_block.get("rmse")

    results: list[dict] = []
    crossover_rows: list[dict] = []
    for entry in entries:
        transition = entry.get("transition", "green_up")
        gap_days = entry["gap_days"]
        pred = entry["prediction_date"]
        w0, w1 = entry["window_start"], entry["window_end"]
        fn = entry.get("withheld_s2_filename")
        if not fn:
            results.append(
                {"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"}
            )
            continue
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "bad_withheld_filename",
                }
            )
            continue
        # Prefer the manifest's explicit date; fall back to the filename day.
        withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
        temporal_dir = vdir / "temporal" / f"gap_{gap_days}_{transition}" / scenario
        if not skip_fusion:
            try:
                run_masked_fusion_season(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    w0,
                    w1,
                    wh_ymd,
                    temporal_dir,
                )
            except RuntimeError as e:
                # A failed fusion is recorded and the remaining entries still run.
                results.append(
                    {
                        "transition": transition,
                        "gap_days": gap_days,
                        "error": str(e),
                    }
                )
                continue
        fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)
        fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
        wh_ts = whittaker_timeseries_gap_degraded(base, strategy, w0, w1, withheld_iso)
        wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts)
        row: dict = {
            "transition": transition,
            "gap_days": gap_days,
            "prediction_date": pred,
            "window_start": w0,
            "window_end": w1,
            "withheld_s2_filename": fn,
            "temporal": {
                "fused": fused_metrics,
                "whittaker": wh_metrics,
            },
            "fusion_dir": str(temporal_dir),
        }
        # Deltas need a no-gap NSE_PC baseline for this scenario.
        if fused_metrics and nogap_nse_pc is not None:
            gap_rmse = fused_metrics.get("rmse")
            gap_nse_pc = fused_metrics.get("nse_pc")
            if gap_rmse is not None and nogap_rmse is not None:
                row["delta_rmse"] = float(gap_rmse - nogap_rmse)
            if gap_nse_pc is not None:
                row["delta_nse"] = float(nogap_nse_pc - gap_nse_pc)
        row["utility_crossover_row"] = {
            "transition": transition,
            "gap_days": gap_days,
            "nse_pc_fusion": (fused_metrics or {}).get("nse_pc"),
            "nse_pc_whittaker": (wh_metrics or {}).get("nse_pc"),
        }
        crossover_rows.append(row["utility_crossover_row"])
        results.append(row)

    payload = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "tier": "temporal_nse_pc",
        "manifest": str(vdir / "gap_manifest.json"),
        "results": results,
        "utility_crossover": {
            scenario: {
                "metric": "nse_pc_vs_phenocam_gcc90",
                "first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_pc_fusion",
                    whittaker_key="nse_pc_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    # Serialise once; both output files carry identical text.
    payload_text = json.dumps(payload, indent=2) + "\n"
    out_path = vdir / f"gap_metrics_{mode}.json"
    out_path.write_text(payload_text, encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_metrics.json").write_text(payload_text, encoding="utf-8")
    return out_path
def main() -> None:
    """CLI entry point for the gap-degraded full-season NSE_PC tier.

    Parses the command line, runs ``run_temporal_pc`` and prints the path of
    the written metrics file. ``--strategy`` and ``--s2-calendar-strategy``
    are restricted to the two known strategies, so a typo is rejected by the
    parser instead of silently selecting the non-aggressive flag downstream.
    """
    strategy_choices = ["aggressive", "nonaggressive"]
    ap = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.")
    ap.add_argument("--site", required=True)
    ap.add_argument("--season", type=int, required=True)
    ap.add_argument("--lat", type=float, required=True)
    ap.add_argument("--lon", type=float, required=True)
    ap.add_argument("--strategy", default="aggressive", choices=strategy_choices)
    ap.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    ap.add_argument("--mode", default="bti", choices=["bti", "itb"])
    ap.add_argument(
        "--gap-days",
        type=int,
        action="append",
        help="Restrict to gap length(s); repeatable.",
    )
    ap.add_argument(
        "--transition",
        choices=list(TRANSITIONS),
        action="append",
        help="Restrict to transition(s); repeatable.",
    )
    ap.add_argument("--skip-manifest", action="store_true")
    ap.add_argument("--skip-fusion", action="store_true")
    ap.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=strategy_choices,
    )
    args = ap.parse_args()
    # Downstream code encodes the default sigma (20) as None.
    sigma_kw = 30 if args.sigma == 30 else None
    out = run_temporal_pc(
        args.site,
        args.season,
        (args.lat, args.lon),
        args.strategy,
        sigma_kw,
        args.mode,
        skip_manifest=args.skip_manifest,
        skip_fusion=args.skip_fusion,
        gap_days_filter=args.gap_days,
        transition_filter=args.transition,
        s2_calendar_strategy=args.s2_calendar_strategy,
    )
    print(out)
if __name__ == "__main__":
main()

View file

@ -1,81 +0,0 @@
"""Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
from __future__ import annotations
from datetime import date, datetime
from pathlib import Path
from metrics_stats import (
WHITTAKER_LAMBDA_DAYS_SQ,
_norm_date_key,
_s2_gcc_series_from_preselection,
_whittaker_smooth_dict,
)
def _date_in_window(dk: str, start: date, end: date) -> bool:
try:
d = datetime.strptime(dk[:10], "%Y-%m-%d").date()
except ValueError:
return False
return start <= d <= end
def whittaker_gcc_on_gap_masked_series(
    base: Path,
    strategy: str,
    prediction_iso: str,
    withheld_iso: str,
    *,
    window_start_iso: str | None = None,
    window_end_iso: str | None = None,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> float | None:
    """Whittaker-smoothed S2 GCC at ``prediction_iso`` with gap data removed.

    Observations whose flag for ``strategy`` (index 0 for ``"aggressive"``,
    1 otherwise) is set, the withheld day, and -- when both window bounds are
    given -- every date inside the window are dropped before smoothing.
    Returns ``None`` when a date key cannot be normalised, no series exists,
    fewer than two observations remain, or the smoothed series has no value
    for the prediction date.
    """
    prediction_key = _norm_date_key(prediction_iso)
    withheld_key = _norm_date_key(withheld_iso)
    if not (prediction_key and withheld_key):
        return None

    gap_start = gap_end = None
    if window_start_iso and window_end_iso:
        gap_start = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
        gap_end = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
    has_window = gap_start is not None and gap_end is not None

    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return None

    flag_index = 0 if strategy == "aggressive" else 1
    observations = []
    for raw_date, gcc in all_gcc.items():
        if raw_date not in flags or flags[raw_date][flag_index]:
            continue
        key = _norm_date_key(raw_date)
        if not key or key == withheld_key:
            continue
        if has_window and _date_in_window(key, gap_start, gap_end):
            continue
        observations.append((raw_date, gcc))

    if len(observations) < 2:
        return None
    ordered = sorted(observations, key=lambda pair: pair[0])
    obs_dates, obs_values = zip(*ordered)
    smoothed = _whittaker_smooth_dict(obs_dates, obs_values, lam)
    return smoothed.get(prediction_key)
def first_gap_where_fusion_below_whittaker(
    rows: list[dict],
    *,
    fusion_key: str = "nse_s2",
    whittaker_key: str = "nse_s2",
) -> int | None:
    """Smallest ``gap_days`` where fusion[metric] < whittaker[metric] (strict).

    Rows missing either metric are ignored. The minimum is taken over all
    rows regardless of their ``transition``, so the result does not depend on
    how transitions happen to sort.

    Args:
        rows: Dicts with ``gap_days`` and the two metric keys.
        fusion_key: Key of the fusion metric in each row.
        whittaker_key: Key of the Whittaker metric in each row.

    Returns:
        The smallest qualifying ``gap_days`` as ``int``, or ``None`` when no
        row has fusion strictly below Whittaker.
    """
    losing_gaps = [
        int(row["gap_days"])
        for row in rows
        if row.get(fusion_key) is not None
        and row.get(whittaker_key) is not None
        and row[fusion_key] < row[whittaker_key]
    ]
    return min(losing_gaps, default=None)

View file

@ -1,689 +0,0 @@
"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
import json
import numpy as np
import rasterio
from rasterio.warp import transform as transform_coords
from pathlib import Path
from datetime import datetime
from preselection import _sample_3x3
RED_BAND = 3
NIR_BAND = 4
BLUE_BAND = 1
GREEN_BAND = 2
def _calculate_and_write_ndvi(input_file, output_file):
    """Write a single-band float32 NDVI GeoTIFF computed from ``input_file``.

    NDVI is ``(nir - red) / (nir + red)`` where both bands are positive and 0
    elsewhere; 0 is also declared as the output nodata value.
    """
    with rasterio.open(input_file) as src:
        red = src.read(RED_BAND).astype(np.float32)
        nir = src.read(NIR_BAND).astype(np.float32)
        usable = (red > 0) & (nir > 0)
        ndvi = np.zeros_like(red, dtype=np.float32)
        ndvi[usable] = (nir[usable] - red[usable]) / (nir[usable] + red[usable])
        out_profile = src.profile.copy()
        out_profile.update(count=1, dtype="float32", nodata=0, compress="lzw")
        with rasterio.open(output_file, "w", **out_profile) as dst:
            dst.write(ndvi, 1)
            dst.set_band_description(1, "NDVI")
def _get_ndvi_value(ndvi_file, site_position):
    """Sample band 1 of ``ndvi_file`` at ``site_position`` (``(lat, lon)`` in EPSG:4326).

    Returns ``None`` when the point is outside the raster, the sample equals
    the raster's nodata value, is NaN, or any error occurs (the error is
    printed). A sample of 0 is returned as a value only if it is not the
    declared nodata.
    """
    try:
        with rasterio.open(ndvi_file) as src:
            lat, lon = site_position[0], site_position[1]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            samples = list(src.sample([(px, py)]))
            if samples:
                sampled = float(samples[0][0])
                is_nodata = src.nodata is not None and sampled == src.nodata
                if is_nodata or np.isnan(sampled):
                    return None
                return sampled
    except Exception as e:
        # Best effort: report the problem and fall through to None.
        print(f"Error sampling {ndvi_file.name}: {e}")
    return None
def _create_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[NDVI-{source_name}] Creating timeseries.json...")
timeseries = []
for input_file in sorted(input_dir.glob(pattern)):
if "DIST_CLOUD" in input_file.name:
continue
filename = input_file.name
parts = filename.replace(".geotiff", "").replace(".tif", "").split("_")
date_str = None
for part in parts:
if len(part) == 8 and part.isdigit():
date_str = part
break
if date_str:
try:
date = datetime.strptime(date_str, "%Y%m%d").isoformat()
except ValueError:
date = date_str
else:
date_str = parts[0]
date = date_str
print(
f"[NDVI-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
)
ndvi_value, band_means = _sample_3x3(input_file, site_position)
blue_mean = band_means.get("b02") if band_means else None
if ndvi_value is None:
print(
f"[NDVI-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
)
entry = {"date": date, "filename": filename, "ndvi": ndvi_value}
if blue_mean is not None:
entry["blue"] = blue_mean
timeseries.append(entry)
timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True)
timeseries_file = output_dir / "timeseries.json"
with open(timeseries_file, "w") as f:
json.dump(timeseries, f, indent=2)
print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
def _process_ndvi_files(
input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
):
output_dir.mkdir(parents=True, exist_ok=True)
print(f"[NDVI-{source_name}] Processing {input_dir}...")
geotiff_files = sorted(input_dir.glob(pattern))
if not geotiff_files:
print(f"[NDVI-{source_name}] No files found")
return
for geotiff_file in geotiff_files:
# Skip DIST_CLOUD files silently (single-band distance-to-clouds, not suitable for NDVI)
if "DIST_CLOUD" in geotiff_file.name:
continue
# Check if file has enough bands (need at least 4 for RED and NIR)
try:
with rasterio.open(geotiff_file) as src:
if src.count < 4:
print(
f"[NDVI-{source_name}] Skipping {geotiff_file.name} (only {src.count} band(s), need 4+)"
)
continue
except Exception as e:
print(
f"[NDVI-{source_name}] Skipping {geotiff_file.name} (error reading: {e})"
)
continue
output_file = output_dir / (
output_namer(geotiff_file) if output_namer else geotiff_file.name
)
_calculate_and_write_ndvi(geotiff_file, output_file)
print(f"[NDVI-{source_name}] Saved: {output_file}")
def generate_ndvi_raw(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFFs are no longer written, only timeseries are kept."""
    return None
def _get_output_name_prepared(geotiff_file):
if geotiff_file.suffix == ".tif":
if "REFL" in geotiff_file.stem:
# For S2: S2A_MSIL2A_20240101_REFL -> date is at index [2]
# For S3: composite_20240101.tif -> date is at index [1] after removing .tif
parts = geotiff_file.stem.split("_")
if len(parts) >= 3 and parts[0].startswith("S2"):
# S2 format: S2A_MSIL2A_YYYYMMDD_REFL
date_str = parts[2]
elif len(parts) >= 2 and parts[0] == "composite":
# S3 format: composite_YYYYMMDD
date_str = parts[1]
else:
# Fallback: try index [1] for other formats
date_str = parts[1] if len(parts) > 1 else parts[0]
return f"{date_str}_ndvi.geotiff"
return geotiff_file.name.replace(".tif", ".geotiff")
return geotiff_file.name
def _fusion_namer(f):
date_str = f.stem.split("_")[1]
return f"{date_str}_ndvi.geotiff"
def generate_ndvi_post_process(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFFs are no longer written, only timeseries are kept."""
    return None
def create_ndvi_timeseries_post_process(season, site_position, site_name):
    """Build NDVI timeseries for every post-processed run of a site/season.

    For each strategy (``aggressive``, ``nonaggressive``) and sigma (20, 30)
    the ``s2``, ``s3`` and ``fusion`` sub-directories of
    ``data/<site>/<season>/processed_<strategy>_sigma<sigma>/`` are handed to
    ``_create_timeseries_for_dir`` with the matching ``ndvi/<source>/``
    output directory.
    """
    season_root = f"data/{site_name}/{season}"
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            run_root = f"{season_root}/processed_{strategy}_sigma{sigma}"
            for source in ("s2", "s3"):
                _create_timeseries_for_dir(
                    Path(f"{run_root}/{source}/"),
                    Path(f"{run_root}/ndvi/{source}/"),
                    site_position,
                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
                )
            _create_timeseries_for_dir(
                Path(f"{run_root}/fusion/"),
                Path(f"{run_root}/ndvi/fusion/"),
                site_position,
                f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
            )
def _calculate_and_write_gcc(input_file, output_file):
    """Write a single-band float32 GCC raster computed from ``input_file``.

    GCC is green / (red + green + blue) per pixel, with the bands selected by
    the module-level ``BLUE_BAND``, ``GREEN_BAND`` and ``RED_BAND`` indices.
    Pixels whose band sum is not positive keep the value 0, which is also
    declared as the nodata value of the output.

    Args:
        input_file: Raster to read the three bands from.
        output_file: Raster to create; inherits the input profile with
            ``count=1``, ``dtype=float32``, ``nodata=0`` and LZW compression.
    """
    with rasterio.open(input_file) as src:
        blue, green, red = (
            src.read(index).astype(np.float32)
            for index in (BLUE_BAND, GREEN_BAND, RED_BAND)
        )
        band_sum = red + green + blue
        has_signal = band_sum > 0
        gcc = np.zeros_like(green, dtype=np.float32)
        gcc[has_signal] = green[has_signal] / band_sum[has_signal]
        profile = src.profile.copy()
        profile.update(count=1, dtype="float32", nodata=0, compress="lzw")
        with rasterio.open(output_file, "w", **profile) as dst:
            dst.write(gcc, 1)
            dst.set_band_description(1, "GCC")
def _get_gcc_value(gcc_file, site_position):
    """Sample band 1 of ``gcc_file`` at the site and return it as a float.

    Args:
        gcc_file: Path-like object with a ``name`` attribute, openable by
            rasterio.
        site_position: Sequence read as ``(lat, lon)`` in EPSG:4326.

    Returns:
        The sampled value, or ``None`` when the site lies outside the raster
        bounds, the value equals the raster's nodata value, the value is NaN,
        no sample is produced, or any exception occurs (the exception is
        printed).
    """
    try:
        with rasterio.open(gcc_file) as src:
            lat, lon = site_position[0], site_position[1]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            samples = list(src.sample([(px, py)]))
            if samples:
                value = float(samples[0][0])
                is_nodata = src.nodata is not None and value == src.nodata
                if is_nodata or np.isnan(value):
                    return None
                return value
    except Exception as e:
        print(f"Error sampling {gcc_file.name}: {e}")
    return None
def _get_gcc_from_original(input_file, site_position):
    """Compute the site GCC directly from a raster, without writing a GeoTIFF.

    The value is the mean over a 3x3 pixel window centred on the site and
    clipped to the raster extent.

    * One-band rasters: the band itself is averaged over the finite, strictly
      positive pixels of the window.
    * Rasters with three or more bands: GCC = green / (red + green + blue) is
      computed per window pixel (bands picked by ``BLUE_BAND``,
      ``GREEN_BAND``, ``RED_BAND``) and averaged over pixels with a positive,
      non-NaN band sum and no negative band value.
    * Any other band count yields ``None``.

    Args:
        input_file: Path-like object with a ``name`` attribute.
        site_position: Sequence read as ``(lat, lon)`` in EPSG:4326.

    Returns:
        The mean as a float, or ``None`` when the site is outside the raster,
        no window pixel is usable, or any exception occurs (silently).
    """

    def _site_window(src):
        """Return (row_slice, col_slice) of the clipped 3x3 window, or None."""
        lon, lat = site_position[1], site_position[0]
        xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
        px, py = xs[0], ys[0]
        inside = (
            src.bounds.left <= px <= src.bounds.right
            and src.bounds.bottom <= py <= src.bounds.top
        )
        if not inside:
            return None
        row, col = src.index(px, py)
        if not (0 <= row < src.height and 0 <= col < src.width):
            return None
        return (
            slice(max(0, row - 1), min(src.height, row + 2)),
            slice(max(0, col - 1), min(src.width, col + 2)),
        )

    try:
        with rasterio.open(input_file) as src:
            if src.count == 1:
                single = src.read(1).astype(np.float32)
                window = _site_window(src)
                if window is None:
                    return None
                values = single[window]
                usable = np.isfinite(values) & (values > 0)
                if not np.any(usable):
                    return None
                return float(np.mean(values[usable]))
            if src.count < 3:
                return None
            blue = src.read(BLUE_BAND).astype(np.float32)
            green = src.read(GREEN_BAND).astype(np.float32)
            red = src.read(RED_BAND).astype(np.float32)
            window = _site_window(src)
            if window is None:
                return None
            blue_win, green_win, red_win = blue[window], green[window], red[window]
            band_sum = red_win + green_win + blue_win
            usable = (
                (band_sum > 0)
                & ~np.isnan(band_sum)
                & (blue_win >= 0)
                & (green_win >= 0)
                & (red_win >= 0)
            )
            if not np.any(usable):
                n_negative = np.sum((blue_win < 0) | (green_win < 0) | (red_win < 0))
                if n_negative > 0:
                    print(
                        f"Warning: {input_file.name} excluded - all pixels have negative band values ({n_negative} negative pixels in window)"
                    )
                return None
            gcc_win = np.zeros_like(green_win, dtype=np.float32)
            gcc_win[usable] = green_win[usable] / band_sum[usable]
            selected = gcc_win[usable]
            return float(np.mean(selected)) if len(selected) > 0 else None
    except Exception:
        return None
def _create_gcc_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
    """Write ``timeseries.json`` with one GCC sample per raster in ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` are skipped. The date is taken
    from the first 8-digit ``_``-separated token of the filename and stored
    in ISO format; when it cannot be parsed the raw token is stored, and when
    no such token exists the first token is used and a warning is printed.
    Entries whose GCC could not be sampled are kept with a ``None`` value.

    Args:
        input_dir: Directory globbed with ``pattern``.
        output_dir: Directory (created if needed) receiving ``timeseries.json``.
        site_position: Passed through to ``_get_gcc_from_original``.
        source_name: Label used in the ``[GCC-...]`` log prefix.
        pattern: Glob pattern selecting the rasters.
    """
    tag = f"[GCC-{source_name}]"
    print(f"{tag} Creating timeseries.json...")
    records = []
    for raster in sorted(input_dir.glob(pattern)):
        filename = raster.name
        if "DIST_CLOUD" in filename:
            continue
        tokens = filename.replace(".geotiff", "").replace(".tif", "").split("_")
        stamp = next((tok for tok in tokens if len(tok) == 8 and tok.isdigit()), None)
        if stamp:
            try:
                when = datetime.strptime(stamp, "%Y%m%d").isoformat()
            except ValueError:
                when = stamp
        else:
            stamp = tokens[0]
            when = stamp
            print(
                f"{tag} Warning: Could not extract date from {filename}, using '{stamp}'"
            )
        gcc_value = _get_gcc_from_original(raster, site_position)
        if gcc_value is None:
            print(
                f"{tag} Warning: Could not sample {filename} (outside bounds or nodata)"
            )
        records.append(
            {"date": when, "filename": filename, "greenness_index": gcc_value}
        )
    records = sorted(records, key=lambda rec: rec["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    timeseries_file = output_dir / "timeseries.json"
    with open(timeseries_file, "w") as fh:
        json.dump(records, fh, indent=2)
    print(f"{tag} Saved: {timeseries_file} ({len(records)} entries)")
def _process_gcc_files(
    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
):
    """Write a GCC raster for every suitable file in ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` are skipped silently; files that
    cannot be opened or have fewer than three bands are skipped with a
    message. Each remaining file is passed to ``_calculate_and_write_gcc``.

    Args:
        input_dir: Directory globbed with ``pattern``.
        output_dir: Directory (created if needed) receiving the GCC rasters.
        source_name: Label used in the ``[GCC-...]`` log prefix.
        pattern: Glob pattern selecting the rasters.
        output_namer: Optional callable mapping an input path to the output
            filename; the input filename is reused when omitted.
    """
    tag = f"[GCC-{source_name}]"
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"{tag} Processing {input_dir}...")
    candidates = sorted(input_dir.glob(pattern))
    if not candidates:
        print(f"{tag} No files found")
        return
    for raster in candidates:
        if "DIST_CLOUD" in raster.name:
            continue
        try:
            with rasterio.open(raster) as src:
                band_count = src.count
        except Exception as e:
            print(f"{tag} Skipping {raster.name} (error reading: {e})")
            continue
        if band_count < 3:
            print(
                f"{tag} Skipping {raster.name} (only {band_count} band(s), need 3+)"
            )
            continue
        target_name = output_namer(raster) if output_namer else raster.name
        output_file = output_dir / target_name
        _calculate_and_write_gcc(raster, output_file)
        print(f"{tag} Saved: {output_file}")
def generate_gcc_post_process(season, site_position, site_name):
    """Do nothing: GCC GeoTIFF files are no longer created, only timeseries.

    All three arguments are accepted and ignored.
    """
    return None
def create_gcc_timeseries_post_process(season, site_position, site_name):
    """Build GCC timeseries for every post-processed run of a site/season.

    For each strategy (``aggressive``, ``nonaggressive``) and sigma (20, 30):

    * ``processed_<strategy>_sigma<sigma>`` is always processed
      (labels ``POST-PROCESS-...``);
    * ``processed_<strategy>_itb_sigma<sigma>`` is processed only when the
      directory exists (labels ``POST-ITB-...``).

    In both cases the ``s2``, ``s3`` and ``fusion`` sub-directories are
    handed, in that order, to ``_create_gcc_timeseries_for_dir`` with the
    output directory ``gcc/<source>`` under the same run directory.
    """
    season_root = f"data/{site_name}/{season}"
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            variants = (
                (f"processed_{strategy}_sigma{sigma}", "POST-PROCESS", False),
                (f"processed_{strategy}_itb_sigma{sigma}", "POST-ITB", True),
            )
            for dir_name, prefix, only_if_present in variants:
                run_root = Path(f"{season_root}/{dir_name}")
                if only_if_present and not run_root.exists():
                    continue
                for source in ("s2", "s3", "fusion"):
                    _create_gcc_timeseries_for_dir(
                        run_root / source,
                        run_root / "gcc" / source,
                        site_position,
                        f"{prefix}-{source.upper()}-{strategy}-σ{sigma}",
                    )
def _get_bands_from_original(input_file, site_position):
    """Return the mean of bands 1-4 over the 3x3 window at the site.

    The window is centred on the pixel containing the site and clipped to the
    raster extent. Only pixels where all four bands are non-NaN and strictly
    positive contribute to the means.

    Args:
        input_file: Raster openable by rasterio with at least four bands.
        site_position: Sequence read as ``(lat, lon)`` in EPSG:4326.

    Returns:
        ``{"b02", "b03", "b04", "b8a"}`` mapped to the means of bands 1 to 4,
        or ``None`` when the raster has fewer than four bands, the site is
        outside the bounds, no pixel is usable, or any exception occurs.
    """
    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None
            lat, lon = site_position[0], site_position[1]
            xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            px, py = xs[0], ys[0]
            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None
            row, col = src.index(px, py)
            window = (
                (max(0, row - 1), min(src.height, row + 2)),
                (max(0, col - 1), min(src.width, col + 2)),
            )
            stack = [
                src.read(band_index, window=window).astype(np.float32)
                for band_index in (1, 2, 3, 4)
            ]
            usable = ~np.any([np.isnan(band) for band in stack], axis=0)
            usable &= np.all([band > 0 for band in stack], axis=0)
            if not np.any(usable):
                return None
            return {
                key: float(np.mean(band[usable]))
                for key, band in zip(("b02", "b03", "b04", "b8a"), stack)
            }
    except Exception:
        return None
def _create_bands_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
    """Write ``timeseries.json`` with per-date band means for ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` and files without an 8-digit
    ``_``-separated token in their name are skipped. Each remaining file
    contributes one entry with ``date``, ``filename`` and, when
    ``_get_bands_from_original`` returns values, the band means.

    Args:
        input_dir: Directory globbed with ``pattern``.
        output_dir: Directory (created if needed) receiving ``timeseries.json``.
        site_position: Passed through to ``_get_bands_from_original``.
        source_name: Label used in the ``[BANDS-...]`` log prefix.
        pattern: Glob pattern selecting the rasters.
    """
    print(f"[BANDS-{source_name}] Creating timeseries.json...")
    timeseries = []
    for f in sorted(input_dir.glob(pattern)):
        if "DIST_CLOUD" in f.name:
            continue
        parts = f.name.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next((p for p in parts if len(p) == 8 and p.isdigit()), None)
        if not date_str:
            continue
        try:
            date = datetime.strptime(date_str, "%Y%m%d").isoformat()
        except ValueError:
            # Eight digits that are not a calendar date (e.g. a tile or orbit
            # id): keep the raw token instead of aborting the whole directory,
            # which is also how the GCC timeseries builder handles it.
            date = date_str
        bands = _get_bands_from_original(f, site_position)
        timeseries.append({"date": date, "filename": f.name, **(bands or {})})
    timeseries.sort(key=lambda x: x["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
    print(
        f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
    )
def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
"""Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv."""
def load(p):
p = Path(p)
if not p.exists():
return []
try:
return json.loads((p / "timeseries.json").read_text())
except Exception:
return []
ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)}
gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)}
bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)}
keys = sorted(set(ndvi) | set(gcc) | set(bands))
merged = []
for k in keys:
r = {"date": k, "filename": ""}
for d in [ndvi.get(k, {}), gcc.get(k, {}), bands.get(k, {})]:
r.update({x: d[x] for x in d if x not in ("date",)})
merged.append(r)
export_dir.mkdir(parents=True, exist_ok=True)
(export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))
cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]
def esc(v):
s = str(v) if v is not None else ""
return f'"{s}"' if "," in s or '"' in s else s
rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
(export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
print(
f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
)
def create_prepared_fusion_timeseries(season, site_position, site_name):
    """Generate NDVI, GCC and band timeseries for prepared S2/S3 and fusion outputs.

    For each strategy (``aggressive``, ``nonaggressive``):

    * under ``data/<site>/<season>/prepared_<strategy>``, every existing
      ``s2``/``s3`` directory and every existing fusion directory
      (``fusion``, ``fusion_sigma30``) gets NDVI, GCC and bands timeseries
      plus a merged export;
    * under ``prepared_<strategy>_itb`` (if present), the same directories get
      a GCC timeseries only.

    All rasters are matched with ``*.tif``. Fusion labels use sigma 20 for the
    plain ``fusion`` directory and 30 for ``fusion_sigma30``.
    """
    tif_pattern = "*.tif"
    fusion_variants = ((None, "fusion"), (30, "fusion_sigma30"))

    def _full_series(src_dir, root, sub, label):
        """Write NDVI, GCC, bands timeseries and the merged export for one dir."""
        _create_timeseries_for_dir(
            src_dir, root / "ndvi" / sub, site_position, label, tif_pattern
        )
        _create_gcc_timeseries_for_dir(
            src_dir, root / "gcc" / sub, site_position, label, tif_pattern
        )
        _create_bands_timeseries_for_dir(
            src_dir, root / "bands" / sub, site_position, label, tif_pattern
        )
        _write_export(
            root / "ndvi" / sub,
            root / "gcc" / sub,
            root / "bands" / sub,
            root / "export" / sub,
        )

    for strategy in ("aggressive", "nonaggressive"):
        base = Path(f"data/{site_name}/{season}/prepared_{strategy}")
        for source in ("s2", "s3"):
            src_dir = base / source
            if src_dir.exists():
                _full_series(
                    src_dir, base, source, f"PREPARED-{source.upper()}-{strategy}"
                )
        for sig, fusion_sub in fusion_variants:
            src_dir = base / fusion_sub
            if src_dir.exists():
                _full_series(
                    src_dir, base, fusion_sub, f"FUSION-{strategy}-σ{sig or 20}"
                )

        itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb")
        if not itb.exists():
            continue
        for source in ("s2", "s3"):
            src_dir = itb / source
            if src_dir.exists():
                _create_gcc_timeseries_for_dir(
                    src_dir,
                    itb / "gcc" / source,
                    site_position,
                    f"PREPARED-ITB-{source.upper()}-{strategy}",
                    tif_pattern,
                )
        for sig, fusion_sub in fusion_variants:
            src_dir = itb / fusion_sub
            if src_dir.exists():
                _create_gcc_timeseries_for_dir(
                    src_dir,
                    itb / "gcc" / fusion_sub,
                    site_position,
                    f"FUSION-ITB-{strategy}-σ{sig or 20}",
                    tif_pattern,
                )
def create_bands_timeseries_post_process(season, site_position, site_name):
    """Build band timeseries and merged exports for post-processed runs.

    For each strategy (``aggressive``, ``nonaggressive``), sigma (20, 30) and
    source (``s2``, ``s3``, ``fusion``), an existing
    ``processed_<strategy>_sigma<sigma>/<source>`` directory is passed to
    ``_create_bands_timeseries_for_dir`` (pattern ``*.geotiff``) and the
    NDVI/GCC/bands timeseries of that source are merged with
    ``_write_export``.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            base = Path(f"data/{site_name}/{season}/processed_{strategy}_sigma{sigma}")
            for source in ("s2", "s3", "fusion"):
                src_dir = base / source
                bands_dir = base / "bands" / source
                if not src_dir.exists():
                    continue
                _create_bands_timeseries_for_dir(
                    src_dir,
                    bands_dir,
                    site_position,
                    f"POST-{source.upper()}-{strategy}-σ{sigma}",
                    "*.geotiff",
                )
                _write_export(
                    base / "ndvi" / source,
                    base / "gcc" / source,
                    bands_dir,
                    base / "export" / source,
                )

View file

@ -1,529 +0,0 @@
"""Metrics and statistics: temporal metrics and PhenoCam stats."""
import json
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
from scipy import sparse
from scipy.sparse.linalg import spsolve
from scipy.stats import pearsonr
# Smoothing weight handed to ``_whittaker_smooth_dict`` as ``lam`` for the
# S2 baseline stored under "s2_whittaker_lambda400".
# NOTE(review): the name suggests the unit is days squared - confirm.
WHITTAKER_LAMBDA_DAYS_SQ = 400.0
def _norm_date_key(s):
if s is None:
return None
t = str(s).strip()
return t.split("T")[0][:10] if "T" in t else t[:10]
def load_timeseries(filepath):
    """Load a JSON timeseries and return a dict mapping date -> value.

    The file must contain a list of objects with a ``date`` key; the value is
    taken from ``greenness_index`` (``None`` when absent). A missing file
    yields an empty dict.
    """
    source = Path(filepath)
    if not source.exists():
        return {}
    with open(source) as fh:
        entries = json.load(fh)
    series = {}
    for entry in entries:
        series[entry["date"]] = entry.get("greenness_index")
    return series
def match_dates(fusion_ts, phenocam_ts):
    """Align two timeseries on their common calendar days.

    Keys of both mappings are normalised with ``_norm_date_key``; when several
    keys collapse onto the same day the first one encountered wins. Days where
    either value is ``None`` are dropped.

    Returns:
        ``(fusion_values, phenocam_values, dates)``: two numpy arrays and the
        list of day keys, all in ascending day order.
    """

    def _by_day(series):
        keyed = {}
        for raw_key, value in series.items():
            day = _norm_date_key(raw_key)
            if day:
                keyed.setdefault(day, value)
        return keyed

    fusion_by_day = _by_day(fusion_ts)
    phenocam_by_day = _by_day(phenocam_ts)
    matched = [
        (day, fusion_by_day[day], phenocam_by_day[day])
        for day in sorted(set(fusion_by_day) & set(phenocam_by_day))
        if fusion_by_day[day] is not None and phenocam_by_day[day] is not None
    ]
    dates = [day for day, _fused, _ref in matched]
    fusion_vals = [fused for _day, fused, _ref in matched]
    phenocam_vals = [ref for _day, _fused, ref in matched]
    return np.array(fusion_vals), np.array(phenocam_vals), dates
def pearson_correlation(y_true, y_pred):
    """Return the Pearson correlation coefficient r as a float.

    Returns ``None`` when fewer than two samples are given or when either
    input has zero standard deviation.
    """
    if len(y_true) < 2:
        return None
    if np.std(y_true) == 0 or np.std(y_pred) == 0:
        return None
    coefficient, _p_value = pearsonr(y_true, y_pred)
    return float(coefficient)
def r_squared(y_true, y_pred):
    """Return 1 - SS_res / SS_tot relative to predicting ``mean(y_true)``.

    This is the generalised R² (it can be negative) and uses the same formula
    as ``nse``; it is not the squared Pearson r. Returns ``None`` for fewer
    than two samples or when ``y_true`` has no variance.
    """
    if len(y_true) < 2 or np.std(y_true) == 0:
        return None
    total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
    if total_ss == 0:
        return None
    residual_ss = np.sum((y_true - y_pred) ** 2)
    return float(1 - (residual_ss / total_ss))
def rmse(y_true, y_pred):
    """Return the root mean square error, or ``None`` for empty input."""
    if len(y_true) == 0:
        return None
    squared_error = (y_true - y_pred) ** 2
    return float(np.sqrt(np.mean(squared_error)))
def mae(y_true, y_pred):
    """Return the mean absolute error, or ``None`` for empty input."""
    if len(y_true) == 0:
        return None
    absolute_error = np.abs(y_true - y_pred)
    return float(np.mean(absolute_error))
def nrmse(y_true, y_pred):
    """Return RMSE divided by ``mean(y_true)``.

    Returns ``None`` for empty input, when the mean of ``y_true`` is zero, or
    when ``rmse`` itself returns ``None``.
    """
    if len(y_true) == 0:
        return None
    reference_mean = np.mean(y_true)
    if reference_mean == 0:
        return None
    error = rmse(y_true, y_pred)
    if error is None:
        return None
    return float(error / reference_mean)
def nse(y_true, y_pred):
    """Return the Nash-Sutcliffe Efficiency of ``y_pred`` against ``y_true``.

    Returns ``None`` for fewer than two samples or when ``y_true`` has zero
    variance around its mean.
    """
    if len(y_true) < 2:
        return None
    spread = np.sum((y_true - np.mean(y_true)) ** 2)
    if spread == 0:
        return None
    error = np.sum((y_true - y_pred) ** 2)
    return float(1 - (error / spread))
def residual_vs_phenocam(fusion_ts, phenocam_ts):
    """Summarise the residual (fused GCC minus PhenoCam GCC) on matched dates.

    A positive mean means the fusion is systematically above PhenoCam, a
    negative mean below, and a mean near zero indicates no mean bias. When
    comparing BtI and ItB means at the same strategy and sigma
    (``derived.bti_vs_itb_mean_residual``), the one closer to zero has less
    mean bias against PhenoCam.

    Returns:
        Dict with ``mean``, ``std``, ``mae``, ``rmse`` and ``n_samples``, or
        ``None`` when fewer than two dates match.
    """
    fused, reference, _dates = match_dates(fusion_ts, phenocam_ts)
    if len(fused) < 2:
        return None
    residual = fused - reference
    summary = {}
    summary["mean"] = float(np.mean(residual))
    summary["std"] = float(np.std(residual))
    summary["mae"] = float(np.mean(np.abs(residual)))
    summary["rmse"] = float(np.sqrt(np.mean(residual**2)))
    summary["n_samples"] = int(len(residual))
    return summary
def calculate_temporal_metrics(fusion_ts, phenocam_ts):
    """Score a fused timeseries against PhenoCam on their matched dates.

    PhenoCam is the reference (``y_true``) and the fusion the prediction.
    ``nse_pc`` and ``nse`` hold the same value.

    Returns:
        Dict with ``pearson_r``, ``r_squared``, ``rmse``, ``mae``, ``nrmse``,
        ``nse_pc``, ``nse``, ``n_samples``, ``date_range`` and, when
        available, ``residual_vs_phenocam``; ``None`` when fewer than two
        dates match.
    """
    fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts)
    if len(fusion_vals) < 2:
        return None
    scorers = (
        ("pearson_r", pearson_correlation),
        ("r_squared", r_squared),
        ("rmse", rmse),
        ("mae", mae),
        ("nrmse", nrmse),
    )
    metrics = {name: score(phenocam_vals, fusion_vals) for name, score in scorers}
    metrics["nse_pc"] = metrics["nse"] = nse(phenocam_vals, fusion_vals)
    metrics["n_samples"] = len(fusion_vals)
    if dates:
        metrics["date_range"] = {"start": dates[0], "end": dates[-1]}
    else:
        metrics["date_range"] = None
    residual_summary = residual_vs_phenocam(fusion_ts, phenocam_ts)
    if residual_summary:
        metrics["residual_vs_phenocam"] = residual_summary
    return metrics
def derived_tier1(temporal: dict) -> dict:
    """Derive sigma and workflow comparisons from per-scenario temporal metrics.

    ``temporal`` maps scenario names (``<strategy>_sigma<20|30>`` for BtI and
    the same with an ``_itb`` suffix for ItB) to metric dicts.

    Returns a dict with:

    * ``delta_nse_pc_sigma20_minus_sigma30``: per mode (``bti``/``itb``) and
      strategy, ``nse_pc`` at sigma 20 minus ``nse_pc`` at sigma 30. A
      positive value means sigma 20 scores higher, a negative value means
      sigma 30 scores higher; ``None`` when either value is missing.
    * ``bti_vs_itb_mean_residual``: one entry per strategy and sigma with the
      mean residual of the BtI and ItB scenarios (``None`` when missing).
    """

    def _as_float(value):
        return float(value) if isinstance(value, (int, float)) else None

    def _mean_residual(key):
        scenario = temporal.get(key) or {}
        # ``residual_vs_phenocam`` may be absent or explicitly null (e.g. in
        # metrics reloaded from JSON); treat both as "no residual".
        return (scenario.get("residual_vs_phenocam") or {}).get("mean")

    d_nse = {"bti": {}, "itb": {}}
    for strategy in ("aggressive", "nonaggressive"):
        for mode, suf in (("bti", ""), ("itb", "_itb")):
            n20 = (temporal.get(f"{strategy}_sigma20{suf}") or {}).get("nse_pc")
            n30 = (temporal.get(f"{strategy}_sigma30{suf}") or {}).get("nse_pc")
            if isinstance(n20, (int, float)) and isinstance(n30, (int, float)):
                d_nse[mode][strategy] = float(n20 - n30)
            else:
                d_nse[mode][strategy] = None
    paired = [
        {
            "strategy": strategy,
            "sigma": sig,
            "mean_residual_bti": _as_float(
                _mean_residual(f"{strategy}_sigma{sig}")
            ),
            "mean_residual_itb": _as_float(
                _mean_residual(f"{strategy}_sigma{sig}_itb")
            ),
        }
        for strategy in ("aggressive", "nonaggressive")
        for sig in (20, 30)
    ]
    return {
        "delta_nse_pc_sigma20_minus_sigma30": d_nse,
        "bti_vs_itb_mean_residual": paired,
    }
# Scenario keys compared pairwise: each BtI key against the same key + "_itb".
MATCHED_PAIR_CONFIGS = (
    "aggressive_sigma20",
    "aggressive_sigma30",
    "nonaggressive_sigma20",
    "nonaggressive_sigma30",
)


def derived_matched_pair_workflow(temporal: dict) -> dict:
    """Compare BtI and ItB metrics for every config in ``MATCHED_PAIR_CONFIGS``.

    Per config the result holds the ``nse_pc`` and ``rmse`` of both workflows,
    their differences (BtI minus ItB), ``bti_wins`` (NSE difference > 0) and
    ``residual_bti_wins``. Values that are not numeric are reported as
    ``None``.

    Site-level fields: ``nse_bti_wins_count`` (also exposed as
    ``consistency``), ``residual_bti_wins_count``, ``residual_nse_mismatch``
    (the two counts differ) and ``mean_delta_nse`` (mean NSE difference over
    the configs where both values exist, else ``None``).
    """
    numeric = (int, float)

    def _opt_float(value):
        return float(value) if isinstance(value, numeric) else None

    rows = []
    nse_deltas: list[float] = []
    nse_bti_wins_count = 0
    residual_bti_wins_count = 0
    for config in MATCHED_PAIR_CONFIGS:
        bti = temporal.get(config) or {}
        itb = temporal.get(f"{config}_itb") or {}
        nse_bti, nse_itb = _opt_float(bti.get("nse_pc")), _opt_float(itb.get("nse_pc"))
        rmse_bti, rmse_itb = _opt_float(bti.get("rmse")), _opt_float(itb.get("rmse"))
        mean_bti = _opt_float((bti.get("residual_vs_phenocam") or {}).get("mean"))
        mean_itb = _opt_float((itb.get("residual_vs_phenocam") or {}).get("mean"))

        delta_nse = bti_wins = None
        if nse_bti is not None and nse_itb is not None:
            delta_nse = nse_bti - nse_itb
            bti_wins = delta_nse > 0
            nse_deltas.append(delta_nse)
            nse_bti_wins_count += int(bti_wins)

        delta_rmse = None
        if rmse_bti is not None and rmse_itb is not None:
            delta_rmse = rmse_bti - rmse_itb

        residual_bti_wins = None
        if mean_bti is not None and mean_itb is not None:
            # NOTE(review): this compares the signed mean residuals (larger
            # BtI mean counts as a BtI win), not their distance from zero -
            # confirm this is the intended definition of "wins".
            if mean_bti > mean_itb:
                residual_bti_wins_count += 1
                residual_bti_wins = True
            elif mean_bti < mean_itb:
                residual_bti_wins = False

        rows.append(
            {
                "config": config,
                "nse_pc_bti": nse_bti,
                "nse_pc_itb": nse_itb,
                "rmse_bti": rmse_bti,
                "rmse_itb": rmse_itb,
                "delta_nse_bti_minus_itb": delta_nse,
                "delta_rmse_bti_minus_itb": delta_rmse,
                "bti_wins": bti_wins,
                "residual_bti_wins": residual_bti_wins,
            }
        )

    if nse_deltas:
        mean_delta_nse = float(sum(nse_deltas) / len(nse_deltas))
    else:
        mean_delta_nse = None
    return {
        "per_config": rows,
        "consistency": nse_bti_wins_count,
        "nse_bti_wins_count": nse_bti_wins_count,
        "residual_bti_wins_count": residual_bti_wins_count,
        "residual_nse_mismatch": residual_bti_wins_count != nse_bti_wins_count,
        "mean_delta_nse": mean_delta_nse,
    }
def calculate_phenocam_stats(phenocam_ts):
    """Return mean, std, min, max and sample count of the non-None values.

    Returns ``None`` when the mapping holds no non-None value.
    """
    observed = [value for value in phenocam_ts.values() if value is not None]
    if not observed:
        return None
    vals = np.array(observed)
    reducers = (("mean", np.mean), ("std", np.std), ("min", np.min), ("max", np.max))
    stats = {name: float(reduce(vals)) for name, reduce in reducers}
    stats["n_samples"] = len(vals)
    return stats
def _s2_gcc_series_from_preselection(base: Path):
    """Build the raw S2 GCC series from ``raw/preselection/s2_preselection.json``.

    Each row provides ``b02``, ``b03`` and ``b04`` values and GCC is
    ``b03 / (b02 + b03 + b04)``. Because it is a ratio, a common scale factor
    on the three bands cancels out. Rows are skipped when the date is
    missing, a band is not convertible to float, the band sum is not finite
    or not positive, or the GCC is not finite. For duplicate dates the first
    valid row wins.

    Returns:
        ``(all_gcc, flags)``: ``all_gcc`` maps ``YYYY-MM-DD`` to GCC and
        ``flags`` maps the same keys to
        ``(excluded_aggressive, excluded_nonaggressive)`` booleans. Both are
        empty when the file does not exist.
    """
    path = base / "raw" / "preselection" / "s2_preselection.json"
    if not path.exists():
        return {}, {}
    with open(path) as f:
        rows = json.load(f)
    all_gcc: dict = {}
    flags: dict = {}
    for row in rows:
        day = _norm_date_key(row.get("date"))
        if not day:
            continue
        try:
            blue, green, red = (float(row.get(band)) for band in ("b02", "b03", "b04"))
        except (TypeError, ValueError):
            continue
        band_sum = blue + green + red
        if not np.isfinite(band_sum) or band_sum <= 0:
            continue
        gcc = green / band_sum
        if not np.isfinite(gcc) or day in all_gcc:
            continue
        all_gcc[day] = float(gcc)
        flags[day] = (
            bool(row.get("excluded_aggressive")),
            bool(row.get("excluded_nonaggressive")),
        )
    return all_gcc, flags
def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
    """Smooth observations on a daily grid with a Whittaker smoother.

    The grid spans the first to the last observed day. Observed days get
    weight 1 and all other days weight 0; the penalty is ``lam`` times the
    squared second differences. When several observations fall on the same
    day, the last one is used.

    Args:
        obs_dates: Observation dates (normalised with ``_norm_date_key``).
        obs_values: Observation values; ``None`` entries are ignored.
        lam: Smoothing weight of the second-difference penalty.
        n_min: Minimum number of grid days required.

    Returns:
        ``{YYYY-MM-DD: smoothed_value}`` for every day of the grid, or an
        empty dict when fewer than two observations remain or the grid is
        shorter than ``n_min``.
    """
    pairs = []
    for raw_date, value in zip(obs_dates, obs_values):
        if value is None:
            continue
        day_key = _norm_date_key(raw_date)
        if day_key:
            pairs.append((day_key, float(value)))
    if len(pairs) < 2:
        return {}

    def _to_date(key):
        return datetime.strptime(key, "%Y-%m-%d").date()

    days = sorted({key for key, _value in pairs})
    first_day, last_day = _to_date(days[0]), _to_date(days[-1])
    n = (last_day - first_day).days + 1
    if n < n_min:
        return {}
    weights = np.zeros(n)
    observed = np.zeros(n)
    for day_key, value in pairs:
        offset = (_to_date(day_key) - first_day).days
        if 0 <= offset < n:
            weights[offset] = 1.0
            observed[offset] = value
    second_diff = sparse.diags(
        [1.0, -2.0, 1.0], [0, 1, 2], shape=(n - 2, n), format="csc", dtype=np.float64
    )
    penalty = second_diff.T @ second_diff
    weight_matrix = sparse.diags(weights.astype(np.float64), format="csc")
    smoothed = spsolve(weight_matrix + lam * penalty, weights * observed)
    return {
        (first_day + timedelta(days=i)).isoformat(): float(smoothed[i])
        for i in range(n)
    }
def calculate_all_metrics(season, site_name, site_position):
    """Compute all metrics of a site/season against PhenoCam and save them.

    Steps, in order:

    1. Load ``raw/phenocam/phenocam_gcc.json``; without PhenoCam data the
       function returns ``{"temporal": {}}`` without writing a file.
    2. Add ``phenocam_stats`` and, when an SNR is available, ``phenocam_snr``
       (loaded from the sidecar, else written and reloaded, else computed).
    3. Add ``baseline`` metrics: raw S2 GCC, cloud-free S2 GCC per strategy,
       Whittaker-smoothed cloud-free S2 per strategy and S3 GCC per strategy.
    4. Add ``temporal`` metrics for the four BtI scenarios, then the four ItB
       scenarios (``*_itb``), skipping scenarios without fusion data.
    5. Add ``derived`` comparisons when any temporal metric exists.
    6. Write everything to ``data/<site>/<season>/metrics.json``.

    Args:
        season: Season identifier used in the data path.
        site_name: Site directory name below ``data/``.
        site_position: Not used; discarded immediately.

    Returns:
        The results dict.
    """
    del site_position
    data_root = Path("data")
    base = Path(f"data/{site_name}/{season}")
    results = {"temporal": {}}

    # The PhenoCam series is the reference for every comparison below.
    phenocam_ts = load_timeseries(base / "raw" / "phenocam" / "phenocam_gcc.json")
    if not phenocam_ts:
        print("[METRICS] Warning: No phenocam data found")
        return results

    phenocam_stats = calculate_phenocam_stats(phenocam_ts)
    if phenocam_stats:
        results["phenocam_stats"] = phenocam_stats

    from phenocam_snr import compute_snr, load_phenocam_snr, write_phenocam_snr

    snr_info = load_phenocam_snr(site_name, season, base=data_root)
    if not snr_info:
        write_phenocam_snr(
            site_name, season, base=data_root, metrics=results, fetch_if_missing=True
        )
        snr_info = load_phenocam_snr(site_name, season, base=data_root)
    if not snr_info:
        snr_info = compute_snr(
            site_name, season, base=data_root, metrics=results, fetch_if_missing=True
        )
    if snr_info.get("snr") is not None:
        results["phenocam_snr"] = {
            field: snr_info.get(field)
            for field in ("amplitude", "spline_rmse_gcc90", "snr")
        }

    baseline = {}
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if all_gcc:
        raw_s2_metrics = calculate_temporal_metrics(all_gcc, phenocam_ts)
        if raw_s2_metrics:
            baseline["s2"] = raw_s2_metrics
        for strategy, flag_idx in (("aggressive", 0), ("nonaggressive", 1)):
            kept_items = sorted(
                (
                    (day, gcc)
                    for day, gcc in all_gcc.items()
                    if day in flags and not flags[day][flag_idx]
                ),
                key=lambda item: item[0],
            )
            if not kept_items:
                continue
            cloudfree_metrics = calculate_temporal_metrics(dict(kept_items), phenocam_ts)
            if cloudfree_metrics:
                baseline.setdefault("s2_cloudfree", {})[strategy] = cloudfree_metrics
            obs_days, obs_values = zip(*kept_items)
            smooth = _whittaker_smooth_dict(
                obs_days, obs_values, WHITTAKER_LAMBDA_DAYS_SQ
            )
            if not smooth:
                continue
            smooth_metrics = calculate_temporal_metrics(smooth, phenocam_ts)
            if smooth_metrics:
                baseline.setdefault("s2_whittaker_lambda400", {})[strategy] = (
                    smooth_metrics
                )
    for strategy in ("aggressive", "nonaggressive"):
        s3_path = (
            base / f"processed_{strategy}_sigma20" / "gcc" / "s3" / "timeseries.json"
        )
        if not s3_path.exists():
            continue
        s3_ts = load_timeseries(s3_path)
        if not s3_ts:
            continue
        s3_metrics = calculate_temporal_metrics(s3_ts, phenocam_ts)
        if s3_metrics:
            baseline.setdefault("s3", {})[strategy] = s3_metrics
    if baseline:
        results["baseline"] = baseline

    # BtI scenarios first, then ItB; only the BtI pass announces each scenario.
    workflows = (
        ("", "processed_{strategy}_sigma{sigma}", "fusion", True),
        ("_itb", "processed_{strategy}_itb_sigma{sigma}", "ItB fusion", False),
    )
    for suffix, dir_template, data_label, announce in workflows:
        for strategy in ("aggressive", "nonaggressive"):
            for sigma in (20, 30):
                scenario_name = f"{strategy}_sigma{sigma}{suffix}"
                if announce:
                    print(f"[METRICS] Calculating metrics for {scenario_name}...")
                processed_dir = dir_template.format(strategy=strategy, sigma=sigma)
                fusion_ts = load_timeseries(
                    base / processed_dir / "gcc" / "fusion" / "timeseries.json"
                )
                if not fusion_ts:
                    print(
                        f"[METRICS] Warning: Missing {data_label} data for {scenario_name}, skipping"
                    )
                    continue
                temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
                if temporal_metrics:
                    results["temporal"][scenario_name] = temporal_metrics

    if results["temporal"]:
        derived = derived_tier1(results["temporal"])
        derived["matched_pair_workflow"] = derived_matched_pair_workflow(
            results["temporal"]
        )
        results["derived"] = derived

    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"[METRICS] Saved results to {output_path}")
    return results
def main():
    """Standalone script entry point.

    Expects four positional arguments: ``<season> <site_name> <lat> <lon>``.
    Prints the usage and exits with status 1 when any of them is missing,
    otherwise runs ``calculate_all_metrics`` and writes the result to
    ``data/<site_name>/<season>/metrics.json``.
    """
    import sys

    # sys.argv holds the program name plus the four positional arguments;
    # sys.argv[4] (<lon>) is read below, so at least five entries are needed.
    if len(sys.argv) < 5:
        print("Usage: metrics_stats.py <season> <site_name> <lat> <lon>")
        print("Example: metrics_stats.py 2024 innsbruck 47.116171 11.320308")
        sys.exit(1)
    season = int(sys.argv[1])
    site_name = sys.argv[2]
    site_position = (float(sys.argv[3]), float(sys.argv[4]))
    results = calculate_all_metrics(season, site_name, site_position)
    # Written here as well so a metrics.json exists even when
    # calculate_all_metrics returned early without saving.
    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"[METRICS] Saved results to {output_path}")


if __name__ == "__main__":
    main()

View file

@ -1,328 +0,0 @@
"""PhenoCam signal-to-noise ratio for aggregate utility eligibility (Richardson et al., 2018)."""
from __future__ import annotations
import json
import re
from pathlib import Path
import requests
# Base URL of the PhenoCam REST API (the ``roilists/`` endpoint is queried below).
PHENOCAM_API = "https://phenocam.nau.edu/api"
# Matches a header line of the form ``# Spline RMSE gcc_90: <value>``
# (case-insensitive); group 1 is the numeric text.
SPLINE_RMSE_RE = re.compile(
    r"^\s*#\s*Spline\s+RMSE\s+gcc_90\s*:\s*([0-9.eE+-]+)\s*$",
    re.IGNORECASE,
)
# Site name -> year.
# NOTE(review): presumably the primary evaluation season per site; not
# referenced by the functions visible in this part of the module - confirm.
PRIMARY_SEASON: dict[str, int] = {
    "forthgr": 2024,
    "innsbruck": 2024,
    "pitsalu": 2024,
    "vindeln2": 2023,
    "sunflowerjerez1": 2024,
    "institutekarnobat": 2024,
}
# PhenoCam ROI type codes for archive URLs (first ROI used by acquisition when multiple exist).
# Sites missing from this mapping skip the archive lookup.
SITE_ROITYPE: dict[str, str] = {
    "forthgr": "AG",
    "innsbruck": "GR",
    "pitsalu": "WL",
    "vindeln2": "MX",
    "sunflowerjerez1": "AG",
    "institutekarnobat": "AG",
}
# Root of the PhenoCam data archive; per-site ROI files live under
# ``<archive>/<site>/ROI/``.
PHENOCAM_ARCHIVE = "https://phenocam.nau.edu/data/archive"
def phenocam_snr_path(site_name: str, season: int, base: Path | None = None) -> Path:
    """Return ``<base>/<site>/<season>/raw/phenocam/phenocam_snr.json``.

    ``base`` defaults to the relative directory ``data``.
    """
    root = base if base else Path("data")
    return root.joinpath(
        site_name, str(season), "raw", "phenocam", "phenocam_snr.json"
    )
def parse_spline_rmse_gcc90(text: str) -> float | None:
    """Parse ``# Spline RMSE gcc_90: <value>`` from a transition-dates CSV header.

    Only the first line matching ``SPLINE_RMSE_RE`` is considered. Returns
    its value as a float, or ``None`` when no line matches or the matched
    text is not a valid float.
    """
    candidates = (SPLINE_RMSE_RE.match(line) for line in text.splitlines())
    first_match = next((m for m in candidates if m), None)
    if first_match is None:
        return None
    try:
        return float(first_match.group(1))
    except ValueError:
        return None
def transition_dates_archive_url(site_name: str, roitype: str, seq: int = 1000) -> str:
    """Return the archive URL of a site's 1-day transition-dates CSV.

    The file is ``<site>_<roitype>_<seq>_1day_transition_dates.csv`` inside
    ``<PHENOCAM_ARCHIVE>/<site>/ROI/``.
    """
    filename = f"{site_name}_{roitype}_{seq}_1day_transition_dates.csv"
    return f"{PHENOCAM_ARCHIVE}/{site_name}/ROI/{filename}"
def transition_dates_url(site_name: str) -> str | None:
    """Return the ``one_day_transition_dates`` URL for the site's primary ROI.

    Lookup order:

    1. If the site has an entry in ``SITE_ROITYPE``, archive URLs for the
       sequence numbers 1000, 2000 and 1001 are probed with HEAD requests;
       the first one answering with status 200 is returned.
    2. Otherwise, or when no probe succeeds, the ``roilists/`` API is paged
       through and the first non-empty ``one_day_transition_dates`` of a ROI
       belonging to the site is returned.

    Request errors are swallowed; ``None`` is returned when nothing is found.
    """
    roitype = SITE_ROITYPE.get(site_name)
    sequences = (1000, 2000, 1001) if roitype else ()
    for seq in sequences:
        candidate = transition_dates_archive_url(site_name, roitype, seq)
        try:
            probe = requests.head(candidate, timeout=15, allow_redirects=True)
        except requests.RequestException:
            continue
        if probe.status_code == 200:
            return candidate

    page_url = f"{PHENOCAM_API}/roilists/"
    query: dict | None = {"site": site_name}
    try:
        while page_url:
            response = requests.get(page_url, params=query, timeout=30)
            response.raise_for_status()
            payload = response.json()
            for roi in payload.get("results", []):
                if roi.get("site") != site_name:
                    continue
                link = roi.get("one_day_transition_dates")
                if link:
                    return link
            # "next" is followed without re-sending the site filter.
            page_url, query = payload.get("next"), None
    except requests.RequestException:
        return None
    return None
def fetch_spline_rmse_from_archive(site_name: str) -> float | None:
    """Fetch the spline RMSE via the PhenoCam archive URL (fast path).

    Requires a ``SITE_ROITYPE`` entry for the site. The archive files for the
    sequence numbers 1000, 2000 and 1001 are downloaded in turn; the first
    one that answers with status 200 and contains a parsable spline RMSE
    header provides the result. Request errors move on to the next candidate.

    Returns:
        The RMSE as a float, or ``None`` when no candidate yields a value.
    """
    roitype = SITE_ROITYPE.get(site_name)
    if not roitype:
        return None
    for seq in (1000, 2000, 1001):
        candidate = transition_dates_archive_url(site_name, roitype, seq)
        try:
            response = requests.get(candidate, timeout=20)
            if response.status_code == 200:
                value = parse_spline_rmse_gcc90(response.text)
                if value is not None:
                    return value
        except requests.RequestException:
            pass
    return None
def fetch_spline_rmse_gcc90(site_name: str) -> float | None:
    """Download the transition-dates header and return the gcc_90 spline RMSE.

    The archive fast path is tried first. If it yields nothing, the URL from
    ``transition_dates_url`` is downloaded and parsed.

    Returns:
        The RMSE as a float, or ``None`` when no URL is known, the request
        fails, or the header line is missing.
    """
    archived = fetch_spline_rmse_from_archive(site_name)
    if archived is not None:
        return archived
    td_url = transition_dates_url(site_name)
    if not td_url:
        return None
    try:
        response = requests.get(td_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        return None
    return parse_spline_rmse_gcc90(response.text)
def season_amplitude(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
) -> float | None:
    """Return the seasonal GCC amplitude (maximum minus minimum).

    When ``metrics`` carries numeric ``phenocam_stats.min`` and
    ``phenocam_stats.max``, their difference is returned. Otherwise
    ``<base>/<site>/<season>/raw/phenocam/phenocam_gcc.json`` is read
    (``base`` defaults to ``data``): for a list, the numeric
    ``greenness_index`` values are used; for a dict, its numeric values.

    Returns:
        The amplitude as a float, or ``None`` when the file is missing, has
        another JSON type, or contains no numeric value.
    """
    numeric = (int, float)
    if metrics:
        stats = metrics.get("phenocam_stats") or {}
        lowest, highest = stats.get("min"), stats.get("max")
        if isinstance(lowest, numeric) and isinstance(highest, numeric):
            return float(highest - lowest)
    gcc_file = (
        (base or Path("data"))
        / site_name
        / str(season)
        / "raw"
        / "phenocam"
        / "phenocam_gcc.json"
    )
    if not gcc_file.is_file():
        return None
    data = json.loads(gcc_file.read_text(encoding="utf-8"))
    if isinstance(data, list):
        candidates = [item.get("greenness_index") for item in data]
    elif isinstance(data, dict):
        candidates = list(data.values())
    else:
        return None
    vals = [value for value in candidates if isinstance(value, numeric)]
    if not vals:
        return None
    return float(max(vals) - min(vals))
def compute_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    spline_rmse: float | None = None,
    fetch_if_missing: bool = True,
) -> dict:
    """Assemble amplitude, spline RMSE and their ratio (SNR) for one site/season.

    The RMSE is taken from *spline_rmse* when given. Otherwise it is read from
    the cached sidecar file if that exists, and only when there is no sidecar
    and *fetch_if_missing* is true is it fetched remotely. ``snr`` is None
    unless both numbers are numeric and the RMSE is positive.
    """
    root = base or Path("data")
    amplitude = season_amplitude(site_name, season, base=root, metrics=metrics)

    rmse_value = spline_rmse
    if rmse_value is None:
        cache_file = phenocam_snr_path(site_name, season, root)
        if cache_file.is_file():
            cached_payload = json.loads(cache_file.read_text(encoding="utf-8"))
            rmse_value = cached_payload.get("spline_rmse_gcc90")
        elif fetch_if_missing:
            rmse_value = fetch_spline_rmse_gcc90(site_name)

    numeric = (int, float)
    ratio = None
    if isinstance(amplitude, numeric) and isinstance(rmse_value, numeric):
        if rmse_value > 0:
            ratio = float(amplitude) / float(rmse_value)

    return {
        "site": site_name,
        "season": season,
        "amplitude": amplitude,
        "spline_rmse_gcc90": rmse_value,
        "snr": ratio,
    }
def write_phenocam_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    fetch_if_missing: bool = True,
) -> Path | None:
    """Compute the SNR record and persist it as the ``phenocam_snr.json`` sidecar.

    Nothing is written (and None is returned) when no spline RMSE could be
    determined; a warning is printed instead. On success the written path is
    returned.
    """
    root = base or Path("data")
    info = compute_snr(
        site_name,
        season,
        base=root,
        metrics=metrics,
        fetch_if_missing=fetch_if_missing,
    )
    rmse_value = info.get("spline_rmse_gcc90")
    if rmse_value is None:
        print(
            f"[PhenoCam-SNR] Warning: no spline RMSE for {site_name} {season}; "
            "skipping phenocam_snr.json"
        )
        return None

    out = phenocam_snr_path(site_name, season, root)
    out.parent.mkdir(parents=True, exist_ok=True)
    source_url = transition_dates_url(site_name)
    payload = {
        "site": site_name,
        "season": season,
        "amplitude": info.get("amplitude"),
        "spline_rmse_gcc90": rmse_value,
        "snr": info.get("snr"),
        "source": "phenocam_1day_transition_dates_header",
        "transition_dates_url": source_url,
        "roitype": SITE_ROITYPE.get(site_name),
    }
    serialized = json.dumps(payload, indent=2) + "\n"
    out.write_text(serialized, encoding="utf-8")
    print(f"[PhenoCam-SNR] Saved: {out} (SNR={info.get('snr')})")
    return out
def load_phenocam_snr(
    site_name: str, season: int, *, base: Path | None = None
) -> dict | None:
    """Return the parsed SNR sidecar for a site/season, or None if no file exists."""
    sidecar = phenocam_snr_path(site_name, season, base)
    if sidecar.is_file():
        return json.loads(sidecar.read_text(encoding="utf-8"))
    return None
def suggest_snr_threshold(snrs: list[float]) -> tuple[float, str]:
    """
    Choose eligibility threshold from cross-site SNR distribution.

    Returns (threshold, rationale). A distribution-based split is used only
    when two neighbouring values of the sorted SNRs are at least 0.5 apart
    and straddle 2 (lower value below 2, upper value at or above 2); the
    threshold is then their midpoint rounded to three decimals. In every
    other case (no values, a single value, all values >= 2, or no such gap)
    the default threshold 2.0 is returned with an explanatory rationale.
    """
    if not snrs:
        return 2.0, "default SNR >= 2 (no site SNR values available)"
    ordered = sorted(snrs)
    if len(ordered) == 1:
        return 2.0, "default SNR >= 2 (single site only)"
    if all(s >= 2.0 for s in ordered):
        return 2.0, "default SNR >= 2 (all sites exceed 2; no low-SNR exclusion group)"
    # Only the two values adjacent to a split point matter, so walk the
    # neighbouring pairs directly instead of slicing the list at every index.
    for below, above in zip(ordered, ordered[1:]):
        if above - below >= 0.5 and below < 2.0 <= above:
            threshold = (below + above) / 2.0
            return (
                round(threshold, 3),
                f"gap between {below:.3f} and {above:.3f} straddles SNR=2 "
                f"(midpoint {threshold:.3f})",
            )
    return 2.0, "default SNR >= 2 (no clear low/high cluster separation)"
def report_all_sites(
    *,
    base: Path | None = None,
    sites: dict[str, int] | None = None,
    fetch_if_missing: bool = True,
) -> list[dict]:
    """Build the SNR row for every site, print a summary table, and return the rows.

    Sites default to ``PRIMARY_SEASON`` and are processed in sorted name
    order. Each row with a numeric SNR additionally receives the boolean
    keys ``eligible_at_2``, ``eligible_at_3`` and ``eligible_at_suggested``.
    """
    root = base or Path("data")
    site_seasons = sites or PRIMARY_SEASON

    rows: list[dict] = []
    for site, season in sorted(site_seasons.items()):
        metrics_path = root / site / str(season) / "metrics.json"
        metrics = (
            json.loads(metrics_path.read_text(encoding="utf-8"))
            if metrics_path.is_file()
            else None
        )
        rows.append(
            compute_snr(
                site,
                season,
                base=root,
                metrics=metrics,
                fetch_if_missing=fetch_if_missing,
            )
        )

    print(f"{'site':<20} {'season':>6} {'amplitude':>10} {'rmse_spl':>10} {'SNR':>8}")
    print("-" * 58)
    for r in rows:
        amp, rmse, snr = (r.get(key) for key in ("amplitude", "spline_rmse_gcc90", "snr"))
        print(
            f"{r['site']:<20} {r['season']:>6} "
            f"{amp if amp is not None else '---':>10} "
            f"{rmse if rmse is not None else '---':>10} "
            f"{snr if snr is not None else '---':>8}"
        )

    valid_snrs = [r["snr"] for r in rows if isinstance(r.get("snr"), (int, float))]
    threshold, rationale = suggest_snr_threshold(valid_snrs)
    print(f"\nSuggested threshold: SNR >= {threshold} ({rationale})")

    for r in rows:
        snr = r.get("snr")
        if not isinstance(snr, (int, float)):
            continue
        r.update(
            eligible_at_2=snr >= 2.0,
            eligible_at_3=snr >= 3.0,
            eligible_at_suggested=snr >= threshold,
        )
    return rows
# Script entry point: print the cross-site SNR table with default settings.
if __name__ == "__main__":
    report_all_sites()

View file

@ -1,738 +0,0 @@
"""
PhenoCam GCC: green-up and green-down (50 % of seasonal amplitude) via TIMESAT.
Reads ``data/.../raw/phenocam/phenocam_gcc.json`` (or any path) and uses the
``timesat`` package (``timesat.tsfprocess``) with the same seasonal-threshold
meaning as the TIMESAT GUI: *startmethod* 1, *p_startcutoff* (0.5, 0.5) = 50 % of
the **per-season** amplitude above the local base. See the TIMESAT manual,
section 4.3 and rows 37-38 (season start method = seasonal amplitude).
**License:** the ``timesat`` PyPI wheel is under the TIMESAT Research License
(non-commercial research; see package metadata on PyPI).
PhenoCam time series: single-year acquisition writes
``phenocam_gcc.json`` (and ``phenocam_gcc.csv``). The three-year series used
for TIMESAT is stored separately as ``phenocam_gcc_3y.json`` in the same
folder (created on first use from the one-day summary API, then reused).
Importable: ``write_phenocam_phenology_for_site`` is called from ``run.py``;
the CLI entry point remains optional for ad-hoc runs.
**Saving results:** use ``-o path.json`` or ``--sidecar`` to write a JSON file
(see ``--help``). Sidecar mode writes ``phenocam_phenology.json`` (two dates
only) next to ``phenocam_gcc.json``.
``run_pipeline`` in ``run.py`` writes the same ``phenocam_phenology.json`` by
default when ``timesat`` is installed. GCC for TIMESAT uses ``phenocam_gcc_3y.json``
if present, otherwise the PhenoCam API for that site (listed in
``data/sites.geojson``; not a site list from the API). One-year
``phenocam_gcc.json`` on disk can still fill gaps when merged.
Use ``python phenology_timesat.py --all`` to batch every
``(sitename, season)`` from ``data/sites.geojson`` (``properties.sitename`` and
``properties.seasons``).
"""
from __future__ import annotations
import argparse
import csv
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
import numpy as np
import requests
# Base URL of the PhenoCam REST API used for ROI list lookups.
PHENOCAM_API = "https://phenocam.nau.edu/api"
# ``timesat`` is an optional dependency: when it is missing, ``_timesat`` is
# None and callers either skip the phenology step or exit with a hint.
try:
    import timesat as _timesat
except ImportError:
    _timesat = None
# Passed positionally to ``tsfprocess``; presumably its nodata marker
# (TODO confirm against the timesat documentation).
NODATA = -9999.0
def load_phenocam_gcc(path: Path) -> dict[str, float]:
    """Return map YYYY-MM-DD -> greenness index from PhenoCam JSON list.

    The file is read as UTF-8 (matching how this module writes it). Each list
    entry is expected to be an object with ``date`` and ``greenness_index``;
    the date is truncated to its first ten characters. Entries that are not
    objects, have an empty date, or whose value is missing, non-numeric or
    non-finite are skipped. A top-level value that is not a list yields an
    empty mapping.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        rows = json.load(f)
    out: dict[str, float] = {}
    if not isinstance(rows, list):
        return out
    for row in rows:
        if not isinstance(row, dict):
            continue
        day = str(row.get("date", ""))[:10]
        try:
            value = float(row.get("greenness_index"))
        except (TypeError, ValueError):
            # None, "NA" and other non-numeric payloads carry no GCC value.
            continue
        if day and np.isfinite(value):
            out[day] = value
    return out
def _gcc_from_summary_row(row: dict, use_mean_fallback: bool) -> float | None:
"""Extract daily GCC from a one-day summary row (same rules as acquisition)."""
if not use_mean_fallback:
oflag = row.get("outlierflag_gcc_90")
if oflag is not None and str(oflag).strip() in ("1", "1.0"):
return None
raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
if raw is None:
return None
text = str(raw).strip()
if not text or text.upper() == "NA":
return None
try:
val = float(text)
except ValueError:
return None
if val <= -9998.0:
return None
return val
def _phenocam_one_day_summary_csv_url(site_name: str) -> str | None:
    """Return URL of the one-day summary CSV for *site_name*, or None on failure.

    Pages through ``<PHENOCAM_API>/roilists/`` (following each page's
    ``next`` link) and returns the ``one_day_summary`` field of the first ROI
    whose ``site`` equals *site_name*. None is returned when no ROI matches,
    the field is empty, a request fails, or a response body is not valid
    JSON.
    """
    url = f"{PHENOCAM_API}/roilists/"
    # The query string is only needed for the first request; ``next`` links
    # are followed as returned.
    params: dict | None = {"site": site_name}
    try:
        while url:
            r = requests.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            for roi in data.get("results", []):
                # ``.get`` so an ROI entry without a "site" key is skipped
                # rather than raising KeyError past the "None on failure"
                # contract.
                if roi.get("site") == site_name:
                    return roi.get("one_day_summary") or None
            url = data.get("next")
            params = None
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decode errors on requests versions where
        # they are not RequestException subclasses.
        return None
    return None
def _parse_phenocam_gcc_from_csv_text(
    text: str, start_date: str, end_date: str
) -> dict[str, float]:
    """Collect date -> GCC for CSV rows dated within [start_date, end_date] inclusive.

    Empty lines and lines starting with ``#`` are dropped before parsing.
    ``gcc_mean`` is used instead of ``gcc_90`` when the header has no
    ``gcc_90`` column. Rows with a missing or unparsable date are ignored.
    """
    window_start = datetime.strptime(start_date, "%Y-%m-%d")
    window_end = datetime.strptime(end_date, "%Y-%m-%d")

    data_lines = []
    for candidate in text.split("\n"):
        if candidate and not candidate.startswith("#"):
            data_lines.append(candidate)

    reader = csv.DictReader(data_lines)
    mean_only = "gcc_90" not in (reader.fieldnames or ())

    collected: dict[str, float] = {}
    for record in reader:
        try:
            stamp_text = record.get("date")
            if stamp_text:
                stamp = datetime.strptime(stamp_text, "%Y-%m-%d")
                if window_start <= stamp <= window_end:
                    value = _gcc_from_summary_row(record, mean_only)
                    if value is not None:
                        collected[stamp.date().isoformat()] = value
        except (ValueError, KeyError):
            continue
    return collected
def save_phenocam_gcc_json(path: Path, by_date: dict[str, float]) -> None:
    """Write *by_date* as a date-sorted JSON list of ``{"date", "greenness_index"}`` objects.

    Parent directories are created as needed. The file is UTF-8, indented by
    two spaces and ends with a newline.
    """
    records = [
        {"date": day, "greenness_index": by_date[day]} for day in sorted(by_date)
    ]
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
        handle.write("\n")
def fetch_phenocam_gcc_three_years_separately(
    site_name: str, season: int
) -> dict[str, float]:
    """
    Download one-day summary GCC for the calendar years ``season - 1`` through
    ``season + 1``.

    The summary CSV is fetched with a single HTTP GET and then filtered once
    per calendar year; the three per-year results are merged into one
    mapping. An empty mapping is returned (after printing a message) when no
    summary URL is known or the download fails.
    """
    merged: dict[str, float] = {}
    summary_url = _phenocam_one_day_summary_csv_url(site_name)
    if not summary_url:
        print(
            f"[PhenoCam phenology] No PhenoCam one-day summary URL for site {site_name!r}"
        )
        return merged

    try:
        response = requests.get(summary_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"[PhenoCam phenology] API CSV fetch failed: {e}")
        return merged

    csv_text = response.text
    for y in range(season - 1, season + 2):
        merged.update(
            _parse_phenocam_gcc_from_csv_text(csv_text, f"{y}-01-01", f"{y}-12-31")
        )
    return merged
def load_or_fetch_phenocam_gcc_3y(
    site_name: str, season: int, gcc_3y_path: Path
) -> dict[str, float]:
    """
    Return the three-year GCC series, preferring the cache at *gcc_3y_path*.

    If the cache file exists, parses, and contains at least one value, it is
    returned as is. Otherwise three years are fetched for *site_name* and, if
    anything came back, written to *gcc_3y_path* before being returned. An
    empty mapping means neither source produced data; nothing is written in
    that case.
    """
    if gcc_3y_path.is_file():
        try:
            cached = load_phenocam_gcc(gcc_3y_path)
        except (OSError, json.JSONDecodeError):
            # An unreadable cache is ignored and refreshed from the API below.
            cached = {}
        if cached:
            print(f"[PhenoCam phenology] Using {gcc_3y_path} ({len(cached)} values)")
            return cached
    out = fetch_phenocam_gcc_three_years_separately(site_name, season)
    if not out:
        return {}
    save_phenocam_gcc_json(gcc_3y_path, out)
    # The year range needs a separator; without one it printed e.g. "20232025".
    print(
        f"[PhenoCam phenology] Fetched and wrote {gcc_3y_path} "
        f"({len(out)} values for {season - 1}-{season + 1})"
    )
    return out
def resolve_phenocam_gcc_for_timesat(
    site_name: str, season: int, gcc_path: Path
) -> dict[str, float]:
    """
    Combine the three-year series with the one-year file at *gcc_path*.

    The three-year series lives next to *gcc_path* as
    ``phenocam_gcc_3y.json`` (loaded, or fetched once and saved). When both
    sources have a value for the same date, the three-year value wins. If the
    three-year series is empty, only the one-year data is returned.
    """
    three_year_file = gcc_path.parent / "phenocam_gcc_3y.json"
    three_year = load_or_fetch_phenocam_gcc_3y(site_name, season, three_year_file)

    single_year: dict[str, float] = {}
    if gcc_path.is_file():
        try:
            single_year = load_phenocam_gcc(gcc_path)
        except (OSError, json.JSONDecodeError):
            single_year = {}

    if not three_year:
        return single_year
    merged = dict(single_year)
    merged.update(three_year)
    return merged
def _day_count(calendar_year: int) -> int:
a = datetime(calendar_year, 1, 1)
b = datetime(calendar_year + 1, 1, 1)
return (b - a).days
def daily_profile_for_year(by_date: dict[str, float], calendar_year: int) -> np.ndarray:
    """
    Build a float32 array with one GCC value per day of *calendar_year*.

    Only finite values greater than zero count as valid. Days without a valid
    value are filled by linear interpolation over the day index (``np.interp``
    holds the first/last valid value at the ends). With exactly one valid
    day, that value fills the whole year. Raises ValueError when the year has
    no valid value at all.
    """
    n_days = _day_count(calendar_year)
    jan_first = datetime(calendar_year, 1, 1)

    series = np.full(n_days, np.nan, dtype=np.float64)
    for offset in range(n_days):
        stamp = (jan_first + timedelta(days=offset)).strftime("%Y-%m-%d")
        if stamp in by_date:
            series[offset] = by_date[stamp]

    usable = np.isfinite(series) & (series > 0.0)
    n_usable = int(np.count_nonzero(usable))
    if n_usable == 0:
        raise ValueError(f"No valid GCC in JSON for calendar year {calendar_year}")
    if n_usable == 1:
        return np.full(n_days, float(series[usable][0]), dtype=np.float32)

    positions = np.arange(n_days, dtype=np.float64)
    filled = np.interp(positions, positions[usable], series[usable])
    return filled.astype(np.float32)
def _gcc_profile_365_for_timesat(profile: np.ndarray) -> np.ndarray:
"""TIMESAT uses 365 days per season; drop Dec 31 on leap years."""
p = np.asarray(profile, dtype=np.float32).ravel()
if p.size == 366:
return p[:365]
if p.size == 365:
return p
raise ValueError(f"expected 365 or 366 daily values, got {p.size}")
def yyyydoy_to_iso(v: float) -> str:
    """Convert a YYYYDOY number (rounded to the nearest integer) to an ISO ``YYYY-MM-DD`` date."""
    year, day_of_year = divmod(int(round(float(v))), 1000)
    moment = datetime(year, 1, 1) + timedelta(days=day_of_year - 1)
    return moment.date().isoformat()
def build_yraw_three_years(
    by_date: dict[str, float], y1: int, y2: int, y3: int
) -> tuple[np.ndarray, str]:
    """
    Assemble the ``3 * 365`` daily GCC stack that is fed to TIMESAT.

    When a profile can be built for each of *y1*, *y2* and *y3*, the three
    are concatenated and the mode is ``"three_independent_years"``. If
    building any of them raises ValueError, the *y2* profile is repeated
    three times instead and the mode is ``"single_year_replicated"``; a
    ValueError for *y2* itself propagates to the caller.
    """

    def year_profile(year: int) -> np.ndarray:
        return _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, year))

    try:
        stacked = np.concatenate([year_profile(year) for year in (y1, y2, y3)])
        stacked = stacked.astype(np.float32, copy=False)
    except ValueError:
        return np.tile(year_profile(y2), 3), "single_year_replicated"
    return stacked, "three_independent_years"
def run_timesat_phenology_from_yraw(
    yraw: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> dict[str, str | float | None]:
    """
    Run TIMESAT on a length ``365 * 3`` daily VI stack and calendar *years_triplet*
    (YYYY, YYYY, YYYY) for the time vector. Middle year in the triplet is the
    season whose SOS/EOS we report.

    Args:
        yraw: Daily values; flattened to float32 and required to hold exactly
            ``365 * 3`` samples.
        years_triplet: Calendar years used to stamp the samples.
        start_cutoff: Pair written into ``p_startcutoff`` for class slot 0.
        smooth_window: Value written into ``p_smooth`` for class slot 0.
        p_ignoreday: Passed through to ``tsfprocess`` unchanged.

    Returns:
        A dict with ``reference_calendar_year`` (the middle year), the
        green-up / green-down dates as ISO strings or None, the matching
        fitted values or None, ``nseason`` as a list, and ``yfit_max``.

    Raises:
        ValueError: If *yraw* does not have ``365 * 3`` samples.
        ImportError: If the ``timesat`` package is not installed.
    """
    yraw = np.asarray(yraw, dtype=np.float32).ravel()
    y1, y2, y3 = years_triplet
    nyear = 3
    npt = 365 * nyear
    if yraw.size != npt:
        raise ValueError(f"yraw must have length {npt}, got {yraw.size}")
    # Time vector: one YYYYDOY integer per sample. Only the first 365 days of
    # each year are stamped, so day 366 of a leap year has no slot.
    tlist: list[int] = []
    for y in (y1, y2, y3):
        t0 = datetime(y, 1, 1)
        for d in range(365):
            tlist.append(int((t0 + timedelta(days=d)).strftime("%Y%j")))
    tv = np.array(tlist, dtype=np.int32)
    if len(tv) != npt:
        raise RuntimeError("internal: length mismatch")
    # Inputs are shaped (1, 1, npt) / (1, 1) — presumably a one-pixel image;
    # TODO confirm against the tsfprocess documentation.
    vi = np.asfortranarray(yraw.reshape(1, 1, -1))
    qa = np.asfortranarray(np.ones((1, 1, npt), dtype=np.float32))
    lc = np.ones((1, 1), dtype=np.uint8)
    landuse = np.ones(255, dtype=np.uint8)
    p_out = np.arange(1, npt + 1, dtype=np.int32)
    p_ylu = np.asfortranarray(np.array([0.0, 1.0], dtype=np.float64))
    # Parameter tables have 255 slots; only slot ``ci`` is filled in here.
    ci = 0
    p_fitmethod = np.zeros(255, dtype=np.int32)
    p_fitmethod[ci] = 1
    p_smooth = np.zeros(255, dtype=np.float64)
    p_smooth[ci] = float(smooth_window)
    p_nenvi = np.zeros(255, dtype=np.int32)
    p_nenvi[ci] = 1
    p_wfact = np.zeros(255, dtype=np.float64)
    p_wfact[ci] = 1.0
    # Per the module docstring: start method 1 with cutoffs (0.5, 0.5) means
    # 50 % of the per-season amplitude.
    p_startmethod = np.zeros(255, dtype=np.int32)
    p_startmethod[ci] = 1
    p_startcutoff = np.zeros((255, 2), dtype=np.float64, order="F")
    p_startcutoff[ci, :] = np.array(
        [start_cutoff[0], start_cutoff[1]], dtype=np.float64
    )
    p_low = np.zeros(255, dtype=np.float64)
    p_fillbase = np.zeros(255, dtype=np.int32)
    p_seasonmethod = np.zeros(255, dtype=np.int32)
    p_seasonmethod[ci] = 1
    p_seapar = np.zeros(255, dtype=np.float64)
    p_seapar[ci] = 1.0
    if _timesat is None:
        raise ImportError("Install the 'timesat' package: pip install timesat")
    # NOTE(review): the bare literals below (1, 0, 45, ...) are positional
    # tsfprocess arguments whose meaning is not visible in this module; check
    # the timesat signature before changing or reordering any of them.
    vpp, _vppqa, nseason, yfit, _yfitqa, _seasonfit, _tseq = _timesat.tsfprocess(
        nyear,
        vi,
        qa,
        tv,
        lc,
        1,
        landuse,
        p_out,
        p_ignoreday,
        p_ylu,
        0,
        p_fitmethod,
        p_smooth,
        NODATA,
        45,
        0,
        p_nenvi,
        p_wfact,
        p_startmethod,
        p_startcutoff,
        p_low,
        p_fillbase,
        1,
        p_seasonmethod,
        p_seapar,
        1,
        1,
        1,
        npt,
        len(p_out),
    )
    a = vpp[0, 0, :]
    # three growing-season rows at indices 0, 13*2, 13*4 in the raw vector
    # Offsets +0/+1/+3/+4 are read as SOS date, SOS value, EOS date and EOS
    # value — layout assumed from the variable names; TODO confirm.
    middle_block = 2
    off = 13 * middle_block
    sosd = a[off + 0] if a.size > off + 0 else np.nan
    sosv = a[off + 1] if a.size > off + 1 else np.nan
    eosd = a[off + 3] if a.size > off + 3 else np.nan
    eosv = a[off + 4] if a.size > off + 4 else np.nan
    yfit_max = float(np.max(yfit)) if yfit.size else float("nan")

    def pick(x: float) -> str | None:
        # Anything non-finite or below 1e5 cannot be a four-digit-year
        # YYYYDOY stamp, so it is reported as "no date".
        if not np.isfinite(x) or x < 1.0e5 or x < 0:
            return None
        try:
            return yyyydoy_to_iso(x)
        except (OverflowError, ValueError):
            return None

    return {
        "reference_calendar_year": y2,
        "green_up_50pct_date": pick(sosd),
        "green_up_50pct_fitted_gcc": float(sosv) if np.isfinite(sosv) else None,
        "green_down_50pct_date": pick(eosd),
        "green_down_50pct_fitted_gcc": float(eosv) if np.isfinite(eosv) else None,
        "nseason": nseason[0, 0].tolist() if nseason.ndim >= 2 else [],
        "yfit_max": yfit_max,
    }
def run_timesat_phenology(
    daily_profile: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> dict[str, str | float | None]:
    """
    Back-compat wrapper: run TIMESAT on one year's profile repeated three times.

    *daily_profile* must hold 365 or 366 values (a 366th value is dropped);
    otherwise ValueError is raised. Prefer :func:`build_yraw_three_years`
    followed by :func:`run_timesat_phenology_from_yraw`.
    """
    one_year = np.asarray(daily_profile, dtype=np.float32).ravel()
    n_values = len(one_year)
    if n_values != 365 and n_values != 366:
        raise ValueError("daily_profile must have length 365 or 366")
    replicated = np.tile(one_year[:365], 3)
    return run_timesat_phenology_from_yraw(
        replicated,
        years_triplet,
        start_cutoff=start_cutoff,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
def phenocam_gcc_path(site_name: str, season: int) -> Path:
    """Return the relative path of the one-year ``phenocam_gcc.json`` for a site/season."""
    segments = (
        "data",
        f"{site_name}",
        f"{season}",
        "raw",
        "phenocam",
        "phenocam_gcc.json",
    )
    return Path("/".join(segments))
def phenocam_gcc_3y_path(site_name: str, season: int) -> Path:
    """Return the relative path of the three-year ``phenocam_gcc_3y.json`` for a site/season."""
    segments = (
        "data",
        f"{site_name}",
        f"{season}",
        "raw",
        "phenocam",
        "phenocam_gcc_3y.json",
    )
    return Path("/".join(segments))
def iter_sites_seasons_with_phenocam(
    data_root: str | Path = "data",
) -> list[tuple[str, int]]:
    """List ``(site_name, season)`` for each ``phenocam_gcc.json`` found under *data_root* (legacy).

    Files are matched as ``<site>/<season>/raw/phenocam/phenocam_gcc.json``
    and visited in sorted path order. Season directories whose name is not
    all digits are skipped. A missing root yields an empty list.
    """
    root = Path(data_root)
    if not root.is_dir():
        return []
    matches = sorted(root.glob("*/*/raw/phenocam/phenocam_gcc.json"))
    leading_parts = (match.relative_to(root).parts[:2] for match in matches)
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    unique_pairs = dict.fromkeys(
        (site, int(season_text))
        for site, season_text in leading_parts
        if season_text.isdigit()
    )
    return list(unique_pairs)
def iter_sites_seasons_from_sites_geojson(
    path: str | Path = "data/sites.geojson",
) -> list[tuple[str, int]]:
    """
    Read ``(sitename, season)`` pairs from a GeoJSON FeatureCollection.

    For every feature with a ``properties.sitename`` and a dict-valued
    ``properties.seasons``, one pair is produced per season key that is a
    four-digit string, in sorted key order. A missing file yields an empty
    list.
    """
    geojson_file = Path(path)
    if not geojson_file.is_file():
        return []
    collection = json.loads(geojson_file.read_text(encoding="utf-8"))

    pairs: list[tuple[str, int]] = []
    for feature in collection.get("features", []):
        properties = feature.get("properties") or {}
        site = properties.get("sitename")
        seasons = properties.get("seasons")
        if site and isinstance(seasons, dict):
            pairs.extend(
                (str(site), int(year_key))
                for year_key in sorted(seasons)
                if year_key.isdigit() and len(year_key) == 4
            )
    return pairs
def write_phenocam_phenology_all(
    *,
    sites_geojson: str | Path | None = None,
    data_root: str | Path = "data",
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> int:
    """
    Call :func:`write_phenocam_phenology_for_site` for every ``(site, season)``
    listed in the sites GeoJSON and return how many pairs were processed.

    The GeoJSON defaults to ``<data_root>/sites.geojson``; *data_root* is used
    only to resolve that default. The pairs come from the file, not from a
    directory scan.
    """
    if sites_geojson is None:
        geo = Path(Path(data_root) / "sites.geojson")
    else:
        geo = Path(sites_geojson)

    pairs = iter_sites_seasons_from_sites_geojson(geo)
    if not pairs:
        if geo.is_file():
            print(
                f"[PhenoCam phenology] No (sitename, season) entries in {geo} "
                "(check properties.sitename and properties.seasons)."
            )
        else:
            print(f"[PhenoCam phenology] Missing or empty sites file: {geo}")

    n = 0
    for n, (site, season) in enumerate(pairs, start=1):
        print(f"=== {site} {season} ===")
        write_phenocam_phenology_for_site(
            site, season, smooth_window=smooth_window, p_ignoreday=p_ignoreday
        )
    print(f"[PhenoCam phenology] Processed {n} site/season pair(s) from {geo}.")
    return n
def phenocam_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path of ``phenocam_phenology.json`` for a site/season."""
    segments = (
        "data",
        f"{site_name}",
        f"{season}",
        "raw",
        "phenocam",
        "phenocam_phenology.json",
    )
    return Path("/".join(segments))
def write_phenocam_phenology_for_site(
    site_name: str,
    season: int,
    *,
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> None:
    """
    Write ``phenocam_phenology.json`` (two dates) for one site/season.

    Requires ``timesat``; without it a message is printed and nothing is
    written. The GCC series is resolved from ``phenocam_gcc_3y.json`` (fetched
    once if absent) merged with the optional one-year ``phenocam_gcc.json``.
    The output holds ``green_up_50pct_date`` and ``green_down_50pct_date`` as
    ISO dates or null. Missing data and build errors are reported and
    skipped rather than raised.
    """
    if _timesat is None:
        out = phenocam_phenology_path(site_name, season)
        print(
            f"[PhenoCam phenology] Skipped (no timesat); would write {out}. "
            "pip install timesat"
        )
        return

    years = (season - 1, season, season + 1)
    gcc = phenocam_gcc_path(site_name, season)
    try:
        by_date = resolve_phenocam_gcc_for_timesat(site_name, season, gcc)
    except OSError as e:
        print(f"[PhenoCam phenology] Skipped: {e}")
        return
    if not by_date:
        g3 = gcc.parent / "phenocam_gcc_3y.json"
        print(
            f"[PhenoCam phenology] No GCC ({gcc} and no data in {g3} after API); "
            f"skipping {phenocam_phenology_path(site_name, season).name}."
        )
        return

    try:
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
    except (OSError, ValueError) as e:
        print(f"[PhenoCam phenology] Skipped: {e}")
        return

    fitted = run_timesat_phenology_from_yraw(
        yraw,
        years,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
    record = {
        key: fitted.get(key)
        for key in ("green_up_50pct_date", "green_down_50pct_date")
    }

    out_path = phenocam_phenology_path(site_name, season)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as handle:
        json.dump(record, handle, indent=2)
        handle.write("\n")

    gup, gdn = record["green_up_50pct_date"], record["green_down_50pct_date"]
    print(
        f"[PhenoCam phenology] Wrote {out_path} (green-up {gup!r}, green-down {gdn!r}; "
        f"TIMESAT input={stack_mode})"
    )
def main() -> None:
    """Command-line entry point for the TIMESAT green-up / green-down extraction.

    With ``--all`` every ``(sitename, season)`` from the sites GeoJSON is
    processed and the function returns. Otherwise a single GCC JSON file is
    processed: the season is taken from ``--season``, else from the first
    four-digit path component, else the current year. The full result is
    printed to stdout as JSON; ``-o`` writes the same payload to a file, and
    ``--sidecar`` writes only the two dates next to the input.

    Raises:
        SystemExit: If ``timesat`` is not installed, the input file does not
            exist, or no TIMESAT input can be built for the season.
    """
    ap = argparse.ArgumentParser(
        description="TIMESAT 50 % seasonal-amplitude green-up / green-down for PhenoCam GCC JSON."
    )
    ap.add_argument(
        "--all",
        action="store_true",
        help="Write phenocam for every (sitename, season) in the sites GeoJSON (see --sites-geojson).",
    )
    ap.add_argument(
        "--data-root",
        type=Path,
        default=Path("data"),
        help="Resolves default --sites-geojson to <data-root>/sites.geojson.",
    )
    ap.add_argument(
        "--sites-geojson",
        type=Path,
        default=None,
        help="For --all: path to data/sites.geojson (default: <data-root>/sites.geojson).",
    )
    ap.add_argument(
        "gcc_json",
        type=Path,
        nargs="?",
        default=Path("data/innsbruck/2024/raw/phenocam/phenocam_gcc.json"),
        help="Path to phenocam_gcc.json (default: Innsbruck 2024 if present).",
    )
    ap.add_argument(
        "--season",
        type=int,
        default=None,
        help="Calendar year to build the daily GCC profile (default: infer from file path .../<year>/...).",
    )
    ap.add_argument(
        "--savitzky-hw",
        type=float,
        default=2.0,
        help="Half-width for fitmethod 1 (Savitzky-Golay); default 2.",
    )
    ap.add_argument(
        "--p-ignoreday",
        type=int,
        default=366,
        help="TIMESAT p_ignoreday (default 366).",
    )
    ap.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Write results to this JSON file (same schema as stdout, plus metadata).",
    )
    ap.add_argument(
        "--sidecar",
        action="store_true",
        help="Save two-date JSON next to input as phenocam_phenology.json (implies -o).",
    )
    args = ap.parse_args()
    if _timesat is None:
        raise SystemExit(
            "The 'timesat' package is required. Install with: pip install timesat"
        )
    if args.all:
        write_phenocam_phenology_all(
            sites_geojson=args.sites_geojson,
            data_root=args.data_root,
            smooth_window=args.savitzky_hw,
            p_ignoreday=args.p_ignoreday,
        )
        return
    path: Path = args.gcc_json
    if not path.is_file():
        raise SystemExit(f"Not a file: {path}")
    season = args.season
    if season is None:
        # Infer the season from the first four-digit directory in the path.
        for part in path.parts:
            if part.isdigit() and len(part) == 4:
                season = int(part)
                break
    if season is None:
        season = datetime.now().year
    by_date = load_phenocam_gcc(path)
    try:
        yraw, stack_mode = build_yraw_three_years(
            by_date, season - 1, season, season + 1
        )
    except ValueError as e:
        # No usable GCC for the season: exit with a message like the other
        # CLI failures instead of a raw traceback.
        raise SystemExit(f"Cannot build TIMESAT input from {path}: {e}") from e
    out = run_timesat_phenology_from_yraw(
        yraw,
        (season - 1, season, season + 1),
        smooth_window=args.savitzky_hw,
        p_ignoreday=args.p_ignoreday,
    )
    payload = {
        **out,
        "source_gcc_json": str(path.resolve()),
        "profile_year": season,
        "timesat_input": stack_mode,
        "method": "TIMESAT tsfprocess; startmethod=1; p_startcutoff=[0.5,0.5] (50% seasonal amplitude)",
    }
    out_path = args.output
    if args.sidecar:
        # --sidecar overrides -o and stores only the two dates.
        out_path = path.parent / "phenocam_phenology.json"
    if out_path is not None:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        to_write = (
            {
                "green_up_50pct_date": out.get("green_up_50pct_date"),
                "green_down_50pct_date": out.get("green_down_50pct_date"),
            }
            if args.sidecar
            else payload
        )
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(to_write, f, indent=2)
            f.write("\n")
        # Status goes to stderr so stdout stays pure JSON plus the summary line.
        print(f"Wrote {out_path}", file=sys.stderr)
    print(json.dumps(payload, indent=2))
    gup = out.get("green_up_50pct_date")
    gdn = out.get("green_down_50pct_date")
    if gup and gdn:
        print(
            f"Green-up (50 %): {gup} | Green-down (50 %): {gdn} "
            f"(profile year {season}, TIMESAT reference year {out['reference_calendar_year']})"
        )
# Script entry point: run the command-line interface.
if __name__ == "__main__":
    main()

View file

@ -1,268 +0,0 @@
"""Post-processing: crop fusion/S2/S3 to valid pixels."""
from pathlib import Path
import numpy as np
import rasterio
from rasterio import windows
from rasterio.warp import reproject, Resampling
def process_cropped(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
    """Crop fusion to valid data, then crop S2/S3 to match.

    Reads from ``data/<site>/<season>/prepared_<strategy>/`` and writes
    ``<date>_0.geotiff`` files below
    ``data/<site>/<season>/processed_<strategy>_sigma<sigma>/{fusion,s2,s3}``.

    Args:
        season: Season directory name component.
        site_position: Accepted for signature compatibility; not used here.
        site_name: Site directory name component.
        cleaning_strategy: Selects the ``prepared_*`` / ``processed_*``
            directory pair.
        sigma: When truthy, selects ``fusion_sigma<sigma>`` as input and names
            the output directory accordingly; otherwise the ``fusion`` input
            and the ``sigma20`` output directory are used.
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}"
    processed_dir = (
        f"processed_{cleaning_strategy}_sigma{sigma}"
        if sigma
        else f"processed_{cleaning_strategy}_sigma20"
    )
    processed = base / processed_dir
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    # Crop fusion to valid data and get dimensions
    fusion_dims = {}
    for fusion_file in fusion_prep.glob("REFL_*.tif"):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            # A pixel counts as valid when it is not NaN and above 0.001.
            valid = ~np.isnan(data) & (data > 0.001)
            # Collapse the other two axes to find rows / columns that hold
            # at least one valid value (axis 0 is the band axis of read()).
            rows = np.any(valid, axis=(0, 2))
            cols = np.any(valid, axis=(0, 1))
            row_idx = np.where(rows)[0]
            col_idx = np.where(cols)[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[PROCESS] Skipping {fusion_file.name} (no valid pixels)")
                continue
            r0, r1 = row_idx[0], row_idx[-1]
            c0, c1 = col_idx[0], col_idx[-1]
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            data_crop = src.read(window=window)
            transform = rasterio.windows.transform(window, src.transform)
            p = src.profile.copy()
            p.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **p) as dst:
                dst.write(data_crop)
            # Remember the crop window and the fusion grid for the S2/S3 pass.
            fusion_dims[date_str] = (
                c0,
                r0,
                w,
                h,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
            print(f"[PROCESS] Cropped fusion: {output_file}")
    # Crop S2 and S3 to fusion size
    for date_str, (
        c0,
        r0,
        w,
        h,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        window = windows.Window(c0, r0, w, h)
        # S2
        for s2_file in s2_prep.glob("*REFL.tif"):
            # The date is the third "_"-separated field; names with fewer
            # fields are skipped instead of raising IndexError.
            parts = s2_file.stem.split("_")
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                    with rasterio.open(output_file, "w", **p2) as dst:
                        dst.write(data)
                print(f"[PROCESS] Cropped: {output_file}")
        # S3: resample to fusion pixel size, then crop
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                # Resample to fusion pixel size
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        # Crop using same window
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
                        with rasterio.open(output_file, "w", **p2) as dst:
                            dst.write(data)
            print(f"[PROCESS] Cropped: {output_file}")
    print("[PROCESS] Completed")
def process_cropped_itb(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
    """Crop the ITB fusion rasters to their valid extent, then crop S2/S3 to match.

    Reads from ``data/<site>/<season>/prepared_<strategy>_itb/`` (fusion files
    ``GCC_*.tif``, S2 files ``*GCC.tif``, S3 files ``composite_<date>.tif``)
    and writes ``<date>_0.geotiff`` files below
    ``data/<site>/<season>/processed_<strategy>_itb_sigma<sigma>/{fusion,s2,s3}``.
    A falsy *sigma* selects the ``fusion`` input directory and the
    ``sigma20`` output directory. *site_position* is not used in this
    function.
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}_itb"
    processed_dir = (
        f"processed_{cleaning_strategy}_itb_sigma{sigma}"
        if sigma
        else f"processed_{cleaning_strategy}_itb_sigma20"
    )
    processed = base / processed_dir
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    # Pass 1: crop each fusion raster to the bounding box of its valid pixels
    # and remember the crop window and fusion grid per date.
    fusion_dims = {}
    for fusion_file in fusion_prep.glob("GCC_*.tif"):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            # Valid means: not NaN and above 0.001.
            valid = ~np.isnan(data) & (data > 0.001)
            rows = np.any(valid, axis=(0, 2))
            cols = np.any(valid, axis=(0, 1))
            row_idx = np.where(rows)[0]
            col_idx = np.where(cols)[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)")
                continue
            r0, r1 = row_idx[0], row_idx[-1]
            c0, c1 = col_idx[0], col_idx[-1]
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            data_crop = src.read(window=window)
            transform = rasterio.windows.transform(window, src.transform)
            p = src.profile.copy()
            p.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **p) as dst:
                dst.write(data_crop)
            fusion_dims[date_str] = (
                c0,
                r0,
                w,
                h,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
            print(f"[PROCESS-ITB] Cropped fusion: {output_file}")
    # Pass 2: apply each date's fusion window to the matching S2 and S3 rasters.
    for date_str, (
        c0,
        r0,
        w,
        h,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        window = windows.Window(c0, r0, w, h)
        for s2_file in s2_prep.glob("*GCC.tif"):
            # The date is the third "_"-separated field of the file stem;
            # only the first matching file is used.
            parts = s2_file.stem.split("_")
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                    with rasterio.open(output_file, "w", **p2) as dst:
                        dst.write(data)
                print(f"[PROCESS-ITB] Cropped: {output_file}")
                break
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                # Reproject S3 band by band onto the fusion grid (nearest
                # neighbour) in memory, keeping S3's own dtype and band count.
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        # Same window as the fusion crop, now on the same grid.
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
                        with rasterio.open(output_file, "w", **p2) as dst:
                            dst.write(data)
            print(f"[PROCESS-ITB] Cropped: {output_file}")
    print("[PROCESS-ITB] Completed")
def post_process_all_itb_scenarios(season, site_position, site_name):
    """Run ``process_cropped_itb`` for every cleaning-strategy / sigma pair.

    The four scenarios are visited strategy-major: "aggressive" with
    ``sigma=None`` and ``sigma=30``, then "nonaggressive" with the same two.
    """
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped_itb(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
def post_process_all_scenarios(season, site_position, site_name):
    """Crop fusion/S2/S3 to valid pixels for all 4 scenarios.

    Delegates to ``process_cropped`` once per (strategy, sigma) combination,
    in strategy-major order.
    """
    sigmas = (None, 30)
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in sigmas:
            scenario = {"cleaning_strategy": strategy, "sigma": sigma}
            process_cropped(season, site_position, site_name, **scenario)
def post_process_timeseries(season, site_position, site_name):
    """Generate NDVI, GCC, and S2 bands timeseries for all 4 scenarios.

    The three builders are imported lazily from ``metrics_indices`` and run
    in the order NDVI, GCC, bands.
    """
    from metrics_indices import (
        create_bands_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_ndvi_timeseries_post_process,
    )

    builders = (
        create_ndvi_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_bands_timeseries_post_process,
    )
    for build in builders:
        build(season, site_position, site_name)

View file

@ -1,364 +0,0 @@
"""Data preparation: S2/S3 preprocessing for fusion."""
import json
import shutil
from pathlib import Path
from collections import defaultdict
import numpy as np
import rasterio
from rasterio.warp import Resampling
from rasterio.vrt import WarpedVRT
from rasterio import shutil as rio_shutil
RESOLUTION_RATIO = 21
# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
S3_MOVING_AVERAGE_WINDOW_DAYS = 5
def _apply_s3_temporal_moving_average(s3_dir, window):
    """Smooth ``composite_*.tif`` files in ``s3_dir`` in place with a centred moving average.

    Files are ordered by the second ``_``-separated token of their stem.
    Zero-valued pixels are treated as missing (NaN) while averaging and are
    written back as 0, which is also recorded as the nodata value.  A
    ``window`` of 1 or less, or an empty directory, is a no-op.

    NOTE(review): the window spans neighbouring composites, not calendar
    days, so gaps between dates are not taken into account -- confirm this
    matches the intended "days" semantics.
    """
    if window <= 1:
        return
    composite_paths = sorted(
        s3_dir.glob("composite_*.tif"), key=lambda path: path.stem.split("_")[1]
    )
    if not composite_paths:
        return

    half_width = (window - 1) // 2
    layers = []
    profiles = []
    for path in composite_paths:
        with rasterio.open(path) as src:
            layer = src.read().astype(np.float32)
            profiles.append(src.profile.copy())
        # 0 marks nodata; NaN keeps it out of the temporal mean.
        layer[layer == 0] = np.nan
        layers.append(layer)

    stack = np.stack(layers, axis=0)
    t = stack.shape[0]
    smoothed = np.empty_like(stack)
    for index in range(t):
        first = max(0, index - half_width)
        last = min(t, index + half_width + 1)
        smoothed[index] = np.nanmean(stack[first:last], axis=0)
    smoothed = np.nan_to_num(smoothed, nan=0.0, posinf=0.0, neginf=0.0).astype(
        np.float32
    )

    for path, profile, layer in zip(composite_paths, profiles, smoothed):
        profile.update({"dtype": "float32", "nodata": 0})
        with rasterio.open(path, "w", **profile) as dst:
            dst.write(layer)
    print(f"[S3-PREP] Applied {window}-day centred MA ({t} composites)")
def _import_distance_to_clouds():
    """Lazily import and return ``efast.s2_processing.distance_to_clouds``.

    Raises:
        ImportError: if the ``efast`` package is not importable.  The
            original import error is attached as ``__cause__`` so the
            underlying reason stays visible in the traceback.
    """
    try:
        from efast.s2_processing import distance_to_clouds
    except ImportError as exc:
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        ) from exc
    return distance_to_clouds
def _load_excluded(season, site_name, cleaning_strategy):
    """Return the filenames flagged for exclusion, per sensor.

    Reads ``{source}_preselection.json`` for ``s2`` and ``s3`` under
    ``data/{site_name}/{season}/raw/preselection`` and collects the
    ``filename`` of every entry whose ``excluded_{cleaning_strategy}`` value
    is truthy.  A missing file yields an empty set for that sensor.

    Returns:
        dict with keys ``"s2"`` and ``"s3"`` mapping to sets of filenames.
    """
    preselection_dir = Path(f"data/{site_name}/{season}/raw/preselection")
    flag = f"excluded_{cleaning_strategy}"

    def _flagged(source):
        listing = preselection_dir / f"{source}_preselection.json"
        if not listing.exists():
            return set()
        records = json.loads(listing.read_text())
        return {record["filename"] for record in records if record.get(flag)}

    return {source: _flagged(source) for source in ("s2", "s3")}
def _get_base_dir(season, site_name, cleaning_strategy):
    """Return the relative ``prepared_{cleaning_strategy}`` directory for a site/season."""
    prepared_dir = f"data/{site_name}/{season}/prepared_{cleaning_strategy}/"
    return Path(prepared_dir)
def _get_itb_base_dir(season, site_name, cleaning_strategy):
    """Return the relative ``prepared_{cleaning_strategy}_itb`` directory for a site/season."""
    itb_dir = f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb"
    return Path(itb_dir)
def _compute_gcc_from_refl_array(blue, green, red):
    """Return green / (blue + green + red) as float32, element-wise.

    Pixels whose band sum is non-positive or non-finite are left at 0.
    """
    green32 = green.astype(np.float32)
    band_sum = blue.astype(np.float32) + green32 + red.astype(np.float32)
    valid = (band_sum > 0) & np.isfinite(band_sum)
    gcc = np.zeros_like(green, dtype=np.float32)
    # Divide only where the denominator is usable; everything else stays 0.
    gcc[valid] = green32[valid] / band_sum[valid]
    return gcc
def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
    """Expose every ``*DIST_CLOUD.tif`` of ``src_s2_dir`` inside ``dst_s2_dir``.

    An existing destination entry is replaced.  A symlink to the resolved
    source is preferred; if creating it raises ``OSError`` the file is
    copied (with metadata) instead.
    """
    dst_s2_dir.mkdir(parents=True, exist_ok=True)
    for source_file in src_s2_dir.glob("*DIST_CLOUD.tif"):
        target = dst_s2_dir / source_file.name
        already_there = target.is_symlink() or target.exists()
        if already_there:
            target.unlink(missing_ok=True)
        try:
            target.symlink_to(source_file.resolve())
        except OSError:
            shutil.copy2(source_file, target)
def prepare_s2_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Derive single-band GCC rasters from the prepared S2 ``*REFL.tif`` files.

    Each ``*_REFL.tif`` in the prepared ``s2`` directory becomes a
    ``*_GCC.tif`` in the ItB ``s2`` directory, computed from bands 1-3.
    Existing outputs and inputs with fewer than four bands are skipped.
    Finally the ``*DIST_CLOUD.tif`` files are linked (or copied) across.
    ``site_position`` is accepted for signature parity and not used here.
    """
    s2_prep = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2.mkdir(parents=True, exist_ok=True)

    for refl in sorted(s2_prep.glob("*REFL.tif")):
        out = itb_s2 / refl.name.replace("_REFL.tif", "_GCC.tif")
        if out.exists():
            continue
        with rasterio.open(refl) as src:
            if src.count < 4:
                continue
            blue = src.read(1).astype(np.float32)
            green = src.read(2).astype(np.float32)
            red = src.read(3).astype(np.float32)
            profile = src.profile.copy()
        gcc = _compute_gcc_from_refl_array(blue, green, red)
        profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        with rasterio.open(out, "w", **profile) as dst:
            dst.write(gcc, 1)
        print(f"[S2-ITB] Saved {out.name}")

    _link_dist_cloud_from_prepared(s2_prep, itb_s2)
def prepare_s3_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Derive single-band GCC rasters from the prepared S3 composites.

    Each ``composite_*.tif`` in the prepared ``s3`` directory is converted to
    a same-named GCC raster in the ItB ``s3`` directory, using bands 1-3.
    Existing outputs and inputs with fewer than four bands are skipped.
    ``site_position`` is accepted for signature parity and not used here.
    """
    prepared_s3 = _get_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3.mkdir(parents=True, exist_ok=True)

    for comp in sorted(prepared_s3.glob("composite_*.tif")):
        out = itb_s3 / comp.name
        if out.exists():
            continue
        with rasterio.open(comp) as src:
            if src.count < 4:
                continue
            blue = src.read(1).astype(np.float32)
            green = src.read(2).astype(np.float32)
            red = src.read(3).astype(np.float32)
            profile = src.profile.copy()
        gcc = _compute_gcc_from_refl_array(blue, green, red)
        profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        with rasterio.open(out, "w", **profile) as dst:
            dst.write(gcc, 1)
        print(f"[S3-ITB] Saved {out.name}")
def _reproject_raster_to_target(
    src_path,
    dst_path,
    target_bounds,
    target_crs,
    width,
    height,
    resampling=Resampling.bilinear,
):
    """Warp ``src_path`` onto a fixed grid and write it to ``dst_path`` as GeoTIFF.

    The target grid is defined by ``target_bounds`` (read through its
    ``left``/``bottom``/``right``/``top`` attributes), ``target_crs`` and the
    pixel dimensions ``width`` x ``height``.  The output profile is forced to
    float32 with nodata 0.
    """
    dst_transform = rasterio.transform.from_bounds(
        target_bounds.left,
        target_bounds.bottom,
        target_bounds.right,
        target_bounds.top,
        width,
        height,
    )
    with rasterio.open(src_path) as src, WarpedVRT(
        src,
        transform=dst_transform,
        height=height,
        width=width,
        crs=target_crs,
        resampling=resampling,
    ) as vrt:
        out_profile = vrt.profile.copy()
        out_profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
        rio_shutil.copy(vrt, dst_path, **out_profile)
def _rescale_dist_cloud_for_small_roi(s2_output_dir):
    """Lift ``*DIST_CLOUD.tif`` rasters whose maximum distance is at most 1.

    For every such raster, all positive values are replaced by 2.0 (zeros and
    other non-positive values are kept) and band 1 is rewritten in place as
    float32.

    Rationale recorded by the original author: EFAST weights pixels by
    ``(distance - 1) / D``, so distances that never exceed 1 -- which happens
    for very small ROIs such as PhenoCam sites -- give zero/NaN weights.
    """
    for dc_path in s2_output_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(dc_path, "r") as src:
            distances = src.read(1)
        d_max = float(np.nanmax(distances))
        if d_max > 1:
            continue
        # Every positive distance becomes 2 so that (d - 1) is positive.
        lifted = np.where(distances > 0, 2.0, distances).astype(np.float32)
        with rasterio.open(dc_path, "r+") as dst:
            dst.write(lifted, 1)
        print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})")
def prepare_s2(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
    """Prepare raw Sentinel-2 scenes for fusion.

    For every non-excluded ``*.geotiff`` in ``data/{site}/{season}/raw/s2``:
    scale the values by 1/10000, reproject onto a grid that shares bounds and
    CRS with a reference S3 raster at ``RESOLUTION_RATIO`` times its pixel
    count, and save the result as ``S2A_MSIL2A_{date}_REFL.tif``.  Afterwards
    the EFAST distance-to-clouds rasters are computed and rescaled for small
    ROIs.

    Args:
        season: season identifier used in the data path.
        site_position: ``(lat, lon)`` pair; only used for logging here.
        site_name: site directory name.
        cleaning_strategy: selects the exclusion list and output directory.
        date_range: accepted for interface compatibility; not used.

    Raises:
        ValueError: if no non-excluded S3 raster exists to define the grid.
    """
    lat, lon = site_position
    s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    s2_output_dir = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s2_output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )

    # Sorted so the reference grid does not depend on directory listing order.
    s3_files = sorted(
        f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]
    )
    if not s3_files:
        raise ValueError("No non-cloud S3 files found for reference bounds")
    with rasterio.open(s3_files[0]) as s3_ref:
        target_bounds = s3_ref.bounds
        target_crs = s3_ref.crs
        s2_width = s3_ref.width * RESOLUTION_RATIO
        s2_height = s3_ref.height * RESOLUTION_RATIO

    for s2_file in sorted(s2_dir.glob("*.geotiff")):
        if s2_file.name in clouds["s2"]:
            print(
                f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
            )
            continue
        date_str = s2_file.name.split("_")[0]
        refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
        if refl_dst.exists():
            print(f"[S2-PREP] Skipping {s2_file.name} (exists)")
            continue
        print(f"[S2-PREP] Processing {s2_file.name}...")
        temp_normalized = s2_output_dir / f"temp_{s2_file.name}"
        try:
            with rasterio.open(s2_file) as src:
                data = src.read().astype("float32") / 10000.0
                profile = src.profile.copy()
            profile.update({"dtype": "float32", "nodata": 0})
            with rasterio.open(temp_normalized, "w", **profile) as dst:
                dst.write(data)
            _reproject_raster_to_target(
                temp_normalized,
                refl_dst,
                target_bounds,
                target_crs,
                s2_width,
                s2_height,
            )
        except Exception:
            # A half-written REFL would be treated as "exists" on the next run.
            refl_dst.unlink(missing_ok=True)
            raise
        finally:
            # Never leave the intermediate raster in the output directory.
            temp_normalized.unlink(missing_ok=True)
        print(f"[S2-PREP] Saved: {refl_dst}")

    print("[S2-PREP] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud_for_small_roi(s2_output_dir)
    print("[S2-PREP] Completed")
def prepare_s3(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
    """Prepare raw Sentinel-3 acquisitions for fusion.

    Steps:
      1. Group the non-excluded ``*.geotiff`` files of
         ``data/{site}/{season}/raw/s3`` by the date prefix of their name.
      2. Build one composite per date (a plain copy for a single acquisition,
         otherwise the NaN-aware mean of the acquisitions).
      3. Reproject each composite onto a grid with the bounds and CRS of a
         prepared S2 ``*REFL.tif`` and ``1/RESOLUTION_RATIO`` of its pixels.
      4. Apply the temporal moving average to the reprojected composites.

    Args:
        season: season identifier used in the data path.
        site_position: ``(lat, lon)`` pair; only used for logging here.
        site_name: site directory name.
        cleaning_strategy: selects the exclusion list and output directory.
        date_range: accepted for interface compatibility; not used.

    Raises:
        ValueError: if the prepared S2 directory holds no ``*REFL.tif``.
    """
    lat, lon = site_position
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    s2_prepared_dir = base_dir / "s2"
    s3_preprocessed_dir = base_dir / "s3"
    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )

    s3_by_date = defaultdict(list)
    # Sorted so the per-date acquisition order (and thus the profile taken
    # from the first one) does not depend on directory listing order.
    for s3_file in sorted(s3_dir.glob("*.geotiff")):
        if s3_file.name not in clouds["s3"]:
            s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
        else:
            print(
                f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
            )
    print(
        f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
    )

    temp_composite_dir = s3_preprocessed_dir / "temp_composites"
    if temp_composite_dir.exists():
        shutil.rmtree(temp_composite_dir)
    temp_composite_dir.mkdir()
    try:
        for date_str, s3_files in sorted(s3_by_date.items()):
            composite_path = temp_composite_dir / f"composite_{date_str}.tif"
            if len(s3_files) == 1:
                # NOTE(review): single acquisitions are copied as-is and do not
                # get the |mean| >= 5 masking applied below -- confirm intended.
                shutil.copy(s3_files[0], composite_path)
                print(f"[S3-PREP] Composite {date_str}: 1 acquisition")
                continue
            s3_stack = []
            for s3_file in s3_files:
                with rasterio.open(s3_file) as src:
                    # Cast first: NaN cannot be stored in an integer array.
                    data = src.read().astype(np.float32)
                # Drop pixels whose band mean is implausibly large.
                data[:, np.abs(np.nanmean(data, axis=0)) >= 5] = np.nan
                s3_stack.append(data)
            composite = np.nanmean(np.array(s3_stack), axis=0).astype("float32")
            with rasterio.open(s3_files[0]) as src:
                profile = src.profile.copy()
            profile.update({"count": composite.shape[0], "dtype": "float32"})
            with rasterio.open(composite_path, "w", **profile) as dst:
                dst.write(composite)
            print(
                f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
            )

        # Reproject S3 to match S2 REFL bounds (full coverage) instead of
        # DIST_CLOUD bounds, so fusion covers the same area as S2.
        sen2_ref_paths = sorted(s2_prepared_dir.glob("*REFL.tif"))
        if not sen2_ref_paths:
            raise ValueError(f"No REFL files found in {s2_prepared_dir}")
        with rasterio.open(sen2_ref_paths[0]) as s2_ref:
            target_bounds = s2_ref.bounds
            target_crs = s2_ref.crs
            # Integer division, matching the distance_to_clouds grid logic.
            width = s2_ref.width // RESOLUTION_RATIO
            height = s2_ref.height // RESOLUTION_RATIO
        s3_transform = rasterio.transform.from_bounds(
            target_bounds.left,
            target_bounds.bottom,
            target_bounds.right,
            target_bounds.top,
            width,
            height,
        )

        sen3_paths = sorted(temp_composite_dir.glob("*.tif"))
        print(
            f"[S3-PREP] Reprojecting {len(sen3_paths)} composites to S2 grid ({width}×{height} px)..."
        )
        vrt_options = {
            "transform": s3_transform,
            "height": height,
            "width": width,
            "crs": target_crs,
            "resampling": Resampling.cubic,
        }
        for sen3_path in sen3_paths:
            outfile = s3_preprocessed_dir / sen3_path.name
            with rasterio.open(sen3_path) as s3_src:
                with WarpedVRT(s3_src, **vrt_options) as vrt:
                    profile = vrt.profile.copy()
                    profile.update(
                        {"dtype": "float32", "nodata": 0, "driver": "GTiff"}
                    )
                    rio_shutil.copy(vrt, outfile, **profile)
            print(f"[S3-PREP] Saved: {outfile}")

        _apply_s3_temporal_moving_average(
            s3_preprocessed_dir, S3_MOVING_AVERAGE_WINDOW_DAYS
        )
    finally:
        # Remove the scratch directory on failure as well as on success.
        shutil.rmtree(temp_composite_dir, ignore_errors=True)
    print("[S3-PREP] Completed")

View file

@ -1,142 +0,0 @@
"""Pre-selection: self-contained NDVI timeseries with cloud/dark-imagery exclusion markers."""
import csv
import json
import numpy as np
import rasterio
from rasterio.warp import transform as transform_coords
from pathlib import Path
from datetime import datetime
WINDOW_DAYS = 14
MIN_WINDOW_SIZE = 3
THRESHOLDS = {"aggressive": {"threshold": 0.3, "delta": 0.15}, "nonaggressive": {"threshold": 0.2, "delta": 0.25}}
# S2 uses reflectance * 10000, S3 uses 0-1
BLUE_MIN = {"s2": 100, "s3": 0.01}
GREEN_BAND = 2
RED_BAND = 3
NIR_BAND = 4
BLUE_BAND = 1
BAND_KEYS = ["b02", "b03", "b04", "b8a"]
def _sample_3x3(input_file, site_position):
    """Sample mean NDVI and the four band means in a 3x3 window at the site.

    Args:
        input_file: path of a raster with at least four bands.
        site_position: ``(lat, lon)`` in EPSG:4326.

    Returns:
        ``(ndvi, {"b02": ..., "b03": ..., "b04": ..., "b8a": ...})``, or
        ``(None, None)`` when the raster has fewer than four bands, the site
        lies outside it, no window pixel has positive red and NIR, or the
        file cannot be read (a warning is printed in that last case).
    """
    from rasterio.windows import Window

    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None, None
            lon, lat = site_position[1], site_position[0]
            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            if not (
                src.bounds.left <= x[0] <= src.bounds.right
                and src.bounds.bottom <= y[0] <= src.bounds.top
            ):
                return None, None
            row, col = src.index(x[0], y[0])
            if row < 0 or row >= src.height or col < 0 or col >= src.width:
                return None, None
            r0, r1 = max(0, row - 1), min(src.height, row + 2)
            c0, c1 = max(0, col - 1), min(src.width, col + 2)
            # Read only the (clipped) 3x3 neighbourhood, not the whole band.
            window = Window(c0, r0, c1 - c0, r1 - r0)
            windows = [
                src.read(i, window=window).astype(np.float32) for i in range(1, 5)
            ]
        red_w, nir_w = windows[RED_BAND - 1], windows[NIR_BAND - 1]
        mask = (red_w > 0) & (nir_w > 0) & ~np.isnan(red_w) & ~np.isnan(nir_w)
        if not np.any(mask):
            return None, None
        ndvi = float(
            np.mean((nir_w[mask] - red_w[mask]) / (nir_w[mask] + red_w[mask]))
        )
        band_means = {
            k: round(float(np.mean(w[mask])), 6) for k, w in zip(BAND_KEYS, windows)
        }
        return ndvi, band_means
    except Exception as exc:
        # Best effort: the caller treats a missing NDVI as "excluded", so say
        # why instead of hiding a corrupt or unreadable file.
        print(f"[PRESELECT] Could not sample {input_file}: {exc}")
        return None, None
def _extract_date(filename):
    """Return ``(YYYYMMDD, ISO datetime string)`` for the first valid date token.

    The name (with ``.geotiff`` removed) is split on ``_``; the first
    8-digit token that parses as a calendar date wins.  8-digit tokens that
    are not real dates (e.g. an orbit or tile number) are skipped rather
    than raising.  Returns ``(None, None)`` when no token qualifies.
    """
    for part in filename.replace(".geotiff", "").split("_"):
        if len(part) != 8 or not part.isdigit():
            continue
        try:
            parsed = datetime.strptime(part, "%Y%m%d")
        except ValueError:
            continue
        return part, parsed.isoformat()
    return None, None
def _is_excluded(entry, entries, strategy, source="s2"):
    """Decide whether ``entry`` is excluded under ``strategy``.

    An entry is excluded when it has no NDVI, when its ``b02`` mean is below
    the per-source ``BLUE_MIN``, or when its NDVI is below both the
    strategy's absolute ``threshold`` and ``max(window NDVI) - delta``.  The
    window holds every entry with an NDVI within ``WINDOW_DAYS`` days; with
    fewer than ``MIN_WINDOW_SIZE`` such entries the NDVI test is skipped.
    """
    limits = THRESHOLDS[strategy]
    if entry.get("ndvi") is None:
        return True

    darkest_allowed = BLUE_MIN.get(source, BLUE_MIN["s2"])
    blue = entry.get("b02")
    if blue is not None and entry["b02"] < darkest_allowed:
        return True

    def _as_datetime(record):
        return datetime.fromisoformat(record["date"].replace("Z", "+00:00"))

    reference = _as_datetime(entry)
    neighbours = [
        other["ndvi"]
        for other in entries
        if other.get("ndvi") is not None
        and abs((_as_datetime(other) - reference).days) <= WINDOW_DAYS
    ]
    if len(neighbours) < MIN_WINDOW_SIZE:
        return False

    relative_floor = max(neighbours) - limits["delta"]
    return entry["ndvi"] < relative_floor and entry["ndvi"] < limits["threshold"]
def create_timeseries(season, site_position, site_name):
    """Build NDVI timeseries (3x3 window) for raw S2/S3, with exclusion markers for both strategies.

    For each source, every dated ``*.geotiff`` under
    ``data/{site_name}/{season}/raw/{source}`` is sampled, sorted by date and
    tagged with ``excluded_aggressive`` / ``excluded_nonaggressive``.  The
    result is written as ``{source}_preselection.json`` and ``.csv`` under
    ``raw/preselection``.  A missing source directory is skipped.
    """
    lat, lon = site_position
    base = Path(f"data/{site_name}/{season}")
    print(f"[PRESELECT] Creating NDVI timeseries: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    fieldnames = ["filename", "date", "ndvi"] + BAND_KEYS + ["excluded_aggressive", "excluded_nonaggressive"]

    for source in ("s2", "s3"):
        input_dir = base / "raw" / source
        out_dir = base / "raw" / "preselection"
        out_dir.mkdir(parents=True, exist_ok=True)
        output_file = out_dir / f"{source}_preselection.json"
        if not input_dir.exists():
            print(f"[PRESELECT] Skipping {source}: {input_dir} not found")
            continue

        timeseries = []
        for raster in sorted(input_dir.glob("*.geotiff")):
            if "DIST_CLOUD" in raster.name:
                continue
            date_str, date_iso = _extract_date(raster.name)
            if not date_str:
                continue
            ndvi, band_means = _sample_3x3(raster, site_position)
            record = {"filename": raster.name, "date": date_iso, "ndvi": ndvi}
            if band_means:
                record.update(band_means)
            timeseries.append(record)
        timeseries.sort(key=lambda record: record["date"])

        for record in timeseries:
            for strategy in ("aggressive", "nonaggressive"):
                record[f"excluded_{strategy}"] = _is_excluded(
                    record, timeseries, strategy, source
                )

        with open(output_file, "w") as out:
            json.dump(timeseries, out, indent=2)

        csv_file = out_dir / f"{source}_preselection.csv"
        with open(csv_file, "w", newline="") as out:
            writer = csv.DictWriter(out, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            for record in timeseries:
                writer.writerow({name: record.get(name) for name in fieldnames})

        n_excl_agg = sum(1 for record in timeseries if record["excluded_aggressive"])
        n_excl_non = sum(1 for record in timeseries if record["excluded_nonaggressive"])
        print(f"[PRESELECT] Saved {output_file} + {csv_file.name}: {len(timeseries)} entries ({n_excl_agg} aggressive, {n_excl_non} nonaggressive excluded)")
    print("[PRESELECT] Completed")
# Backward compatibility: names kept for callers written against older versions.
def detect_clouds(season, site_position, site_name, cleaning_strategy="aggressive"):
    """Create the preselection timeseries with exclusion markers.

    ``cleaning_strategy`` is accepted but ignored: markers for both
    strategies are always written, and the strategy is chosen later when the
    data are prepared.
    """
    create_timeseries(season=season, site_position=site_position, site_name=site_name)


preselect = create_timeseries

View file

@ -1,2 +1,31 @@
[project]
name = "worldwide"
version = "0.1.0"
description = "Worldwide PhenoCam EFAST feasibility screening"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"efast @ git+https://github.com/DHI-GRAS/efast.git",
"netCDF4",
"numpy",
"openeo",
"pystac-client",
"python-dateutil",
"python-dotenv",
"rasterio",
"requests",
"scipy",
"shapely",
"tqdm",
]
[dependency-groups]
dev = [
"ruff",
]
[tool.ruff.lint.per-file-ignores] [tool.ruff.lint.per-file-ignores]
"run.py" = ["F401"] "1-phenocam.py" = ["E402"]
"2-phenocam-screening.py" = ["E402"]
"3-sentinel-data.py" = ["E402"]
"4-fusion.py" = ["E402"]

View file

@ -1,12 +0,0 @@
pystac-client
rasterio
openeo
python-dotenv
netCDF4
numpy
timesat
requests
scipy
matplotlib
ruff
pre-commit

87
run.py
View file

@ -1,87 +0,0 @@
"""Pipeline entry point.
Active snippet below only **regenerates metrics.json** (temporal, baseline,
`derived`, `residual_vs_phenocam`). Requires existing post-processed GCC
timeseries under `data/{site}/{season}/processed_*`.
Un-comment the imports and steps below to run acquisition, fusion, and post-processing.
"""
# from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
# from postprocessing import (
# post_process_all_scenarios,
# post_process_all_itb_scenarios,
# post_process_timeseries,
# )
# from acquisition_s2 import download_s2
# from acquisition_s3 import download_s3
# from acquisition_phenocam import download_phenocam
# from preselection import create_timeseries
# from preparation import (
# prepare_s2,
# prepare_s3,
# prepare_s2_gcc_for_itb,
# prepare_s3_gcc_for_itb,
# )
# from metrics_indices import create_prepared_fusion_timeseries
from metrics_stats import calculate_all_metrics
# from phenology_timesat import write_phenocam_phenology_for_site
def run_pipeline(season, site_position, site_name):
    """Run pipeline (metrics-only by default; see module docstring).

    Only the metrics step is active; the earlier stages are kept below as
    commented-out steps.  Any exception is reported on stdout and re-raised.
    """
    try:
        # print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}")
        # download_s2(season, site_position, site_name)
        # download_s3(season, site_position, site_name)
        # download_phenocam(season, site_position, site_name)
        # print(f"PhenoCam phenology (50 % amplitude): {site_name}, {season}")
        # write_phenocam_phenology_for_site(site_name, season)
        # print(f"Creating preselection timeseries: {site_name}, {season}")
        # create_timeseries(season, site_position, site_name)
        # print(f"Preparing S2 and S3 for fusion: {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2(season, site_position, site_name, cleaning_strategy=strategy)
        #     prepare_s3(season, site_position, site_name, cleaning_strategy=strategy)
        # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
        # run_all_efast_scenarios(season, site_position, site_name)
        # print(f"Index-then-Blend (ItB): {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        #     prepare_s3_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        # run_all_efast_itb_scenarios(season, site_position, site_name)
        # post_process_all_itb_scenarios(season, site_position, site_name)
        # print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
        # create_prepared_fusion_timeseries(season, site_position, site_name)
        # print(f"Post-processing (crop): {site_name}, {season}")
        # post_process_all_scenarios(season, site_position, site_name)
        # post_process_timeseries(season, site_position, site_name)
        print(f"Calculating metrics: {site_name}, {season}")
        calculate_all_metrics(season, site_name, site_position)
    except Exception as e:
        # Surface the failure in the console log, then let it propagate.
        print(f"Error: {e}")
        raise
if __name__ == "__main__":
    # (season, (lat, lon), site name), processed sequentially in this order.
    _runs = (
        (2024, (47.116171, 11.320308), "innsbruck"),
        (2024, (35.3045, 25.0743), "forthgr"),
        (2020, (47.116171, 11.320308), "innsbruck"),
        (2024, (58.5633, 24.3688), "pitsalu"),
        (2023, (64.2437, 19.7673), "vindeln2"),
        (2024, (36.7455, -6.0033), "sunflowerjerez1"),
        (2024, (42.6558, 26.9837), "institutekarnobat"),
    )
    for _season, _position, _site in _runs:
        run_pipeline(_season, _position, _site)

View file

@ -1,16 +0,0 @@
[Unit]
Description=Satellite Fusion Pipeline Web Server
After=network.target
[Service]
Type=simple
User=root
WorkingDirectory=/opt/satellite-fusion/webapp
Environment="PATH=/opt/satellite-fusion/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ExecStart=/opt/satellite-fusion/venv/bin/python3 -m http.server 8000 --directory /opt/satellite-fusion/webapp
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target

View file

@ -1,634 +0,0 @@
#!/usr/bin/env python3
"""Compute per-site suitability indicators from existing pipeline outputs.
The script is intentionally schema-tolerant: it prints one site's discovered JSON
structure first, then uses a small set of common field-name conventions to compute
SNR, S2 archive density, and S2-S3 GCC coherence.
"""
from __future__ import annotations
import argparse
import json
import math
import re
from collections.abc import Iterable
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from scipy.interpolate import UnivariateSpline
from scipy.stats import pearsonr
OUTPUT_NAME = "suitability_screening.json"
SNR_THRESHOLD = 2.0
MATCH_TOLERANCE_DAYS = 2
def load_json(path: Path) -> Any | None:
    """Return the parsed JSON content of ``path``.

    Returns ``None`` when ``path`` is not a regular file, or when reading or
    decoding fails (a ``[WARN]`` line is printed in the latter case).
    """
    if not path.is_file():
        return None
    try:
        raw = path.read_text(encoding="utf-8")
        parsed = json.loads(raw)
    except (json.JSONDecodeError, OSError) as exc:
        print(f"[WARN] Could not read JSON {path}: {exc}")
        return None
    return parsed
def jsonable_float(value: Any) -> float | None:
    """Convert ``value`` to a finite float, or return ``None``.

    Booleans are rejected explicitly (they would otherwise become 0.0/1.0).
    Values that cannot be converted -- wrong type, unparsable text, or an
    integer too large for a float -- and non-finite results (NaN, +/-inf)
    all yield ``None``.
    """
    if isinstance(value, bool):
        return None
    try:
        out = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: e.g. float(10**400) for an oversized integer.
        return None
    return out if math.isfinite(out) else None
def parse_date(value: Any) -> pd.Timestamp | None:
    """Parse ``value`` into a midnight-normalised ``pd.Timestamp``.

    A stand-alone run of exactly eight digits anywhere in the text is tried
    first as ``YYYYMMDD``; otherwise (or if that fails) the whole text goes
    through ``pd.to_datetime``.  Timezone-aware results are converted to
    naive timestamps.  Returns ``None`` for ``None``, blank text, or anything
    unparsable.
    """
    if value is None:
        return None
    if isinstance(value, pd.Timestamp):
        return value.normalize()

    text = str(value).strip()
    if text == "":
        return None

    compact_match = re.search(r"(?<!\d)(\d{8})(?!\d)", text)
    if compact_match is not None:
        try:
            return pd.to_datetime(compact_match.group(1), format="%Y%m%d").normalize()
        except (TypeError, ValueError):
            # Not a real calendar date; fall back to generic parsing.
            pass

    try:
        parsed = pd.to_datetime(text, errors="coerce")
    except (TypeError, ValueError):
        return None
    if pd.isna(parsed):
        return None
    has_timezone = getattr(parsed, "tzinfo", None) is not None
    if has_timezone:
        parsed = parsed.tz_convert(None)
    return pd.Timestamp(parsed).normalize()
def compact(value: Any, *, max_text: int = 220) -> Any:
    """Return a short representation suitable for discovery logging.

    Dicts keep their first 12 items and lists their first 2 elements, both
    compacted recursively.  Any other value is returned unchanged unless its
    ``repr`` is longer than ``max_text``, in which case the truncated
    ``repr`` ending in ``"..."`` is returned.
    """
    if isinstance(value, dict):
        shortened = {}
        for key, item in list(value.items())[:12]:
            shortened[key] = compact(item, max_text=max_text)
        return shortened
    if isinstance(value, list):
        head = value[:2]
        return [compact(item, max_text=max_text) for item in head]
    text = repr(value)
    if len(text) <= max_text:
        return value
    return text[: max_text - 3] + "..."
def top_keys(data: Any) -> list[str]:
    """Return the top-level keys of ``data`` for discovery output.

    A dict yields its keys in insertion order.  A non-empty list whose first
    element is a dict yields the sorted union of keys over the dict entries
    among its first five elements; non-dict entries in that sample are
    ignored instead of raising.  Anything else yields ``[]``.
    """
    if isinstance(data, dict):
        return list(data.keys())
    if isinstance(data, list) and data and isinstance(data[0], dict):
        keys: set[str] = set()
        for entry in data[:5]:
            if isinstance(entry, dict):
                keys.update(entry.keys())
        return sorted(keys)
    return []
def normalize_records(data: Any) -> list[dict[str, Any]]:
    """Convert common JSON shapes into a list of record dictionaries.

    * ``None`` -> ``[]``
    * list -> dict items are copied, other items become ``{"value": item}``
    * dict holding a list under one of the known wrapper keys -> that list,
      normalised recursively
    * non-empty dict without list/tuple values -> one record per key, with
      the key stored as ``"date"`` (without overwriting an existing one)
    * any other dict -> a single copied record
    * any other value -> ``[{"value": data}]``
    """
    if data is None:
        return []
    if isinstance(data, list):
        return [
            dict(item) if isinstance(item, dict) else {"value": item} for item in data
        ]
    if not isinstance(data, dict):
        return [{"value": data}]

    wrapper_keys = ("timeseries", "time_series", "data", "entries", "results", "records")
    for wrapper_key in wrapper_keys:
        wrapped = data.get(wrapper_key)
        if isinstance(wrapped, list):
            return normalize_records(wrapped)

    holds_sequence = any(isinstance(item, (list, tuple)) for item in data.values())
    if not data or holds_sequence:
        return [dict(data)]

    # Dict keyed by date or filename.
    records = []
    for key, item in data.items():
        if isinstance(item, dict):
            record = dict(item)
            record.setdefault("date", key)
        else:
            record = {"date": key, "value": item}
        records.append(record)
    return records
def first_records(data: Any, count: int = 2) -> list[Any]:
    """Return the first ``count`` records of ``data`` after normalisation."""
    return normalize_records(data)[:count]
def recursive_snr_candidates(data: Any, prefix: str = "") -> list[tuple[str, Any]]:
    """Collect ``(path, value)`` pairs for every dict key containing "snr".

    The search is depth-first and case-insensitive.  Dict levels are joined
    with ``.`` and list positions appear as ``[i]``; only the first ten
    elements of any list are visited.  A matching key is reported before the
    matches found inside its value.
    """
    if isinstance(data, dict):
        found: list[tuple[str, Any]] = []
        for key, value in data.items():
            path = f"{prefix}.{key}" if prefix else str(key)
            if "snr" in str(key).lower():
                found.append((path, value))
            found += recursive_snr_candidates(value, path)
        return found
    if isinstance(data, list):
        return [
            hit
            for i, value in enumerate(data[:10])
            for hit in recursive_snr_candidates(value, f"{prefix}[{i}]")
        ]
    return []
def find_numeric_snr(data: Any) -> float | None:
    """Return the first usable numeric SNR found anywhere in ``data``.

    Candidates whose final path segment is exactly ``snr`` are tried before
    other snr-containing keys; within each group the discovery order is
    kept.  A dict-valued candidate is also probed for a direct ``"snr"``
    entry.  Returns ``None`` when nothing converts to a finite float.
    """
    # False sorts before True, and the sort is stable.
    ranked = sorted(
        recursive_snr_candidates(data),
        key=lambda candidate: candidate[0].split(".")[-1].lower() != "snr",
    )
    for _, value in ranked:
        direct = jsonable_float(value)
        if direct is not None:
            return direct
        if not isinstance(value, dict):
            continue
        inner = jsonable_float(value.get("snr"))
        if inner is not None:
            return inner
    return None
def find_site_roots(base_dir: Path) -> list[tuple[str, Path]]:
    """Find direct site roots, plus the repo's common site/year layout.

    A directory counts as a site root when it contains ``metrics.json``,
    ``raw/preselection``, ``phenocam`` or ``raw/phenocam``.  Children of
    ``base_dir`` are checked first; for a child that is not itself a root,
    its own sub-directories are checked.  Nested roots are named after the
    parent alone when the sub-directory name is all digits, otherwise
    ``parent_sub``.  Results follow sorted directory order.
    """
    if not base_dir.is_dir():
        return []

    markers = (
        ("metrics.json",),
        ("raw", "preselection"),
        ("phenocam",),
        ("raw", "phenocam"),
    )

    def looks_like_site_root(path: Path) -> bool:
        return any(path.joinpath(*parts).exists() for parts in markers)

    def subdirectories(parent: Path) -> list[Path]:
        return sorted(item for item in parent.iterdir() if item.is_dir())

    roots: list[tuple[str, Path]] = []
    for child in subdirectories(base_dir):
        if looks_like_site_root(child):
            roots.append((child.name, child))
            continue
        for grandchild in subdirectories(child):
            if not looks_like_site_root(grandchild):
                continue
            if grandchild.name.isdigit():
                name = child.name
            else:
                name = f"{child.name}_{grandchild.name}"
            roots.append((name, grandchild))
    return roots
def find_s2_preselection(site_root: Path) -> Path | None:
    """Return the site's ``s2_preselection.json`` or ``None``.

    ``raw/preselection`` is checked before ``preselection``.
    """
    for folder in (site_root / "raw" / "preselection", site_root / "preselection"):
        candidate = folder / "s2_preselection.json"
        if candidate.is_file():
            return candidate
    return None
def find_s3_timeseries(site_root: Path) -> Path | None:
    """Return the S3 GCC ``timeseries.json`` of an aggressive/sigma20 scenario.

    The two well-known scenario directories are tried first (plain, then
    ItB); failing that, the first sorted match of
    ``processed*aggressive*sigma20*/gcc/s3/timeseries.json`` is used.
    """
    for scenario in ("processed_aggressive_sigma20", "processed_aggressive_itb_sigma20"):
        candidate = site_root / scenario / "gcc" / "s3" / "timeseries.json"
        if candidate.is_file():
            return candidate
    matches = sorted(site_root.glob("processed*aggressive*sigma20*/gcc/s3/timeseries.json"))
    if not matches:
        return None
    return matches[0]
def find_metrics(site_root: Path) -> Path | None:
    """Return ``site_root/metrics.json`` if it is a regular file, else ``None``."""
    candidate = site_root / "metrics.json"
    if candidate.is_file():
        return candidate
    return None
def find_phenocam(site_root: Path) -> Path | None:
    """Locate a PhenoCam GCC JSON file below ``site_root``.

    Exact names (``gcc_90.json``, ``phenocam_gcc.json``) are tried in
    ``phenocam`` and then ``raw/phenocam``.  If none exists, progressively
    looser glob patterns are tried and the first sorted match of the first
    matching pattern is returned.  Returns ``None`` when nothing is found.
    """
    for folder in (site_root / "phenocam", site_root / "raw" / "phenocam"):
        for filename in ("gcc_90.json", "phenocam_gcc.json"):
            exact = folder / filename
            if exact.is_file():
                return exact

    fallback_patterns = (
        "phenocam/*gcc*90*.json",
        "phenocam/*gcc*.json",
        "raw/phenocam/*gcc*90*.json",
        "raw/phenocam/*gcc*.json",
        "raw/phenocam/*.json",
    )
    for pattern in fallback_patterns:
        hits = sorted(site_root.glob(pattern))
        if hits:
            return hits[0]
    return None
def print_structure(label: str, path: Path | None) -> None:
    """Print a short structural summary of the JSON file at ``path``.

    Always prints a ``[label]`` header.  A ``None`` path prints ``missing``.
    Otherwise the path, top-level type and keys are printed, followed by the
    first two compacted records -- or, for the label ``"metrics.json"``, the
    phenocam-like top-level keys and all snr-like keys instead.
    """
    print(f"\n[{label}]")
    if path is None:
        print("missing")
        return

    data = load_json(path)
    print(f"path: {path}")
    print(f"type: {type(data).__name__}")
    print(f"keys: {top_keys(data)}")

    if label != "metrics.json":
        records = first_records(data, 2)
        if records:
            print(f"first {len(records)} entr{'y' if len(records) == 1 else 'ies'}:")
            print(json.dumps(compact(records), indent=2, default=str))
        return

    snr = recursive_snr_candidates(data)
    phenocam_keys = []
    if isinstance(data, dict):
        phenocam_keys = [
            (key, top_keys(value))
            for key, value in data.items()
            if "phenocam" in str(key).lower()
        ]
    print(f"phenocam-like keys: {phenocam_keys}")
    print(f"snr-like keys: {[(path, compact(value)) for path, value in snr]}")
def run_discovery(site_name: str, site_root: Path) -> None:
    """Print the discovered structure of one site's four input files.

    The files are reported in the order S2 preselection, S3 timeseries,
    metrics, PhenoCam, framed by a discovery header and a closing
    "Computing indicators" banner.
    """
    print("\n=== Discovery mode ===")
    print(f"Using site: {site_name} ({site_root})")
    sections = (
        ("s2_preselection.json", find_s2_preselection),
        ("S3 timeseries.json", find_s3_timeseries),
        ("metrics.json", find_metrics),
        ("PhenoCam gcc_90 file", find_phenocam),
    )
    for label, locate in sections:
        print_structure(label, locate(site_root))
    print("\n=== Computing indicators ===")
def choose_discovery_site(site_roots: list[tuple[str, Path]]) -> tuple[str, Path]:
    """Pick the site that has the most of the four input files available.

    Ties go to the earliest entry of ``site_roots``.  An empty list raises
    ``ValueError`` (from ``max``).
    """
    finders = (find_s2_preselection, find_s3_timeseries, find_metrics, find_phenocam)

    def score(item: tuple[str, Path]) -> int:
        root = item[1]
        available = [finder(root) for finder in finders]
        return sum(1 for found in available if found is not None)

    return max(site_roots, key=score)
def truthy_status(value: Any, *, field_name: str | None = None) -> bool | None:
    """Interpret a status-like value as "acquisition passes" (``True``/``False``).

    ``field_name`` sets the polarity: in a field whose name contains
    "reject" or "exclude", a true value means the acquisition does NOT pass.

    * bool / number: truthiness, inverted for reject/exclude fields.
    * ``None``: passes.
    * "true"/"yes" and "false"/"no" (any case): handled like the matching
      bool, including the polarity inversion, so ``"false"`` in an
      ``excluded`` field passes exactly as ``False`` does.
    * known pass words (or blank/none/null/nan): passes.
    * known failure words: fails.
    * any other text: fails when the field name contains "reason" or
      "status", otherwise the result is ``None`` (undecided).
    """
    field = (field_name or "").lower()
    inverted = any(word in field for word in ("reject", "exclude"))

    if isinstance(value, bool):
        return (not value) if inverted else value
    if value is None:
        return True
    if isinstance(value, (int, float)):
        flag = bool(value)
        return (not flag) if inverted else flag

    text = str(value).strip().lower()
    # Textual booleans follow the same polarity rules as real booleans.
    if text in {"true", "yes"}:
        return not inverted
    if text in {"false", "no"}:
        return inverted
    if text in {"", "none", "null", "nan", "ok", "pass", "passed", "keep", "kept", "valid", "selected"}:
        return True
    if text in {
        "fail",
        "failed",
        "reject",
        "rejected",
        "exclude",
        "excluded",
        "invalid",
        "cloud",
        "cloudy",
        "dark",
        "bad",
    }:
        return False
    if any(word in field for word in ("reason", "status")):
        # An unrecognised reason/status text is taken as a rejection.
        return False
    return None
def acquisition_passes(entry: dict[str, Any], strategy: str) -> bool:
    """Return whether ``entry`` survives the cleaning ``strategy``.

    Evidence is consulted in this order, and the first conclusive item wins:

    1. explicit ``<prefix>_<strategy>`` flags (exclude/reject prefixes are
       negated, pass/keep/valid/selected prefixes are taken as-is);
    2. a value stored under the strategy name itself (dict or scalar);
    3. generic status fields such as ``excluded``, ``status``, ``reason``;
    4. a lone ``value`` field in a record of at most three keys.

    The strategy name is also matched with ``non_aggressive`` and
    ``non-aggressive`` spellings.  Entries without any marker pass.
    """
    strategy_aliases = {
        strategy,
        strategy.replace("nonaggressive", "non_aggressive"),
        strategy.replace("nonaggressive", "non-aggressive"),
    }
    negative_prefixes = ("excluded", "exclude", "rejected", "reject")
    positive_prefixes = ("passed", "pass", "keep", "kept", "valid", "selected")
    polarity = [(prefix, True) for prefix in negative_prefixes]
    polarity += [(prefix, False) for prefix in positive_prefixes]

    # 1. Explicit per-strategy flags.
    for alias in strategy_aliases:
        for prefix, negate in polarity:
            key = f"{prefix}_{alias}"
            if key not in entry:
                continue
            flag = bool(entry[key])
            return (not flag) if negate else flag

    # 2. Value(s) stored under the strategy name.
    for alias in strategy_aliases:
        nested = entry.get(alias)
        if isinstance(nested, dict):
            evidence = list(nested.items())
        elif nested is not None:
            evidence = [(alias, nested)]
        else:
            evidence = []
        for field, value in evidence:
            passed = truthy_status(value, field_name=field)
            if passed is not None:
                return passed

    # 3. Generic status fields.
    generic_fields = (
        *negative_prefixes,
        *positive_prefixes,
        "status",
        "strategy",
        "reason",
        "rejection_reason",
    )
    for field in generic_fields:
        if field not in entry:
            continue
        passed = truthy_status(entry[field], field_name=field)
        if passed is not None:
            return passed

    # 4. Dict keyed by date with a scalar rejection reason.
    if "value" in entry and len(entry) <= 3:
        passed = truthy_status(entry.get("value"), field_name="value")
        if passed is not None:
            return passed

    # Existing pipeline entries with band means and no rejection marker are usable.
    return True
def band_value(entry: dict[str, Any], names: Iterable[str]) -> float | None:
    """Return the first usable numeric value stored under any of ``names``.

    Top-level keys are compared case-insensitively. If none of the names
    yields a number, known container keys (``bands``, ``band_means``, ...)
    are searched recursively; those container keys are matched exactly.

    Args:
        entry: Record (or nested container) to search.
        names: Candidate key names, tried in order.

    Returns:
        The first value for which ``jsonable_float`` returns a number, or
        ``None`` when nothing matches.
    """
    by_lower_key = {str(key).lower(): raw for key, raw in entry.items()}
    for candidate in names:
        wanted = candidate.lower()
        if wanted not in by_lower_key:
            continue
        number = jsonable_float(by_lower_key[wanted])
        if number is not None:
            return number

    containers = ("bands", "band_means", "reflectance", "reflectances", "means", "window_means")
    for container_key in containers:
        nested = entry.get(container_key)
        if not isinstance(nested, dict):
            continue
        number = band_value(nested, names)
        if number is not None:
            return number
    return None
def entry_date(entry: dict[str, Any]) -> pd.Timestamp | None:
    """Extract a date from a record.

    Explicit date fields are tried first, then file-name style fields.
    Each present value is handed to ``parse_date``; the first non-``None``
    result is returned.

    Returns:
        The parsed date, or ``None`` if no candidate field parses.
    """
    candidate_keys = (
        # Dedicated date fields take precedence ...
        "date",
        "datetime",
        "time",
        "timestamp",
        "acquisition_date",
        # ... over file-name style fields.
        "filename",
        "file",
        "path",
        "name",
    )
    for key in candidate_keys:
        if key not in entry:
            continue
        parsed = parse_date(entry[key])
        if parsed is not None:
            return parsed
    return None
def s2_gcc_series(s2_data: Any) -> pd.DataFrame:
    """Build a per-date GCC series from acquisition records.

    Only dict records that pass the ``"aggressive"`` strategy are used.
    GCC is computed as ``green / (blue + green + red)`` from the band
    values found by ``band_value``; records with a missing date or band,
    or a non-positive band sum, are dropped. Values sharing a date are
    averaged.

    Returns:
        DataFrame with columns ``date`` and ``s2_gcc``, sorted by date
        (empty, with those columns, when no record qualifies).
    """
    samples = []
    for record in normalize_records(s2_data):
        if not isinstance(record, dict):
            continue
        if not acquisition_passes(record, "aggressive"):
            continue
        when = entry_date(record)
        blue = band_value(record, ("b02", "blue", "B02", "band_1", "band1"))
        green = band_value(record, ("b03", "green", "B03", "band_2", "band2"))
        red = band_value(record, ("b04", "red", "B04", "band_3", "band3"))
        if when is None or blue is None or green is None or red is None:
            continue
        band_sum = blue + green + red
        if band_sum <= 0:
            # Avoid division by zero / negative denominators.
            continue
        samples.append({"date": when, "s2_gcc": green / band_sum})

    if not samples:
        return pd.DataFrame(columns=["date", "s2_gcc"])
    frame = pd.DataFrame(samples)
    per_date = frame.groupby("date", as_index=False)["s2_gcc"].mean()
    return per_date.sort_values("date")
def value_from_record(entry: dict[str, Any], preferred: Iterable[str]) -> float | None:
    """Pick a numeric value from a record.

    Keys are compared case-insensitively. The ``preferred`` names are tried
    in order; if none yields a number, the first key containing ``gcc`` or
    ``greenness`` that converts via ``jsonable_float`` is used.

    Returns:
        The selected number, or ``None`` when nothing usable is found.
    """
    by_lower_key = {str(key).lower(): raw for key, raw in entry.items()}

    for wanted in preferred:
        number = jsonable_float(by_lower_key.get(wanted.lower()))
        if number is not None:
            return number

    # Fallback: any greenness-like field, in the record's own key order.
    fuzzy_tokens = ("gcc", "greenness")
    for key, raw in by_lower_key.items():
        if not any(token in key for token in fuzzy_tokens):
            continue
        number = jsonable_float(raw)
        if number is not None:
            return number
    return None
def gcc_timeseries(data: Any, value_name: str) -> pd.DataFrame:
    """Turn a collection of records into a per-date greenness series.

    Non-dict records and records without a parsable date or usable value
    are skipped. Values sharing a date are averaged.

    Args:
        data: Raw payload accepted by ``normalize_records``.
        value_name: Name of the value column in the result.

    Returns:
        DataFrame with columns ``date`` and ``value_name``, sorted by date.
    """
    preferred = ("greenness_index", "gcc_90", "gcc", "value", "mean", "site_value")
    samples = []
    for record in normalize_records(data):
        if not isinstance(record, dict):
            continue
        when = entry_date(record)
        reading = value_from_record(record, preferred)
        if when is None or reading is None:
            continue
        samples.append({"date": when, value_name: reading})

    if not samples:
        return pd.DataFrame(columns=["date", value_name])
    per_date = pd.DataFrame(samples).groupby("date", as_index=False)[value_name].mean()
    return per_date.sort_values("date")
def compute_archive_density(s2_data: Any | None) -> tuple[int | None, int | None]:
    """Count the records that pass each screening strategy.

    Returns:
        ``(n_aggressive, n_nonaggressive)``; ``(None, None)`` when there is
        no payload or it contains no dict records.
    """
    if s2_data is None:
        return None, None

    dict_records = [item for item in normalize_records(s2_data) if isinstance(item, dict)]
    if not dict_records:
        return None, None

    def count_passing(strategy: str) -> int:
        # Number of records accepted under the given strategy.
        return sum(1 for item in dict_records if acquisition_passes(item, strategy))

    return count_passing("aggressive"), count_passing("nonaggressive")
def compute_coherence(s2_data: Any | None, s3_data: Any | None) -> tuple[int | None, float | None, float | None]:
    """Correlate the two GCC series on nearest-date matches.

    Each row of the first series is paired with the nearest-in-time row of
    the second within ``MATCH_TOLERANCE_DAYS``; unmatched rows are dropped
    and Pearson's r is computed on the remaining pairs.

    Returns:
        ``(n_matched, pearson_r, p_value)``. All three are ``None`` when
        either payload is missing; ``r`` and ``p`` are ``None`` when fewer
        than two pairs are available.
    """
    if s2_data is None or s3_data is None:
        return None, None, None

    s2_frame = s2_gcc_series(s2_data)
    s3_frame = gcc_timeseries(s3_data, "s3_gcc")
    if s2_frame.empty or s3_frame.empty:
        return 0, None, None

    window = pd.Timedelta(days=MATCH_TOLERANCE_DAYS)
    paired = pd.merge_asof(
        s2_frame.sort_values("date"),
        s3_frame.sort_values("date"),
        on="date",
        direction="nearest",
        tolerance=window,
    )
    # Rows without a partner inside the tolerance carry NaN for s3_gcc.
    paired = paired.dropna(subset=["s2_gcc", "s3_gcc"])

    n_pairs = int(len(paired))
    if n_pairs < 2:
        return n_pairs, None, None

    s2_values = paired["s2_gcc"].to_numpy()
    s3_values = paired["s3_gcc"].to_numpy()
    r, p_value = pearsonr(s2_values, s3_values)
    return n_pairs, jsonable_float(r), jsonable_float(p_value)
def phenocam_series(data: Any | None) -> pd.DataFrame:
    """Build a per-date GCC series from a PhenoCam payload.

    Only dict records can contribute, because a row needs a date and the
    date is read from record fields. Non-dict records are therefore
    skipped outright (the previous code converted them to a float and then
    always discarded the result). Values sharing a date are averaged.

    Args:
        data: Raw payload accepted by ``normalize_records``, or ``None``.

    Returns:
        DataFrame with columns ``date`` and ``gcc``, sorted by date; empty
        (with those columns) when there is nothing usable.
    """
    if data is None:
        return pd.DataFrame(columns=["date", "gcc"])

    preferred = ("gcc_90", "greenness_index", "gcc", "gcc_mean", "value")
    rows = []
    for entry in normalize_records(data):
        if not isinstance(entry, dict):
            # A bare scalar carries no date, so it can never form a row.
            continue
        when = entry_date(entry)
        value = value_from_record(entry, preferred)
        if when is not None and value is not None:
            rows.append({"date": when, "gcc": value})

    if not rows:
        return pd.DataFrame(columns=["date", "gcc"])
    return pd.DataFrame(rows).groupby("date", as_index=False)["gcc"].mean().sort_values("date")
def compute_snr_from_phenocam(phenocam_data: Any | None) -> float | None:
    """Estimate a signal-to-noise ratio from the PhenoCam GCC series.

    A cubic smoothing spline is fitted to GCC against days since the first
    observation. SNR is the raw GCC range (max - min) divided by the RMSE
    of the spline residuals.

    Returns:
        The SNR as a finite float, or ``None`` when there are fewer than
        five distinct observation days, the fit fails, or the ratio is not
        a finite positive-denominator value. Returning ``None`` instead of
        NaN/inf matters because the results are later serialised with
        ``json.dump(..., allow_nan=False)``.
    """
    series = phenocam_series(phenocam_data)
    if len(series) < 5:
        return None

    x = (series["date"] - series["date"].min()).dt.days.to_numpy(dtype=float)
    y = series["gcc"].to_numpy(dtype=float)
    if len(np.unique(x)) < 5:
        return None

    try:
        # NOTE(review): the smoothing factor is left at SciPy's default;
        # confirm that default is appropriate for GCC-scale values.
        spline = UnivariateSpline(x, y, k=3)
        residual = y - spline(x)
    except Exception as exc:
        # Deliberately broad: this metric is best-effort and must not abort
        # the whole run for one site.
        print(f"[WARN] Could not fit PhenoCam smoothing spline: {exc}")
        return None

    rmse = float(np.sqrt(np.mean(residual**2)))
    amplitude = float(np.max(y) - np.min(y))
    # `not (rmse > 0)` also rejects NaN, which `rmse <= 0` would let through.
    if not np.isfinite(amplitude) or not (rmse > 0):
        return None

    snr = amplitude / rmse
    # A vanishingly small RMSE can overflow the ratio to inf.
    return snr if np.isfinite(snr) else None
def compute_snr(metrics_data: Any | None, phenocam_data: Any | None) -> float | None:
    """Return the SNR, preferring a value already present in the metrics.

    Falls back to ``compute_snr_from_phenocam`` only when
    ``find_numeric_snr`` finds nothing in ``metrics_data``.
    """
    precomputed = find_numeric_snr(metrics_data)
    if precomputed is None:
        return compute_snr_from_phenocam(phenocam_data)
    return precomputed
def compute_site(site_root: Path) -> dict[str, Any]:
    """Compute all suitability metrics for one site directory.

    Each input file is located with its ``find_*`` helper; when a helper
    returns nothing, a placeholder path is passed to ``load_json`` instead.

    Returns:
        Dict with the SNR and its threshold verdict, the per-strategy
        acquisition counts, and the coherence statistics.
    """
    placeholder = Path("__missing__")
    s2_data = load_json(find_s2_preselection(site_root) or placeholder)
    s3_data = load_json(find_s3_timeseries(site_root) or placeholder)
    metrics_data = load_json(find_metrics(site_root) or placeholder)
    phenocam_data = load_json(find_phenocam(site_root) or placeholder)

    snr = compute_snr(metrics_data, phenocam_data)
    n_s2_aggressive, n_s2_nonaggressive = compute_archive_density(s2_data)
    n_matched, pearson_r, p_value = compute_coherence(s2_data, s3_data)

    # The verdict stays None (not False) when no SNR could be determined.
    if snr is None:
        snr_pass = None
    else:
        snr_pass = snr >= SNR_THRESHOLD

    return {
        "snr": snr,
        "snr_pass": snr_pass,
        "n_s2_aggressive": n_s2_aggressive,
        "n_s2_nonaggressive": n_s2_nonaggressive,
        "coherence_n_matched": n_matched,
        "coherence_pearson_r": pearson_r,
        "coherence_p_value": p_value,
    }
def print_summary(results: dict[str, dict[str, Any]]) -> None:
    """Print the per-site metrics as an aligned plain-text table.

    Args:
        results: Mapping of site name to the metrics dict of that site.
            Missing metrics are shown as ``null``.
    """
    print("\nSuitability summary")
    if not results:
        print("(no sites found)")
        return

    # (metrics key, column header); the first column is the site name.
    columns = [
        ("site", "site"),
        ("snr", "snr"),
        ("snr_pass", "pass"),
        ("n_s2_aggressive", "n_s2_agg"),
        ("n_s2_nonaggressive", "n_s2_nonagg"),
        ("coherence_n_matched", "n_match"),
        ("coherence_pearson_r", "pearson_r"),
        ("coherence_p_value", "p_value"),
    ]
    headers = [label for _, label in columns]
    metric_keys = [key for key, _ in columns[1:]]

    def render(key: str, value: Any) -> str:
        # Counts print as integers; bool is tested before int/float because
        # bool is a subclass of int.
        if value is None:
            return "null"
        if key.startswith("n_") or key == "coherence_n_matched":
            return str(int(value))
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return f"{float(value):.4g}"
        return str(value)

    table = [
        [site, *(render(key, metrics.get(key)) for key in metric_keys)]
        for site, metrics in results.items()
    ]
    widths = [
        max(len(label), *(len(line[idx]) for line in table))
        for idx, label in enumerate(headers)
    ]

    print(" ".join(label.ljust(width) for label, width in zip(headers, widths)))
    print(" ".join("-" * width for width in widths))
    for line in table:
        print(" ".join(cell.ljust(width) for cell, width in zip(line, widths)))
def main() -> int:
    """Command-line entry point.

    Finds the site directories under ``--base-dir``, runs the discovery
    step on one chosen site, computes the metrics for every site, writes
    them as JSON to ``OUTPUT_NAME`` inside the base directory and prints a
    summary table.

    Returns:
        Process exit code (always ``0``).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--base-dir",
        required=True,
        type=Path,
        help="Pipeline output root containing one subdirectory per site.",
    )
    base_dir = cli.parse_args().base_dir.expanduser().resolve()

    site_roots = find_site_roots(base_dir)
    if not site_roots:
        print(f"[WARN] No site directories found under {base_dir}")
    else:
        run_discovery(*choose_discovery_site(site_roots))

    results = {}
    for site_name, site_root in site_roots:
        results[site_name] = compute_site(site_root)

    destination = base_dir / OUTPUT_NAME
    with destination.open("w", encoding="utf-8") as handle:
        # allow_nan=False: fail loudly rather than write non-standard JSON.
        json.dump(results, handle, indent=2, allow_nan=False)
        handle.write("\n")

    print_summary(results)
    print(f"\nWrote {destination}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

1491
uv.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,397 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Fusion Viewer</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
<script src="common.js"></script>
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
.selectors { margin-bottom: 20px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { margin: 0 0 5px 0; font-size: 22px; }
.season-row { padding-bottom: 15px; }
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
.download-links { margin-left: 10px; font-size: 14px; }
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
.download-links a:hover { text-decoration: underline; }
#dateSlider { width: 100%; margin: 15px 0; }
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
#fusionMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
.leaflet-image-layer { image-rendering: pixelated; }
.leaflet-control-attribution { display: none; }
</style>
</head>
<body>
<div class="container">
<div class="header-sticky">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html" class="active">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="siteName">Innsbruck</h1>
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
<label>Strategy:</label>
<select id="strategySelect">
<option value="aggressive">Aggressive</option>
<option value="nonaggressive">Non-aggressive</option>
</select>
<label>Sigma:</label>
<select id="sigmaSelect">
<option value="20">σ=20</option>
<option value="30">σ=30</option>
</select>
<label>Mode:</label>
<select id="fusionModeSelect" title="BtI = reflectance fusion; ItB = GCC fusion">
<option value="bti">BtI (REFL)</option>
<option value="itb">ItB (GCC)</option>
</select>
</div>
<input type="range" id="dateSlider" min="0" max="365" value="0">
<div id="dateDisplay">2024-01-01</div>
</div>
<div class="map-label" id="mapLabelFusion">Fusion RGB (closest available)</div>
<div id="mapDate" class="map-date"></div>
<div id="fusionMap"></div>
<div id="plots">
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
</div>
</div>
<script>
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
let siteName = "innsbruck", season = "2024";
let strategy = "aggressive", sigma = "20", fusionMode = "bti";
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);
let availableSiteSeasons = {};
let fusionMap = null, overlay = null, marker = null;
let ndviTs = [], gccTs = [], bandsTs = [];
const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
const urlParams = new URLSearchParams(location.search);
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
// Format a Date as YYYY-MM-DD using its local-time calendar fields.
const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
// Convert a day offset from the season start into YYYY-MM-DD.
// Uses calendar arithmetic (day-of-month overflow in the Date constructor)
// instead of adding multiples of 24 h: local days are 23 h or 25 h long
// across a DST change, which could shift the result by one day.
const dateFromDays = (days) => fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));
// Convert YYYY-MM-DD into a day offset from the season start.
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  // Two local midnights can be a whole number of days plus or minus one
  // hour apart, so round; flooring dropped a day after a spring-forward.
  return Math.round((new Date(y, m - 1, d) - start) / 86400000);
};
// Name of the prepared-data directory for the current strategy; the ItB
// fusion mode uses the same name with an "_itb" suffix.
function getPreparedBase() {
  const base = `prepared_${strategy}`;
  if (fusionMode === "itb") return `${base}_itb`;
  return base;
}
// Directory holding the fusion GeoTIFFs for the current site, season,
// strategy, mode and sigma.
function getFusionDir() {
  let sub = "fusion";
  if (sigma === "30") sub = "fusion_sigma30";
  const prepared = getPreparedBase();
  return `data/${siteName}/${season}/${prepared}/${sub}`;
}
// Sub-directory name of the fusion time series for the selected sigma.
function getFusionTimeseriesDir() {
  if (sigma === "30") return "fusion_sigma30";
  return "fusion";
}
// Fetch the time series for the current selection, store them in the
// module-level arrays and redraw the plots and download links.
// In ItB mode only the GCC series is requested. A non-OK response yields
// an empty series; a thrown error empties all three.
async function loadTimeseries() {
  const sub = getFusionTimeseriesDir();
  const base = `data/${siteName}/${season}/${getPreparedBase()}`;
  const fetchSeries = async (product) => {
    const response = await fetch(`${base}/${product}/${sub}/timeseries.json`);
    return response.ok ? response.json() : [];
  };

  let ndvi = [], gcc = [], bands = [];
  try {
    if (fusionMode === "itb") {
      gcc = await fetchSeries("gcc");
    } else {
      [ndvi, gcc, bands] = await Promise.all([
        fetchSeries("ndvi"),
        fetchSeries("gcc"),
        fetchSeries("bands"),
      ]);
    }
  } catch {
    ndvi = [];
    gcc = [];
    bands = [];
  }

  ndviTs = ndvi;
  gccTs = gcc;
  bandsTs = bands;
  drawPlots();
  updateDownloadLinks();
}
// Draw one time series on a canvas: axes, min/max labels, the series line,
// a vertical cursor at the slider's date and the value of the nearest point.
//   canvasId - id of the target <canvas>; the call is a no-op if it is missing
//   data     - array of records with a `date` field and a `key` field
//   key      - name of the field to plot (presumably numeric: toFixed is
//              called on it - confirm against the JSON producers)
//   color    - stroke colour of the series line
function drawPlot(canvasId, data, key, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  // Match the backing store to the rendered width; fixed 100 px height.
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;
  const w = canvas.width, h = canvas.height, pad = 30;
  const plotW = w - pad * 2, plotH = h - pad * 2;
  // Keep only points that actually carry a value for this key.
  const pts = data.filter(t => t[key] != null);
  if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
  const dates = pts.map(t => new Date(t.date));
  const vals = pts.map(t => t[key]);
  const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
  const minV = Math.min(...vals), maxV = Math.max(...vals);
  // `|| 1` avoids dividing by zero when all dates or all values coincide.
  const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
  // Data-to-pixel mappings; y is flipped because canvas y grows downwards.
  const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
  const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
  ctx.clearRect(0, 0, w, h);
  // Axes.
  ctx.strokeStyle = "#ccc";
  ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
  // Min / max value labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
  ctx.fillText(maxV.toFixed(3), 2, pad + 3);
  // Series line: move to the first point, then connect the rest.
  ctx.strokeStyle = color;
  ctx.beginPath();
  pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
  ctx.stroke();
  // Vertical cursor at the date currently selected on the slider.
  const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const xPos = x(curDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
  // Label the data point whose date is nearest to the cursor.
  const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
  if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
}
// Redraw every plot: NDVI, GCC, then one plot per entry in BANDS.
function drawPlots() {
  const plots = [
    ["plot_ndvi", ndviTs, "ndvi", "#2d7a3e"],
    ["plot_gcc", gccTs, "greenness_index", "#00aa00"],
    ...BANDS.map((band) => [`plot_${band.key}`, bandsTs, band.key, band.color]),
  ];
  for (const [canvasId, series, key, color] of plots) {
    drawPlot(canvasId, series, key, color);
  }
}
// Rebuild the download links for the current selection.
// The links are created with DOM APIs rather than an HTML string: the path
// contains `strategy`, which can come straight from the URL query string,
// and interpolating it into innerHTML allowed markup injection.
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;
  const sub = getFusionTimeseriesDir();
  const prep = `data/${siteName}/${season}/${getPreparedBase()}`;

  // Create one <a>; attribute values are set as plain text, never parsed.
  const makeLink = (label, href, downloadName) => {
    const a = document.createElement("a");
    a.setAttribute("href", href);
    if (downloadName) a.setAttribute("download", downloadName);
    a.textContent = label;
    return a;
  };

  if (fusionMode === "itb") {
    // ItB mode offers only the GCC series.
    el.replaceChildren(makeLink("[GCC JSON]", `${prep}/gcc/${sub}/timeseries.json`));
    return;
  }
  const base = `${prep}/export/${sub}`;
  const name = `${siteName}_${season}_fusion_${strategy}_${sub}`;
  el.replaceChildren(
    makeLink("[JSON]", `${base}/timeseries.json`, `${name}.json`),
    makeLink("[CSV]", `${base}/timeseries.csv`, `${name}.csv`),
  );
}
// Find the fusion GeoTIFF closest in time to `dateStr` (YYYY-MM-DD) within
// the current season. Candidates are probed with HEAD requests, nearest
// first (the same day, then one day before/after, and so on).
// Returns the file name, or null when no file exists in the season.
//
// All date arithmetic is done in UTC. Previously the candidates were UTC
// midnights while the season bounds were local midnights, so depending on
// the browser's timezone Dec 31 or Jan 1 fell outside the bounds and was
// never probed.
async function findFusionFile(dateStr) {
  const DAY_MS = 86400000;
  const year = parseInt(season, 10);
  const [y, m, d] = dateStr.split("-").map(Number);
  const targetMs = Date.UTC(y, m - 1, d);
  const seasonStartMs = Date.UTC(year, 0, 1);
  const seasonEndMs = Date.UTC(year, 11, 31);
  // Resolve the selection once so a single search never mixes directories.
  const dir = getFusionDir();
  const prefix = fusionMode === "itb" ? "GCC_" : "REFL_";

  for (let offset = 0; offset <= 365; offset++) {
    for (const sign of offset === 0 ? [0] : [-1, 1]) {
      const ms = targetMs + sign * offset * DAY_MS;
      if (ms < seasonStartMs || ms > seasonEndMs) continue;
      const ds = new Date(ms).toISOString().slice(0, 10).replace(/-/g, "");
      const filename = `${prefix}${ds}.tif`;
      try {
        const res = await fetch(`${dir}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Network error for this candidate: keep probing the others.
      }
    }
  }
  return null;
}
// Convert a [minX, minY, maxX, maxY] bounding box from `fromCRS` into
// [[south, west], [north, east]] in EPSG:4326 (lat/lon order).
function transformBounds(bbox, fromCRS) {
  const [minX, minY, maxX, maxY] = bbox;
  const toLonLat = (x, y) => proj4(fromCRS, "EPSG:4326", [x, y]);
  const [west, south] = toLonLat(minX, minY);
  const [east, north] = toLonLat(maxX, maxY);
  return [[south, west], [north, east]];
}
// Download a fusion GeoTIFF and return what the map overlay needs:
// a data URL, lat/lon bounds and the date token taken from the file name.
async function loadGeotiff(filename) {
  const response = await fetch(`${getFusionDir()}/${filename}`);
  const buf = await response.arrayBuffer();
  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);

  let bounds;
  if (crsCode === "EPSG:4326") {
    // Already geographic: only swap to [lat, lon] order.
    bounds = [[bbox[1], bbox[0]], [bbox[3], bbox[2]]];
  } else {
    bounds = transformBounds(bbox, crsCode);
  }

  const withoutPrefix = filename.replace(/^(REFL|GCC)_/, "");
  const dateStr = withoutPrefix.replace(".tif", "");
  return { dataUrl, bounds, dateStr };
}
// Show the fusion image closest to the slider date on the map, or clear
// the overlay when none is available or loading fails.
//
// The slider fires this on every input event, so several calls can be in
// flight at once. Each call takes a ticket and discards its result if a
// newer call has started meanwhile; otherwise a slow, stale response could
// overwrite the image of a more recent selection.
async function updateMap() {
  const ticket = (updateMap.latestTicket = (updateMap.latestTicket || 0) + 1);
  const isStale = () => ticket !== updateMap.latestTicket;
  const clearOverlay = () => {
    if (overlay && fusionMap) fusionMap.removeLayer(overlay);
    overlay = null;
    document.getElementById("mapDate").textContent = "";
  };

  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const filename = await findFusionFile(dateStr);
  if (isStale()) return;
  if (!filename || !fusionMap) {
    clearOverlay();
    return;
  }
  try {
    const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
    if (isStale()) return;
    if (overlay) fusionMap.removeLayer(overlay);
    overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(fusionMap);
    fusionMap.fitBounds(bounds);
    // File names carry YYYYMMDD; display as YYYY-MM-DD.
    document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
  } catch (e) {
    if (isStale()) return;
    clearOverlay();
  }
}
// Resolve to true when the pre-selection JSON of a site/season answers a
// HEAD request with an OK status; any fetch error counts as "no data".
async function probeDataExists(sitename, s) {
  const url = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
  try {
    const { ok } = await fetch(url, { method: "HEAD" });
    return ok;
  } catch {
    return false;
  }
}
// Look up the GeoJSON feature whose properties.sitename equals `sn`;
// undefined when the site list is not loaded or has no such site.
function getSiteBySitename(sn) {
  const features = window.sitesData?.features;
  return features?.find((feature) => feature.properties?.sitename === sn);
}
// Switch the page to another site/season: update the module state, map
// position, headings, slider range and URL, then reload series and map.
//
// The URL is written from the shared `urlParams` object. Previously a
// throwaway URLSearchParams was used here while the strategy/sigma/mode
// handlers wrote `urlParams`, so the next handler call dropped the site,
// season and mode that had just been put into the address bar.
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  start = new Date(parseInt(season, 10), 0, 1);

  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (fusionMap) {
    fusionMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }
  document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
  document.getElementById("season").textContent = season;

  // The slider covers Jan 1 .. Dec 31 of the season, in days.
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(parseInt(season, 10), 11, 31);
  slider.max = Math.ceil((yearEnd - start) / 86400000);

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  urlParams.set("mode", fusionMode);
  history.replaceState({}, "", `?${urlParams}`);

  // An explicit ?date= positions the slider.
  const urlDate = urlParams.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value));

  await loadTimeseries();
  await updateMap();
}
// Page bootstrap: load the site list, find which site/season pairs have
// data, fill the selectors, apply URL parameters, create the map and wire
// up the change handlers.
//
// Changes from the previous version:
//  - `strategy` and `sigma` from the URL are accepted only if they match
//    one of the selector options; other values fall back to the defaults
//    instead of flowing into fetch paths and leaving the select blank.
//  - the `site` parameter is checked with an own-property test, so names
//    such as "constructor" no longer match Object.prototype members.
//  - season <option>s are built with DOM APIs instead of an HTML string.
async function init() {
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch { window.sitesData = { features: [] }; }

  // Keep only site/season pairs whose pre-selection file exists.
  const features = window.sitesData.features || [];
  for (const f of features) {
    const sn = f.properties?.sitename;
    if (!sn) continue;
    const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
    const withData = [];
    for (const s of seasonsFromGeo) {
      if (await probeDataExists(sn, s)) withData.push(s);
    }
    if (withData.length) availableSiteSeasons[sn] = withData;
  }

  const hasSite = (sn) => Object.prototype.hasOwnProperty.call(availableSiteSeasons, sn);
  const seasonsOf = (sn) => (hasSite(sn) ? availableSiteSeasons[sn] : []);

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  siteSelect.innerHTML = "";
  // With no data at all, fall back to the built-in default site/season.
  (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach(sn => {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!hasSite(sn)) availableSiteSeasons[sn] = ["2024"];
  });

  // Replace the season options with the given list.
  const fillSeasons = (seasons) => {
    seasonSelect.replaceChildren(...seasons.map((s) => {
      const opt = document.createElement("option");
      opt.value = s;
      opt.textContent = s;
      return opt;
    }));
  };

  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite = (urlSite && hasSite(urlSite)) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeason = (urlSeason && seasonsOf(initialSite).includes(urlSeason)) ? urlSeason : (seasonsOf(initialSite)[0] || "2024");
  siteSelect.value = initialSite;
  fillSeasons(seasonsOf(initialSite));
  seasonSelect.value = initialSeason;

  const strategyParam = urlParams.get("strategy");
  strategy = ["aggressive", "nonaggressive"].includes(strategyParam) ? strategyParam : "aggressive";
  const sigmaParam = urlParams.get("sigma");
  sigma = ["20", "30"].includes(sigmaParam) ? sigmaParam : "20";
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("strategySelect").value = strategy;
  document.getElementById("sigmaSelect").value = sigma;
  document.getElementById("fusionModeSelect").value = fusionMode;
  const ml = document.getElementById("mapLabelFusion");
  if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";

  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  fusionMap = L.map("fusionMap", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(fusionMap);

  siteSelect.addEventListener("change", function() {
    const sn = this.value;
    const seas = seasonsOf(sn);
    fillSeasons(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function() {
    setSiteSeason(siteSelect.value, this.value);
  });
  document.getElementById("strategySelect").addEventListener("change", function() {
    strategy = this.value;
    urlParams.set("strategy", strategy);
    history.replaceState({}, "", `?${urlParams}`);
    loadTimeseries(); updateMap();
  });
  document.getElementById("sigmaSelect").addEventListener("change", function() {
    sigma = this.value;
    urlParams.set("sigma", sigma);
    history.replaceState({}, "", `?${urlParams}`);
    loadTimeseries(); updateMap();
  });
  document.getElementById("fusionModeSelect").addEventListener("change", function() {
    fusionMode = this.value;
    urlParams.set("mode", fusionMode);
    history.replaceState({}, "", `?${urlParams}`);
    const ml = document.getElementById("mapLabelFusion");
    if (ml) ml.textContent = fusionMode === "itb" ? "Fusion GCC grayscale (closest available)" : "Fusion RGB (closest available)";
    loadTimeseries(); updateMap();
  });

  await setSiteSeason(initialSite, initialSeason);
}
// Moving the slider updates the date label, redraws the plot cursors and
// refreshes the map image.
document.getElementById("dateSlider").addEventListener("input", function() {
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(this.value));
  drawPlots(); updateMap();
});
// Start the page.
init();
</script>
</body>
</html>

View file

@ -1,284 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Gap validation</title>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
.selectors { margin-bottom: 18px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { font-size: 22px; margin-top: 0; }
h2 { font-size: 16px; margin-top: 22px; color: #333; }
h2:first-of-type { margin-top: 8px; }
table { border-collapse: collapse; width: 100%; font-size: 12px; margin-bottom: 14px; }
th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; vertical-align: top; }
th { background: #f5f5f5; }
td.num { text-align: right; font-variant-numeric: tabular-nums; }
td.paths { font-size: 11px; word-break: break-all; color: #444; max-width: 420px; }
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
.intro code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
.section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; line-height: 1.45; }
.empty { color: #666; font-style: italic; }
.err { color: #a00; }
details.meta { font-size: 12px; margin-top: 12px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
details.meta summary { cursor: pointer; font-weight: 600; }
details.meta pre { margin: 8px 0 0; overflow: auto; font-size: 11px; max-height: 200px; }
</style>
</head>
<body>
<div class="container">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html" class="active">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="pageTitle">Gap validation</h1>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
</div>
<div id="content"></div>
</div>
<script>
let siteName = "innsbruck",
season = "2024";
let availableSiteSeasons = {};
const urlParams = new URLSearchParams(location.search);
// Resolve to true when the gap-validation summary of a site/season answers
// a HEAD request with an OK status; any fetch error counts as "missing".
async function probeSummary(sn, s) {
  const url = `data/${sn}/${s}/validation/gap_validation_summary.json`;
  try {
    const { ok } = await fetch(url, { method: "HEAD" });
    return ok;
  } catch {
    return false;
  }
}
// Format a finite number with `d` decimals (default 4); anything else,
// including null, NaN, infinities and non-numbers, renders as an em dash.
function fmt(v, d = 4) {
  const isUsable = typeof v === "number" && Number.isFinite(v);
  return isUsable ? v.toFixed(d) : "—";
}
// Format a finite number rounded to the nearest integer; anything else
// renders as an em dash.
function fmtInt(v) {
  const isUsable = typeof v === "number" && Number.isFinite(v);
  if (!isUsable) return "—";
  return String(Math.round(v));
}
// Build the HTML for the "Whittaker crossover" section of a summary.
// Uses the crossover entry of the summary's own scenario, falling back to
// the first entry present; returns "" when there is none.
function crossoverBlock(summary) {
  const wcRoot = summary.whittaker_crossover || {};
  const scenario = summary.scenario;
  const wc = (scenario && wcRoot[scenario]) || Object.values(wcRoot)[0];
  if (!wc) return "";

  const firstGap = wc.first_gap_days_fusion_nse_below_whittaker;
  const definition = wc.whittaker_definition || "";
  const pieces = [
    `<h2>Whittaker crossover (NSE<sub>S2</sub>)</h2>`,
    `<p class="section-note">${definition}</p>`,
    `<p class="section-note"><b>First gap length (days)</b> where fusion NSE<sub>S2</sub> &lt; Whittaker NSE<sub>S2</sub> (strict): <b>${firstGap ?? "—"}</b> (none if fusion never falls below).</p>`,
  ];

  // Optional per-gap comparison table.
  const rows = wc.by_gap || [];
  if (rows.length) {
    pieces.push(`<table><tr><th>Gap days</th><th class="num">NSE<sub>S2</sub> fusion</th><th class="num">NSE<sub>S2</sub> Whittaker</th></tr>`);
    rows.forEach((row) => {
      pieces.push(`<tr><td>${row.gap_days}</td><td class="num">${fmt(row.nse_s2_fusion, 3)}</td><td class="num">${fmt(row.nse_s2_whittaker, 3)}</td></tr>`);
    });
    pieces.push(`</table>`);
  }
  return pieces.join("");
}
// Build the HTML for the "Gap manifest" table; returns "" when the
// manifest has no entries.
// Every interpolated value is HTML-escaped: the strings come from JSON
// files (and site/season from the URL) and are not guaranteed to be free
// of markup characters.
function manifestTable(manifest) {
  if (!manifest?.entries?.length) return "";
  const ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => ENTITIES[ch]);

  let h = `<h2>Gap manifest</h2>`;
  h += `<p class="section-note">From <code>data/${esc(siteName)}/${esc(season)}/validation/gap_manifest.json</code>. Midpoint rule: ${esc(manifest.entries[0]?.midpoint_rule || "—")}.</p>`;
  h += `<table><tr><th>Transition</th><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`;
  for (const e of manifest.entries) {
    const w = `${e.window_start} → ${e.window_end}`;
    h += `<tr><td>${esc(e.transition || "—")}</td><td>${esc(e.gap_days)}</td><td>${esc(e.prediction_date)}</td><td>${esc(w)}</td><td>${esc(e.withheld_s2_filename || "—")}</td></tr>`;
  }
  h += `</table>`;
  return h;
}
// Build the HTML table of per-gap spatial metrics.
// Returns a placeholder paragraph when there are no rows. Rows carrying an
// `error` field are rendered as a single spanning error cell.
//
// Changes from the previous version:
//  - interpolated strings are HTML-escaped; error messages and file paths
//    come from JSON and may contain "<" or "&".
//  - the error cell spans 10 columns (was 9) so an error row fills all 13
//    columns of the header.
function resultsTable(results) {
  if (!results?.length) return `<p class="empty">No result rows in summary.</p>`;
  const ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => ENTITIES[ch]);

  const head = `<tr>
<th>Transition</th><th>Gap</th><th>Prediction</th><th>Withheld REFL</th>
<th class="num">RMSE<br><span style="font-weight:normal">gap</span></th>
<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">gap</span></th>
<th class="num">RMSE<br><span style="font-weight:normal">no gap</span></th>
<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">no gap</span></th>
<th class="num">ΔRMSE</th><th class="num">ΔNSE</th>
<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">Whitt.</span></th>
<th class="num">n</th>
<th>Paths / error</th>
</tr>`;
  const parts = [head];
  for (const r of results) {
    if (r.error) {
      parts.push(
        `<tr><td>${esc(r.transition ?? "—")}</td><td>${esc(r.gap_days ?? "—")}</td><td colspan="10" class="err">${esc(r.error)}</td><td class="paths">${esc(r.fused_gap_path || "")}</td></tr>`
      );
      continue;
    }
    const g = r.spatial?.gap || {};
    const ng = r.spatial?.no_gap || {};
    const wh = r.spatial?.whittaker || {};
    const dRm = r.spatial?.delta_rmse;
    const dNs = r.spatial?.delta_nse;
    const p = r.paths || {};
    // Escape each path separately; the <br> separators are our own markup.
    const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl].filter(Boolean).map(esc).join("<br>");
    parts.push(`<tr>
<td>${esc(r.transition || "—")}</td>
<td>${esc(r.gap_days)}</td>
<td>${esc(r.prediction_date || "—")}</td>
<td style="font-size:11px">${esc(r.withheld_s2_filename || "—")}</td>
<td class="num">${fmt(g.rmse)}</td>
<td class="num">${fmt(g.nse_s2, 3)}</td>
<td class="num">${fmt(ng.rmse)}</td>
<td class="num">${fmt(ng.nse_s2, 3)}</td>
<td class="num">${fmt(dRm)}</td>
<td class="num">${fmt(dNs, 3)}</td>
<td class="num">${fmt(wh.nse_s2, 3)}</td>
<td class="num">${fmtInt(g.n_pixels)}</td>
<td class="paths">${pathNote}</td>
</tr>`);
  }
  return `<table>${parts.join("")}</table>`;
}
// Build the collapsible "Run metadata" block (git commit and command
// line); returns "" when the summary carries neither.
//
// Changes from the previous version:
//  - values are HTML-escaped; a command line can contain "<", ">" or "&".
//  - `command_line` may be an array of arguments or a plain string; a
//    string used to throw because it has a length but no map().
function metaDetails(summary) {
  const cmd = summary.command_line;
  const git = summary.git_commit;
  if (!cmd && !git) return "";
  const ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => ENTITIES[ch]);

  let h = `<details class="meta"><summary>Run metadata</summary>`;
  if (git) h += `<p>Git: <code>${esc(git)}</code></p>`;
  const cmdText = Array.isArray(cmd) ? cmd.map((x) => String(x)).join(" ") : (typeof cmd === "string" ? cmd : "");
  if (cmdText) h += `<pre>${esc(cmdText)}</pre>`;
  h += `</details>`;
  return h;
}
// Render the page body into #content.
//   summary  - parsed gap_validation_summary.json, or null if unavailable
//   manifest - parsed gap_manifest.json, or null if unavailable
// Without a summary, an error message with run instructions is shown
// (plus the manifest table when a manifest exists). Otherwise the intro,
// the metrics table, the crossover block, the run metadata and the
// manifest table are concatenated and written in one assignment.
async function render(summary, manifest) {
  const el = document.getElementById("content");
  if (!summary) {
    // NOTE(review): the adjacent <code> elements in this message have no
    // separator between them; a character may have been lost - confirm.
    el.innerHTML = `<p class="err">Could not load <code>data/${siteName}/${season}/validation/gap_validation_summary.json</code>.</p>
<p class="section-note">From <code>processing/</code>: <code>python -m gap_validation.run --site ${siteName} --season ${season} --lat LAT --lon LON</code> (see <code>--help</code>). Serve from <code>processing/</code>: <code>python3 -m http.server 8000</code><code>/webapp/gap_validation.html</code> (<code>webapp/data</code><code>../data</code>).</p>`;
    if (manifest?.entries) el.innerHTML += manifestTable(manifest);
    return;
  }
  // Prefer the identifiers recorded in the summary over the page selection.
  const scen = summary.scenario || "—";
  const sn = summary.site_name ?? siteName;
  const se = summary.season ?? season;
  let html = `<div class="intro">
Tier-2 withheld S2, spatial GCC vs withheld scene, NSE<sub>S2</sub>, and Whittaker comparison.
Summary: <code>data/${sn}/${se}/validation/gap_validation_summary.json</code>.
Scenario in this file: <b>${scen}</b> (one run overwrites; re-run CLI for other strategy/σ/mode).
</div>`;
  html += `<h2>Spatial metrics (per gap length)</h2>`;
  // NOTE(review): the two delta definitions below show their terms with no
  // operator between them; presumably a minus sign is missing - confirm
  // against the code that computes delta_rmse / delta_nse.
  html += `<p class="section-note">Reference = GCC from withheld S2 REFL (bilinear to fusion grid). Prediction = fused GCC. ΔRMSE = RMSE<sub>gap</sub> RMSE<sub>no gap</sub>; ΔNSE = NSE<sub>no gap</sub> NSE<sub>gap</sub>.</p>`;
  html += resultsTable(summary.results);
  html += crossoverBlock(summary);
  html += metaDetails(summary);
  if (manifest?.entries) html += manifestTable(manifest);
  el.innerHTML = html;
}
/**
 * Fetch the validation summary and gap manifest for the current site/season,
 * render them, then refresh the page title and the URL query string.
 *
 * A failed request, a non-OK response or unparsable JSON all yield `null`
 * for that file.
 */
async function load() {
  const fetchJsonOrNull = async (url) => {
    try {
      const res = await fetch(url);
      return res.ok ? await res.json() : null;
    } catch {
      return null;
    }
  };
  // Requested one after the other, summary first.
  const summary = await fetchJsonOrNull(`data/${siteName}/${season}/validation/gap_validation_summary.json`);
  const manifest = await fetchJsonOrNull(`data/${siteName}/${season}/validation/gap_manifest.json`);
  await render(summary, manifest);

  const features = window.sitesData?.features;
  const site = features?.find((f) => f.properties?.sitename === siteName);
  const heading = site?.properties?.description || siteName;
  document.getElementById("pageTitle").textContent = `${heading} — gap validation — ${season}`;

  // Keep the address bar in sync with the selection without adding history entries.
  urlParams.set("site", siteName);
  urlParams.set("season", season);
  history.replaceState({}, "", `?${urlParams}`);
}
/**
 * Page bootstrap: load the site list, probe which site/season pairs have a
 * summary file, fill both <select> elements, wire their change handlers and
 * trigger the first load().
 */
async function init() {
// Load the site catalogue; any failure falls back to an empty feature list.
try {
const res = await fetch("data/sites.geojson");
window.sitesData = res.ok ? await res.json() : { features: [] };
} catch {
window.sitesData = { features: [] };
}
const features = window.sitesData.features || [];
// Probe every (site, season) pair listed in the geojson, one request at a time,
// and keep only the seasons for which probeSummary() reports success.
for (const f of features) {
const sn = f.properties?.sitename;
if (!sn) continue;
const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
const withData = [];
for (const s of seasonsFromGeo) {
if (await probeSummary(sn, s)) withData.push(s);
}
if (withData.length) availableSiteSeasons[sn] = withData;
}
const availableSites = Object.keys(availableSiteSeasons);
const siteSelect = document.getElementById("siteSelect");
siteSelect.innerHTML = "";
// NOTE: sort() reorders availableSites in place, so availableSites[0] below is
// the alphabetically first site. With no sites found, "innsbruck" is offered.
(availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
const opt = document.createElement("option");
opt.value = sn;
opt.textContent = sn;
siteSelect.appendChild(opt);
// Only reached for the fallback site: give it a default season.
if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
});
// Honour ?site= / ?season= when they name a known combination.
const urlSite = urlParams.get("site");
const urlSeason = urlParams.get("season");
const initialSite = urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
const initialSeason =
urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)
? urlSeason
: (availableSiteSeasons[initialSite] || [])[0] || "2024";
siteSelect.value = initialSite;
document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || [])
.map((s) => `<option value="${s}">${s}</option>`)
.join("");
document.getElementById("seasonSelect").value = initialSeason;
siteName = initialSite;
season = initialSeason;
// Changing the site rebuilds the season list and selects its first season.
siteSelect.addEventListener("change", function () {
const sn = this.value;
const seas = availableSiteSeasons[sn] || [];
document.getElementById("seasonSelect").innerHTML = seas.map((s) => `<option value="${s}">${s}</option>`).join("");
document.getElementById("seasonSelect").value = seas[0] || "2024";
siteName = sn;
season = document.getElementById("seasonSelect").value;
load();
});
document.getElementById("seasonSelect").addEventListener("change", function () {
season = this.value;
load();
});
await load();
}
// Start the page; the returned promise is intentionally not awaited.
init();
</script>
</body>
</html>

File diff suppressed because it is too large Load diff

View file

@ -1,367 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Metrics</title>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
.selectors { margin-bottom: 20px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { font-size: 22px; }
h2 { font-size: 16px; margin-top: 24px; color: #333; }
h2:first-of-type { margin-top: 8px; }
h3 { font-size: 14px; margin: 14px 0 6px 0; color: #444; font-weight: 600; }
table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 12px; }
th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; }
th { background: #f5f5f5; }
td.num { text-align: right; font-variant-numeric: tabular-nums; }
.fusion-block table { margin-bottom: 4px; }
.fusion-block table + table { margin-top: 12px; }
.section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
.section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
.intro-short { margin-bottom: 0; }
details.definitions { margin-top: 28px; font-size: 13px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
details.definitions summary { cursor: pointer; font-weight: 600; color: #333; }
details.definitions ul { margin: 8px 0 0 18px; padding: 0; }
details.definitions li { margin-bottom: 4px; }
.scenario-key { font-size: 11px; color: #666; font-weight: normal; }
.empty { color: #666; font-style: italic; }
.err { color: #a00; }
details.how-read {
font-size: 12px; color: #333; line-height: 1.5; max-width: 820px; margin: 0 0 18px 0;
padding: 8px 12px 10px; border: 1px solid #ccd; border-radius: 4px; background: #f8fafc;
}
details.how-read summary {
cursor: pointer; font-weight: 600; font-size: 13px; color: #111; margin-bottom: 0;
}
details.how-read ol { margin: 10px 0 0; padding-left: 1.35rem; }
details.how-read li { margin-bottom: 7px; }
details.how-read li:last-child { margin-bottom: 0; }
</style>
</head>
<body>
<div class="container">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html" class="active">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="siteName">Metrics</h1>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
</div>
<div id="content"></div>
</div>
<script>
/**
 * Column header for each metric shown in the UI, keyed by its metrics.json
 * field. pearson_r, rmse, mae and n_samples remain in metrics.json only.
 */
const DISPLAY_METRIC_LABELS = {
  r_squared: "R² vs mean",
  nrmse: "nRMSE",
  nse_pc: "NSE_PC",
};
/** Column order used by every metrics table (insertion order of the labels). */
const DISPLAY_METRIC_COLS = Object.keys(DISPLAY_METRIC_LABELS);
/** One `[temporal key, strategy label, σ]` triple per Bands-then-Index scenario. */
const FUSION_BTI_ROWS = [
  ["aggressive", "Aggressive"],
  ["nonaggressive", "Non-aggressive"],
].flatMap(([slug, label]) => [20, 30].map((sig) => [`${slug}_sigma${sig}`, label, sig]));
/**
 * Read metric column `c` from metrics object `m`.
 * For "nse_pc" the `nse` field is used when `nse_pc` is null or undefined.
 */
function mv(m, c) {
  if (c !== "nse_pc") return m[c];
  return m.nse_pc ?? m.nse;
}
/**
 * Format one metric value for display.
 * Non-numbers become "—"; r_squared and nse_pc get 3 decimals, nrmse gets 4,
 * and any other column is delegated to fmt().
 */
function fmtMetric(col, v) {
  // typeof also rejects null and undefined, so no separate null check is needed.
  if (typeof v !== "number") return "—";
  switch (col) {
    case "r_squared":
    case "nse_pc":
      return v.toFixed(3);
    case "nrmse":
      return v.toFixed(4);
    default:
      return fmt(v);
  }
}
// Currently selected site and season; these defaults hold until init() picks
// the initial selection.
let siteName = "innsbruck";
let season = "2024";
// sitename -> array of season strings, filled in by init().
let availableSiteSeasons = {};
// Query parameters of the current page URL.
const urlParams = new URLSearchParams(location.search);
/**
 * Check whether metrics.json exists for a site/season using a HEAD request.
 *
 * @returns {Promise<boolean>} The response's `ok` flag, or false if the
 *   request itself fails.
 */
async function probeMetrics(sn, s) {
  let found = false;
  try {
    const res = await fetch(`data/${sn}/${s}/metrics.json`, { method: "HEAD" });
    found = res.ok;
  } catch {
    found = false;
  }
  return found;
}
/**
 * Generic number formatter: integers are printed as-is, other numbers with
 * four decimals, and anything that is not a number as "—".
 */
function fmt(v) {
  // typeof also covers null and undefined.
  if (typeof v !== "number") return "—";
  if (Number.isInteger(v)) return String(v);
  return v.toFixed(4);
}
/**
 * Extract the mean residual (`residual_vs_phenocam.mean`) from a fusion
 * metrics entry.
 *
 * @param {object|null|undefined} m Metrics entry for one scenario.
 * @returns {number|null} The finite mean, or null when it is absent or not numeric.
 */
function fusionMeanResidual(m) {
  const x = m?.residual_vs_phenocam?.mean;
  // Number(null), Number("") and Number(false) all evaluate to 0, which would
  // turn a missing value into a fake "zero bias". Accept only real numbers
  // and non-blank numeric strings.
  if (typeof x === "number") return Number.isFinite(x) ? x : null;
  if (typeof x === "string" && x.trim() !== "") {
    const n = Number(x);
    return Number.isFinite(n) ? n : null;
  }
  return null;
}
/**
 * Build the <tr> strings for one fusion table.
 *
 * @param {object} temporal Scenario key -> metrics entry.
 * @param {Array} keysWithLabels `[key, strategy label, σ]` triples; keys with
 *   no (truthy) entry in `temporal` are skipped.
 * @param {boolean} includeMeanResid Whether to append the mean-residual cell.
 * @returns {string[]} One HTML row per scenario present in `temporal`.
 */
function fusionSubTableRows(temporal, keysWithLabels, includeMeanResid) {
  const rows = [];
  keysWithLabels.forEach(([key, stratLabel, sig]) => {
    const entry = temporal[key];
    if (!entry) return;
    const metricCells = DISPLAY_METRIC_COLS
      .map((c) => `<td class="num">${fmtMetric(c, mv(entry, c))}</td>`)
      .join("");
    const residual = fusionMeanResidual(entry);
    let meanCell = "";
    if (includeMeanResid) {
      const shown = residual !== null ? residual.toFixed(3) : "—";
      meanCell = `<td class="num">${shown}</td>`;
    }
    rows.push(
      `<tr><td>${stratLabel}, σ=${sig} <span class="scenario-key">(${key})</span></td>${metricCells}${meanCell}</tr>`
    );
  });
  return rows;
}
/**
 * Render the Bands-then-Index and Index-then-Bands fusion tables.
 *
 * @param {object} temporal Scenario key -> metrics entry; ItB scenarios use
 *   the BtI key with an `_itb` suffix.
 * @returns {string} HTML for the fusion block, or an "empty" paragraph when
 *   `temporal` is not an object or contains none of the known scenarios.
 */
function fusionTables(temporal) {
  if (!temporal || typeof temporal !== "object") {
    return `<p class="empty">No fusion temporal data</p>`;
  }
  const itbRows = FUSION_BTI_ROWS.map(([key, label, sig]) => [`${key}_itb`, label, sig]);
  // The mean-residual column is shown in both tables as soon as any scenario has one.
  const showMean = [...FUSION_BTI_ROWS, ...itbRows].some(
    ([key]) => fusionMeanResidual(temporal[key]) !== null
  );
  const sections = [
    ["Bands-then-Index (BtI)", fusionSubTableRows(temporal, FUSION_BTI_ROWS, showMean)],
    ["Index-then-Bands (ItB)", fusionSubTableRows(temporal, itbRows, showMean)],
  ];
  if (sections.every(([, rows]) => rows.length === 0)) {
    return `<p class="empty">No fusion scenarios in temporal</p>`;
  }
  const metricHeads = DISPLAY_METRIC_COLS
    .map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`)
    .join("");
  const meanTh = showMean ? `<th class="num">Mean resid.</th>` : "";
  const head = `<tr><th>Setting</th>${metricHeads}${meanTh}</tr>`;
  const tables = sections
    .filter(([, rows]) => rows.length)
    .map(([title, rows]) => `<h3>${title}</h3><table>${head}${rows.join("")}</table>`)
    .join("");
  return `<div class="fusion-block">${tables}</div>`;
}
/**
 * Render the baseline comparison table.
 *
 * @param {object} b Reads `s2` plus per-strategy `s3`, `s2_cloudfree` and
 *   `s2_whittaker_lambda400` entries; entries that are not objects are skipped.
 * @returns {string} A bare &lt;table&gt;…&lt;/table&gt; (no heading), or "" when there is nothing to show.
 */
function baselineTable(b) {
  if (!b || typeof b !== "object") return "";
  // Collect [label, metrics] candidates in display order first, filter afterwards.
  const candidates = [["S2 GCC (all acquisitions)", b.s2]];
  ["aggressive", "nonaggressive"].forEach((strat) => {
    candidates.push(
      [`S3 composite GCC (${strat})`, b.s3?.[strat]],
      [`S2 GCC cloud-screened (${strat})`, b.s2_cloudfree?.[strat]],
      [`S2 Whittaker λ=400 (${strat})`, b.s2_whittaker_lambda400?.[strat]]
    );
  });
  const rows = candidates
    .filter(([, m]) => m && typeof m === "object")
    .map(([label, m]) => {
      const cells = DISPLAY_METRIC_COLS
        .map((c) => `<td class="num">${fmtMetric(c, mv(m, c))}</td>`)
        .join("");
      return `<tr><td>${label}</td>${cells}</tr>`;
    });
  if (rows.length === 0) return "";
  const metricHeads = DISPLAY_METRIC_COLS
    .map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`)
    .join("");
  return `<table><tr><th>Baseline</th>${metricHeads}</tr>${rows.join("")}</table>`;
}
/**
 * Format a value with three decimals.
 *
 * @param {*} v A number or numeric string.
 * @returns {string} `v` to 3 decimals, or "—" when it is missing or not a finite number.
 */
function fmtFixed3(v) {
  // Number(null), Number("") and Number(false) are 0; without this guard a
  // missing value would be displayed as a genuine "0.000".
  const isNumber = typeof v === "number";
  const isNonBlankString = typeof v === "string" && v.trim() !== "";
  if (!isNumber && !isNonBlankString) return "—";
  const n = Number(v);
  return Number.isFinite(n) ? n.toFixed(3) : "—";
}
/**
 * Render the "Summaries" section from the `derived` part of metrics.json.
 *
 * @param {object|null|undefined} d Reads `delta_nse_pc_sigma20_minus_sigma30`
 *   (mode -> strategy -> value) and `bti_vs_itb_mean_residual` (array of rows).
 * @returns {string} HTML for the section, or "" when neither part is present.
 */
function derivedSection(d) {
  if (!d) return "";
  const dn = d.delta_nse_pc_sigma20_minus_sigma30;
  const paired = d.bti_vs_itb_mean_residual || [];
  if (!dn && !paired.length) return "";
  let h = `<h2>Summaries</h2>`;
  h += `<p class="section-note">Same numbers as Fusion, condensed. First table: which σ fits PhenoCam better (NSE_PC only). Second: mean bias BtI vs ItB.</p>`;
  if (dn) {
    // The minus signs and arrows of this legend were missing; HTML entities are
    // used so they survive any re-encoding of the file.
    h += `<p class="section-note"><b>ΔNSE_PC</b> = NSE_PC(σ20) &minus; NSE_PC(σ30). <b>+</b> &rArr; σ20 better. <b>&minus;</b> &rArr; σ30 better.</p>`;
    h += `<table><tr><th>Mode</th><th>Strategy</th><th class="num">ΔNSE_PC</th></tr>`;
    let anyDelta = false;
    for (const mode of ["bti", "itb"]) {
      for (const strat of ["aggressive", "nonaggressive"]) {
        const v = dn[mode]?.[strat];
        // Number(null) is 0, so a null/blank delta must not count as "present";
        // otherwise the re-run hint below is never shown for all-null data.
        if (v != null && v !== "" && Number.isFinite(Number(v))) anyDelta = true;
        h += `<tr><td>${mode.toUpperCase()}</td><td>${strat}</td><td class="num">${fmtFixed3(v)}</td></tr>`;
      }
    }
    h += `</table>`;
    if (!anyDelta) {
      h += `<p class="section-note">ΔNSE_PC needs both σ20 and σ30 fusion rows in <code>temporal</code> (BtI and ItB). Re-run <code>metrics_stats</code>.</p>`;
    }
  }
  if (paired.length) {
    h += `<p class="section-note">Mean(fused &minus; PhenoCam) per row. <b>+</b> / <b>&minus;</b> = average over / under PhenoCam. Closer to <b>0</b> in a column = less bias for that workflow.</p>`;
    h += `<table><tr><th>Strategy</th><th>σ</th><th class="num">Mean residual BtI</th><th class="num">Mean residual ItB</th></tr>`;
    for (const row of paired) {
      h += `<tr><td>${row.strategy}</td><td>${row.sigma}</td><td class="num">${fmtFixed3(row.mean_residual_bti)}</td><td class="num">${fmtFixed3(row.mean_residual_itb)}</td></tr>`;
    }
    h += `</table>`;
  }
  return h;
}
/**
 * Static "How to read" help block shown above the metrics tables.
 *
 * The formulas previously rendered without their minus signs
 * ("1 SSres/SStot", "mean(fused PhenoCam)"); they are restored as
 * `&minus;` entities.
 *
 * @returns {string} HTML for a collapsible <details> element.
 */
function howToReadBlock() {
  return [
    `<details class="how-read">`,
    `<summary>How to read</summary>`,
    `<ol>`,
    `<li>All scores are satellite or fusion <b>GCC</b> vs <b>PhenoCam GCC</b> at the site 3×3 window, <b>same calendar days</b> only. Extra stats: <code>metrics.json</code>.</li>`,
    `<li><b>R² vs mean</b> and <b>NSE_PC</b> are the same value (1 &minus; SS<sub>res</sub>/SS<sub>tot</sub> vs predicting mean PhenoCam each day); not (Pearson <i>r</i>)²; can be negative. Higher = better. <b>nRMSE</b>: lower = better.</li>`,
    `<li><b>Fusion:</b> same row number in BtI and in ItB = same screening + same σ — compare left/right. Down one block = change screening or σ.</li>`,
    `<li><b>Mean resid.</b> (if present): mean(fused &minus; PhenoCam). Sign = average bias; use R² vs mean / nRMSE / NSE_PC for overall fit.</li>`,
    `<li><b>Summaries:</b> ΔNSE_PC = NSE at σ20 minus NSE at σ30 (+ means σ20 wins). Paired table: closer to 0 = less mean bias.</li>`,
    `</ol>`,
    `</details>`,
  ].join("\n");
}
/**
 * Static glossary appended below the metrics tables.
 *
 * @returns {string} HTML for a collapsible <details class="definitions"> element.
 */
function definitionsDetails() {
  const items = [
    `<li><b>BtI</b>: fuse reflectance bands, then GCC.</li>`,
    `<li><b>ItB</b>: GCC on S2 and S3, then fuse GCC.</li>`,
    `<li><b>Scenario</b>: screening (<code>aggressive</code> / <code>nonaggressive</code>) × σ (20 / 30 days).</li>`,
    `<li><a href="phenology.html">Phenology</a> — PhenoCam SOS/EOS (TIMESAT).</li>`,
    `<li><b>R² vs mean</b> — coefficient of determination vs a constant mean(PhenoCam) baseline; JSON key <code>r_squared</code>; duplicates <code>nse_pc</code>. Not (Pearson <i>r</i>)².</li>`,
    `<li><code>metrics.json</code> — also Pearson <i>r</i>, RMSE, MAE, <code>n_samples</code>.</li>`,
  ];
  const lines = [
    `<details class="definitions">`,
    `<summary>Definitions</summary>`,
    `<ul>`,
    ...items,
    `</ul>`,
    `</details>`,
  ];
  return lines.join("\n");
}
/**
 * Render the metrics page body into #content.
 *
 * @param {object|null} data Parsed metrics.json, or a falsy value when it
 *   could not be loaded. Reads `phenocam_stats`, `baseline`, `temporal` and
 *   `derived`.
 */
function render(data) {
  const el = document.getElementById("content");
  if (!data) {
    el.innerHTML = `<p class="err">Could not load metrics.json</p>`;
    return;
  }
  // The former `html || "Empty metrics file"` fallback could never trigger
  // because the intro is always emitted; test for actual content instead.
  const hasContent = ["phenocam_stats", "baseline", "temporal", "derived"].some(
    (key) => data[key] != null
  );
  if (!hasContent) {
    el.innerHTML = `<p class="empty">Empty metrics file</p>`;
    return;
  }
  let html = [
    `<div class="intro intro-short">`,
    `GCC at the 3×3 site window vs PhenoCam. Sections: PhenoCam → baselines → fusion (BtI, then ItB) → summaries.`,
    `<code>data/${siteName}/${season}/metrics.json</code>`,
    `</div>`,
  ].join("\n");
  html += howToReadBlock();
  if (data.phenocam_stats) {
    const p = data.phenocam_stats;
    html += `<h2>PhenoCam (ground truth)</h2>`;
    // "SOS/EOS" matches the wording used in the Definitions block; the label
    // previously read "SOSEOS" with its separator missing.
    html += `<p class="section-note">Camera ROI GCC (not compared to itself). Dates / SOS/EOS: <a href="phenology.html">Phenology</a>.</p>`;
    html += `<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
    html += `<td class="num">${fmt(p.mean)}</td><td class="num">${fmt(p.std)}</td><td class="num">${fmt(p.min)}</td><td class="num">${fmt(p.max)}</td><td class="num">${fmt(p.n_samples)}</td></tr></table>`;
  }
  const baselineTbl = baselineTable(data.baseline);
  if (baselineTbl) {
    html += `<h2>Baselines (vs PhenoCam)</h2>`;
    html += `<p class="section-note">Same columns as fusion (vs PhenoCam). Higher R² vs mean / NSE_PC, lower nRMSE = better. S3 = coarse-only; Whittaker = smoothed S2-only.</p>`;
    html += baselineTbl;
  }
  html += `<h2>Fusion (vs PhenoCam)</h2>`;
  html += `<p class="section-note">BtI block vs ItB block: same row = same screening + σ. Within a block: four EFAST combinations.</p>`;
  html += fusionTables(data.temporal || {});
  html += derivedSection(data.derived);
  html += definitionsDetails();
  el.innerHTML = html;
}
/**
 * Fetch metrics.json for the current site/season and render it, then update
 * the heading and the URL query string.
 *
 * Any error raised while fetching, parsing or rendering results in
 * `render(null)`.
 */
async function load() {
  const url = `data/${siteName}/${season}/metrics.json`;
  try {
    const res = await fetch(url);
    const data = res.ok ? await res.json() : null;
    render(data);
  } catch {
    render(null);
  }

  const features = window.sitesData?.features;
  const site = features?.find((f) => f.properties?.sitename === siteName);
  const heading = site?.properties?.description || siteName;
  document.getElementById("siteName").textContent = `${heading} — ${season}`;

  // Mirror the selection into the address bar without adding a history entry.
  urlParams.set("site", siteName);
  urlParams.set("season", season);
  history.replaceState({}, "", `?${urlParams}`);
}
/**
 * Page bootstrap: load the site list, probe which site/season pairs have a
 * metrics.json, fill both <select> elements, wire their change handlers and
 * trigger the first load().
 */
async function init() {
// Load the site catalogue; any failure falls back to an empty feature list.
try {
const res = await fetch("data/sites.geojson");
window.sitesData = res.ok ? await res.json() : { features: [] };
} catch { window.sitesData = { features: [] }; }
const features = window.sitesData.features || [];
// Probe every (site, season) pair listed in the geojson, one request at a time,
// and keep only the seasons for which probeMetrics() reports success.
for (const f of features) {
const sn = f.properties?.sitename;
if (!sn) continue;
const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
const withData = [];
for (const s of seasonsFromGeo) {
if (await probeMetrics(sn, s)) withData.push(s);
}
if (withData.length) availableSiteSeasons[sn] = withData;
}
const availableSites = Object.keys(availableSiteSeasons);
const siteSelect = document.getElementById("siteSelect");
siteSelect.innerHTML = "";
// NOTE: sort() reorders availableSites in place, so availableSites[0] below is
// the alphabetically first site. With no sites found, "innsbruck" is offered.
(availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
const opt = document.createElement("option");
opt.value = sn;
opt.textContent = sn;
siteSelect.appendChild(opt);
// Only reached for the fallback site: give it a default season.
if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
});
// Honour ?site= / ?season= when they name a known combination.
const urlSite = urlParams.get("site");
const urlSeason = urlParams.get("season");
const initialSite = urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
const initialSeason =
urlSeason && (availableSiteSeasons[initialSite] || []).includes(urlSeason)
? urlSeason
: (availableSiteSeasons[initialSite] || [])[0] || "2024";
siteSelect.value = initialSite;
document.getElementById("seasonSelect").innerHTML = (availableSiteSeasons[initialSite] || [])
.map((s) => `<option value="${s}">${s}</option>`)
.join("");
document.getElementById("seasonSelect").value = initialSeason;
siteName = initialSite;
season = initialSeason;
// Changing the site rebuilds the season list and selects its first season.
siteSelect.addEventListener("change", function () {
const sn = this.value;
const seas = availableSiteSeasons[sn] || [];
document.getElementById("seasonSelect").innerHTML = seas.map((s) => `<option value="${s}">${s}</option>`).join("");
document.getElementById("seasonSelect").value = seas[0] || "2024";
siteName = sn;
season = document.getElementById("seasonSelect").value;
load();
});
document.getElementById("seasonSelect").addEventListener("change", function () {
season = this.value;
load();
});
await load();
}
// Start the page; the returned promise is intentionally not awaited.
init();
</script>
</body>
</html>

View file

@ -1,146 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Phenology</title>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 900px; margin: 0 auto; padding: 20px; }
h1 { font-size: 22px; margin-top: 0; }
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
table { border-collapse: collapse; width: 100%; font-size: 13px; }
th, td { border: 1px solid #ccc; padding: 8px 10px; text-align: left; }
th { background: #f5f5f5; }
td.num { text-align: center; font-variant-numeric: tabular-nums; }
td.site { font-weight: 500; }
a.rowlink { color: #0066cc; text-decoration: none; }
a.rowlink:hover { text-decoration: underline; }
.empty { color: #666; }
.err { color: #a00; }
.loading { color: #666; }
</style>
</head>
<body>
<div class="container">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html" class="active">Phenology</a>
</div>
<h1>PhenoCam phenology (50% amplitude)</h1>
<p class="intro">
Green-up and green-down dates from <code>data/&lt;site&gt;/&lt;season&gt;/raw/phenocam/phenocam_phenology.json</code>
(TIMESAT on PhenoCam GCC). Site/season rows match <code>data/sites.geojson</code>.
Run <code>python phenology_timesat.py --all</code> or the pipeline to generate missing JSON files.
</p>
<p id="status" class="loading">Loading…</p>
<div id="tableWrap"></div>
</div>
<script>
/**
 * Escape a value for safe insertion into HTML text or attribute values.
 *
 * @param {*} s Value to escape; it is converted with String() first.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(s) {
  // "&" must be replaced first so the entities produced below are not re-escaped.
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    // Single quotes matter too: this page also builds single-quoted attributes.
    .replace(/'/g, "&#39;");
}
/**
 * Render the content of a date cell.
 *
 * @param {*} v Date string, or null/undefined/"" when no date is available.
 * @returns {string} The escaped date, or a dimmed "—" placeholder.
 */
function cellDate(v) {
  // The placeholder span used to be empty, so missing dates showed up as blank
  // cells that looked like a rendering glitch.
  if (v == null || v === "") return "<span class='empty'>—</span>";
  return escapeHtml(v);
}
/**
 * Load the green-up / green-down dates for one site and season.
 *
 * @returns {Promise<{ok: boolean, up: (string|null), down: (string|null)}>}
 *   `ok` is false (with null dates) when the file is missing, the request
 *   fails or the body is not valid JSON.
 */
async function loadPhenologyRow(site, season) {
  // A fresh object per call so callers never share a result instance.
  const notAvailable = () => ({ ok: false, up: null, down: null });
  const url = `data/${site}/${season}/raw/phenocam/phenocam_phenology.json`;
  try {
    const res = await fetch(url);
    if (!res.ok) return notAvailable();
    const phen = await res.json();
    const up = phen.green_up_50pct_date ?? null;
    const down = phen.green_down_50pct_date ?? null;
    return { ok: true, up, down };
  } catch {
    return notAvailable();
  }
}
/**
 * Build the phenology overview table: one row per site/season listed in
 * data/sites.geojson, with dates loaded from each season's phenology JSON.
 */
async function main() {
  const status = document.getElementById("status");
  const wrap = document.getElementById("tableWrap");
  let features = [];
  try {
    const res = await fetch("data/sites.geojson");
    if (!res.ok) throw new Error("Could not load sites.geojson");
    const g = await res.json();
    features = g.features || [];
  } catch {
    // One message covers network errors, non-OK responses and invalid JSON.
    status.className = "err";
    status.textContent = "Failed to load data/sites.geojson.";
    return;
  }
  const rows = [];
  for (const f of features) {
    const site = f.properties && f.properties.sitename;
    if (!site) continue;
    const desc = (f.properties && f.properties.description) || site;
    const seasons = f.properties && f.properties.seasons
      ? Object.keys(f.properties.seasons).sort()
      : [];
    for (const season of seasons) {
      rows.push({ site, season, desc });
    }
  }
  rows.sort((a, b) => a.site.localeCompare(b.site) || a.season.localeCompare(b.season));
  // All phenology files are requested concurrently; row order is preserved.
  const results = await Promise.all(
    rows.map((r) =>
      loadPhenologyRow(r.site, r.season).then((phen) => ({ ...r, ...phen }))
    )
  );
  const head =
    "<thead><tr>" +
    "<th>Site</th><th>Season</th><th>Description</th>" +
    "<th>Green-up</th><th>Green-down</th>" +
    "</tr></thead>";
  const body = results
    .map((r) => {
      const q = new URLSearchParams();
      q.set("site", r.site);
      q.set("season", r.season);
      const viewer = `index.html?${q.toString()}`;
      return (
        "<tr>" +
        `<td class="site"><a class="rowlink" href="${viewer}">${escapeHtml(r.site)}</a></td>` +
        // The season is a geojson key like every other cell value, so escape it too.
        `<td class="num">${escapeHtml(r.season)}</td>` +
        `<td>${escapeHtml(r.desc)}</td>` +
        `<td class="num">${cellDate(r.up)}</td>` +
        `<td class="num">${cellDate(r.down)}</td>` +
        "</tr>"
      );
    })
    .join("");
  status.textContent = "";
  status.className = "";
  wrap.innerHTML = "<table>" + head + "<tbody>" + body + "</tbody></table>";
}
// Start the page; the returned promise is intentionally not awaited.
main();
</script>
</body>
</html>

View file

@ -1,390 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Postprocessed Viewer</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
<script src="common.js"></script>
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
.selectors { margin-bottom: 20px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { margin: 0 0 5px 0; font-size: 22px; }
.season-row { padding-bottom: 15px; }
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
.download-links { margin-left: 10px; font-size: 14px; }
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
.download-links a:hover { text-decoration: underline; }
#dateSlider { width: 100%; margin: 15px 0; }
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
#postprocessedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
.leaflet-image-layer { image-rendering: pixelated; }
.leaflet-control-attribution { display: none; }
</style>
</head>
<body>
<div class="container">
<div class="header-sticky">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html" class="active">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="siteName">Innsbruck</h1>
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
<label>Strategy:</label>
<select id="strategySelect">
<option value="aggressive">Aggressive</option>
<option value="nonaggressive">Non-aggressive</option>
</select>
<label>Sigma:</label>
<select id="sigmaSelect">
<option value="20">σ=20</option>
<option value="30">σ=30</option>
</select>
<label>Source:</label>
<select id="sourceSelect">
<option value="s2">S2</option>
<option value="fusion">Fusion</option>
<option value="s3">S3</option>
</select>
<label>Mode:</label>
<select id="fusionModeSelect" title="BtI vs ItB processed paths">
<option value="bti">BtI</option>
<option value="itb">ItB</option>
</select>
</div>
<input type="range" id="dateSlider" min="0" max="365" value="0">
<div id="dateDisplay">2024-01-01</div>
</div>
<div class="map-label">Postprocessed RGB (closest available)</div>
<div id="mapDate" class="map-date"></div>
<div id="postprocessedMap"></div>
<div id="plots">
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
</div>
</div>
<script>
// Coordinate systems registered with proj4: UTM zone 32N and geographic WGS84.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// Current selection (site, season and processing variant).
let siteName = "innsbruck";
let season = "2024";
let strategy = "aggressive";
let sigma = "20";
let source = "s2";
let fusionMode = "bti";

// Site marker position and the date that slider value 0 corresponds to.
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);

// sitename -> array of season strings.
let availableSiteSeasons = {};

// Leaflet objects; null until created.
let postprocessedMap = null;
let overlay = null;
let marker = null;

// Time series backing the plots.
let ndviTs = [];
let gccTs = [];
let bandsTs = [];

// Band plots: time-series key (also the canvas id suffix) and line colour.
const BANDS = [
  { key: "b02", color: "#0066ff" },
  { key: "b03", color: "#00aa00" },
  { key: "b04", color: "#cc0000" },
  { key: "b8a", color: "#9900cc" },
];
const urlParams = new URLSearchParams(location.search);
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
/** Format a Date as YYYY-MM-DD using its local-time fields. */
const fmtDate = (d) => {
  const pad2 = (n) => String(n).padStart(2, "0");
  return [d.getFullYear(), pad2(d.getMonth() + 1), pad2(d.getDate())].join("-");
};
/** Convert a day offset from `start` into a YYYY-MM-DD string. */
const dateFromDays = (days) => {
  const MS_PER_DAY = 86400000;
  const shifted = new Date(start.getTime() + days * MS_PER_DAY);
  return fmtDate(shifted);
};
/**
 * Convert a YYYY-MM-DD string into a whole-day offset from `start`.
 *
 * Both dates are local midnights, so across a daylight-saving change their
 * difference is a whole number of days plus or minus one hour. Math.floor
 * turned "N days minus 1 hour" into N - 1; rounding gives the calendar-day
 * difference.
 *
 * @param {string} dateStr Date in YYYY-MM-DD form.
 * @returns {number} Calendar days between `start` and the given date.
 */
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  return Math.round((new Date(y, m - 1, d) - start) / 86400000);
};
/**
 * Directory holding processed output for the current selection.
 * ItB runs live in `processed_<strategy>_itb_sigma<σ>`, all others in
 * `processed_<strategy>_sigma<σ>`.
 */
function getProcessedPath() {
  const modeTag = fusionMode === "itb" ? "_itb" : "";
  return `data/${siteName}/${season}/processed_${strategy}${modeTag}_sigma${sigma}`;
}
/**
 * Load the NDVI, GCC and band time series for the current selection, then
 * redraw the plots and refresh the download links.
 *
 * A non-OK response yields an empty series for that file; if any request or
 * parse fails outright, all three series are reset to empty.
 */
async function loadTimeseries() {
  const base = getProcessedPath();
  const fetchSeries = (kind) =>
    fetch(`${base}/${kind}/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : []));
  try {
    // The three requests run concurrently; the globals are assigned together.
    [ndviTs, gccTs, bandsTs] = await Promise.all(["ndvi", "gcc", "bands"].map((kind) => fetchSeries(kind)));
  } catch {
    ndviTs = [];
    gccTs = [];
    bandsTs = [];
  }
  drawPlots();
  updateDownloadLinks();
}
/**
 * Draw one time-series line plot on a canvas, with a vertical cursor at the
 * date currently selected by the #dateSlider.
 *
 * @param {string} canvasId Id of the target <canvas>; nothing happens if it is missing.
 * @param {Array<object>} data Time-series entries; each is read via `.date` and `[key]`.
 * @param {string} key Property of each entry to plot.
 * @param {string} color Stroke colour of the series line.
 */
function drawPlot(canvasId, data, key, color) {
const canvas = document.getElementById(canvasId);
if (!canvas) return;
const ctx = canvas.getContext("2d");
// Size the drawing buffer to the element's layout width and a fixed height.
canvas.width = canvas.offsetWidth;
canvas.height = 100;
const w = canvas.width, h = canvas.height, pad = 30;
const plotW = w - pad * 2, plotH = h - pad * 2;
// Only entries that actually carry a value for this key are plotted.
const pts = data.filter(t => t[key] != null);
if (!pts.length) { ctx.clearRect(0, 0, canvas.width, canvas.height); ctx.fillStyle = "#999"; ctx.font = "12px sans-serif"; ctx.fillText("No data", pad, pad + plotH / 2); return; }
const dates = pts.map(t => new Date(t.date));
const vals = pts.map(t => t[key]);
const minD = new Date(Math.min(...dates)), maxD = new Date(Math.max(...dates));
const minV = Math.min(...vals), maxV = Math.max(...vals);
// "|| 1" avoids dividing by zero when all dates or all values coincide.
const dRange = maxD - minD || 1, vRange = maxV - minV || 1;
// Map a date / value to canvas pixel coordinates (y grows downwards).
const x = d => pad + ((new Date(d) - minD) / dRange) * plotW;
const y = v => pad + plotH - ((v - minV) / vRange) * plotH;
ctx.clearRect(0, 0, w, h);
// Axes: left edge and bottom edge of the plot area.
ctx.strokeStyle = "#ccc";
ctx.beginPath(); ctx.moveTo(pad, pad); ctx.lineTo(pad, pad + plotH); ctx.lineTo(pad + plotW, pad + plotH); ctx.stroke();
// Min / max value labels next to the vertical axis.
ctx.fillStyle = "#000";
ctx.font = "9px sans-serif";
ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
ctx.fillText(maxV.toFixed(3), 2, pad + 3);
// Series line through all points, in array order.
ctx.strokeStyle = color;
ctx.beginPath();
pts.forEach((t, i) => { const px = x(t.date), py = y(t[key]); i ? ctx.lineTo(px, py) : ctx.moveTo(px, py); });
ctx.stroke();
// Red vertical cursor at the slider's current date.
const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
const xPos = x(curDate);
ctx.strokeStyle = "#f00";
ctx.lineWidth = 2;
ctx.beginPath(); ctx.moveTo(xPos, pad); ctx.lineTo(xPos, pad + plotH); ctx.stroke();
// Label the cursor with the value of the point nearest in time.
const closest = pts.reduce((c, t) => Math.abs(new Date(t.date) - new Date(curDate)) < Math.abs(new Date(c.date) - new Date(curDate)) ? t : c);
if (closest) { ctx.fillStyle = "#f00"; ctx.font = "bold 10px sans-serif"; ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5); }
}
/** Redraw every plot: NDVI, GCC and one plot per entry in BANDS. */
function drawPlots() {
  const plots = [
    ["plot_ndvi", ndviTs, "ndvi", "#2d7a3e"],
    ["plot_gcc", gccTs, "greenness_index", "#00aa00"],
    ...BANDS.map(({ key, color }) => [`plot_${key}`, bandsTs, key, color]),
  ];
  for (const [canvasId, series, field, color] of plots) {
    drawPlot(canvasId, series, field, color);
  }
}
/**
 * Refresh the download links next to the season heading.
 * ItB mode links only to the GCC time series; otherwise JSON and CSV exports
 * are offered with a descriptive download file name.
 */
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;
  const root = getProcessedPath();
  let links;
  if (fusionMode === "itb") {
    links = `<a href="${root}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
  } else {
    const base = `${root}/export/${source}`;
    const name = `${siteName}_${season}_postprocessed_${strategy}_sigma${sigma}_${source}`;
    links = ["json", "csv"]
      .map((ext) => `<a href="${base}/timeseries.${ext}" download="${name}.${ext}">[${ext.toUpperCase()}]</a>`)
      .join("");
  }
  el.innerHTML = links;
}
/**
 * Find the processed GeoTIFF closest in time to a date.
 *
 * Candidates are probed with HEAD requests, nearest day first (the day
 * itself, then one day before/after, and so on up to 365 days), restricted
 * to the season.
 *
 * @param {string} dateStr Target date as YYYY-MM-DD.
 * @returns {Promise<string|null>} File name (`YYYYMMDD_0.geotiff`) or null if none exists.
 */
async function findProcessedFile(dateStr) {
  const DAY_MS = 86400000;
  // A date-only string parses as UTC midnight and candidates are formatted
  // with toISOString() (UTC). The season bounds were local-time midnights, so
  // depending on the viewer's timezone the first or last day of the season
  // was wrongly excluded. Express the bounds in UTC as well.
  const target = new Date(dateStr).getTime();
  if (Number.isNaN(target)) return null;
  const seasonStart = Date.UTC(start.getFullYear(), start.getMonth(), start.getDate());
  const seasonEnd = Date.UTC(parseInt(season, 10), 11, 31);
  for (let offset = 0; offset <= 365; offset++) {
    for (const dir of offset === 0 ? [0] : [-1, 1]) {
      const t = target + dir * offset * DAY_MS;
      if (t < seasonStart || t > seasonEnd) continue;
      const ds = new Date(t).toISOString().split("T")[0].replace(/-/g, "");
      const filename = `${ds}_0.geotiff`;
      try {
        const res = await fetch(`${getProcessedPath()}/${source}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Best effort: a failed probe just means "try the next candidate".
      }
    }
  }
  return null;
}
// Convert a [minX, minY, maxX, maxY] box in `fromCRS` into Leaflet-style
// [[south, west], [north, east]] bounds by reprojecting its two corners.
function transformBounds(bbox, fromCRS) {
  // proj4 yields [lon, lat]; Leaflet wants [lat, lon].
  const cornerToLatLng = (x, y) => {
    const projected = proj4(fromCRS, "EPSG:4326", [x, y]);
    return [projected[1], projected[0]];
  };
  const southWest = cornerToLatLng(bbox[0], bbox[1]);
  const northEast = cornerToLatLng(bbox[2], bbox[3]);
  return [southWest, northEast];
}
// Download a post-processed raster and convert it for display.
// Returns { dataUrl, bounds, dateStr } where `bounds` is in Leaflet
// [[south, west], [north, east]] order and `dateStr` is the filename with the
// "_0.geotiff" suffix removed.
// Throws when the HTTP request fails or returns a non-2xx status, so an error
// page is never handed to the GeoTIFF decoder.
async function loadGeotiff(filename) {
  const path = `${getProcessedPath()}/${source}/${filename}`;
  const res = await fetch(path);
  if (!res.ok) throw new Error(`Failed to load ${path}: HTTP ${res.status}`);
  const buf = await res.arrayBuffer();
  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
  // Rasters already in EPSG:4326 only need their axes swapped for Leaflet.
  const bounds = crsCode === "EPSG:4326"
    ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
    : transformBounds(bbox, crsCode);
  const dateStr = filename.replace("_0.geotiff", "");
  return { dataUrl, bounds, dateStr };
}
// Show the raster closest to the slider date on the map, or clear the overlay
// when none is available or loading fails.
//
// The function is called on every slider "input" event, so several calls can
// be in flight at once. Each call takes a ticket and gives up after an await
// if a newer call has started, which keeps a slow, outdated response from
// overwriting the overlay of the latest selection.
async function updateMap() {
  const ticket = (updateMap.latestTicket = (updateMap.latestTicket || 0) + 1);
  const superseded = () => ticket !== updateMap.latestTicket;
  const mapDateEl = document.getElementById("mapDate");
  const clearOverlay = () => {
    // Guard the map too: removing a layer from a missing map would throw.
    if (overlay && postprocessedMap) postprocessedMap.removeLayer(overlay);
    overlay = null;
    mapDateEl.textContent = "";
  };

  if (!postprocessedMap) {
    // No map yet: skip the (potentially long) file probing entirely.
    clearOverlay();
    return;
  }
  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
  const filename = await findProcessedFile(dateStr);
  if (superseded()) return;
  if (!filename) {
    clearOverlay();
    return;
  }
  try {
    const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
    if (superseded()) return;
    if (overlay) postprocessedMap.removeLayer(overlay);
    overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(postprocessedMap);
    postprocessedMap.fitBounds(bounds);
    mapDateEl.textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
  } catch (e) {
    if (superseded()) return;
    console.warn(`Could not display ${filename}:`, e);
    clearOverlay();
  }
}
// Resolve to true when metrics.json exists for the given site and season
// (checked with a HEAD request); any fetch failure counts as "no data".
async function probeDataExists(sitename, s) {
  const metricsUrl = `data/${sitename}/${s}/metrics.json`;
  let exists = false;
  try {
    const response = await fetch(metricsUrl, { method: "HEAD" });
    exists = response.ok;
  } catch {
    exists = false;
  }
  return exists;
}
// Look up the GeoJSON feature whose properties.sitename equals `sn` in the
// catalogue stored on window.sitesData; undefined when absent.
function getSiteBySitename(sn) {
  const catalogue = window.sitesData;
  if (catalogue == null) return undefined;
  const features = catalogue.features;
  if (features == null) return undefined;
  return features.find((feature) => feature.properties?.sitename === sn);
}
// Switch the page to another site/season: update the global selection,
// recentre the map, resize the date slider to the season, write the selection
// into the URL, then reload the timeseries and the map overlay.
//
// The URL is written through the shared `urlParams` object. The select
// handlers also write the URL from `urlParams`; using a separate
// URLSearchParams here meant the next strategy/source change silently put the
// previous site and season back into the address bar.
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const year = parseInt(season, 10);
  start = new Date(year, 0, 1);

  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (postprocessedMap) {
    postprocessedMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }
  document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
  document.getElementById("season").textContent = season;

  // Slider range = day offset of Dec 31 from Jan 1 (364, or 365 in leap years).
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(year, 11, 31);
  slider.max = Math.round((yearEnd - start) / 86400000);

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  urlParams.set("mode", fusionMode);
  history.replaceState({}, "", `?${urlParams}`);

  // An explicit ?date= in the URL positions the slider.
  const urlDate = urlParams.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));

  await loadTimeseries();
  await updateMap();
}
// Page bootstrap: load the site catalogue, discover which site/season pairs
// have data, populate the selectors from the URL (with defaults), create the
// Leaflet map and marker, wire up the select handlers, and render the initial
// selection.
//
// Changes over the previous version:
//  - availability probes run concurrently instead of one HEAD at a time;
//  - strategy / sigma / source from the query string are only accepted when
//    they match an <option> of their select, otherwise the default is used;
//  - <option> elements are created through the DOM, not HTML strings;
//  - the ?site= lookup ignores inherited object keys such as "constructor".
async function init() {
  // Load the catalogue; a missing or unreadable file yields an empty one.
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }
  const features = window.sitesData.features || [];

  // Probe every advertised site/season pair in parallel.
  await Promise.all(features.map(async (f) => {
    const sn = f.properties?.sitename;
    if (!sn) return;
    const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
    const hasData = await Promise.all(seasonsFromGeo.map((s) => probeDataExists(sn, s)));
    const withData = seasonsFromGeo.filter((_, i) => hasData[i]);
    if (withData.length) availableSiteSeasons[sn] = withData;
  }));

  const availableSites = Object.keys(availableSiteSeasons).sort();
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");

  const appendOption = (select, value) => {
    const opt = document.createElement("option");
    opt.value = value;
    opt.textContent = value;
    select.appendChild(opt);
  };
  const fillSeasons = (seasons) => {
    seasonSelect.textContent = "";
    seasons.forEach((s) => appendOption(seasonSelect, s));
  };

  // With no discovered data, fall back to a single default site and season.
  siteSelect.textContent = "";
  (availableSites.length ? availableSites : ["innsbruck"]).forEach((sn) => {
    appendOption(siteSelect, sn);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  });

  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const isKnownSite = (sn) => Object.prototype.hasOwnProperty.call(availableSiteSeasons, sn);
  const initialSite = (urlSite && isKnownSite(urlSite)) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");
  siteSelect.value = initialSite;
  fillSeasons(initialSeasons);
  seasonSelect.value = initialSeason;

  // Accept a URL value only if the select offers it; otherwise use the default.
  const chooseOption = (selectId, requested, fallback) => {
    const select = document.getElementById(selectId);
    const offered = requested != null && Array.from(select.options).some((o) => o.value === requested);
    const value = offered ? requested : fallback;
    select.value = value;
    return value;
  };
  strategy = chooseOption("strategySelect", urlParams.get("strategy"), "aggressive");
  sigma = chooseOption("sigmaSelect", urlParams.get("sigma"), "20");
  source = chooseOption("sourceSelect", urlParams.get("source"), "s2");
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("fusionModeSelect").value = fusionMode;

  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  postprocessedMap = L.map("postprocessedMap", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(postprocessedMap);

  siteSelect.addEventListener("change", function() {
    const sn = this.value;
    const seas = availableSiteSeasons[sn] || [];
    fillSeasons(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function() {
    setSiteSeason(siteSelect.value, this.value);
  });

  // The remaining selects all: store the value, mirror it into the URL, reload.
  const onOptionChange = (selectId, param, assign) => {
    document.getElementById(selectId).addEventListener("change", function() {
      assign(this.value);
      urlParams.set(param, this.value);
      history.replaceState({}, "", `?${urlParams}`);
      loadTimeseries(); updateMap();
    });
  };
  onOptionChange("strategySelect", "strategy", (v) => { strategy = v; });
  onOptionChange("sigmaSelect", "sigma", (v) => { sigma = v; });
  onOptionChange("sourceSelect", "source", (v) => { source = v; });
  onOptionChange("fusionModeSelect", "mode", (v) => { fusionMode = v; });

  await setSiteSeason(initialSite, initialSeason);
}
// Dragging the date slider: refresh the date label, move the plot cursors and
// swap the map overlay. Then start the page.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const dayOffset = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(dayOffset);
  drawPlots();
  updateMap();
});
init();
</script>
</body>
</html>

View file

@ -1,379 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Prepared S2/S3 Viewer</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
<script src="common.js"></script>
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
.selectors { margin-bottom: 20px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { margin: 0 0 5px 0; font-size: 22px; }
.season-row { padding-bottom: 15px; }
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
.download-links { margin-left: 10px; font-size: 14px; }
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
.download-links a:hover { text-decoration: underline; }
#dateSlider { width: 100%; margin: 15px 0; }
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
#preparedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
.leaflet-image-layer { image-rendering: pixelated; }
.leaflet-control-attribution { display: none; }
</style>
</head>
<body>
<div class="container">
<div class="header-sticky">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html">Pre-selection</a>
<a href="prepared.html" class="active">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="siteName">Innsbruck</h1>
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
<label>Strategy:</label>
<select id="strategySelect">
<option value="aggressive">Aggressive</option>
<option value="nonaggressive">Non-aggressive</option>
</select>
<label>Source:</label>
<select id="sourceSelect">
<option value="s2">S2</option>
<option value="s3">S3</option>
</select>
<label>Mode:</label>
<select id="fusionModeSelect" title="BtI = REFL/composite; ItB = GCC rasters">
<option value="bti">BtI</option>
<option value="itb">ItB</option>
</select>
</div>
<input type="range" id="dateSlider" min="0" max="365" value="0">
<div id="dateDisplay">2024-01-01</div>
</div>
<div class="map-label" id="mapLabel">Prepared RGB (closest available)</div>
<div id="mapDate" class="map-date"></div>
<div id="preparedMap"></div>
<div id="plots">
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
</div>
</div>
<script>
// Coordinate systems registered with proj4 for reprojecting raster bounds.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// Current selection (overwritten from the URL and the selectors).
let siteName = "innsbruck";
let season = "2024";
let strategy = "aggressive";
let source = "s2";
let fusionMode = "bti";

// Map centre as [lat, lon], and Jan 1 of the selected season.
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);

// sitename -> list of seasons that have data.
let availableSiteSeasons = {};

// Leaflet objects, created in init().
let preparedMap = null;
let overlay = null;
let marker = null;

// Loaded timeseries for the plots.
let ndviTs = [];
let gccTs = [];
let bandsTs = [];

// Band plots: timeseries key and line colour.
const BANDS = [
  { key: "b02", color: "#0066ff" },
  { key: "b03", color: "#00aa00" },
  { key: "b04", color: "#cc0000" },
  { key: "b8a", color: "#9900cc" },
];

const urlParams = new URLSearchParams(location.search);
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
// Format a Date as local-time "YYYY-MM-DD".
const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;

// Day offset from the season start -> "YYYY-MM-DD".
// Uses calendar arithmetic (the Date constructor normalises day overflow)
// instead of adding multiples of 24 h, which lands on the wrong local day when
// a daylight-saving shift lies between the season start and the target.
const dateFromDays = (days) =>
  fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));

// "YYYY-MM-DD" -> day offset from the season start.
// Two local midnights can be N days plus or minus one hour apart across a
// daylight-saving change, so round to the nearest day rather than flooring
// (flooring returned N-1 for summer dates in DST timezones).
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  return Math.round((new Date(y, m - 1, d) - start) / 86400000);
};
// Directory holding prepared data for the current site, season and strategy;
// ItB mode uses the "_itb" variant of the strategy directory.
function getPreparedPath() {
  let stageDir = `prepared_${strategy}`;
  if (fusionMode === "itb") {
    stageDir = `prepared_${strategy}_itb`;
  }
  return `data/${siteName}/${season}/${stageDir}`;
}
// Load the NDVI, GCC and band timeseries for the current selection, then
// redraw the plots and refresh the download links.
//
// Each series is fetched independently: a missing, failing or malformed file
// yields an empty series for that plot only, instead of blanking all three.
// Non-array payloads are rejected because drawPlot() calls .filter on them.
async function loadTimeseries() {
  const root = getPreparedPath();
  const fetchSeries = async (kind) => {
    try {
      const res = await fetch(`${root}/${kind}/${source}/timeseries.json`);
      if (!res.ok) return [];
      const payload = await res.json();
      return Array.isArray(payload) ? payload : [];
    } catch {
      return [];
    }
  };
  const [n, g, b] = await Promise.all([
    fetchSeries("ndvi"),
    fetchSeries("gcc"),
    fetchSeries("bands"),
  ]);
  ndviTs = n; gccTs = g; bandsTs = b;
  drawPlots();
  updateDownloadLinks();
}
// Draw one timeseries on the canvas `canvasId`: axes, min/max labels, the line
// for `data[*][key]` in `color`, a red cursor at the slider date and the value
// of the sample closest to that date. Shows "No data" when nothing is plottable.
function drawPlot(canvasId, data, key, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const margin = 30;
  const width = canvas.width;
  const height = canvas.height;
  const innerW = width - margin * 2;
  const innerH = height - margin * 2;

  const samples = data.filter((row) => row[key] != null);
  if (samples.length === 0) {
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.fillStyle = "#999";
    ctx.font = "12px sans-serif";
    ctx.fillText("No data", margin, margin + innerH / 2);
    return;
  }

  // Data extents; a zero range falls back to 1 to avoid dividing by zero.
  const times = samples.map((row) => new Date(row.date));
  const values = samples.map((row) => row[key]);
  const firstDay = new Date(Math.min(...times));
  const lastDay = new Date(Math.max(...times));
  const lowest = Math.min(...values);
  const highest = Math.max(...values);
  const timeSpan = lastDay - firstDay || 1;
  const valueSpan = highest - lowest || 1;
  const toX = (when) => margin + ((new Date(when) - firstDay) / timeSpan) * innerW;
  const toY = (value) => margin + innerH - ((value - lowest) / valueSpan) * innerH;

  // Axes.
  ctx.clearRect(0, 0, width, height);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(margin, margin);
  ctx.lineTo(margin, margin + innerH);
  ctx.lineTo(margin + innerW, margin + innerH);
  ctx.stroke();

  // Min / max labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(lowest.toFixed(3), 2, margin + innerH + 10);
  ctx.fillText(highest.toFixed(3), 2, margin + 3);

  // Series line.
  ctx.strokeStyle = color;
  ctx.beginPath();
  samples.forEach((row, index) => {
    const px = toX(row.date);
    const py = toY(row[key]);
    if (index === 0) ctx.moveTo(px, py);
    else ctx.lineTo(px, py);
  });
  ctx.stroke();

  // Cursor at the slider date.
  const cursorDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const cursorX = toX(cursorDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(cursorX, margin);
  ctx.lineTo(cursorX, margin + innerH);
  ctx.stroke();

  // Label the sample nearest to the cursor (first one wins on ties).
  const distanceToCursor = (row) => Math.abs(new Date(row.date) - new Date(cursorDate));
  let nearest = samples[0];
  for (const row of samples.slice(1)) {
    if (distanceToCursor(row) < distanceToCursor(nearest)) nearest = row;
  }
  if (nearest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(nearest[key].toFixed(3), cursorX + 5, toY(nearest[key]) - 5);
  }
}
// Redraw every timeseries canvas: NDVI, GCC, then one plot per entry in BANDS.
function drawPlots() {
  const indexPlots = [
    ["plot_ndvi", ndviTs, "ndvi", "#2d7a3e"],
    ["plot_gcc", gccTs, "greenness_index", "#00aa00"],
  ];
  for (const [canvasId, series, field, stroke] of indexPlots) {
    drawPlot(canvasId, series, field, stroke);
  }
  for (const band of BANDS) {
    drawPlot(`plot_${band.key}`, bandsTs, band.key, band.color);
  }
}
// Rebuild the download links next to the season heading for the current
// site / season / strategy / source selection.
//
// The links are created with DOM APIs rather than by assigning an HTML string:
// strategy and source are read from the query string, so interpolating them
// into innerHTML would let a crafted URL inject markup into the page.
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;

  // Build one <a>; `downloadName` is optional (the ItB link has none).
  const makeLink = (href, label, downloadName) => {
    const a = document.createElement("a");
    a.setAttribute("href", href);
    if (downloadName != null) a.setAttribute("download", downloadName);
    a.textContent = label;
    return a;
  };

  const root = getPreparedPath();
  let links;
  if (fusionMode === "itb") {
    // ItB mode links only the GCC timeseries.
    links = [makeLink(`${root}/gcc/${source}/timeseries.json`, "[GCC JSON]")];
  } else {
    const base = `${root}/export/${source}`;
    const name = `${siteName}_${season}_prepared_${strategy}_${source}`;
    links = [
      makeLink(`${base}/timeseries.json`, "[JSON]", `${name}.json`),
      makeLink(`${base}/timeseries.csv`, "[CSV]", `${name}.csv`),
    ];
  }
  el.textContent = "";
  el.append(...links);
}
// Find the prepared raster closest to `dateStr` ("YYYY-MM-DD") inside the
// current season by probing candidate filenames with HEAD requests, nearest
// day first (earlier day before later day at equal distance).
// Returns the filename or null when nothing is found.
//
// All date arithmetic is done in UTC: a date-only ISO string parses as UTC
// midnight, so comparing it against local-midnight season bounds would drop
// Jan 1 or Dec 31 depending on the browser's timezone.
async function findPreparedFile(dateStr) {
  const DAY_MS = 86400000;
  const target = new Date(dateStr).getTime();
  if (Number.isNaN(target)) return null; // unparseable date: nothing to look up
  const year = parseInt(season, 10);
  const seasonStart = Date.UTC(year, 0, 1);
  const seasonEnd = Date.UTC(year, 11, 31);

  // Filename pattern per source and mode.
  // NOTE(review): S2 names are probed with the "S2A" prefix only — confirm
  // that no other platform prefixes occur in the prepared directories.
  const filenameFor = (ds) => {
    if (source !== "s2") return `composite_${ds}.tif`;
    return fusionMode === "itb" ? `S2A_MSIL2A_${ds}_GCC.tif` : `S2A_MSIL2A_${ds}_REFL.tif`;
  };

  for (let offset = 0; offset <= 365; offset++) {
    const before = target - offset * DAY_MS;
    const after = target + offset * DAY_MS;
    // Both directions have left the season: larger offsets cannot match either.
    if (before < seasonStart && after > seasonEnd) break;
    const candidates = offset === 0 ? [target] : [before, after];
    for (const t of candidates) {
      if (t < seasonStart || t > seasonEnd) continue;
      const ds = new Date(t).toISOString().split("T")[0].replace(/-/g, "");
      const filename = filenameFor(ds);
      try {
        const res = await fetch(`${getPreparedPath()}/${source}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Best effort: a failed probe is treated like a missing file.
      }
    }
  }
  return null;
}
// Convert a [minX, minY, maxX, maxY] box in `fromCRS` into Leaflet-style
// [[south, west], [north, east]] bounds by reprojecting its two corners.
function transformBounds(bbox, fromCRS) {
  // proj4 yields [lon, lat]; Leaflet wants [lat, lon].
  const cornerToLatLng = (x, y) => {
    const projected = proj4(fromCRS, "EPSG:4326", [x, y]);
    return [projected[1], projected[0]];
  };
  const southWest = cornerToLatLng(bbox[0], bbox[1]);
  const northEast = cornerToLatLng(bbox[2], bbox[3]);
  return [southWest, northEast];
}
// Download a prepared raster and convert it for display.
// Returns { dataUrl, bounds, dateStr } where `bounds` is in Leaflet
// [[south, west], [north, east]] order and `dateStr` is the first 8-digit run
// in the filename ("" when there is none).
// Throws when the HTTP request fails or returns a non-2xx status, so an error
// page is never handed to the GeoTIFF decoder.
async function loadGeotiff(filename) {
  const path = `${getPreparedPath()}/${source}/${filename}`;
  const res = await fetch(path);
  if (!res.ok) throw new Error(`Failed to load ${path}: HTTP ${res.status}`);
  const buf = await res.arrayBuffer();
  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
  // Rasters already in EPSG:4326 only need their axes swapped for Leaflet.
  const bounds = crsCode === "EPSG:4326"
    ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
    : transformBounds(bbox, crsCode);
  const m = filename.match(/(\d{8})/);
  const dateStr = m ? m[1] : "";
  return { dataUrl, bounds, dateStr };
}
// Show the raster closest to the slider date on the map, or clear the overlay
// when none is available or loading fails.
//
// The function is called on every slider "input" event, so several calls can
// be in flight at once. Each call takes a ticket and gives up after an await
// if a newer call has started, which keeps a slow, outdated response from
// overwriting the overlay of the latest selection.
async function updateMap() {
  const ticket = (updateMap.latestTicket = (updateMap.latestTicket || 0) + 1);
  const superseded = () => ticket !== updateMap.latestTicket;
  const mapDateEl = document.getElementById("mapDate");
  const clearOverlay = () => {
    // Guard the map too: removing a layer from a missing map would throw.
    if (overlay && preparedMap) preparedMap.removeLayer(overlay);
    overlay = null;
    mapDateEl.textContent = "";
  };

  if (!preparedMap) {
    // No map yet: skip the (potentially long) file probing entirely.
    clearOverlay();
    return;
  }
  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
  const filename = await findPreparedFile(dateStr);
  if (superseded()) return;
  if (!filename) {
    clearOverlay();
    return;
  }
  try {
    const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
    if (superseded()) return;
    if (overlay) preparedMap.removeLayer(overlay);
    overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(preparedMap);
    preparedMap.fitBounds(bounds);
    mapDateEl.textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
  } catch (e) {
    if (superseded()) return;
    console.warn(`Could not display ${filename}:`, e);
    clearOverlay();
  }
}
// Resolve to true when the S2 pre-selection manifest exists for the given site
// and season (checked with a HEAD request); any fetch failure counts as "no data".
async function probeDataExists(sitename, s) {
  const manifestUrl = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
  let exists = false;
  try {
    const response = await fetch(manifestUrl, { method: "HEAD" });
    exists = response.ok;
  } catch {
    exists = false;
  }
  return exists;
}
// Look up the GeoJSON feature whose properties.sitename equals `sn` in the
// catalogue stored on window.sitesData; undefined when absent.
function getSiteBySitename(sn) {
  const catalogue = window.sitesData;
  if (catalogue == null) return undefined;
  const features = catalogue.features;
  if (features == null) return undefined;
  return features.find((feature) => feature.properties?.sitename === sn);
}
// Switch the page to another site/season: update the global selection,
// recentre the map, resize the date slider to the season, write the selection
// into the URL, then reload the timeseries and the map overlay.
//
// The URL is written through the shared `urlParams` object. The select
// handlers also write the URL from `urlParams`; using a separate
// URLSearchParams here meant the next strategy/source change silently put the
// previous site and season back into the address bar.
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const year = parseInt(season, 10);
  start = new Date(year, 0, 1);

  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (preparedMap) {
    preparedMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }
  document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
  document.getElementById("season").textContent = season;

  // Slider range = day offset of Dec 31 from Jan 1 (364, or 365 in leap years).
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(year, 11, 31);
  slider.max = Math.round((yearEnd - start) / 86400000);

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  urlParams.set("mode", fusionMode);
  history.replaceState({}, "", `?${urlParams}`);

  // An explicit ?date= in the URL positions the slider.
  const urlDate = urlParams.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));

  await loadTimeseries();
  await updateMap();
}
// Page bootstrap: load the site catalogue, discover which site/season pairs
// have data, populate the selectors from the URL (with defaults), create the
// Leaflet map and marker, wire up the select handlers, and render the initial
// selection.
//
// Changes over the previous version:
//  - availability probes run concurrently instead of one HEAD at a time;
//  - strategy / source from the query string are only accepted when they match
//    an <option> of their select, otherwise the default is used;
//  - <option> elements are created through the DOM, not HTML strings;
//  - the ?site= lookup ignores inherited object keys such as "constructor".
async function init() {
  // Load the catalogue; a missing or unreadable file yields an empty one.
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }
  const features = window.sitesData.features || [];

  // Probe every advertised site/season pair in parallel.
  await Promise.all(features.map(async (f) => {
    const sn = f.properties?.sitename;
    if (!sn) return;
    const seasonsFromGeo = f.properties?.seasons ? Object.keys(f.properties.seasons).sort() : [];
    const hasData = await Promise.all(seasonsFromGeo.map((s) => probeDataExists(sn, s)));
    const withData = seasonsFromGeo.filter((_, i) => hasData[i]);
    if (withData.length) availableSiteSeasons[sn] = withData;
  }));

  const availableSites = Object.keys(availableSiteSeasons).sort();
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");

  const appendOption = (select, value) => {
    const opt = document.createElement("option");
    opt.value = value;
    opt.textContent = value;
    select.appendChild(opt);
  };
  const fillSeasons = (seasons) => {
    seasonSelect.textContent = "";
    seasons.forEach((s) => appendOption(seasonSelect, s));
  };

  // With no discovered data, fall back to a single default site and season.
  siteSelect.textContent = "";
  (availableSites.length ? availableSites : ["innsbruck"]).forEach((sn) => {
    appendOption(siteSelect, sn);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  });

  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const isKnownSite = (sn) => Object.prototype.hasOwnProperty.call(availableSiteSeasons, sn);
  const initialSite = (urlSite && isKnownSite(urlSite)) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");
  siteSelect.value = initialSite;
  fillSeasons(initialSeasons);
  seasonSelect.value = initialSeason;

  // Accept a URL value only if the select offers it; otherwise use the default.
  const chooseOption = (selectId, requested, fallback) => {
    const select = document.getElementById(selectId);
    const offered = requested != null && Array.from(select.options).some((o) => o.value === requested);
    const value = offered ? requested : fallback;
    select.value = value;
    return value;
  };
  strategy = chooseOption("strategySelect", urlParams.get("strategy"), "aggressive");
  source = chooseOption("sourceSelect", urlParams.get("source"), "s2");
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("fusionModeSelect").value = fusionMode;

  // The map caption depends on the fusion mode.
  const refreshMapLabel = () => {
    const ml = document.getElementById("mapLabel");
    if (ml) ml.textContent = fusionMode === "itb" ? "Prepared GCC grayscale / S3 (closest available)" : "Prepared RGB (closest available)";
  };
  refreshMapLabel();

  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  preparedMap = L.map("preparedMap", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(preparedMap);

  siteSelect.addEventListener("change", function() {
    const sn = this.value;
    const seas = availableSiteSeasons[sn] || [];
    fillSeasons(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function() {
    setSiteSeason(siteSelect.value, this.value);
  });

  // The remaining selects all: store the value, mirror it into the URL, reload.
  const onOptionChange = (selectId, param, assign) => {
    document.getElementById(selectId).addEventListener("change", function() {
      assign(this.value);
      urlParams.set(param, this.value);
      history.replaceState({}, "", `?${urlParams}`);
      loadTimeseries(); updateMap();
    });
  };
  onOptionChange("strategySelect", "strategy", (v) => { strategy = v; });
  onOptionChange("sourceSelect", "source", (v) => { source = v; });
  onOptionChange("fusionModeSelect", "mode", (v) => { fusionMode = v; refreshMapLabel(); });

  await setSiteSeason(initialSite, initialSeason);
}
// Dragging the date slider: refresh the date label, move the plot cursors and
// swap the map overlay. Then start the page.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const dayOffset = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(dayOffset);
  drawPlots();
  updateMap();
});
init();
</script>
</body>
</html>

View file

@ -1,541 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>S2 Band Reflectance Timeseries</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
<style>
body { margin: 0; font-family: sans-serif; }
.nav { margin-bottom: 15px; font-size: 14px; }
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
.nav a:hover { text-decoration: underline; }
.nav a.active { font-weight: bold; }
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
.selectors { margin-bottom: 20px; }
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
h1 { margin: 0 0 5px 0; font-size: 22px; }
.season-row { padding-bottom: 15px; }
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
.download-links { margin-left: 10px; font-size: 14px; }
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
.download-links a:hover { text-decoration: underline; }
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
#dateSlider { width: 100%; margin: 15px 0; }
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
#s2map { height: 400px; border: 1px solid #ccc; margin-top: 10px; }
.leaflet-image-layer { image-rendering: pixelated; }
.leaflet-control-attribution { display: none; }
</style>
</head>
<body>
<div class="container">
<div class="header-sticky">
<div class="nav">
<a href="index.html">Full</a>
<a href="preselection.html" class="active">Pre-selection</a>
<a href="prepared.html">Prepared</a>
<a href="fusion.html">Fusion</a>
<a href="postprocessed.html">Postprocessed</a>
<a href="metrics.html">Metrics</a>
<a href="gap_validation.html">Gap validation</a>
<a href="phenology.html">Phenology</a>
</div>
<h1 id="siteName">Innsbruck</h1>
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
<div class="selectors">
<label>Site:</label>
<select id="siteSelect"></select>
<label>Season:</label>
<select id="seasonSelect"></select>
<label>Source:</label>
<select id="sourceSelect">
<option value="s2">S2</option>
<option value="s3">S3</option>
</select>
<label>Exclusion:</label>
<select id="exclusionSelect">
<option value="none">None</option>
<option value="aggressive">Aggressive</option>
<option value="nonaggressive">Non-aggressive</option>
</select>
</div>
<input type="range" id="dateSlider" min="0" max="365" value="0">
<div id="dateDisplay">2024-01-01</div>
</div>
<div class="map-label" id="mapLabel">S2 RGB (closest available)</div>
<div id="s2rgbdate" class="map-date"></div>
<div id="s2map"></div>
<div id="bandPlots"></div>
</div>
<script>
// Coordinate systems registered with proj4 for reprojecting raster bounds.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// One plot per band: timeseries key, plot label and line colour.
const BANDS = [
  { key: "b02", label: "B02 (Blue)", color: "#0066ff" },
  { key: "b03", label: "B03 (Green)", color: "#00aa00" },
  { key: "b04", label: "B04 (Red)", color: "#cc0000" },
  { key: "b8a", label: "B8A (NIR)", color: "#9900cc" },
];

// Current selection.
let siteName = "innsbruck";
let season = "2024";
let source = "s2";
let exclusion = "none";

// Default map centre and Jan 1 of the selected season.
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);

// Loaded timeseries (bands, GCC, NDVI).
let timeseries = [];
let gccTimeseries = [];
let ndviTimeseries = [];

// sitename -> list of seasons that have data.
let availableSiteSeasons = {};

// Leaflet objects.
let s2Map = null;
let s2Overlay = null;
let s2Marker = null;

const urlParams = new URLSearchParams(location.search);
// Apply the selected exclusion filter to a timeseries: "none" returns the
// array itself; otherwise rows flagged by the matching excluded_* field are
// dropped.
function filteredTimeseries(arr) {
  if (exclusion === "none") {
    return arr;
  }
  let flag = "excluded_nonaggressive";
  if (exclusion === "aggressive") {
    flag = "excluded_aggressive";
  }
  return arr.filter((row) => !row[flag]);
}
// Format a Date as "YYYY-MM-DD" using its local-time fields.
function fmtDate(d) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const parts = [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())];
  return parts.join("-");
}
// Day offset from the season start -> "YYYY-MM-DD".
// Uses calendar arithmetic (the Date constructor normalises day overflow)
// instead of adding multiples of 24 h, which lands on the wrong local day when
// a daylight-saving shift lies between the season start and the target.
const dateFromDays = (days) =>
  fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));

// "YYYY-MM-DD" -> day offset from the season start.
// Two local midnights can be N days plus or minus one hour apart across a
// daylight-saving change, so round to the nearest day rather than flooring
// (flooring returned N-1 for summer dates in DST timezones).
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  return Math.round((new Date(y, m - 1, d) - start) / 86400000);
};
// Draw the reflectance timeseries of one band on the canvas `canvasId`: axes,
// min/max labels, the line in `color`, a tick on the time axis per sample, a
// red cursor at the slider date and the value of the sample closest to it.
// Rows removed by the exclusion filter or lacking the band value are skipped;
// with nothing to plot the canvas is left blank. `bandLabel` is not used here.
function drawBandPlot(canvasId, bandKey, bandLabel, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const margin = 30;
  const width = canvas.width;
  const height = canvas.height;
  const innerW = width - margin * 2;
  const innerH = height - margin * 2;

  const samples = filteredTimeseries(timeseries).filter((row) => row[bandKey] != null);
  if (samples.length === 0) return;

  // Data extents; a zero range falls back to 1 to avoid dividing by zero.
  const times = samples.map((row) => new Date(row.date));
  const readings = samples.map((row) => row[bandKey]);
  const firstDay = new Date(Math.min(...times));
  const lastDay = new Date(Math.max(...times));
  const timeSpan = lastDay - firstDay || 1;
  const lowest = Math.min(...readings);
  const highest = Math.max(...readings);
  const valueSpan = highest - lowest || 1;
  const toX = (when) => margin + ((new Date(when) - firstDay) / timeSpan) * innerW;
  const toY = (value) => margin + innerH - ((value - lowest) / valueSpan) * innerH;
  const baseline = margin + innerH;

  // Axes.
  ctx.clearRect(0, 0, width, height);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(margin, margin);
  ctx.lineTo(margin, margin + innerH);
  ctx.lineTo(margin + innerW, margin + innerH);
  ctx.stroke();

  // Min / max labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(lowest.toFixed(4), 2, margin + innerH + 10);
  ctx.fillText(highest.toFixed(4), 2, margin + 3);

  // Series line.
  ctx.strokeStyle = color;
  ctx.beginPath();
  samples.forEach((row, index) => {
    const px = toX(row.date);
    const py = toY(row[bandKey]);
    if (index === 0) ctx.moveTo(px, py);
    else ctx.lineTo(px, py);
  });
  ctx.stroke();

  // One small tick on the time axis per sample.
  ctx.fillStyle = "#888";
  samples.forEach((row) => {
    ctx.fillRect(toX(row.date) - 1, baseline - 1, 2, 2);
  });

  // Cursor at the slider date.
  const cursorDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const cursorX = toX(cursorDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(cursorX, margin);
  ctx.lineTo(cursorX, margin + innerH);
  ctx.stroke();

  // Label the sample nearest to the cursor (first one wins on ties).
  const distanceToCursor = (row) => Math.abs(new Date(row.date) - new Date(cursorDate));
  let nearest = samples[0];
  for (const row of samples.slice(1)) {
    if (distanceToCursor(row) < distanceToCursor(nearest)) nearest = row;
  }
  if (nearest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(nearest[bandKey].toFixed(4), cursorX + 5, toY(nearest[bandKey]) - 5);
  }
}
// Draw the NDVI timeseries on the "plot_ndvi" canvas: axes, min/max labels,
// the green line, a tick on the time axis per sample, a red cursor at the
// slider date and the value of the sample closest to it. Rows removed by the
// exclusion filter or lacking an ndvi value are skipped; with nothing to plot
// the canvas is left blank.
function drawNdviPlot() {
  const canvas = document.getElementById("plot_ndvi");
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const margin = 30;
  const width = canvas.width;
  const height = canvas.height;
  const innerW = width - margin * 2;
  const innerH = height - margin * 2;

  const samples = filteredTimeseries(ndviTimeseries).filter((row) => row.ndvi != null);
  if (samples.length === 0) return;

  // Data extents; a zero range falls back to 1 to avoid dividing by zero.
  const times = samples.map((row) => new Date(row.date));
  const readings = samples.map((row) => row.ndvi);
  const firstDay = new Date(Math.min(...times));
  const lastDay = new Date(Math.max(...times));
  const timeSpan = lastDay - firstDay || 1;
  const lowest = Math.min(...readings);
  const highest = Math.max(...readings);
  const valueSpan = highest - lowest || 1;
  const toX = (when) => margin + ((new Date(when) - firstDay) / timeSpan) * innerW;
  const toY = (value) => margin + innerH - ((value - lowest) / valueSpan) * innerH;
  const baseline = margin + innerH;

  // Axes.
  ctx.clearRect(0, 0, width, height);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(margin, margin);
  ctx.lineTo(margin, margin + innerH);
  ctx.lineTo(margin + innerW, margin + innerH);
  ctx.stroke();

  // Min / max labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(lowest.toFixed(3), 2, margin + innerH + 10);
  ctx.fillText(highest.toFixed(3), 2, margin + 3);

  // Series line.
  ctx.strokeStyle = "#2d7a3e";
  ctx.beginPath();
  samples.forEach((row, index) => {
    const px = toX(row.date);
    const py = toY(row.ndvi);
    if (index === 0) ctx.moveTo(px, py);
    else ctx.lineTo(px, py);
  });
  ctx.stroke();

  // One small tick on the time axis per sample.
  ctx.fillStyle = "#888";
  samples.forEach((row) => {
    ctx.fillRect(toX(row.date) - 1, baseline - 1, 2, 2);
  });

  // Cursor at the slider date.
  const cursorDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const cursorX = toX(cursorDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(cursorX, margin);
  ctx.lineTo(cursorX, margin + innerH);
  ctx.stroke();

  // Label the sample nearest to the cursor (first one wins on ties).
  const distanceToCursor = (row) => Math.abs(new Date(row.date) - new Date(cursorDate));
  let nearest = samples[0];
  for (const row of samples.slice(1)) {
    if (distanceToCursor(row) < distanceToCursor(nearest)) nearest = row;
  }
  if (nearest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(nearest.ndvi.toFixed(3), cursorX + 5, toY(nearest.ndvi) - 5);
  }
}
function drawGccPlot() {
const canvas = document.getElementById("plot_gcc");
if (!canvas) return;
const ctx = canvas.getContext("2d");
canvas.width = canvas.offsetWidth;
canvas.height = 100;
const w = canvas.width, h = canvas.height, pad = 30;
const plotW = w - pad * 2, plotH = h - pad * 2;
const data = filteredTimeseries(gccTimeseries).filter(t => t.greenness_index != null);
if (!data.length) return;
const dates = data.map(t => new Date(t.date));
const values = data.map(t => t.greenness_index);
const minDate = new Date(Math.min(...dates)), maxDate = new Date(Math.max(...dates));
const dateRange = maxDate - minDate || 1;
const minVal = Math.min(...values), maxVal = Math.max(...values);
const valRange = maxVal - minVal || 1;
const x = (d) => pad + ((new Date(d) - minDate) / dateRange) * plotW;
const y = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;
ctx.clearRect(0, 0, w, h);
ctx.strokeStyle = "#ccc";
ctx.beginPath();
ctx.moveTo(pad, pad);
ctx.lineTo(pad, pad + plotH);
ctx.lineTo(pad + plotW, pad + plotH);
ctx.stroke();
ctx.fillStyle = "#000";
ctx.font = "9px sans-serif";
ctx.fillText(minVal.toFixed(3), 2, pad + plotH + 10);
ctx.fillText(maxVal.toFixed(3), 2, pad + 3);
ctx.strokeStyle = "#00aa00";
ctx.beginPath();
data.forEach((t, i) => {
const px = x(t.date), py = y(t.greenness_index);
i === 0 ? ctx.moveTo(px, py) : ctx.lineTo(px, py);
});
ctx.stroke();
ctx.fillStyle = "#888";
const axisY = pad + plotH;
for (const t of data) ctx.fillRect(x(t.date) - 1, axisY - 1, 2, 2);
const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
const xPos = x(currentDate);
ctx.strokeStyle = "#f00";
ctx.lineWidth = 2;
ctx.beginPath();
ctx.moveTo(xPos, pad);
ctx.lineTo(xPos, pad + plotH);
ctx.stroke();
const closest = data.reduce((c, t) =>
Math.abs(new Date(t.date) - new Date(currentDate)) < Math.abs(new Date(c.date) - new Date(currentDate)) ? t : c
);
if (closest) {
ctx.fillStyle = "#f00";
ctx.font = "bold 10px sans-serif";
ctx.fillText(closest.greenness_index.toFixed(3), xPos + 5, y(closest.greenness_index) - 5);
}
}
/**
 * Redraw every plot: NDVI first, then GCC, then one plot per entry of BANDS
 * (each on the canvas whose id is `plot_<band key>`).
 */
function drawAllPlots() {
  drawNdviPlot();
  drawGccPlot();
  for (const band of BANDS) {
    const canvasId = `plot_${band.key}`;
    drawBandPlot(canvasId, band.key, band.label, band.color);
  }
}
/**
 * Compute the green chromatic coordinate b03 / (b02 + b03 + b04) of a record.
 *
 * @param {Object} entry  record carrying numeric `b02`, `b03` and `b04` fields.
 * @returns {number|null}  the ratio, or null when any of the three bands is
 *   missing / not a finite number, or when the band sum is not positive.
 */
function computeGcc(entry) {
  const { b02, b03, b04 } = entry;
  // A null band would silently coerce to 0 in the sum below and produce a
  // misleading ratio, so every band must be a real, finite number.
  const bandsValid = [b02, b03, b04].every(
    (value) => typeof value === "number" && Number.isFinite(value)
  );
  if (!bandsValid) return null;
  const total = b02 + b03 + b04;
  return total > 0 ? b03 / total : null;
}
/**
 * Fetch the preselection records for the current site / season / source and
 * rebuild everything that depends on them.
 *
 * Reads the source from the "sourceSelect" element (falling back to "s2"),
 * stores the fetched records in `timeseries`, `ndviTimeseries` and
 * `gccTimeseries` (empty arrays on a non-OK response or any error), then
 * refreshes the map label, download links, plot canvases, slider maximum,
 * plots, date display and imagery.
 */
async function loadTimeseries() {
  const byId = (id) => document.getElementById(id);
  const rawBase = `data/${siteName}/${season}/raw`;
  source = byId("sourceSelect")?.value || "s2";

  try {
    const response = await fetch(`${rawBase}/preselection/${source}_preselection.json`);
    const records = response.ok ? await response.json() : [];
    timeseries = records;
    ndviTimeseries = records;
    gccTimeseries = records
      .map((record) => ({ ...record, greenness_index: computeGcc(record) }))
      .filter((record) => record.greenness_index != null);
  } catch {
    timeseries = [];
    ndviTimeseries = [];
    gccTimeseries = [];
  }

  const srcLabel = source.toUpperCase();
  byId("mapLabel").textContent = `${srcLabel} RGB (closest available)`;

  // Download links for the same preselection data in JSON and CSV form.
  const jsonUrl = `${rawBase}/preselection/${source}_preselection.json`;
  const csvUrl = `${rawBase}/preselection/${source}_preselection.csv`;
  byId("downloadLinks").innerHTML = [
    `<a href="${jsonUrl}" download="${siteName}_${season}_${source}_preselection.json" target="_blank">[JSON]</a>`,
    `<a href="${csvUrl}" download="${siteName}_${season}_${source}_preselection.csv" target="_blank">[CSV]</a>`,
  ].join("");

  // Recreate the plot canvases: NDVI, GCC, then one per band.
  byId("bandPlots").innerHTML = [
    `<div class="plot-label">${srcLabel} NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>`,
    `<div class="plot-label">${srcLabel} GCC (Greenness Index)</div><canvas id="plot_gcc" class="plot"></canvas>`,
    ...BANDS.map((b) => `<div class="plot-label">${b.label}</div><canvas id="plot_${b.key}" class="plot"></canvas>`),
  ].join("");

  // The slider spans from `start` to 31 December of the season year (86400000 ms per day).
  const slider = byId("dateSlider");
  const yearEnd = new Date(parseInt(season), 11, 31);
  slider.max = Math.ceil((yearEnd - start) / 86400000);

  drawAllPlots();
  byId("dateDisplay").textContent = dateFromDays(parseInt(slider.value));
  updateS2Imagery();
}
/**
 * Check with a HEAD request whether the S2 preselection JSON exists for a
 * site and season.
 *
 * @param {string} sitename  site directory name under `data/`.
 * @param {string} s  season directory name.
 * @returns {Promise<boolean>}  the response's `ok` flag; false if the request throws.
 */
async function probeDataExists(sitename, s) {
  let exists = false;
  try {
    const probeUrl = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
    const response = await fetch(probeUrl, { method: "HEAD" });
    exists = response.ok;
  } catch {
    exists = false;
  }
  return exists;
}
/**
 * Look up a site feature in `window.sitesData` by its `properties.sitename`.
 *
 * @param {string} sitename  name to match.
 * @returns {Object|undefined}  the first matching feature, or undefined when
 *   there is no match or the sites data / feature list is not loaded.
 */
function getSiteBySitename(sitename) {
  const siteFeatures = window.sitesData?.features;
  if (siteFeatures == null) return undefined;
  return siteFeatures.find((feature) => feature.properties?.sitename === sitename);
}
/**
 * Switch the viewer to another site and season and reload its data.
 *
 * Updates the `siteName`, `season`, `start` and `sitePosition` globals, moves
 * the map and marker, refreshes the header texts, writes `site` and `season`
 * into the URL query string, and finally calls loadTimeseries().
 *
 * A `date` query parameter, when present, is applied to the slider BEFORE
 * loadTimeseries() runs. Previously it was applied afterwards, so the plots,
 * date label and imagery had already been rendered for the old slider
 * position and did not reflect the requested date.
 *
 * @param {string} newSite  sitename to activate.
 * @param {string} newSeason  season (year) to activate.
 * @returns {Promise<void>}  resolves once loadTimeseries() has finished.
 */
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const seasonYear = parseInt(season, 10);
  start = new Date(seasonYear, 0, 1);

  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    // The site coordinates are [lon, lat]; sitePosition is stored as [lat, lon].
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (s2Map) {
    s2Map.setView(sitePosition, 12);
    if (s2Marker) s2Marker.setLatLng(sitePosition);
  }

  document.getElementById("siteName").textContent = site?.properties?.description || newSite;
  document.getElementById("season").textContent = season;

  const params = new URLSearchParams(location.search);
  params.set("site", siteName);
  params.set("season", season);
  history.replaceState({}, "", `?${params}`);

  const urlDate = params.get("date");
  if (urlDate) {
    const dayOffset = daysFromDate(urlDate);
    // Ignore an unparsable date instead of pushing NaN into the slider.
    if (Number.isFinite(dayOffset)) {
      const slider = document.getElementById("dateSlider");
      // Set the season's maximum first (same formula loadTimeseries() uses)
      // so the requested value is not clamped against a stale maximum.
      const yearEnd = new Date(seasonYear, 11, 31);
      slider.max = Math.ceil((yearEnd - start) / 86400000);
      slider.value = dayOffset;
    }
  }

  await loadTimeseries();
}
/**
 * Bootstrap the viewer.
 *
 * 1. Loads `data/sites.geojson` into `window.sitesData` (empty feature list
 *    on a non-OK response or any error).
 * 2. Probes every listed site/season with probeDataExists() and records the
 *    ones that have data in `availableSiteSeasons`.
 * 3. Fills the site / season / source / exclusion controls, honouring the
 *    `site`, `season`, `source` and `exclusion` query parameters.
 * 4. Creates the Leaflet map and site marker.
 * 5. Wires the change handlers and loads the initial site and season.
 *
 * Changes from the earlier version:
 * - The source / exclusion handlers now update the CURRENT query string.
 *   They used to serialise the page-load `urlParams`, which dropped the
 *   `site` / `season` values written by setSiteSeason().
 * - A `site` parameter naming an inherited object key (e.g. "constructor")
 *   no longer passes the availability check.
 * - The HEAD probes run in parallel instead of strictly one after another;
 *   result order is preserved.
 *
 * @returns {Promise<void>}
 */
async function init() {
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  // Probe all site/season combinations concurrently. probeDataExists() never
  // rejects, so Promise.all cannot fail here.
  const features = window.sitesData?.features || [];
  const probedSites = await Promise.all(
    features
      .filter((f) => f.properties?.sitename)
      .map(async (f) => {
        const sitename = f.properties.sitename;
        const seasonsFromGeo = f.properties.seasons ? Object.keys(f.properties.seasons).sort() : [];
        const found = await Promise.all(seasonsFromGeo.map((s) => probeDataExists(sitename, s)));
        return [sitename, seasonsFromGeo.filter((_, i) => found[i])];
      })
  );
  for (const [sitename, withData] of probedSites) {
    if (withData.length) availableSiteSeasons[sitename] = withData;
  }

  const hasSite = (name) => Object.prototype.hasOwnProperty.call(availableSiteSeasons, name);
  const seasonOptionsHtml = (seasons) =>
    seasons.map((s) => `<option value="${s}">${s}</option>`).join("");
  // Keep the shared `urlParams` object in sync, but build the new query
  // string from the live URL so parameters set elsewhere are preserved.
  const updateUrlParam = (key, value) => {
    urlParams.set(key, value);
    const params = new URLSearchParams(location.search);
    params.set(key, value);
    history.replaceState({}, "", `?${params}`);
  };

  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const sourceSelect = document.getElementById("sourceSelect");
  const exclusionSelect = document.getElementById("exclusionSelect");

  // Site options; fall back to a single default site when nothing was found.
  const availableSites = Object.keys(availableSiteSeasons);
  siteSelect.innerHTML = "";
  (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  });

  // Initial selection from the URL, validated against what is available.
  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite = (urlSite && hasSite(urlSite)) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");
  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptionsHtml(initialSeasons);
  seasonSelect.value = initialSeason;
  sourceSelect.value = urlParams.get("source") || "s2";
  exclusion = urlParams.get("exclusion") || "none";
  exclusionSelect.value = exclusion;

  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    // The site coordinates are [lon, lat]; sitePosition is stored as [lat, lon].
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }

  // Base map plus a small red dot marking the site.
  const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
  s2Map = L.map("s2map", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  s2Marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(s2Map);

  siteSelect.addEventListener("change", () => {
    const sn = siteSelect.value;
    const seas = availableSiteSeasons[sn] || [];
    seasonSelect.innerHTML = seasonOptionsHtml(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", () => {
    setSiteSeason(siteSelect.value, seasonSelect.value);
  });
  sourceSelect.addEventListener("change", async () => {
    source = sourceSelect.value;
    updateUrlParam("source", source);
    await loadTimeseries();
  });
  exclusionSelect.addEventListener("change", () => {
    exclusion = exclusionSelect.value;
    updateUrlParam("exclusion", exclusion);
    drawAllPlots();
    updateS2Imagery();
  });

  await setSiteSeason(initialSite, initialSeason);
}
// While the date slider is dragged: show the selected date, then redraw the
// plots and refresh the imagery for it.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const sliderValue = event.currentTarget.value;
  const dateDisplay = document.getElementById("dateDisplay");
  dateDisplay.textContent = dateFromDays(parseInt(sliderValue));
  drawAllPlots();
  updateS2Imagery();
});
/**
 * Find the filename of the record closest in time to a date.
 *
 * Only records of `filteredTimeseries(timeseries)` with a truthy `filename`
 * are considered; on a tie the earlier record in the list wins.
 *
 * @param {string} dateStr  date accepted by the Date constructor.
 * @returns {string|null}  the closest record's filename, or null when no
 *   record has a filename.
 */
function closestFilename(dateStr) {
  const targetTime = new Date(dateStr);
  const candidates = filteredTimeseries(timeseries).filter((entry) => entry.filename);
  if (candidates.length === 0) return null;

  let best = candidates[0];
  let bestGap = Math.abs(new Date(best.date) - targetTime);
  for (let i = 1; i < candidates.length; i++) {
    const gap = Math.abs(new Date(candidates[i].date) - targetTime);
    if (gap < bestGap) {
      best = candidates[i];
      bestGap = gap;
    }
  }
  return best.filename;
}
/**
 * Convert a bounding box into EPSG:4326 corner pairs via proj4.
 *
 * @param {number[]} bbox  [minX, minY, maxX, maxY] in `fromCRS` coordinates.
 * @param {string} fromCRS  source CRS identifier passed to proj4.
 * @returns {number[][]}  [[y, x], [y, x]] for the first and second corner,
 *   i.e. each transformed point with its two components swapped.
 */
function transformBounds(bbox, fromCRS) {
  const [minX, minY, maxX, maxY] = bbox;
  const toSwappedPoint = (px, py) => {
    const projected = proj4(fromCRS, "EPSG:4326", [px, py]);
    return [projected[1], projected[0]];
  };
  return [toSwappedPoint(minX, minY), toSwappedPoint(maxX, maxY)];
}
/**
 * Load a GeoTIFF of the current site / season / source and render its first
 * three bands as an RGB image.
 *
 * Bands 0, 1, 2 are read as blue, green, red. Each band is stretched
 * linearly from its own min..max over strictly positive pixels to 0..255.
 * Pixels that end up 0 in all three channels are made fully transparent.
 *
 * Changes from the earlier version:
 * - a non-OK HTTP response fails with a clear error instead of handing the
 *   error page to the TIFF parser;
 * - a missing GeographicTypeGeoKey falls back to EPSG:4326 instead of the
 *   invalid "EPSG:undefined";
 * - NaN pixels and bands without any valid pixel become transparent instead
 *   of opaque black;
 * - rasters are no longer copied into plain arrays before processing.
 *
 * @param {string} filename  file name inside `data/<site>/<season>/raw/<source>/`.
 * @returns {Promise<{dataUrl: string, bounds: number[][]}>}  canvas data URL
 *   and the image bounds as two [lat, lon]-ordered corners.
 * @throws {Error}  when the file cannot be fetched or has fewer than three bands.
 */
async function loadS2Geotiff(filename) {
  const path = `data/${siteName}/${season}/raw/${source}/${filename}`;
  const response = await fetch(path);
  if (!response.ok) throw new Error(`Failed to fetch ${path}: HTTP ${response.status}`);

  const tiff = await GeoTIFF.fromArrayBuffer(await response.arrayBuffer());
  const image = await tiff.getImage();
  const rasters = await image.readRasters();
  if (rasters.length < 3) throw new Error(`${path}: expected at least 3 bands, got ${rasters.length}`);
  const width = image.getWidth(), height = image.getHeight();
  const bbox = image.getBoundingBox();

  // Prefer the projected CRS key, then a non-4326 geographic key, else 4326.
  const geoKeys = image.getGeoKeys() || {};
  let crsCode = "EPSG:4326";
  if (geoKeys.ProjectedCSTypeGeoKey) {
    crsCode = `EPSG:${geoKeys.ProjectedCSTypeGeoKey}`;
  } else if (geoKeys.GeographicTypeGeoKey && geoKeys.GeographicTypeGeoKey !== 4326) {
    crsCode = `EPSG:${geoKeys.GeographicTypeGeoKey}`;
  }

  // Linear stretch of one band to 0..255, ignoring non-positive / NaN pixels
  // when determining the range.
  const normalize = (band) => {
    let min = Infinity, max = -Infinity;
    for (const v of band) {
      if (v > 0) { // false for NaN as well
        if (v < min) min = v;
        if (v > max) max = v;
      }
    }
    const out = new Float64Array(band.length); // zero-filled
    if (min === Infinity) return out; // no valid pixel: leave everything at 0
    const range = max - min || 1;
    for (let i = 0; i < band.length; i++) {
      const v = band[i];
      out[i] = Number.isNaN(v) ? 0 : Math.max(0, Math.min(255, ((v - min) / range) * 255));
    }
    return out;
  };
  const [blue, green, red] = [rasters[0], rasters[1], rasters[2]];
  const [rN, gN, bN] = [red, green, blue].map(normalize);

  const canvas = Object.assign(document.createElement("canvas"), { width, height });
  const ctx = canvas.getContext("2d");
  ctx.imageSmoothingEnabled = false;
  const imgData = ctx.createImageData(width, height);
  const pixels = imgData.data;
  for (let i = 0; i < rN.length; i++) {
    const idx = i * 4;
    if (rN[i] === 0 && gN[i] === 0 && bN[i] === 0) {
      pixels[idx + 3] = 0; // treat as no-data: fully transparent
    } else {
      pixels[idx] = rN[i];
      pixels[idx + 1] = gN[i];
      pixels[idx + 2] = bN[i];
      pixels[idx + 3] = 255;
    }
  }
  ctx.putImageData(imgData, 0, 0);

  const bounds = crsCode === "EPSG:4326"
    ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
    : transformBounds(bbox, crsCode);
  return { dataUrl: canvas.toDataURL(), bounds };
}
/**
 * Show the image closest to the slider date as an overlay on the map.
 *
 * Picks the file via closestFilename(), renders it with loadS2Geotiff(),
 * replaces the current overlay, fits the map to the image bounds and writes
 * the acquisition date (the leading YYYYMMDD part of the file name) into the
 * "s2rgbdate" element. When there is no file, no map, or loading fails, the
 * overlay is removed and the date text is cleared.
 *
 * Calls can overlap, since the slider fires this on every `input` event and
 * loading is asynchronous. Each call takes a sequence number stored on the
 * function itself, and a call that is no longer the latest when its image
 * arrives discards its result, so an older image cannot overwrite a newer one.
 *
 * @returns {Promise<void>}
 */
async function updateS2Imagery() {
  const requestId = (updateS2Imagery.latestRequestId || 0) + 1;
  updateS2Imagery.latestRequestId = requestId;
  const isStale = () => requestId !== updateS2Imagery.latestRequestId;

  const clearImagery = () => {
    if (s2Overlay) {
      if (s2Map) s2Map.removeLayer(s2Overlay);
      s2Overlay = null;
    }
    document.getElementById("s2rgbdate").textContent = "";
  };

  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
  const filename = closestFilename(dateStr);
  if (!filename || !s2Map) {
    clearImagery();
    return;
  }

  try {
    const { dataUrl, bounds } = await loadS2Geotiff(filename);
    if (isStale()) return; // a newer request owns the overlay now
    if (s2Overlay) s2Map.removeLayer(s2Overlay);
    s2Overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(s2Map);
    s2Map.fitBounds(bounds);
    const d = filename.split("_")[0];
    document.getElementById("s2rgbdate").textContent = `${d.slice(0,4)}-${d.slice(4,6)}-${d.slice(6,8)}`;
  } catch (e) {
    if (isStale()) return;
    // Still best-effort, but leave a trace instead of failing silently.
    console.warn(`Could not display imagery ${filename}:`, e);
    clearImagery();
  }
}
// Start the viewer. init() is async; its returned promise is not awaited or
// given a rejection handler here.
init();
</script>
</body>
</html>