Switching horses.
This commit is contained in:
parent
25cbd97662
commit
e3e14027fc
51 changed files with 5078 additions and 11678 deletions
12
.gitignore
vendored
12
.gitignore
vendored
|
|
@ -1,10 +1,9 @@
|
|||
# Project data
|
||||
data/*
|
||||
webapp/data
|
||||
# Generated caches and downloads (regenerate via pipeline steps)
|
||||
data/
|
||||
|
||||
# Environment
|
||||
# Environment and secrets
|
||||
.env
|
||||
.venv
|
||||
.venv/
|
||||
venv/
|
||||
env/
|
||||
|
||||
|
|
@ -42,6 +41,3 @@ dist/
|
|||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
AGENTS.md
|
||||
.vibe
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.4
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix]
|
||||
- id: ruff-format
|
||||
|
||||
278
1-phenocam.py
Normal file
278
1-phenocam.py
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
"""Step 1: download worldwide PhenoCam sites for a calendar year.
|
||||
|
||||
Inputs (``data/``): none — queries the PhenoCam API.
|
||||
|
||||
Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``phenocam/{year}.json`` — site list manifest
|
||||
- ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata
|
||||
- ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV
|
||||
|
||||
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter).
|
||||
|
||||
Next step: :mod:`2-phenocam-screening`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
# Put the shared ``processing`` directory on ``sys.path`` so the helper modules
# imported just below resolve when this file is executed as a script.
# NOTE(review): ``parents[1]`` is the directory *above* the one containing this
# file. If ``processing/`` lives next to this script, ``parents[0]`` is what is
# needed -- confirm against the actual repository layout.
PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
if str(PROCESSING_DIR) not in sys.path:
    # Prepend (rather than append) so these modules win over same-named ones.
    sys.path.insert(0, str(PROCESSING_DIR))
|
||||
|
||||
from acquisition_phenocam import PHENOCAM_API # noqa: E402
|
||||
from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date # noqa: E402
|
||||
|
||||
EVALUATION_YEAR = 2025
|
||||
HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1"
|
||||
ONE_DAY_CSV_SUFFIX = "_1day.csv"
|
||||
|
||||
|
||||
def check_phenocam_host() -> None:
    """Fail fast when the PhenoCam API does not answer a minimal probe request.

    Raises:
        RuntimeError: if the probe request fails or returns an HTTP error
            status. The underlying ``requests`` exception is chained as cause.
    """
    try:
        probe = requests.get(HOST_PROBE, timeout=30)
        probe.raise_for_status()
    except requests.RequestException as exc:
        detail = f"{exc.__class__.__name__}: {exc}"
        message = f"PhenoCam API unreachable (phenocam.nau.edu): {detail}"
        raise RuntimeError(message) from exc
|
||||
|
||||
|
||||
def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
    """Return True when the span ``[first, last]`` intersects calendar year *season*.

    Both bounds are parsed with ``_parse_iso_date``; if either parses to
    ``None`` the span is treated as not overlapping.
    """
    archive_start = _parse_iso_date(first)
    archive_end = _parse_iso_date(last)
    if archive_start is None or archive_end is None:
        return False
    jan_first = date(season, 1, 1)
    dec_last = date(season, 12, 31)
    # Two closed intervals overlap when each starts before the other ends.
    return archive_start <= dec_last and archive_end >= jan_first
|
||||
|
||||
|
||||
def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
    """Return the per-year site directory ``<cache_dir>/phenocam/<evaluation_year>``."""
    return cache_dir.joinpath("phenocam", str(evaluation_year))
|
||||
|
||||
|
||||
def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the metadata JSON for *sitename* inside the per-year site directory."""
    filename = f"{sitename}.json"
    return sites_dir(cache_dir, evaluation_year) / filename
|
||||
|
||||
|
||||
def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the one-day summary CSV for *sitename* inside the per-year site directory."""
    filename = f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    return sites_dir(cache_dir, evaluation_year) / filename
|
||||
|
||||
|
||||
def load_candidate_cameras(
    evaluation_year: int,
    *,
    site_filter: set[str] | None = None,
    active_only: bool = False,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Collect camera records whose archive overlaps *evaluation_year*.

    Args:
        evaluation_year: calendar year the archive span must intersect.
        site_filter: when given, only cameras whose ``Sitename`` is in the set are kept.
        active_only: when True, cameras with a falsy ``active`` field are dropped.
        limit: when given, the sorted result is cut with ``[:limit]``.

    Returns:
        Shallow copies of the selected camera dicts, sorted by ``Sitename``.
    """

    def _keep(camera: dict[str, Any]) -> bool:
        if active_only and not camera.get("active"):
            return False
        name = str(camera["Sitename"])
        if site_filter is not None and name not in site_filter:
            return False
        return _overlaps_year(
            camera.get("date_first"), camera.get("date_last"), evaluation_year
        )

    selected = sorted(
        (dict(camera) for camera in _paginate_cameras() if _keep(camera)),
        key=lambda item: str(item["Sitename"]),
    )
    return selected if limit is None else selected[:limit]
|
||||
|
||||
|
||||
def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
    """Return a copy of the first ROI-list record for *site_name*, or ``None``.

    Pages through ``{PHENOCAM_API}/roilists/`` (following each payload's
    ``next`` link) and stops at the first page that contains a record whose
    ``site`` field equals *site_name*.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    next_url = f"{PHENOCAM_API}/roilists/"
    # The site filter is only sent with the first request; ``next`` links are
    # followed without extra query parameters.
    query: dict[str, Any] | None = {"site": site_name}
    while next_url:
        response = requests.get(next_url, params=query, timeout=60)
        response.raise_for_status()
        page = response.json()
        matches = [
            item for item in page.get("results", []) if item.get("site") == site_name
        ]
        if matches:
            return dict(matches[0])
        next_url = page.get("next")
        query = None
    return None
|
||||
|
||||
|
||||
def download_one_day_csv(csv_url: str, output_path: Path) -> None:
    """Download *csv_url* and store its text at *output_path* (UTF-8).

    Parent directories of *output_path* are created as needed.

    Raises:
        requests.HTTPError: if the download returns an error status.
    """
    response = requests.get(csv_url, timeout=60)
    response.raise_for_status()
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    body = response.text
    with output_path.open("w", encoding="utf-8") as handle:
        handle.write(body)
|
||||
|
||||
|
||||
def download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
) -> str:
    """Fetch ROI metadata for one camera and write its cache files.

    Writes ``{"response": {"camera": ..., "roi": ...}}`` to the site's JSON
    path and, when the ROI record carries a ``one_day_summary`` URL, downloads
    that CSV next to it.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    roi = fetch_roi_record(sitename)

    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"response": {"camera": camera, "roi": roi}}, indent=2)
    metadata_path.write_text(document + "\n", encoding="utf-8")

    if roi:
        csv_url = roi.get("one_day_summary")
        if csv_url:
            csv_target = site_csv_path(cache_dir, evaluation_year, sitename)
            download_one_day_csv(csv_url, csv_target)
    return sitename
|
||||
|
||||
|
||||
def load_or_download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
    *,
    refresh: bool,
) -> str:
    """Reuse cached site files when present, otherwise download them.

    With ``refresh`` False and the site's JSON already on disk, nothing is
    re-fetched except a missing CSV, whose URL is read from the cached JSON.
    In every other case the site is downloaded from scratch.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    json_path = site_json_path(cache_dir, evaluation_year, sitename)
    if refresh or not json_path.is_file():
        return download_site(camera, evaluation_year, cache_dir)

    csv_path = site_csv_path(cache_dir, evaluation_year, sitename)
    if csv_path.is_file():
        return sitename

    # Metadata is cached but the CSV is not: back-fill it from the stored URL.
    cached = json.loads(json_path.read_text(encoding="utf-8"))
    roi = cached.get("response", {}).get("roi") or {}
    csv_url = roi.get("one_day_summary")
    if csv_url:
        download_one_day_csv(csv_url, csv_path)
    return sitename
|
||||
|
||||
|
||||
def run_download(
    *,
    cache_dir: Path,
    evaluation_year: int,
    active_only: bool = False,
    site_filter: set[str] | None = None,
    limit: int | None = None,
    refresh: bool = False,
) -> list[str]:
    """Download (or reuse cached) files for every candidate camera.

    Args:
        cache_dir: base directory for per-site files.
        evaluation_year: calendar year whose archive overlap selects cameras.
        active_only: forwarded to ``load_candidate_cameras``.
        site_filter: forwarded to ``load_candidate_cameras``.
        limit: forwarded to ``load_candidate_cameras``.
        refresh: when True, sites are re-downloaded even if cached.

    Returns:
        Sorted list of processed site names.

    Raises:
        RuntimeError: if the PhenoCam host probe fails.
    """

    def _coord_text(value: Any) -> str:
        # The coordinates are only printed for progress; a missing or
        # non-numeric value must not abort the whole download run.
        try:
            return f"{float(value):.4f}"
        except (TypeError, ValueError):
            return "n/a"

    check_phenocam_host()
    candidates = load_candidate_cameras(
        evaluation_year,
        site_filter=site_filter,
        active_only=active_only,
        limit=limit,
    )
    total = len(candidates)
    print(
        f"[PhenoCam-1] {total} candidate(s) with archive overlap for "
        f"{evaluation_year}"
    )

    sitenames: list[str] = []
    for index, camera in enumerate(candidates, start=1):
        sitename = str(camera["Sitename"])
        lat_text = _coord_text(camera.get("Lat"))
        lon_text = _coord_text(camera.get("Lon"))
        print(
            f"[PhenoCam-1] ({index}/{total}) {sitename} "
            f"({lat_text}, {lon_text})"
        )
        sitenames.append(
            load_or_download_site(
                camera,
                evaluation_year,
                cache_dir,
                refresh=refresh,
            )
        )
    return sorted(sitenames)
|
||||
|
||||
|
||||
def write_manifest(
    sitenames: list[str],
    output_path: Path,
    cache_dir: Path,
    evaluation_year: int,
) -> None:
    """Write the step-1 manifest JSON listing the downloaded sites.

    The manifest stores ``sites_dir`` relative to the manifest's own directory.
    The path is computed with ``os.path.relpath`` so that a manifest placed
    outside ``<cache_dir>/phenocam`` (custom ``--output-json``), or a mix of
    absolute and relative paths, yields a ``../``-style path instead of the
    ``ValueError`` that ``Path.relative_to`` raises for non-sub-paths.

    Args:
        sitenames: site names to record under ``sites``.
        output_path: manifest file to write; parent directories are created.
        cache_dir: base cache directory the per-site files live under.
        evaluation_year: year recorded in the manifest and used for ``sites_dir``.
    """
    import os  # local import: only this function needs it

    site_root = sites_dir(cache_dir, evaluation_year)
    try:
        rel_sites_dir = Path(os.path.relpath(site_root, output_path.parent))
    except ValueError:
        # ``relpath`` cannot relate paths on different Windows drives; an
        # absolute path still joins correctly onto the manifest directory.
        rel_sites_dir = site_root.resolve()

    payload = {
        "evaluation_year": evaluation_year,
        "count": len(sitenames),
        "sites_dir": rel_sites_dir.as_posix(),
        "sites": sitenames,
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-1] Wrote {output_path}")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, download the sites and write the manifest.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code (always 0 when no exception is raised).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument
    add(
        "--cache-dir",
        type=Path,
        default=Path("data"),
        help="Base directory for per-site files and manifest",
    )
    add(
        "--evaluation-year",
        type=int,
        default=EVALUATION_YEAR,
        help=f"Calendar year to download (default: {EVALUATION_YEAR})",
    )
    add(
        "--active-only",
        action="store_true",
        help="Restrict candidates to cameras marked active in the API",
    )
    add(
        "--limit",
        type=int,
        default=None,
        help="Process only the first N candidate sites (testing)",
    )
    add(
        "--sites",
        type=str,
        default=None,
        help="Comma-separated sitenames to download (testing)",
    )
    add(
        "--refresh",
        action="store_true",
        help="Re-download sites even when cache files exist",
    )
    add(
        "--output-json",
        type=Path,
        default=None,
        help="Manifest output path (default: data/phenocam/{year}.json)",
    )
    args = parser.parse_args(argv)

    # Turn "a, b,,c" into {"a", "b", "c"}; no --sites means no filtering.
    wanted: set[str] | None = None
    if args.sites:
        wanted = {part.strip() for part in args.sites.split(",") if part.strip()}

    sitenames = run_download(
        cache_dir=args.cache_dir,
        evaluation_year=args.evaluation_year,
        active_only=args.active_only,
        site_filter=wanted,
        limit=args.limit,
        refresh=args.refresh,
    )

    manifest_path = args.output_json
    if not manifest_path:
        manifest_path = args.cache_dir / "phenocam" / f"{args.evaluation_year}.json"
    write_manifest(sitenames, manifest_path, args.cache_dir, args.evaluation_year)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
495
2-phenocam-screening.py
Normal file
495
2-phenocam-screening.py
Normal file
|
|
@ -0,0 +1,495 @@
|
|||
"""Step 2: PhenoCam GCC + SNR screening on step-1 cache.
|
||||
|
||||
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``phenocam/{year}.json`` — step-1 manifest
|
||||
- ``phenocam/{year}/{sitename}.json`` — per-site metadata
|
||||
- ``phenocam/{year}/{sitename}_1day.csv`` — GCC timeseries
|
||||
|
||||
Outputs (``data/phenocam_screening/``):
|
||||
|
||||
- ``{year}.json`` — full per-site results
|
||||
- ``{year}.csv`` — flat summary table
|
||||
|
||||
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional; default: all manifest sites).
|
||||
|
||||
Next step: :mod:`3-sentinel-data`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from scipy.interpolate import UnivariateSpline
|
||||
|
||||
# Put the shared ``processing`` directory on ``sys.path`` so the helper module
# imported just below resolves when this file is executed as a script.
# NOTE(review): ``parents[1]`` is the directory *above* the one containing this
# file. If ``processing/`` lives next to this script, ``parents[0]`` is what is
# needed -- confirm against the actual repository layout.
PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
if str(PROCESSING_DIR) not in sys.path:
    # Prepend (rather than append) so these modules win over same-named ones.
    sys.path.insert(0, str(PROCESSING_DIR))
|
||||
|
||||
from acquisition_phenocam import _phenocam_summary_gcc_value # noqa: E402
|
||||
|
||||
MIN_GCC_POINTS = 30
|
||||
SNR_THRESHOLD = 2.0
|
||||
CLUSTER_RADIUS_M = 500.0
|
||||
GATE_ORDER = ("phenocam", "snr", "cluster")
|
||||
ONE_DAY_CSV_SUFFIX = "_1day.csv"
|
||||
_EARTH_RADIUS_M = 6371000.0
|
||||
|
||||
|
||||
def load_manifest(path: Path) -> dict[str, Any]:
    """Read the step-1 manifest JSON at *path* and check its required keys.

    Raises:
        ValueError: naming the first of ``evaluation_year``, ``sites_dir``,
            ``sites`` that is absent from the manifest.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    required = ("evaluation_year", "sites_dir", "sites")
    missing = next((key for key in required if key not in payload), None)
    if missing is not None:
        raise ValueError(f"Expected '{missing}' in manifest {path}")
    return payload
|
||||
|
||||
|
||||
def resolve_sites_dir(manifest_path: Path, manifest: dict[str, Any]) -> Path:
    """Resolve the manifest's ``sites_dir`` entry against the manifest's own directory."""
    manifest_dir = manifest_path.parent
    return manifest_dir.joinpath(manifest["sites_dir"]).resolve()
|
||||
|
||||
|
||||
def load_site_entry(sites_dir: Path, sitename: str) -> dict[str, Any]:
    """Load one site's metadata JSON and attach the location of its CSV.

    The returned dict is the parsed ``{sitename}.json`` plus a
    ``_one_day_csv`` key holding the CSV path, or ``None`` when that file does
    not exist.
    """
    entry = json.loads((sites_dir / f"{sitename}.json").read_text(encoding="utf-8"))
    csv_candidate = sites_dir / f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    if csv_candidate.is_file():
        entry["_one_day_csv"] = csv_candidate
    else:
        entry["_one_day_csv"] = None
    return entry
|
||||
|
||||
|
||||
def parse_gcc90_series(csv_path: Path, evaluation_year: int) -> list[tuple[str, float]]:
    """Extract the in-year GCC series from a one-day summary CSV.

    Empty lines and lines starting with ``#`` are dropped before CSV parsing.
    Rows are skipped when the ``date`` is missing, not ``YYYY-MM-DD``, outside
    *evaluation_year*, or when ``_phenocam_summary_gcc_value`` returns ``None``.

    Returns:
        ``(iso_date, gcc)`` pairs sorted by date string.
    """
    raw_lines = csv_path.read_text(encoding="utf-8").split("\n")
    data_lines = [text for text in raw_lines if text and not text.startswith("#")]
    reader = csv.DictReader(data_lines)
    # Passed to the value helper when the file has no ``gcc_90`` column.
    use_mean_fallback = "gcc_90" not in (reader.fieldnames or ())

    window_start = date(evaluation_year, 1, 1)
    window_end = date(evaluation_year, 12, 31)
    observations: list[tuple[str, float]] = []
    for record in reader:
        raw_date = record.get("date")
        if not raw_date:
            continue
        try:
            parsed = datetime.strptime(raw_date, "%Y-%m-%d").date()
        except ValueError:
            continue
        if parsed < window_start or parsed > window_end:
            continue
        value = _phenocam_summary_gcc_value(record, use_mean_fallback)
        if value is not None:
            observations.append((parsed.isoformat(), float(value)))
    return sorted(observations, key=lambda pair: pair[0])
|
||||
|
||||
|
||||
def _months_covered(day_strings: list[str]) -> int:
|
||||
months: set[int] = set()
|
||||
for day in day_strings:
|
||||
months.add(datetime.strptime(day, "%Y-%m-%d").month)
|
||||
return len(months)
|
||||
|
||||
|
||||
def _aic_for_spline(x: np.ndarray, y: np.ndarray, spline: UnivariateSpline) -> float:
|
||||
residuals = y - spline(x)
|
||||
rss = float(np.sum(residuals**2))
|
||||
n = len(y)
|
||||
if rss <= 0 or n < 4:
|
||||
return math.inf
|
||||
edf = float(spline.get_knots().shape[0] + spline.get_coeffs().shape[0])
|
||||
return n * math.log(rss / n) + 2.0 * edf
|
||||
|
||||
|
||||
def compute_snr_aic_spline(series: list[tuple[str, float]]) -> float | None:
    """Estimate a signal-to-noise ratio from an AIC-selected smoothing spline.

    Cubic ``UnivariateSpline`` fits are tried over 40 log-spaced smoothing
    factors (scaled by the variance and length of the series); the fit with
    the lowest ``_aic_for_spline`` score is kept. The result is the raw value
    range (max - min) divided by the RMSE of that fit.

    Returns:
        The ratio, or ``None`` when the series has fewer than
        ``MIN_GCC_POINTS`` points, fewer than five distinct days, no positive
        variance, no usable fit, or a non-positive RMSE.
    """
    if len(series) < MIN_GCC_POINTS:
        return None

    days = [datetime.strptime(stamp, "%Y-%m-%d").date() for stamp, _ in series]
    origin = days[0]
    # Abscissa: days elapsed since the first observation.
    x = np.array([(day - origin).days for day in days], dtype=float)
    y = np.array([value for _, value in series], dtype=float)
    if len(np.unique(x)) < 5:
        return None

    variance = float(np.var(y))
    if variance <= 0:
        return None

    best_fit: UnivariateSpline | None = None
    lowest_aic = math.inf
    for smoothing in np.logspace(-4, 2, 40) * variance * len(y):
        try:
            fit = UnivariateSpline(x, y, k=3, s=float(smoothing))
        except Exception:
            # Best effort: a smoothing factor that cannot be fitted is skipped.
            continue
        score = _aic_for_spline(x, y, fit)
        if score < lowest_aic:
            lowest_aic = score
            best_fit = fit

    if best_fit is None:
        return None

    rmse = float(np.sqrt(np.mean((y - best_fit(x)) ** 2)))
    amplitude = float(np.max(y) - np.min(y))
    if rmse <= 0:
        return None
    return amplitude / rmse
|
||||
|
||||
|
||||
def screen_site(
    site_entry: dict[str, Any],
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
) -> dict[str, Any]:
    """Run the ``phenocam`` and ``snr`` gates for one site.

    Args:
        site_entry: dict with a ``response`` payload and a ``_one_day_csv``
            path (or ``None``), as produced by ``load_site_entry``.
        evaluation_year: year the GCC series is restricted to.
        min_gcc_points: minimum number of in-year GCC points for the
            ``phenocam`` gate.
        snr_threshold: minimum SNR for the ``snr`` gate.

    Returns:
        ``{"response": ..., "calculations": ...}`` where ``calculations``
        records the counts, the SNR, ``status`` (``PASS``/``FAIL``), the
        failing gate with its reason, and the list of passed gates.
    """
    response = site_entry["response"]
    roi = response.get("roi")
    csv_path = site_entry.get("_one_day_csv")
    # Key order is kept stable because it determines downstream column order.
    calculations: dict[str, Any] = {
        "evaluation_year": evaluation_year,
        "n_gcc_points": 0,
        "first_gcc_date": None,
        "last_gcc_date": None,
        "months_with_gcc": 0,
        "snr": None,
        "min_gcc_points": min_gcc_points,
        "snr_threshold": snr_threshold,
        "status": "FAIL",
        "failing_gate": None,
        "passed_gates": [],
        "reason": None,
    }

    def _outcome() -> dict[str, Any]:
        return {"response": response, "calculations": calculations}

    def _reject(gate: str, reason: str) -> dict[str, Any]:
        calculations["failing_gate"] = gate
        calculations["reason"] = reason
        return _outcome()

    has_summary = roi is not None and bool(roi.get("one_day_summary"))
    if not has_summary or csv_path is None:
        return _reject("phenocam", "no_roi")

    series = parse_gcc90_series(csv_path, evaluation_year)
    n_points = len(series)
    calculations["n_gcc_points"] = n_points
    if n_points == 0:
        return _reject("phenocam", "no_gcc_in_year")

    day_strings = [day for day, _ in series]
    calculations["first_gcc_date"] = day_strings[0]
    calculations["last_gcc_date"] = day_strings[-1]
    calculations["months_with_gcc"] = _months_covered(day_strings)

    if n_points < min_gcc_points:
        return _reject("phenocam", "insufficient_gcc_points")
    calculations["passed_gates"].append("phenocam")

    snr = compute_snr_aic_spline(series)
    calculations["snr"] = snr
    if snr is None:
        return _reject("snr", "snr_undefined")
    if snr < snr_threshold:
        return _reject("snr", "insufficient_snr")
    calculations["passed_gates"].append("snr")

    calculations["status"] = "PASS"
    calculations["failing_gate"] = None
    calculations["reason"] = None
    return _outcome()
|
||||
|
||||
|
||||
def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in metres between two lat/lon points (degrees).

    Uses the haversine formula with a spherical Earth of radius
    ``_EARTH_RADIUS_M``.
    """
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2) ** 2
    # For (near-)antipodal points rounding can push ``a`` marginally above 1,
    # which would make ``math.asin`` raise a domain error. Only the upper
    # bound is clamped so a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    return 2 * _EARTH_RADIUS_M * math.asin(math.sqrt(a))
|
||||
|
||||
|
||||
def _site_coords(row: dict[str, Any]) -> tuple[float, float] | None:
|
||||
camera = row["response"]["camera"]
|
||||
lat, lon = camera.get("Lat"), camera.get("Lon")
|
||||
if lat is None or lon is None:
|
||||
return None
|
||||
return float(lat), float(lon)
|
||||
|
||||
|
||||
def _cluster_rank(row: dict[str, Any]) -> tuple[int, float]:
|
||||
calc = row["calculations"]
|
||||
return calc["n_gcc_points"], float(calc.get("snr") or 0.0)
|
||||
|
||||
|
||||
def apply_cluster_gate(results: list[dict[str, Any]], *, radius_m: float) -> int:
    """Keep one site per group of nearby SNR-passed sites; mutate *results* in place.

    Only rows whose ``passed_gates`` contain ``"snr"`` take part. Rows without
    coordinates pass the ``cluster`` gate directly. The remaining rows are
    linked whenever two of them are at most *radius_m* apart, and linked rows
    are grouped transitively (union-find). In each group the row with the
    highest ``_cluster_rank`` passes; the others are marked ``FAIL`` with
    reason ``nearby_duplicate``.

    Returns:
        The number of rows demoted to ``FAIL``.
    """
    # (index into results, lat, lon) for every row that takes part.
    located: list[tuple[int, float, float]] = []
    for position, row in enumerate(results):
        gates = row["calculations"]["passed_gates"]
        if "snr" not in gates:
            continue
        coords = _site_coords(row)
        if coords is None:
            gates.append("cluster")
            continue
        located.append((position, coords[0], coords[1]))

    count = len(located)
    roots = list(range(count))

    def _root(node: int) -> int:
        # Walk to the representative, shortening the path on the way.
        while roots[node] != node:
            roots[node] = roots[roots[node]]
            node = roots[node]
        return node

    for i, (_, lat_a, lon_a) in enumerate(located):
        for j in range(i + 1, count):
            _, lat_b, lon_b = located[j]
            if _haversine_m(lat_a, lon_a, lat_b, lon_b) <= radius_m:
                root_i, root_j = _root(i), _root(j)
                if root_i != root_j:
                    roots[root_j] = root_i

    # Representative -> indices into ``results`` of the group's members.
    groups: dict[int, list[int]] = {}
    for i in range(count):
        groups.setdefault(_root(i), []).append(located[i][0])

    demoted = 0
    for member_rows in groups.values():
        size = len(member_rows)
        winner = max(member_rows, key=lambda idx: _cluster_rank(results[idx]))
        winner_name = str(results[winner]["response"]["camera"]["Sitename"])
        for idx in member_rows:
            calc = results[idx]["calculations"]
            calc["cluster_size"] = size
            if idx == winner:
                calc["passed_gates"].append("cluster")
                continue
            calc["status"] = "FAIL"
            calc["failing_gate"] = "cluster"
            calc["reason"] = "nearby_duplicate"
            calc["cluster_winner"] = winner_name
            demoted += 1
    return demoted
|
||||
|
||||
|
||||
def run_screening(
    manifest: dict[str, Any],
    sites_dir: Path,
    *,
    evaluation_year: int,
    min_gcc_points: int,
    snr_threshold: float,
    site_filter: set[str] | None = None,
) -> list[dict[str, Any]]:
    """Screen every manifest site (optionally restricted by *site_filter*).

    Each site is loaded from *sites_dir* and passed to ``screen_site``; a
    progress line is printed per site.

    Returns:
        One ``screen_site`` result per screened site, in manifest order.
    """
    names = manifest["sites"]
    if site_filter is not None:
        names = [name for name in names if name in site_filter]

    total = len(names)
    screened: list[dict[str, Any]] = []
    for position, sitename in enumerate(names, start=1):
        print(f"[PhenoCam-2] ({position}/{total}) {sitename}")
        entry = load_site_entry(sites_dir, sitename)
        outcome = screen_site(
            entry,
            evaluation_year=evaluation_year,
            min_gcc_points=min_gcc_points,
            snr_threshold=snr_threshold,
        )
        screened.append(outcome)
    return screened
|
||||
|
||||
|
||||
def print_summary(results: list[dict[str, Any]], evaluation_year: int) -> None:
    """Print the pass count, per-gate counters and a per-site table to stdout."""
    calcs = [row["calculations"] for row in results]
    n_pass = sum(1 for calc in calcs if calc["status"] == "PASS")
    gates_label = " + ".join(GATE_ORDER)
    print(
        f"\n[PhenoCam-2] Screening for {evaluation_year}: "
        f"{n_pass}/{len(results)} pass ({gates_label})"
    )

    for gate in GATE_ORDER:
        fails = sum(1 for calc in calcs if calc["failing_gate"] == gate)
        after = sum(1 for calc in calcs if gate in calc["passed_gates"])
        print(f" after_{gate}: {after}, fail_at_{gate}: {fails}")

    print("\nPer-site table")
    header = (
        f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} "
        f"{'status':>6} gate reason"
    )
    print(header)
    print("-" * 72)

    def _by_sitename(item: dict[str, Any]) -> str:
        return str(item["response"]["camera"]["Sitename"])

    for row in sorted(results, key=_by_sitename):
        camera = row["response"]["camera"]
        calc = row["calculations"]
        if calc["snr"] is not None:
            snr_text = f"{calc['snr']:.2f}"
        else:
            snr_text = ""
        gate_text = calc["failing_gate"] or "-"
        reason_text = calc["reason"] or "-"
        print(
            f"{camera['Sitename']:<24} {calc['n_gcc_points']:4d} "
            f"{calc['months_with_gcc']:3d} {snr_text:>6} "
            f"{calc['status']:>6} {gate_text:<8} "
            f"{reason_text}"
        )
|
||||
|
||||
|
||||
def write_screening_json(
    results: list[dict[str, Any]],
    output_path: Path,
    evaluation_year: int,
) -> None:
    """Write the full screening results to *output_path* as indented JSON.

    The document holds the year, the total and ``PASS`` counts, and all rows
    sorted by ``Sitename``. Parent directories are created as needed.
    """
    qualifying = sum(1 for row in results if row["calculations"]["status"] == "PASS")
    ordered = sorted(
        results,
        key=lambda item: str(item["response"]["camera"]["Sitename"]),
    )
    document = json.dumps(
        {
            "evaluation_year": evaluation_year,
            "count": len(results),
            "qualifying_count": qualifying,
            "sites": ordered,
        },
        indent=2,
    )
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(document + "\n", encoding="utf-8")
    print(f"[PhenoCam-2] Wrote {output_path}")
|
||||
|
||||
|
||||
def write_screening_csv(results: list[dict[str, Any]], output_path: Path) -> None:
    """Write one flat CSV row per site: camera fields, ROI URL and all calculations.

    Columns follow the first row's key order; keys that only appear in later
    rows are appended in order of first appearance. With no results only a
    ``Sitename,status`` header is written.
    """

    def _flatten(row: dict[str, Any]) -> dict[str, Any]:
        camera = row["response"]["camera"]
        metadata = camera.get("sitemetadata") or {}
        roi = row["response"].get("roi") or {}
        flat: dict[str, Any] = {
            "Sitename": camera.get("Sitename"),
            "Lat": camera.get("Lat"),
            "Lon": camera.get("Lon"),
            "site_description": metadata.get("site_description"),
            "primary_veg_type": metadata.get("primary_veg_type"),
            "site_type": metadata.get("site_type"),
            "one_day_summary": roi.get("one_day_summary"),
        }
        flat.update(row["calculations"])
        return flat

    rows = [_flatten(row) for row in results]
    if rows:
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        fieldnames = list(dict.fromkeys(key for flat in rows for key in flat))
    else:
        fieldnames = ["Sitename", "status"]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    print(f"[PhenoCam-2] Wrote {output_path}")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, screen the step-1 sites and write JSON + CSV.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code (always 0 when no exception is raised).

    Raises:
        SystemExit: if the step-1 manifest for the requested year is missing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument
    add(
        "--evaluation-year",
        type=int,
        default=2025,
        help="Evaluation year (default: 2025)",
    )
    add(
        "--sites",
        type=str,
        default=None,
        help="Comma-separated sitenames (default: all sites in step-1 manifest)",
    )
    add(
        "--min-gcc-points",
        type=int,
        default=MIN_GCC_POINTS,
        help=f"Minimum valid gcc_90 observations in-year (default: {MIN_GCC_POINTS})",
    )
    add(
        "--snr-threshold",
        type=float,
        default=SNR_THRESHOLD,
        help=f"Minimum AIC-spline SNR (default: {SNR_THRESHOLD})",
    )
    add(
        "--output-json",
        type=Path,
        default=None,
        help="Screening output (default: data/phenocam_screening/{year}.json)",
    )
    add(
        "--output-csv",
        type=Path,
        default=None,
        help="Flat CSV summary path",
    )
    add(
        "--cluster-radius-m",
        type=float,
        default=CLUSTER_RADIUS_M,
        help=f"Deduplicate SNR-passed sites within this radius (default: {CLUSTER_RADIUS_M})",
    )
    add(
        "--no-cluster",
        action="store_true",
        help="Skip nearby-site deduplication gate",
    )
    args = parser.parse_args(argv)

    evaluation_year = args.evaluation_year
    data_root = Path("data")
    manifest_path = data_root / "phenocam" / f"{evaluation_year}.json"
    if not manifest_path.is_file():
        raise SystemExit(f"Step-1 manifest not found: {manifest_path}")

    # Turn "a, b,,c" into {"a", "b", "c"}; no --sites means no filtering.
    wanted: set[str] | None = None
    if args.sites:
        wanted = {part.strip() for part in args.sites.split(",") if part.strip()}

    manifest = load_manifest(manifest_path)
    results = run_screening(
        manifest,
        resolve_sites_dir(manifest_path, manifest),
        evaluation_year=evaluation_year,
        min_gcc_points=args.min_gcc_points,
        snr_threshold=args.snr_threshold,
        site_filter=wanted,
    )

    if not args.no_cluster:
        demoted = apply_cluster_gate(results, radius_m=args.cluster_radius_m)
        if demoted:
            print(f"[PhenoCam-2] Cluster dedup: demoted {demoted} nearby duplicate(s)")
    print_summary(results, evaluation_year)

    default_dir = data_root / "phenocam_screening"
    json_target = args.output_json or (default_dir / f"{evaluation_year}.json")
    csv_target = args.output_csv or (default_dir / f"{evaluation_year}.csv")
    write_screening_json(results, json_target, evaluation_year)
    write_screening_csv(results, csv_target)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
805
3-sentinel-data.py
Normal file
805
3-sentinel-data.py
Normal file
|
|
@ -0,0 +1,805 @@
|
|||
"""Step 3: Download S2 and S3 rasters and prepare EFAST inputs.
|
||||
|
||||
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``phenocam_screening/{year}.json`` — step-2 PASS sites (coordinates included)
|
||||
|
||||
Outputs (``data/``):
|
||||
|
||||
- ``sentinel_data/{year}/{sitename}/raw/s3/*.tif`` — S3 SYN L2 per-date GeoTIFFs
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/s2/`` — S2 REFL + DIST_CLOUD GeoTIFFs
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/s3/`` — S3 composite GeoTIFFs
|
||||
- ``sentinel_data/{year}/{sitename}/data.json`` — run summary
|
||||
|
||||
Requires ``CDSE_USER`` / ``CDSE_PASSWORD`` (``uv sync`` installs efast).
|
||||
|
||||
CLI:
|
||||
|
||||
- ``--evaluation-year`` (default 2025)
|
||||
- ``--site`` (optional; default: all step-2 PASS sites)
|
||||
|
||||
Prior step: :mod:`2-phenocam-screening`.
|
||||
Next step: :mod:`4-fusion`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import netCDF4
|
||||
import numpy as np
|
||||
import openeo
|
||||
import rasterio
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from pystac_client import Client
|
||||
from rasterio import shutil as rio_shutil
|
||||
from rasterio.enums import Resampling
|
||||
from rasterio.errors import WindowError
|
||||
from rasterio.transform import from_bounds
|
||||
from rasterio.vrt import WarpedVRT
|
||||
from rasterio.warp import transform_geom
|
||||
from rasterio.windows import Window
|
||||
from rasterio.windows import from_bounds as window_from_bounds
|
||||
from rasterio.windows import transform as window_transform
|
||||
from shapely import wkt as shapely_wkt
|
||||
from tqdm import tqdm
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public constants — edit here to change pipeline behaviour
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
S2_BANDS = ["B02", "B03", "B04"]
|
||||
|
||||
S3_BANDS = [
|
||||
"Syn_Oa04_reflectance",
|
||||
"Syn_Oa06_reflectance",
|
||||
"Syn_Oa08_reflectance",
|
||||
"Syn_Oa17_reflectance",
|
||||
]
|
||||
S3_BAND_NAMES = ["SDR_Oa04", "SDR_Oa06", "SDR_Oa08", "SDR_Oa17"]
|
||||
|
||||
RESOLUTION_RATIO = 30
|
||||
S3_MOSAIC_DAYS = 100
|
||||
S3_COMPOSITE_STEP = 2
|
||||
S3_COMPOSITE_SIGMA_DOY = 10
|
||||
S3_COMPOSITE_D = 20
|
||||
S3_SMOOTHING_STD = 1
|
||||
S3_REFLECTANCE_SCALE = 10_000 # OpenEO SYN L2 SDR → 0–1 (EFAST expects < 5)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal S2 constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
EARTH_SEARCH_URL = "https://earth-search.aws.element84.com/v1"
|
||||
|
||||
_BAND_ASSETS: dict[str, str] = {
|
||||
"B02": "blue",
|
||||
"B03": "green",
|
||||
"B04": "red",
|
||||
"B05": "rededge1",
|
||||
"B06": "rededge2",
|
||||
"B07": "rededge3",
|
||||
"B08": "nir",
|
||||
"B8A": "nir08",
|
||||
"B11": "swir16",
|
||||
"B12": "swir22",
|
||||
}
|
||||
_SCL_ASSET = "scl"
|
||||
_MIN_BBOX_HALF_DEG = 0.008
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal S3 constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CDSE_TOKEN_URL = (
|
||||
"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/"
|
||||
"protocol/openid-connect/token"
|
||||
)
|
||||
OPENEO_URL = "openeo.dataspace.copernicus.eu"
|
||||
S3_COLLECTION = "SENTINEL3_SYN_L2_SYN"
|
||||
|
||||
DATA_DIR = Path("data")
|
||||
DEFAULT_YEAR = 2025
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credentials
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _cdse_credentials() -> dict[str, str | None]:
    """Read the CDSE login from the environment.

    ``load_dotenv()`` is called first so values from a ``.env`` file are
    visible. The result maps ``"username"`` to ``CDSE_USER`` and
    ``"password"`` to ``CDSE_PASSWORD``; a value is ``None`` when the
    corresponding variable is not set.
    """
    load_dotenv()
    user = os.getenv("CDSE_USER")
    secret = os.getenv("CDSE_PASSWORD")
    return {"username": user, "password": secret}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Screening manifest helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_screening_pass_sites(year: int) -> list[dict[str, Any]]:
    """Collect the sites that passed step-2 screening for ``year``.

    Reads ``phenocam_screening/{year}.json`` under ``DATA_DIR`` and keeps the
    rows whose ``calculations.status`` is ``"PASS"`` and whose camera record
    carries a site name and both coordinates.

    Returns:
        One dict per kept site with ``sitename`` (str), ``lat`` and ``lon``
        (floats).

    Raises:
        FileNotFoundError: the screening manifest does not exist.
    """
    manifest = DATA_DIR / "phenocam_screening" / f"{year}.json"
    if not manifest.is_file():
        raise FileNotFoundError(f"Step-2 screening manifest not found: {manifest}")
    document = json.loads(manifest.read_text(encoding="utf-8"))

    passed: list[dict[str, Any]] = []
    for entry in document.get("sites", []):
        if entry.get("calculations", {}).get("status") != "PASS":
            continue
        cam = entry.get("response", {}).get("camera", {})
        sitename = cam.get("Sitename")
        latitude = cam.get("Lat")
        longitude = cam.get("Lon")
        # Rows with an incomplete camera record are silently dropped.
        if not sitename or latitude is None or longitude is None:
            continue
        passed.append(
            {
                "sitename": str(sitename),
                "lat": float(latitude),
                "lon": float(longitude),
            }
        )
    return passed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S2: geometry helpers (from s2_cloud_native.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def wkt_to_bbox(geometry_wkt: str) -> list[float]:
    """Convert a WKT geometry to a ``[west, south, east, north]`` bbox.

    Any axis on which the geometry has zero extent is widened by
    ``_MIN_BBOX_HALF_DEG`` on both sides. A point therefore becomes a small
    square (as before), and a perfectly horizontal or vertical line no longer
    produces a zero-width or zero-height box.

    Args:
        geometry_wkt: Geometry in WKT; coordinates are used as-is.

    Returns:
        ``[minx, miny, maxx, maxy]`` of the geometry after widening.
    """
    geom = shapely_wkt.loads(geometry_wkt)
    minx, miny, maxx, maxy = geom.bounds
    # Pad each axis independently so no degenerate extent survives.
    if minx == maxx:
        minx -= _MIN_BBOX_HALF_DEG
        maxx += _MIN_BBOX_HALF_DEG
    if miny == maxy:
        miny -= _MIN_BBOX_HALF_DEG
        maxy += _MIN_BBOX_HALF_DEG
    return [minx, miny, maxx, maxy]
|
||||
|
||||
|
||||
def _boa_offset(item: Any) -> int:
|
||||
"""Return the BOA additive offset for a STAC item.
|
||||
|
||||
Processing baseline >= 04.00 applies a -1000 offset; earlier baselines use 0.
|
||||
"""
|
||||
if item.properties.get("earthsearch:boa_offset_applied"):
|
||||
return 0
|
||||
baseline_str = str(
|
||||
item.properties.get("processing:baseline")
|
||||
or item.properties.get("s2:processing_baseline")
|
||||
or "0"
|
||||
)
|
||||
try:
|
||||
baseline = float(baseline_str)
|
||||
except ValueError:
|
||||
baseline = 0.0
|
||||
return -1000 if baseline >= 4.0 else 0
|
||||
|
||||
|
||||
def _window_for_bbox(
    src: rasterio.io.DatasetReader,
    bbox_4326: list[float],
) -> Window | None:
    """Locate an EPSG:4326 bbox inside an open raster.

    The bbox corners are transformed into the dataset CRS, their envelope is
    turned into a pixel window, and that window is intersected with the
    dataset extent.

    Returns:
        The clipped window, or ``None`` when the intersection raises
        ``WindowError``.
    """
    west, south, east, north = bbox_4326[:4]
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    projected = transform_geom(
        "EPSG:4326",
        src.crs.to_wkt(),
        {"type": "Polygon", "coordinates": [ring]},
    )
    # The fifth vertex only closes the ring, so four corners are enough.
    corners = projected["coordinates"][0][:4]
    xs = [pt[0] for pt in corners]
    ys = [pt[1] for pt in corners]

    window = window_from_bounds(min(xs), min(ys), max(xs), max(ys), src.transform)
    dataset_extent = Window(0, 0, src.width, src.height)
    try:
        return window.intersection(dataset_extent)
    except WindowError:
        return None
|
||||
|
||||
|
||||
def _read_window(
    href: str,
    bbox_4326: list[float],
    out_shape: tuple[int, int] | None = None,
    resampling: Resampling = Resampling.bilinear,
) -> tuple[np.ndarray, dict[str, Any]] | None:
    """Read band 1 of the raster at ``href`` for the given bbox only.

    Args:
        href: Path or URL accepted by ``rasterio.open``.
        bbox_4326: ``[west, south, east, north]`` in EPSG:4326.
        out_shape: Optional ``(rows, cols)`` to resample the window to.
        resampling: Resampling method used when ``out_shape`` is given.

    Returns:
        ``(array, profile)`` where ``profile`` holds ``crs``, ``transform``,
        ``height``, ``width`` and ``dtype``, or ``None`` when no window could
        be derived for the bbox. The transform is that of the window at the
        source resolution; it is not rescaled for ``out_shape``.
    """
    with rasterio.open(href) as dataset:
        window = _window_for_bbox(dataset, bbox_4326)
        if window is None:
            return None
        pixels = dataset.read(
            1, window=window, out_shape=out_shape, resampling=resampling
        )
        rows, cols = pixels.shape[0], pixels.shape[1]
        meta: dict[str, Any] = {
            "crs": dataset.crs,
            "transform": window_transform(window, dataset.transform),
            "height": rows,
            "width": cols,
            "dtype": dataset.dtypes[0],
        }
    return pixels, meta
|
||||
|
||||
|
||||
def _read_bands(
    item: Any,
    bbox: list[float],
    bands: list[str],
) -> tuple[list[np.ndarray], dict[str, Any]] | None:
    """Read every requested band of one STAC item for the bbox.

    The first band is read at its own resolution and fixes the reference
    shape; later bands are resampled to that shape so the arrays stack.

    Returns:
        ``(arrays, reference_profile)`` with float32 arrays in the order of
        ``bands``, or ``None`` when a band is unknown, its asset is missing,
        a read yields no window, or ``bands`` is empty.
    """
    arrays: list[np.ndarray] = []
    reference: dict[str, Any] | None = None

    for band_id in bands:
        key = _BAND_ASSETS.get(band_id)
        if key is None or key not in item.assets:
            return None

        shape = (reference["height"], reference["width"]) if reference else None
        outcome = _read_window(item.assets[key].href, bbox, out_shape=shape)
        if outcome is None:
            return None

        pixels, meta = outcome
        if reference is None:
            reference = meta
        arrays.append(pixels.astype("float32"))

    if reference is None:
        return None
    return arrays, reference
|
||||
|
||||
|
||||
def _cloud_mask(item: Any, bbox: list[float], shape: tuple[int, int]) -> np.ndarray:
    """Build a boolean mask of unusable pixels from the item's SCL asset.

    A pixel is masked when its SCL value is 0 (no data), 3 (cloud shadow) or
    greater than 7 (clouds, cirrus, snow). The SCL window is resampled to
    ``shape`` with nearest-neighbour so class codes are never blended.

    Returns:
        Boolean array; all ``False`` with the given ``shape`` when the item
        has no SCL asset or the SCL read yields no window.
    """
    scl_asset = item.assets.get(_SCL_ASSET)
    if not scl_asset:
        return np.zeros(shape, dtype=bool)

    loaded = _read_window(
        scl_asset.href, bbox, out_shape=shape, resampling=Resampling.nearest
    )
    if loaded is None:
        return np.zeros(shape, dtype=bool)

    classes = loaded[0]
    return (classes == 0) | (classes == 3) | (classes > 7)
|
||||
|
||||
|
||||
def _pad_to_multiple(arr: np.ndarray, ratio: int) -> np.ndarray:
|
||||
"""Zero-pad (bands, H, W) so H and W are multiples of ``ratio``."""
|
||||
pad_h = (ratio - arr.shape[1] % ratio) % ratio
|
||||
pad_w = (ratio - arr.shape[2] % ratio) % ratio
|
||||
if pad_h or pad_w:
|
||||
arr = np.pad(arr, ((0, 0), (0, pad_h), (0, pad_w)), constant_values=0)
|
||||
return arr
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S2: STAC search + download (from s2_cloud_native.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def stac_search_s2(
    bbox: list[float],
    start_date: datetime,
    end_date: datetime,
) -> list[Any]:
    """Find Sentinel-2 L2A items on Earth Search that intersect ``bbox``.

    The interval runs from ``start_date`` (its own time of day) to the end of
    ``end_date`` (23:59:59). At most 10,000 items are requested, and items
    with a repeated id are returned once.
    """
    window_start = start_date.strftime("%Y-%m-%dT%H:%M:%SZ")
    window_end = end_date.strftime("%Y-%m-%dT23:59:59Z")

    catalog = Client.open(EARTH_SEARCH_URL)
    query = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=bbox,
        datetime=f"{window_start}/{window_end}",
        max_items=10_000,
    )

    # Keyed by id: a later duplicate replaces the earlier one in place.
    unique: dict[Any, Any] = {}
    for granule in query.items():
        unique[granule.id] = granule
    return list(unique.values())
|
||||
|
||||
|
||||
def download_s2_window(
    items: list[Any],
    bbox: list[float],
    output_dir: Path,
    bands: list[str],
    ratio: int = RESOLUTION_RATIO,
) -> None:
    """Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.

    Writes ``{item.id}_REFL.tif`` directly — no intermediate raw download.
    Cloud/shadow pixels (SCL 0, 3, >7) are zeroed. BOA offset is inferred from
    ``processing:baseline``. Output is zero-padded to multiples of ``ratio``.

    An existing ``{item.id}_REFL.tif`` is treated as done and skipped. Each
    file is therefore written under a temporary ``.part`` name and renamed
    only once complete, so an interrupted run cannot leave a truncated file
    that later runs would skip.

    Args:
        items: STAC items to read.
        bbox: ``[west, south, east, north]`` in EPSG:4326.
        output_dir: Destination directory (created if missing).
        bands: Band ids to read, in output band order.
        ratio: Output height and width are padded to multiples of this value.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    for item in tqdm(items, unit="granule", desc="S2 COG window read"):
        out_path = output_dir / f"{item.id}_REFL.tif"
        if out_path.is_file():
            continue

        bands_result = _read_bands(item, bbox, bands)
        if bands_result is None:
            tqdm.write(f"[S2] Skipping {item.id}: missing asset or no bbox overlap")
            continue
        band_arrays, ref_profile = bands_result
        target_shape = (ref_profile["height"], ref_profile["width"])
        mask = _cloud_mask(item, bbox, target_shape)

        # DN → reflectance: apply the baseline offset, then scale by 1/10000.
        stacked = (np.stack(band_arrays) + _boa_offset(item)) / 10_000.0
        # The offset can push values below zero; clamp them to 0 (the nodata value).
        np.clip(stacked, 0, None, out=stacked)
        stacked[:, mask] = 0.0
        stacked = _pad_to_multiple(stacked, ratio)

        out_profile = {
            "driver": "GTiff",
            "count": len(bands),
            "dtype": "float32",
            "nodata": 0,
            "crs": ref_profile["crs"],
            "transform": ref_profile["transform"],
            "height": stacked.shape[1],
            "width": stacked.shape[2],
            "compress": "lzw",
        }
        # The ".part" suffix keeps the file out of every "*_REFL.tif" glob
        # until the rename publishes it.
        tmp_path = out_path.with_name(out_path.name + ".part")
        try:
            with rasterio.open(tmp_path, "w", **out_profile) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(bands, 1):
                    dst.set_band_description(i, band_name)
            tmp_path.replace(out_path)
        finally:
            # No-op after a successful rename; removes the leftover on failure.
            tmp_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S3: download (from s3_openeo.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _utm_epsg(bbox: list[float]) -> int:
|
||||
"""Return the UTM EPSG code for the centre of a ``[W, S, E, N]`` bbox."""
|
||||
lon = (bbox[0] + bbox[2]) / 2
|
||||
lat = (bbox[1] + bbox[3]) / 2
|
||||
zone = int((lon + 180) / 6) + 1
|
||||
return 32600 + zone if lat >= 0 else 32700 + zone
|
||||
|
||||
|
||||
def _cdse_token(username: str, password: str) -> str:
    """Fetch a CDSE access token with the password grant.

    Posts the credentials to ``CDSE_TOKEN_URL`` as the ``cdse-public`` client
    with a 30 s timeout and returns the ``access_token`` field of the JSON
    reply. A non-success HTTP status raises through ``raise_for_status``.
    """
    form = {
        "grant_type": "password",
        "username": username,
        "password": password,
        "client_id": "cdse-public",
    }
    response = requests.post(CDSE_TOKEN_URL, data=form, timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]
|
||||
|
||||
|
||||
def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
    """Split an OpenEO NetCDF into per-date GeoTIFFs.

    Output filenames match the ``S3*__YYYYMMDDTHHMMSS.tif`` pattern that
    ``s3_processing.produce_median_composite`` expects.

    Handles half-pixel cell-centre coordinates, ascending y-axis (flip_y),
    and fills NetCDF masked values with NaN.

    Values are divided by ``S3_REFLECTANCE_SCALE`` before being written, and
    every file name carries a fixed ``T120000`` time regardless of the time
    step's own time of day.

    Args:
        nc_path: NetCDF file with ``t``, ``x``, ``y`` and one variable per
            entry of ``S3_BANDS``.
        output_dir: Existing directory that receives the GeoTIFFs.
        epsg: EPSG code written as the CRS of every output file.

    Returns:
        Number of GeoTIFFs written (one per time step).
    """
    written = 0
    with netCDF4.Dataset(str(nc_path), "r") as nc:
        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
        x_coords = np.asarray(nc.variables["x"][:], dtype=float)
        y_coords = np.asarray(nc.variables["y"][:], dtype=float)

        # x/y are treated as cell centres: grow the extent by half a cell on
        # every side to obtain the outer raster bounds.
        half_x = abs(x_coords[1] - x_coords[0]) / 2 if len(x_coords) > 1 else 0.0
        half_y = abs(y_coords[1] - y_coords[0]) / 2 if len(y_coords) > 1 else 0.0
        transform = from_bounds(
            x_coords.min() - half_x,
            y_coords.min() - half_y,
            x_coords.max() + half_x,
            y_coords.max() + half_y,
            len(x_coords),
            len(y_coords),
        )
        # The transform is built from min/max bounds, so rows stored with
        # ascending y have to be reversed before writing.
        flip_y = len(y_coords) > 1 and y_coords[0] < y_coords[-1]

        # Per-date counter: keeps file names unique when several time steps
        # fall on the same calendar date.
        date_counts: dict[str, int] = {}
        for t_idx, time_val in enumerate(times):
            date_str = time_val.strftime("%Y%m%d")
            n = date_counts.get(date_str, 0)
            date_counts[date_str] = n + 1

            raw = np.stack(
                [nc.variables[b][t_idx, :, :] for b in S3_BANDS], axis=0
            )
            # Masked cells become NaN, which is also the nodata value below.
            stacked = (
                np.ma.filled(raw, fill_value=np.nan).astype("float32")
                / S3_REFLECTANCE_SCALE
            )
            if flip_y:
                stacked = stacked[:, ::-1, :]

            filename = f"S3_{date_str}_{n}__{date_str}T120000.tif"
            with rasterio.open(
                output_dir / filename,
                "w",
                driver="GTiff",
                height=len(y_coords),
                width=len(x_coords),
                count=len(S3_BANDS),
                dtype="float32",
                nodata=float("nan"),
                crs=f"EPSG:{epsg}",
                transform=transform,
                compress="lzw",
            ) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(S3_BAND_NAMES, 1):
                    dst.set_band_description(i, band_name)
            written += 1

    return written
|
||||
|
||||
|
||||
def download_s3_openeo(
    start_date: datetime,
    end_date: datetime,
    aoi_geometry: str,
    output_dir: Path,
    credentials: dict[str, str | None],
) -> None:
    """Download S3 SYN L2 SDR for an AOI via CDSE OpenEO, server-side clipped.

    Writes per-date ``S3_{YYYYMMDD}_{n}__{YYYYMMDD}T120000.tif`` files to
    ``output_dir``, ready for ``s3_processing.produce_median_composite``.
    Skips if any ``S3*.tif`` files already exist.

    Args:
        start_date: First day of the temporal extent.
        end_date: Last day passed as the temporal extent's end.
        aoi_geometry: AOI as WKT; its bbox is the spatial extent.
        output_dir: Destination directory (created if missing).
        credentials: Dict with ``username`` and ``password`` for CDSE.

    Raises:
        ValueError: a download is needed but a credential is missing or
            empty. The check runs after the skip test, so cached sites can
            still be re-processed without credentials.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    if any(output_dir.glob("S3*.tif")):
        print("[S3-OEO] Skipping — output_dir already contains S3 GeoTIFFs")
        return

    # Fail with an actionable message instead of sending ``None`` to the
    # token endpoint and surfacing an opaque HTTP error.
    username = credentials.get("username")
    password = credentials.get("password")
    if not username or not password:
        raise ValueError(
            "CDSE credentials missing: set CDSE_USER and CDSE_PASSWORD "
            "(environment or .env)"
        )

    bbox = wkt_to_bbox(aoi_geometry)
    epsg = _utm_epsg(bbox)
    spatial_extent = {
        "west": bbox[0],
        "east": bbox[2],
        "south": bbox[1],
        "north": bbox[3],
    }

    print("[S3-OEO] Authenticating with CDSE...")
    token = _cdse_token(username, password)
    conn = openeo.connect(OPENEO_URL)
    conn.authenticate_oidc_access_token(token)

    start_str = start_date.strftime("%Y-%m-%d")
    end_str = end_date.strftime("%Y-%m-%d")
    print(f"[S3-OEO] Loading {S3_COLLECTION} ({start_str} → {end_str})...")
    datacube = conn.load_collection(
        S3_COLLECTION,
        spatial_extent=spatial_extent,
        temporal_extent=[start_str, end_str],
        bands=S3_BANDS,
    ).resample_spatial(projection=epsg)

    # The leading underscore keeps the NetCDF out of the "S3*.tif" globs.
    nc_path = output_dir / "_s3_syn_l2.nc"
    print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
    t0 = time.time()
    datacube.download(str(nc_path), format="NetCDF")
    print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")

    print("[S3-OEO] Splitting into per-date GeoTIFFs...")
    written = _netcdf_to_geotiffs(nc_path, output_dir, epsg)
    nc_path.unlink(missing_ok=True)
    print(f"[S3-OEO] {written} GeoTIFFs written to {output_dir}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S2: distance_to_clouds helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _import_distance_to_clouds():
|
||||
try:
|
||||
from efast.s2_processing import distance_to_clouds
|
||||
|
||||
return distance_to_clouds
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"efast not found. Install with: uv sync"
|
||||
) from exc
|
||||
|
||||
|
||||
def _rescale_dist_cloud(s2_dir: Path) -> None:
    """Rewrite DIST_CLOUD rasters whose values never exceed 1.

    For every ``*DIST_CLOUD.tif`` in ``s2_dir`` whose band-1 maximum
    (ignoring NaN) is at most 1, each positive value is replaced by ``2.0``
    and the band is written back in place as float32. Other files are left
    untouched.
    """
    for tif in s2_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(tif) as reader:
            distances = reader.read(1)

        peak = float(np.nanmax(distances))
        # Deliberately ``not (peak <= 1)``, not ``peak > 1``: a NaN maximum
        # must leave the file untouched too.
        if not (peak <= 1):
            continue

        patched = np.where(distances > 0, 2.0, distances).astype(np.float32)
        with rasterio.open(tif, "r+") as writer:
            writer.write(patched, 1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# S3: compositing + reprojection helpers (from 4-sentinel-data.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _import_s3_processing():
|
||||
try:
|
||||
from efast import s3_processing
|
||||
|
||||
return s3_processing
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"efast not found. Install with: uv sync"
|
||||
) from exc
|
||||
|
||||
|
||||
def _reproject_s3_composites_to_s2_grid(
    composite_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    resolution_ratio: int = RESOLUTION_RATIO,
) -> None:
    """Warp every S3 composite onto a coarse version of the S2 grid.

    The target grid has the bounds and CRS of ``s2_refl_path`` and its width
    and height integer-divided by ``resolution_ratio``. Each
    ``composite_*.tif`` in ``composite_dir`` is resampled (cubic) and written
    to ``s3_out_dir`` as ``composite_{date}.tif``, with dashes removed from
    the date part, as float32 GTiff with nodata 0.
    """
    s3_out_dir.mkdir(parents=True, exist_ok=True)

    with rasterio.open(s2_refl_path) as reference:
        bounds = reference.bounds
        crs = reference.crs
        cols = reference.width // resolution_ratio
        rows = reference.height // resolution_ratio

    grid = rasterio.transform.from_bounds(
        bounds.left,
        bounds.bottom,
        bounds.right,
        bounds.top,
        cols,
        rows,
    )
    # Identical for every composite, so it is built once.
    warp_kwargs = {
        "transform": grid,
        "height": rows,
        "width": cols,
        "crs": crs,
        "resampling": Resampling.cubic,
    }

    for composite in sorted(composite_dir.glob("composite_*.tif")):
        stamp = composite.stem.split("_", 1)[1].replace("-", "")
        target = s3_out_dir / f"composite_{stamp}.tif"
        with rasterio.open(composite) as source, WarpedVRT(source, **warp_kwargs) as vrt:
            meta = vrt.profile.copy()
            meta.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
            rio_shutil.copy(vrt, target, **meta)
|
||||
|
||||
|
||||
def _s3_reflectance_scale(raw_s3_dir: Path) -> float:
    """Choose the factor that brings raw S3 rasters into 0–1 reflectance.

    Returns ``1 / S3_REFLECTANCE_SCALE`` as soon as one ``S3*.tif`` in
    ``raw_s3_dir`` has a finite maximum above 5, and ``1.0`` when no file
    does (including when there are no files).
    """
    for tif in raw_s3_dir.glob("S3*.tif"):
        with rasterio.open(tif) as dataset:
            peak = float(np.nanmax(dataset.read()))
        # Values above 5 cannot be reflectance, so the data must still be unscaled.
        if np.isfinite(peak) and peak > 5:
            return 1.0 / S3_REFLECTANCE_SCALE
    return 1.0
|
||||
|
||||
|
||||
def _stage_s3_for_efast(raw_s3_dir: Path, staging_dir: Path) -> int:
    """Copy the raw ``S3*.tif`` files into a fresh staging directory.

    ``staging_dir`` is deleted and recreated. Every file is rewritten as
    float32, multiplied by the factor from ``_s3_reflectance_scale``, and
    keeps its non-empty band descriptions.

    Returns:
        Number of files staged.
    """
    factor = _s3_reflectance_scale(raw_s3_dir)
    if staging_dir.exists():
        shutil.rmtree(staging_dir)
    staging_dir.mkdir(parents=True)

    inputs = sorted(raw_s3_dir.glob("S3*.tif"))
    for raw_path in inputs:
        with rasterio.open(raw_path) as reader:
            scaled = reader.read().astype("float32") * factor
            meta = reader.profile.copy()
            labels = reader.descriptions
        meta.update(dtype="float32")

        with rasterio.open(staging_dir / raw_path.name, "w", **meta) as writer:
            writer.write(scaled)
            for band_no, label in enumerate(labels, 1):
                if label:
                    writer.set_band_description(band_no, label)

    if factor != 1.0:
        print(f"[S3-PREP] Scaled raw SDR by {factor:g} for EFAST compositing")
    return len(inputs)
|
||||
|
||||
|
||||
def _prepare_s3(
    raw_s3_dir: Path,
    s2_refl_path: Path,
    s3_out_dir: Path,
    *,
    work_dir: Path | None = None,
) -> None:
    """Run EFAST S3 compositing pipeline and reproject to S2 grid.

    Stages: scale/stage raw inputs → ``produce_median_composite`` →
    ``smoothing`` → ``reformat_s3`` → reprojection onto the grid derived from
    ``s2_refl_path``. Existing ``composite_*.tif`` files in ``s3_out_dir``
    are removed before the new ones are written.

    Args:
        raw_s3_dir: Directory holding the per-date ``S3*.tif`` inputs.
        s2_refl_path: S2 raster that defines the target grid.
        s3_out_dir: Destination for the final ``composite_*.tif`` files.
        work_dir: Directory for intermediate products. When omitted,
            ``s3_out_dir / "_efast_work"`` is used and deleted at the end; a
            caller-supplied directory is kept.

    Raises:
        ValueError: ``raw_s3_dir`` contains no ``S3*.tif`` file.
    """
    s3 = _import_s3_processing()
    base = work_dir or (s3_out_dir / "_efast_work")
    staging = base / "scaled"
    composites = base / "composites"
    blurred = base / "blurred"
    calibrated = base / "calibrated"

    # Start every stage from an empty directory so leftovers from an earlier
    # run cannot leak into this one.
    for directory in (staging, composites, blurred, calibrated):
        if directory.exists():
            shutil.rmtree(directory)
        directory.mkdir(parents=True, exist_ok=True)

    staged = _stage_s3_for_efast(raw_s3_dir, staging)
    if staged == 0:
        raise ValueError(f"No S3*.tif files found in {raw_s3_dir}")

    print(
        f"[S3-PREP] produce_median_composite: mosaic_days={S3_MOSAIC_DAYS}, "
        f"step={S3_COMPOSITE_STEP}, sigma_doy={S3_COMPOSITE_SIGMA_DOY}, "
        f"D={S3_COMPOSITE_D}"
    )
    s3.produce_median_composite(
        staging,
        composites,
        step=S3_COMPOSITE_STEP,
        mosaic_days=S3_MOSAIC_DAYS,
        s3_bands=[1, 2, 3, 4],
        D=S3_COMPOSITE_D,
        sigma_doy=S3_COMPOSITE_SIGMA_DOY,
    )
    s3.smoothing(
        composites,
        blurred,
        product="composite",
        std=S3_SMOOTHING_STD,
        preserve_nan=False,
    )
    # scaling_factor=1: inputs were already scaled during staging.
    s3.reformat_s3(blurred, calibrated, product="composite", scaling_factor=1)

    # Drop composites from an earlier run before writing the new set.
    for old in s3_out_dir.glob("composite_*.tif"):
        old.unlink()
    _reproject_s3_composites_to_s2_grid(calibrated, s2_refl_path, s3_out_dir)

    # Only the default scratch directory is removed; a caller-supplied
    # work_dir is left in place.
    if work_dir is None and base.exists():
        shutil.rmtree(base)

    n_out = len(list(s3_out_dir.glob("composite_*.tif")))
    print(f"[S3-PREP] Wrote {n_out} composites")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-site pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def process_site(
    sitename: str,
    lat: float,
    lon: float,
    year: int,
) -> dict[str, Any]:
    """Download S2 + S3 and run EFAST preparation for one site.

    Outputs go under ``DATA_DIR / "sentinel_data" / {year} / {sitename}``:
    ``raw/s3`` (per-date S3 GeoTIFFs), ``prepared/s2`` (REFL and DIST_CLOUD
    rasters), ``prepared/s3`` (composites on the coarse S2 grid) and
    ``data.json`` (the returned summary).

    Args:
        sitename: Site identifier, used as the directory name.
        lat: Site latitude in degrees.
        lon: Site longitude in degrees.
        year: Calendar year to download (1 January to 31 December).

    Returns:
        Summary dict with the site identity and the number of files produced
        by each stage.

    Raises:
        ValueError: no REFL file exists after the S2 download.
    """
    site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
    s2_out = site_dir / "prepared" / "s2"
    s3_raw = site_dir / "raw" / "s3"
    s3_out = site_dir / "prepared" / "s3"
    aoi_wkt = f"POINT ({lon} {lat})"
    bbox = wkt_to_bbox(aoi_wkt)
    creds = _cdse_credentials()

    # S3 download
    print(f"[{sitename}] Downloading S3...")
    download_s3_openeo(
        start_date=datetime(year, 1, 1),
        end_date=datetime(year, 12, 31),
        aoi_geometry=aoi_wkt,
        output_dir=s3_raw,
        credentials=creds,
    )

    # S2 download
    print(f"[{sitename}] Searching S2 on Earth Search...")
    items = stac_search_s2(bbox, datetime(year, 1, 1), datetime(year, 12, 31))
    print(f"[{sitename}] {len(items)} S2 items found — downloading windows...")
    download_s2_window(items, bbox, s2_out, S2_BANDS, RESOLUTION_RATIO)

    # S2 distance-to-clouds
    print(f"[{sitename}] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_out, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud(s2_out)

    # S3 compositing
    # glob() order depends on the filesystem; take the lexicographically
    # first REFL file so the reference grid is the same on every run.
    s2_refl_path = min(s2_out.glob("*_REFL.tif"), default=None)
    if s2_refl_path is None:
        raise ValueError(f"No REFL files in {s2_out} — S2 download may have failed")
    s3_out.mkdir(parents=True, exist_ok=True)
    print(f"[{sitename}] Running S3 compositing pipeline...")
    _prepare_s3(s3_raw, s2_refl_path, s3_out)

    summary = {
        "sitename": sitename,
        "evaluation_year": year,
        "lat": lat,
        "lon": lon,
        "s2_refl_count": len(list(s2_out.glob("*_REFL.tif"))),
        "s2_dist_cloud_count": len(list(s2_out.glob("*_DIST_CLOUD.tif"))),
        "s3_raw_count": len(list(s3_raw.glob("S3*.tif"))),
        "s3_composite_count": len(list(s3_out.glob("composite_*.tif"))),
    }
    site_dir.mkdir(parents=True, exist_ok=True)
    (site_dir / "data.json").write_text(
        json.dumps(summary, indent=2) + "\n", encoding="utf-8"
    )
    return summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: prepare Sentinel data for the step-2 PASS sites.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when every selected site was processed; ``1`` when there is
        nothing to process, the requested site is not a PASS site, or at
        least one site failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to process (default: all step-2 PASS sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year

    pass_sites = _load_screening_pass_sites(year)
    if not pass_sites:
        print("[Sentinel-3] No PASS sites found in step-2 screening output")
        return 1

    if args.site:
        pass_sites = [s for s in pass_sites if s["sitename"] == args.site]
        if not pass_sites:
            print(f"[Sentinel-3] Site '{args.site}' not found in step-2 PASS sites")
            return 1

    print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
    failed: list[str] = []
    for i, site in enumerate(pass_sites, 1):
        sitename = site["sitename"]
        print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
        try:
            summary = process_site(sitename, site["lat"], site["lon"], year)
        except Exception as exc:
            # Best effort: one bad site must not abort the batch, but the
            # failure is remembered so the exit status reflects it.
            print(f"[Sentinel-3] {sitename} FAILED: {exc}")
            failed.append(sitename)
            continue
        print(
            f"[Sentinel-3] {sitename} done — "
            f"{summary['s2_refl_count']} REFL, "
            f"{summary['s3_composite_count']} composites"
        )

    if failed:
        print(f"[Sentinel-3] {len(failed)} site(s) failed: {', '.join(failed)}")
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    raise SystemExit(main())
|
||||
330
4-fusion.py
Normal file
330
4-fusion.py
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
"""Step 4: Compute GCC and run EFAST BtI + ItB fusion for prepared sites.
|
||||
|
||||
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/s2/`` — ``*_REFL.tif`` + ``*_DIST_CLOUD.tif``
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/s3/`` — ``composite_*.tif`` (4-band)
|
||||
|
||||
Outputs (``data/``):
|
||||
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/s2/*_GCC.tif`` — S2 GCC (in-place)
|
||||
- ``sentinel_data/{year}/{sitename}/prepared/gcc_s3/*.tif`` — S3 GCC composites
|
||||
- ``fusion/{year}/{sitename}/bti/fusion/REFL_*.tif`` — BtI fused 4-band reflectance
|
||||
- ``fusion/{year}/{sitename}/bti/gcc/GCC_*.tif`` — GCC derived from BtI fusion
|
||||
- ``fusion/{year}/{sitename}/itb/s2/GCC_*.tif`` — per-acquisition S2 GCC (simplified names)
|
||||
- ``fusion/{year}/{sitename}/itb/s3/GCC_*.tif`` — per-composite S3 GCC (simplified names)
|
||||
- ``fusion/{year}/{sitename}/itb/fusion/GCC_*.tif`` — ItB fused GCC
|
||||
|
||||
Requires ``uv sync`` (efast).
|
||||
|
||||
CLI:
|
||||
|
||||
- ``--evaluation-year`` (default 2025)
|
||||
- ``--site`` (optional; default: all prepared sites under ``sentinel_data/{year}/``)
|
||||
|
||||
Prior step: :mod:`3-sentinel-data`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from dateutil import rrule
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Values collected into the fusion keyword arguments in fuse_site
# (``ratio``, ``max_days``, ``minimum_acquisition_importance``).
RESOLUTION_RATIO = 30
# Spacing in days between fusion dates; the same number of days is also
# trimmed from both ends of the detected S2 date range.
MOSAIC_STEP = 2
MAX_DAYS = 100
MINIMUM_ACQUISITION_IMPORTANCE = 0

# Root of all pipeline inputs and outputs; relative, so it resolves against
# the current working directory.
DATA_DIR = Path("data")
DEFAULT_YEAR = 2025
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# efast import helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _import_efast():
|
||||
try:
|
||||
import efast.efast as efast_module
|
||||
|
||||
return efast_module
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"efast not found. Install with: uv sync"
|
||||
) from exc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GCC computation (from s2_cloud_native.py and s3_openeo.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_gcc_s2(s2_dir: Path, output_dir: Path) -> None:
    """Write a single-band GCC raster next to every S2 REFL raster.

    For each ``*_REFL.tif`` in ``s2_dir`` (bands 1–3 read as B02/B03/B04),
    ``green / (blue + green + red)`` is written to ``output_dir`` under the
    same name with ``_REFL.tif`` replaced by ``_GCC.tif``. Pixels whose band
    sum is zero (cloud-masked) stay zero. Existing outputs are not
    recomputed.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    for refl_path in sorted(s2_dir.glob("*_REFL.tif")):
        target = output_dir / refl_path.name.replace("_REFL.tif", "_GCC.tif")
        if target.is_file():
            continue

        with rasterio.open(refl_path) as reader:
            blue = reader.read(1)
            green = reader.read(2)
            red = reader.read(3)
            meta = reader.profile

        brightness = blue + green + red
        # The epsilon only avoids division by zero; those pixels are reset below.
        ratio = green / (brightness + 1e-10)
        ratio[brightness == 0] = 0

        meta.update(count=1)
        with rasterio.open(target, "w", **meta) as writer:
            writer.write(ratio[np.newaxis].astype("float32"))
|
||||
|
||||
|
||||
def compute_gcc_s3(s3_dir: Path, output_dir: Path) -> None:
    """Write a single-band GCC raster for every S3 composite.

    For each ``composite_*.tif`` in ``s3_dir`` (bands 1–3 read as
    Oa04/Oa06/Oa08), ``green / (blue + green + red)`` is written to
    ``output_dir`` under the same file name as float32. Pixels whose band sum
    is NaN stay NaN. Existing outputs are not recomputed.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    for composite in sorted(s3_dir.glob("composite_*.tif")):
        target = output_dir / composite.name
        if target.is_file():
            continue

        with rasterio.open(composite) as reader:
            blue = reader.read(1)
            green = reader.read(2)
            red = reader.read(3)
            meta = reader.profile

        brightness = blue + green + red
        ratio = green / (brightness + 1e-10)
        ratio[np.isnan(brightness)] = np.nan

        meta.update(count=1, dtype="float32")
        with rasterio.open(target, "w", **meta) as writer:
            writer.write(ratio[np.newaxis].astype("float32"))
|
||||
|
||||
|
||||
def compute_gcc_from_refl(refl_dir: Path, gcc_dir: Path) -> None:
    """Derive GCC from ``REFL_YYYYMMDD.tif`` files (BtI fusion output).

    Reads every ``REFL_*.tif`` and writes a co-located single-band
    ``GCC_YYYYMMDD.tif``. Zero pixels remain zero.

    The output profile is forced to float32, matching ``compute_gcc_s3``:
    GCC is a fraction in 0–1 and must not inherit an integer dtype from the
    reflectance source. Existing outputs are not recomputed.

    Args:
        refl_dir: Directory containing the ``REFL_*.tif`` inputs
            (bands 1–3 are read as blue/green/red).
        gcc_dir: Destination directory (created if missing).
    """
    gcc_dir.mkdir(parents=True, exist_ok=True)
    for src_path in sorted(refl_dir.glob("REFL_*.tif")):
        out_path = gcc_dir / src_path.name.replace("REFL_", "GCC_")
        if out_path.is_file():
            continue
        with rasterio.open(src_path) as src:
            b, g, r = src.read(1), src.read(2), src.read(3)
            profile = src.profile
        total = b + g + r
        # The epsilon only avoids division by zero; those pixels are reset below.
        gcc = g / (total + 1e-10)
        gcc[total == 0] = 0
        # Declare the dtype that is actually written instead of the source's.
        profile.update(count=1, dtype="float32")
        with rasterio.open(out_path, "w", **profile) as dst:
            dst.write(gcc[np.newaxis].astype("float32"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Date-range detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _refl_date_range(s2_dir: Path) -> tuple[datetime, datetime] | None:
|
||||
"""Return (start, end) datetime from REFL filenames in ``s2_dir``.
|
||||
|
||||
Filenames are expected to follow the S2 product naming convention, where
|
||||
the acquisition date ``YYYYMMDD`` appears at position index 2 when the
|
||||
stem is split by ``_``, e.g.
|
||||
``S2A_MSIL2A_20230911T114111_N0509_R025_T29PKT_20230911T153131_REFL.tif``.
|
||||
"""
|
||||
dates: list[datetime] = []
|
||||
for p in s2_dir.glob("*_REFL.tif"):
|
||||
parts = p.stem.split("_")
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
dates.append(datetime.strptime(parts[2][:8], "%Y%m%d"))
|
||||
except ValueError:
|
||||
pass
|
||||
if not dates:
|
||||
return None
|
||||
return min(dates), max(dates)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-site fusion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def fuse_site(sitename: str, year: int) -> dict[str, Any]:
    """Compute GCC inputs and run both EFAST fusion variants for one site.

    Reads the prepared Sentinel-2/Sentinel-3 rasters under
    ``data/sentinel_data/{year}/{sitename}/prepared`` and writes to
    ``data/fusion/{year}/{sitename}``:

    - ``itb/``: GCC is computed first and the GCC rasters are fused.
    - ``bti/``: 3-band reflectance is fused first, then GCC is derived from
      the fused reflectance.

    Raises:
        FileNotFoundError: when no S2 ``*_REFL.tif`` or no S3
            ``composite_*.tif`` inputs exist.
        ValueError: when no acquisition date can be parsed from the S2
            REFL filenames.

    Returns:
        A summary dict with the site name, year, detected date range and
        the number of fusion dates / output rasters.
    """
    efast = _import_efast()

    prepared = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared"
    s2_dir = prepared / "s2"
    s3_dir = prepared / "s3"
    gcc_s3_dir = prepared / "gcc_s3"
    base = DATA_DIR / "fusion" / str(year) / sitename

    has_refl = s2_dir.is_dir() and any(s2_dir.glob("*_REFL.tif"))
    if not has_refl:
        raise FileNotFoundError(f"No REFL files in {s2_dir}")
    has_composites = s3_dir.is_dir() and any(s3_dir.glob("composite_*.tif"))
    if not has_composites:
        raise FileNotFoundError(f"No composite files in {s3_dir}")

    # S2 GCC rasters are written next to the REFL rasters they derive from.
    print(f"[{sitename}] Computing S2 GCC (in-place)...")
    compute_gcc_s2(s2_dir, s2_dir)

    print(f"[{sitename}] Computing S3 GCC...")
    compute_gcc_s3(s3_dir, gcc_s3_dir)

    date_range = _refl_date_range(s2_dir)
    if date_range is None:
        raise ValueError(f"Could not detect date range from REFL filenames in {s2_dir}")
    start, end = date_range
    print(f"[{sitename}] Date range: {start.date()} → {end.date()}")

    # Predict every MOSAIC_STEP days, keeping one step of margin at both ends.
    margin = timedelta(MOSAIC_STEP)
    fusion_dates = list(
        rrule.rrule(
            rrule.DAILY,
            dtstart=start + margin,
            until=end - margin,
            interval=MOSAIC_STEP,
        )
    )

    _fusion_kwargs = {
        "ratio": RESOLUTION_RATIO,
        "max_days": MAX_DAYS,
        "minimum_acquisition_importance": MINIMUM_ACQUISITION_IMPORTANCE,
    }

    # --- ItB: GCC first, then fuse GCC ---
    itb_root = base / "itb"
    itb_s2 = itb_root / "s2"
    itb_s3 = itb_root / "s3"
    itb_fusion = itb_root / "fusion"
    for directory in (itb_s2, itb_s3, itb_fusion):
        directory.mkdir(parents=True, exist_ok=True)

    # Mirror the GCC inputs under date-only names; existing copies are kept.
    for gcc_path in sorted(s2_dir.glob("*_GCC.tif")):
        target = itb_s2 / f"GCC_{gcc_path.stem.split('_')[2][:8]}.tif"
        if target.exists():
            continue
        shutil.copy2(gcc_path, target)
    for gcc_path in sorted(gcc_s3_dir.glob("composite_*.tif")):
        target = itb_s3 / f"GCC_{gcc_path.stem.split('_')[1]}.tif"
        if target.exists():
            continue
        shutil.copy2(gcc_path, target)

    print(f"[{sitename}] ItB: fusing GCC over {len(fusion_dates)} dates...")
    for fusion_date in fusion_dates:
        efast.fusion(fusion_date, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **_fusion_kwargs)

    # --- BtI: fuse reflectance (3-band, matching S2 B02/B03/B04), then derive GCC ---
    # S3 composites have 4 bands; strip band 4 (Oa17/NIR) so shapes match S2 REFL.
    s3_rgb_dir = prepared / "s3_rgb"
    s3_rgb_dir.mkdir(parents=True, exist_ok=True)
    for composite in sorted(s3_dir.glob("composite_*.tif")):
        rgb_path = s3_rgb_dir / composite.name
        if rgb_path.exists():
            continue
        with rasterio.open(composite) as src:
            rgb = src.read([1, 2, 3])
            profile = src.profile.copy()
        profile.update(count=3)
        with rasterio.open(rgb_path, "w", **profile) as sink:
            sink.write(rgb)

    bti_fusion = base / "bti" / "fusion"
    bti_gcc = base / "bti" / "gcc"
    bti_fusion.mkdir(parents=True, exist_ok=True)

    print(f"[{sitename}] BtI: fusing REFL over {len(fusion_dates)} dates...")
    for fusion_date in fusion_dates:
        efast.fusion(fusion_date, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **_fusion_kwargs)

    print(f"[{sitename}] BtI: deriving GCC from fused REFL...")
    compute_gcc_from_refl(bti_fusion, bti_gcc)

    return {
        "sitename": sitename,
        "evaluation_year": year,
        "start": start.date().isoformat(),
        "end": end.date().isoformat(),
        "fusion_dates": len(fusion_dates),
        "itb_fusion_files": sum(1 for _ in itb_fusion.glob("*.tif")),
        "bti_fusion_files": sum(1 for _ in bti_fusion.glob("*.tif")),
        "bti_gcc_files": sum(1 for _ in bti_gcc.glob("*.tif")),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Site discovery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _discover_sites(year: int) -> list[str]:
    """List, alphabetically, the sites that have prepared S2 REFL rasters.

    A site qualifies when ``data/sentinel_data/{year}/{site}/prepared/s2``
    contains at least one ``*_REFL.tif``. Returns an empty list when the
    year directory does not exist.
    """
    year_root = DATA_DIR / "sentinel_data" / str(year)
    if not year_root.is_dir():
        return []

    found: list[str] = []
    for entry in year_root.iterdir():
        if not entry.is_dir():
            continue
        if any((entry / "prepared" / "s2").glob("*_REFL.tif")):
            found.append(entry.name)
    found.sort()
    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: fuse one site or every prepared site.

    Sites are processed best-effort: a failure in one site is reported and
    the remaining sites are still processed.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code: ``0`` when every site succeeded, ``1`` when no
        prepared site was found or at least one site failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    parser.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to fuse (default: all prepared sites)",
    )
    args = parser.parse_args(argv)
    year = args.evaluation_year

    if args.site:
        sites = [args.site]
    else:
        sites = _discover_sites(year)
        if not sites:
            print(f"[Fusion] No prepared sites found under data/sentinel_data/{year}/")
            return 1

    print(f"[Fusion] Processing {len(sites)} site(s)")
    failed: list[str] = []
    for i, sitename in enumerate(sites, 1):
        print(f"[Fusion] ({i}/{len(sites)}) {sitename}")
        try:
            summary = fuse_site(sitename, year)
        except Exception as exc:
            # Deliberately broad: one broken site must not abort the batch,
            # but the failure is recorded so the exit code can reflect it.
            print(f"[Fusion] {sitename} FAILED: {exc}")
            failed.append(sitename)
            continue
        print(
            f"[Fusion] {sitename} done — "
            f"{summary['fusion_dates']} dates, "
            f"itb={summary['itb_fusion_files']} bti={summary['bti_fusion_files']} "
            f"bti_gcc={summary['bti_gcc_files']}"
        )

    if failed:
        print(f"[Fusion] {len(failed)}/{len(sites)} site(s) failed: {', '.join(failed)}")
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
695
5-metrics.py
Normal file
695
5-metrics.py
Normal file
|
|
@ -0,0 +1,695 @@
|
|||
"""Step 5: Pre-compute per-site GCC timeseries + raster index for the webapp.
|
||||
|
||||
Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``phenocam_screening/{year}.json`` — qualifying sites + metadata
|
||||
- ``phenocam/{year}/{site}_1day.csv`` — daily GCC timeseries
|
||||
- ``sentinel_data/{year}/{site}/prepared/s2/*_GCC.tif`` — S2 GCC rasters
|
||||
- ``sentinel_data/{year}/{site}/prepared/gcc_s3/composite_*.tif`` — S3 GCC rasters
|
||||
- ``fusion/{year}/{site}/bti/gcc/GCC_*.tif`` — BtI GCC rasters
|
||||
- ``fusion/{year}/{site}/itb/fusion/GCC_*.tif`` — ItB GCC rasters
|
||||
|
||||
Outputs (``data/metrics/``):
|
||||
|
||||
- ``manifest.json`` — years + per-site metadata
|
||||
- ``{year}/{site}/gcc_phenocam.json`` — PhenoCam daily ``gcc_90`` for the evaluation year (days without a value are omitted)
|
||||
- ``{year}/{site}/gcc_s2.json`` — S2 GCC (center pixel, cloud-free scenes)
|
||||
- ``{year}/{site}/gcc_s2_whittaker.json`` — Whittaker-smoothed S2 GCC
|
||||
- ``{year}/{site}/gcc_s3.json`` — S3 composite GCC
|
||||
- ``{year}/{site}/gcc_s3_smooth.json`` — S3 5-day moving average
|
||||
- ``{year}/{site}/gcc_fusion_bti.json`` — BtI fused GCC
|
||||
- ``{year}/{site}/gcc_fusion_itb.json`` — ItB fused GCC
|
||||
- ``{year}/{site}/phenocam_images.json`` — midday photo URLs for the viewer
|
||||
- ``{year}/{site}/rasters_s2_refl.json`` — S2 REFL paths (BtI view)
|
||||
- ``{year}/{site}/rasters_s3_composite.json`` — S3 composite paths (BtI view)
|
||||
- ``{year}/{site}/rasters_s2_gcc.json`` — S2 GCC paths (ItB view)
|
||||
- ``{year}/{site}/rasters_s3_gcc.json`` — S3 GCC paths (ItB view)
|
||||
- ``{year}/{site}/rasters_fusion_bti_refl.json`` — BtI fused REFL paths
|
||||
- ``{year}/{site}/rasters_fusion_itb_gcc.json`` — ItB fused GCC paths
|
||||
- ``{year}/{site}/metrics.json`` — NSE, RMSE, nRMSE, Pearson r vs PhenoCam per series
|
||||
- ``{year}/{site}/bands_s2.json`` — S2 center-pixel reflectance (B02, B03, B04) per scene
|
||||
- ``{year}/{site}/bands_s3.json`` — S3 center-pixel reflectance (Oa04, Oa06, Oa08, Oa17) per composite
|
||||
- ``{year}/{site}/covariates.json`` — spatial CV/std, S2/S3 counts, gap stats
|
||||
|
||||
CLI:
|
||||
|
||||
- ``--evaluation-year`` (default 2025)
|
||||
- ``--site`` (optional; default: every screened site that already has BtI fusion output)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import datetime
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.crs import CRS
|
||||
from rasterio.transform import rowcol
|
||||
from pyproj import Transformer
|
||||
from scipy.stats import pearsonr
|
||||
from tqdm import tqdm
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DATA_DIR = Path("data")
|
||||
DEFAULT_YEAR = 2025
|
||||
|
||||
# GCC smoothing window for S3 moving average (days)
|
||||
S3_SMOOTH_WINDOW = 5
|
||||
|
||||
# Whittaker lambda (penalised smoothing strength for S2)
|
||||
WHITTAKER_LAMBDA = 400.0
|
||||
|
||||
# Half-width in metres for the spatial heterogeneity footprint (~300 m = 1 S3 pixel)
|
||||
SPATIAL_CV_HALF_M = 150
|
||||
|
||||
# PhenoCam archive image URL pattern
|
||||
PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: raster pixel extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _read_center_pixel(path: Path, lat: float, lon: float) -> float | None:
    """Return the 3×3 mean of band 1 around (lat, lon) in a single-band raster.

    The WGS84 coordinate is reprojected into the raster CRS and the 3×3 window
    centred on the containing pixel (clipped at the raster edge) is averaged.
    Nodata, zero and NaN pixels are excluded from the mean.

    Returns:
        The mean as a float, or ``None`` when the coordinate falls outside
        the raster, when every pixel in the window is masked/zero/NaN, or
        when the raster cannot be read (best-effort: errors are swallowed so
        a single bad file does not abort a whole time series).
    """
    try:
        with rasterio.open(path) as src:
            transformer = Transformer.from_crs(
                CRS.from_epsg(4326), src.crs, always_xy=True
            )
            x, y = transformer.transform(lon, lat)
            row, col = rowcol(src.transform, x, y)
            h, w = src.height, src.width
            # Without this check, a point one pixel outside the raster would
            # be answered with the neighbouring edge pixels.
            if not (0 <= row < h and 0 <= col < w):
                return None
            r0, r1 = max(0, row - 1), min(h, row + 2)
            c0, c1 = max(0, col - 1), min(w, col + 2)
            window = rasterio.windows.Window(c0, r0, c1 - c0, r1 - r0)
            data = src.read(1, window=window).astype(float)
            nodata = src.nodata
            if nodata is not None:
                data[data == nodata] = np.nan
            data[data == 0] = np.nan
            # Test explicitly so nanmean never runs on an all-NaN window
            # (which would emit a RuntimeWarning for every masked scene).
            if np.isnan(data).all():
                return None
            return float(np.nanmean(data))
    except Exception:
        return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: date extraction from filenames
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _date_from_gcc_tif(path: Path) -> str | None:
|
||||
"""Extract YYYYMMDD from ``GCC_YYYYMMDD.tif`` or ``composite_YYYYMMDD.tif``."""
|
||||
m = re.search(r"(\d{8})", path.stem)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def _date_from_s2_tif(path: Path) -> str | None:
|
||||
"""Extract YYYYMMDD from S2 product name ``S2X_TTTT_YYYYMMDD_…``."""
|
||||
parts = path.stem.split("_")
|
||||
if len(parts) >= 3:
|
||||
m = re.match(r"(\d{8})", parts[2])
|
||||
return m.group(1) if m else None
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: Whittaker smoother (2nd-order differences, dense linear solve)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
    """Penalised least-squares smoother (Whittaker, 2nd-order differences).

    Solves ``(W + lam * DᵀD) z = W y`` where ``W`` is the diagonal matrix of
    observation weights (1 for observed, 0 for ``None``) and ``D`` is the
    second-difference operator. Samples are treated as equally spaced.

    Args:
        values: Input series; ``None`` marks a missing observation.
        lam: Smoothing strength; larger values give a smoother curve.

    Returns:
        A new list of the same length. Observed positions hold the smoothed
        value; positions that were ``None`` stay ``None`` so the caller can
        tell observed from gap-filled points. The input is returned as an
        unmodified copy when it has fewer than four samples, fewer than two
        observations, or the linear system cannot be solved.
    """
    n = len(values)
    observed = np.array([v is not None for v in values], dtype=bool)
    # The second-difference penalty leaves every straight line unpenalised, so
    # at least two observations are needed to pin the solution down; with
    # fewer the system matrix is singular and the solve may return garbage.
    if n < 4 or int(observed.sum()) < 2:
        return values[:]

    y = np.array([0.0 if v is None else v for v in values], dtype=float)
    w = observed.astype(float)

    second_diff = np.diff(np.eye(n), n=2, axis=0)  # (n-2) x n second-difference matrix
    system = np.diag(w) + lam * second_diff.T @ second_diff
    try:
        z = np.linalg.solve(system, w * y)
    except np.linalg.LinAlgError:
        return values[:]

    return [float(z_i) if seen else None for z_i, seen in zip(z, observed)]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: PhenoCam CSV parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _parse_phenocam_csv(
|
||||
csv_path: Path, year: int, site: str
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""Return (gcc_series, image_list) filtered to ``year``.
|
||||
|
||||
``gcc_series`` entries: ``{"date": "YYYY-MM-DD", "gcc_90": float}``
|
||||
``image_list`` entries: ``{"date": "YYYY-MM-DD", "url": str}``
|
||||
"""
|
||||
gcc_series: list[dict] = []
|
||||
image_list: list[dict] = []
|
||||
year_str = str(year)
|
||||
|
||||
if not csv_path.is_file():
|
||||
return gcc_series, image_list
|
||||
|
||||
with csv_path.open() as f:
|
||||
lines = [l for l in f if not l.startswith("#")]
|
||||
|
||||
reader = csv.DictReader(lines)
|
||||
for row in reader:
|
||||
if row.get("year") != year_str:
|
||||
continue
|
||||
date = row.get("date", "")
|
||||
gcc_raw = row.get("gcc_90")
|
||||
if gcc_raw and gcc_raw not in ("NA", ""):
|
||||
try:
|
||||
gcc_series.append({"date": date, "gcc_90": float(gcc_raw)})
|
||||
except ValueError:
|
||||
pass
|
||||
fn = row.get("midday_filename", "").strip()
|
||||
if fn and fn != "NA" and date:
|
||||
month = date[5:7]
|
||||
url = PHENOCAM_IMAGE_URL.format(
|
||||
site=site, year=year_str, month=month, filename=fn
|
||||
)
|
||||
image_list.append({"date": date, "url": url})
|
||||
|
||||
return gcc_series, image_list
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: moving average
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _moving_average(
|
||||
series: list[dict], value_key: str, window: int
|
||||
) -> list[dict]:
|
||||
"""Compute centred moving average; returns new list with ``_smooth`` suffix key."""
|
||||
if not series:
|
||||
return []
|
||||
vals = [p[value_key] for p in series]
|
||||
half = window // 2
|
||||
smoothed = []
|
||||
for i, pt in enumerate(series):
|
||||
chunk = [v for v in vals[max(0, i - half): i + half + 1] if v is not None]
|
||||
smoothed.append({
|
||||
"date": pt["date"],
|
||||
value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None,
|
||||
})
|
||||
return smoothed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: validation metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maximum distance (days) between a predicted point and the reference
# observation it is compared against.
MATCH_TOLERANCE_DAYS = 5


def compute_metrics(
    ref: list[dict], ref_key: str,
    pred: list[dict], pred_key: str,
) -> dict | None:
    """Compute NSE, RMSE, nRMSE and Pearson r of ``pred`` against ``ref``.

    Each pred point is matched to the nearest ref date within
    ``MATCH_TOLERANCE_DAYS`` (the earlier ref date wins a tie); pred points
    with no ref observation that close are dropped. Points whose value is
    ``None`` are ignored on both sides.

    Args:
        ref: Reference series of ``{"date": "YYYY-MM-DD", ref_key: value}``.
        ref_key: Key holding the reference value.
        pred: Predicted series of ``{"date": "YYYY-MM-DD", pred_key: value}``.
        pred_key: Key holding the predicted value.

    Returns:
        ``{"n", "rmse", "nrmse", "nse", "r"}`` with values rounded to four
        decimals, or ``None`` if fewer than 2 matched pairs exist. A metric
        that is undefined for the matched data (for example Pearson r of a
        constant series) is reported as ``None`` rather than NaN, so the
        result always serialises to valid JSON.
    """
    ref_lookup: dict[str, float] = {
        p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None
    }
    if not ref_lookup:
        return None

    # ISO dates sort chronologically, so both sequences share one ordering.
    ref_dates = sorted(ref_lookup)
    # Parse the reference dates once instead of once per predicted point.
    ref_days = np.array(ref_dates, dtype="datetime64[D]")

    obs, sim = [], []
    for pt in pred:
        v = pt.get(pred_key)
        if v is None:
            continue
        gaps = np.abs(ref_days - np.datetime64(pt["date"], "D")).astype(int)
        nearest = int(np.argmin(gaps))  # first minimum: earlier date wins ties
        if gaps[nearest] <= MATCH_TOLERANCE_DAYS:
            obs.append(ref_lookup[ref_dates[nearest]])
            sim.append(v)

    if len(obs) < 2:
        return None

    obs_arr = np.array(obs, dtype=float)
    sim_arr = np.array(sim, dtype=float)
    obs_mean = obs_arr.mean()

    rmse = float(np.sqrt(np.mean((sim_arr - obs_arr) ** 2)))
    nrmse = float(rmse / obs_mean) if obs_mean else None
    ss_res = float(np.sum((obs_arr - sim_arr) ** 2))
    ss_tot = float(np.sum((obs_arr - obs_mean) ** 2))
    nse = (1.0 - ss_res / ss_tot) if ss_tot else None

    # Pearson r is undefined when either series is constant; scipy would
    # return NaN (with a warning) in that case.
    if np.all(obs_arr == obs_arr[0]) or np.all(sim_arr == sim_arr[0]):
        r = None
    else:
        r, _ = pearsonr(obs_arr, sim_arr)
        r = float(r)

    def _r4(v: float | None) -> float | None:
        # Non-finite values become None: json.dumps would emit a bare NaN.
        if v is None or not np.isfinite(v):
            return None
        return round(float(v), 4)

    return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(r)}
|
||||
|
||||
|
||||
S2_BAND_NAMES = ["B02", "B03", "B04"]
|
||||
S3_BAND_NAMES = ["Oa04", "Oa06", "Oa08", "Oa17"]
|
||||
|
||||
|
||||
def _read_multiband_center(
    path: Path, lat: float, lon: float, band_names: list[str]
) -> dict[str, float | None]:
    """Return the 3×3 mean per band around (lat, lon).

    ``band_names[i]`` labels raster band ``i + 1``. The WGS84 coordinate is
    reprojected into the raster CRS and the 3×3 window centred on the
    containing pixel (clipped at the raster edge) is averaged per band,
    ignoring nodata, zero and NaN pixels.

    Returns:
        A dict keyed by ``band_names``. A value is the mean rounded to six
        decimals, or ``None`` when the raster has no such band or the window
        holds no valid pixel. Every value is ``None`` when the coordinate
        falls outside the raster or the raster cannot be read (best-effort:
        errors are swallowed so one bad file does not abort a series).
    """
    try:
        with rasterio.open(path) as src:
            transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
            x, y = transformer.transform(lon, lat)
            row, col = rowcol(src.transform, x, y)
            h, w = src.height, src.width
            # Without this check, a point one pixel outside the raster would
            # be answered with the neighbouring edge pixels.
            if not (0 <= row < h and 0 <= col < w):
                return {name: None for name in band_names}
            r0, r1 = max(0, row - 1), min(h, row + 2)
            c0, c1 = max(0, col - 1), min(w, col + 2)
            window = rasterio.windows.Window(c0, r0, c1 - c0, r1 - r0)
            nodata = src.nodata

            result: dict[str, float | None] = {}
            for i, name in enumerate(band_names, 1):
                if i > src.count:
                    result[name] = None
                    continue
                data = src.read(i, window=window).astype(float)
                if nodata is not None:
                    data[data == nodata] = np.nan
                data[data == 0] = np.nan
                # Test explicitly so nanmean never runs on an all-NaN window
                # (which would emit a RuntimeWarning for every masked band).
                if np.isnan(data).all():
                    result[name] = None
                else:
                    result[name] = round(float(np.nanmean(data)), 6)
            return result
    except Exception:
        return {name: None for name in band_names}
|
||||
|
||||
|
||||
def _multiband_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    band_names: list[str],
    desc: str,
) -> list[dict]:
    """Build a per-band time series at (lat, lon) from a list of rasters.

    ``date_fn`` maps a path to ``YYYYMMDD`` (or ``None`` to skip the file);
    ``desc`` labels the progress bar. Rasters for which every band is
    ``None`` are left out.

    Returns:
        ``[{"date": "YYYY-MM-DD", <band>: value, …}]`` sorted by date.
    """
    rows: list[dict] = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        band_values = _read_multiband_center(tif, lat, lon, band_names)
        if all(value is None for value in band_values.values()):
            continue
        iso_date = f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
        rows.append({"date": iso_date, **band_values})
    rows.sort(key=lambda row: row["date"])
    return rows
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: spatial heterogeneity + observation density
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _read_footprint_stats(
    path: Path, lat: float, lon: float, half_m: float = SPATIAL_CV_HALF_M
) -> tuple[float, float] | tuple[None, None]:
    """Mean and standard deviation of band 1 in a square window around a point.

    The window extends ``half_m`` on each side of the pixel containing
    (lat, lon), converted to pixels with the raster's x pixel size and
    clipped to the raster. Only strictly positive, non-nodata, non-NaN
    pixels count as valid.

    NOTE(review): ``half_m`` is divided by the pixel size in CRS units, so
    this presumably expects a metric (e.g. UTM) raster — confirm.

    Returns:
        ``(mean, std)``, or ``(None, None)`` on any error or when fewer than
        4 valid pixels exist.
    """
    try:
        with rasterio.open(path) as src:
            to_raster_crs = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
            x, y = to_raster_crs.transform(lon, lat)
            pixel_size = abs(src.transform.a)  # pixel size in CRS units
            half_px = max(1, int(round(half_m / pixel_size)))
            row, col = rowcol(src.transform, x, y)

            row_start = max(0, row - half_px)
            row_stop = min(src.height, row + half_px + 1)
            col_start = max(0, col - half_px)
            col_stop = min(src.width, col + half_px + 1)
            window = rasterio.windows.Window(
                col_start, row_start, col_stop - col_start, row_stop - row_start
            )
            pixels = src.read(1, window=window).astype(float)

            # NaN compares False, so NaN pixels drop out of this mask too.
            keep = pixels > 0
            nodata = src.nodata
            if nodata is not None:
                keep &= pixels != nodata
            usable = pixels[keep]
            if usable.size < 4:
                return None, None
            return float(np.mean(usable)), float(np.std(usable))
    except Exception:
        return None, None
|
||||
|
||||
|
||||
def compute_covariates(
    s2_gcc_paths: list[Path],
    s2_series: list[dict],
    s3_series: list[dict],
    n_gcc_points: int | None,
    lat: float,
    lon: float,
) -> dict:
    """Summarise spatial heterogeneity and observation density for one site.

    Args:
        s2_gcc_paths: S2 GCC rasters used for the footprint statistics.
        s2_series: S2 GCC time series, ordered by ISO ``date``.
        s3_series: S3 GCC time series (only its length is used).
        n_gcc_points: Passed through unchanged into the result.
        lat, lon: Site coordinate.

    Returns:
        Dict with the mean per-scene coefficient of variation and standard
        deviation of footprint GCC (``None`` without usable scenes), the S2
        scene count, mean/max gap in days between consecutive S2 scenes
        (``None`` with fewer than two scenes), the S3 composite count and
        ``n_gcc_points``.
    """
    # Per-scene (mean, std) over the footprint; scenes without a positive mean are skipped.
    footprint: list[tuple[float, float]] = []
    for tif in s2_gcc_paths:
        mean, std = _read_footprint_stats(tif, lat, lon)
        if mean is not None and mean > 0:
            footprint.append((mean, std))

    if footprint:
        spatial_gcc_cv = round(float(np.mean([std / mean for mean, std in footprint])), 4)
        spatial_gcc_std = round(float(np.mean([std for _, std in footprint])), 4)
    else:
        spatial_gcc_cv = None
        spatial_gcc_std = None

    # Gaps between consecutive S2 scenes.
    scene_days = [datetime.date.fromisoformat(point["date"]) for point in s2_series]
    gaps = [(later - earlier).days for earlier, later in zip(scene_days, scene_days[1:])]
    if gaps:
        s2_mean_gap = round(float(np.mean(gaps)), 1)
        s2_max_gap = int(max(gaps))
    else:
        s2_mean_gap = None
        s2_max_gap = None

    return {
        "spatial_gcc_cv": spatial_gcc_cv,
        "spatial_gcc_std": spatial_gcc_std,
        "s2_scene_count": len(s2_series),
        "s2_mean_gap_days": s2_mean_gap,
        "s2_max_gap_days": s2_max_gap,
        "s3_composite_count": len(s3_series),
        "n_gcc_points": n_gcc_points,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-site export
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_json(path: Path, data: Any) -> None:
|
||||
path.write_text(json.dumps(data, separators=(",", ":")))
|
||||
|
||||
|
||||
def _raster_series(
    tif_paths: list[Path],
    date_fn,
    lat: float,
    lon: float,
    desc: str,
) -> list[dict]:
    """Build a GCC time series at (lat, lon) from a list of rasters.

    ``date_fn`` maps a path to ``YYYYMMDD`` (or ``None`` to skip the file);
    ``desc`` labels the progress bar. Rasters without a usable centre value
    are left out.

    Returns:
        ``[{"date": "YYYY-MM-DD", "gcc": value}]`` sorted by date.
    """
    points: list[dict] = []
    for tif in tqdm(tif_paths, desc=desc, leave=False):
        stamp = date_fn(tif)
        if stamp is None:
            continue
        gcc = _read_center_pixel(tif, lat, lon)
        if gcc is None:
            continue
        iso_date = "-".join((stamp[:4], stamp[4:6], stamp[6:]))
        points.append({"date": iso_date, "gcc": gcc})
    points.sort(key=lambda point: point["date"])
    return points
|
||||
|
||||
|
||||
def _raster_index(tif_paths: list[Path], date_fn, rel_root: Path) -> list[dict]:
|
||||
"""Build raster index: ``[{date, path}]`` sorted by date."""
|
||||
result = []
|
||||
for p in tif_paths:
|
||||
date = date_fn(p)
|
||||
if date is None:
|
||||
continue
|
||||
try:
|
||||
rel = str(p.relative_to(rel_root))
|
||||
except ValueError:
|
||||
rel = str(p)
|
||||
result.append({"date": date, "path": rel})
|
||||
return sorted(result, key=lambda x: x["date"])
|
||||
|
||||
|
||||
def export_site(
    site: str,
    year: int,
    lat: float,
    lon: float,
    out_dir: Path,
    n_gcc_points: int | None = None,
) -> bool:
    """Write the per-site JSON products for one site into ``out_dir``.

    Produces the GCC time series (``gcc_*.json``), PhenoCam image list,
    band reflectance series (``bands_*.json``), raster indexes
    (``rasters_*.json``), ``covariates.json`` and ``metrics.json``, and
    deletes the legacy ``timeseries.json`` / ``rasters.json`` if present.

    Args:
        site: Site name (directory name under the data folders).
        year: Evaluation year.
        lat, lon: Site coordinate used for pixel extraction.
        out_dir: Destination directory; created when needed.
        n_gcc_points: Passed through into ``covariates.json``.

    Returns:
        ``False`` (writing nothing) when the site has no BtI GCC rasters,
        ``True`` once all files are written.
    """
    prepared = DATA_DIR / "sentinel_data" / str(year) / site / "prepared"
    fusion_root = DATA_DIR / "fusion" / str(year) / site

    s2_dir = prepared / "s2"  # holds both the *_REFL.tif and *_GCC.tif rasters
    s3_comp_dir = prepared / "s3"
    s3_gcc_dir = prepared / "gcc_s3"
    bti_refl_dir = fusion_root / "bti" / "fusion"
    bti_gcc_dir = fusion_root / "bti" / "gcc"
    itb_gcc_dir = fusion_root / "itb" / "fusion"

    if not (bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))):
        return False

    out_dir.mkdir(parents=True, exist_ok=True)

    # --- Input rasters ---
    s2_gcc_paths = sorted(s2_dir.glob("*_GCC.tif"))
    s3_gcc_paths = sorted(s3_gcc_dir.glob("composite_*.tif"))
    bti_paths = sorted(bti_gcc_dir.glob("GCC_*.tif"))
    itb_paths = sorted(itb_gcc_dir.glob("GCC_*.tif"))
    s2_refl_paths = sorted(s2_dir.glob("*_REFL.tif"))
    s3_comp_paths = sorted(s3_comp_dir.glob("composite_*.tif"))
    bti_refl_paths = sorted(bti_refl_dir.glob("REFL_*.tif"))

    # --- GCC timeseries from rasters ---
    s2_series = _raster_series(s2_gcc_paths, _date_from_s2_tif, lat, lon, f"{site} S2")
    s3_series = _raster_series(s3_gcc_paths, _date_from_gcc_tif, lat, lon, f"{site} S3")
    bti_series = _raster_series(bti_paths, _date_from_gcc_tif, lat, lon, f"{site} BtI")
    itb_series = _raster_series(itb_paths, _date_from_gcc_tif, lat, lon, f"{site} ItB")

    # Whittaker-smoothed S2
    smoothed = _whittaker_smooth([point["gcc"] for point in s2_series])
    s2_whittaker = []
    for point, value in zip(s2_series, smoothed):
        if value is not None:
            s2_whittaker.append({"date": point["date"], "gcc": value})

    # S3 moving average
    s3_smooth = _moving_average(s3_series, "gcc", S3_SMOOTH_WINDOW)

    # PhenoCam reference series and midday images
    csv_path = DATA_DIR / "phenocam" / str(year) / f"{site}_1day.csv"
    phenocam_series, image_list = _parse_phenocam_csv(csv_path, year, site)

    s3_smooth_series = []
    for point in s3_smooth:
        if point.get("gcc_smooth") is not None:
            s3_smooth_series.append({"date": point["date"], "gcc": point["gcc_smooth"]})

    # Band reflectance timeseries (multi-band center-pixel)
    bands_s2 = _multiband_series(s2_refl_paths, _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands")
    bands_s3 = _multiband_series(s3_comp_paths, _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands")

    # --- Per-metric JSON outputs ---
    series_outputs = [
        ("gcc_phenocam.json", phenocam_series),
        ("gcc_s2.json", s2_series),
        ("gcc_s2_whittaker.json", s2_whittaker),
        ("gcc_s3.json", s3_series),
        ("gcc_s3_smooth.json", s3_smooth_series),
        ("gcc_fusion_bti.json", bti_series),
        ("gcc_fusion_itb.json", itb_series),
        ("phenocam_images.json", image_list),
        ("bands_s2.json", bands_s2),
        ("bands_s3.json", bands_s3),
    ]
    for filename, payload in series_outputs:
        _write_json(out_dir / filename, payload)

    # --- Raster index for slider ---
    rel_root = DATA_DIR.parent  # paths relative to project root

    # Only list S2/S3 rasters on dates where the centre pixel produced a GCC
    # value; this hides cloud-masked / snow-covered scenes that would render
    # as black or visually nonsensical.
    s2_valid_dates = {point["date"].replace("-", "") for point in s2_series}
    s3_valid_dates = {point["date"].replace("-", "") for point in s3_series}

    def _index_on(paths: list[Path], date_fn, valid_dates: set[str]) -> list[dict]:
        return [r for r in _raster_index(paths, date_fn, rel_root) if r["date"] in valid_dates]

    s2_refl = _index_on(s2_refl_paths, _date_from_s2_tif, s2_valid_dates)
    s3_comp = _index_on(s3_comp_paths, _date_from_gcc_tif, s3_valid_dates)
    s2_gcc = _index_on(s2_gcc_paths, _date_from_s2_tif, s2_valid_dates)
    s3_gcc = _index_on(s3_gcc_paths, _date_from_gcc_tif, s3_valid_dates)
    bti_refl = _raster_index(bti_refl_paths, _date_from_gcc_tif, rel_root)
    itb_gcc = _raster_index(itb_paths, _date_from_gcc_tif, rel_root)

    index_outputs = [
        ("rasters_s2_refl.json", s2_refl),
        ("rasters_s3_composite.json", s3_comp),
        ("rasters_s2_gcc.json", s2_gcc),
        ("rasters_s3_gcc.json", s3_gcc),
        ("rasters_fusion_bti_refl.json", bti_refl),
        ("rasters_fusion_itb_gcc.json", itb_gcc),
    ]
    for filename, payload in index_outputs:
        _write_json(out_dir / filename, payload)

    # --- Site covariates (heterogeneity + observation density) ---
    covariates = compute_covariates(s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon)
    _write_json(out_dir / "covariates.json", covariates)

    # --- Validation metrics vs PhenoCam gcc_90 ---
    candidates = {
        "bti": bti_series,
        "itb": itb_series,
        "s2_whittaker": s2_whittaker,
        "s3_smooth": s3_smooth_series,
        "s2": s2_series,
        "s3": s3_series,
    }
    metrics = {
        label: compute_metrics(phenocam_series, "gcc_90", series, "gcc")
        for label, series in candidates.items()
    }
    _write_json(out_dir / "metrics.json", metrics)

    # Remove legacy bundled outputs if present
    for legacy in ("timeseries.json", "rasters.json"):
        (out_dir / legacy).unlink(missing_ok=True)
    return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Manifest
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
VEG_TYPE_LABELS = {
|
||||
"AG": "Agriculture",
|
||||
"DB": "Deciduous broadleaf",
|
||||
"DN": "Deciduous needleleaf",
|
||||
"EB": "Evergreen broadleaf",
|
||||
"EN": "Evergreen needleleaf",
|
||||
"GR": "Grassland",
|
||||
"MX": "Mixed",
|
||||
"SH": "Shrubland",
|
||||
"TN": "Tundra",
|
||||
"UN": "Unknown",
|
||||
"WL": "Wetland",
|
||||
"RF": "Reference",
|
||||
}
|
||||
|
||||
|
||||
def build_manifest(years: list[int], filter_site: str | None = None) -> dict:
    """Collect per-year metadata for every site that passed screening.

    For each year, ``data/phenocam_screening/{year}.json`` is read (years
    without that file get no entry) and every site whose calculation status
    is ``PASS`` is recorded with its coordinates, vegetation type, site
    metadata, screening statistics and a ``has_fusion`` flag telling whether
    BtI GCC rasters exist for it.

    Args:
        years: Years to include; stored as-is under ``"years"``.
        filter_site: When given, only this site is recorded.

    Returns:
        ``{"years": years, "sites": {"<year>": {"<site>": {...}}}}``.
    """
    sites_by_year: dict[str, Any] = {}

    for year in years:
        screening_path = DATA_DIR / "phenocam_screening" / f"{year}.json"
        if not screening_path.is_file():
            continue
        screening = json.loads(screening_path.read_text())

        year_sites: dict[str, Any] = {}
        for entry in screening.get("sites", []):
            calc = entry.get("calculations", {})
            if calc.get("status") != "PASS":
                continue

            response = entry.get("response", {})
            cam = response.get("camera", {})
            roi = response.get("roi", {})

            site = cam.get("Sitename", "")
            if not site:
                continue
            if filter_site and site != filter_site:
                continue

            site_meta = cam.get("sitemetadata", {})
            # Prefer the camera's vegetation type, then the ROI type, then "UN".
            veg_raw = site_meta.get("primary_veg_type") or roi.get("roitype") or "UN"

            bti_gcc_dir = DATA_DIR / "fusion" / str(year) / site / "bti" / "gcc"
            has_fusion = bti_gcc_dir.is_dir() and any(bti_gcc_dir.glob("GCC_*.tif"))

            year_sites[site] = {
                "lat": cam.get("Lat"),
                "lon": cam.get("Lon"),
                "veg_type": veg_raw,
                "veg_label": VEG_TYPE_LABELS.get(veg_raw, veg_raw),
                "description": site_meta.get("site_description", ""),
                "dominant_species": site_meta.get("dominant_species", ""),
                "group": site_meta.get("group", ""),
                "snr": calc.get("snr"),
                "n_gcc_points": calc.get("n_gcc_points"),
                "has_fusion": has_fusion,
            }
        sites_by_year[str(year)] = year_sites

    return {"years": years, "sites": sites_by_year}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> None:
    """Run the step from the command line.

    Flags: ``--evaluation-year`` (int, default ``DEFAULT_YEAR``) and
    ``--site`` (optional sitename).

    The manifest covers every year that has a ``phenocam_screening/<digits>.json``
    file, or just the requested year when there is none. ``export_site`` is
    then called for each site of the requested year whose manifest record has
    ``has_fusion`` set, and the manifest is written to ``metrics/manifest.json``
    as compact JSON.

    NOTE: ``--site`` is also forwarded to ``build_manifest``, so the manifest
    written by a single-site run only lists that site.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    cli.add_argument("--site", type=str, default=None)
    opts = cli.parse_args()
    year, filter_site = opts.evaluation_year, opts.site

    metrics_root = DATA_DIR / "metrics"
    metrics_root.mkdir(parents=True, exist_ok=True)

    # Each screening file named with digits only contributes one manifest year;
    # with no such file, fall back to the requested year alone.
    screening_files = (DATA_DIR / "phenocam_screening").glob("*.json")
    years = sorted(int(f.stem) for f in screening_files if f.stem.isdigit()) or [year]

    print(f"Building manifest for years: {years}")
    manifest = build_manifest(years, filter_site)

    # Export candidates: sites of the requested year flagged ``has_fusion``,
    # narrowed to the --site name when one was given.
    to_export = {
        name: record
        for name, record in manifest["sites"].get(str(year), {}).items()
        if record["has_fusion"] and (not filter_site or name == filter_site)
    }

    print(f"Exporting {len(to_export)} site(s) with fusion data for {year}")
    for site, meta in tqdm(to_export.items(), desc="Sites"):
        exported = export_site(
            site,
            year,
            meta["lat"],
            meta["lon"],
            metrics_root / str(year) / site,
            meta.get("n_gcc_points"),
        )
        print(f" ✓ {site}" if exported else f" ✗ {site} — no fusion data found")

    manifest_path = metrics_root / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, separators=(",", ":")))
    print(f"Manifest written → {manifest_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the CLI only on direct execution, not on import.
    main()
|
||||
151
AGENTS.md
Normal file
151
AGENTS.md
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
# AGENTS.md
|
||||
|
||||
Worldwide PhenoCam EFAST feasibility screening. Human summary: [`README.md`](README.md).
|
||||
|
||||
---
|
||||
|
||||
## Layout
|
||||
|
||||
| Path | Role |
|
||||
|------|------|
|
||||
| `1-phenocam.py` | Step 1: download PhenoCam metadata + `one_day_summary` CSV |
|
||||
| `2-phenocam-screening.py` | Step 2: PhenoCam + SNR gates on cached CSVs |
|
||||
| `3-sentinel-data.py` | Step 3: S2 (Earth Search COG) + S3 (CDSE OpenEO) download + EFAST prep |
|
||||
| `4-fusion.py` | Step 4: GCC computation + EFAST BtI/ItB fusion loop |
|
||||
| `5-metrics.py` | Step 5: timeseries, covariates, `metrics.json`, webapp manifest |
|
||||
| `data/` | Manifests, per-site caches, screening outputs (large; mostly generated) |
|
||||
| `webapp/` | Static QA viewer (`make serve` from workspace root) |
|
||||
|
||||
Workspace orchestration: [`../AGENTS.md`](../AGENTS.md).
|
||||
|
||||
---
|
||||
|
||||
## Where to work
|
||||
|
||||
| Task | Location |
|
||||
|------|----------|
|
||||
| PhenoCam bulk download | `1-phenocam.py` |
|
||||
| GCC/SNR screening on disk | `2-phenocam-screening.py` |
|
||||
| S2/S3 download + EFAST prep | `3-sentinel-data.py` |
|
||||
| GCC + fusion | `4-fusion.py` |
|
||||
| Metrics + webapp index | `5-metrics.py` |
|
||||
| Web QA | `../Makefile` target `serve` → `webapp/index.html` |
|
||||
|
||||
---
|
||||
|
||||
## Setup
|
||||
|
||||
**Preferred (uv):** from `processing/`:
|
||||
|
||||
```bash
|
||||
uv sync # all deps from pyproject.toml (incl. efast)
|
||||
```
|
||||
|
||||
Run any script as `uv run python <script>.py …`. Python version is pinned in `.python-version` (3.11.10).
|
||||
|
||||
Environment variables:

- `CDSE_USER` — Copernicus Data Space username
- `CDSE_PASSWORD` — Copernicus Data Space password

Both are required for the step 3 S3 download (CDSE OpenEO). The step 3 S2 download uses AWS Earth Search and needs no authentication.
|
||||
|
||||
---
|
||||
|
||||
## CLI convention
|
||||
|
||||
Every numbered step script shares two user-facing flags:
|
||||
|
||||
| Flag | Default | Role |
|
||||
|------|---------|------|
|
||||
| `--evaluation-year` | `2025` | Calendar year; input/output paths under `data/` use `{year}` |
|
||||
| `--site` | all eligible | Single sitename to limit scope (testing or single-site runs) |
|
||||
|
||||
All other tunable parameters (bands, resolution ratio, compositing window, etc.) are public constants at the top of each script. Paths are derived from the year — do not pass manifest paths on the CLI. Each script docstring lists **Inputs** and **Outputs** under `data/`.
|
||||
|
||||
Resume behaviour: step 3 skips a site's S3 download when `raw/s3/S3*.tif` files already exist, and skips any S2 scene whose `*_REFL.tif` already exists. Step 4 skips GCC/fusion files that already exist. Step 5 overwrites JSON sidecars for processed sites.
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Workflow
|
||||
|
||||
### Stepped pipeline (resumable)
|
||||
|
||||
```bash
|
||||
uv run python 1-phenocam.py --evaluation-year 2025
|
||||
uv run python 2-phenocam-screening.py --evaluation-year 2025
|
||||
uv run python 3-sentinel-data.py --evaluation-year 2025
|
||||
uv run python 4-fusion.py --evaluation-year 2025
|
||||
uv run python 5-metrics.py --evaluation-year 2025
|
||||
|
||||
# single site
|
||||
uv run python 3-sentinel-data.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
uv run python 4-fusion.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
uv run python 5-metrics.py --evaluation-year 2025 --site ICOSFR-Fon1
|
||||
```
|
||||
|
||||
S3 uses CDSE OpenEO collection `SENTINEL3_SYN_L2_SYN` (bands Oa04/Oa06/Oa08/Oa17). S2 uses AWS Earth Search COG range reads (no auth). No S2↔S3 radiometric harmonisation.
|
||||
|
||||
---
|
||||
|
||||
## Screening gates
|
||||
|
||||
### Step 2 (`2-phenocam-screening.py`)
|
||||
|
||||
| Gate | Rule |
|
||||
|------|------|
|
||||
| `phenocam` | ROI + `one_day_summary` CSV; ≥ `MIN_GCC_POINTS` (30) valid `gcc_90` in evaluation year |
|
||||
| `snr` | AIC-selected cubic spline SNR ≥ `SNR_THRESHOLD` (2.0) |
|
||||
| `cluster` | SNR-passed sites within 500 m deduplicated; keep highest `n_gcc_points` (SNR tie-break) |
|
||||
|
||||
---
|
||||
|
||||
## Data layout
|
||||
|
||||
**Naming:** `data/` paths follow step script names — `1-phenocam.py` → `phenocam/`, `2-phenocam-screening.py` → `phenocam_screening/`, `3-sentinel-data.py` → `sentinel_data/`, `4-fusion.py` → `fusion/`, `5-metrics.py` → `metrics/`.
|
||||
|
||||
```
data/
  phenocam/
    {year}.json                 # step-1 manifest
    {year}/
      {sitename}.json           # camera + ROI API payload
      {sitename}_1day.csv       # raw PhenoCam summary CSV
  phenocam_screening/
    {year}.json                 # step-2 results
    {year}.csv
  sentinel_data/{year}/{sitename}/
    raw/s3/                     # step 3: S3 SYN L2 per-date GeoTIFFs
    prepared/s2/                # step 3: *_REFL.tif, *_DIST_CLOUD.tif, *_GCC.tif
    prepared/s3/                # step 3: composite_*.tif
    prepared/gcc_s3/            # step 4: single-band GCC composites
    data.json                   # step-3 run summary
  fusion/{year}/{sitename}/
    bti/fusion/REFL_*.tif       # step 4: BtI fused reflectance
    bti/gcc/GCC_*.tif           # step 4: BtI GCC
    itb/s2/GCC_*.tif            # step 4: S2 GCC (ItB stack)
    itb/s3/GCC_*.tif            # step 4: S3 GCC (ItB stack)
    itb/fusion/GCC_*.tif        # step 4: ItB fused GCC
  metrics/
    manifest.json               # step 5: years + site metadata for webapp
    {year}/{sitename}/
      gcc_*.json, metrics.json, covariates.json, rasters_*.json, bands_*.json
```
|
||||
|
||||
---
|
||||
|
||||
## Module map
|
||||
|
||||
| File | Responsibility |
|
||||
|------|----------------|
|
||||
| `1-phenocam.py` | Paginate PhenoCam API; cache JSON + CSV; write manifest |
|
||||
| `2-phenocam-screening.py` | Parse cached CSVs; PhenoCam + SNR gates |
|
||||
| `3-sentinel-data.py` | S2 COG range reads (Earth Search); S3 OpenEO download; EFAST REFL/DIST_CLOUD/composites |
|
||||
| `4-fusion.py` | GCC from S2 REFL + S3 composites; daily `efast.fusion` BtI + ItB |
|
||||
| `5-metrics.py` | PhenoCam-matched GCC series, baselines, fusion metrics, raster index, covariates |
|
||||
619
LICENSE
619
LICENSE
|
|
@ -1,619 +0,0 @@
|
|||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
=================================
|
||||
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
159
README.md
159
README.md
|
|
@ -1,146 +1,57 @@
|
|||
# Satellite Data Fusion Pipeline
|
||||
# Worldwide PhenoCam EFAST feasibility screening
|
||||
|
||||
Python pipeline for downloading Sentinel-2 and Sentinel-3 imagery and PhenoCam ground truth, applying NDVI-based cloud pre-selection, fusing sensors with the [EFAST](https://github.com/DHI-GRAS/efast) algorithm, and evaluating fused **Green Chromatic Coordinate (GCC)** time series against PhenoCam `gcc_90`.
|
||||
Screen the global [PhenoCam Network](https://phenocam.nau.edu/) for sites where EFAST Sentinel-2 / Sentinel-3 fusion is likely to work: enough PhenoCam `gcc_90`, seasonal signal, and S2/S3 coverage for a calendar year.
|
||||
|
||||
## Features
|
||||
Agent-oriented detail: [`AGENTS.md`](AGENTS.md).
|
||||
|
||||
- **Acquisition** — S2 L2A (AWS Element84 STAC), S3 OLCI L1B (Copernicus OpenEO), PhenoCam midday images and GCC CSV
|
||||
- **Pre-selection** — Aggressive and non-aggressive NDVI-based cloud screening (plus dark-scene rejection)
|
||||
- **Preparation** — Harmonised reflectance/GCC rasters, distance-to-cloud weights, S3 compositing and optional temporal smoothing
|
||||
- **Fusion** — EFAST under eight scenarios per site (BtI and ItB × two strategies × σ ∈ {20, 30} days)
|
||||
- **Post-processing** — Crop to valid-data window; NDVI and GCC timeseries at the site
|
||||
- **Metrics** — Temporal comparison vs PhenoCam (`metrics.json`); optional Tier-2 withheld-S2 gap validation
|
||||
- **Web viewer** — Static HTML dashboard over pipeline outputs (`webapp/`)
|
||||
---
|
||||
|
||||
## Installation
|
||||
## Quick start
|
||||
|
||||
From `processing/`:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
pip install git+https://github.com/DHI-GRAS/efast.git # not on PyPI
|
||||
uv sync
|
||||
uv run python 1-phenocam.py --evaluation-year 2025
|
||||
```
|
||||
|
||||
Create `.env` with Copernicus Data Space credentials:
|
||||
### Stepped pipeline (resumable)
|
||||
|
||||
- `CDSE_USER`
|
||||
- `CDSE_PASSWORD`
|
||||
|
||||
Python version is pinned in `.python-version` (use `.venv/` locally).
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from run import run_pipeline
|
||||
|
||||
run_pipeline(season=2024, site_position=(47.116171, 11.320308), site_name="innsbruck")
|
||||
```
|
||||
|
||||
`site_position` is always **`(lat, lon)`**. Study sites are listed at the bottom of `run.py`: `innsbruck`, `forthgr`, `pitsalu`, `vindeln2`, `sunflowerjerez1`, `institutekarnobat`.
|
||||
|
||||
By default, most stages in `run.py` are **commented out** (metrics-only). Uncomment acquisition → pre-selection → preparation → fusion → post-processing for a full run.
|
||||
|
||||
### Pipeline stages
|
||||
|
||||
1. Download S2, S3, and PhenoCam
|
||||
2. Pre-selection (per-sensor NDVI screening → `raw/preselection/`)
|
||||
3. Prepare S2/S3 for each strategy (`prepared_{aggressive|nonaggressive}/` and `_itb/` variants)
|
||||
4. EFAST fusion (BtI reflectance and ItB GCC products)
|
||||
5. Post-process crops and timeseries (`processed_*_sigma{20,30}/`)
|
||||
6. Compute metrics vs PhenoCam → `metrics.json`
|
||||
|
||||
### Gap validation (optional)
|
||||
|
||||
With prepared data and EFAST installed:
|
||||
All steps use `--evaluation-year` (default 2025) and optional `--site`. See each script docstring for inputs/outputs under `data/`.
|
||||
|
||||
```bash
|
||||
# Phenology sidecars (TIMESAT 50 % amplitude)
|
||||
python -m phenology_timesat --all
|
||||
uv run python 1-phenocam.py --evaluation-year 2025
|
||||
uv run python 2-phenocam-screening.py --evaluation-year 2025
|
||||
uv run python 3-sentinel-data.py --evaluation-year 2025
|
||||
uv run python 4-fusion.py --evaluation-year 2025
|
||||
uv run python 5-metrics.py --evaluation-year 2025
|
||||
|
||||
# Spatial NSE_S2 vs withheld S2 (unit test: Estonia peatland, 30 d, green-up)
|
||||
python -m gap_validation.run --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688 \
|
||||
--strategy aggressive --sigma 20 --mode bti --transition green_up --gap-days 30
|
||||
|
||||
# All six sites, best BtI scenario per site
|
||||
python -m gap_validation.batch_spatial
|
||||
|
||||
# Full-season NSE_PC on gap-degraded stack (slow)
|
||||
python -m gap_validation.temporal_pc --site pitsalu --season 2024 --lat 58.5633 --lon 24.3688
|
||||
python -m gap_validation.batch_temporal
|
||||
|
||||
# TIMESAT day-offsets on gap fusion vs PhenoCam (needs temporal tier)
|
||||
python -m gap_validation.phenology_offsets
|
||||
# single site
|
||||
uv run python 3-sentinel-data.py --evaluation-year 2025 --site innsbruck
|
||||
uv run python 4-fusion.py --evaluation-year 2025 --site innsbruck
|
||||
uv run python 5-metrics.py --evaluation-year 2025 --site innsbruck
|
||||
```
|
||||
|
||||
Writes `gap_manifest.json`, `gap_withheld_images.json`, `gap_validation_summary.json` (spatial), and optionally `gap_metrics.json` (temporal). Masked fusion under `validation/fusion/gap_{N}_{transition}/`. See `python -m gap_validation.run --help`.
|
||||
Step 3 S3 uses CDSE OpenEO (`SENTINEL3_SYN_L2_SYN`); S2 uses AWS Earth Search COG range reads (no auth).
|
||||
|
||||
## Data layout
|
||||
---
|
||||
|
||||
```
|
||||
data/{site_name}/{season}/
|
||||
raw/
|
||||
s2/ # {YYYYMMDD}_{n}.geotiff — B02, B03, B04, B8A
|
||||
s3/ # {YYYYMMDD}_{n}.geotiff — Oa04, Oa06, Oa08, Oa17
|
||||
phenocam/ # JPEGs, GCC JSON, phenology sidecar
|
||||
preselection/ # {s2,s3}_preselection.{json,csv}
|
||||
prepared_{strategy}/
|
||||
s2/ # REFL + DIST_CLOUD GeoTIFFs
|
||||
s3/ # composite_{YYYYMMDD}.tif
|
||||
fusion/ # REFL_{YYYYMMDD}.tif (σ≈20)
|
||||
fusion_sigma30/ # REFL (σ=30)
|
||||
prepared_{strategy}_itb/
|
||||
s2/ s3/ fusion/ # GCC products (Index-then-Blend)
|
||||
processed_{strategy}_sigma{20,30}/
|
||||
s2/ s3/ fusion/ # cropped {YYYYMMDD}_0.geotiff
|
||||
gcc/ ndvi/ # timeseries.json per source
|
||||
processed_{strategy}_itb_sigma{20,30}/
|
||||
s2/ s3/ fusion/ gcc/
|
||||
validation/ # gap experiment (when run)
|
||||
metrics.json
|
||||
```
|
||||
## Outputs (under `data/`)
|
||||
|
||||
Site metadata: `data/sites.geojson` (six thesis sites). `data/coweeta/` is local/legacy and not listed there.
|
||||
| Artifact | Step | Role |
|
||||
|----------|------|------|
|
||||
| `phenocam/{year}.json` | 1 | Site list + `sites_dir` pointer |
|
||||
| `phenocam/{year}/{site}.json`, `{site}_1day.csv` | 1 | Raw API + GCC CSV |
|
||||
| `phenocam_screening/{year}.json` / `.csv` | 2 | PhenoCam + SNR gate results |
|
||||
| `sentinel_data/{year}/{site}/prepared/s2/` | 3 | S2 REFL + DIST_CLOUD GeoTIFFs |
|
||||
| `sentinel_data/{year}/{site}/prepared/s3/` | 3 | S3 composite GeoTIFFs |
|
||||
| `fusion/{year}/{site}/` | 4 | BtI/ItB fused rasters |
|
||||
| `metrics/{year}/{site}/`, `metrics/manifest.json` | 5 | Timeseries JSON, covariates, webapp manifest |
|
||||
|
||||
### File formats
|
||||
The 2025 manifest currently lists **739** cameras with archive overlap; most per-site CSV/JSON files are cached under `data/phenocam/2025/`.
|
||||
|
||||
**Sentinel-2** — Multi-band GeoTIFF; bands `[blue, green, red, nir]`; `VIEWING_ZENITH_ANGLE` metadata; filename `{YYYYMMDD}_{increment}.geotiff`.
|
||||
|
||||
**Sentinel-3** — Multi-band GeoTIFF; same band order; filename `{YYYYMMDD}_{increment}.geotiff`.
|
||||
|
||||
**Prepared S2** — `S2A_MSIL2A_{YYYYMMDD}_REFL.tif` plus `*DIST_CLOUD.tif` (cloud-distance weights for EFAST).
|
||||
---
|
||||
|
||||
## Web viewer
|
||||
|
||||
Static HTML/JS in `webapp/` — no build step. Shared GeoTIFF helpers: `webapp/common.js`. CDN: Leaflet, geotiff.js, proj4. Symlink: `webapp/data` → `../data`.
|
||||
|
||||
Serve from the **repository root** (not `webapp/`):
|
||||
|
||||
```bash
|
||||
python3 -m http.server 8000
|
||||
# http://localhost:8000/webapp/index.html
|
||||
```
|
||||
|
||||
Or from the workspace root: `make serve`.
|
||||
|
||||
| Page | Purpose | Primary data paths |
|
||||
|------|---------|-------------------|
|
||||
| `index.html` | Post-processed maps, NDVI/GCC timeseries, PhenoCam | `processed_{strategy}_sigma{n}/`, `raw/phenocam/` |
|
||||
| `preselection.html` | Cloud-screening diagnostics | `raw/preselection/{s2,s3}_preselection.json` |
|
||||
| `prepared.html` | Prepared REFL/GCC before crop | `prepared_{strategy}/`, `prepared_{strategy}_itb/` |
|
||||
| `fusion.html` | EFAST daily fusion rasters | `prepared_*/fusion/`, `fusion_sigma30/` |
|
||||
| `postprocessed.html` | Cropped processed stacks | `processed_*_sigma*/` |
|
||||
| `metrics.html` | Tabular `metrics.json` (thesis export source) | `{site}/{season}/metrics.json` under `webapp/data/` |
|
||||
| `gap_validation.html` | Withheld-S2 gap experiment | `{site}/{season}/validation/gap_validation_summary.json` |
|
||||
| `phenology.html` | TIMESAT on PhenoCam GCC | `raw/phenocam/phenocam_phenology.json` |
|
||||
|
||||
Site/season dropdowns use `data/sites.geojson`. Map pages: **BtI | ItB**; scenarios `aggressive` / `nonaggressive`, σ 20 / 30. Keep the shared nav consistent across all eight pages. QA only — thesis tables are exported from the workspace root (`make export` or `../scripts/export_thesis_tables.py`).
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
ruff check --fix . && ruff format .
|
||||
```
|
||||
|
||||
Pre-commit hooks: `.pre-commit-config.yaml`.
|
||||
|
||||
## License
|
||||
|
||||
GNU Affero General Public License v3.0 (AGPL-3.0). See [LICENSE](LICENSE).
|
||||
From the workspace root, `make serve` serves `processing/` at [http://localhost:8000/webapp/index.html](http://localhost:8000/webapp/index.html). Requires step 5 (`data/metrics/manifest.json`).
|
||||
|
|
|
|||
|
|
@ -1,282 +0,0 @@
|
|||
"""PhenoCam acquisition from PhenoCam Network API."""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
# Base URL of the PhenoCam Network REST API; endpoint paths such as
# ``/middayimages/`` and ``/roilists/`` are appended to it by the functions below.
PHENOCAM_API = "https://phenocam.nau.edu/api"
|
||||
|
||||
|
||||
def _phenocam_summary_gcc_value(row, use_mean_fallback: bool):
|
||||
"""Extract daily GCC from a one-day summary row.
|
||||
|
||||
Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth).
|
||||
Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present.
|
||||
With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``.
|
||||
"""
|
||||
if not use_mean_fallback:
|
||||
oflag = row.get("outlierflag_gcc_90")
|
||||
if oflag is not None and str(oflag).strip() in ("1", "1.0"):
|
||||
return None
|
||||
|
||||
raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
|
||||
if raw is None:
|
||||
return None
|
||||
text = str(raw).strip()
|
||||
if not text or text.upper() == "NA":
|
||||
return None
|
||||
try:
|
||||
val = float(text)
|
||||
except ValueError:
|
||||
return None
|
||||
if val <= -9998.0:
|
||||
return None
|
||||
return val
|
||||
|
||||
|
||||
def _find_start_offset(site_name, start_dt, total_count):
    """Binary-search the midday-image listing for an approximate start offset.

    Probes single records of ``{PHENOCAM_API}/middayimages/`` and compares
    their ``imgdate`` with ``start_dt``. The search presupposes that the
    listing is ordered by ascending date (assumption; confirm against the API).

    Args:
        site_name: PhenoCam site identifier passed as the ``site`` query parameter.
        start_dt: ``datetime`` of the first day of interest.
        total_count: Total number of records the listing reports for the site.

    Returns:
        A non-negative offset, 100 records before the located position, so the
        caller starts paging slightly early rather than missing images.

    Raises:
        requests.exceptions.HTTPError: If a probe returns an error status.
    """
    low, high = 0, total_count - 1

    # At most 15 probes, as before; enough to bisect listings of ~32k records.
    for _ in range(15):
        # Once the bounds have met, this probe is the last one that can move
        # them; further probes would re-request the same offset for the same
        # answer, so stop after it instead of burning the remaining requests.
        converged = low >= high
        mid = (low + high) // 2
        response = requests.get(
            f"{PHENOCAM_API}/middayimages/",
            params={"site": site_name, "limit": 1, "offset": mid},
            timeout=30,
        )
        response.raise_for_status()
        results = response.json().get("results", [])
        if not results:
            break

        mid_date_str = results[0].get("imgdate", "")
        if not mid_date_str:
            break

        try:
            mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
        except ValueError:
            # Unparsable date: give up refining and use the bounds found so far.
            break

        if mid_date < start_dt:
            low = mid + 1
        else:
            high = mid

        if converged:
            break

    return max(0, low - 100)
|
||||
|
||||
|
||||
def download_phenocam(season, site_position, site_name, date_range=None):
    """Download PhenoCam images, then the GCC time series, for one site.

    All four arguments are forwarded unchanged to ``_download_phenocam_images``
    and ``_download_phenocam_gcc``, in that order.
    """
    for step in (_download_phenocam_images, _download_phenocam_gcc):
        step(season, site_position, site_name, date_range)
|
||||
|
||||
|
||||
def _download_phenocam_images(season, site_position, site_name, date_range=None):
    """Download midday PhenoCam JPEGs for one site and date window.

    Images are written as ``{YYYYMMDD}.jpg`` under
    ``data/{site_name}/{season}/raw/phenocam/``; files that already exist are
    left alone.

    Args:
        season: Year used for the output path and the default date window.
        site_position: ``(lat, lon)`` pair; only used in the log line.
        site_name: PhenoCam site identifier.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` window; defaults to
            the whole ``season`` year.

    Raises:
        requests.exceptions.HTTPError: For listing errors other than 404.
            A 404 is reported and the function returns quietly. Failures of
            individual image downloads are printed, not raised.
    """
    lat, lon = site_position
    window = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    first_day, last_day = window.split("/")
    start_dt = datetime.strptime(first_day, "%Y-%m-%d")
    end_dt = datetime.strptime(last_day, "%Y-%m-%d")

    listing_url = f"{PHENOCAM_API}/middayimages/"

    try:
        # One-record request just to learn how many images the site has.
        probe = requests.get(
            listing_url,
            params={"site": site_name, "limit": 1},
            timeout=30,
        )
        probe.raise_for_status()
        total_count = probe.json().get("count", 0)

        if total_count == 0:
            print(f"[PhenoCam] No images found for site '{site_name}'")
            return

        print(
            f"[PhenoCam] Found {total_count} total images, estimating start offset..."
        )
        start_offset = _find_start_offset(site_name, start_dt, total_count)

        print(f"[PhenoCam] Fetching image list from offset {start_offset}...")
        images = []
        url = listing_url
        query = {"site": site_name, "offset": start_offset}
        page = 1
        max_pages = 500
        past_end_date = False

        while url and page <= max_pages and not past_end_date:
            reply = requests.get(url, params=query, timeout=30)
            reply.raise_for_status()
            payload = reply.json()
            batch = payload.get("results", [])

            if not batch:
                break

            for entry in batch:
                stamp = entry.get("imgdate", "")
                if not stamp:
                    continue
                try:
                    shot_dt = datetime.strptime(stamp, "%Y-%m-%d")
                except ValueError:
                    continue
                if shot_dt > end_dt:
                    # First image after the window: stop scanning and paging.
                    past_end_date = True
                    break
                if shot_dt >= start_dt:
                    images.append(entry)

            if not past_end_date:
                # The "next" link already carries the query string.
                url = payload.get("next")
                query = None
                page += 1
                if page % 50 == 0:
                    print(
                        f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..."
                    )
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            print(f"[PhenoCam] Site '{site_name}' not found")
            return
        raise

    print(f"[PhenoCam] Found {len(images)} images")

    def _fetch_one(entry):
        """Save one image; return a status message, or None if nothing to report."""
        day = entry.get("imgdate", "").replace("-", "")
        if not day:
            return None

        target = output_dir / f"{day}.jpg"
        if target.exists():
            return f"Skipped {day}.jpg (exists)"

        rel_path = entry.get("imgpath")
        if not rel_path:
            return None

        try:
            img_reply = requests.get(f"https://phenocam.nau.edu{rel_path}", timeout=30)
            img_reply.raise_for_status()
            target.write_bytes(img_reply.content)
            return f"Saved {day}.jpg"
        except Exception as e:
            # Best effort: one failed image must not abort the whole batch.
            return f"Error downloading {day}: {e}"

    with ThreadPoolExecutor(max_workers=5) as pool:
        pending = [pool.submit(_fetch_one, entry) for entry in images]
        for done in as_completed(pending):
            message = done.result()
            if message:
                print(f"[PhenoCam] {message}")

    print("[PhenoCam] Completed")
|
||||
|
||||
|
||||
def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
    """Fetch the greenness-index time series from the PhenoCam API. Saves JSON and CSV.

    Looks up the site's ROI list, downloads the first matching ROI's
    ``one_day_summary`` CSV, keeps rows inside the date window that yield a
    usable GCC value, and writes ``phenocam_gcc.json`` and ``phenocam_gcc.csv``
    under ``data/{site_name}/{season}/raw/phenocam/``. Finally calls
    ``write_phenocam_snr`` for the site and season.

    Args:
        season: Year used for the output path and the default date window.
        site_position: Accepted for signature parity with the image downloader; unused here.
        site_name: PhenoCam site identifier.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` window; defaults to
            the whole ``season`` year.

    Request failures are printed and end the function early; they are not raised.
    """
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_file = Path(f"data/{site_name}/{season}/raw/phenocam/phenocam_gcc.json")
    output_file.parent.mkdir(parents=True, exist_ok=True)

    start_date, end_date = datetime_range.split("/")
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")

    print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")

    # Get ROIs for site (paginate until the first page that has a match)
    try:
        url = f"{PHENOCAM_API}/roilists/"
        params = {"site": site_name}
        rois = []
        while url:
            r = requests.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            # .get(): an entry without a "site" key is skipped rather than
            # raising a KeyError that the handler below would not catch.
            rois.extend(
                [roi for roi in data.get("results", []) if roi.get("site") == site_name]
            )
            url = data.get("next")
            params = None
            if rois:
                break
        if not rois:
            print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
            return
        csv_url = rois[0].get("one_day_summary")
        if not csv_url:
            print("[PhenoCam-GI] No CSV data URL found for ROI")
            return
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
        return

    # Fetch CSV data
    try:
        csv_r = requests.get(csv_url, timeout=30)
        csv_r.raise_for_status()
        # splitlines() copes with CRLF bodies; split("\n") would leave a
        # trailing "\r" on the last header name and on every last cell.
        lines = [
            line for line in csv_r.text.splitlines() if line and not line.startswith("#")
        ]
        reader = csv.DictReader(lines)
        fieldnames = reader.fieldnames or ()
        use_mean_fallback = "gcc_90" not in fieldnames
        if use_mean_fallback:
            print(
                "[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)"
            )

        timeseries = []
        for row in reader:
            try:
                date_str = row.get("date")
                if not date_str:
                    continue
                date = datetime.strptime(date_str, "%Y-%m-%d")
                if start_dt <= date <= end_dt:
                    gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
                    if gcc is not None:
                        timeseries.append(
                            {"date": date.isoformat(), "greenness_index": gcc}
                        )
            except (ValueError, KeyError):
                # Malformed row: skip it and keep the rest of the series.
                continue
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching CSV: {e}")
        return

    timeseries.sort(key=lambda x: x["date"])

    output_dir = output_file.parent
    json_path = output_dir / "phenocam_gcc.json"
    csv_path = output_dir / "phenocam_gcc.csv"

    with open(json_path, "w") as f:
        json.dump(timeseries, f, indent=2)

    with open(csv_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["date", "greenness_index"])
        writer.writeheader()
        writer.writerows(timeseries)

    print(
        f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)"
    )

    # Imported here rather than at module top; presumably to avoid an import
    # cycle or an optional dependency at import time (confirm).
    from phenocam_snr import write_phenocam_snr

    write_phenocam_snr(site_name, season, base=Path("data"))
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
"""Sentinel-2-MSI acquisition from AWS Element84 Earth Search (STAC catalog)."""
|
||||
import numpy as np
|
||||
import rasterio
|
||||
import xml.etree.ElementTree as ET
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from rasterio.crs import CRS
|
||||
from rasterio.warp import Resampling, calculate_default_transform, reproject, transform_geom
|
||||
from rasterio.windows import from_bounds, transform as window_transform
|
||||
from pystac_client import Client
|
||||
|
||||
# Side length of the square area of interest, expressed in the same units as
# the site longitude/latitude (half of it is added/subtracted on each side).
BBOX_SIZE = 0.011
# CRS every saved crop is reprojected to (EPSG:32632 is WGS 84 / UTM zone 32N).
TARGET_CRS = CRS.from_epsg(32632)
|
||||
|
||||
|
||||
def _get_bbox(lon, lat):
    """Return ``[min_lon, min_lat, max_lon, max_lat]`` for a square centred on the site.

    The square has a side length of ``BBOX_SIZE``.
    """
    radius = BBOX_SIZE / 2
    west, east = lon - radius, lon + radius
    south, north = lat - radius, lat + radius
    return [west, south, east, north]
|
||||
|
||||
|
||||
def _get_window_for_bbox(src, bbox):
    """Return the read window of *src* that covers *bbox*, clipped to the dataset.

    *bbox* is ``[min_x, min_y, max_x, max_y]`` in EPSG:4326. Its corners are
    transformed into the CRS of *src*, their axis-aligned envelope is
    intersected with ``src.bounds`` and the result is converted to a window.
    """
    west, south, east, north = bbox[0], bbox[1], bbox[2], bbox[3]
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    projected = transform_geom(
        "EPSG:4326", src.crs, {"type": "Polygon", "coordinates": [ring]}
    )
    # Only the first four vertices are used; the fifth one closes the ring.
    corners = projected["coordinates"][0][:4]
    xs = [vertex[0] for vertex in corners]
    ys = [vertex[1] for vertex in corners]
    limits = src.bounds
    return from_bounds(
        max(min(xs), limits.left),
        max(min(ys), limits.bottom),
        min(max(xs), limits.right),
        min(max(ys), limits.top),
        src.transform,
    )
|
||||
|
||||
|
||||
def _extract_viewing_angle(item):
|
||||
if "granule_metadata" not in item.assets:
|
||||
return None
|
||||
try:
|
||||
xml_url = item.assets["granule_metadata"].href
|
||||
xml_resp = requests.get(xml_url, timeout=10)
|
||||
xml_resp.raise_for_status()
|
||||
root = ET.fromstring(xml_resp.content)
|
||||
angles = [
|
||||
abs(float(zenith_elem.text))
|
||||
for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle")
|
||||
if (zenith_elem := angle_elem.find("ZENITH_ANGLE")) is not None
|
||||
]
|
||||
return angles[0] if angles else None
|
||||
except Exception as e:
|
||||
print(f"[S2] Warning: Could not extract viewing angle: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def download_s2(season, site_position, site_name, date_range=None):
    """Download Sentinel-2 L2A crops around a site, one GeoTIFF per acquisition.

    Args:
        season: Year; used for the default date range and the output folder.
        site_position: ``(lat, lon)`` of the site.
        site_name: Site identifier used in the output path and log messages.
        date_range: Optional ``"start/end"`` string handed to the STAC search
            as ``datetime``. Defaults to the full calendar year *season*.

    Each item found in the Earth Search STAC catalog is cropped to the site
    bounding box, the four bands are stacked, reprojected to ``TARGET_CRS``
    when necessary and written to
    ``data/{site_name}/{season}/raw/s2/{date}_{increment}.geotiff``.
    Files that already exist are skipped; items with missing bands are skipped
    with a log message.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/s2/")

    print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    bbox = _get_bbox(lon, lat)
    # Band description written to the GeoTIFF -> asset key in the STAC item.
    # NOTE(review): confirm that the catalog's "nir" asset really is B8A.
    bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
    output_dir.mkdir(parents=True, exist_ok=True)

    print("[S2] Connecting to STAC catalog...")
    client = Client.open("https://earth-search.aws.element84.com/v1")
    search = client.search(
        collections=["sentinel-2-l2a"],
        intersects={"type": "Point", "coordinates": [lon, lat]},
        datetime=datetime_range,
        max_items=1000,
    )

    print("[S2] Searching items...")
    # Keep the first item seen per (acquisition date, 4th "_"-separated id part).
    items_by_key = {}
    for item in search.items():
        date = item.datetime.strftime("%Y%m%d")
        parts = item.id.split("_")
        increment = parts[3] if len(parts) > 3 else "0"
        key = (date, increment)
        if key not in items_by_key:
            items_by_key[key] = item

    print(f"[S2] Found {len(items_by_key)} unique items")

    for (date, increment), item in items_by_key.items():
        filepath = output_dir / f"{date}_{increment}.geotiff"
        if filepath.exists():
            print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
            continue

        print(f"[S2] Processing {date}_{increment}...")
        band_data = {}
        profile = None

        for band_idx, (band_name, asset_name) in enumerate(bands.items()):
            if asset_name not in item.assets:
                continue
            asset = item.assets[asset_name]
            with rasterio.open(asset.href) as src:
                window = _get_window_for_bbox(src, bbox)
                if window.height <= 0 or window.width <= 0:
                    # The bounding box does not intersect this raster.
                    continue
                data = src.read(window=window)
                new_transform = window_transform(window, src.transform)
                if profile is None:
                    # The first band that is read defines the output grid.
                    profile = {
                        "driver": "GTiff",
                        "height": window.height,
                        "width": window.width,
                        "count": len(bands),
                        "dtype": data.dtype,
                        "crs": src.crs,
                        "transform": new_transform,
                        "compress": "lzw",
                    }
                band_data[band_idx] = data[0]

        if profile and len(band_data) == len(bands):
            stacked = np.array([band_data[i] for i in sorted(band_data.keys())])
            band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
            viewing_angle = _extract_viewing_angle(item)

            if profile["crs"] != TARGET_CRS:
                src_transform = profile["transform"]
                src_height, src_width = profile["height"], profile["width"]
                left, bottom, right, top = rasterio.transform.array_bounds(
                    src_height, src_width, src_transform
                )
                dst_transform, dst_width, dst_height = calculate_default_transform(
                    profile["crs"], TARGET_CRS, src_width, src_height,
                    left=left, bottom=bottom, right=right, top=top,
                )
                reprojected = np.empty(
                    (len(stacked), dst_height, dst_width), dtype=stacked.dtype
                )
                for i in range(len(stacked)):
                    reproject(
                        source=stacked[i],
                        destination=reprojected[i],
                        src_transform=src_transform,
                        src_crs=profile["crs"],
                        dst_transform=dst_transform,
                        dst_crs=TARGET_CRS,
                        resampling=Resampling.bilinear,
                    )
                stacked = reprojected
                profile.update({
                    "crs": TARGET_CRS,
                    "transform": dst_transform,
                    "width": dst_width,
                    "height": dst_height,
                })

            with rasterio.open(filepath, "w", **profile) as dst:
                for i, data in enumerate(stacked, 1):
                    dst.write(data, i)
                    dst.set_band_description(i, band_names[i - 1])
                tags = {}
                if viewing_angle is not None:
                    tags["VIEWING_ZENITH_ANGLE"] = str(viewing_angle)
                pb = item.properties.get("s2:processing_baseline")
                if pb is not None:
                    tags["PROCESSING_BASELINE"] = str(pb)
                if tags:
                    dst.update_tags(**tags)

            # Same test as for the tag above, so a 0.0 degree angle is logged too.
            angle_msg = (
                f" (viewing angle: {viewing_angle:.2f}°)"
                if viewing_angle is not None
                else ""
            )
            print(f"[S2] Saved: {filepath}{angle_msg}")
        else:
            print(f"[S2] Skipping {date}_{increment} (missing bands)")

    print("[S2] Completed")
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
"""Sentinel-3-OLCI acquisition from Copernicus Data Space OpenEO API."""
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from dotenv import load_dotenv
|
||||
import openeo
|
||||
import requests
|
||||
import netCDF4
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.transform import from_bounds
|
||||
|
||||
load_dotenv()
|
||||
|
||||
BBOX_SIZE = 0.016 # Larger than S2 to ensure full coverage including padded pixels
|
||||
|
||||
|
||||
def _get_bbox(lon, lat):
    """Return ``[min_lon, min_lat, max_lon, max_lat]`` of the square around the site.

    The square is centred on ``(lon, lat)`` and ``BBOX_SIZE`` wide.
    """
    offset = BBOX_SIZE / 2
    lower_left = (lon - offset, lat - offset)
    upper_right = (lon + offset, lat + offset)
    return [lower_left[0], lower_left[1], upper_right[0], upper_right[1]]
|
||||
|
||||
|
||||
def _process_netcdf(nc_file, output_dir, bands, openeo_bands):
    """Split a multi-temporal NetCDF cube into one multi-band GeoTIFF per time step.

    Args:
        nc_file: Path of the NetCDF file; it must provide the variables ``t``,
            ``x``, ``y`` and one variable per band named ``B`` + digits.
        output_dir: Directory receiving ``{YYYYMMDD}_{increment}.geotiff`` files.
        bands: Mapping whose keys (in order) are used as band descriptions.
        openeo_bands: Band variable names, parallel to ``bands`` key order; used
            to translate a NetCDF variable name back into a ``bands`` key.
    """
    with netCDF4.Dataset(str(nc_file), "r") as nc:
        times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
        x_coords = nc.variables["x"][:]
        y_coords = nc.variables["y"][:]
        # Band variables are those named "B" followed only by digits, sorted by name.
        band_vars = sorted(
            [v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
        )
        band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]

        # NOTE(review): x/y are presumably pixel-centre coordinates while
        # from_bounds expects the outer edges, so the transform may be off by
        # half a pixel; it also assumes row 0 is the northern edge. TODO confirm.
        transform = from_bounds(
            float(x_coords.min()),
            float(y_coords.min()),
            float(x_coords.max()),
            float(y_coords.max()),
            len(x_coords),
            len(y_coords),
        )

        print(f"[S3] Found {len(times)} time steps")
        # Number of time steps already written per calendar day; used as a
        # running suffix so several acquisitions on one day do not collide.
        date_counts = {}
        for t_idx, time_val in enumerate(times):
            # Values that are not plain ``datetime`` objects are converted again
            # from the raw time variable.
            dt = (
                time_val
                if isinstance(time_val, datetime)
                else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
            )
            date_str = dt.strftime("%Y%m%d")
            increment = date_counts.get(date_str, 0)
            date_counts[date_str] = increment + 1

            band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
            stacked = np.stack(band_data, axis=0)

            output_path = output_dir / f"{date_str}_{increment}.geotiff"
            # The CRS is hard-coded to EPSG:32632 rather than read from the file.
            with rasterio.open(
                output_path,
                "w",
                driver="GTiff",
                height=len(y_coords),
                width=len(x_coords),
                count=len(band_data),
                dtype=stacked.dtype,
                crs="EPSG:32632",
                transform=transform,
                compress="lzw",
            ) as dst:
                dst.write(stacked)
                for i, band_name in enumerate(band_names, 1):
                    dst.set_band_description(i, band_name)
            print(f"[S3] Saved: {output_path}")
|
||||
|
||||
|
||||
def download_s3(season, site_position, site_name, date_range=None):
    """Download Sentinel-3 OLCI data around a site through the CDSE openEO API.

    Args:
        season: Year; used for the default date range and the output folder.
        site_position: ``(lat, lon)`` of the site.
        site_name: Site identifier used in the output path and log messages.
        date_range: Optional ``"start/end"`` string. Defaults to the full
            calendar year *season*.

    The cube is downloaded as a temporary NetCDF file into
    ``data/{site_name}/{season}/raw/s3/``, split into one GeoTIFF per time step
    by ``_process_netcdf`` and the NetCDF file is removed afterwards.
    Credentials are read from the ``CDSE_USER`` / ``CDSE_PASSWORD`` environment
    variables.

    Raises:
        requests.exceptions.RequestException: If the token request fails or
            times out.
        Exception: Any error raised by the openEO download is logged and re-raised.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    output_dir = Path(f"data/{site_name}/{season}/raw/s3/")

    print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    bbox = _get_bbox(lon, lat)
    # Keys become the band descriptions of the written GeoTIFFs.
    bands = {
        "SDR_Oa04": "blue",
        "SDR_Oa06": "green",
        "SDR_Oa08": "red",
        "SDR_Oa17": "nir",
    }
    output_dir.mkdir(parents=True, exist_ok=True)

    # Band names as requested from the openEO collection.
    band_map = {
        "SDR_Oa04": "B04",
        "SDR_Oa06": "B06",
        "SDR_Oa08": "B08",
        "SDR_Oa17": "B17",
    }
    openeo_bands = [band_map.get(b, b) for b in bands.keys()]

    start_date, end_date = datetime_range.split("/")
    spatial_extent = {
        "west": bbox[0],
        "east": bbox[2],
        "south": bbox[1],
        "north": bbox[3],
    }

    print("[S3] Authenticating...")
    # A timeout keeps an unresponsive identity server from blocking the whole
    # pipeline forever (requests waits indefinitely by default).
    token_response = requests.post(
        "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
        data={
            "grant_type": "password",
            "username": os.getenv("CDSE_USER"),
            "password": os.getenv("CDSE_PASSWORD"),
            "client_id": "cdse-public",
        },
        timeout=30,
    )
    token_response.raise_for_status()
    tokens = token_response.json()
    access_token = tokens["access_token"]

    print("[S3] Connecting to OpenEO...")
    conn = openeo.connect("openeo.dataspace.copernicus.eu")
    conn.authenticate_oidc_access_token(access_token)

    print("[S3] Loading collection...")
    datacube = conn.load_collection(
        "SENTINEL3_OLCI_L1B",
        spatial_extent=spatial_extent,
        temporal_extent=[start_date, end_date],
        bands=openeo_bands,
    ).resample_spatial(projection=32632)

    output_file = output_dir / "s3_data.nc"
    print(f"[S3] Downloading NetCDF to {output_file}...")
    print(f"[S3] Temporal extent: {start_date} to {end_date}")
    print(f"[S3] Spatial extent: {spatial_extent}")
    print(f"[S3] Bands: {openeo_bands}")
    print("[S3] This may take several minutes depending on data volume...")

    start_time = time.time()
    try:
        datacube.download(str(output_file), format="NetCDF")
        elapsed = time.time() - start_time
        print(f"[S3] Download completed in {elapsed:.1f} seconds")
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}")
        raise

    print("[S3] Processing NetCDF...")
    process_start = time.time()
    _process_netcdf(output_file, output_dir, bands, openeo_bands)
    process_elapsed = time.time() - process_start
    print(f"[S3] Processing completed in {process_elapsed:.1f} seconds")

    print("[S3] Removing temporary NetCDF file...")
    os.remove(output_file)
    print("[S3] Completed")
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
25.0743,
|
||||
35.3045
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2024": {}
|
||||
},
|
||||
"elevation": 68,
|
||||
"description": "FORTH Heraklion Greece",
|
||||
"sitename": "forthgr",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Agriculture"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
11.320308,
|
||||
47.116171
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2020": {},
|
||||
"2024": {}
|
||||
},
|
||||
"elevation": 972,
|
||||
"description": "Neustift Field Site, Stubai Valley, Tyrol, Austria",
|
||||
"sitename": "innsbruck",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Grassland"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
24.3688,
|
||||
58.5633
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2024": {}
|
||||
},
|
||||
"elevation": 3,
|
||||
"description": "Abandoned peat extraction area, Estonia",
|
||||
"sitename": "pitsalu",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Wetland"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
19.7673,
|
||||
64.2437
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2023": {}
|
||||
},
|
||||
"elevation": 224,
|
||||
"description": "SITES Svartberget Research Station, Vindeln, Sweden",
|
||||
"sitename": "vindeln2",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Deciduous Broadleaf"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
-6.0033,
|
||||
36.7455
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2024": {}
|
||||
},
|
||||
"elevation": 56,
|
||||
"description": "Sun flower plot, Jerez, Spain",
|
||||
"sitename": "sunflowerjerez1",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Agriculture"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
26.9837,
|
||||
42.6558
|
||||
]
|
||||
},
|
||||
"properties": {
|
||||
"country": "",
|
||||
"seasons": {
|
||||
"2024": {}
|
||||
},
|
||||
"elevation": 262,
|
||||
"description": "Institute of Agriculture in Karnobat (selection fields)",
|
||||
"sitename": "institutekarnobat",
|
||||
"ndvi_selected": true,
|
||||
"vegetation_type": "Agriculture"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
84
deploy.sh
84
deploy.sh
|
|
@ -1,84 +0,0 @@
|
|||
#!/bin/bash
# Deploy helper for the satellite-fusion application.
#
# Usage: deploy.sh {setup|upload|code} [server]
#   setup  - copy the code to the server, (re)create the virtualenv, install
#            the systemd unit and create the data directory (default)
#   upload - rsync the local data/ directory to the server
#   code   - rsync the code only, without running any setup step
set -e

MODE="${1:-setup}"
SERVER="${2:-root@49.12.2.88}"
APP_DIR="/opt/satellite-fusion"
DATA_DIR="$APP_DIR/data"

case "$MODE" in
    setup)
        echo "Deploying to $SERVER..."
        TEMP_DIR=$(mktemp -d)
        # Remove the staging copy even when a later step aborts under `set -e`.
        trap 'rm -rf "$TEMP_DIR"' EXIT
        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$TEMP_DIR/"
        cat > "$TEMP_DIR/.env.example" <<EOF
CDSE_USER=your_username_here
CDSE_PASSWORD=your_password_here
EOF
        ssh "$SERVER" "mkdir -p $APP_DIR"
        # --delete removes everything on the server that is missing from the
        # staging copy. Excluded paths are protected from deletion, so the
        # uploaded data and the real credentials survive a repeated setup.
        rsync -av --delete --exclude='data/' --exclude='.env' "$TEMP_DIR/" "$SERVER:$APP_DIR/"
        rm -rf "$TEMP_DIR"

        # Unquoted heredoc: $APP_DIR and $DATA_DIR are expanded locally before
        # the script is sent to the server.
        ssh "$SERVER" <<ENDSSH
            set -e
            cd $APP_DIR

            # Find/install Python 3.11
            if ! command -v python3.11 &> /dev/null; then
                apt-get update -qq
                apt-get install -y python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
                    apt-get install -y -t trixie-backports python3.11 python3.11-venv python3.11-dev 2>/dev/null || {
                        apt-get install -y software-properties-common
                        add-apt-repository -y ppa:deadsnakes/ppa 2>/dev/null || true
                        apt-get update -qq
                        apt-get install -y python3.11 python3.11-venv python3.11-dev
                    }
                }
            fi

            # Setup venv
            [ -d venv ] && rm -rf venv
            python3.11 -m venv venv
            source venv/bin/activate
            pip install --upgrade pip -q
            pip install -r requirements.txt -q
            pip install git+https://github.com/DHI-GRAS/efast.git -q

            # Setup .env
            [ ! -f .env ] && [ -f .env.example ] && cp .env.example .env

            # Setup systemd service
            if [ -f satellite-fusion-web.service ]; then
                sed "s|/opt/satellite-fusion|$APP_DIR|g" satellite-fusion-web.service | \
                    sed "s|--directory /opt/satellite-fusion|--directory $APP_DIR/webapp|g" > /tmp/satellite-fusion-web.service
                cp /tmp/satellite-fusion-web.service /etc/systemd/system/
                systemctl daemon-reload
            fi

            # Create data directory and webapp/data symlink
            mkdir -p $DATA_DIR
            # -n replaces an existing link instead of creating data/data inside it.
            ln -sfn ../data $APP_DIR/webapp/data
ENDSSH
        echo "Setup complete!"
        ;;

    upload)
        echo "Uploading data to $SERVER..."
        rsync -avh --progress --exclude='*.pyc' --exclude='__pycache__' data/ "$SERVER:$DATA_DIR/"
        echo "Data upload complete!"
        ;;

    code)
        echo "Uploading code to $SERVER..."
        rsync -av --exclude='__pycache__' --exclude='*.pyc' --exclude='.git' --exclude='data/' --exclude='.env' . "$SERVER:$APP_DIR/"
        echo "Code upload complete!"
        ;;

    *)
        echo "Usage: $0 {setup|upload|code} [server]"
        echo " setup - Deploy code and setup server (default)"
        echo " upload - Upload data directory only"
        echo " code - Upload code files only (no setup)"
        exit 1
        ;;
esac
|
||||
176
fusion.py
176
fusion.py
|
|
@ -1,176 +0,0 @@
|
|||
"""EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
|
||||
|
||||
|
||||
def _import_efast():
|
||||
"""Lazy import of efast to avoid import errors when not using efast functions."""
|
||||
try:
|
||||
import efast
|
||||
|
||||
return efast
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
|
||||
)
|
||||
|
||||
|
||||
def run_efast(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
):
    """Run the EFAST reflectance (``REFL``) fusion once per day of the date range.

    Args:
        season: Year; defines the default date range and the base directory.
        site_position: ``(lat, lon)``; only used for the log message.
        site_name: Site identifier.
        cleaning_strategy: Selects the prepared base directory.
        sigma: Optional value forwarded to ``efast.fusion``; when truthy it
            also selects the ``fusion_sigma{sigma}`` output folder.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string; defaults to
            the whole calendar year *season*.
        s2_output_dir, s3_output_dir, fusion_output_dir: Optional overrides of
            the directories derived from the base directory.

    Errors for a single day are printed and do not stop the loop.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"

    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    if not s2_output_dir:
        s2_output_dir = base_dir / "s2"
    if not s3_output_dir:
        s3_output_dir = base_dir / "s3"
    if not fusion_output_dir:
        fusion_output_dir = base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")

    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    efast = _import_efast()

    first_text, last_text = datetime_range.split("/")
    first_day = datetime.strptime(first_text, "%Y-%m-%d")
    last_day = datetime.strptime(last_text, "%Y-%m-%d")

    # The options are identical for every day, so build them once.
    options = {
        "product": "REFL",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        options["sigma"] = sigma

    for offset in range((last_day - first_day).days + 1):
        day = first_day + timedelta(days=offset)
        stamp = day.strftime("%Y%m%d")
        target = fusion_output_dir / f"REFL_{stamp}.tif"
        try:
            efast.fusion(day, s3_output_dir, s2_output_dir, fusion_output_dir, **options)
            if target.exists():
                print(f"[EFAST] Saved: {target}")
            else:
                print(f"[EFAST] No output for {stamp} (insufficient nearby data)")
        except Exception as e:
            # Best effort: a failing day must not abort the remaining days.
            print(f"[EFAST] Error processing {stamp}: {e}")

    print("[EFAST] Completed")
|
||||
|
||||
|
||||
def run_all_efast_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run EFAST fusion for all 4 scenarios. Expects prepared_*/s2 and prepared_*/s3 to exist.

    The scenarios are both cleaning strategies, each first with the default
    sigma (``None``) and then with *sigma_value*.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (None, sigma_value):
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
            )
|
||||
|
||||
|
||||
def run_efast_itb(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
    *,
    s2_output_dir=None,
    s3_output_dir=None,
    fusion_output_dir=None,
):
    """Run the EFAST ``GCC`` fusion (ITB workflow) once per day of the date range.

    Args:
        season: Year; defines the default date range and the base directory.
        site_position: ``(lat, lon)``; only used for the log message.
        site_name: Site identifier.
        cleaning_strategy: Selects the prepared ITB base directory.
        sigma: Optional value forwarded to ``efast.fusion``; when truthy it
            also selects the ``fusion_sigma{sigma}`` output folder.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string; defaults to
            the whole calendar year *season*.
        s2_output_dir, s3_output_dir, fusion_output_dir: Optional overrides of
            the directories derived from the base directory.

    Errors for a single day are printed and do not stop the loop.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
    base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
    if not s2_output_dir:
        s2_output_dir = base_dir / "s2"
    if not s3_output_dir:
        s3_output_dir = base_dir / "s3"
    if not fusion_output_dir:
        fusion_output_dir = base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()

    first_text, last_text = datetime_range.split("/")
    first_day = datetime.strptime(first_text, "%Y-%m-%d")
    last_day = datetime.strptime(last_text, "%Y-%m-%d")

    # The options are identical for every day, so build them once.
    options = {
        "product": "GCC",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        options["sigma"] = sigma

    for offset in range((last_day - first_day).days + 1):
        day = first_day + timedelta(days=offset)
        stamp = day.strftime("%Y%m%d")
        target = fusion_output_dir / f"GCC_{stamp}.tif"
        try:
            efast.fusion(day, s3_output_dir, s2_output_dir, fusion_output_dir, **options)
            if target.exists():
                print(f"[EFAST-ITB] Saved: {target}")
            else:
                print(f"[EFAST-ITB] No output for {stamp}")
        except Exception as e:
            # Best effort: a failing day must not abort the remaining days.
            print(f"[EFAST-ITB] Error {stamp}: {e}")
    print("[EFAST-ITB] Completed")
|
||||
|
||||
|
||||
def run_all_efast_itb_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run the ITB GCC fusion for both cleaning strategies and both sigma settings.

    For each strategy the default sigma (``None``) is run first, followed by
    *sigma_value*.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (None, sigma_value):
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
            )
|
||||
|
|
@ -1,263 +0,0 @@
|
|||
"""
|
||||
No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
|
||||
|
||||
Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
|
||||
directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
|
||||
``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
|
||||
and day offsets vs.\ PhenoCam ``phenocam_phenology.json``.
|
||||
|
||||
Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
|
||||
(:mod:`gap_validation.phenology_offsets`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_stats import _norm_date_key, load_timeseries
|
||||
from phenology_timesat import (
|
||||
_timesat as _timesat_pkg,
|
||||
build_yraw_three_years,
|
||||
iter_sites_seasons_from_sites_geojson,
|
||||
phenocam_phenology_path,
|
||||
run_timesat_phenology_from_yraw,
|
||||
)
|
||||
|
||||
# Scenario identifiers processed by default. Each key has the form
# "{strategy}_sigma{N}" with an optional "_itb" suffix; keys without the
# suffix are treated as the "bti" workflow by parse_scenario_key().
FUSION_SCENARIO_KEYS: tuple[str, ...] = (
    "aggressive_sigma20",
    "aggressive_sigma30",
    "nonaggressive_sigma20",
    "nonaggressive_sigma30",
    "aggressive_sigma20_itb",
    "aggressive_sigma30_itb",
    "nonaggressive_sigma20_itb",
    "nonaggressive_sigma30_itb",
)
|
||||
|
||||
|
||||
def fusion_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path of the fusion phenology JSON for a site/season."""
    relative = f"data/{site_name}/{season}/fusion_phenology.json"
    return Path(relative)
|
||||
|
||||
|
||||
def parse_scenario_key(key: str) -> tuple[str, int, str]:
    """``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode).

    *mode* is ``"itb"`` when the key ends with ``_itb`` and ``"bti"`` otherwise.

    Raises:
        ValueError: If the key (without a single trailing ``_itb``) is not of
            the form ``{aggressive|nonaggressive}_sigma{digits}``.
    """
    suffix = "_itb"
    mode = "itb" if key.endswith(suffix) else "bti"
    # Strip the suffix only. Removing every "_itb" occurrence would silently
    # accept keys such as "aggressive_itb_sigma20" as a BtI scenario.
    base = key[: -len(suffix)] if mode == "itb" else key
    m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
    if not m:
        raise ValueError(f"Cannot parse scenario key: {key!r}")
    return m.group(1), int(m.group(2)), mode
|
||||
|
||||
|
||||
def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
    """Return the path of the fusion GCC ``timeseries.json`` of one scenario.

    BtI scenarios live in ``processed_{strategy}_sigma{N}``, ITB scenarios in
    ``processed_{strategy}_itb_sigma{N}``.
    """
    strategy, sigma, mode = parse_scenario_key(scenario_key)
    infix = "" if mode == "bti" else "_itb"
    processed = f"processed_{strategy}{infix}_sigma{sigma}"
    return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
|
||||
|
||||
|
||||
def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
    """YYYY-MM-DD → GCC from fusion ``timeseries.json``.

    Entries are dropped when the date key cannot be normalised, when the value
    is ``None`` or not convertible to ``float``, or when the value is not
    finite (NaN or ±infinity).
    """
    import math  # local import: the module header does not import it

    out: dict[str, float] = {}
    for k, v in load_timeseries(ts_path).items():
        nk = _norm_date_key(k)
        if not nk or v is None:
            continue
        try:
            fv = float(v)
        except (TypeError, ValueError):
            continue
        # ``fv == fv`` only rejected NaN; infinities are not usable GCC values either.
        if math.isfinite(fv):
            out[nk] = fv
    return out
|
||||
|
||||
|
||||
def timesat_transitions_from_by_date(
    by_date: dict[str, float], season: int
) -> dict[str, str | float | None]:
    """Run TIMESAT on fusion GCC; return transition dates for *season*.

    With fewer than 10 values TIMESAT is not run and both dates as well as
    ``timesat_input`` are ``None``. ``n_values`` always reports the number of
    entries in *by_date*.
    """
    if len(by_date) < 10:
        green_up = green_down = stack_mode = None
    else:
        # The series is stacked over the previous, the target and the next year.
        years = (season - 1, season, season + 1)
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
        fitted = run_timesat_phenology_from_yraw(yraw, years)
        green_up = fitted.get("green_up_50pct_date")
        green_down = fitted.get("green_down_50pct_date")
    return {
        "green_up_50pct_date": green_up,
        "green_down_50pct_date": green_down,
        "timesat_input": stack_mode,
        "n_values": len(by_date),
    }
|
||||
|
||||
|
||||
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
|
||||
if not iso_a or not iso_b:
|
||||
return None
|
||||
try:
|
||||
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
|
||||
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
|
||||
return abs((a - b).days)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _offsets_vs_reference(
    fused: dict[str, str | float | None], reference: dict
) -> dict[str, int | None]:
    """Return the absolute day offsets of the fused transition dates vs. *reference*.

    Both mappings are read with the keys ``green_up_50pct_date`` and
    ``green_down_50pct_date``; a missing date yields ``None`` for that offset.
    """
    pairs = (
        ("abs_day_offset_green_up", "green_up_50pct_date"),
        ("abs_day_offset_green_down", "green_down_50pct_date"),
    )
    return {
        out_key: _day_offset(fused.get(field), reference.get(field))
        for out_key, field in pairs
    }
|
||||
|
||||
|
||||
def compute_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> dict:
    """Compute TIMESAT transition dates of every fusion scenario of one site/season.

    Returns a dict with ``site_name``, ``season``, the PhenoCam ``reference``
    dates (empty when the reference file does not exist) and one entry per
    scenario key. A scenario whose timeseries file is missing only carries
    ``workflow`` and ``missing_timeseries``.
    """
    ref_path = phenocam_phenology_path(site_name, season)
    has_reference = ref_path.is_file()
    reference = json.loads(ref_path.read_text(encoding="utf-8")) if has_reference else {}

    scenarios: dict[str, dict] = {}
    for key in scenario_keys:
        ts_path = fusion_gcc_timeseries_path(site_name, season, key)
        strategy, sigma, mode = parse_scenario_key(key)
        if not ts_path.is_file():
            scenarios[key] = {"workflow": mode, "missing_timeseries": str(ts_path)}
            continue
        fused = timesat_transitions_from_by_date(fusion_gcc_by_date(ts_path), season)
        entry = {
            "workflow": mode,
            "strategy": strategy,
            "sigma": sigma,
            "timeseries_path": str(ts_path),
        }
        entry.update(fused)
        entry.update(_offsets_vs_reference(fused, reference))
        scenarios[key] = entry

    reference_summary = {
        "source": str(ref_path) if has_reference else None,
        "green_up_50pct_date": reference.get("green_up_50pct_date"),
        "green_down_50pct_date": reference.get("green_down_50pct_date"),
    }
    return {
        "site_name": site_name,
        "season": season,
        "reference": reference_summary,
        "scenarios": scenarios,
    }
|
||||
|
||||
|
||||
def write_fusion_phenology_for_site(
    site_name: str,
    season: int,
    *,
    scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
) -> Path | None:
    """Compute the fusion phenology of one site/season and write it as JSON.

    Returns the written path, or ``None`` (after printing a hint) when the
    ``timesat`` package is not available.
    """
    out = fusion_phenology_path(site_name, season)
    if _timesat_pkg is None:
        print(
            f"[Fusion phenology] Skipped (no timesat); would write {out}. "
            "pip install timesat"
        )
        return None

    payload = compute_fusion_phenology_for_site(
        site_name, season, scenario_keys=scenario_keys
    )
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")

    # A scenario counts as successful when at least one transition date exists.
    n_ok = sum(
        bool(entry.get("green_up_50pct_date") or entry.get("green_down_50pct_date"))
        for entry in payload["scenarios"].values()
    )
    print(
        f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
        f"≥1 transition date)"
    )
    return out
|
||||
|
||||
|
||||
def write_fusion_phenology_all(
    *,
    sites_geojson: str | Path = "data/sites.geojson",
    seasons: dict[str, int] | None = None,
) -> int:
    """Write the fusion phenology JSON for many site/season pairs.

    When *seasons* (site name → season) is given and non-empty, exactly those
    pairs are processed in sorted order; otherwise the pairs come from
    *sites_geojson*. Returns the number of pairs for which a file was written.
    """
    if seasons:
        pairs = sorted(seasons.items())
    else:
        pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)

    written = 0
    for site, season in pairs:
        print(f"=== {site} {season} ===")
        written += 1 if write_fusion_phenology_for_site(site, season) else 0
    print(f"[Fusion phenology] Processed {written} site/season pair(s).")
    return written
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: process one site/season or all sites.

    Exits with a message when ``timesat`` is not installed, or when neither
    ``--all`` nor both ``--site`` and ``--season`` are given.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
    )
    parser.add_argument("--site", type=str, default=None)
    parser.add_argument("--season", type=int, default=None)
    parser.add_argument(
        "--all",
        action="store_true",
        help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
    )
    parser.add_argument(
        "--primary-only",
        action="store_true",
        help="With --all: only thesis primary seasons per site.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=Path("data/sites.geojson"),
    )
    args = parser.parse_args()
    if _timesat_pkg is None:
        raise SystemExit("Install timesat: pip install timesat")

    # Season used per site when --primary-only is given.
    primary = {
        "forthgr": 2024,
        "innsbruck": 2024,
        "pitsalu": 2024,
        "vindeln2": 2023,
        "sunflowerjerez1": 2024,
        "institutekarnobat": 2024,
    }

    if args.all:
        selected = primary if args.primary_only else None
        write_fusion_phenology_all(sites_geojson=args.sites_geojson, seasons=selected)
        return
    if args.site and args.season is not None:
        write_fusion_phenology_for_site(args.site, args.season)
        return
    raise SystemExit("Provide --site and --season, or use --all --primary-only")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1 +0,0 @@
|
|||
"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
from gap_validation.run import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,135 +0,0 @@
|
|||
"""Run spatial NSE_S2 gap validation for all thesis sites (best BtI and/or best ItB scenario per site)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.run import run_validation
|
||||
|
||||
# Primary season per site (matches scripts/export_thesis_tables.py).
|
||||
PRIMARY_SEASON = {
|
||||
"forthgr": 2024,
|
||||
"innsbruck": 2024,
|
||||
"pitsalu": 2024,
|
||||
"vindeln2": 2023,
|
||||
"sunflowerjerez1": 2024,
|
||||
"institutekarnobat": 2024,
|
||||
}
|
||||
|
||||
|
||||
def _site_positions(geojson: Path) -> dict[str, tuple[float, float]]:
|
||||
data = json.loads(geojson.read_text(encoding="utf-8"))
|
||||
out: dict[str, tuple[float, float]] = {}
|
||||
for feat in data.get("features", []):
|
||||
props = feat.get("properties") or {}
|
||||
name = props.get("sitename")
|
||||
coords = (feat.get("geometry") or {}).get("coordinates")
|
||||
if not name or not coords or len(coords) < 2:
|
||||
continue
|
||||
lon, lat = float(coords[0]), float(coords[1])
|
||||
out[str(name)] = (lat, lon)
|
||||
return out
|
||||
|
||||
|
||||
def _parse_scenario(key: str) -> tuple[str, int | None, str]:
|
||||
"""``aggressive_sigma20`` → (strategy, sigma, bti)."""
|
||||
mode = "itb" if key.endswith("_itb") else "bti"
|
||||
base = key.replace("_itb", "")
|
||||
m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
|
||||
if not m:
|
||||
raise ValueError(f"Cannot parse scenario key: {key!r}")
|
||||
strategy = m.group(1)
|
||||
sigma = int(m.group(2))
|
||||
return strategy, sigma if sigma == 30 else (None if sigma == 20 else sigma), mode
|
||||
|
||||
|
||||
def _best_from_metrics(metrics_path: Path, workflow: str) -> str | None:
|
||||
"""Best scenario key (max no-gap NSE_PC) for ``workflow`` (``bti`` or ``itb``)."""
|
||||
if workflow not in ("bti", "itb"):
|
||||
raise ValueError(f"workflow must be bti or itb, got {workflow!r}")
|
||||
if not metrics_path.is_file():
|
||||
return None
|
||||
temporal = json.loads(metrics_path.read_text(encoding="utf-8")).get("temporal") or {}
|
||||
want_itb = workflow == "itb"
|
||||
best_key, best_nse = None, None
|
||||
for k, v in temporal.items():
|
||||
if k.endswith("_itb") != want_itb or not isinstance(v, dict):
|
||||
continue
|
||||
n = v.get("nse_pc")
|
||||
if isinstance(n, (int, float)) and (best_nse is None or n > best_nse):
|
||||
best_nse = n
|
||||
best_key = k
|
||||
return best_key
|
||||
|
||||
|
||||
def _best_bti_from_metrics(metrics_path: Path) -> str | None:
    """Shortcut for ``_best_from_metrics`` with the ``bti`` workflow."""
    best = _best_from_metrics(metrics_path, workflow="bti")
    return best
|
||||
|
||||
|
||||
def _best_itb_from_metrics(metrics_path: Path) -> str | None:
    """Shortcut for ``_best_from_metrics`` with the ``itb`` workflow."""
    best = _best_from_metrics(metrics_path, workflow="itb")
    return best
|
||||
|
||||
|
||||
def _resolve_workflows(workflow: str) -> tuple[str, ...]:
|
||||
return ("bti", "itb") if workflow == "both" else (workflow,)
|
||||
|
||||
|
||||
def main() -> None:
    """CLI: run spatial gap validation for every site in ``PRIMARY_SEASON``.

    For each site (in sorted order) and each requested workflow, the best
    scenario is looked up in ``<data-dir>/<site>/<season>/metrics.json`` and
    passed to ``run_validation``.  Sites without coordinates and workflows
    without a scenario are reported with a ``[skip]`` line.
    """
    parser = argparse.ArgumentParser(description="Batch spatial gap validation (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument("--write-manifest-only", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        help="Filter gap lengths (default: all 15 and 30 in manifest).",
    )
    options = parser.parse_args()

    coordinates = _site_positions(options.sites_geojson)
    requested_gaps = options.gap_days
    selected_workflows = _resolve_workflows(options.workflow)

    for site, season in sorted(PRIMARY_SEASON.items()):
        pos = coordinates.get(site)
        if not pos:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_path = options.data_dir / site / str(season) / "metrics.json"
        for workflow in selected_workflows:
            scenario_key = _best_from_metrics(metrics_path, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            # Only an explicit sigma of 30 is forwarded; anything else becomes None.
            sigma_kw = 30 if sigma == 30 else None
            print(f"=== {site} {season} {scenario_key} ===")
            result = run_validation(
                site,
                season,
                pos,
                strategy,
                sigma_kw,
                mode,
                skip_manifest=False,
                skip_fusion=options.skip_fusion,
                write_manifest_only=options.write_manifest_only,
                gap_days_filter=requested_gaps,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
"""Run full-season gap-degraded NSE_PC for all thesis sites (best BtI and/or best ItB scenario per site)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.batch_spatial import (
|
||||
PRIMARY_SEASON,
|
||||
_best_from_metrics,
|
||||
_parse_scenario,
|
||||
_resolve_workflows,
|
||||
_site_positions,
|
||||
)
|
||||
from gap_validation.temporal_pc import run_temporal_pc
|
||||
|
||||
|
||||
def main() -> None:
    """CLI: run temporal gap NSE_PC for every site in ``PRIMARY_SEASON``.

    For each site (in sorted order) and each requested workflow, the best
    scenario is looked up in ``<data-dir>/<site>/<season>/metrics.json`` and
    passed to ``run_temporal_pc``.  Sites without coordinates and workflows
    without a scenario are reported with a ``[skip]`` line.
    """
    parser = argparse.ArgumentParser(description="Batch temporal gap NSE_PC (six sites).")
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) to validate (default: both best BtI and best ItB).",
    )
    parser.add_argument("--gap-days", type=int, action="append")
    options = parser.parse_args()

    coordinates = _site_positions(options.sites_geojson)
    selected_workflows = _resolve_workflows(options.workflow)

    for site, season in sorted(PRIMARY_SEASON.items()):
        pos = coordinates.get(site)
        if not pos:
            print(f"[skip] No coordinates for {site}")
            continue
        metrics_path = options.data_dir / site / str(season) / "metrics.json"
        for workflow in selected_workflows:
            scenario_key = _best_from_metrics(metrics_path, workflow)
            if not scenario_key:
                print(f"[skip] {site} {season}: no metrics.json / {workflow} scenarios")
                continue
            strategy, sigma, mode = _parse_scenario(scenario_key)
            # Only an explicit sigma of 30 is forwarded; anything else becomes None.
            sigma_kw = 30 if sigma == 30 else None
            print(f"=== {site} {season} temporal {scenario_key} ===")
            result = run_temporal_pc(
                site,
                season,
                pos,
                strategy,
                sigma_kw,
                mode,
                skip_manifest=False,
                skip_fusion=options.skip_fusion,
                gap_days_filter=options.gap_days,
                transition_filter=None,
                s2_calendar_strategy=strategy,
            )
            print(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
"""Gap windows, phenological midpoints, manifest and withheld-image sidecar."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from phenology_timesat import phenocam_phenology_path
|
||||
|
||||
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
|
||||
DEFAULT_GAP_LENGTHS = (15, 30)
|
||||
TRANSITIONS = ("green_up", "green_down")
|
||||
|
||||
|
||||
def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative path ``data/<site_name>/<season>/validation``."""
    relative = "/".join(("data", f"{site_name}", f"{season}", "validation"))
    return Path(relative)
|
||||
|
||||
|
||||
def _parse_iso_date(s, season: int) -> date | None:
|
||||
if not s or not isinstance(s, str):
|
||||
return None
|
||||
try:
|
||||
d = datetime.strptime(s[:10], "%Y-%m-%d").date()
|
||||
except ValueError:
|
||||
return None
|
||||
y0, y1 = date(season, 1, 1), date(season, 12, 31)
|
||||
return d if y0 <= d <= y1 else None
|
||||
|
||||
|
||||
def transition_midpoint(
    site_name: str,
    season: int,
    transition: str,
    phenology_path: Path | None = None,
) -> date | None:
    """Read the 50 % amplitude date of a transition from a phenology JSON file.

    Args:
        site_name: site identifier, used to locate the default phenology file.
        season: calendar year the date must fall in.
        transition: one of ``TRANSITIONS``.
        phenology_path: explicit JSON file; when omitted the path comes from
            ``phenocam_phenology_path(site_name, season)``.

    Returns:
        The date stored under ``green_up_50pct_date`` or
        ``green_down_50pct_date``; ``None`` if the file is missing, cannot be
        read or decoded, or the stored value is not a date within ``season``.

    Raises:
        ValueError: if ``transition`` is not in ``TRANSITIONS``.
    """
    if transition not in TRANSITIONS:
        raise ValueError(f"transition must be one of {TRANSITIONS}, got {transition!r}")
    source = phenology_path or phenocam_phenology_path(site_name, season)
    if not source.is_file():
        return None
    try:
        record = json.loads(source.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    if transition == "green_up":
        field = "green_up_50pct_date"
    else:
        field = "green_down_50pct_date"
    return _parse_iso_date(record.get(field), season)
|
||||
|
||||
|
||||
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Legacy midpoint: the green-up date, else green-down, else 1 July of ``season``."""
    found = None
    for name in ("green_up", "green_down"):
        found = transition_midpoint(site_name, season, name, phenology_path)
        if found:
            break
    if found:
        return found
    return date(season, 7, 1)
|
||||
|
||||
|
||||
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """Return an inclusive ``(start, end)`` window of ``gap_days`` days around ``mid``.

    ``gap_days // 2`` days lie before ``mid`` and the remainder after it.  A
    window crossing 1 January or 31 December of ``season`` is shifted back
    inside the year, and clamped at the opposite boundary if necessary.
    """
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    start = mid - timedelta(days=days_before)
    end = mid + timedelta(days=days_after)
    first_day, last_day = date(season, 1, 1), date(season, 12, 31)
    if start < first_day:
        # Push the whole window forward by the amount it undershoots.
        overshoot = first_day - start
        start, end = first_day, min(last_day, end + overshoot)
    if end > last_day:
        # Pull the whole window back by the amount it overshoots.
        overshoot = end - last_day
        start, end = max(first_day, start - overshoot), last_day
    return start, end
|
||||
|
||||
|
||||
def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """List ``(acquisition_date, filename)`` for the ``*REFL.tif`` files in a directory.

    Only names matching ``REFL_DATE_RE`` are kept.  The result is ordered by
    date; files sharing a date keep their path-sorted order.  A missing
    directory yields an empty list.
    """
    if not prepared_s2.is_dir():
        return []
    found: list[tuple[date, str]] = []
    for tif in sorted(prepared_s2.glob("*REFL.tif")):
        hit = REFL_DATE_RE.search(tif.name)
        if hit:
            acquired = datetime.strptime(hit.group(1), "%Y%m%d").date()
            found.append((acquired, tif.name))
    # sorted() is stable, so equal dates stay in path order.
    return sorted(found, key=lambda pair: pair[0])
|
||||
|
||||
|
||||
def nearest_s2_acquisition(
|
||||
prediction: date, pairs: list[tuple[date, str]]
|
||||
) -> tuple[date, str] | None:
|
||||
if not pairs:
|
||||
return None
|
||||
return min(pairs, key=lambda t: abs((t[0] - prediction).days))
|
||||
|
||||
|
||||
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """Build one manifest entry per ``(transition, gap length)`` combination.

    Each entry records the transition midpoint, the gap window centred on
    it, the prediction date (equal to the midpoint) and the S2 acquisition
    nearest to that date, taken from
    ``data/<site>/<season>/prepared_<strategy>/s2``.  Transitions without a
    midpoint are left out; when no S2 scene is listed, the withheld fields
    are ``None``.
    """
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    acquisitions = list_s2_refl_dates(s2_dir)
    manifest_rows: list[dict] = []
    for transition in transitions:
        midpoint = transition_midpoint(site_name, season, transition)
        if midpoint is None:
            continue
        for gap_days in gap_lengths:
            window_start, window_end = centered_window(midpoint, gap_days, season)
            prediction = midpoint
            nearest = nearest_s2_acquisition(prediction, acquisitions)
            if nearest is not None:
                withheld_date, withheld_filename = nearest[0].isoformat(), nearest[1]
            else:
                withheld_date = withheld_filename = None
            row = {
                "transition": transition,
                "gap_days": gap_days,
                "midpoint_rule": f"{transition}_50pct_date",
                "midpoint_date": midpoint.isoformat(),
                "window_start": window_start.isoformat(),
                "window_end": window_end.isoformat(),
                "prediction_date": prediction.isoformat(),
                "withheld_s2_date": withheld_date,
                "withheld_s2_filename": withheld_filename,
            }
            manifest_rows.append(row)
    return manifest_rows
|
||||
|
||||
|
||||
def write_gap_withheld_images(
    site_name: str,
    season: int,
    entries: list[dict],
) -> Path:
    """Write ``gap_withheld_images.json``, a sidecar listing withheld scenes and gap windows.

    Args:
        site_name: site identifier, repeated in every record.
        season: calendar year, repeated in every record.
        entries: manifest entries; a missing key is written as ``null``.

    Returns:
        Path of the written file inside ``validation_dir(site_name, season)``.
    """
    path = validation_dir(site_name, season) / "gap_withheld_images.json"
    # Create the directory here so the function also works when called on its
    # own, not only after write_manifest() has already made it.
    path.parent.mkdir(parents=True, exist_ok=True)
    copied_fields = (
        "transition",
        "gap_days",
        "midpoint_date",
        "window_start",
        "window_end",
        "withheld_s2_date",
        "withheld_s2_filename",
    )
    records = [
        {
            "site_name": site_name,
            "season": season,
            **{field: e.get(field) for field in copied_fields},
        }
        for e in entries
    ]
    path.write_text(
        json.dumps({"site_name": site_name, "season": season, "records": records}, indent=2)
        + "\n",
        encoding="utf-8",
    )
    return path
|
||||
|
||||
|
||||
def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
    *,
    gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Write ``gap_manifest.json`` and the withheld-image sidecar for one site/season.

    The validation directory is created if needed.  Returns the manifest path.
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)
    entries = build_manifest_entries(
        site_name,
        season,
        gap_lengths=gap_lengths,
        transitions=transitions,
        s2_calendar_strategy=s2_calendar_strategy,
    )
    manifest_path = target_dir / "gap_manifest.json"
    document = {
        "site_name": site_name,
        "season": season,
        "site_position_lat_lon": list(site_position),
        "s2_calendar_strategy": s2_calendar_strategy,
        "entries": entries,
    }
    serialized = json.dumps(document, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")
    write_gap_withheld_images(site_name, season, entries)
    return manifest_path
|
||||
|
||||
|
||||
def load_manifest(site_name: str, season: int) -> dict:
    """Load ``gap_manifest.json`` from the validation directory of a site/season.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
    """
    path = validation_dir(site_name, season) / "gap_manifest.json"
    if path.is_file():
        raw = path.read_text(encoding="utf-8")
        return json.loads(raw)
    raise FileNotFoundError(f"Missing manifest: {path}")
|
||||
|
|
@ -1,438 +0,0 @@
|
|||
"""Export 2×4 RGB panels for Tier-A gap validation (thesis appendix).
|
||||
|
||||
Crops follow the same fusion-valid bounding box as ``postprocessing.process_cropped``
|
||||
and the webapp (``processed_*`` / ``common.js``), anchored on gap-degraded fusion at the
|
||||
prediction date; S2 and S3 are read from prepared stacks on that shared window.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio import windows
|
||||
from rasterio.transform import rowcol
|
||||
from rasterio.warp import Resampling, reproject
|
||||
|
||||
from gap_validation.s2_mask_dir import acquisition_yyyymmdd_in_window, yyyymmdd_from_iso
|
||||
|
||||
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
|
||||
S3_COMPOSITE_RE = re.compile(r"composite_(\d{8})\.tif$")
|
||||
TRANSITIONS = ("green_up", "green_down")
|
||||
COL_TITLES = ("Withheld S2", "Gap fusion", "S3 composite", "Nearest S2")
|
||||
ROW_LABELS = {"green_up": "Green-up", "green_down": "Green-down"}
|
||||
VALID_REFL_THRESHOLD = 0.001
|
||||
NODATA_RGB = (0.15, 0.15, 0.15)
|
||||
|
||||
|
||||
def _parse_bti_scenario(scenario: str) -> tuple[str, int]:
|
||||
m = re.match(r"^(aggressive|nonaggressive)_sigma(20|30)$", scenario)
|
||||
if not m:
|
||||
raise ValueError(f"expected BtI scenario key, got {scenario!r}")
|
||||
return m.group(1), int(m.group(2))
|
||||
|
||||
|
||||
def _prepared_base(data_dir: Path, site: str, season: int, strategy: str) -> Path:
|
||||
return data_dir / site / str(season) / f"prepared_{strategy}"
|
||||
|
||||
|
||||
def _s2_strategy_fallbacks(strategy: str, manifest: dict) -> tuple[str, ...]:
|
||||
"""Prepared trees to try for S2 REFL (best-BtI first, then manifest calendar)."""
|
||||
order: list[str] = []
|
||||
for s in (strategy, manifest.get("s2_calendar_strategy")):
|
||||
if isinstance(s, str) and s and s not in order:
|
||||
order.append(s)
|
||||
for s in ("aggressive", "nonaggressive"):
|
||||
if s not in order:
|
||||
order.append(s)
|
||||
return tuple(order)
|
||||
|
||||
|
||||
def _find_prepared_s2_refl(
    data_dir: Path,
    site: str,
    season: int,
    filename: str,
    strategies: tuple[str, ...],
) -> Path | None:
    """Return the first existing ``prepared_<strategy>/s2/<filename>``, trying strategies in order."""
    candidates = (
        _prepared_base(data_dir, site, season, name) / "s2" / filename
        for name in strategies
    )
    # The generator is lazy, so later candidates are not probed after a hit.
    return next((path for path in candidates if path.is_file()), None)
|
||||
|
||||
|
||||
def _gap_spatial_fusion_dir(
|
||||
data_dir: Path,
|
||||
site: str,
|
||||
season: int,
|
||||
gap_days: int,
|
||||
transition: str,
|
||||
strategy: str,
|
||||
sigma: int,
|
||||
) -> Path:
|
||||
return (
|
||||
data_dir
|
||||
/ site
|
||||
/ str(season)
|
||||
/ "validation"
|
||||
/ "fusion"
|
||||
/ f"gap_{gap_days}_{transition}"
|
||||
/ f"{strategy}_sigma{sigma}_bti"
|
||||
)
|
||||
|
||||
|
||||
def _iso_to_date(iso_d: str) -> date:
|
||||
return datetime.strptime(iso_d[:10], "%Y-%m-%d").date()
|
||||
|
||||
|
||||
def _exclude_ymds(entry: dict) -> set[str]:
    """Return the ``YYYYMMDD`` of the entry's withheld S2 file as a one-element set.

    The set is empty when the entry has no withheld filename or the filename
    does not match ``REFL_DATE_RE``.
    """
    filename = entry.get("withheld_s2_filename") or ""
    hit = REFL_DATE_RE.search(filename)
    if hit is None:
        return set()
    return {hit.group(1)}
|
||||
|
||||
|
||||
def nearest_stack_s2(
    prepared_s2_dir: Path,
    prediction_iso: str,
    *,
    exclude_ymds: set[str],
) -> Path | None:
    """Find the prepared S2 REFL file closest in time to a prediction date.

    Args:
        prepared_s2_dir: directory holding ``S2A_MSIL2A_<YYYYMMDD>_REFL.tif`` files.
        prediction_iso: ISO date (or datetime) string of the prediction.
        exclude_ymds: ``YYYYMMDD`` strings of acquisitions to ignore.

    Returns:
        The closest remaining file, or ``None`` when the directory is missing
        or nothing qualifies.  Of two equidistant acquisitions the one with
        the lexicographically smaller path wins.
    """
    if not prepared_s2_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    best_path: Path | None = None
    best_delta: int | None = None
    # Sorted so ties do not depend on filesystem enumeration order; the chosen
    # scene ends up in a figure and should be reproducible.
    for p in sorted(prepared_s2_dir.glob("S2A_MSIL2A_*_REFL.tif")):
        m = REFL_DATE_RE.search(p.name)
        if not m or m.group(1) in exclude_ymds:
            continue
        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
        if best_delta is None or delta < best_delta:
            best_delta = delta
            best_path = p
    return best_path
|
||||
|
||||
|
||||
def nearest_s3_composite(prepared_s3_dir: Path, prediction_iso: str) -> Path | None:
    """Find the ``composite_<YYYYMMDD>.tif`` closest in time to a prediction date.

    Args:
        prepared_s3_dir: directory holding the S3 composite files.
        prediction_iso: ISO date (or datetime) string of the prediction.

    Returns:
        The closest composite, or ``None`` when the directory is missing or
        holds no matching file.  Of two equidistant composites the one with
        the lexicographically smaller path wins.
    """
    if not prepared_s3_dir.is_dir():
        return None
    target = _iso_to_date(prediction_iso)
    best_path: Path | None = None
    best_delta: int | None = None
    # Sorted so ties do not depend on filesystem enumeration order.
    for p in sorted(prepared_s3_dir.glob("composite_*.tif")):
        m = S3_COMPOSITE_RE.search(p.name)
        if not m:
            continue
        delta = abs((datetime.strptime(m.group(1), "%Y%m%d").date() - target).days)
        if best_delta is None or delta < best_delta:
            best_delta = delta
            best_path = p
    return best_path
|
||||
|
||||
|
||||
def _crop_window_from_fusion(fusion_path: Path) -> dict | None:
    """Derive the crop window from the valid pixels of a fusion raster.

    A pixel counts as valid when any band is finite and above
    ``VALID_REFL_THRESHOLD``.  The window is the bounding box of those
    pixels.

    Returns:
        A dict with ``window``, ``crop_transform``, ``full_transform``,
        ``crs`` and ``profile``; ``None`` if the file is missing or contains
        no valid pixel.
    """
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as dataset:
        cube = dataset.read()
        usable = np.isfinite(cube) & (cube > VALID_REFL_THRESHOLD)
        # Collapse the band axis plus one spatial axis to find occupied rows/columns.
        hit_rows = np.flatnonzero(np.any(usable, axis=(0, 2)))
        hit_cols = np.flatnonzero(np.any(usable, axis=(0, 1)))
        if len(hit_rows) == 0 or len(hit_cols) == 0:
            return None
        top, bottom = int(hit_rows[0]), int(hit_rows[-1])
        left, right = int(hit_cols[0]), int(hit_cols[-1])
        width = right - left + 1
        height = bottom - top + 1
        bbox = windows.Window(left, top, width, height)
        return {
            "window": bbox,
            "crop_transform": windows.transform(bbox, dataset.transform),
            "full_transform": dataset.transform,
            "crs": dataset.crs,
            "profile": dataset.profile.copy(),
        }
|
||||
|
||||
|
||||
def _read_bgr_prepared_s2(prepared_refl: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Read bands 1-3 of a prepared S2 raster inside ``crop["window"]`` as float64.

    The bands are returned in file order, which callers unpack as
    ``(blue, green, red)``.  Returns ``None`` if the file is missing or has
    fewer than three bands.
    """
    if not prepared_refl.is_file():
        return None
    with rasterio.open(prepared_refl) as dataset:
        if dataset.count < 3:
            return None
        first, second, third = dataset.read(indexes=(1, 2, 3), window=crop["window"])
    return (
        first.astype(np.float64),
        second.astype(np.float64),
        third.astype(np.float64),
    )
|
||||
|
||||
|
||||
def _read_bgr_gap_fusion(fusion_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Read bands 1-3 of a gap-fusion raster inside ``crop["window"]`` as float64.

    The bands are returned in file order, which callers unpack as
    ``(blue, green, red)``.  Returns ``None`` if the file is missing or has
    fewer than three bands.
    """
    if not fusion_path.is_file():
        return None
    with rasterio.open(fusion_path) as dataset:
        if dataset.count < 3:
            return None
        first, second, third = dataset.read(indexes=(1, 2, 3), window=crop["window"])
    return (
        first.astype(np.float64),
        second.astype(np.float64),
        third.astype(np.float64),
    )
|
||||
|
||||
|
||||
def _read_bgr_prepared_s3(s3_path: Path, crop: dict) -> tuple[np.ndarray, ...] | None:
    """Resample an S3 composite onto the fusion grid and read the crop window.

    Every band is reprojected (nearest neighbour) into an in-memory raster
    built from ``crop["profile"]``; bands 1-3 are then read inside
    ``crop["window"]`` and returned as float64 arrays in file order.
    Returns ``None`` if the file is missing or has fewer than three bands.
    """
    if not s3_path.is_file():
        return None
    with rasterio.open(s3_path) as source:
        if source.count < 3:
            return None
        # Same grid as the fusion raster, but float32 and with the S3 band count.
        grid_profile = crop["profile"].copy()
        grid_profile.update({"dtype": "float32", "count": source.count})
        with rasterio.MemoryFile() as memfile:
            with memfile.open(**grid_profile) as on_grid:
                for band_no in range(1, source.count + 1):
                    reproject(
                        source=rasterio.band(source, band_no),
                        destination=rasterio.band(on_grid, band_no),
                        src_transform=source.transform,
                        src_crs=source.crs,
                        dst_transform=crop["full_transform"],
                        dst_crs=crop["crs"],
                        resampling=Resampling.nearest,
                    )
                first, second, third = on_grid.read(
                    indexes=(1, 2, 3), window=crop["window"]
                )
                converted = (
                    first.astype(np.float64),
                    second.astype(np.float64),
                    third.astype(np.float64),
                )
    return converted
|
||||
|
||||
|
||||
def _refl_valid(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Boolean mask of pixels whose three bands are finite and above ``VALID_REFL_THRESHOLD``."""
    mask = np.isfinite(blue) & np.isfinite(green) & np.isfinite(red)
    for band in (blue, green, red):
        mask = mask & (band > VALID_REFL_THRESHOLD)
    return mask
|
||||
|
||||
|
||||
def _panel_stretch_limits(
|
||||
blue: np.ndarray, green: np.ndarray, red: np.ndarray, valid: np.ndarray
|
||||
) -> tuple[float, float]:
|
||||
"""Per-panel 2--98 % stretch on positive reflectance (webapp ``common.js`` style)."""
|
||||
if not valid.any():
|
||||
return 0.0, 1.0
|
||||
vals = np.concatenate([red[valid], green[valid], blue[valid]])
|
||||
lo, hi = np.percentile(vals, (2, 98))
|
||||
if hi <= lo:
|
||||
return 0.0, 1.0
|
||||
return float(lo), float(hi)
|
||||
|
||||
|
||||
def _bgr_to_rgba(
    blue: np.ndarray,
    green: np.ndarray,
    red: np.ndarray,
    *,
    valid: np.ndarray,
    vmin: float,
    vmax: float,
) -> np.ndarray:
    """Compose an opaque float32 RGBA image from three bands.

    Valid pixels are scaled linearly from ``[vmin, vmax]`` to ``[0, 1]`` and
    clipped; all other pixels are painted ``NODATA_RGB``.
    """
    image = np.zeros((*blue.shape, 4), dtype=np.float32)
    image[..., 3] = 1.0
    invalid = ~valid
    for channel, fill in enumerate(NODATA_RGB[:3]):
        image[invalid, channel] = fill
    # A zero-width range would divide by zero; fall back to a unit span.
    span = vmax - vmin or 1.0
    for channel, band in enumerate((red, green, blue)):
        scaled = np.clip((band - vmin) / span, 0.0, 1.0)
        image[..., channel] = np.where(valid, scaled, image[..., channel])
    return image
|
||||
|
||||
|
||||
def _phenocam_pixel_cropped(
    crop: dict, site_position_lat_lon: tuple[float, float]
) -> tuple[int, int] | None:
    """Locate the site in pixel coordinates of the cropped raster.

    The ``(lat, lon)`` position is treated as EPSG:4326, reprojected into
    ``crop["crs"]`` and then converted to a row/column index with
    ``crop["crop_transform"]``.

    Args:
        crop: crop description holding ``crop_transform`` and ``crs``.
        site_position_lat_lon: ``(lat, lon)`` of the site.

    Returns:
        ``(row, col)`` relative to the crop window (possibly outside it; the
        caller bounds-checks), or ``None`` if the conversion fails.
    """
    # Local import: the module only pulls Resampling/reproject from rasterio.warp.
    from rasterio.warp import transform as warp_transform

    lat, lon = site_position_lat_lon
    try:
        # rowcol() expects coordinates in the raster's own CRS.  Its ``op``
        # argument is a rounding callable, not a CRS, so the reprojection has
        # to happen explicitly beforehand.
        xs, ys = warp_transform(
            rasterio.crs.CRS.from_epsg(4326), crop["crs"], [lon], [lat]
        )
        rows, cols = rowcol(crop["crop_transform"], xs, ys)
        return int(rows[0]), int(cols[0])
    except Exception:
        # Best effort: the marker is decorative, so a failed conversion must
        # not abort the figure.
        return None
|
||||
|
||||
|
||||
def _resolve_row_paths(
    data_dir: Path,
    site: str,
    season: int,
    entry: dict,
    strategy: str,
    sigma: int,
    *,
    gap_days: int,
    manifest: dict,
) -> tuple[Path, Path, Path, Path] | None:
    """Resolve the four raster paths needed for one panel row.

    Returns ``(withheld S2, gap fusion, S3 composite, nearest S2)``, or
    ``None`` if any of them cannot be found.  The S3 composite is the one
    dated on the prediction day when it exists, otherwise the nearest one.
    The nearest S2 scene excludes the withheld scene and every acquisition
    inside the gap window, and is searched across the fallback strategies in
    order.
    """
    prediction_iso = entry["prediction_date"]
    pred_ymd = yyyymmdd_from_iso(prediction_iso)
    transition = entry["transition"]
    prepared = _prepared_base(data_dir, site, season, strategy)
    candidates = _s2_strategy_fallbacks(strategy, manifest)

    withheld_name = entry.get("withheld_s2_filename")
    if not withheld_name:
        return None
    withheld_path = _find_prepared_s2_refl(
        data_dir, site, season, withheld_name, candidates
    )

    fusion_dir = _gap_spatial_fusion_dir(
        data_dir, site, season, gap_days, transition, strategy, sigma
    )
    fusion_path = fusion_dir / f"REFL_{pred_ymd}.tif"

    s3_dir = prepared / "s3"
    s3_path = s3_dir / f"composite_{pred_ymd}.tif"
    if not s3_path.is_file():
        s3_path = nearest_s3_composite(s3_dir, prediction_iso)

    window_start = _iso_to_date(entry["window_start"])
    window_end = _iso_to_date(entry["window_end"])
    nearest_path: Path | None = None
    for candidate in candidates:
        s2_dir = _prepared_base(data_dir, site, season, candidate) / "s2"
        in_window = acquisition_yyyymmdd_in_window(s2_dir, window_start, window_end)
        skipped = in_window | _exclude_ymds(entry)
        nearest_path = nearest_stack_s2(s2_dir, prediction_iso, exclude_ymds=skipped)
        if nearest_path is not None:
            break

    if (
        withheld_path is not None
        and fusion_path.is_file()
        and s3_path is not None
        and nearest_path is not None
    ):
        return withheld_path, fusion_path, s3_path, nearest_path
    return None
|
||||
|
||||
|
||||
def build_site_panel(
    site: str,
    season: int,
    data_dir: Path,
    out_png: Path,
    *,
    best_bti_scenario: str,
    site_label: str,
    site_position_lat_lon: tuple[float, float] | None = None,
    gap_days: int = 30,
) -> bool:
    """Render the RGB gap-validation panel for one site and save it as PNG.

    One row is drawn per transition in ``TRANSITIONS`` that has a manifest
    entry for ``gap_days`` and whose four rasters resolve; the columns follow
    ``COL_TITLES``.  A row whose crop window or rasters cannot be read is
    left blank.  When a site position is given, it is marked with a red
    ``+`` on the first column if it falls inside the crop.

    Args:
        site: site identifier (directory name under ``data_dir``).
        season: calendar year.
        data_dir: root data directory.
        out_png: output file; parent directories are created.
        best_bti_scenario: BtI scenario key, e.g. ``aggressive_sigma20``.
        site_label: text used in the figure title.
        site_position_lat_lon: optional ``(lat, lon)`` of the site.
        gap_days: gap length selecting the manifest entries.

    Returns:
        ``False`` if the manifest is missing or no transition row could be
        resolved; ``True`` once the figure has been written.

    Raises:
        ValueError: if ``best_bti_scenario`` is not a valid BtI key.
    """
    manifest_path = data_dir / site / str(season) / "validation" / "gap_manifest.json"
    if not manifest_path.is_file():
        return False
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    strategy, sigma = _parse_bti_scenario(best_bti_scenario)

    rows: list[tuple[str, dict, tuple[Path, Path, Path, Path]]] = []
    for transition in TRANSITIONS:
        entry = next(
            (
                e
                for e in manifest["entries"]
                if e.get("gap_days") == gap_days and e.get("transition") == transition
            ),
            None,
        )
        if not entry:
            continue
        paths = _resolve_row_paths(
            data_dir,
            site,
            season,
            entry,
            strategy,
            sigma,
            gap_days=gap_days,
            manifest=manifest,
        )
        if paths is None:
            continue
        rows.append((transition, entry, paths))

    if not rows:
        return False

    # Reader per column, in the order of the paths from _resolve_row_paths.
    readers = (
        _read_bgr_prepared_s2,
        _read_bgr_gap_fusion,
        _read_bgr_prepared_s3,
        _read_bgr_prepared_s2,
    )

    fig, axes = plt.subplots(
        len(rows),
        4,
        figsize=(12.0, 2.8 * len(rows)),
        squeeze=False,
        constrained_layout=True,
    )
    # try/finally so the figure is released even if reading, plotting or
    # saving raises; pyplot otherwise keeps it alive across a batch run.
    try:
        for row_idx, (transition, entry, paths) in enumerate(rows):
            row_title = ROW_LABELS.get(transition, transition)
            # The crop window is anchored on the gap-fusion raster (paths[1]).
            crop = _crop_window_from_fusion(paths[1])
            if crop is None:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue

            layers: list[tuple[np.ndarray, np.ndarray, np.ndarray]] = []
            for path, read_fn in zip(paths, readers, strict=True):
                bgr = read_fn(path, crop)
                if bgr is None:
                    layers = []
                    break
                layers.append(bgr)
            if len(layers) != 4:
                for ax in axes[row_idx]:
                    ax.set_visible(False)
                continue

            mark: tuple[int, int] | None = None
            if site_position_lat_lon:
                mark = _phenocam_pixel_cropped(crop, site_position_lat_lon)

            for col_idx, (col_title, bgr) in enumerate(zip(COL_TITLES, layers, strict=True)):
                ax = axes[row_idx, col_idx]
                blue, green, red = bgr
                valid = _refl_valid(blue, green, red)
                vmin, vmax = _panel_stretch_limits(blue, green, red, valid)
                rgba = _bgr_to_rgba(
                    blue, green, red, valid=valid, vmin=vmin, vmax=vmax
                )
                ax.imshow(rgba, origin="upper", aspect="equal", interpolation="nearest")
                h, w = rgba.shape[:2]
                if col_idx == 0 and mark and 0 <= mark[0] < h and 0 <= mark[1] < w:
                    # mark is (row, col); plot() takes x=col, y=row.
                    ax.plot(
                        mark[1],
                        mark[0],
                        "+",
                        color="red",
                        markersize=8,
                        markeredgewidth=1.2,
                    )
                if row_idx == 0:
                    ax.set_title(col_title, fontsize=9)
                if col_idx == 0:
                    ax.set_ylabel(row_title, fontsize=9)
                ax.set_xticks([])
                ax.set_yticks([])

        fig.suptitle(f"{site_label} ({season})", fontsize=10)
        out_png.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_png, dpi=150)
    finally:
        plt.close(fig)
    return True
|
||||
|
|
@ -1,200 +0,0 @@
|
|||
"""EFAST with symlinked S2 dir (gap window omitted); outputs under validation/."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from fusion import run_efast, run_efast_itb
|
||||
from preparation import _get_base_dir, _get_itb_base_dir
|
||||
|
||||
from gap_validation.s2_mask_dir import (
|
||||
acquisition_yyyymmdd_in_window,
|
||||
assert_no_leakage,
|
||||
build_masked_s2_dir_bti,
|
||||
build_masked_s2_dir_itb,
|
||||
)
|
||||
|
||||
|
||||
def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
    """Return the ``s3`` subdirectory of ``_get_base_dir(season, site_name, strategy)``."""
    base_dir = _get_base_dir(season, site_name, strategy)
    return base_dir / "s3"
|
||||
|
||||
|
||||
def validation_fusion_dir(
|
||||
site_name: str,
|
||||
season: int,
|
||||
gap_days: int,
|
||||
transition: str,
|
||||
strategy: str,
|
||||
sigma: int | None,
|
||||
mode: str,
|
||||
) -> Path:
|
||||
"""``data/.../validation/fusion/gap_{n}_{transition}/{strategy}_sigma{20|30}_{bti|itb}/``."""
|
||||
sig = 30 if sigma == 30 else 20
|
||||
return (
|
||||
Path(f"data/{site_name}/{season}/validation")
|
||||
/ "fusion"
|
||||
/ f"gap_{gap_days}_{transition}"
|
||||
/ f"{strategy}_sigma{sig}_{mode}"
|
||||
)
|
||||
|
||||
|
||||
def excluded_acquisition_days(
    prepared_s2: Path,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
) -> set[str]:
    """Collect the acquisition days to hide from fusion.

    Combines the S2 days that ``acquisition_yyyymmdd_in_window`` reports for
    the gap window with the withheld validation day.  Only the first ten
    characters of each ISO bound are parsed.
    """
    window_start, window_end = (
        datetime.strptime(bound[:10], "%Y-%m-%d").date()
        for bound in (window_start_iso, window_end_iso)
    )
    hidden = acquisition_yyyymmdd_in_window(prepared_s2, window_start, window_end)
    hidden.add(withheld_yyyymmdd)
    return hidden
|
||||
|
||||
|
||||
def run_masked_fusion_one_date(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    prediction_date_iso: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
) -> Path:
    """Run EFAST for a single prediction date on a temporary masked S2 directory.

    The masked directory omits the gap-window acquisitions and the withheld
    acquisition; it lives in a ``TemporaryDirectory`` and is removed on exit.

    Returns:
        ``fusion_output_dir`` (created if missing).

    Raises:
        ValueError: if ``mode`` is neither ``"bti"`` nor ``"itb"``.
        RuntimeError: from ``assert_no_leakage`` when the withheld day is
            still present in the masked directory.
    """
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    day = prediction_date_iso[:10]
    single_day_range = f"{day}/{day}"

    with TemporaryDirectory(prefix="gapval_s2_") as scratch:
        masked_s2 = Path(scratch) / "s2"
        if mode not in ("bti", "itb"):
            raise ValueError(f"mode must be bti or itb, got {mode!r}")
        is_bti = mode == "bti"

        if is_bti:
            source_s2 = _get_base_dir(season, site_name, strategy) / "s2"
            build_masked, fuse = build_masked_s2_dir_bti, run_efast
        else:
            source_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
            build_masked, fuse = build_masked_s2_dir_itb, run_efast_itb

        skip_days = excluded_acquisition_days(
            source_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
        )
        build_masked(source_s2, skip_days, masked_s2)
        assert_no_leakage(withheld_yyyymmdd, masked_s2)

        if is_bti:
            s3_dir = prepared_s3_dir(season, site_name, strategy)
        else:
            s3_dir = _get_itb_base_dir(season, site_name, strategy) / "s3"

        fuse(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
            date_range=single_day_range,
            s2_output_dir=masked_s2,
            s3_output_dir=s3_dir,
            fusion_output_dir=fusion_output_dir,
        )

    return fusion_output_dir
|
||||
|
||||
|
||||
def run_masked_fusion_season(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
) -> Path:
    """Full-season EFAST on gap-degraded S2 stack (temporal NSE_PC tier).

    The date range passed to the fusion runner is ``{season}-01-01`` to
    ``{season}-12-31``.  The masked S2 directory omits the gap-window
    acquisitions and the withheld acquisition and is removed on exit.

    Returns:
        ``fusion_output_dir`` (created if missing).

    Raises:
        ValueError: if ``mode`` is neither ``"bti"`` nor ``"itb"``.
        RuntimeError: from ``assert_no_leakage`` when the withheld day is
            still present in the masked directory.
    """
    # Reject unknown modes before touching the filesystem; previously any
    # value other than "bti" silently ran the ItB workflow, unlike
    # run_masked_fusion_one_date which raises.
    if mode not in ("bti", "itb"):
        raise ValueError(f"mode must be bti or itb, got {mode!r}")

    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    date_range = f"{season}-01-01/{season}-12-31"

    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        if mode == "bti":
            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
            excl = excluded_acquisition_days(
                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
            )
            build_masked_s2_dir_bti(prep_s2, excl, tmp_s2)
            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=prepared_s3_dir(season, site_name, strategy),
                fusion_output_dir=fusion_output_dir,
            )
        else:
            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
            excl = excluded_acquisition_days(
                prep_s2, window_start_iso, window_end_iso, withheld_yyyymmdd
            )
            build_masked_s2_dir_itb(prep_s2, excl, tmp_s2)
            assert_no_leakage(withheld_yyyymmdd, tmp_s2)
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=_get_itb_base_dir(season, site_name, strategy) / "s3",
                fusion_output_dir=fusion_output_dir,
            )

    return fusion_output_dir
|
||||
|
||||
|
||||
def production_fusion_path(
    season: int,
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    yyyymmdd: str,
) -> Path:
    """Return the single-date fused raster path in the normal prepared tree.

    ``mode == "bti"`` yields ``REFL_{yyyymmdd}.tif`` under ``_get_base_dir``;
    any other mode yields ``GCC_{yyyymmdd}.tif`` under ``_get_itb_base_dir``.
    The subfolder is ``fusion_sigma{sigma}`` when ``sigma`` is truthy, else
    ``fusion``.  The file's existence is not checked.
    """
    if mode == "bti":
        root, stem = _get_base_dir(season, site_name, strategy), "REFL"
    else:
        root, stem = _get_itb_base_dir(season, site_name, strategy), "GCC"
    folder = f"fusion_sigma{sigma}" if sigma else "fusion"
    return root / folder / f"{stem}_{yyyymmdd}.tif"
|
||||
|
||||
|
||||
def withheld_s2_refl_path(
    season: int, site_name: str, strategy: str, withheld_filename: str | None
) -> Path | None:
    """Return the withheld file under the prepared ``s2`` dir, or ``None``.

    ``None`` is returned when no filename is given or the file does not exist.
    """
    if withheld_filename:
        candidate = _get_base_dir(season, site_name, strategy) / "s2" / withheld_filename
        if candidate.is_file():
            return candidate
    return None
|
||||
|
|
@ -1,163 +0,0 @@
|
|||
"""TIMESAT transition dates on gap-degraded fusion series vs PhenoCam reference."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from fusion_phenology import timesat_transitions_from_by_date
|
||||
from phenology_timesat import phenocam_phenology_path
|
||||
|
||||
from gap_validation.batch_spatial import (
|
||||
PRIMARY_SEASON,
|
||||
_best_from_metrics,
|
||||
_parse_scenario,
|
||||
_resolve_workflows,
|
||||
_site_positions,
|
||||
)
|
||||
from gap_validation.calendar import load_manifest, validation_dir
|
||||
from gap_validation.temporal_pc import _fusion_gcc_timeseries
|
||||
|
||||
|
||||
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
|
||||
if not iso_a or not iso_b:
|
||||
return None
|
||||
try:
|
||||
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
|
||||
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
|
||||
return abs((a - b).days)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
    """Map the 50 % transition dates from the TIMESAT result to short keys.

    Returns a dict with keys ``green_up`` and ``green_down``; a value is
    ``None`` when the corresponding ``*_50pct_date`` key is absent.
    """
    fitted = timesat_transitions_from_by_date(by_date, season)
    return {
        name: fitted.get(f"{name}_50pct_date") for name in ("green_up", "green_down")
    }
|
||||
|
||||
|
||||
def _temporal_fusion_dir(
    site: str, season: int, gap_days: int, transition: str, scenario_key: str
) -> Path:
    """Return ``<validation dir>/temporal/gap_{n}_{transition}/{scenario}``.

    The scenario folder is rebuilt from the parsed ``scenario_key``; any
    sigma other than 30 is labelled 20.
    """
    strategy, sigma, mode = _parse_scenario(scenario_key)
    sigma_label = 30 if sigma == 30 else 20
    gap_folder = f"gap_{gap_days}_{transition}"
    scenario_folder = f"{strategy}_sigma{sigma_label}_{mode}"
    return validation_dir(site, season) / "temporal" / gap_folder / scenario_folder
|
||||
|
||||
|
||||
def compute_offsets_for_site(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
) -> list[dict]:
    """Compare TIMESAT dates on gap-degraded fusion series with the reference.

    For every manifest entry whose gap length is in ``gap_days_list`` and
    whose transition is ``green_up`` or ``green_down``, the fused GCC series
    from the temporal validation directory is fitted and the 50 % transition
    date is compared with the reference phenology JSON.

    Returns:
        One record per usable entry.  Empty when no scenario is selected from
        ``metrics.json``.  Entries are skipped when the fusion directory is
        missing or the series has fewer than 10 samples.
    """
    site_root = Path(f"data/{site}/{season}")
    scenario_key = _best_from_metrics(site_root / "metrics.json", workflow)
    if not scenario_key:
        return []

    reference_file = phenocam_phenology_path(site, season)
    if reference_file.is_file():
        reference = json.loads(reference_file.read_text(encoding="utf-8"))
    else:
        # Missing reference: records are still produced, with null reference dates.
        reference = {}

    records: list[dict] = []
    for entry in load_manifest(site, season)["entries"]:
        gap_days = entry.get("gap_days")
        transition = entry.get("transition")
        if not (gap_days in gap_days_list and transition in ("green_up", "green_down")):
            continue

        fusion_dir = _temporal_fusion_dir(site, season, gap_days, transition, scenario_key)
        if not fusion_dir.is_dir():
            continue

        mode = _parse_scenario(scenario_key)[2]
        series = _fusion_gcc_timeseries(fusion_dir, site_position, mode)
        if len(series) < 10:
            continue

        # ``transition`` is one of the two short keys here, so it indexes both
        # the fitted result and (with the suffix) the reference JSON.
        fused_date = _timesat_transitions(series, season).get(transition)
        reference_date = reference.get(f"{transition}_50pct_date")
        records.append(
            {
                "site_name": site,
                "season": season,
                "transition": transition,
                "gap_days": gap_days,
                "scenario": scenario_key,
                "reference_date": reference_date,
                "fused_date": fused_date,
                "abs_day_offset": _day_offset(fused_date, reference_date),
                "window_start": entry.get("window_start"),
                "window_end": entry.get("window_end"),
            }
        )
    return records
|
||||
|
||||
|
||||
def write_phenology_offsets(
    site: str,
    season: int,
    site_position: tuple[float, float],
    *,
    workflow: str = "bti",
    gap_days_list: tuple[int, ...] = (15, 30),
) -> Path:
    """Write the phenology offset records for one site/season/workflow.

    The payload goes to ``gap_phenology_offsets_{workflow}.json`` in the
    validation directory; for ``workflow == "bti"`` an identical copy is also
    written to the legacy name ``gap_phenology_offsets.json``.

    Returns:
        Path of the workflow-specific JSON file.
    """
    rows = compute_offsets_for_site(
        site, season, site_position, workflow=workflow, gap_days_list=gap_days_list
    )
    vdir = validation_dir(site, season)
    # The record computation can return early without touching the validation
    # tree, so make sure the directory exists before writing into it.
    vdir.mkdir(parents=True, exist_ok=True)

    payload = {
        "site_name": site,
        "season": season,
        "workflow": workflow,
        "records": rows,
    }
    # Serialize once; both files receive exactly the same text.
    text = json.dumps(payload, indent=2) + "\n"

    out = vdir / f"gap_phenology_offsets_{workflow}.json"
    out.write_text(text, encoding="utf-8")
    if workflow == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_phenology_offsets.json").write_text(text, encoding="utf-8")
    return out
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: write offset JSON for every site in ``PRIMARY_SEASON``.

    Sites without a position in the sites GeoJSON are skipped.  The path of
    each written file is printed.
    """
    parser = argparse.ArgumentParser(description="Gap fusion TIMESAT offsets vs PhenoCam.")
    # NOTE(review): --data-dir is parsed but not read anywhere in this function.
    parser.add_argument("--data-dir", type=Path, default=Path("data"))
    parser.add_argument("--sites-geojson", type=Path, default=Path("data/sites.geojson"))
    parser.add_argument(
        "--workflow",
        choices=["bti", "itb", "both"],
        default="both",
        help="Fusion workflow(s) (default: both best BtI and best ItB).",
    )
    options = parser.parse_args()

    positions = _site_positions(options.sites_geojson)
    workflows = _resolve_workflows(options.workflow)
    for site, season in sorted(PRIMARY_SEASON.items()):
        position = positions.get(site)
        if position:
            for workflow in workflows:
                print(write_phenology_offsets(site, season, position, workflow=workflow))


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,352 +0,0 @@
|
|||
"""Tier-2 gap validation CLI: manifest, masked EFAST, spatial ``nse_s2``, Whittaker crossover."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.calendar import (
|
||||
DEFAULT_GAP_LENGTHS,
|
||||
TRANSITIONS,
|
||||
load_manifest,
|
||||
validation_dir,
|
||||
write_manifest,
|
||||
)
|
||||
from gap_validation.fusion_masked import (
|
||||
production_fusion_path,
|
||||
run_masked_fusion_one_date,
|
||||
validation_fusion_dir,
|
||||
withheld_s2_refl_path,
|
||||
)
|
||||
from gap_validation.spatial_metrics import evaluate_gap_vs_withheld
|
||||
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
|
||||
|
||||
|
||||
def _ymd_from_iso(iso_d: str) -> str:
|
||||
return datetime.strptime(iso_d[:10], "%Y-%m-%d").strftime("%Y%m%d")
|
||||
|
||||
|
||||
def _yyyymmdd_from_withheld_filename(fn: str) -> str | None:
|
||||
for part in fn.replace(".tif", "").split("_"):
|
||||
if len(part) == 8 and part.isdigit():
|
||||
return part
|
||||
return None
|
||||
|
||||
|
||||
def _withheld_iso(entry: dict) -> str | None:
|
||||
d = entry.get("withheld_s2_date")
|
||||
if isinstance(d, str) and len(d) >= 10:
|
||||
return d[:10]
|
||||
fn = entry.get("withheld_s2_filename")
|
||||
if not fn or not isinstance(fn, str):
|
||||
return None
|
||||
ymd = _yyyymmdd_from_withheld_filename(fn)
|
||||
if not ymd:
|
||||
return None
|
||||
return datetime.strptime(ymd, "%Y%m%d").date().isoformat()
|
||||
|
||||
|
||||
def _fused_file(fusion_dir: Path, mode: str, ymd: str) -> Path:
|
||||
stem = "REFL" if mode == "bti" else "GCC"
|
||||
return fusion_dir / f"{stem}_{ymd}.tif"
|
||||
|
||||
|
||||
def _scenario_key(strategy: str, sigma: int | None, mode: str) -> str:
|
||||
sig = 30 if sigma == 30 else 20
|
||||
return f"{strategy}_sigma{sig}_{mode}"
|
||||
|
||||
|
||||
def _git_rev() -> str | None:
|
||||
try:
|
||||
return subprocess.check_output(
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=Path(__file__).resolve().parent.parent,
|
||||
text=True,
|
||||
).strip()
|
||||
except (OSError, subprocess.CalledProcessError):
|
||||
return None
|
||||
|
||||
|
||||
def _filter_entries(
|
||||
entries: list[dict],
|
||||
gap_days_filter: list[int] | None,
|
||||
transition_filter: list[str] | None,
|
||||
) -> list[dict]:
|
||||
out = entries
|
||||
if gap_days_filter:
|
||||
out = [e for e in out if e.get("gap_days") in gap_days_filter]
|
||||
if transition_filter:
|
||||
out = [e for e in out if e.get("transition") in transition_filter]
|
||||
return out
|
||||
|
||||
|
||||
def run_validation(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    write_manifest_only: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
    manifest_gap_lengths: tuple[int, ...] = DEFAULT_GAP_LENGTHS,
    manifest_transitions: tuple[str, ...] = TRANSITIONS,
) -> Path:
    """Run the withheld-S2 gap validation for one site, season and scenario.

    Steps: optionally rewrite the manifest; for each selected manifest entry
    optionally run the masked single-date fusion, then score the gap-degraded
    raster against the withheld S2 image; finally write
    ``gap_validation_summary_{mode}.json`` (plus the legacy
    ``gap_validation_summary.json`` when ``mode == "bti"``).

    Entries that cannot be evaluated are recorded in ``results`` with an
    ``error`` field instead of aborting the run.  Only ``RuntimeError`` from
    the fusion step is recorded this way; other exceptions propagate.

    Returns:
        Path of the summary JSON, or of ``gap_manifest.json`` when
        ``write_manifest_only`` is set.
    """
    site_root = Path(f"data/{site_name}/{season}")
    out_dir = validation_dir(site_name, season)
    out_dir.mkdir(parents=True, exist_ok=True)

    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
            gap_lengths=manifest_gap_lengths,
            transitions=manifest_transitions,
        )
    if write_manifest_only:
        return out_dir / "gap_manifest.json"

    def scenario_info() -> dict:
        # Fresh dict per record, as each result carries its own copy.
        return {
            "strategy": strategy,
            "sigma": 30 if sigma == 30 else 20,
            "mode": mode,
        }

    selected = _filter_entries(
        load_manifest(site_name, season)["entries"], gap_days_filter, transition_filter
    )

    results: list[dict] = []
    crossover_rows: list[dict] = []
    for entry in selected:
        gap_days = entry["gap_days"]
        transition = entry.get("transition", "green_up")
        prediction_iso = entry["prediction_date"]
        window_start = entry["window_start"]
        window_end = entry["window_end"]
        withheld_name = entry.get("withheld_s2_filename")
        tag = {"transition": transition, "gap_days": gap_days}

        if not withheld_name:
            results.append({**tag, "error": "no_withheld_s2_filename", "entry": entry})
            continue

        prediction_ymd = _ymd_from_iso(prediction_iso)
        withheld_ymd = _yyyymmdd_from_withheld_filename(withheld_name)
        if not withheld_ymd:
            results.append(
                {
                    **tag,
                    "error": "could_not_parse_withheld_yyyymmdd",
                    "withheld_s2_filename": withheld_name,
                }
            )
            continue

        withheld_iso = (
            _withheld_iso(entry)
            or f"{withheld_ymd[:4]}-{withheld_ymd[4:6]}-{withheld_ymd[6:8]}"
        )

        fusion_out = validation_fusion_dir(
            site_name, season, gap_days, transition, strategy, sigma, mode
        )
        if not skip_fusion:
            try:
                run_masked_fusion_one_date(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    prediction_iso,
                    window_start,
                    window_end,
                    withheld_ymd,
                    fusion_out,
                )
            except RuntimeError as exc:
                results.append({**tag, "error": str(exc), "entry": entry})
                continue

        fused_gap = _fused_file(fusion_out, mode, prediction_ymd)
        production = production_fusion_path(
            season, site_name, strategy, sigma, mode, prediction_ymd
        )
        withheld_path = withheld_s2_refl_path(season, site_name, strategy, withheld_name)
        if withheld_path is None or not fused_gap.is_file():
            results.append(
                {
                    **tag,
                    "prediction_date": prediction_iso,
                    "withheld_s2_filename": withheld_name,
                    "scenario": scenario_info(),
                    "error": "missing_withheld_refl_or_fused_gap",
                    "fused_gap_path": str(fused_gap),
                }
            )
            continue

        spatial = evaluate_gap_vs_withheld(
            withheld_path,
            fused_gap,
            production if production.is_file() else None,
            mode,
            whittaker_context=(
                site_root,
                strategy,
                prediction_iso,
                withheld_iso,
                window_start,
                window_end,
            ),
        )
        crossover_row = {
            **tag,
            "nse_s2_fusion": (spatial.get("gap") or {}).get("nse_s2"),
            "nse_s2_whittaker": (spatial.get("whittaker") or {}).get("nse_s2"),
        }
        results.append(
            {
                **tag,
                "prediction_date": prediction_iso,
                "window_start": window_start,
                "window_end": window_end,
                "withheld_s2_filename": withheld_name,
                "scenario": scenario_info(),
                "paths": {
                    "fused_gap": str(fused_gap),
                    "fused_no_gap": str(production) if production.is_file() else None,
                    "withheld_s2_refl": str(withheld_path),
                },
                "spatial": spatial,
                "whittaker_crossover_row": crossover_row,
            }
        )
        crossover_rows.append(crossover_row)

    scenario = _scenario_key(strategy, sigma, mode)
    summary = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "command_line": sys.argv,
        "git_commit": _git_rev(),
        "manifest": str(out_dir / "gap_manifest.json"),
        "gap_withheld_images": str(out_dir / "gap_withheld_images.json"),
        "results": results,
        "whittaker_crossover": {
            scenario: {
                "metric": "nse_s2_spatial_vs_withheld_s2_gcc",
                "whittaker_definition": (
                    "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
                    "all S2 dates in the gap window and the withheld acquisition removed; "
                    "prediction is a spatially constant field at smoothed GCC(prediction_date)."
                ),
                "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_s2_fusion",
                    whittaker_key="nse_s2_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }

    summary_text = json.dumps(summary, indent=2) + "\n"
    out_path = out_dir / f"gap_validation_summary_{mode}.json"
    out_path.write_text(summary_text, encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers (webapp, older scripts).
        (out_dir / "gap_validation_summary.json").write_text(summary_text, encoding="utf-8")
    return out_path
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse arguments, run the validation, print the output path."""
    parser = argparse.ArgumentParser(
        description="Tier-2 withheld-S2 gap validation (outputs under data/.../validation/)."
    )
    parser.add_argument("--site", required=True)
    parser.add_argument("--season", type=int, required=True)
    parser.add_argument("--lat", type=float, required=True)
    parser.add_argument("--lon", type=float, required=True)
    parser.add_argument(
        "--strategy", default="aggressive", choices=["aggressive", "nonaggressive"]
    )
    parser.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    parser.add_argument("--mode", default="bti", choices=["bti", "itb"])
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        metavar="N",
        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
    )
    parser.add_argument(
        "--transition",
        choices=list(TRANSITIONS),
        action="append",
        help="Restrict to transition(s); repeatable (default: all in manifest).",
    )
    parser.add_argument("--skip-manifest", action="store_true")
    parser.add_argument(
        "--skip-fusion",
        action="store_true",
        help="Reuse existing validation fusion rasters.",
    )
    parser.add_argument(
        "--write-manifest-only",
        action="store_true",
        help="Write gap_manifest.json + gap_withheld_images.json and exit.",
    )
    parser.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=["aggressive", "nonaggressive"],
        help="Which prepared_*/s2 tree is used to pick nearest S2 for withholding.",
    )
    options = parser.parse_args()

    # Sigma 20 is passed on as None; only 30 is passed as a number.
    sigma_kw = 30 if options.sigma == 30 else None
    summary_path = run_validation(
        options.site,
        options.season,
        (options.lat, options.lon),
        options.strategy,
        sigma_kw,
        options.mode,
        skip_manifest=options.skip_manifest,
        skip_fusion=options.skip_fusion,
        write_manifest_only=options.write_manifest_only,
        gap_days_filter=options.gap_days or None,
        transition_filter=options.transition or None,
        s2_calendar_strategy=options.s2_calendar_strategy,
    )
    print(summary_path)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
"""Symlink prepared S2 into a temp dir, omitting gap-window acquisitions (REFL/GCC + DIST)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)


def yyyymmdd_in_name(name: str) -> str | None:
    """Return the 8-digit day embedded in a prepared S2 file name, else ``None``.

    The name must end in ``_<8 digits>_REFL.tif``, ``_GCC.tif`` or
    ``_DIST_CLOUD.tif`` (case-insensitive).
    """
    found = S2_PREP_DATE_RE.search(name)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
def yyyymmdd_from_iso(iso_d: str) -> str:
    """Convert the leading ``YYYY-MM-DD`` of ``iso_d`` to ``YYYYMMDD``."""
    parsed = datetime.strptime(iso_d[:10], "%Y-%m-%d")
    return f"{parsed:%Y%m%d}"
|
||||
|
||||
|
||||
def acquisition_yyyymmdd_in_window(
    prepared_s2: Path, window_start: date, window_end: date
) -> set[str]:
    """All S2 acquisition days inside ``[window_start, window_end]`` (inclusive).

    Days are read from every prepared raster name ending in
    ``_<YYYYMMDD>_REFL.tif``, ``_GCC.tif`` or ``_DIST_CLOUD.tif``
    (case-insensitive), whatever the name prefix.  Earlier only
    ``S2A_MSIL2A_*_REFL.tif`` was considered, so directories holding GCC
    rasters or differently prefixed names yielded no gap-window days.

    Returns:
        Set of ``YYYYMMDD`` strings; empty when ``prepared_s2`` is not a
        directory.

    Raises:
        ValueError: if a matching name carries 8 digits that are not a valid
            calendar date.
    """
    days: set[str] = set()
    if not prepared_s2.is_dir():
        return days

    # Same naming rule as the module-level S2_PREP_DATE_RE used when linking.
    stamp = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
    for path in prepared_s2.iterdir():
        found = stamp.search(path.name)
        if not found:
            continue
        ymd = found.group(1)
        if window_start <= datetime.strptime(ymd, "%Y%m%d").date() <= window_end:
            days.add(ymd)
    return days
|
||||
|
||||
|
||||
def build_masked_s2_dir(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
    patterns: tuple[str, ...],
) -> int:
    """Symlink files matching ``patterns`` into ``dest``, skipping excluded days.

    Files whose name carries no recognizable day are linked as well.  An
    existing entry with the same name in ``dest`` is replaced.

    Returns:
        Number of symlinks created.
    """
    dest.mkdir(parents=True, exist_ok=True)
    linked = 0
    for glob_pattern in patterns:
        for source in sorted(prepared_s2.glob(glob_pattern)):
            if not (source.is_file() or source.is_symlink()):
                continue
            day = yyyymmdd_in_name(source.name)
            if day and day in excluded_yyyymmdd:
                continue
            target = dest / source.name
            # is_symlink() also catches dangling links, for which exists() is False.
            if target.exists() or target.is_symlink():
                target.unlink()
            target.symlink_to(source.resolve())
            linked += 1
    return linked
|
||||
|
||||
|
||||
def assert_no_leakage(withheld_yyyymmdd: str, masked_s2_dir: Path) -> None:
    """Raise if any file in ``masked_s2_dir`` carries the withheld acquisition day.

    Raises:
        RuntimeError: when a file name's day equals ``withheld_yyyymmdd``.
    """
    leaked = any(
        yyyymmdd_in_name(item.name) == withheld_yyyymmdd
        for item in masked_s2_dir.iterdir()
    )
    if leaked:
        raise RuntimeError(
            f"Data leakage: withheld acquisition {withheld_yyyymmdd} "
            f"found in masked S2 dir {masked_s2_dir}"
        )
|
||||
|
||||
|
||||
def build_masked_s2_dir_bti(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked dir for BtI: links ``*REFL.tif`` and ``*DIST_CLOUD.tif``."""
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, bti_patterns)
|
||||
|
||||
|
||||
def build_masked_s2_dir_itb(
    prepared_s2: Path,
    excluded_yyyymmdd: set[str],
    dest: Path,
) -> int:
    """Build the masked dir for ItB: links ``*GCC.tif`` and ``*DIST_CLOUD.tif``."""
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, excluded_yyyymmdd, dest, itb_patterns)
|
||||
|
|
@ -1,234 +0,0 @@
|
|||
"""Per-pixel GCC vs withheld S2; NSE (nse_s2); no-gap baseline; deltas."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.warp import reproject, Resampling
|
||||
from scipy.stats import pearsonr
|
||||
|
||||
# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
|
||||
VALID_REFL_THRESHOLD = 0.001
|
||||
GCC_DENOM_EPS = 1e-3
|
||||
MAX_REPORTED_NSE_S2 = 20.0
|
||||
|
||||
|
||||
def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
|
||||
t = red.astype(np.float64) + green.astype(np.float64) + blue.astype(np.float64)
|
||||
out = np.full_like(blue, np.nan, dtype=np.float64)
|
||||
m = (
|
||||
np.isfinite(t)
|
||||
& (t >= GCC_DENOM_EPS)
|
||||
& np.isfinite(blue)
|
||||
& np.isfinite(green)
|
||||
& np.isfinite(red)
|
||||
& (blue > GCC_DENOM_EPS)
|
||||
& (green > GCC_DENOM_EPS)
|
||||
& (red > GCC_DENOM_EPS)
|
||||
)
|
||||
out[m] = green[m].astype(np.float64) / t[m]
|
||||
return out.astype(np.float32)
|
||||
|
||||
|
||||
def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
    """Mask of pixels whose first three bands are finite and > ``GCC_DENOM_EPS``.

    Returns ``None`` when the raster has fewer than three bands.
    """
    with rasterio.open(fusion_path) as dataset:
        if dataset.count < 3:
            return None
        bgr = dataset.read(indexes=[1, 2, 3]).astype(np.float32)
    finite = np.isfinite(bgr).all(axis=0)
    positive = (bgr > GCC_DENOM_EPS).all(axis=0)
    return finite & positive
|
||||
|
||||
|
||||
def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
    """Read fused GCC and a copy of the raster profile.

    Rasters with four or more bands have GCC computed from bands 1-3 (passed
    to ``_gcc_from_rgb`` as blue, green, red); otherwise band 1 is returned
    as the GCC itself.
    """
    with rasterio.open(fusion_path) as dataset:
        profile = dataset.profile.copy()
        if dataset.count < 4:
            return dataset.read(1).astype(np.float32), profile
        blue, green, red = (
            dataset.read(index).astype(np.float32) for index in (1, 2, 3)
        )
    return _gcc_from_rgb(blue, green, red), profile
|
||||
|
||||
|
||||
def warp_refl_bands_to_grid(
    refl_path: Path,
    height: int,
    width: int,
    transform,
    crs,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Resample bands 1-3 of ``refl_path`` onto the target grid (bilinear).

    Returns three ``(height, width)`` float32 arrays in band order, which the
    callers treat as blue, green, red.
    """
    with rasterio.open(refl_path) as dataset:
        warped = tuple(
            np.empty((height, width), dtype=np.float32) for _ in range(3)
        )
        for band_index, target in enumerate(warped, start=1):
            reproject(
                source=rasterio.band(dataset, band_index),
                destination=target,
                src_transform=dataset.transform,
                src_crs=dataset.crs,
                dst_transform=transform,
                dst_crs=crs,
                resampling=Resampling.bilinear,
            )
    return warped
|
||||
|
||||
|
||||
def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
    """Return the valid-pixel mask of a fused raster.

    For ``mode == "itb"`` or fewer than four bands: band 1 is finite and
    above ``VALID_REFL_THRESHOLD``.  Otherwise: every band is finite and the
    per-pixel maximum over all bands exceeds the threshold.
    """
    with rasterio.open(fusion_path) as dataset:
        single_band = mode == "itb" or dataset.count < 4
        if single_band:
            values = dataset.read(1).astype(np.float32)
        else:
            values = dataset.read().astype(np.float32)

    if single_band:
        return np.isfinite(values) & (values > VALID_REFL_THRESHOLD)

    with np.errstate(all="ignore"):
        peak = np.nanmax(values, axis=0)
        all_finite = np.isfinite(values).all(axis=0)
        valid = all_finite & np.isfinite(peak) & (peak > VALID_REFL_THRESHOLD)
    return valid
|
||||
|
||||
|
||||
def spatial_scores(
    y_true_gcc: np.ndarray,
    y_pred_gcc: np.ndarray,
    mask: np.ndarray,
) -> dict:
    """RMSE, MAE, mean bias, Pearson r and ``nse_s2`` over the masked pixels.

    With fewer than two masked pixels only ``n_pixels`` is returned.
    ``nse_s2`` is ``None`` when the truth has zero variance or when its
    magnitude exceeds ``MAX_REPORTED_NSE_S2``; ``pearson_r`` is ``None`` when
    either series is constant.  ``mean_bias`` is prediction minus truth.
    """
    truth = y_true_gcc[mask].astype(np.float64).ravel()
    predicted = y_pred_gcc[mask].astype(np.float64).ravel()
    count = int(truth.size)
    if count < 2:
        return {"n_pixels": count}

    squared_error = (truth - predicted) ** 2
    scores = {
        "n_pixels": count,
        "rmse": float(np.sqrt(np.mean(squared_error))),
        "mae": float(np.mean(np.abs(truth - predicted))),
        "mean_bias": float(np.mean(predicted - truth)),
        "pearson_r": None,
        "nse_s2": None,
    }

    truth_mean = float(np.mean(truth))
    variance_sum = float(np.sum((truth - truth_mean) ** 2))
    if variance_sum > 0:
        nse = float(1.0 - np.sum(squared_error) / variance_sum)
        if abs(nse) <= MAX_REPORTED_NSE_S2:
            scores["nse_s2"] = nse

    if np.std(truth) > 0 and np.std(predicted) > 0:
        scores["pearson_r"] = float(pearsonr(truth, predicted)[0])
    return scores
|
||||
|
||||
|
||||
def withheld_gcc_on_fusion_grid(
    withheld_refl_path: Path, fused_path: Path
) -> tuple[np.ndarray, np.ndarray, dict]:
    """Return truth GCC (withheld S2) and predicted GCC on the fused raster's grid.

    The withheld bands are warped to the shape, transform and CRS of
    ``fused_path`` before GCC is computed.

    Returns:
        ``(y_true, y_pred, profile)`` where ``profile`` belongs to ``fused_path``.
    """
    predicted, profile = read_fused_gcc(fused_path)
    rows, cols = predicted.shape
    blue, green, red = warp_refl_bands_to_grid(
        withheld_refl_path, rows, cols, profile["transform"], profile["crs"]
    )
    truth = _gcc_from_rgb(blue, green, red)
    return truth, predicted, profile
|
||||
|
||||
|
||||
def mask_gap_whittaker(
    yt: np.ndarray,
    y_gap: np.ndarray,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Mask for gap fusion and Whittaker vs withheld S2 (no-gap fusion not required).

    A pixel is kept when the fused raster is valid there and both ``yt`` and
    ``y_gap`` are finite and in ``(VALID_REFL_THRESHOLD, 1.0]``; when the
    fused raster has at least three bands, those must also be strictly positive.
    """

    def in_gcc_range(values: np.ndarray) -> np.ndarray:
        return np.isfinite(values) & (values > VALID_REFL_THRESHOLD) & (values <= 1.0)

    mask = valid_mask_fused(fused_gap_path, mode) & in_gcc_range(yt) & in_gcc_range(y_gap)
    positive = _positive_bgr_mask(fused_gap_path)
    if positive is not None:
        mask &= positive
    return mask
|
||||
|
||||
|
||||
def common_valid_mask(
    yt: np.ndarray,
    y_gap: np.ndarray,
    y_nogap: np.ndarray | None,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Gap/Whittaker mask further restricted to valid no-gap fusion pixels (internal QA).

    When ``y_nogap`` is ``None`` the result equals ``mask_gap_whittaker``.
    """
    mask = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
    if y_nogap is None:
        return mask
    nogap_ok = np.isfinite(y_nogap) & (y_nogap > VALID_REFL_THRESHOLD) & (y_nogap <= 1.0)
    mask &= nogap_ok
    return mask
|
||||
|
||||
|
||||
def evaluate_gap_vs_withheld(
    withheld_refl_path: Path,
    fused_gap_path: Path,
    fused_nogap_path: Path | None,
    mode: str,
    *,
    whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
) -> dict:
    """Spatial metrics for gap and no-gap fusion; optional Whittaker constant field.

    The result always has ``gap``.  ``no_gap``, ``delta_rmse`` and
    ``delta_nse`` are added when the no-gap raster exists, and ``whittaker``
    when ``whittaker_context`` is given and yields a value.

    ``delta_rmse`` / ``delta_nse`` compare gap vs no-gap fusion (QA only;
    ``delta_nse`` = NSE_no_gap − NSE_gap, not exported to thesis tables).
    The ``gap`` scores use the gap/Whittaker mask, ``no_gap`` the mask that
    additionally requires valid no-gap pixels.
    """
    truth, gap_pred, _profile = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)

    nogap_pred = None
    if fused_nogap_path is not None and fused_nogap_path.is_file():
        nogap_pred, _ = read_fused_gcc(fused_nogap_path)

    gap_mask = mask_gap_whittaker(truth, gap_pred, fused_gap_path, mode)
    report: dict = {"gap": spatial_scores(truth, gap_pred, gap_mask)}

    if nogap_pred is not None:
        shared_mask = common_valid_mask(truth, gap_pred, nogap_pred, fused_gap_path, mode)
        report["no_gap"] = spatial_scores(truth, nogap_pred, shared_mask)
        gap_scores, nogap_scores = report["gap"], report["no_gap"]
        if gap_scores.get("rmse") is not None and nogap_scores.get("rmse") is not None:
            report["delta_rmse"] = float(gap_scores["rmse"] - nogap_scores["rmse"])
        if gap_scores.get("nse_s2") is not None and nogap_scores.get("nse_s2") is not None:
            report["delta_nse"] = float(nogap_scores["nse_s2"] - gap_scores["nse_s2"])

    if whittaker_context is None:
        return report

    # Imported here rather than at module level, as in the original layout.
    from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series

    site_root, strategy, prediction_iso, withheld_iso, window_start, window_end = (
        whittaker_context
    )
    smoothed = whittaker_gcc_on_gap_masked_series(
        site_root,
        strategy,
        prediction_iso,
        withheld_iso,
        window_start_iso=window_start,
        window_end_iso=window_end,
    )
    if smoothed is not None:
        report["whittaker"] = constant_field_scores(truth, float(smoothed), gap_mask)
    return report
|
||||
|
||||
|
||||
def constant_field_scores(
    y_true_gcc: np.ndarray, scalar: float, mask: np.ndarray
) -> dict:
    """Score a spatially constant prediction against ``y_true_gcc``.

    Builds a float32 field shaped like ``y_true_gcc`` filled with ``scalar``
    (the Whittaker value) and passes it to ``spatial_scores`` with ``mask``.
    """
    constant_field = np.empty_like(y_true_gcc, dtype=np.float32)
    constant_field.fill(scalar)
    return spatial_scores(y_true_gcc, constant_field, mask)
|
||||
|
|
@ -1,293 +0,0 @@
|
|||
"""Full-season gap-degraded fusion → temporal NSE_PC vs PhenoCam (tier after spatial validation)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_indices import _get_gcc_from_original
|
||||
from metrics_stats import (
|
||||
WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
_norm_date_key,
|
||||
_s2_gcc_series_from_preselection,
|
||||
_whittaker_smooth_dict,
|
||||
calculate_temporal_metrics,
|
||||
load_timeseries,
|
||||
)
|
||||
|
||||
from gap_validation.calendar import TRANSITIONS, load_manifest, validation_dir, write_manifest
|
||||
from gap_validation.fusion_masked import run_masked_fusion_season
|
||||
from gap_validation.run import (
|
||||
_filter_entries,
|
||||
_scenario_key,
|
||||
_withheld_iso,
|
||||
_yyyymmdd_from_withheld_filename,
|
||||
)
|
||||
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
|
||||
|
||||
|
||||
def _fusion_gcc_timeseries(
    fusion_dir: Path, site_position: tuple[float, float], mode: str
) -> dict[str, float]:
    """Collect site GCC per date from the fused rasters in ``fusion_dir``.

    Globs ``REFL_*.tif`` when ``mode == "bti"`` and ``GCC_*.tif`` otherwise,
    takes the date from a trailing ``_YYYYMMDD.tif`` in the file name, and maps
    the ISO date to the value returned by ``_get_gcc_from_original``. Files with
    no date suffix or no sampled value are left out.
    """
    if mode == "bti":
        pattern = "REFL_*.tif"
    else:
        pattern = "GCC_*.tif"

    series: dict[str, float] = {}
    for raster in sorted(fusion_dir.glob(pattern)):
        found = re.search(r"_(\d{8})\.tif$", raster.name)
        if found is None:
            continue
        day = datetime.strptime(found.group(1), "%Y%m%d").date().isoformat()
        value = _get_gcc_from_original(raster, site_position)
        if value is None:
            continue
        series[day] = float(value)
    return series
|
||||
|
||||
|
||||
def whittaker_timeseries_gap_degraded(
    base: Path,
    strategy: str,
    window_start_iso: str,
    window_end_iso: str,
    withheld_iso: str,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> dict[str, float]:
    """Whittaker-smoothed S2 GCC series with the gap window and withheld day removed.

    Observations come from ``_s2_gcc_series_from_preselection(base)``. A date is
    kept only if it has a flags entry, its flag for the chosen strategy (index 0
    for ``"aggressive"``, otherwise 1) is falsy, it is not the withheld day and
    it lies outside ``[window_start_iso, window_end_iso]`` (inclusive). Returns
    ``{}`` when there is no series or fewer than two observations remain.
    """
    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return {}

    flag_index = 1 if strategy != "aggressive" else 0
    gap_first = datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date()
    gap_last = datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date()
    withheld_key = _norm_date_key(withheld_iso)

    def inside_gap(date_key: str) -> bool:
        # Unparseable keys count as "outside the window".
        try:
            day = datetime.strptime(date_key[:10], "%Y-%m-%d").date()
        except ValueError:
            return False
        return gap_first <= day <= gap_last

    observations = []
    for raw_date, gcc in all_gcc.items():
        if raw_date not in flags or flags[raw_date][flag_index]:
            continue
        date_key = _norm_date_key(raw_date)
        if date_key == withheld_key:
            continue
        if inside_gap(date_key or ""):
            continue
        observations.append((raw_date, gcc))
    observations.sort()

    if len(observations) < 2:
        return {}
    obs_dates, obs_values = zip(*observations)
    return _whittaker_smooth_dict(obs_dates, obs_values, lam)
|
||||
|
||||
|
||||
def run_temporal_pc(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    gap_days_filter: list[int] | None,
    transition_filter: list[str] | None,
    s2_calendar_strategy: str,
) -> Path:
    """Run full-season gap fusion + NSE_PC; write ``gap_metrics_{mode}.json``.

    For every manifest entry (after filtering by gap length / transition) this
    optionally runs the masked fusion, samples the fused GCC series at the
    site, and scores both the fused series and a gap-degraded Whittaker series
    against the PhenoCam series. When the no-gap ``metrics.json`` has an
    ``nse_pc`` for this scenario, ``delta_rmse`` / ``delta_nse`` are added.

    Entries without a usable withheld S2 file name, or whose fusion raises
    ``RuntimeError``, are recorded as ``{"error": ...}`` rows and skipped.

    Returns the path of the written ``gap_metrics_{mode}.json``. In ``bti``
    mode the same payload is also written to the legacy ``gap_metrics.json``.
    """
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)

    if not skip_manifest:
        write_manifest(
            site_name,
            season,
            site_position,
            s2_calendar_strategy=s2_calendar_strategy,
        )

    manifest = load_manifest(site_name, season)
    entries = _filter_entries(manifest["entries"], gap_days_filter, transition_filter)
    phenocam_ts = load_timeseries(base / "raw" / "phenocam" / "phenocam_gcc.json")

    scenario = _scenario_key(strategy, sigma, mode)

    # The no-gap reference metrics do not depend on the entry: read and parse
    # them once, rather than re-reading metrics.json for every entry.
    nogap_block: dict = {}
    nogap_metrics_path = base / "metrics.json"
    if nogap_metrics_path.is_file():
        nogap_metrics = json.loads(nogap_metrics_path.read_text(encoding="utf-8"))
        nogap_block = (nogap_metrics.get("temporal") or {}).get(scenario) or {}
    nogap_nse_pc = nogap_block.get("nse_pc")
    nogap_rmse = nogap_block.get("rmse")

    results: list[dict] = []
    crossover_rows: list[dict] = []

    for entry in entries:
        transition = entry.get("transition", "green_up")
        gap_days = entry["gap_days"]
        pred = entry["prediction_date"]
        w0, w1 = entry["window_start"], entry["window_end"]
        fn = entry.get("withheld_s2_filename")
        if not fn:
            results.append(
                {"transition": transition, "gap_days": gap_days, "error": "no_withheld_s2"}
            )
            continue
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "transition": transition,
                    "gap_days": gap_days,
                    "error": "bad_withheld_filename",
                }
            )
            continue
        withheld_iso = _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"

        temporal_dir = vdir / "temporal" / f"gap_{gap_days}_{transition}" / scenario
        if not skip_fusion:
            try:
                run_masked_fusion_season(
                    season,
                    site_position,
                    site_name,
                    strategy,
                    sigma,
                    mode,
                    w0,
                    w1,
                    wh_ymd,
                    temporal_dir,
                )
            except RuntimeError as e:
                results.append(
                    {
                        "transition": transition,
                        "gap_days": gap_days,
                        "error": str(e),
                    }
                )
                continue
        # The fused rasters are sampled whether they were just produced or
        # fusion was skipped and they already exist on disk.
        fusion_ts = _fusion_gcc_timeseries(temporal_dir, site_position, mode)

        fused_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
        wh_ts = whittaker_timeseries_gap_degraded(base, strategy, w0, w1, withheld_iso)
        wh_metrics = calculate_temporal_metrics(wh_ts, phenocam_ts)

        row: dict = {
            "transition": transition,
            "gap_days": gap_days,
            "prediction_date": pred,
            "window_start": w0,
            "window_end": w1,
            "withheld_s2_filename": fn,
            "temporal": {
                "fused": fused_metrics,
                "whittaker": wh_metrics,
            },
            "fusion_dir": str(temporal_dir),
        }
        if fused_metrics and nogap_nse_pc is not None:
            g_rmse = fused_metrics.get("rmse")
            n_g = fused_metrics.get("nse_pc")
            if g_rmse is not None and nogap_rmse is not None:
                row["delta_rmse"] = float(g_rmse - nogap_rmse)
            if n_g is not None:
                row["delta_nse"] = float(nogap_nse_pc - n_g)

        row["utility_crossover_row"] = {
            "transition": transition,
            "gap_days": gap_days,
            "nse_pc_fusion": (fused_metrics or {}).get("nse_pc"),
            "nse_pc_whittaker": (wh_metrics or {}).get("nse_pc"),
        }
        crossover_rows.append(row["utility_crossover_row"])
        results.append(row)

    payload = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "tier": "temporal_nse_pc",
        "manifest": str(vdir / "gap_manifest.json"),
        "results": results,
        "utility_crossover": {
            scenario: {
                "metric": "nse_pc_vs_phenocam_gcc90",
                "first_gap_days_fusion_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_pc_fusion",
                    whittaker_key="nse_pc_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    # Serialise once; the legacy alias receives the identical text.
    payload_text = json.dumps(payload, indent=2) + "\n"
    out_path = vdir / f"gap_metrics_{mode}.json"
    out_path.write_text(payload_text, encoding="utf-8")
    if mode == "bti":
        # Legacy alias for backward-compatible readers.
        (vdir / "gap_metrics.json").write_text(payload_text, encoding="utf-8")
    return out_path
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line, run the temporal NSE_PC tier and print the output path.

    ``--sigma 30`` is forwarded as ``30``; the default ``--sigma 20`` is
    forwarded to ``run_temporal_pc`` as ``None``.
    """
    parser = argparse.ArgumentParser(description="Gap-degraded full-season NSE_PC tier.")
    parser.add_argument("--site", required=True)
    parser.add_argument("--season", type=int, required=True)
    parser.add_argument("--lat", type=float, required=True)
    parser.add_argument("--lon", type=float, required=True)
    parser.add_argument("--strategy", default="aggressive")
    parser.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    parser.add_argument("--mode", default="bti", choices=["bti", "itb"])
    parser.add_argument("--gap-days", type=int, action="append")
    parser.add_argument("--transition", choices=list(TRANSITIONS), action="append")
    parser.add_argument("--skip-manifest", action="store_true")
    parser.add_argument("--skip-fusion", action="store_true")
    parser.add_argument("--s2-calendar-strategy", default="aggressive")
    options = parser.parse_args()

    if options.sigma == 30:
        sigma_kw = 30
    else:
        sigma_kw = None

    site_position = (options.lat, options.lon)
    written = run_temporal_pc(
        options.site,
        options.season,
        site_position,
        options.strategy,
        sigma_kw,
        options.mode,
        skip_manifest=options.skip_manifest,
        skip_fusion=options.skip_fusion,
        gap_days_filter=options.gap_days,
        transition_filter=options.transition,
        s2_calendar_strategy=options.s2_calendar_strategy,
    )
    print(written)
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
"""Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_stats import (
|
||||
WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
_norm_date_key,
|
||||
_s2_gcc_series_from_preselection,
|
||||
_whittaker_smooth_dict,
|
||||
)
|
||||
|
||||
|
||||
def _date_in_window(dk: str, start: date, end: date) -> bool:
    """Return True when the ``YYYY-MM-DD`` prefix of ``dk`` lies in ``[start, end]``.

    Only the first ten characters of ``dk`` are parsed; a prefix that does not
    parse as a date yields ``False``.
    """
    try:
        parsed = datetime.strptime(dk[:10], "%Y-%m-%d")
    except ValueError:
        return False
    else:
        day = parsed.date()
        return not (day < start or day > end)
|
||||
|
||||
|
||||
def whittaker_gcc_on_gap_masked_series(
    base: Path,
    strategy: str,
    prediction_iso: str,
    withheld_iso: str,
    *,
    window_start_iso: str | None = None,
    window_end_iso: str | None = None,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> float | None:
    """Whittaker GCC at the prediction date, fitted without gap-window and withheld data.

    Observations come from ``_s2_gcc_series_from_preselection(base)``. A date is
    used only if it has a flags entry whose flag for the strategy (index 0 for
    ``"aggressive"``, otherwise 1) is falsy, its normalised key is non-empty and
    differs from the withheld day, and - when both window bounds are given - it
    lies outside the inclusive window.

    Returns ``None`` when either date key cannot be normalised, the series is
    empty, fewer than two observations remain, or the smoothed series has no
    value for the prediction date.
    """
    pred_k = _norm_date_key(prediction_iso)
    wh_k = _norm_date_key(withheld_iso)
    if not (pred_k and wh_k):
        return None

    gap_window = None
    if window_start_iso and window_end_iso:
        gap_window = (
            datetime.strptime(window_start_iso[:10], "%Y-%m-%d").date(),
            datetime.strptime(window_end_iso[:10], "%Y-%m-%d").date(),
        )

    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if not all_gcc:
        return None
    flag_index = 1 if strategy != "aggressive" else 0

    def usable(raw_date) -> bool:
        if raw_date not in flags or flags[raw_date][flag_index]:
            return False
        date_key = _norm_date_key(raw_date)
        if not date_key or date_key == wh_k:
            return False
        return gap_window is None or not _date_in_window(date_key, *gap_window)

    observations = sorted(
        ((raw_date, gcc) for raw_date, gcc in all_gcc.items() if usable(raw_date)),
        key=lambda pair: pair[0],
    )
    if len(observations) < 2:
        return None
    obs_dates, obs_values = zip(*observations)
    return _whittaker_smooth_dict(obs_dates, obs_values, lam).get(pred_k)
|
||||
|
||||
|
||||
def first_gap_where_fusion_below_whittaker(
    rows: list[dict],
    *,
    fusion_key: str = "nse_s2",
    whittaker_key: str = "nse_s2",
) -> int | None:
    """Smallest ``gap_days`` where fusion[metric] < whittaker[metric] (strict).

    Rows where either metric is missing or ``None`` are ignored. The minimum is
    taken over all remaining rows regardless of their ``transition``, so a
    short-gap crossover in one transition is not hidden by a longer-gap
    crossover in a transition that happens to sort first.

    Returns ``None`` when no row has fusion strictly below Whittaker.
    """
    crossing_gaps = [
        int(r["gap_days"])
        for r in rows
        if r.get(fusion_key) is not None
        and r.get(whittaker_key) is not None
        and r[fusion_key] < r[whittaker_key]
    ]
    return min(crossing_gaps) if crossing_gaps else None
|
||||
|
|
@ -1,689 +0,0 @@
|
|||
"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.warp import transform as transform_coords
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from preselection import _sample_3x3
|
||||
|
||||
# Band numbers passed to ``src.read(...)`` by the NDVI/GCC helpers below.
# NOTE(review): the band-extraction helper in this module labels bands 1-4 as
# B02, B03, B04, B8A, which matches this blue/green/red/NIR order - confirm
# against the raster producer.
RED_BAND = 3
NIR_BAND = 4
BLUE_BAND = 1
GREEN_BAND = 2
|
||||
|
||||
|
||||
def _calculate_and_write_ndvi(input_file, output_file):
    """Compute NDVI from ``input_file`` and write it as a single-band float32 raster.

    NDVI is ``(nir - red) / (nir + red)`` where both bands are positive and 0
    elsewhere; 0 is also declared as the output nodata value. The output
    reuses the input profile with LZW compression.
    """
    with rasterio.open(input_file) as src:
        red = src.read(RED_BAND).astype(np.float32)
        nir = src.read(NIR_BAND).astype(np.float32)

        # Divide only where both bands are positive; other pixels stay at 0.
        both_positive = (red > 0) & (nir > 0)
        ndvi = np.zeros_like(red, dtype=np.float32)
        np.divide(nir - red, nir + red, out=ndvi, where=both_positive)

        profile = src.profile.copy()
        profile.update(
            {
                "count": 1,
                "dtype": "float32",
                "nodata": 0,
                "compress": "lzw",
            }
        )

        with rasterio.open(output_file, "w", **profile) as dst:
            dst.write(ndvi, 1)
            dst.set_band_description(1, "NDVI")
|
||||
|
||||
|
||||
def _get_ndvi_value(ndvi_file, site_position):
    """Sample band 1 of ``ndvi_file`` at the site; return a float or ``None``.

    ``site_position`` is read as ``(lat, lon)`` in EPSG:4326 and reprojected to
    the raster CRS. ``None`` is returned when the point is outside the raster
    bounds, the sampled value equals the raster nodata value or is NaN, or any
    error occurs (the error is printed). A genuine 0 is returned as-is.
    """
    try:
        with rasterio.open(ndvi_file) as src:
            xs, ys = transform_coords(
                "EPSG:4326", src.crs, [site_position[1]], [site_position[0]]
            )
            px, py = xs[0], ys[0]

            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None  # Point is outside raster bounds

            first = next(iter(src.sample([(px, py)])), None)
            if first is not None:
                value = float(first[0])
                # Distinguish real nodata / NaN from a valid 0 (no vegetation).
                is_nodata = src.nodata is not None and value == src.nodata
                if is_nodata or np.isnan(value):
                    return None
                return value
    except Exception as e:
        print(f"Error sampling {ndvi_file.name}: {e}")
    return None
|
||||
|
||||
|
||||
def _create_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
    """Write ``output_dir/timeseries.json`` with one NDVI entry per raster in ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` are ignored. The date is the first
    8-digit ``_``-separated token of the file name (ISO-formatted when it
    parses as ``%Y%m%d``, otherwise kept raw); if there is none, the first
    token is used and a warning is printed. Values come from ``_sample_3x3``;
    an entry is written even when no NDVI could be sampled (``ndvi`` is then
    ``None``). A ``blue`` field is added when the band means provide ``b02``.
    """
    print(f"[NDVI-{source_name}] Creating timeseries.json...")

    rasters = [p for p in sorted(input_dir.glob(pattern)) if "DIST_CLOUD" not in p.name]
    records = []
    for raster in rasters:
        filename = raster.name
        tokens = filename.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next((t for t in tokens if len(t) == 8 and t.isdigit()), None)

        if not date_str:
            date_str = tokens[0]
            date = date_str
            print(
                f"[NDVI-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
            )
        else:
            try:
                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
            except ValueError:
                date = date_str

        ndvi_value, band_means = _sample_3x3(raster, site_position)
        blue_mean = None
        if band_means:
            blue_mean = band_means.get("b02")
        if ndvi_value is None:
            print(
                f"[NDVI-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
            )

        record = {"date": date, "filename": filename, "ndvi": ndvi_value}
        if blue_mean is not None:
            record["blue"] = blue_mean
        records.append(record)

    timeseries = sorted(records, key=lambda rec: rec["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    timeseries_file = output_dir / "timeseries.json"
    timeseries_file.write_text(json.dumps(timeseries, indent=2))

    print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
|
||||
|
||||
|
||||
def _process_ndvi_files(
    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
):
    """Write an NDVI raster into ``output_dir`` for every suitable file in ``input_dir``.

    ``DIST_CLOUD`` files are skipped silently; files that cannot be opened or
    have fewer than 4 bands are skipped with a message. The output name is
    ``output_namer(file)`` when a namer is given, otherwise the input name.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[NDVI-{source_name}] Processing {input_dir}...")

    candidates = sorted(input_dir.glob(pattern))
    if not candidates:
        print(f"[NDVI-{source_name}] No files found")
        return

    for geotiff_file in candidates:
        # Single-band distance-to-clouds rasters are not suitable for NDVI.
        if "DIST_CLOUD" in geotiff_file.name:
            continue

        # RED and NIR are bands 3 and 4, so at least 4 bands are required.
        try:
            with rasterio.open(geotiff_file) as src:
                band_count = src.count
        except Exception as e:
            print(
                f"[NDVI-{source_name}] Skipping {geotiff_file.name} (error reading: {e})"
            )
            continue
        if band_count < 4:
            print(
                f"[NDVI-{source_name}] Skipping {geotiff_file.name} (only {band_count} band(s), need 4+)"
            )
            continue

        if output_namer:
            target_name = output_namer(geotiff_file)
        else:
            target_name = geotiff_file.name
        output_file = output_dir / target_name

        _calculate_and_write_ndvi(geotiff_file, output_file)
        print(f"[NDVI-{source_name}] Saved: {output_file}")
|
||||
|
||||
|
||||
def generate_ndvi_raw(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFF files are no longer created, only timeseries."""
    return None
|
||||
|
||||
|
||||
def _get_output_name_prepared(geotiff_file):
    """Return the output file name for a prepared input raster.

    - Non-``.tif`` inputs keep their name.
    - ``.tif`` inputs whose stem contains ``REFL`` become
      ``<date>_ndvi.geotiff``, where ``<date>`` is the third ``_`` token for
      names starting with ``S2`` (``S2A_MSIL2A_YYYYMMDD_REFL``), the second
      token for ``composite_...`` names, and otherwise the second token (or
      the only token when there is just one).
    - Any other ``.tif`` input only has its suffix changed to ``.geotiff``.
    """
    if geotiff_file.suffix != ".tif":
        return geotiff_file.name

    if "REFL" in geotiff_file.stem:
        parts = geotiff_file.stem.split("_")
        if len(parts) >= 3 and parts[0].startswith("S2"):
            # S2 format: S2A_MSIL2A_YYYYMMDD_REFL
            date_str = parts[2]
        elif len(parts) >= 2 and parts[0] == "composite":
            # S3 format: composite_YYYYMMDD
            date_str = parts[1]
        else:
            # Fallback: second token when present, else the only token.
            date_str = parts[1] if len(parts) > 1 else parts[0]
        return f"{date_str}_ndvi.geotiff"

    # Swap only the final suffix; a plain str.replace would also rewrite any
    # earlier ".tif" occurring inside the name.
    return geotiff_file.with_suffix(".geotiff").name
|
||||
|
||||
|
||||
def _fusion_namer(f):
    """Return ``<token>_ndvi.geotiff`` using the second ``_`` token of ``f``'s stem.

    Raises ``IndexError`` when the stem contains no underscore.
    """
    second_token = f.stem.split("_", 2)[1]
    return second_token + "_ndvi.geotiff"
|
||||
|
||||
|
||||
def generate_ndvi_post_process(season, site_position, site_name):
    """Do nothing: NDVI GeoTIFF files are no longer created, only timeseries."""
    return None
|
||||
|
||||
|
||||
def create_ndvi_timeseries_post_process(season, site_position, site_name):
    """Build NDVI timeseries for every processed strategy/sigma combination.

    For each of ``aggressive`` / ``nonaggressive`` and sigma 20 / 30, the
    ``s2``, ``s3`` and ``fusion`` folders under
    ``data/{site_name}/{season}/processed_{strategy}_sigma{sigma}`` are
    processed in that order, writing into the matching ``ndvi/`` subfolder.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            processed_dir = f"processed_{strategy}_sigma{sigma}"

            # (input dir, output dir, log label) in processing order.
            jobs = [
                (
                    Path(f"data/{site_name}/{season}/{processed_dir}/{source}/"),
                    Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/"),
                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
                )
                for source in ["s2", "s3"]
            ]
            jobs.append(
                (
                    Path(f"data/{site_name}/{season}/{processed_dir}/fusion/"),
                    Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/"),
                    f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
                )
            )

            for input_dir, output_dir, label in jobs:
                _create_timeseries_for_dir(input_dir, output_dir, site_position, label)
|
||||
|
||||
|
||||
def _calculate_and_write_gcc(input_file, output_file):
    """Compute GCC from ``input_file`` and write it as a single-band float32 raster.

    GCC is ``green / (red + green + blue)`` where the band sum is positive and
    0 elsewhere; 0 is also declared as the output nodata value. The output
    reuses the input profile with LZW compression.
    """
    with rasterio.open(input_file) as src:
        blue = src.read(BLUE_BAND).astype(np.float32)
        green = src.read(GREEN_BAND).astype(np.float32)
        red = src.read(RED_BAND).astype(np.float32)

        band_sum = red + green + blue
        gcc = np.zeros_like(green, dtype=np.float32)
        # Divide only where the band sum is positive; other pixels stay at 0.
        np.divide(green, band_sum, out=gcc, where=band_sum > 0)

        profile = src.profile.copy()
        profile.update(
            {
                "count": 1,
                "dtype": "float32",
                "nodata": 0,
                "compress": "lzw",
            }
        )

        with rasterio.open(output_file, "w", **profile) as dst:
            dst.write(gcc, 1)
            dst.set_band_description(1, "GCC")
|
||||
|
||||
|
||||
def _get_gcc_value(gcc_file, site_position):
    """Sample band 1 of ``gcc_file`` at the site; return a float or ``None``.

    ``site_position`` is read as ``(lat, lon)`` in EPSG:4326 and reprojected to
    the raster CRS. ``None`` is returned when the point is outside the raster
    bounds, the sampled value equals the raster nodata value or is NaN, or any
    error occurs (the error is printed).
    """
    try:
        with rasterio.open(gcc_file) as src:
            xs, ys = transform_coords(
                "EPSG:4326", src.crs, [site_position[1]], [site_position[0]]
            )
            px, py = xs[0], ys[0]

            inside = (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            )
            if not inside:
                return None

            first = next(iter(src.sample([(px, py)])), None)
            if first is not None:
                value = float(first[0])
                is_nodata = src.nodata is not None and value == src.nodata
                if is_nodata or np.isnan(value):
                    return None
                return value
    except Exception as e:
        print(f"Error sampling {gcc_file.name}: {e}")
    return None
|
||||
|
||||
|
||||
def _get_gcc_from_original(input_file, site_position):
    """Return the mean GCC in the 3x3 window around the site, or ``None``.

    ``site_position`` is read as ``(lat, lon)`` in EPSG:4326. Two layouts are
    handled:

    - single-band raster: band 1 is averaged directly over finite, positive
      window pixels;
    - three or more bands: per-pixel ``green / (red + green + blue)`` is
      averaged over window pixels with a positive band sum and no negative band.

    ``None`` is returned for two-band rasters, points outside the raster, windows
    without any valid pixel, and on any exception (which is swallowed).
    """
    try:
        with rasterio.open(input_file) as src:
            band_count = src.count
            if band_count != 1 and band_count < 3:
                return None

            xs, ys = transform_coords(
                "EPSG:4326", src.crs, [site_position[1]], [site_position[0]]
            )
            px, py = xs[0], ys[0]
            if not (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            ):
                return None

            row, col = src.index(px, py)
            if not (0 <= row < src.height and 0 <= col < src.width):
                return None

            # 3x3 window around the site pixel, clipped at the raster edges.
            rows = slice(max(0, row - 1), min(src.height, row + 2))
            cols = slice(max(0, col - 1), min(src.width, col + 2))

            if band_count == 1:
                window = src.read(1).astype(np.float32)[rows, cols]
                usable = np.isfinite(window) & (window > 0)
                if not np.any(usable):
                    return None
                return float(np.mean(window[usable]))

            blue_window = src.read(BLUE_BAND).astype(np.float32)[rows, cols]
            green_window = src.read(GREEN_BAND).astype(np.float32)[rows, cols]
            red_window = src.read(RED_BAND).astype(np.float32)[rows, cols]

            total = red_window + green_window + blue_window
            usable = (
                (total > 0)
                & ~np.isnan(total)
                & (blue_window >= 0)
                & (green_window >= 0)
                & (red_window >= 0)
            )
            if not np.any(usable):
                negative_pixels = np.sum(
                    (blue_window < 0) | (green_window < 0) | (red_window < 0)
                )
                if negative_pixels > 0:
                    print(
                        f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)"
                    )
                return None

            # At least one usable pixel exists here, so the mean is defined.
            return float(np.mean(green_window[usable] / total[usable]))
    except Exception:
        return None
|
||||
|
||||
|
||||
def _create_gcc_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
    """Write ``output_dir/timeseries.json`` with one GCC entry per raster in ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` are ignored. The date is the first
    8-digit ``_``-separated token of the file name (ISO-formatted when it
    parses as ``%Y%m%d``, otherwise kept raw); if there is none, the first
    token is used and a warning is printed. Values come from
    ``_get_gcc_from_original``; an entry is written even when sampling failed
    (``greenness_index`` is then ``None``).
    """
    print(f"[GCC-{source_name}] Creating timeseries.json...")

    rasters = [p for p in sorted(input_dir.glob(pattern)) if "DIST_CLOUD" not in p.name]
    records = []
    for raster in rasters:
        filename = raster.name
        tokens = filename.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next((t for t in tokens if len(t) == 8 and t.isdigit()), None)

        if not date_str:
            date_str = tokens[0]
            date = date_str
            print(
                f"[GCC-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'"
            )
        else:
            try:
                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
            except ValueError:
                date = date_str

        gcc_value = _get_gcc_from_original(raster, site_position)
        if gcc_value is None:
            print(
                f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
            )

        records.append(
            {"date": date, "filename": filename, "greenness_index": gcc_value}
        )

    timeseries = sorted(records, key=lambda rec: rec["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    timeseries_file = output_dir / "timeseries.json"
    timeseries_file.write_text(json.dumps(timeseries, indent=2))

    print(f"[GCC-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
|
||||
|
||||
|
||||
def _process_gcc_files(
    input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
):
    """Write a GCC raster into ``output_dir`` for every suitable file in ``input_dir``.

    ``DIST_CLOUD`` files are skipped silently; files that cannot be opened or
    have fewer than 3 bands are skipped with a message. The output name is
    ``output_namer(file)`` when a namer is given, otherwise the input name.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[GCC-{source_name}] Processing {input_dir}...")

    candidates = sorted(input_dir.glob(pattern))
    if not candidates:
        print(f"[GCC-{source_name}] No files found")
        return

    for geotiff_file in candidates:
        if "DIST_CLOUD" in geotiff_file.name:
            continue

        # GCC needs the blue, green and red bands (1-3).
        try:
            with rasterio.open(geotiff_file) as src:
                band_count = src.count
        except Exception as e:
            print(
                f"[GCC-{source_name}] Skipping {geotiff_file.name} (error reading: {e})"
            )
            continue
        if band_count < 3:
            print(
                f"[GCC-{source_name}] Skipping {geotiff_file.name} (only {band_count} band(s), need 3+)"
            )
            continue

        if output_namer:
            target_name = output_namer(geotiff_file)
        else:
            target_name = geotiff_file.name
        output_file = output_dir / target_name

        _calculate_and_write_gcc(geotiff_file, output_file)
        print(f"[GCC-{source_name}] Saved: {output_file}")
|
||||
|
||||
|
||||
def generate_gcc_post_process(season, site_position, site_name):
    """Do nothing: GCC GeoTIFF files are no longer created, only timeseries."""
    return None
|
||||
|
||||
|
||||
def create_gcc_timeseries_post_process(season, site_position, site_name):
    """Build GCC timeseries for every processed strategy/sigma combination.

    For each of ``aggressive`` / ``nonaggressive`` and sigma 20 / 30, the
    ``s2``, ``s3`` and ``fusion`` folders of
    ``processed_{strategy}_sigma{sigma}`` are processed first. The same three
    folders of ``processed_{strategy}_itb_sigma{sigma}`` follow, but only when
    that ITB directory exists.
    """
    for strategy in ("aggressive", "nonaggressive"):
        for sigma in (20, 30):
            processed_dir = f"processed_{strategy}_sigma{sigma}"

            # (input dir, output dir, log label) in processing order.
            jobs = [
                (
                    Path(f"data/{site_name}/{season}/{processed_dir}/{source}/"),
                    Path(f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/"),
                    f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
                )
                for source in ["s2", "s3"]
            ]
            jobs.append(
                (
                    Path(f"data/{site_name}/{season}/{processed_dir}/fusion/"),
                    Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/"),
                    f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
                )
            )

            itb_dir = f"processed_{strategy}_itb_sigma{sigma}"
            base_itb = Path(f"data/{site_name}/{season}/{itb_dir}")

            for input_dir, output_dir, label in jobs:
                _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, label)

            # ITB outputs are optional: handle them only when they were produced.
            if base_itb.exists():
                for source in ["s2", "s3"]:
                    _create_gcc_timeseries_for_dir(
                        base_itb / source,
                        base_itb / "gcc" / source,
                        site_position,
                        f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}",
                    )
                _create_gcc_timeseries_for_dir(
                    base_itb / "fusion",
                    base_itb / "gcc" / "fusion",
                    site_position,
                    f"POST-ITB-FUSION-{strategy}-σ{sigma}",
                )
|
||||
|
||||
|
||||
def _get_bands_from_original(input_file, site_position):
    """Extract mean B02, B03, B04, B8A from the 3x3 window at the site.

    Reads bands 1-4 over the window only. A window pixel counts when all four
    bands are non-NaN and strictly positive. Returns a dict with keys ``b02``,
    ``b03``, ``b04``, ``b8a``, or ``None`` when the raster has fewer than four
    bands, the site is outside the raster bounds, no pixel qualifies, or any
    exception occurs (which is swallowed).
    """
    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None

            xs, ys = transform_coords(
                "EPSG:4326", src.crs, [site_position[1]], [site_position[0]]
            )
            px, py = xs[0], ys[0]
            if not (
                src.bounds.left <= px <= src.bounds.right
                and src.bounds.bottom <= py <= src.bounds.top
            ):
                return None

            row, col = src.index(px, py)
            row_span = (max(0, row - 1), min(src.height, row + 2))
            col_span = (max(0, col - 1), min(src.width, col + 2))

            stack = np.stack(
                [
                    src.read(band, window=(row_span, col_span)).astype(np.float32)
                    for band in (1, 2, 3, 4)
                ]
            )
            usable = ~np.isnan(stack).any(axis=0)
            usable &= (stack > 0).all(axis=0)
            if not np.any(usable):
                return None

            labels = ("b02", "b03", "b04", "b8a")
            return {
                label: float(np.mean(stack[i][usable])) for i, label in enumerate(labels)
            }
    except Exception:
        return None
|
||||
|
||||
|
||||
def _create_bands_timeseries_for_dir(
    input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
    """Write ``output_dir/timeseries.json`` with per-date band means for ``input_dir``.

    Files whose name contains ``DIST_CLOUD`` and files without an 8-digit date
    token in their name are skipped. A file whose 8-digit token is not a valid
    ``%Y%m%d`` date is skipped with a warning instead of aborting the run. Band
    values come from ``_get_bands_from_original``; when it returns ``None`` the
    entry carries only ``date`` and ``filename``.
    """
    print(f"[BANDS-{source_name}] Creating timeseries.json...")
    timeseries = []
    for f in sorted(input_dir.glob(pattern)):
        if "DIST_CLOUD" in f.name:
            continue
        parts = f.name.replace(".geotiff", "").replace(".tif", "").split("_")
        date_str = next((p for p in parts if len(p) == 8 and p.isdigit()), None)
        if not date_str:
            continue
        try:
            date = datetime.strptime(date_str, "%Y%m%d").isoformat()
        except ValueError:
            # Eight digits that are not a calendar date (e.g. a product id).
            print(
                f"[BANDS-{source_name}] Warning: Could not extract date from {f.name}, skipping"
            )
            continue
        bands = _get_bands_from_original(f, site_position)
        timeseries.append({"date": date, "filename": f.name, **(bands or {})})
    timeseries.sort(key=lambda x: x["date"])
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
    print(
        f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
    )
|
||||
|
||||
|
||||
def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
    """Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv.

    Each input directory is expected to hold a ``timeseries.json`` list of
    records; a missing directory or unreadable file counts as an empty list.
    Records are keyed by the first ten characters of their ``date`` and merged
    per key in the order ndvi, gcc, bands (later sources overwrite earlier
    fields, ``date`` excluded).  The merged rows are written to
    ``export_dir / "timeseries.json"`` and, restricted to a fixed column set,
    to ``export_dir / "timeseries.csv"``.
    """

    def load(p):
        p = Path(p)
        if not p.exists():
            return []
        try:
            return json.loads((p / "timeseries.json").read_text())
        except (OSError, ValueError):
            # Best effort: a missing or corrupt file contributes no rows.
            return []

    ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)}
    gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)}
    bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)}
    keys = sorted(set(ndvi) | set(gcc) | set(bands))

    merged = []
    for k in keys:
        r = {"date": k, "filename": ""}
        for d in (ndvi.get(k, {}), gcc.get(k, {}), bands.get(k, {})):
            r.update({x: d[x] for x in d if x != "date"})
        merged.append(r)

    export_dir.mkdir(parents=True, exist_ok=True)
    (export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))

    cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]

    def esc(v):
        s = str(v) if v is not None else ""
        # Quote fields containing a delimiter, quote, or line break, and double
        # embedded quotes so that CSV readers recover the original value.
        if any(ch in s for ch in ',"\r\n'):
            return '"' + s.replace('"', '""') + '"'
        return s

    rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
    (export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
    print(
        f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
    )
|
||||
|
||||
|
||||
def create_prepared_fusion_timeseries(season, site_position, site_name):
    """Generate NDVI, GCC, and band timeseries for prepared S2/S3 and fusion outputs.

    For each strategy, every existing input directory under
    ``data/{site_name}/{season}/prepared_{strategy}`` (``s2``, ``s3``,
    ``fusion``, ``fusion_sigma30``) gets NDVI, GCC and band timeseries plus a
    merged export.  If the matching ``prepared_{strategy}_itb`` directory
    exists, only GCC timeseries are produced for its inputs.
    """
    season_root = Path(f"data/{site_name}/{season}")
    # (sub-directory, sigma shown in the log label)
    fusion_variants = (("fusion", 20), ("fusion_sigma30", 30))

    for strategy in ("aggressive", "nonaggressive"):
        prepared = season_root / f"prepared_{strategy}"

        jobs = [
            (source, f"PREPARED-{source.upper()}-{strategy}") for source in ("s2", "s3")
        ]
        jobs += [(sub, f"FUSION-{strategy}-σ{sigma}") for sub, sigma in fusion_variants]

        for sub, label in jobs:
            src_dir = prepared / sub
            if not src_dir.exists():
                continue
            builders = (
                (_create_timeseries_for_dir, "ndvi"),
                (_create_gcc_timeseries_for_dir, "gcc"),
                (_create_bands_timeseries_for_dir, "bands"),
            )
            for build, kind in builders:
                build(src_dir, prepared / kind / sub, site_position, label, "*.tif")
            _write_export(
                prepared / "ndvi" / sub,
                prepared / "gcc" / sub,
                prepared / "bands" / sub,
                prepared / "export" / sub,
            )

        itb_root = season_root / f"prepared_{strategy}_itb"
        if not itb_root.exists():
            continue

        itb_jobs = [
            (source, f"PREPARED-ITB-{source.upper()}-{strategy}")
            for source in ("s2", "s3")
        ]
        itb_jobs += [
            (sub, f"FUSION-ITB-{strategy}-σ{sigma}") for sub, sigma in fusion_variants
        ]
        for sub, label in itb_jobs:
            src_dir = itb_root / sub
            if src_dir.exists():
                _create_gcc_timeseries_for_dir(
                    src_dir, itb_root / "gcc" / sub, site_position, label, "*.tif"
                )
|
||||
|
||||
|
||||
def create_bands_timeseries_post_process(season, site_position, site_name):
    """Build band timeseries and merged exports for every processed scenario.

    Covers ``data/{site_name}/{season}/processed_{strategy}_sigma{sigma}`` for
    both strategies and sigma 20/30.  For each of ``s2``, ``s3`` and
    ``fusion`` whose input directory exists, band values are extracted from
    ``*.geotiff`` files and the ndvi/gcc/bands timeseries are merged into the
    scenario's ``export`` directory.
    """
    season_root = Path(f"data/{site_name}/{season}")
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (20, 30)
    ]

    for strategy, sigma in scenarios:
        root = season_root / f"processed_{strategy}_sigma{sigma}"
        for source in ("s2", "s3", "fusion"):
            rasters = root / source
            if not rasters.exists():
                continue
            bands_dir = root / "bands" / source
            _create_bands_timeseries_for_dir(
                rasters,
                bands_dir,
                site_position,
                f"POST-{source.upper()}-{strategy}-σ{sigma}",
                "*.geotiff",
            )
            _write_export(
                root / "ndvi" / source,
                root / "gcc" / source,
                bands_dir,
                root / "export" / source,
            )
|
||||
529
metrics_stats.py
529
metrics_stats.py
|
|
@ -1,529 +0,0 @@
|
|||
"""Metrics and statistics: temporal metrics and PhenoCam stats."""
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from scipy import sparse
|
||||
from scipy.sparse.linalg import spsolve
|
||||
from scipy.stats import pearsonr
|
||||
|
||||
# Smoothing penalty passed as ``lam`` to ``_whittaker_smooth_dict`` when
# ``calculate_all_metrics`` builds the ``s2_whittaker_lambda400`` baseline.
# NOTE(review): the name suggests units of days squared — confirm.
WHITTAKER_LAMBDA_DAYS_SQ = 400.0
|
||||
|
||||
|
||||
def _norm_date_key(s):
    """Return the date part of *s* as a string of at most ten characters.

    ``None`` is passed through.  Any other value is stringified and stripped;
    everything from the first ``"T"`` onwards is dropped and the remainder is
    truncated to ten characters (``YYYY-MM-DD`` for ISO timestamps).
    """
    if s is None:
        return None
    text = str(s).strip()
    date_part, _sep, _time_part = text.partition("T")
    return date_part[:10]
|
||||
|
||||
|
||||
def load_timeseries(filepath):
    """Read a JSON timeseries file into a ``{date: greenness_index}`` mapping.

    Returns an empty dict when *filepath* does not exist.  Every record must
    carry a ``date`` key; a missing ``greenness_index`` maps to ``None``.
    """
    source = Path(filepath)
    if not source.exists():
        return {}

    with open(source) as handle:
        records = json.load(handle)

    series = {}
    for record in records:
        series[record["date"]] = record.get("greenness_index")
    return series
|
||||
|
||||
|
||||
def match_dates(fusion_ts, phenocam_ts):
    """Align two date-keyed series on their common days.

    Keys are normalised with ``_norm_date_key``; when several keys collapse to
    the same day the first one encountered wins.  Days where either value is
    ``None`` are dropped.  Returns ``(fusion_values, phenocam_values, dates)``
    with the two value arrays as numpy arrays and ``dates`` sorted ascending.
    """

    def _first_per_day(series):
        per_day = {}
        for raw_key, value in series.items():
            day = _norm_date_key(raw_key)
            if day:
                per_day.setdefault(day, value)
        return per_day

    fusion_by_day = _first_per_day(fusion_ts)
    phenocam_by_day = _first_per_day(phenocam_ts)

    aligned = [
        (day, fusion_by_day[day], phenocam_by_day[day])
        for day in sorted(set(fusion_by_day) & set(phenocam_by_day))
        if fusion_by_day[day] is not None and phenocam_by_day[day] is not None
    ]

    dates = [day for day, _f, _p in aligned]
    fusion_vals = [f for _day, f, _p in aligned]
    phenocam_vals = [p for _day, _f, p in aligned]
    return np.array(fusion_vals), np.array(phenocam_vals), dates
|
||||
|
||||
|
||||
def pearson_correlation(y_true, y_pred):
    """Return Pearson's r between the two series as a float.

    Returns ``None`` when fewer than two samples are given or when either
    series has zero standard deviation.
    """
    if len(y_true) < 2:
        return None
    if np.std(y_true) == 0 or np.std(y_pred) == 0:
        return None
    coefficient, _p_value = pearsonr(y_true, y_pred)
    return float(coefficient)
|
||||
|
||||
|
||||
def r_squared(y_true, y_pred):
    """Generalized R² relative to predicting ``mean(y_true)``; may be negative.

    Computed as ``1 - SS_res / SS_tot``; this is not the square of Pearson's r.
    Returns ``None`` for fewer than two samples or a constant ``y_true``.
    """
    if len(y_true) < 2 or np.std(y_true) == 0:
        return None

    residual_ss = np.sum((y_true - y_pred) ** 2)
    total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
    if total_ss == 0:
        return None

    ratio = residual_ss / total_ss
    return float(1 - ratio)
|
||||
|
||||
|
||||
def rmse(y_true, y_pred):
    """Return the root mean square error as a float, or ``None`` for empty input."""
    if not len(y_true):
        return None
    squared_error = (y_true - y_pred) ** 2
    return float(np.sqrt(np.mean(squared_error)))
|
||||
|
||||
|
||||
def mae(y_true, y_pred):
    """Return the mean absolute error as a float, or ``None`` for empty input."""
    if not len(y_true):
        return None
    absolute_error = np.abs(y_true - y_pred)
    return float(np.mean(absolute_error))
|
||||
|
||||
|
||||
def nrmse(y_true, y_pred):
    """Return RMSE divided by ``mean(y_true)``.

    Returns ``None`` for empty input, when the mean of ``y_true`` is zero, or
    when ``rmse`` itself returns ``None``.
    """
    if len(y_true) == 0:
        return None

    reference_mean = np.mean(y_true)
    if reference_mean == 0:
        return None

    error = rmse(y_true, y_pred)
    if error is None:
        return None
    return float(error / reference_mean)
|
||||
|
||||
|
||||
def nse(y_true, y_pred):
    """Return the Nash-Sutcliffe Efficiency, ``1 - SS_err / SS_about_mean``.

    Returns ``None`` for fewer than two samples or when ``y_true`` has no
    variation about its mean.
    """
    if len(y_true) < 2:
        return None

    spread = np.sum((y_true - np.mean(y_true)) ** 2)
    squared_error = np.sum((y_true - y_pred) ** 2)
    if spread == 0:
        return None
    return float(1 - (squared_error / spread))
|
||||
|
||||
|
||||
def residual_vs_phenocam(fusion_ts, phenocam_ts):
    """Summarise the residual (fused GCC − PhenoCam GCC) over matched dates.

    Returns ``None`` when fewer than two dates match.  Otherwise returns the
    residual's ``mean``, ``std``, ``mae``, ``rmse`` and ``n_samples``.  A
    positive mean means the fused series sits above PhenoCam on average, a
    negative mean below.
    """
    fused, reference, _matched_days = match_dates(fusion_ts, phenocam_ts)
    if len(fused) < 2:
        return None

    residual = fused - reference
    summary = {}
    summary["mean"] = float(np.mean(residual))
    summary["std"] = float(np.std(residual))
    summary["mae"] = float(np.mean(np.abs(residual)))
    summary["rmse"] = float(np.sqrt(np.mean(residual**2)))
    summary["n_samples"] = int(len(residual))
    return summary
|
||||
|
||||
|
||||
def calculate_temporal_metrics(fusion_ts, phenocam_ts):
    """Score a series against PhenoCam on their matched dates.

    PhenoCam values are the reference (``y_true``) and the other series the
    prediction.  Returns ``None`` when fewer than two dates match; otherwise a
    dict of agreement metrics, where ``nse`` and ``nse_pc`` hold the same
    value, plus ``residual_vs_phenocam`` when that summary is available.
    """
    predicted, observed, matched_days = match_dates(fusion_ts, phenocam_ts)
    if len(predicted) < 2:
        return None

    efficiency = nse(observed, predicted)
    if matched_days:
        span = {"start": matched_days[0], "end": matched_days[-1]}
    else:
        span = None

    summary = {
        "pearson_r": pearson_correlation(observed, predicted),
        "r_squared": r_squared(observed, predicted),
        "rmse": rmse(observed, predicted),
        "mae": mae(observed, predicted),
        "nrmse": nrmse(observed, predicted),
        "nse_pc": efficiency,
        "nse": efficiency,
        "n_samples": len(predicted),
        "date_range": span,
    }

    residual = residual_vs_phenocam(fusion_ts, phenocam_ts)
    if residual:
        summary["residual_vs_phenocam"] = residual
    return summary
|
||||
|
||||
|
||||
def derived_tier1(temporal: dict) -> dict:
    """ΔNSE_PC (σ20 − σ30) and paired BtI vs ItB mean residual.

    *temporal* maps scenario keys (``{strategy}_sigma{20|30}`` for BtI, with an
    ``_itb`` suffix for ItB) to metric dicts.  Missing or ``None`` entries,
    including a ``None`` ``residual_vs_phenocam`` block, yield ``None`` values
    in the output instead of raising.

    ΔNSE_PC > 0 → NSE_PC higher at σ=20 than σ=30 (tighter EFAST temporal kernel wins).
    ΔNSE_PC < 0 → σ=30 wins (broader smoothing matches PhenoCam better).

    Returns a dict with ``delta_nse_pc_sigma20_minus_sigma30`` (per mode and
    strategy) and ``bti_vs_itb_mean_residual`` (one row per strategy/sigma).
    """
    strategies = ("aggressive", "nonaggressive")

    def _metric(key, name):
        return (temporal.get(key) or {}).get(name)

    def _mean_residual(key):
        # ``or {}`` rather than a ``.get`` default: the key may be present
        # with a None value (e.g. JSON null), which a default does not cover.
        return (_metric(key, "residual_vs_phenocam") or {}).get("mean")

    def _as_float(value):
        return float(value) if isinstance(value, (int, float)) else None

    d_nse = {"bti": {}, "itb": {}}
    for strategy in strategies:
        for mode, suf in (("bti", ""), ("itb", "_itb")):
            n20 = _metric(f"{strategy}_sigma20{suf}", "nse_pc")
            n30 = _metric(f"{strategy}_sigma30{suf}", "nse_pc")
            if isinstance(n20, (int, float)) and isinstance(n30, (int, float)):
                d_nse[mode][strategy] = float(n20 - n30)
            else:
                d_nse[mode][strategy] = None

    paired = [
        {
            "strategy": strategy,
            "sigma": sig,
            "mean_residual_bti": _as_float(_mean_residual(f"{strategy}_sigma{sig}")),
            "mean_residual_itb": _as_float(
                _mean_residual(f"{strategy}_sigma{sig}_itb")
            ),
        }
        for strategy in strategies
        for sig in (20, 30)
    ]

    return {
        "delta_nse_pc_sigma20_minus_sigma30": d_nse,
        "bti_vs_itb_mean_residual": paired,
    }
|
||||
|
||||
|
||||
# BtI scenario keys that ``derived_matched_pair_workflow`` pairs with their
# ``_itb`` counterparts: every strategy at sigma 20 and 30, strategy-major.
MATCHED_PAIR_CONFIGS = tuple(
    f"{strategy}_sigma{sigma}"
    for strategy in ("aggressive", "nonaggressive")
    for sigma in (20, 30)
)
|
||||
|
||||
|
||||
def derived_matched_pair_workflow(temporal: dict) -> dict:
    """Per-config BtI vs ItB NSE_PC/RMSE pairs and site-level consistency flags.

    For every key in ``MATCHED_PAIR_CONFIGS`` the BtI entry (``config``) is
    compared with the ItB entry (``{config}_itb``).  Non-numeric or missing
    values produce ``None`` fields.  ``consistency`` and
    ``nse_bti_wins_count`` both report how many configs have a positive
    ``nse_pc`` difference (BtI minus ItB).
    """

    def _as_float(value):
        return float(value) if isinstance(value, (int, float)) else None

    per_config = []
    nse_deltas: list[float] = []
    nse_bti_wins_count = 0
    residual_bti_wins_count = 0

    for config in MATCHED_PAIR_CONFIGS:
        bti = temporal.get(config) or {}
        itb = temporal.get(f"{config}_itb") or {}

        nse_bti = _as_float(bti.get("nse_pc"))
        nse_itb = _as_float(itb.get("nse_pc"))
        rmse_bti = _as_float(bti.get("rmse"))
        rmse_itb = _as_float(itb.get("rmse"))
        mean_bti = _as_float((bti.get("residual_vs_phenocam") or {}).get("mean"))
        mean_itb = _as_float((itb.get("residual_vs_phenocam") or {}).get("mean"))

        delta_nse = None
        bti_wins = None
        if nse_bti is not None and nse_itb is not None:
            delta_nse = nse_bti - nse_itb
            bti_wins = delta_nse > 0
            nse_deltas.append(delta_nse)
            if bti_wins:
                nse_bti_wins_count += 1

        delta_rmse = None
        if rmse_bti is not None and rmse_itb is not None:
            delta_rmse = rmse_bti - rmse_itb

        # NOTE(review): this compares signed mean residuals (larger counts as
        # a BtI win), not their distance from zero — confirm this is intended.
        residual_bti_wins = None
        if mean_bti is not None and mean_itb is not None:
            if mean_bti > mean_itb:
                residual_bti_wins_count += 1
                residual_bti_wins = True
            elif mean_bti < mean_itb:
                residual_bti_wins = False

        per_config.append(
            {
                "config": config,
                "nse_pc_bti": nse_bti,
                "nse_pc_itb": nse_itb,
                "rmse_bti": rmse_bti,
                "rmse_itb": rmse_itb,
                "delta_nse_bti_minus_itb": delta_nse,
                "delta_rmse_bti_minus_itb": delta_rmse,
                "bti_wins": bti_wins,
                "residual_bti_wins": residual_bti_wins,
            }
        )

    if nse_deltas:
        mean_delta_nse = float(sum(nse_deltas) / len(nse_deltas))
    else:
        mean_delta_nse = None

    return {
        "per_config": per_config,
        "consistency": nse_bti_wins_count,
        "nse_bti_wins_count": nse_bti_wins_count,
        "residual_bti_wins_count": residual_bti_wins_count,
        "residual_nse_mismatch": residual_bti_wins_count != nse_bti_wins_count,
        "mean_delta_nse": mean_delta_nse,
    }
|
||||
|
||||
|
||||
def calculate_phenocam_stats(phenocam_ts):
    """Summarise the non-``None`` values of a PhenoCam series.

    Returns ``None`` when no value is available, otherwise a dict with
    ``mean``, ``std``, ``min``, ``max`` and ``n_samples``.
    """
    present = np.array([value for value in phenocam_ts.values() if value is not None])
    if len(present) == 0:
        return None

    reducers = (("mean", np.mean), ("std", np.std), ("min", np.min), ("max", np.max))
    stats = {name: float(reduce(present)) for name, reduce in reducers}
    stats["n_samples"] = len(present)
    return stats
|
||||
|
||||
|
||||
def _s2_gcc_series_from_preselection(base: Path):
    """Build the raw S2 GCC series from ``s2_preselection.json``.

    Reads ``base / "raw" / "preselection" / "s2_preselection.json"`` and, per
    row, computes GCC = b03 / (b02 + b03 + b04) from the stored band values.
    Because it is a ratio, a common scale factor on the bands cancels out.
    Rows with no usable date, non-numeric bands, or a non-finite or
    non-positive band sum are skipped; the first row per day wins.

    Returns ``(all_gcc, flags)``: ``all_gcc`` maps ``YYYY-MM-DD`` to GCC and
    ``flags`` maps the same keys to
    ``(excluded_aggressive, excluded_nonaggressive)`` booleans.  Both are
    empty when the file does not exist.
    """
    source = base / "raw" / "preselection" / "s2_preselection.json"
    if not source.exists():
        return {}, {}

    with open(source) as handle:
        entries = json.load(handle)

    all_gcc: dict = {}
    flags: dict = {}
    for entry in entries:
        day = _norm_date_key(entry.get("date"))
        if not day:
            continue

        try:
            blue, green, red = (float(entry.get(band)) for band in ("b02", "b03", "b04"))
        except (TypeError, ValueError):
            continue

        band_sum = blue + green + red
        if not np.isfinite(band_sum) or band_sum <= 0:
            continue

        ratio = green / band_sum
        if not np.isfinite(ratio) or day in all_gcc:
            continue

        all_gcc[day] = float(ratio)
        flags[day] = (
            bool(entry.get("excluded_aggressive")),
            bool(entry.get("excluded_nonaggressive")),
        )
    return all_gcc, flags
|
||||
|
||||
|
||||
def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
    """Whittaker-smooth observations onto a daily grid; returns ``{YYYY-MM-DD: z}``.

    Observations with a ``None`` value or an empty date key are ignored.  The
    grid runs from the first to the last observed day; observed days get
    weight 1 and all other days weight 0, with a second-difference penalty
    scaled by *lam*.  Returns an empty dict when fewer than two observations
    remain or the grid has fewer than *n_min* days.
    """
    observations = []
    for raw_day, raw_value in zip(obs_dates, obs_values):
        if raw_value is None:
            continue
        day = _norm_date_key(raw_day)
        if day:
            observations.append((day, float(raw_value)))
    if len(observations) < 2:
        return {}

    ordered_days = sorted({day for day, _value in observations})
    start = datetime.strptime(ordered_days[0], "%Y-%m-%d").date()
    end = datetime.strptime(ordered_days[-1], "%Y-%m-%d").date()
    n_days = (end - start).days + 1
    if n_days < n_min:
        return {}

    weights = np.zeros(n_days)
    signal = np.zeros(n_days)
    for day, value in observations:
        offset = (datetime.strptime(day, "%Y-%m-%d").date() - start).days
        if 0 <= offset < n_days:
            weights[offset] = 1.0
            signal[offset] = value

    # Second-order difference operator; its Gram matrix is the roughness penalty.
    second_diff = sparse.diags(
        [1.0, -2.0, 1.0],
        [0, 1, 2],
        shape=(n_days - 2, n_days),
        format="csc",
        dtype=np.float64,
    )
    penalty = second_diff.T @ second_diff
    weight_matrix = sparse.diags(weights.astype(np.float64), format="csc")
    smoothed = spsolve(weight_matrix + lam * penalty, weights * signal)

    return {
        (start + timedelta(days=offset)).isoformat(): float(smoothed[offset])
        for offset in range(n_days)
    }
|
||||
|
||||
|
||||
def calculate_all_metrics(season, site_name, site_position):
    """Compute all PhenoCam comparison metrics for one site/season and save them.

    Works under ``data/{site_name}/{season}``.  When no PhenoCam series is
    found, a warning is printed and ``{"temporal": {}}`` is returned without
    writing anything.  Otherwise the result holds PhenoCam summary stats, the
    PhenoCam SNR (when available), baseline metrics (raw S2, cloud-filtered
    S2, Whittaker-smoothed S2, S3), temporal metrics for each BtI and ItB
    fusion scenario, and derived comparisons.  The result is written to
    ``metrics.json`` and returned.  *site_position* is accepted but unused.
    """
    del site_position
    results = {"temporal": {}}
    base = Path(f"data/{site_name}/{season}")

    # The PhenoCam reference series is shared by every comparison below.
    phenocam_ts = load_timeseries(base / "raw" / "phenocam" / "phenocam_gcc.json")
    if not phenocam_ts:
        print("[METRICS] Warning: No phenocam data found")
        return results

    phenocam_stats = calculate_phenocam_stats(phenocam_ts)
    if phenocam_stats:
        results["phenocam_stats"] = phenocam_stats

    from phenocam_snr import compute_snr, load_phenocam_snr, write_phenocam_snr

    # SNR: cached sidecar first, then try to create it, then compute in memory.
    data_root = Path("data")
    snr_info = load_phenocam_snr(site_name, season, base=data_root)
    if not snr_info:
        write_phenocam_snr(
            site_name, season, base=data_root, metrics=results, fetch_if_missing=True
        )
        snr_info = load_phenocam_snr(site_name, season, base=data_root)
    if not snr_info:
        snr_info = compute_snr(
            site_name, season, base=data_root, metrics=results, fetch_if_missing=True
        )
    if snr_info.get("snr") is not None:
        results["phenocam_snr"] = {
            field: snr_info.get(field)
            for field in ("amplitude", "spline_rmse_gcc90", "snr")
        }

    strategies = ("aggressive", "nonaggressive")
    baseline = {}

    all_gcc, flags = _s2_gcc_series_from_preselection(base)
    if all_gcc:
        raw_metrics = calculate_temporal_metrics(all_gcc, phenocam_ts)
        if raw_metrics:
            baseline["s2"] = raw_metrics

        for flag_idx, strategy in enumerate(strategies):
            kept_items = sorted(
                (
                    (day, gcc)
                    for day, gcc in all_gcc.items()
                    if day in flags and not flags[day][flag_idx]
                ),
                key=lambda item: item[0],
            )
            if not kept_items:
                continue

            cloudfree = calculate_temporal_metrics(dict(kept_items), phenocam_ts)
            if cloudfree:
                baseline.setdefault("s2_cloudfree", {})[strategy] = cloudfree

            kept_days, kept_values = zip(*kept_items)
            smoothed = _whittaker_smooth_dict(
                kept_days, kept_values, WHITTAKER_LAMBDA_DAYS_SQ
            )
            if not smoothed:
                continue
            whittaker = calculate_temporal_metrics(smoothed, phenocam_ts)
            if whittaker:
                baseline.setdefault("s2_whittaker_lambda400", {})[strategy] = whittaker

    for strategy in strategies:
        s3_path = base / f"processed_{strategy}_sigma20" / "gcc" / "s3" / "timeseries.json"
        if not s3_path.exists():
            continue
        s3_ts = load_timeseries(s3_path)
        if not s3_ts:
            continue
        s3_metrics = calculate_temporal_metrics(s3_ts, phenocam_ts)
        if s3_metrics:
            baseline.setdefault("s3", {})[strategy] = s3_metrics

    if baseline:
        results["baseline"] = baseline

    # Fusion scenarios: all BtI runs first, then all ItB runs.
    for is_itb in (False, True):
        for strategy in strategies:
            for sigma in (20, 30):
                if is_itb:
                    scenario_name = f"{strategy}_sigma{sigma}_itb"
                    processed_dir = f"processed_{strategy}_itb_sigma{sigma}"
                    missing_msg = f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping"
                else:
                    scenario_name = f"{strategy}_sigma{sigma}"
                    print(f"[METRICS] Calculating metrics for {scenario_name}...")
                    processed_dir = f"processed_{strategy}_sigma{sigma}"
                    missing_msg = f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping"

                fusion_ts = load_timeseries(
                    base / processed_dir / "gcc" / "fusion" / "timeseries.json"
                )
                if not fusion_ts:
                    print(missing_msg)
                    continue

                scenario_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
                if scenario_metrics:
                    results["temporal"][scenario_name] = scenario_metrics

    temporal = results["temporal"]
    if temporal:
        derived = derived_tier1(temporal)
        derived["matched_pair_workflow"] = derived_matched_pair_workflow(temporal)
        results["derived"] = derived

    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as sink:
        json.dump(results, sink, indent=2)
    print(f"[METRICS] Saved results to {output_path}")

    return results
|
||||
|
||||
|
||||
def main():
    """Standalone script entry point.

    Expects four command-line arguments: season, site name, latitude and
    longitude.  With fewer, a usage message is printed and the process exits
    with status 1.  Otherwise the metrics are computed and written to
    ``data/{site_name}/{season}/metrics.json``.
    """
    import sys

    # sys.argv[0] is the script itself, so four arguments means five entries.
    if len(sys.argv) < 5:
        print("Usage: metrics_stats.py <season> <site_name> <lat> <lon>")
        print("Example: metrics_stats.py 2024 innsbruck 47.116171 11.320308")
        sys.exit(1)

    season = int(sys.argv[1])
    site_name = sys.argv[2]
    site_position = (float(sys.argv[3]), float(sys.argv[4]))

    results = calculate_all_metrics(season, site_name, site_position)

    # calculate_all_metrics returns without writing when no PhenoCam data is
    # found; saving here means a metrics.json exists in that case too.
    output_path = Path(f"data/{site_name}/{season}/metrics.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)

    print(f"[METRICS] Saved results to {output_path}")


if __name__ == "__main__":
    main()
|
||||
328
phenocam_snr.py
328
phenocam_snr.py
|
|
@ -1,328 +0,0 @@
|
|||
"""PhenoCam signal-to-noise ratio for aggregate utility eligibility (Richardson et al., 2018)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
# Base URL of the PhenoCam REST API; ``transition_dates_url`` queries its
# ``/roilists/`` endpoint.
PHENOCAM_API = "https://phenocam.nau.edu/api"
|
||||
# Matches a whole ``# Spline RMSE gcc_90: <number>`` header line
# (case-insensitive) and captures the number as group 1.
_SPLINE_RMSE_PATTERN = r"^\s*#\s*Spline\s+RMSE\s+gcc_90\s*:\s*([0-9.eE+-]+)\s*$"
SPLINE_RMSE_RE = re.compile(_SPLINE_RMSE_PATTERN, flags=re.IGNORECASE)
|
||||
|
||||
# Site name -> season; ``report_all_sites`` falls back to this mapping when
# no explicit ``sites`` argument is supplied.
PRIMARY_SEASON: dict[str, int] = dict(
    forthgr=2024,
    innsbruck=2024,
    pitsalu=2024,
    vindeln2=2023,
    sunflowerjerez1=2024,
    institutekarnobat=2024,
)
|
||||
|
||||
# PhenoCam ROI type codes used to build archive URLs, one per site (the first
# ROI used by acquisition when a site has several).
SITE_ROITYPE: dict[str, str] = dict(
    forthgr="AG",
    innsbruck="GR",
    pitsalu="WL",
    vindeln2="MX",
    sunflowerjerez1="AG",
    institutekarnobat="AG",
)
|
||||
|
||||
# Base URL of the PhenoCam data archive; ``transition_dates_archive_url``
# appends ``/{site}/ROI/{file}`` to it.
PHENOCAM_ARCHIVE = "https://phenocam.nau.edu/data/archive"
|
||||
|
||||
|
||||
def phenocam_snr_path(site_name: str, season: int, base: Path | None = None) -> Path:
    """Return the location of the SNR sidecar file for a site and season.

    The path is ``{base}/{site_name}/{season}/raw/phenocam/phenocam_snr.json``;
    *base* falls back to ``data`` when not given.
    """
    data_root = base if base else Path("data")
    phenocam_dir = data_root / site_name / str(season) / "raw" / "phenocam"
    return phenocam_dir / "phenocam_snr.json"
|
||||
|
||||
|
||||
def parse_spline_rmse_gcc90(text: str) -> float | None:
    """Extract the value from the first ``# Spline RMSE gcc_90: <value>`` line.

    Returns ``None`` when no line of *text* matches ``SPLINE_RMSE_RE`` or when
    the first matching line's value cannot be converted to ``float``.
    """
    matches = (SPLINE_RMSE_RE.match(line) for line in text.splitlines())
    first_hit = next((hit for hit in matches if hit), None)
    if first_hit is None:
        return None
    try:
        return float(first_hit.group(1))
    except ValueError:
        return None
|
||||
|
||||
|
||||
def transition_dates_archive_url(site_name: str, roitype: str, seq: int = 1000) -> str:
    """Build the archive URL of a site's 1-day transition-dates CSV.

    The file name is ``{site_name}_{roitype}_{seq}_1day_transition_dates.csv``
    under ``{PHENOCAM_ARCHIVE}/{site_name}/ROI/``.
    """
    csv_name = f"{site_name}_{roitype}_{seq}_1day_transition_dates.csv"
    return f"{PHENOCAM_ARCHIVE}/{site_name}/ROI/{csv_name}"
|
||||
|
||||
|
||||
def transition_dates_url(site_name: str) -> str | None:
    """Return a ``one_day_transition_dates`` URL for the site, or ``None``.

    When the site has an entry in ``SITE_ROITYPE``, archive URLs for sequence
    numbers 1000, 2000 and 1001 are probed with HEAD requests and the first
    one answering 200 is returned.  Otherwise, or if none answers, the
    paginated ``roilists`` API is searched for the first ROI of this site
    that has a ``one_day_transition_dates`` value.  Request errors are
    swallowed and lead to ``None``.
    """
    roi_code = SITE_ROITYPE.get(site_name)
    sequences = (1000, 2000, 1001) if roi_code else ()
    for sequence in sequences:
        candidate = transition_dates_archive_url(site_name, roi_code, sequence)
        try:
            probe = requests.head(candidate, timeout=15, allow_redirects=True)
        except requests.RequestException:
            continue
        if probe.status_code == 200:
            return candidate

    page_url = f"{PHENOCAM_API}/roilists/"
    query: dict | None = {"site": site_name}
    try:
        while page_url:
            response = requests.get(page_url, params=query, timeout=30)
            response.raise_for_status()
            payload = response.json()
            for roi in payload.get("results", []):
                if roi.get("site") != site_name:
                    continue
                link = roi.get("one_day_transition_dates")
                if link:
                    return link
            # The "next" link is used as-is, so the query is only sent once.
            page_url = payload.get("next")
            query = None
    except requests.RequestException:
        return None
    return None
|
||||
|
||||
|
||||
def fetch_spline_rmse_from_archive(site_name: str) -> float | None:
    """Fetch the spline RMSE directly from the PhenoCam archive.

    Requires the site to be listed in ``SITE_ROITYPE``.  Archive URLs for
    sequence numbers 1000, 2000 and 1001 are downloaded in turn; the first
    response with status 200 whose text yields a spline RMSE wins.  Returns
    ``None`` when the site is unknown or no candidate yields a value.
    """
    roi_code = SITE_ROITYPE.get(site_name)
    if not roi_code:
        return None

    for sequence in (1000, 2000, 1001):
        candidate = transition_dates_archive_url(site_name, roi_code, sequence)
        try:
            response = requests.get(candidate, timeout=20)
        except requests.RequestException:
            continue
        if response.status_code != 200:
            continue
        value = parse_spline_rmse_gcc90(response.text)
        if value is not None:
            return value
    return None
|
||||
|
||||
|
||||
def fetch_spline_rmse_gcc90(site_name: str) -> float | None:
    """Return the gcc_90 spline RMSE for a site, or ``None`` if unavailable.

    Tries ``fetch_spline_rmse_from_archive`` first.  If that yields nothing,
    the URL from ``transition_dates_url`` is downloaded and its text parsed.
    Request errors lead to ``None``.
    """
    from_archive = fetch_spline_rmse_from_archive(site_name)
    if from_archive is not None:
        return from_archive

    fallback_url = transition_dates_url(site_name)
    if not fallback_url:
        return None

    try:
        response = requests.get(fallback_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        return None
    return parse_spline_rmse_gcc90(response.text)
|
||||
|
||||
|
||||
def season_amplitude(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
) -> float | None:
    """Return the seasonal GCC amplitude (max minus min), or ``None``.

    If *metrics* carries numeric ``phenocam_stats`` ``min`` and ``max``, their
    difference is returned.  Otherwise the values are read from
    ``{base}/{site_name}/{season}/raw/phenocam/phenocam_gcc.json``, which may
    be a list of records with ``greenness_index`` or a dict of values;
    non-numeric values are ignored.  ``None`` is returned when the file is
    missing, has another top-level type, or holds no numeric value.
    """
    if metrics:
        stats = metrics.get("phenocam_stats") or {}
        low, high = stats.get("min"), stats.get("max")
        if isinstance(low, (int, float)) and isinstance(high, (int, float)):
            return float(high - low)

    data_root = base or Path("data")
    gcc_file = data_root / site_name / str(season) / "raw" / "phenocam" / "phenocam_gcc.json"
    if not gcc_file.is_file():
        return None

    payload = json.loads(gcc_file.read_text(encoding="utf-8"))
    if isinstance(payload, list):
        candidates = [item.get("greenness_index") for item in payload]
    elif isinstance(payload, dict):
        candidates = list(payload.values())
    else:
        return None

    numeric = [value for value in candidates if isinstance(value, (int, float))]
    if not numeric:
        return None
    return float(max(numeric) - min(numeric))
|
||||
|
||||
|
||||
def compute_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    spline_rmse: float | None = None,
    fetch_if_missing: bool = True,
) -> dict:
    """Return site, season, amplitude, spline RMSE and SNR in one dict.

    The amplitude comes from ``season_amplitude``.  The spline RMSE is taken
    from *spline_rmse* when given; otherwise from the cached sidecar file if
    one exists; otherwise, when *fetch_if_missing* is true, it is fetched via
    ``fetch_spline_rmse_gcc90``.  ``snr`` is amplitude divided by RMSE when
    both are numeric and the RMSE is positive, else ``None``.
    """
    data_root = base or Path("data")
    amplitude = season_amplitude(site_name, season, base=data_root, metrics=metrics)

    noise = spline_rmse
    if noise is None:
        sidecar = phenocam_snr_path(site_name, season, data_root)
        if sidecar.is_file():
            # An existing sidecar is authoritative; no fetch even if it lacks a value.
            cached = json.loads(sidecar.read_text(encoding="utf-8"))
            noise = cached.get("spline_rmse_gcc90")
        elif fetch_if_missing:
            noise = fetch_spline_rmse_gcc90(site_name)

    usable = (
        isinstance(amplitude, (int, float))
        and isinstance(noise, (int, float))
        and noise > 0
    )
    ratio = float(amplitude) / float(noise) if usable else None

    return {
        "site": site_name,
        "season": season,
        "amplitude": amplitude,
        "spline_rmse_gcc90": noise,
        "snr": ratio,
    }
|
||||
|
||||
|
||||
def write_phenocam_snr(
    site_name: str,
    season: int,
    *,
    base: Path | None = None,
    metrics: dict | None = None,
    fetch_if_missing: bool = True,
) -> Path | None:
    """Compute the SNR and store it as ``phenocam_snr.json``.

    Returns the written path.  When no spline RMSE could be determined, a
    warning is printed, nothing is written, and ``None`` is returned.  The
    sidecar also records the transition-dates URL and the site's ROI type.
    """
    data_root = base or Path("data")
    snr = compute_snr(
        site_name,
        season,
        base=data_root,
        metrics=metrics,
        fetch_if_missing=fetch_if_missing,
    )
    noise = snr.get("spline_rmse_gcc90")
    if noise is None:
        print(
            f"[PhenoCam-SNR] Warning: no spline RMSE for {site_name} {season}; "
            "skipping phenocam_snr.json"
        )
        return None

    sidecar = phenocam_snr_path(site_name, season, data_root)
    sidecar.parent.mkdir(parents=True, exist_ok=True)

    record = {
        "site": site_name,
        "season": season,
        "amplitude": snr.get("amplitude"),
        "spline_rmse_gcc90": noise,
        "snr": snr.get("snr"),
        "source": "phenocam_1day_transition_dates_header",
        "transition_dates_url": transition_dates_url(site_name),
        "roitype": SITE_ROITYPE.get(site_name),
    }
    sidecar.write_text(json.dumps(record, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-SNR] Saved: {sidecar} (SNR={snr.get('snr')})")
    return sidecar
|
||||
|
||||
|
||||
def load_phenocam_snr(
    site_name: str, season: int, *, base: Path | None = None
) -> dict | None:
    """Return the parsed SNR sidecar for a site and season, or ``None`` if absent."""
    sidecar = phenocam_snr_path(site_name, season, base)
    if sidecar.is_file():
        return json.loads(sidecar.read_text(encoding="utf-8"))
    return None
|
||||
|
||||
|
||||
def suggest_snr_threshold(snrs: list[float]) -> tuple[float, str]:
    """Pick an SNR eligibility threshold from the cross-site SNR values.

    A data-driven threshold (the midpoint of a gap) is returned only when two
    neighbouring values in sorted order are at least 0.5 apart and the gap
    straddles 2.0 (lower value below 2, upper value at or above 2). In every
    other case the threshold is the default 2.0.

    Returns:
        ``(threshold, rationale)`` where *rationale* is a human-readable
        explanation of how the threshold was chosen.
    """
    if not snrs:
        return 2.0, "default SNR >= 2 (no site SNR values available)"

    ordered = sorted(snrs)
    if len(ordered) == 1:
        return 2.0, "default SNR >= 2 (single site only)"
    if all(value >= 2.0 for value in ordered):
        return 2.0, "default SNR >= 2 (all sites exceed 2; no low-SNR exclusion group)"

    # Walk neighbouring pairs: each pair is the boundary of one low/high split.
    for below, above in zip(ordered, ordered[1:]):
        if above - below >= 0.5 and below < 2.0 <= above:
            midpoint = (below + above) / 2.0
            return (
                round(midpoint, 3),
                f"gap between {below:.3f} and {above:.3f} straddles SNR=2 "
                f"(midpoint {midpoint:.3f})",
            )
    return 2.0, "default SNR >= 2 (no clear low/high cluster separation)"
|
||||
|
||||
|
||||
def report_all_sites(
    *,
    base: Path | None = None,
    sites: dict[str, int] | None = None,
    fetch_if_missing: bool = True,
) -> list[dict]:
    """Compute SNR per site, print a summary table, and return the rows.

    Args:
        base: Data root; falls back to ``Path("data")`` when falsy.
        sites: Mapping of site name to season; falls back to
            ``PRIMARY_SEASON`` when falsy.
        fetch_if_missing: Forwarded to ``compute_snr``.

    Returns:
        One dict per site as returned by ``compute_snr``. Rows with a numeric
        ``snr`` additionally get ``eligible_at_2``, ``eligible_at_3`` and
        ``eligible_at_suggested`` flags.
    """
    data_root = base if base else Path("data")
    season_by_site = sites if sites else PRIMARY_SEASON

    def shown(value):
        # Missing values are rendered as a placeholder in the table.
        return value if value is not None else "---"

    rows: list[dict] = []
    for name in sorted(season_by_site.keys()):
        year = season_by_site[name]
        metrics_file = data_root / name / str(year) / "metrics.json"
        loaded_metrics = (
            json.loads(metrics_file.read_text(encoding="utf-8"))
            if metrics_file.is_file()
            else None
        )
        rows.append(
            compute_snr(
                name,
                year,
                base=data_root,
                metrics=loaded_metrics,
                fetch_if_missing=fetch_if_missing,
            )
        )

    print(f"{'site':<20} {'season':>6} {'amplitude':>10} {'rmse_spl':>10} {'SNR':>8}")
    print("-" * 58)
    for row in rows:
        print(
            f"{row['site']:<20} {row['season']:>6} "
            f"{shown(row.get('amplitude')):>10} "
            f"{shown(row.get('spline_rmse_gcc90')):>10} "
            f"{shown(row.get('snr')):>8}"
        )

    numeric_snrs = [
        row["snr"] for row in rows if isinstance(row.get("snr"), (int, float))
    ]
    threshold, rationale = suggest_snr_threshold(numeric_snrs)
    print(f"\nSuggested threshold: SNR >= {threshold} ({rationale})")

    for row in rows:
        value = row.get("snr")
        if not isinstance(value, (int, float)):
            continue
        row["eligible_at_2"] = value >= 2.0
        row["eligible_at_3"] = value >= 3.0
        row["eligible_at_suggested"] = value >= threshold
    return rows
|
||||
|
||||
|
||||
# Script entry point: print the SNR table for the default site/season set.
if __name__ == "__main__":
    report_all_sites()
|
||||
|
|
@ -1,738 +0,0 @@
|
|||
"""
|
||||
PhenoCam GCC: green-up and green-down (50 % of seasonal amplitude) via TIMESAT.
|
||||
|
||||
Reads ``data/.../raw/phenocam/phenocam_gcc.json`` (or any path) and uses the
|
||||
``timesat`` package (``timesat.tsfprocess``) with the same seasonal-threshold
|
||||
meaning as the TIMESAT GUI: *startmethod* 1, *p_startcutoff* (0.5, 0.5) = 50 % of
|
||||
the **per-season** amplitude above the local base. See the TIMESAT manual,
|
||||
section 4.3 and row 37–38 (season start method = seasonal amplitude).
|
||||
|
||||
**License:** the ``timesat`` PyPI wheel is under the TIMESAT Research License
|
||||
(non-commercial research; see package metadata on PyPI).
|
||||
|
||||
PhenoCam time series: single-year acquisition writes
|
||||
``phenocam_gcc.json`` (and ``phenocam_gcc.csv``). The three-year series used
|
||||
for TIMESAT is stored separately as ``phenocam_gcc_3y.json`` in the same
|
||||
folder (created on first use from the one-day summary API, then reused).
|
||||
|
||||
Importable: ``write_phenocam_phenology_for_site`` is called from ``run.py``;
|
||||
the CLI entry point remains optional for ad-hoc runs.
|
||||
|
||||
**Saving results:** use ``-o path.json`` or ``--sidecar`` to write a JSON file
|
||||
(see ``--help``). Sidecar mode writes ``phenocam_phenology.json`` (two dates
|
||||
only) next to ``phenocam_gcc.json``.
|
||||
|
||||
``run_pipeline`` in ``run.py`` writes the same ``phenocam_phenology.json`` by
|
||||
default when ``timesat`` is installed. GCC for TIMESAT uses ``phenocam_gcc_3y.json``
|
||||
if present, otherwise the PhenoCam API for that site (listed in
|
||||
``data/sites.geojson``; not a site list from the API). One-year
|
||||
``phenocam_gcc.json`` on disk can still fill gaps when merged.
|
||||
|
||||
Use ``python phenology_timesat.py --all`` to batch every
|
||||
``(sitename, season)`` from ``data/sites.geojson`` (``properties.sitename`` and
|
||||
``properties.seasons``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
PHENOCAM_API = "https://phenocam.nau.edu/api"
|
||||
|
||||
try:
|
||||
import timesat as _timesat
|
||||
except ImportError:
|
||||
_timesat = None
|
||||
|
||||
NODATA = -9999.0
|
||||
|
||||
|
||||
def load_phenocam_gcc(path: Path) -> dict[str, float]:
    """Return a ``YYYY-MM-DD -> greenness index`` map from a PhenoCam GCC JSON list.

    The file is expected to hold a JSON list of objects with ``date`` and
    ``greenness_index`` keys. Only the first 10 characters of ``date`` are
    used as the key. Rows are skipped when the date is missing/null or when
    the index is missing, not convertible to ``float``, or not finite.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Explicit encoding: the writers in this module always emit UTF-8.
    with open(path, encoding="utf-8") as f:
        rows = json.load(f)

    out: dict[str, float] = {}
    for row in rows:
        # `or ""` keeps a JSON null date from turning into the key "None".
        day = str(row.get("date") or "")[:10]
        raw = row.get("greenness_index")
        if not day or raw is None:
            continue
        try:
            value = float(raw)
        except (TypeError, ValueError):
            # Non-numeric cell (e.g. "n/a"): skip the row instead of crashing.
            continue
        if np.isfinite(value):
            out[day] = value
    return out
|
||||
|
||||
|
||||
def _gcc_from_summary_row(row: dict, use_mean_fallback: bool) -> float | None:
    """Pull the daily GCC value out of one one-day-summary row.

    Args:
        row: One parsed CSV row (column name -> cell).
        use_mean_fallback: When true, read ``gcc_mean`` and ignore the outlier
            flag; otherwise read ``gcc_90`` and reject rows whose
            ``outlierflag_gcc_90`` is ``1`` / ``1.0``.

    Returns:
        The value as ``float``, or ``None`` when the row is flagged, the cell
        is missing, blank, ``NA`` (any case), not a number, or at/below the
        ``-9998`` no-data sentinel.
    """
    if use_mean_fallback:
        column = "gcc_mean"
    else:
        flag = row.get("outlierflag_gcc_90")
        if flag is not None and str(flag).strip() in ("1", "1.0"):
            return None
        column = "gcc_90"

    cell = row.get(column)
    if cell is None:
        return None

    token = str(cell).strip()
    if token == "" or token.upper() == "NA":
        return None

    try:
        number = float(token)
    except ValueError:
        return None
    return None if number <= -9998.0 else number
|
||||
|
||||
|
||||
def _phenocam_one_day_summary_csv_url(site_name: str) -> str | None:
    """Return the one-day summary CSV URL for *site_name*, or ``None`` on failure.

    Pages through ``{PHENOCAM_API}/roilists/`` (following ``next`` links) and
    returns the ``one_day_summary`` field of the first ROI whose ``site``
    equals *site_name*. ``None`` is returned when no ROI matches, when the
    field is empty, on any ``requests`` error, or when a response body is not
    valid JSON.
    """
    url: str | None = f"{PHENOCAM_API}/roilists/"
    # The query is only sent with the first request; `next` links are used as-is.
    params: dict | None = {"site": site_name}
    try:
        while url:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            page = response.json()
            for roi in page.get("results", []):
                # .get(): an ROI entry without "site" must not abort the lookup.
                if roi.get("site") == site_name:
                    return roi.get("one_day_summary") or None
            url = page.get("next")
            params = None
    except (requests.RequestException, ValueError):
        # ValueError: body was not JSON (older `requests` raise it directly).
        return None
    return None
|
||||
|
||||
|
||||
def _parse_phenocam_gcc_from_csv_text(
    text: str, start_date: str, end_date: str
) -> dict[str, float]:
    """Extract ``YYYY-MM-DD -> gcc`` for rows dated within [start_date, end_date].

    Empty lines and lines starting with ``#`` are dropped before CSV parsing.
    ``gcc_mean`` is used instead of ``gcc_90`` when the header has no
    ``gcc_90`` column. Rows with a missing or unparsable date are skipped.

    Raises:
        ValueError: if *start_date* or *end_date* is not ``YYYY-MM-DD``.
    """
    window_start = datetime.strptime(start_date, "%Y-%m-%d")
    window_end = datetime.strptime(end_date, "%Y-%m-%d")

    data_lines = [
        ln for ln in text.split("\n") if ln and not ln.startswith("#")
    ]
    reader = csv.DictReader(data_lines)
    use_mean_fallback = "gcc_90" not in (reader.fieldnames or ())

    by_day: dict[str, float] = {}
    for record in reader:
        try:
            stamp = record.get("date")
            if not stamp:
                continue
            when = datetime.strptime(stamp, "%Y-%m-%d")
            if when < window_start or when > window_end:
                continue
            value = _gcc_from_summary_row(record, use_mean_fallback)
            if value is not None:
                by_day[when.date().isoformat()] = value
        except (ValueError, KeyError):
            # Malformed row: ignore it and keep going.
            continue
    return by_day
|
||||
|
||||
|
||||
def save_phenocam_gcc_json(path: Path, by_date: dict[str, float]) -> None:
    """Write *by_date* as a date-sorted JSON list of ``{"date", "greenness_index"}`` objects.

    Parent directories are created as needed; the file is UTF-8, indented by
    two spaces, and ends with a newline.
    """
    records = [
        {"date": day, "greenness_index": by_date[day]} for day in sorted(by_date)
    ]
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(records, indent=2) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def fetch_phenocam_gcc_three_years_separately(
    site_name: str, season: int
) -> dict[str, float]:
    """Download one-day-summary GCC for calendar years ``season-1`` to ``season+1``.

    The summary CSV is fetched with a single HTTP GET and then filtered once
    per calendar year; the three per-year results are merged into one map.

    Returns:
        ``YYYY-MM-DD -> gcc``; empty when no summary URL is known for the site
        or the download fails (a message is printed in both cases).
    """
    merged: dict[str, float] = {}

    summary_url = _phenocam_one_day_summary_csv_url(site_name)
    if not summary_url:
        print(
            f"[PhenoCam phenology] No PhenoCam one-day summary URL for site {site_name!r}"
        )
        return merged

    try:
        response = requests.get(summary_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"[PhenoCam phenology] API CSV fetch failed: {e}")
        return merged

    body = response.text
    for year in range(season - 1, season + 2):
        merged.update(
            _parse_phenocam_gcc_from_csv_text(body, f"{year}-01-01", f"{year}-12-31")
        )
    return merged
|
||||
|
||||
|
||||
def load_or_fetch_phenocam_gcc_3y(
    site_name: str, season: int, gcc_3y_path: Path
) -> dict[str, float]:
    """Return the three-year GCC series, preferring the on-disk cache.

    If *gcc_3y_path* exists, parses, and is non-empty, its content is
    returned. Otherwise the series is fetched for *site_name* and, when
    non-empty, written to *gcc_3y_path* before being returned. An empty fetch
    yields ``{}`` and writes nothing.
    """
    if gcc_3y_path.is_file():
        try:
            from_disk = load_phenocam_gcc(gcc_3y_path)
        except (OSError, json.JSONDecodeError):
            # Unreadable or corrupt cache: fall through to a fresh fetch.
            from_disk = {}
        if from_disk:
            print(
                f"[PhenoCam phenology] Using {gcc_3y_path} ({len(from_disk)} values)"
            )
            return from_disk

    fetched = fetch_phenocam_gcc_three_years_separately(site_name, season)
    if not fetched:
        return {}

    save_phenocam_gcc_json(gcc_3y_path, fetched)
    print(
        f"[PhenoCam phenology] Fetched and wrote {gcc_3y_path} "
        f"({len(fetched)} values for {season - 1}–{season + 1})"
    )
    return fetched
|
||||
|
||||
|
||||
def resolve_phenocam_gcc_for_timesat(
    site_name: str, season: int, gcc_path: Path
) -> dict[str, float]:
    """Assemble the GCC series handed to TIMESAT.

    The three-year series (``phenocam_gcc_3y.json`` next to *gcc_path*, loaded
    or fetched) is merged with the one-year file at *gcc_path* when that file
    exists and parses. On duplicate dates the three-year value wins. When the
    three-year series is empty, the one-year map is returned unchanged.
    """
    three_year = load_or_fetch_phenocam_gcc_3y(
        site_name, season, gcc_path.parent / "phenocam_gcc_3y.json"
    )

    one_year: dict[str, float] = {}
    if gcc_path.is_file():
        try:
            one_year = load_phenocam_gcc(gcc_path)
        except (OSError, json.JSONDecodeError):
            # Best effort: an unreadable one-year file simply contributes nothing.
            one_year = {}

    if not three_year:
        return one_year

    combined = dict(one_year)
    combined.update(three_year)
    return combined
|
||||
|
||||
|
||||
def _day_count(calendar_year: int) -> int:
    """Return the number of days in *calendar_year* (365, or 366 in leap years)."""
    this_new_year = datetime(calendar_year, 1, 1)
    next_new_year = datetime(calendar_year + 1, 1, 1)
    span = next_new_year - this_new_year
    return span.days
|
||||
|
||||
|
||||
def daily_profile_for_year(by_date: dict[str, float], calendar_year: int) -> np.ndarray:
    """Build a gap-free daily GCC profile for *calendar_year*.

    Values are looked up in *by_date* by ``YYYY-MM-DD`` key. Only finite,
    strictly positive values count as valid. Missing days are filled by
    linear interpolation over the day index (``np.interp``); with a single
    valid day, that value is used for the whole year.

    Returns:
        ``float32`` array with one entry per day of the year (365 or 366).

    Raises:
        ValueError: if the year has no valid value at all.
    """
    n_days = _day_count(calendar_year)
    new_year = datetime(calendar_year, 1, 1)

    series = np.full(n_days, np.nan, dtype=np.float64)
    for offset in range(n_days):
        stamp = (new_year + timedelta(days=offset)).strftime("%Y-%m-%d")
        if stamp in by_date:
            series[offset] = by_date[stamp]

    usable = np.isfinite(series) & (series > 0.0)
    n_usable = int(np.sum(usable))
    if n_usable == 0:
        raise ValueError(f"No valid GCC in JSON for calendar year {calendar_year}")
    if n_usable == 1:
        return np.full(n_days, float(series[usable][0]), dtype=np.float32)

    positions = np.arange(n_days, dtype=np.float64)
    filled = np.interp(positions, positions[usable], series[usable])
    return filled.astype(np.float32)
|
||||
|
||||
|
||||
def _gcc_profile_365_for_timesat(profile: np.ndarray) -> np.ndarray:
    """Return *profile* as a flat ``float32`` array of exactly 365 values.

    A 366-value profile loses its last entry (Dec 31 of a leap year); a
    365-value profile is returned as-is.

    Raises:
        ValueError: if the flattened profile has any other length.
    """
    flat = np.asarray(profile, dtype=np.float32).ravel()
    if flat.size not in (365, 366):
        raise ValueError(f"expected 365 or 366 daily values, got {flat.size}")
    return flat if flat.size == 365 else flat[:365]
|
||||
|
||||
|
||||
def yyyydoy_to_iso(v: float) -> str:
    """Convert a ``YYYYDOY`` number (e.g. ``2024060``) to an ISO ``YYYY-MM-DD`` date.

    *v* is rounded to the nearest integer first; the last three digits are the
    1-based day of year, the rest is the year.
    """
    year, day_of_year = divmod(int(round(float(v))), 1000)
    moment = datetime(year, 1, 1) + timedelta(days=day_of_year - 1)
    return moment.date().isoformat()
|
||||
|
||||
|
||||
def build_yraw_three_years(
    by_date: dict[str, float], y1: int, y2: int, y3: int
) -> tuple[np.ndarray, str]:
    """Stack three 365-point daily GCC profiles into one TIMESAT input vector.

    When a profile can be built for each of *y1*, *y2*, *y3*, the three are
    concatenated and the mode is ``"three_independent_years"``. If building
    any of them raises ``ValueError`` (e.g. only one year was downloaded), the
    *y2* profile is repeated three times and the mode is
    ``"single_year_replicated"``.

    Returns:
        ``(yraw, mode)`` with *yraw* of length ``3 * 365``.

    Raises:
        ValueError: if not even the *y2* profile can be built.
    """

    def yearly(year: int) -> np.ndarray:
        return _gcc_profile_365_for_timesat(daily_profile_for_year(by_date, year))

    try:
        stacked = np.concatenate([yearly(year) for year in (y1, y2, y3)])
    except ValueError:
        # Legacy workaround: feed TIMESAT the middle year three times.
        return np.tile(yearly(y2), 3), "single_year_replicated"
    return stacked.astype(np.float32, copy=False), "three_independent_years"
|
||||
|
||||
|
||||
def run_timesat_phenology_from_yraw(
    yraw: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> dict[str, str | float | None]:
    """Run ``timesat.tsfprocess`` on a three-year daily stack and report SOS/EOS.

    Args:
        yraw: Daily values, ``365 * 3`` long after flattening.
        years_triplet: The three calendar years used to build the ``YYYYDOY``
            time vector; the middle one is the season being reported.
        start_cutoff: Pair written into slot 0 of ``p_startcutoff``.
        smooth_window: Value written into slot 0 of ``p_smooth``.
        p_ignoreday: Passed through to ``tsfprocess``.

    Returns:
        Dict with ``reference_calendar_year``, the green-up / green-down dates
        (ISO strings or ``None``) and fitted values, ``nseason`` and
        ``yfit_max``.

    Raises:
        ValueError: if *yraw* does not have ``365 * 3`` values.
        ImportError: if the ``timesat`` package is not installed.
    """
    series = np.asarray(yraw, dtype=np.float32).ravel()
    first_year, reference_year, last_year = years_triplet
    n_years = 3
    n_points = 365 * n_years
    if series.size != n_points:
        raise ValueError(f"yraw must have length {n_points}, got {series.size}")

    # YYYYDOY code for the first 365 days of each year in the triplet.
    time_codes = [
        int((datetime(year, 1, 1) + timedelta(days=offset)).strftime("%Y%j"))
        for year in (first_year, reference_year, last_year)
        for offset in range(365)
    ]
    tv = np.array(time_codes, dtype=np.int32)
    if len(tv) != n_points:
        raise RuntimeError("internal: length mismatch")

    def per_class(dtype, slot0=None) -> np.ndarray:
        # 255-entry parameter vector; only slot 0 is ever populated here.
        vec = np.zeros(255, dtype=dtype)
        if slot0 is not None:
            vec[0] = slot0
        return vec

    vi = np.asfortranarray(series.reshape(1, 1, -1))
    qa = np.asfortranarray(np.ones((1, 1, n_points), dtype=np.float32))
    lc = np.ones((1, 1), dtype=np.uint8)
    landuse = np.ones(255, dtype=np.uint8)
    p_out = np.arange(1, n_points + 1, dtype=np.int32)
    p_ylu = np.asfortranarray(np.array([0.0, 1.0], dtype=np.float64))
    p_fitmethod = per_class(np.int32, 1)
    p_smooth = per_class(np.float64, float(smooth_window))
    p_nenvi = per_class(np.int32, 1)
    p_wfact = per_class(np.float64, 1.0)
    p_startmethod = per_class(np.int32, 1)
    p_startcutoff = np.zeros((255, 2), dtype=np.float64, order="F")
    p_startcutoff[0, :] = np.array(
        [start_cutoff[0], start_cutoff[1]], dtype=np.float64
    )
    p_low = per_class(np.float64)
    p_fillbase = per_class(np.int32)
    p_seasonmethod = per_class(np.int32, 1)
    p_seapar = per_class(np.float64, 1.0)

    if _timesat is None:
        raise ImportError("Install the 'timesat' package: pip install timesat")

    # NOTE(review): tsfprocess is called positionally; the argument order below
    # is copied verbatim and must not be rearranged.
    vpp, _vppqa, nseason, yfit, _yfitqa, _seasonfit, _tseq = _timesat.tsfprocess(
        n_years,
        vi,
        qa,
        tv,
        lc,
        1,
        landuse,
        p_out,
        p_ignoreday,
        p_ylu,
        0,
        p_fitmethod,
        p_smooth,
        NODATA,
        45,
        0,
        p_nenvi,
        p_wfact,
        p_startmethod,
        p_startcutoff,
        p_low,
        p_fillbase,
        1,
        p_seasonmethod,
        p_seapar,
        1,
        1,
        1,
        n_points,
        len(p_out),
    )

    flat_vpp = vpp[0, 0, :]
    # three growing-season rows at indices 0, 13*2, 13*4 in the raw vector
    middle_offset = 13 * 2

    def vpp_at(k: int):
        index = middle_offset + k
        return flat_vpp[index] if flat_vpp.size > index else np.nan

    sos_day, sos_value = vpp_at(0), vpp_at(1)
    eos_day, eos_value = vpp_at(3), vpp_at(4)
    yfit_max = float(np.max(yfit)) if yfit.size else float("nan")

    def as_iso(code) -> str | None:
        # Anything below 100000 cannot be a YYYYDOY code (covers negatives too).
        if not np.isfinite(code) or code < 1.0e5:
            return None
        try:
            return yyyydoy_to_iso(code)
        except (OverflowError, ValueError):
            return None

    return {
        "reference_calendar_year": reference_year,
        "green_up_50pct_date": as_iso(sos_day),
        "green_up_50pct_fitted_gcc": float(sos_value) if np.isfinite(sos_value) else None,
        "green_down_50pct_date": as_iso(eos_day),
        "green_down_50pct_fitted_gcc": float(eos_value) if np.isfinite(eos_value) else None,
        "nseason": nseason[0, 0].tolist() if nseason.ndim >= 2 else [],
        "yfit_max": yfit_max,
    }
|
||||
|
||||
|
||||
def run_timesat_phenology(
    daily_profile: np.ndarray,
    years_triplet: tuple[int, int, int],
    *,
    start_cutoff: tuple[float, float] = (0.5, 0.5),
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> dict[str, str | float | None]:
    """Back-compat wrapper: run TIMESAT on one year's profile repeated three times.

    *daily_profile* must flatten to 365 or 366 values; a 366th value is
    dropped. New code should use :func:`build_yraw_three_years` together with
    :func:`run_timesat_phenology_from_yraw`.

    Raises:
        ValueError: if the profile has any other length.
    """
    one_year = np.asarray(daily_profile, dtype=np.float32).ravel()
    if one_year.size not in (365, 366):
        raise ValueError("daily_profile must have length 365 or 366")

    replicated = np.tile(one_year[:365], 3)
    return run_timesat_phenology_from_yraw(
        replicated,
        years_triplet,
        start_cutoff=start_cutoff,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
|
||||
|
||||
|
||||
def phenocam_gcc_path(site_name: str, season: int) -> Path:
    """Return the relative path of the one-year ``phenocam_gcc.json`` for a site/season."""
    season_dir = Path(f"data/{site_name}/{season}")
    return season_dir / "raw" / "phenocam" / "phenocam_gcc.json"
|
||||
|
||||
|
||||
def phenocam_gcc_3y_path(site_name: str, season: int) -> Path:
    """Return the relative path of the three-year ``phenocam_gcc_3y.json`` for a site/season."""
    season_dir = Path(f"data/{site_name}/{season}")
    return season_dir / "raw" / "phenocam" / "phenocam_gcc_3y.json"
|
||||
|
||||
|
||||
def iter_sites_seasons_with_phenocam(
    data_root: str | Path = "data",
) -> list[tuple[str, int]]:
    """List ``(site_name, season)`` for every ``phenocam_gcc.json`` under *data_root* (legacy).

    Files are matched at ``<site>/<season>/raw/phenocam/phenocam_gcc.json``;
    season folders whose name is not all digits are ignored. Results follow
    sorted path order without duplicates. A missing *data_root* yields ``[]``.
    """
    root = Path(data_root)
    if not root.is_dir():
        return []

    hits = sorted(root.glob("*/*/raw/phenocam/phenocam_gcc.json"))
    leading_parts = (hit.relative_to(root).parts[:2] for hit in hits)
    pairs = (
        (site, int(season_dir))
        for site, season_dir in leading_parts
        if season_dir.isdigit()
    )
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(pairs))
|
||||
|
||||
|
||||
def iter_sites_seasons_from_sites_geojson(
    path: str | Path = "data/sites.geojson",
) -> list[tuple[str, int]]:
    """List ``(sitename, season)`` pairs declared in a GeoJSON FeatureCollection.

    For each feature, ``properties.sitename`` is paired with every key of
    ``properties.seasons`` that is a 4-digit year, in sorted key order.
    Features without a sitename, or whose ``seasons`` is not an object, are
    skipped. A missing file yields ``[]``.
    """
    geojson = Path(path)
    if not geojson.is_file():
        return []

    collection = json.loads(geojson.read_text(encoding="utf-8"))
    pairs: list[tuple[str, int]] = []
    for feature in collection.get("features", []):
        properties = feature.get("properties") or {}
        sitename = properties.get("sitename")
        seasons = properties.get("seasons")
        if sitename and isinstance(seasons, dict):
            pairs.extend(
                (str(sitename), int(key))
                for key in sorted(seasons)
                if key.isdigit() and len(key) == 4
            )
    return pairs
|
||||
|
||||
|
||||
def write_phenocam_phenology_all(
    *,
    sites_geojson: str | Path | None = None,
    data_root: str | Path = "data",
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> int:
    """Run :func:`write_phenocam_phenology_for_site` for every pair in the sites GeoJSON.

    Args:
        sites_geojson: GeoJSON file listing sites and seasons; defaults to
            ``<data_root>/sites.geojson``.
        data_root: Only used to resolve the default *sites_geojson*.
        smooth_window: Forwarded to the per-site writer.
        p_ignoreday: Forwarded to the per-site writer.

    Returns:
        Number of ``(site, season)`` pairs processed.
    """
    if sites_geojson is None:
        geo = Path(Path(data_root) / "sites.geojson")
    else:
        geo = Path(sites_geojson)

    pairs = iter_sites_seasons_from_sites_geojson(geo)
    if not pairs:
        if geo.is_file():
            print(
                f"[PhenoCam phenology] No (sitename, season) entries in {geo} "
                "(check properties.sitename and properties.seasons)."
            )
        else:
            print(f"[PhenoCam phenology] Missing or empty sites file: {geo}")

    for site, season in pairs:
        print(f"=== {site} {season} ===")
        write_phenocam_phenology_for_site(
            site, season, smooth_window=smooth_window, p_ignoreday=p_ignoreday
        )

    processed = len(pairs)
    print(f"[PhenoCam phenology] Processed {processed} site/season pair(s) from {geo}.")
    return processed
|
||||
|
||||
|
||||
def phenocam_phenology_path(site_name: str, season: int) -> Path:
    """Return the relative path of ``phenocam_phenology.json`` for a site/season."""
    season_dir = Path(f"data/{site_name}/{season}")
    return season_dir / "raw" / "phenocam" / "phenocam_phenology.json"
|
||||
|
||||
|
||||
def write_phenocam_phenology_for_site(
    site_name: str,
    season: int,
    *,
    smooth_window: float = 2.0,
    p_ignoreday: int = 366,
) -> None:
    """Compute and write ``phenocam_phenology.json`` for one site/season.

    GCC comes from :func:`resolve_phenocam_gcc_for_timesat` (three-year cache
    or fetch, merged with the one-year file). The written JSON holds only
    ``green_up_50pct_date`` and ``green_down_50pct_date`` (ISO dates or null).

    Nothing is written, and a message is printed, when ``timesat`` is not
    installed, when no GCC is available, or when the TIMESAT input stack
    cannot be built.
    """
    target = phenocam_phenology_path(site_name, season)

    if _timesat is None:
        print(
            f"[PhenoCam phenology] Skipped (no timesat); would write {target}. "
            "pip install timesat"
        )
        return

    gcc_file = phenocam_gcc_path(site_name, season)
    try:
        by_date = resolve_phenocam_gcc_for_timesat(site_name, season, gcc_file)
    except OSError as e:
        print(f"[PhenoCam phenology] Skipped: {e}")
        return

    if not by_date:
        gcc_3y_file = gcc_file.parent / "phenocam_gcc_3y.json"
        print(
            f"[PhenoCam phenology] No GCC ({gcc_file} and no data in {gcc_3y_file} after API); "
            f"skipping {target.name}."
        )
        return

    years = (season - 1, season, season + 1)
    try:
        yraw, stack_mode = build_yraw_three_years(by_date, *years)
    except (OSError, ValueError) as e:
        print(f"[PhenoCam phenology] Skipped: {e}")
        return

    result = run_timesat_phenology_from_yraw(
        yraw,
        years,
        smooth_window=smooth_window,
        p_ignoreday=p_ignoreday,
    )
    green_up = result.get("green_up_50pct_date")
    green_down = result.get("green_down_50pct_date")

    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        json.dump(
            {"green_up_50pct_date": green_up, "green_down_50pct_date": green_down},
            f,
            indent=2,
        )
        f.write("\n")

    print(
        f"[PhenoCam phenology] Wrote {target} (green-up {green_up!r}, green-down {green_down!r}; "
        f"TIMESAT input={stack_mode})"
    )
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point.

    With ``--all``, batch-process every site/season in the sites GeoJSON.
    Otherwise run TIMESAT on a single ``phenocam_gcc.json``, print the full
    result as JSON on stdout, and optionally save it (``-o``) or save the
    two-date sidecar next to the input (``--sidecar``).

    Exits with an error when ``timesat`` is not installed or the input path
    is not a file.
    """
    parser = argparse.ArgumentParser(
        description="TIMESAT 50 % seasonal-amplitude green-up / green-down for PhenoCam GCC JSON."
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Write phenocam for every (sitename, season) in the sites GeoJSON (see --sites-geojson).",
    )
    parser.add_argument(
        "--data-root",
        type=Path,
        default=Path("data"),
        help="Resolves default --sites-geojson to <data-root>/sites.geojson.",
    )
    parser.add_argument(
        "--sites-geojson",
        type=Path,
        default=None,
        help="For --all: path to data/sites.geojson (default: <data-root>/sites.geojson).",
    )
    parser.add_argument(
        "gcc_json",
        type=Path,
        nargs="?",
        default=Path("data/innsbruck/2024/raw/phenocam/phenocam_gcc.json"),
        help="Path to phenocam_gcc.json (default: Innsbruck 2024 if present).",
    )
    parser.add_argument(
        "--season",
        type=int,
        default=None,
        help="Calendar year to build the daily GCC profile (default: infer from file path .../<year>/...).",
    )
    parser.add_argument(
        "--savitzky-hw",
        type=float,
        default=2.0,
        help="Half-width for fitmethod 1 (Savitzky–Golay); default 2.",
    )
    parser.add_argument(
        "--p-ignoreday",
        type=int,
        default=366,
        help="TIMESAT p_ignoreday (default 366).",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Write results to this JSON file (same schema as stdout, plus metadata).",
    )
    parser.add_argument(
        "--sidecar",
        action="store_true",
        help="Save two-date JSON next to input as phenocam_phenology.json (implies -o).",
    )
    args = parser.parse_args()

    if _timesat is None:
        raise SystemExit(
            "The 'timesat' package is required. Install with: pip install timesat"
        )

    if args.all:
        write_phenocam_phenology_all(
            sites_geojson=args.sites_geojson,
            data_root=args.data_root,
            smooth_window=args.savitzky_hw,
            p_ignoreday=args.p_ignoreday,
        )
        return

    gcc_file: Path = args.gcc_json
    if not gcc_file.is_file():
        raise SystemExit(f"Not a file: {gcc_file}")

    season = args.season
    if season is None:
        # First 4-digit path component is taken as the season year.
        season = next(
            (
                int(part)
                for part in gcc_file.parts
                if part.isdigit() and len(part) == 4
            ),
            None,
        )
    if season is None:
        season = datetime.now().year

    years = (season - 1, season, season + 1)
    by_date = load_phenocam_gcc(gcc_file)
    yraw, stack_mode = build_yraw_three_years(by_date, *years)
    result = run_timesat_phenology_from_yraw(
        yraw,
        years,
        smooth_window=args.savitzky_hw,
        p_ignoreday=args.p_ignoreday,
    )
    payload = {
        **result,
        "source_gcc_json": str(gcc_file.resolve()),
        "profile_year": season,
        "timesat_input": stack_mode,
        "method": "TIMESAT tsfprocess; startmethod=1; p_startcutoff=[0.5,0.5] (50% seasonal amplitude)",
    }

    green_up = result.get("green_up_50pct_date")
    green_down = result.get("green_down_50pct_date")

    # --sidecar overrides -o and switches to the two-date schema.
    if args.sidecar:
        destination = gcc_file.parent / "phenocam_phenology.json"
        document = {
            "green_up_50pct_date": green_up,
            "green_down_50pct_date": green_down,
        }
    else:
        destination = args.output
        document = payload

    if destination is not None:
        destination.parent.mkdir(parents=True, exist_ok=True)
        with open(destination, "w", encoding="utf-8") as f:
            json.dump(document, f, indent=2)
            f.write("\n")
        print(f"Wrote {destination}", file=sys.stderr)

    print(json.dumps(payload, indent=2))
    if green_up and green_down:
        print(
            f"Green-up (50 %): {green_up} | Green-down (50 %): {green_down} "
            f"(profile year {season}, TIMESAT reference year {result['reference_calendar_year']})"
        )
|
||||
|
||||
|
||||
# Script entry point: run the command-line interface.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,268 +0,0 @@
|
|||
"""Post-processing: crop fusion/S2/S3 to valid pixels."""
|
||||
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio import windows
|
||||
from rasterio.warp import reproject, Resampling
|
||||
|
||||
|
||||
def _crop_products_to_fusion_extent(
    prepared, processed, sigma, fusion_pattern, s2_pattern, tag, skip_verb
):
    """Crop fusion rasters to their valid-pixel bounding box and cut S2/S3 to match.

    Shared implementation behind ``process_cropped`` and ``process_cropped_itb``.

    Args:
        prepared: Directory holding the ``s2``, ``s3`` and fusion inputs.
        processed: Directory receiving ``fusion``/``s2``/``s3`` outputs
            (the three sub-directories must already exist).
        sigma: Fusion sigma; selects ``fusion_sigma{sigma}`` when truthy,
            otherwise the plain ``fusion`` directory.
        fusion_pattern: Glob for fusion rasters; the date is the second
            ``_``-separated token of the file stem.
        s2_pattern: Glob for prepared S2 rasters; the date is the third
            ``_``-separated token of the file stem.
        tag: Log prefix printed inside square brackets.
        skip_verb: Leading word of the "no valid pixels" message.
    """
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")

    # date -> (crop window, cropped transform, full transform, crs, full profile)
    fusion_dims = {}
    for fusion_file in fusion_prep.glob(fusion_pattern):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            # A pixel is valid when it is finite and above a small positive floor.
            valid = ~np.isnan(data) & (data > 0.001)
            row_idx = np.where(np.any(valid, axis=(0, 2)))[0]
            col_idx = np.where(np.any(valid, axis=(0, 1)))[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[{tag}] {skip_verb} {fusion_file.name} (no valid pixels)")
                continue
            r0, r1 = int(row_idx[0]), int(row_idx[-1])
            c0, c1 = int(col_idx[0]), int(col_idx[-1])
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            # Slice the array already in memory instead of reading the file again.
            data_crop = data[:, r0 : r1 + 1, c0 : c1 + 1]
            transform = windows.transform(window, src.transform)
            profile = src.profile.copy()
            profile.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **profile) as dst:
                dst.write(data_crop)
            fusion_dims[date_str] = (
                window,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
        print(f"[{tag}] Cropped fusion: {output_file}")

    for date_str, (
        window,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        w, h = window.width, window.height

        # S2: same pixel grid as fusion, so the fusion window applies directly.
        for s2_file in s2_prep.glob(s2_pattern):
            parts = s2_file.stem.split("_")
            # Guard against names with fewer than three tokens.
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                with rasterio.open(output_file, "w", **p2) as dst:
                    dst.write(data)
                print(f"[{tag}] Cropped: {output_file}")
                break

        # S3: resample onto the full fusion grid first, then crop with the same window.
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
            with rasterio.open(output_file, "w", **p2) as dst:
                dst.write(data)
            print(f"[{tag}] Cropped: {output_file}")


def process_cropped(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
    """Crop fusion to valid data, then crop S2/S3 to match.

    Reads ``data/{site_name}/{season}/prepared_{cleaning_strategy}`` and writes
    ``processed_{cleaning_strategy}_sigma{sigma or 20}``.

    Args:
        season: Season identifier used in the data path.
        site_position: Accepted for signature parity with other steps; unused.
        site_name: Site identifier used in the data path.
        cleaning_strategy: Suffix of the prepared/processed directories.
        sigma: Fusion sigma; a falsy value selects the default (labelled 20).
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}"
    processed = base / f"processed_{cleaning_strategy}_sigma{sigma or 20}"

    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)

    print(
        f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    _crop_products_to_fusion_extent(
        prepared, processed, sigma, "REFL_*.tif", "*REFL.tif", "PROCESS", "Skipping"
    )
    print("[PROCESS] Completed")


def process_cropped_itb(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
    """Index-then-Blend variant of ``process_cropped`` operating on GCC rasters.

    Reads ``data/{site_name}/{season}/prepared_{cleaning_strategy}_itb`` and
    writes ``processed_{cleaning_strategy}_itb_sigma{sigma or 20}``.

    Args:
        season: Season identifier used in the data path.
        site_position: Accepted for signature parity with other steps; unused.
        site_name: Site identifier used in the data path.
        cleaning_strategy: Suffix of the prepared/processed directories.
        sigma: Fusion sigma; a falsy value selects the default (labelled 20).
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}_itb"
    processed = base / f"processed_{cleaning_strategy}_itb_sigma{sigma or 20}"

    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)

    print(
        f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    _crop_products_to_fusion_extent(
        prepared, processed, sigma, "GCC_*.tif", "*GCC.tif", "PROCESS-ITB", "Skip"
    )
    print("[PROCESS-ITB] Completed")
|
||||
|
||||
|
||||
def post_process_all_itb_scenarios(season, site_position, site_name):
    """Run ``process_cropped_itb`` for every strategy/sigma combination.

    Covers both cleaning strategies, each with the default sigma (``None``)
    and with sigma 30, in that order.
    """
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped_itb(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
|
||||
|
||||
|
||||
def post_process_all_scenarios(season, site_position, site_name):
    """Crop fusion/S2/S3 to valid pixels for all 4 scenarios.

    The scenarios are both cleaning strategies, each with the default sigma
    (``None``) and with sigma 30, in that order.
    """
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
|
||||
|
||||
|
||||
def post_process_timeseries(season, site_position, site_name):
    """Generate NDVI, GCC, and S2 bands timeseries for all 4 scenarios."""
    # Imported lazily, so metrics_indices is only loaded when this step runs.
    from metrics_indices import (
        create_bands_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_ndvi_timeseries_post_process,
    )

    builders = (
        create_ndvi_timeseries_post_process,
        create_gcc_timeseries_post_process,
        create_bands_timeseries_post_process,
    )
    for build in builders:
        build(season, site_position, site_name)
|
||||
364
preparation.py
364
preparation.py
|
|
@ -1,364 +0,0 @@
|
|||
"""Data preparation: S2/S3 preprocessing for fusion."""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.warp import Resampling
|
||||
from rasterio.vrt import WarpedVRT
|
||||
from rasterio import shutil as rio_shutil
|
||||
|
||||
# Linear pixel ratio between the prepared S2 grid and the S3 grid: prepare_s2
# multiplies the S3 reference width/height by it, prepare_s3 integer-divides by it.
RESOLUTION_RATIO = 21
# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
S3_MOVING_AVERAGE_WINDOW_DAYS = 5
|
||||
|
||||
|
||||
def _apply_s3_temporal_moving_average(s3_dir, window):
    """Smooth ``composite_*.tif`` in place with a centred moving average.

    Composites are ordered by the date token in their file name and each one
    is replaced by the mean of itself and up to ``(window - 1) // 2``
    neighbours on either side (fewer at the ends of the series).  Zero is
    treated as nodata: it is excluded from the mean, and written back for
    pixels with no valid sample anywhere in the window.

    NOTE(review): the window counts neighbouring composites, not calendar
    days, so gaps in the series widen the effective time span -- confirm this
    matches the intended "N-day" average.

    Args:
        s3_dir: Directory containing the ``composite_<date>.tif`` rasters.
        window: Window length; values <= 1 disable smoothing.
    """
    import warnings

    if window <= 1:
        return
    paths = sorted(s3_dir.glob("composite_*.tif"), key=lambda p: p.stem.split("_")[1])
    if not paths:
        return

    half = (window - 1) // 2
    arrs = []
    profiles = []
    for path in paths:
        with rasterio.open(path) as src:
            values = src.read().astype(np.float32)
            values[values == 0] = np.nan  # nodata must not pull the mean down
            arrs.append(values)
            profiles.append(src.profile.copy())

    stack = np.stack(arrs, axis=0)  # (time, band, row, col)
    t = stack.shape[0]
    out = np.empty_like(stack)
    with warnings.catch_warnings():
        # Pixels that are nodata across the whole window are expected here;
        # nanmean yields NaN for them (mapped back to 0 below), so silence
        # the "Mean of empty slice" warning instead of flooding the log.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for i in range(t):
            lo, hi = max(0, i - half), min(t, i + half + 1)
            out[i] = np.nanmean(stack[lo:hi], axis=0)
    out = np.nan_to_num(out, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)

    for path, prof, slc in zip(paths, profiles, out):
        prof.update({"dtype": "float32", "nodata": 0})
        with rasterio.open(path, "w", **prof) as dst:
            dst.write(slc)
    print(f"[S3-PREP] Applied {window}-day centred MA ({t} composites)")
|
||||
|
||||
|
||||
def _import_distance_to_clouds():
    """Lazy import of efast.distance_to_clouds.

    Returns:
        The ``distance_to_clouds`` callable from ``efast.s2_processing``.

    Raises:
        ImportError: If it cannot be imported; the message carries the install
            command and the original error is chained as ``__cause__``.
    """
    try:
        from efast.s2_processing import distance_to_clouds
    except ImportError as err:
        # Chain the cause so a broken transitive dependency stays visible.
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        ) from err
    return distance_to_clouds
|
||||
|
||||
|
||||
def _load_excluded(season, site_name, cleaning_strategy):
    """Collect the filenames flagged as excluded for one cleaning strategy.

    Reads ``{source}_preselection.json`` for ``s2`` and ``s3`` under
    ``data/{site_name}/{season}/raw/preselection`` and keeps every entry whose
    ``excluded_{cleaning_strategy}`` value is truthy.

    Returns:
        ``{"s2": set_of_filenames, "s3": set_of_filenames}``; a source whose
        JSON file is missing maps to an empty set.
    """
    flag = f"excluded_{cleaning_strategy}"
    preselection_dir = Path(f"data/{site_name}/{season}/raw/preselection")
    excluded = {}
    for source in ("s2", "s3"):
        listing = preselection_dir / f"{source}_preselection.json"
        if not listing.exists():
            excluded[source] = set()
            continue
        records = json.loads(listing.read_text())
        excluded[source] = {rec["filename"] for rec in records if rec.get(flag)}
    return excluded
|
||||
|
||||
|
||||
def _get_base_dir(season, site_name, cleaning_strategy):
    """Return the prepared-data directory for a site, season and cleaning strategy."""
    relative = f"data/{site_name}/{season}/prepared_{cleaning_strategy}/"
    return Path(relative)
|
||||
|
||||
|
||||
def _get_itb_base_dir(season, site_name, cleaning_strategy):
    """Return the Index-then-Blend prepared-data directory (``..._itb`` suffix)."""
    relative = f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb"
    return Path(relative)
|
||||
|
||||
|
||||
def _compute_gcc_from_refl_array(blue, green, red):
    """Compute the green chromatic coordinate ``green / (blue + green + red)``.

    Inputs are cast to float32. Pixels whose band sum is not finite or not
    strictly positive are left at 0.

    Returns:
        float32 array with the shape of ``green``.
    """
    blue32, green32, red32 = (band.astype(np.float32) for band in (blue, green, red))
    total = blue32 + green32 + red32
    usable = np.isfinite(total) & (total > 0)
    gcc = np.zeros_like(green, dtype=np.float32)
    # Divide only where the denominator is usable; other cells keep the 0 fill.
    np.divide(green32, total, out=gcc, where=usable)
    return gcc
|
||||
|
||||
|
||||
def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
    """Expose every ``*DIST_CLOUD.tif`` from ``src_s2_dir`` inside ``dst_s2_dir``.

    An existing entry with the same name is replaced. A symlink to the
    resolved source is preferred; if creating it raises ``OSError`` the file
    is copied instead.
    """
    dst_s2_dir.mkdir(parents=True, exist_ok=True)
    for source_path in src_s2_dir.glob("*DIST_CLOUD.tif"):
        target = dst_s2_dir / source_path.name
        # is_symlink() also catches dangling links, which exists() reports as absent.
        stale = target.is_symlink() or target.exists()
        if stale:
            target.unlink(missing_ok=True)
        try:
            target.symlink_to(source_path.resolve())
        except OSError:
            shutil.copy2(source_path, target)
|
||||
|
||||
|
||||
def prepare_s2_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Derive single-band GCC rasters from prepared S2 reflectance for ItB.

    Each ``*REFL.tif`` in the prepared ``s2`` directory is converted to a
    ``*_GCC.tif`` (float32, nodata 0) in the ITB ``s2`` directory, computed
    from bands 1-3. Existing outputs and rasters with fewer than four bands
    are skipped. The ``*DIST_CLOUD.tif`` files are then linked across.
    ``site_position`` is unused.
    """
    s2_prep = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2.mkdir(parents=True, exist_ok=True)

    for refl_path in sorted(s2_prep.glob("*REFL.tif")):
        gcc_path = itb_s2 / refl_path.name.replace("_REFL.tif", "_GCC.tif")
        if gcc_path.exists():
            continue
        with rasterio.open(refl_path) as src:
            if src.count < 4:
                continue
            # Bands 1..3 are passed as (blue, green, red).
            blue, green, red = (
                src.read(band).astype(np.float32) for band in range(1, 4)
            )
            gcc = _compute_gcc_from_refl_array(blue, green, red)
            out_profile = src.profile.copy()
            out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        with rasterio.open(gcc_path, "w", **out_profile) as dst:
            dst.write(gcc, 1)
        print(f"[S2-ITB] Saved {gcc_path.name}")

    _link_dist_cloud_from_prepared(s2_prep, itb_s2)
|
||||
|
||||
|
||||
def prepare_s3_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Derive single-band GCC rasters from prepared S3 composites for ItB.

    Each ``composite_*.tif`` in the prepared ``s3`` directory is converted to
    a same-named float32 GCC raster (nodata 0) in the ITB ``s3`` directory,
    computed from bands 1-3. Existing outputs and rasters with fewer than four
    bands are skipped. ``site_position`` is unused.
    """
    s3_prep = _get_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3.mkdir(parents=True, exist_ok=True)

    for composite_path in sorted(s3_prep.glob("composite_*.tif")):
        gcc_path = itb_s3 / composite_path.name
        if gcc_path.exists():
            continue
        with rasterio.open(composite_path) as src:
            if src.count < 4:
                continue
            # Bands 1..3 are passed as (blue, green, red).
            blue, green, red = (
                src.read(band).astype(np.float32) for band in range(1, 4)
            )
            gcc = _compute_gcc_from_refl_array(blue, green, red)
            out_profile = src.profile.copy()
            out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        with rasterio.open(gcc_path, "w", **out_profile) as dst:
            dst.write(gcc, 1)
        print(f"[S3-ITB] Saved {gcc_path.name}")
|
||||
|
||||
|
||||
def _reproject_raster_to_target(
    src_path,
    dst_path,
    target_bounds,
    target_crs,
    width,
    height,
    resampling=Resampling.bilinear,
):
    """Warp ``src_path`` onto a fixed target grid and save it as a GeoTIFF.

    The target grid is defined by ``target_bounds`` (object with ``left``,
    ``bottom``, ``right``, ``top``), ``target_crs`` and the pixel dimensions
    ``width`` x ``height``. The output is written as float32 with nodata 0.
    """
    grid_transform = rasterio.transform.from_bounds(
        target_bounds.left,
        target_bounds.bottom,
        target_bounds.right,
        target_bounds.top,
        width,
        height,
    )
    warp_kwargs = dict(
        transform=grid_transform,
        height=height,
        width=width,
        crs=target_crs,
        resampling=resampling,
    )
    with rasterio.open(src_path) as src, WarpedVRT(src, **warp_kwargs) as vrt:
        out_profile = vrt.profile.copy()
        out_profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
        rio_shutil.copy(vrt, dst_path, **out_profile)
|
||||
|
||||
|
||||
def _rescale_dist_cloud_for_small_roi(s2_output_dir):
    """Lift DIST_CLOUD rasters whose maximum distance is <= 1.

    EFAST uses wo_i = (distance - 1) / D, so values <= 1 yield zero/NaN
    weights. In small ROIs (e.g. PhenoCam sites, 7x4 LR grid) the distance
    transform never exceeds 1. For such files every strictly positive value
    in band 1 is set to 2.0 (zeros are kept) and the band is rewritten in
    place, so fusion can produce non-NaN output.
    """
    for dist_path in s2_output_dir.glob("*DIST_CLOUD.tif"):
        with rasterio.open(dist_path, "r") as src:
            distance = src.read(1)
        peak = float(np.nanmax(distance))
        if not peak <= 1:
            # Already has usable weights (or the maximum is NaN): leave untouched.
            continue
        # Positive distances become the constant 2.0 so (d - 1) / D is positive.
        lifted = np.where(distance > 0, 2.0, distance).astype(np.float32)
        with rasterio.open(dist_path, "r+") as dst:
            dst.write(lifted, 1)
        print(f"[S2-PREP] Rescaled DIST_CLOUD for {dist_path.name} (max was {peak})")
|
||||
|
||||
|
||||
def prepare_s2(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
    """Normalise raw S2 scenes and warp them onto the S3-derived fusion grid.

    For every raw ``*.geotiff`` not excluded by ``cleaning_strategy`` the
    values are divided by 10000, written to a temporary raster, and
    reprojected to the bounds/CRS of a non-excluded S3 scene at
    ``RESOLUTION_RATIO`` times its pixel dimensions. Results are stored as
    ``S2A_MSIL2A_{date}_REFL.tif``; existing outputs are skipped. Afterwards
    the distance-to-clouds rasters are computed and rescaled for small ROIs.

    Args:
        season: Season identifier used in the data path.
        site_position: ``(lat, lon)`` of the site; only used for logging.
        site_name: Site identifier used in the data path.
        cleaning_strategy: Which exclusion flag set to honour.
        date_range: Accepted for interface compatibility; unused.

    Raises:
        ValueError: If no non-excluded S3 scene exists to define the grid.
    """
    lat, lon = site_position
    s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    s2_output_dir = _get_base_dir(season, site_name, cleaning_strategy) / "s2"

    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s2_output_dir.mkdir(parents=True, exist_ok=True)

    print(
        f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )

    s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
    if not s3_files:
        raise ValueError("No non-cloud S3 files found for reference bounds")

    # One non-excluded S3 scene defines the target extent, CRS and grid size.
    with rasterio.open(s3_files[0]) as s3_ref:
        target_bounds = s3_ref.bounds
        target_crs = s3_ref.crs
        s2_width = s3_ref.width * RESOLUTION_RATIO
        s2_height = s3_ref.height * RESOLUTION_RATIO

    for s2_file in sorted(s2_dir.glob("*.geotiff")):
        if s2_file.name in clouds["s2"]:
            print(
                f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
            )
            continue
        date_str = s2_file.name.split("_")[0]
        refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
        if refl_dst.exists():
            print(f"[S2-PREP] Skipping {s2_file.name} (exists)")
            continue

        print(f"[S2-PREP] Processing {s2_file.name}...")
        temp_normalized = s2_output_dir / f"temp_{s2_file.name}"
        try:
            with rasterio.open(s2_file) as src:
                data = src.read().astype("float32") / 10000.0
                profile = src.profile.copy()
                profile.update({"dtype": "float32", "nodata": 0})
            with rasterio.open(temp_normalized, "w", **profile) as dst:
                dst.write(data)

            _reproject_raster_to_target(
                temp_normalized, refl_dst, target_bounds, target_crs, s2_width, s2_height
            )
        finally:
            # Never leave the intermediate raster behind, even when a step fails.
            temp_normalized.unlink(missing_ok=True)
        print(f"[S2-PREP] Saved: {refl_dst}")

    print("[S2-PREP] Computing distance-to-clouds...")
    distance_to_clouds = _import_distance_to_clouds()
    distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
    _rescale_dist_cloud_for_small_roi(s2_output_dir)
    print("[S2-PREP] Completed")
|
||||
|
||||
|
||||
def prepare_s3(
    season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
    """Build daily S3 composites and warp them onto the low-resolution fusion grid.

    Steps:
      1. Group non-excluded raw ``*.geotiff`` scenes by the date token that
         starts their file name.
      2. Write one composite per date: a plain copy for a single acquisition,
         otherwise the NaN-aware mean after blanking pixels whose band mean
         has absolute value >= 5.
      3. Reproject each composite (cubic) to the bounds/CRS of a prepared S2
         ``*REFL.tif`` at ``1 / RESOLUTION_RATIO`` of its pixel dimensions.
      4. Apply the temporal moving average and remove the temporary directory.

    Args:
        season: Season identifier used in the data path.
        site_position: ``(lat, lon)`` of the site; only used for logging.
        site_name: Site identifier used in the data path.
        cleaning_strategy: Which exclusion flag set to honour.
        date_range: Accepted for interface compatibility; unused.

    Raises:
        ValueError: If no prepared S2 ``*REFL.tif`` exists to define the grid.
    """
    lat, lon = site_position
    s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
    base_dir = _get_base_dir(season, site_name, cleaning_strategy)
    s2_prepared_dir = base_dir / "s2"
    s3_preprocessed_dir = base_dir / "s3"

    clouds = _load_excluded(season, site_name, cleaning_strategy)
    s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)

    print(
        f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
    )

    s3_by_date = defaultdict(list)
    for s3_file in s3_dir.glob("*.geotiff"):
        if s3_file.name not in clouds["s3"]:
            s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
        else:
            print(
                f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
            )

    print(
        f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
    )

    # Start from a clean scratch directory so stale composites cannot leak in.
    temp_composite_dir = s3_preprocessed_dir / "temp_composites"
    if temp_composite_dir.exists():
        shutil.rmtree(temp_composite_dir)
    temp_composite_dir.mkdir()

    for date_str, s3_files in sorted(s3_by_date.items()):
        composite_path = temp_composite_dir / f"composite_{date_str}.tif"
        if len(s3_files) == 1:
            shutil.copy(s3_files[0], composite_path)
            print(f"[S3-PREP] Composite {date_str}: 1 acquisition")
        else:
            s3_stack = []
            for s3_file in s3_files:
                with rasterio.open(s3_file) as src:
                    data = src.read()
                # NaN cannot be stored in integer arrays; promote before masking.
                if not np.issubdtype(data.dtype, np.floating):
                    data = data.astype("float32")
                # Blank pixels whose across-band mean is implausibly large.
                data[:, np.abs(np.nanmean(data, axis=0)) >= 5] = np.nan
                s3_stack.append(data)
            composite = np.nanmean(np.array(s3_stack), axis=0).astype("float32")
            with rasterio.open(s3_files[0]) as src:
                profile = src.profile.copy()
                profile.update({"count": composite.shape[0], "dtype": "float32"})
            with rasterio.open(composite_path, "w", **profile) as dst:
                dst.write(composite)
            print(
                f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
            )

    # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds.
    # This ensures fusion covers the same area as S2 and dimensions match.
    sen2_ref_paths = list(s2_prepared_dir.glob("*REFL.tif"))
    if len(sen2_ref_paths) == 0:
        raise ValueError(f"No REFL files found in {s2_prepared_dir}")

    with rasterio.open(sen2_ref_paths[0]) as s2_ref:
        target_bounds = s2_ref.bounds
        target_crs = s2_ref.crs
        # Integer division matching distance_to_clouds: s2_height // ratio, s2_width // ratio
        width = s2_ref.width // RESOLUTION_RATIO
        height = s2_ref.height // RESOLUTION_RATIO
        s3_transform = rasterio.transform.from_bounds(
            target_bounds.left,
            target_bounds.bottom,
            target_bounds.right,
            target_bounds.top,
            width,
            height,
        )

    # List the composites once; the count and the loop share this listing.
    sen3_paths = sorted(temp_composite_dir.glob("*.tif"))
    print(
        f"[S3-PREP] Reprojecting {len(sen3_paths)} composites to S2 grid ({width}×{height} px)..."
    )

    vrt_options = {
        "transform": s3_transform,
        "height": height,
        "width": width,
        "crs": target_crs,
        "resampling": Resampling.cubic,
    }
    for sen3_path in sen3_paths:
        with rasterio.open(sen3_path) as s3_src:
            with WarpedVRT(s3_src, **vrt_options) as vrt:
                outfile = s3_preprocessed_dir / sen3_path.name
                profile = vrt.profile.copy()
                profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
                rio_shutil.copy(vrt, outfile, **profile)
                print(f"[S3-PREP] Saved: {outfile}")

    _apply_s3_temporal_moving_average(
        s3_preprocessed_dir, S3_MOVING_AVERAGE_WINDOW_DAYS
    )
    shutil.rmtree(temp_composite_dir)
    print("[S3-PREP] Completed")
|
||||
142
preselection.py
142
preselection.py
|
|
@ -1,142 +0,0 @@
|
|||
"""Pre-selection: self-contained NDVI timeseries with cloud/dark-imagery exclusion markers."""
|
||||
import csv
|
||||
import json
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.warp import transform as transform_coords
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Half-width, in days, of the neighbourhood compared against each entry in _is_excluded.
WINDOW_DAYS = 14
# Minimum number of NDVI samples in that neighbourhood before the NDVI test is applied.
MIN_WINDOW_SIZE = 3
# Per strategy: an entry is excluded when its NDVI is below both `threshold`
# and (neighbourhood maximum - `delta`).
THRESHOLDS = {"aggressive": {"threshold": 0.3, "delta": 0.15}, "nonaggressive": {"threshold": 0.2, "delta": 0.25}}
# S2 uses reflectance * 10000, S3 uses 0-1
# Entries whose mean blue band (b02) is below this value are excluded as too dark.
BLUE_MIN = {"s2": 100, "s3": 0.01}

# 1-based raster band numbers.
GREEN_BAND = 2
RED_BAND = 3
NIR_BAND = 4
BLUE_BAND = 1
# Output keys for the means of bands 1..4, in band order.
BAND_KEYS = ["b02", "b03", "b04", "b8a"]
|
||||
|
||||
|
||||
def _sample_3x3(input_file, site_position):
    """Sample mean NDVI and all four bands (3x3 window) at site.

    Only the (edge-clipped) 3x3 neighbourhood around the site is read from
    disk. NDVI is averaged over pixels where red and NIR are both positive
    and not NaN; the band means use the same pixel mask.

    Args:
        input_file: Raster with at least four bands.
        site_position: ``(lat, lon)`` in EPSG:4326.

    Returns:
        ``(ndvi, {"b02": ..., "b03": ..., "b04": ..., "b8a": ...})``, or
        ``(None, None)`` when the raster has fewer than four bands, the site
        lies outside it, no pixel is usable, or reading fails.
    """
    from rasterio.windows import Window

    try:
        with rasterio.open(input_file) as src:
            if src.count < 4:
                return None, None
            lon, lat = site_position[1], site_position[0]
            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            if not (
                src.bounds.left <= x[0] <= src.bounds.right
                and src.bounds.bottom <= y[0] <= src.bounds.top
            ):
                return None, None
            row, col = src.index(x[0], y[0])
            if row < 0 or row >= src.height or col < 0 or col >= src.width:
                return None, None
            r0, r1 = max(0, row - 1), min(src.height, row + 2)
            c0, c1 = max(0, col - 1), min(src.width, col + 2)
            # Read just the neighbourhood instead of four full-size bands.
            neighbourhood = Window(c0, r0, c1 - c0, r1 - r0)
            samples = [
                src.read(i, window=neighbourhood).astype(np.float32)
                for i in range(1, 5)
            ]
        red_w, nir_w = samples[RED_BAND - 1], samples[NIR_BAND - 1]
        mask = (red_w > 0) & (nir_w > 0) & ~np.isnan(red_w) & ~np.isnan(nir_w)
        if not np.any(mask):
            return None, None
        ndvi = float(np.mean((nir_w[mask] - red_w[mask]) / (nir_w[mask] + red_w[mask])))
        band_means = {
            k: round(float(np.mean(w[mask])), 6) for k, w in zip(BAND_KEYS, samples)
        }
        return ndvi, band_means
    except Exception:
        # Deliberate best effort: an unreadable scene becomes a missing sample.
        return None, None
|
||||
|
||||
|
||||
def _extract_date(filename):
    """Find the first ``YYYYMMDD`` token in an underscore-separated file name.

    Eight-digit tokens that are not valid calendar dates (e.g. numeric IDs)
    are skipped rather than raising.

    Returns:
        ``(date_token, iso_datetime_string)``, or ``(None, None)`` when no
        token parses as a date.
    """
    for part in filename.replace(".geotiff", "").split("_"):
        if len(part) == 8 and part.isdigit():
            try:
                parsed = datetime.strptime(part, "%Y%m%d")
            except ValueError:
                # Eight digits but not a real date; keep looking.
                continue
            return part, parsed.isoformat()
    return None, None
|
||||
|
||||
|
||||
def _is_excluded(entry, entries, strategy, source="s2"):
    """Decide whether ``entry`` is excluded under ``strategy``.

    An entry is excluded when it has no NDVI, when its blue mean (``b02``) is
    below the per-source minimum, or when its NDVI is below both the strategy
    threshold and (maximum NDVI within +/- ``WINDOW_DAYS`` days) minus the
    strategy delta. The NDVI test is skipped when fewer than
    ``MIN_WINDOW_SIZE`` samples fall inside the window.
    """
    limits = THRESHOLDS[strategy]

    ndvi = entry.get("ndvi")
    if ndvi is None:
        return True

    blue = entry.get("b02")
    if blue is not None and blue < BLUE_MIN.get(source, BLUE_MIN["s2"]):
        return True

    def _parse(stamp):
        return datetime.fromisoformat(stamp.replace("Z", "+00:00"))

    centre = _parse(entry["date"])
    neighbours = [
        other["ndvi"]
        for other in entries
        if other.get("ndvi") is not None
        and abs((_parse(other["date"]) - centre).days) <= WINDOW_DAYS
    ]
    if len(neighbours) < MIN_WINDOW_SIZE:
        return False

    floor = max(neighbours) - limits["delta"]
    return ndvi < floor and ndvi < limits["threshold"]
|
||||
|
||||
|
||||
def create_timeseries(season, site_position, site_name):
    """Build NDVI timeseries (3x3 window) for raw S2/S3, with exclusion markers for both strategies.

    For each of ``s2`` and ``s3`` every dated ``*.geotiff`` under
    ``data/{site_name}/{season}/raw/{source}`` is sampled at the site, flagged
    with ``excluded_aggressive`` / ``excluded_nonaggressive``, and written to
    ``raw/preselection/{source}_preselection.json`` and ``.csv``.
    """
    lat, lon = site_position
    season_dir = Path(f"data/{site_name}/{season}")
    strategies = ("aggressive", "nonaggressive")

    print(f"[PRESELECT] Creating NDVI timeseries: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    for source in ("s2", "s3"):
        input_dir = season_dir / "raw" / source
        out_dir = season_dir / "raw" / "preselection"
        out_dir.mkdir(parents=True, exist_ok=True)
        output_file = out_dir / f"{source}_preselection.json"

        if not input_dir.exists():
            print(f"[PRESELECT] Skipping {source}: {input_dir} not found")
            continue

        timeseries = []
        for raster in sorted(input_dir.glob("*.geotiff")):
            if "DIST_CLOUD" in raster.name:
                continue
            date_str, date_iso = _extract_date(raster.name)
            if not date_str:
                continue
            ndvi, band_means = _sample_3x3(raster, site_position)
            record = {"filename": raster.name, "date": date_iso, "ndvi": ndvi}
            if band_means:
                record.update(band_means)
            timeseries.append(record)

        timeseries.sort(key=lambda rec: rec["date"])
        for rec in timeseries:
            for strategy in strategies:
                rec[f"excluded_{strategy}"] = _is_excluded(rec, timeseries, strategy, source)

        with open(output_file, "w") as out:
            json.dump(timeseries, out, indent=2)

        csv_file = out_dir / f"{source}_preselection.csv"
        fieldnames = [
            "filename",
            "date",
            "ndvi",
            *BAND_KEYS,
            "excluded_aggressive",
            "excluded_nonaggressive",
        ]
        with open(csv_file, "w", newline="") as out:
            writer = csv.DictWriter(out, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            writer.writerows({k: rec.get(k) for k in fieldnames} for rec in timeseries)

        n_excl_agg = sum(bool(rec["excluded_aggressive"]) for rec in timeseries)
        n_excl_non = sum(bool(rec["excluded_nonaggressive"]) for rec in timeseries)
        print(f"[PRESELECT] Saved {output_file} + {csv_file.name}: {len(timeseries)} entries ({n_excl_agg} aggressive, {n_excl_non} nonaggressive excluded)")

    print("[PRESELECT] Completed")
|
||||
|
||||
|
||||
# Backward compatibility
def detect_clouds(season, site_position, site_name, cleaning_strategy="aggressive"):
    """Create timeseries with exclusion markers. Strategy is read from timeseries when preparing.

    ``cleaning_strategy`` is accepted but not used here: the call below
    records the exclusion flags for both strategies in one pass.
    """
    create_timeseries(season, site_position, site_name)


# Alias: ``preselect`` is the same callable as ``create_timeseries``.
preselect = create_timeseries
|
||||
|
|
@ -1,2 +1,31 @@
|
|||
[project]
|
||||
name = "worldwide"
|
||||
version = "0.1.0"
|
||||
description = "Worldwide PhenoCam EFAST feasibility screening"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"efast @ git+https://github.com/DHI-GRAS/efast.git",
|
||||
"netCDF4",
|
||||
"numpy",
|
||||
"openeo",
|
||||
"pystac-client",
|
||||
"python-dateutil",
|
||||
"python-dotenv",
|
||||
"rasterio",
|
||||
"requests",
|
||||
"scipy",
|
||||
"shapely",
|
||||
"tqdm",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ruff",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"run.py" = ["F401"]
|
||||
"1-phenocam.py" = ["E402"]
|
||||
"2-phenocam-screening.py" = ["E402"]
|
||||
"3-sentinel-data.py" = ["E402"]
|
||||
"4-fusion.py" = ["E402"]
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
pystac-client
|
||||
rasterio
|
||||
openeo
|
||||
python-dotenv
|
||||
netCDF4
|
||||
numpy
|
||||
timesat
|
||||
requests
|
||||
scipy
|
||||
matplotlib
|
||||
ruff
|
||||
pre-commit
|
||||
87
run.py
87
run.py
|
|
@ -1,87 +0,0 @@
|
|||
"""Pipeline entry point.
|
||||
|
||||
Active snippet below only **regenerates metrics.json** (temporal, baseline,
|
||||
`derived`, `residual_vs_phenocam`). Requires existing post-processed GCC
|
||||
timeseries under `data/{site}/{season}/processed_*`.
|
||||
|
||||
Un-comment imports and steps below for acquisition → fusion → post-process.
|
||||
"""
|
||||
|
||||
# from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
|
||||
# from postprocessing import (
|
||||
# post_process_all_scenarios,
|
||||
# post_process_all_itb_scenarios,
|
||||
# post_process_timeseries,
|
||||
# )
|
||||
# from acquisition_s2 import download_s2
|
||||
# from acquisition_s3 import download_s3
|
||||
# from acquisition_phenocam import download_phenocam
|
||||
# from preselection import create_timeseries
|
||||
# from preparation import (
|
||||
# prepare_s2,
|
||||
# prepare_s3,
|
||||
# prepare_s2_gcc_for_itb,
|
||||
# prepare_s3_gcc_for_itb,
|
||||
# )
|
||||
# from metrics_indices import create_prepared_fusion_timeseries
|
||||
from metrics_stats import calculate_all_metrics
|
||||
|
||||
# from phenology_timesat import write_phenocam_phenology_for_site
|
||||
|
||||
|
||||
def run_pipeline(season, site_position, site_name):
    """Run the pipeline for one site and season (metrics stage only by default).

    Only the final metrics stage is live. The earlier stages are kept below as
    commented-out calls in execution order; re-enabling one also requires
    re-enabling its import at the top of the module.

    Args:
        season: Season identifier forwarded to every stage (the entry point
            passes a year such as ``2024``).
        site_position: Two-float coordinate tuple forwarded to every stage
            (presumably latitude, longitude - TODO confirm).
        site_name: Site identifier forwarded to every stage.

    Raises:
        Exception: Any stage failure is printed and then re-raised unchanged.
    """
    try:
        # -- disabled: acquisition ------------------------------------------
        # print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}")
        # download_s2(season, site_position, site_name)
        # download_s3(season, site_position, site_name)
        # download_phenocam(season, site_position, site_name)

        # -- disabled: PhenoCam phenology -----------------------------------
        # print(f"PhenoCam phenology (50 % amplitude): {site_name}, {season}")
        # write_phenocam_phenology_for_site(site_name, season)

        # -- disabled: preselection -----------------------------------------
        # print(f"Creating preselection timeseries: {site_name}, {season}")
        # create_timeseries(season, site_position, site_name)

        # -- disabled: preparation for fusion -------------------------------
        # print(f"Preparing S2 and S3 for fusion: {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2(season, site_position, site_name, cleaning_strategy=strategy)
        #     prepare_s3(season, site_position, site_name, cleaning_strategy=strategy)

        # -- disabled: EFAST fusion -----------------------------------------
        # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
        # run_all_efast_scenarios(season, site_position, site_name)

        # -- disabled: Index-then-Blend (ItB) -------------------------------
        # print(f"Index-then-Blend (ItB): {site_name}, {season}")
        # for strategy in ["aggressive", "nonaggressive"]:
        #     prepare_s2_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        #     prepare_s3_gcc_for_itb(
        #         season, site_position, site_name, cleaning_strategy=strategy
        #     )
        # run_all_efast_itb_scenarios(season, site_position, site_name)
        # post_process_all_itb_scenarios(season, site_position, site_name)

        # -- disabled: prepared/fusion timeseries ---------------------------
        # print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
        # create_prepared_fusion_timeseries(season, site_position, site_name)

        # -- disabled: post-processing (crop) -------------------------------
        # print(f"Post-processing (crop): {site_name}, {season}")
        # post_process_all_scenarios(season, site_position, site_name)
        # post_process_timeseries(season, site_position, site_name)

        # -- active: metrics ------------------------------------------------
        print(f"Calculating metrics: {site_name}, {season}")
        calculate_all_metrics(season, site_name, site_position)
    except Exception as exc:
        # Report which run failed on stdout, but keep the original traceback.
        print(f"Error: {exc}")
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # (season, site_position, site_name) for every configured run, in the
    # order they are executed.
    for _season, _position, _site in (
        (2024, (47.116171, 11.320308), "innsbruck"),
        (2024, (35.3045, 25.0743), "forthgr"),
        (2020, (47.116171, 11.320308), "innsbruck"),
        (2024, (58.5633, 24.3688), "pitsalu"),
        (2023, (64.2437, 19.7673), "vindeln2"),
        (2024, (36.7455, -6.0033), "sunflowerjerez1"),
        (2024, (42.6558, 26.9837), "institutekarnobat"),
    ):
        run_pipeline(_season, _position, _site)
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
[Unit]
|
||||
Description=Satellite Fusion Pipeline Web Server
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
WorkingDirectory=/opt/satellite-fusion/webapp
|
||||
Environment="PATH=/opt/satellite-fusion/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
ExecStart=/opt/satellite-fusion/venv/bin/python3 -m http.server 8000 --directory /opt/satellite-fusion/webapp
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
|
|
@ -1,634 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Compute per-site suitability indicators from existing pipeline outputs.
|
||||
|
||||
The script is intentionally schema-tolerant: it prints one site's discovered JSON
|
||||
structure first, then uses a small set of common field-name conventions to compute
|
||||
SNR, S2 archive density, and S2-S3 GCC coherence.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.interpolate import UnivariateSpline
|
||||
from scipy.stats import pearsonr
|
||||
|
||||
|
||||
OUTPUT_NAME = "suitability_screening.json"
|
||||
SNR_THRESHOLD = 2.0
|
||||
MATCH_TOLERANCE_DAYS = 2
|
||||
|
||||
|
||||
def load_json(path: Path) -> Any | None:
    """Read and parse a UTF-8 JSON file, returning ``None`` when impossible.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON value, or ``None`` when ``path`` is not a regular
        file or the file cannot be read or decoded (a warning is printed in
        the latter case).
    """
    if not path.is_file():
        return None
    try:
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except (ValueError, OSError) as exc:
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (file is not valid UTF-8); one unreadable file
        # must not abort the screening of the remaining sites.
        print(f"[WARN] Could not read JSON {path}: {exc}")
        return None
|
||||
|
||||
|
||||
def jsonable_float(value: Any) -> float | None:
    """Convert ``value`` to a finite float that is safe to serialise as JSON.

    Args:
        value: Any object; numbers and numeric strings are accepted.

    Returns:
        The finite float, or ``None`` for booleans, values ``float()`` cannot
        convert, integers too large for a float, and NaN / infinity.
    """
    # bool is an int subclass; a True/False flag is never a measurement.
    if isinstance(value, bool):
        return None
    try:
        out = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: JSON integers are unbounded, floats are not.
        return None
    if not math.isfinite(out):
        return None
    return out
|
||||
|
||||
|
||||
def parse_date(value: Any) -> pd.Timestamp | None:
    """Parse ``value`` into a timezone-naive midnight ``pd.Timestamp``.

    Strings are first searched for a stand-alone run of eight digits
    (``YYYYMMDD``, as embedded in file names); otherwise the whole text is
    handed to ``pd.to_datetime``. Timezone-aware results, including
    ``pd.Timestamp`` inputs, go through ``tz_convert(None)`` before the time
    of day is dropped, so every return value is tz-naive and comparable.

    Args:
        value: A ``pd.Timestamp``, or any object whose ``str()`` holds a date.

    Returns:
        The normalised timestamp, or ``None`` when no date can be extracted.
    """
    if value is None:
        return None
    if isinstance(value, pd.Timestamp):
        ts = value
    else:
        text = str(value).strip()
        if not text:
            return None
        match = re.search(r"(?<!\d)(\d{8})(?!\d)", text)
        if match:
            try:
                return pd.to_datetime(match.group(1), format="%Y%m%d").normalize()
            except (TypeError, ValueError):
                # Eight digits that are not a calendar date: fall through to
                # the generic parser.
                pass
        try:
            ts = pd.to_datetime(text, errors="coerce")
        except (TypeError, ValueError):
            return None
    if pd.isna(ts):
        return None
    if getattr(ts, "tzinfo", None) is not None:
        ts = ts.tz_convert(None)
    return pd.Timestamp(ts).normalize()
|
||||
|
||||
|
||||
def compact(value: Any, *, max_text: int = 220) -> Any:
    """Shrink ``value`` into a short preview for discovery logging.

    Dicts keep their first 12 items and lists their first 2 elements, both
    compacted recursively. Any other value is returned unchanged unless its
    ``repr`` exceeds ``max_text`` characters, in which case the truncated
    ``repr`` (ending in ``...``) is returned instead.
    """
    if isinstance(value, dict):
        preview = {}
        for position, (key, item) in enumerate(value.items()):
            if position >= 12:
                break
            preview[key] = compact(item, max_text=max_text)
        return preview
    if isinstance(value, list):
        head = value[:2]
        return [compact(item, max_text=max_text) for item in head]
    rendered = repr(value)
    if len(rendered) <= max_text:
        return value
    return rendered[: max_text - 3] + "..."
|
||||
|
||||
|
||||
def top_keys(data: Any) -> list[str]:
    """List the top-level keys of a JSON document for discovery output.

    Args:
        data: Decoded JSON value.

    Returns:
        For a dict, its keys in insertion order. For a list whose first
        element is a dict, the sorted union of the keys of the dicts among
        its first five elements. Otherwise an empty list.
    """
    if isinstance(data, dict):
        return list(data.keys())
    if isinstance(data, list) and data and isinstance(data[0], dict):
        keys: set[str] = set()
        for entry in data[:5]:
            # Only data[0] is known to be a dict; tolerate mixed lists.
            if isinstance(entry, dict):
                keys.update(entry.keys())
        return sorted(keys)
    return []
|
||||
|
||||
|
||||
def normalize_records(data: Any) -> list[dict[str, Any]]:
    """Flatten the JSON shapes seen in pipeline outputs into record dicts.

    Handled shapes, in order:

    * ``None`` gives an empty list.
    * A list gives one record per element; non-dict elements are wrapped as
      ``{"value": element}``.
    * A non-dict scalar gives a single ``{"value": data}`` record.
    * A dict holding a list under one of the well-known container keys has
      that list normalised instead.
    * A non-empty dict without list/tuple values is read as a mapping keyed
      by date or filename: each key becomes the record's ``"date"`` unless
      the value already carries one.
    * Any other dict is returned as a single shallow-copied record.
    """
    if data is None:
        return []
    if isinstance(data, list):
        return [dict(item) if isinstance(item, dict) else {"value": item} for item in data]
    if not isinstance(data, dict):
        return [{"value": data}]

    container_keys = ("timeseries", "time_series", "data", "entries", "results", "records")
    for name in container_keys:
        nested = data.get(name)
        if isinstance(nested, list):
            return normalize_records(nested)

    # Dict keyed by date or filename.
    has_sequence_value = any(isinstance(item, (list, tuple)) for item in data.values())
    if data and not has_sequence_value:
        keyed = []
        for label, item in data.items():
            if isinstance(item, dict):
                row = dict(item)
                if "date" not in row:
                    row["date"] = label
            else:
                row = {"date": label, "value": item}
            keyed.append(row)
        return keyed

    return [dict(data)]
|
||||
|
||||
|
||||
def first_records(data: Any, count: int = 2) -> list[Any]:
    """Return at most ``count`` leading records of ``data`` after normalisation."""
    return normalize_records(data)[:count]
|
||||
|
||||
|
||||
def recursive_snr_candidates(data: Any, prefix: str = "") -> list[tuple[str, Any]]:
    """Collect every value stored under a key whose name contains ``snr``.

    The search walks nested dicts and the first ten elements of each list.

    Args:
        data: Decoded JSON value to search.
        prefix: Path of ``data`` within the document; used during recursion.

    Returns:
        ``(path, value)`` pairs in traversal order, where ``path`` joins dict
        keys with ``.`` and marks list positions as ``[i]``.
    """
    if isinstance(data, dict):
        hits: list[tuple[str, Any]] = []
        for name, child in data.items():
            child_path = str(name) if not prefix else f"{prefix}.{name}"
            if "snr" in str(name).lower():
                hits.append((child_path, child))
            hits += recursive_snr_candidates(child, child_path)
        return hits
    if isinstance(data, list):
        hits = []
        for index, child in enumerate(data[:10]):
            hits += recursive_snr_candidates(child, f"{prefix}[{index}]")
        return hits
    return []
|
||||
|
||||
|
||||
def find_numeric_snr(data: Any) -> float | None:
    """Return the first usable numeric SNR found anywhere in ``data``.

    Candidates whose final path component is exactly ``snr`` are tried before
    other snr-containing keys; within each group the traversal order is kept.
    A candidate that is itself a dict is also probed for a direct ``"snr"``
    entry.

    Returns:
        A finite float, or ``None`` when no candidate is numeric.
    """
    candidates = recursive_snr_candidates(data)

    def is_exact(path: str) -> bool:
        return path.split(".")[-1].lower() == "snr"

    # Exact leaf keys first, everything else after, each in original order.
    exact = [pair for pair in candidates if is_exact(pair[0])]
    loose = [pair for pair in candidates if not is_exact(pair[0])]

    for _, candidate in exact + loose:
        number = jsonable_float(candidate)
        if number is None and isinstance(candidate, dict):
            number = jsonable_float(candidate.get("snr"))
        if number is not None:
            return number
    return None
|
||||
|
||||
|
||||
def find_site_roots(base_dir: Path) -> list[tuple[str, Path]]:
    """Locate site directories directly under ``base_dir`` or one level deeper.

    A directory counts as a site root when it contains ``metrics.json``,
    ``raw/preselection``, ``phenocam`` or ``raw/phenocam``. Children that are
    not site roots themselves are searched for site-root grandchildren.

    Returns:
        ``(name, path)`` pairs in sorted directory order. A direct child is
        named after itself; a grandchild is named after its parent when its
        own name is all digits, otherwise ``"{parent}_{grandchild}"``.
    """
    found: list[tuple[str, Path]] = []
    if not base_dir.is_dir():
        return found

    markers = (
        ("metrics.json",),
        ("raw", "preselection"),
        ("phenocam",),
        ("raw", "phenocam"),
    )

    def is_site_root(candidate: Path) -> bool:
        return any(candidate.joinpath(*parts).exists() for parts in markers)

    def subdirectories(parent: Path) -> list[Path]:
        return sorted(entry for entry in parent.iterdir() if entry.is_dir())

    for site_dir in subdirectories(base_dir):
        if is_site_root(site_dir):
            found.append((site_dir.name, site_dir))
            continue
        for nested in subdirectories(site_dir):
            if not is_site_root(nested):
                continue
            if nested.name.isdigit():
                label = site_dir.name
            else:
                label = f"{site_dir.name}_{nested.name}"
            found.append((label, nested))

    return found
|
||||
|
||||
|
||||
def find_s2_preselection(site_root: Path) -> Path | None:
    """Return the site's ``s2_preselection.json``, or ``None`` if absent.

    ``raw/preselection`` is checked before ``preselection``.
    """
    for folder in (site_root / "raw" / "preselection", site_root / "preselection"):
        candidate = folder / "s2_preselection.json"
        if candidate.is_file():
            return candidate
    return None
|
||||
|
||||
|
||||
def find_s3_timeseries(site_root: Path) -> Path | None:
    """Return the site's S3 GCC ``timeseries.json``, or ``None`` if absent.

    The two known scenario folders are tried first, in a fixed order; if
    neither holds the file, the first sorted match of a looser glob over
    ``processed*aggressive*sigma20*`` folders is used.
    """
    for scenario in ("processed_aggressive_sigma20", "processed_aggressive_itb_sigma20"):
        candidate = site_root / scenario / "gcc" / "s3" / "timeseries.json"
        if candidate.is_file():
            return candidate
    fallback = sorted(site_root.glob("processed*aggressive*sigma20*/gcc/s3/timeseries.json"))
    if not fallback:
        return None
    return fallback[0]
|
||||
|
||||
|
||||
def find_metrics(site_root: Path) -> Path | None:
    """Return ``site_root/metrics.json`` when it is a file, else ``None``."""
    candidate = site_root / "metrics.json"
    if candidate.is_file():
        return candidate
    return None
|
||||
|
||||
|
||||
def find_phenocam(site_root: Path) -> Path | None:
    """Return the site's PhenoCam GCC JSON file, or ``None`` if none exists.

    Exact file names (``gcc_90.json``, then ``phenocam_gcc.json``) are tried
    in ``phenocam/`` and then ``raw/phenocam/``. Failing that, progressively
    looser glob patterns are tried and the first sorted match wins.
    """
    for folder in (site_root / "phenocam", site_root / "raw" / "phenocam"):
        for filename in ("gcc_90.json", "phenocam_gcc.json"):
            candidate = folder / filename
            if candidate.is_file():
                return candidate

    fallback_patterns = (
        "phenocam/*gcc*90*.json",
        "phenocam/*gcc*.json",
        "raw/phenocam/*gcc*90*.json",
        "raw/phenocam/*gcc*.json",
        "raw/phenocam/*.json",
    )
    for pattern in fallback_patterns:
        hits = sorted(site_root.glob(pattern))
        if hits:
            return hits[0]
    return None
|
||||
|
||||
|
||||
def print_structure(label: str, path: Path | None) -> None:
    """Print a short structural description of one discovered JSON file.

    Always prints the label, then either ``missing`` or the path, top-level
    type and top-level keys. For ``metrics.json`` it additionally lists the
    phenocam-like top-level keys and every snr-like entry; for any other
    label it prints a compact preview of the first two records.
    """
    print(f"\n[{label}]")
    if path is None:
        print("missing")
        return

    data = load_json(path)
    print(f"path: {path}")
    print(f"type: {type(data).__name__}")
    print(f"keys: {top_keys(data)}")

    if label != "metrics.json":
        records = first_records(data, 2)
        if records:
            print(f"first {len(records)} entr{'y' if len(records) == 1 else 'ies'}:")
            print(json.dumps(compact(records), indent=2, default=str))
        return

    # metrics.json: summarise instead of dumping records.
    snr = recursive_snr_candidates(data)
    phenocam_keys = []
    if isinstance(data, dict):
        phenocam_keys = [
            (key, top_keys(value))
            for key, value in data.items()
            if "phenocam" in str(key).lower()
        ]
    print(f"phenocam-like keys: {phenocam_keys}")
    print(f"snr-like keys: {[(path, compact(value)) for path, value in snr]}")
|
||||
|
||||
|
||||
def run_discovery(site_name: str, site_root: Path) -> None:
    """Print the discovered structure of one site's four input files."""
    print("\n=== Discovery mode ===")
    print(f"Using site: {site_name} ({site_root})")
    inputs = (
        ("s2_preselection.json", find_s2_preselection),
        ("S3 timeseries.json", find_s3_timeseries),
        ("metrics.json", find_metrics),
        ("PhenoCam gcc_90 file", find_phenocam),
    )
    for label, locate in inputs:
        print_structure(label, locate(site_root))
    print("\n=== Computing indicators ===")
|
||||
|
||||
|
||||
def choose_discovery_site(site_roots: list[tuple[str, Path]]) -> tuple[str, Path]:
    """Pick the site with the most of the four input files present.

    Ties go to the earliest entry in ``site_roots``.

    Raises:
        ValueError: If ``site_roots`` is empty.
    """
    locators = (find_s2_preselection, find_s3_timeseries, find_metrics, find_phenocam)

    def inputs_present(item: tuple[str, Path]) -> int:
        root = item[1]
        return len([locate for locate in locators if locate(root) is not None])

    return max(site_roots, key=inputs_present)
|
||||
|
||||
|
||||
def truthy_status(value: Any, *, field_name: str | None = None) -> bool | None:
    """Interpret a status-like value as "the acquisition passes".

    Booleans and numbers are taken by truthiness, inverted when the field
    name contains ``reject`` or ``exclude``. The strings ``"true"`` and
    ``"false"`` (any case) are handled exactly like the corresponding
    booleans, including that inversion. ``None`` and other "nothing wrong"
    strings pass; known failure words fail. Any other text fails only when
    the field name contains ``reason`` or ``status``.

    Args:
        value: The raw field value.
        field_name: Name of the field the value came from, if known.

    Returns:
        ``True`` (passes), ``False`` (rejected) or ``None`` (undecidable).
    """
    name = (field_name or "").lower()
    negated = any(word in name for word in ("reject", "exclude"))

    if value is None:
        return True

    text = None
    if not isinstance(value, (bool, int, float)):
        text = str(value).strip().lower()
        # Boolean-like strings follow the same rules as real booleans.
        if text == "true":
            value = True
        elif text == "false":
            value = False

    if isinstance(value, (bool, int, float)):
        flag = bool(value)
        return not flag if negated else flag

    if text in {"", "none", "null", "nan", "ok", "pass", "passed", "keep", "kept", "valid", "selected"}:
        return True
    if text in {
        "fail",
        "failed",
        "reject",
        "rejected",
        "exclude",
        "excluded",
        "invalid",
        "cloud",
        "cloudy",
        "dark",
        "bad",
    }:
        return False
    # Free text in a reason/status field is taken as a rejection reason.
    if any(word in name for word in ("reason", "status")):
        return False
    return None
|
||||
|
||||
|
||||
def acquisition_passes(entry: dict[str, Any], strategy: str) -> bool:
    """Decide whether an S2 preselection entry survives ``strategy`` cleaning.

    The entry is probed in four steps and the first decisive answer wins:

    1. flags named ``{prefix}_{strategy}`` (reject-style, then pass-style);
    2. a value or nested dict stored under the strategy name itself;
    3. generic status fields shared by all strategies;
    4. a bare ``"value"`` in a record of at most three fields.

    ``nonaggressive`` is also matched as ``non_aggressive`` and
    ``non-aggressive``. Entries without any marker are treated as usable.
    """
    strategy_aliases = {
        strategy,
        strategy.replace("nonaggressive", "non_aggressive"),
        strategy.replace("nonaggressive", "non-aggressive"),
    }
    negative_prefixes = ("excluded", "exclude", "rejected", "reject")
    positive_prefixes = ("passed", "pass", "keep", "kept", "valid", "selected")

    # 1. Explicit per-strategy flags.
    for alias in strategy_aliases:
        for prefixes, inverted in ((negative_prefixes, True), (positive_prefixes, False)):
            for prefix in prefixes:
                flag_key = f"{prefix}_{alias}"
                if flag_key in entry:
                    return bool(entry[flag_key]) != inverted

    # 2. Something stored under the strategy name.
    for alias in strategy_aliases:
        nested = entry.get(alias)
        if nested is None:
            continue
        named_values = nested.items() if isinstance(nested, dict) else [(alias, nested)]
        for field, raw in named_values:
            verdict = truthy_status(raw, field_name=field)
            if verdict is not None:
                return verdict

    # 3. Generic status fields.
    generic_fields = (*negative_prefixes, *positive_prefixes, "status", "strategy", "reason", "rejection_reason")
    for field in generic_fields:
        if field not in entry:
            continue
        verdict = truthy_status(entry[field], field_name=field)
        if verdict is not None:
            return verdict

    # 4. Dict keyed by date with a scalar rejection reason.
    if "value" in entry and len(entry) <= 3:
        verdict = truthy_status(entry.get("value"), field_name="value")
        if verdict is not None:
            return verdict

    # Existing pipeline entries with band means and no rejection marker are usable.
    return True
|
||||
|
||||
|
||||
def band_value(entry: dict[str, Any], names: Iterable[str]) -> float | None:
    """Look up the first finite numeric value stored under any of ``names``.

    Key matching on ``entry`` is case-insensitive. If no name yields a
    number, the same lookup is repeated inside the known band-container
    sub-dicts (``bands``, ``band_means``, ...), whose keys are matched
    case-sensitively.

    Returns:
        The value as a float, or ``None`` when nothing usable is found.
    """
    by_lower = {str(key).lower(): item for key, item in entry.items()}
    for name in names:
        wanted = name.lower()
        if wanted not in by_lower:
            continue
        number = jsonable_float(by_lower[wanted])
        if number is not None:
            return number

    containers = ("bands", "band_means", "reflectance", "reflectances", "means", "window_means")
    for container_key in containers:
        nested = entry.get(container_key)
        if not isinstance(nested, dict):
            continue
        number = band_value(nested, names)
        if number is not None:
            return number
    return None
|
||||
|
||||
|
||||
def entry_date(entry: dict[str, Any]) -> pd.Timestamp | None:
    """Extract a record's date from its date-like or filename-like fields.

    Explicit date fields are consulted before filename-like ones; the first
    field that parses wins.

    Returns:
        The parsed date, or ``None`` when no field yields one.
    """
    date_fields = ("date", "datetime", "time", "timestamp", "acquisition_date")
    name_fields = ("filename", "file", "path", "name")
    for field in date_fields + name_fields:
        if field not in entry:
            continue
        parsed = parse_date(entry[field])
        if parsed is not None:
            return parsed
    return None
|
||||
|
||||
|
||||
def s2_gcc_series(s2_data: Any) -> pd.DataFrame:
    """Build a per-date S2 GCC series from aggressive-passing acquisitions.

    GCC is ``green / (blue + green + red)``. Entries that fail the
    ``aggressive`` strategy, lack a date or any of the three bands, or have
    a non-positive band sum are dropped. Several values on one date are
    averaged.

    Returns:
        DataFrame with columns ``date`` and ``s2_gcc``, sorted by date
        (empty, with those columns, when nothing qualifies).
    """
    samples = []
    for entry in normalize_records(s2_data):
        if not isinstance(entry, dict):
            continue
        if not acquisition_passes(entry, "aggressive"):
            continue
        when = entry_date(entry)
        blue = band_value(entry, ("b02", "blue", "B02", "band_1", "band1"))
        green = band_value(entry, ("b03", "green", "B03", "band_2", "band2"))
        red = band_value(entry, ("b04", "red", "B04", "band_3", "band3"))
        if None in (when, blue, green, red):
            continue
        total = blue + green + red
        if total > 0:
            samples.append({"date": when, "s2_gcc": green / total})

    if not samples:
        return pd.DataFrame(columns=["date", "s2_gcc"])
    per_date = pd.DataFrame(samples).groupby("date", as_index=False)["s2_gcc"].mean()
    return per_date.sort_values("date")
|
||||
|
||||
|
||||
def value_from_record(entry: dict[str, Any], preferred: Iterable[str]) -> float | None:
    """Pick a record's greenness value.

    The ``preferred`` field names are tried first (case-insensitively, in
    order); if none holds a finite number, the first field whose name
    contains ``gcc`` or ``greenness`` and holds one is used.

    Returns:
        The value as a float, or ``None`` when the record has none.
    """
    by_lower = {str(key).lower(): item for key, item in entry.items()}

    for name in preferred:
        number = jsonable_float(by_lower.get(name.lower()))
        if number is not None:
            return number

    for field, raw in by_lower.items():
        if "gcc" not in field and "greenness" not in field:
            continue
        number = jsonable_float(raw)
        if number is not None:
            return number
    return None
|
||||
|
||||
|
||||
def gcc_timeseries(data: Any, value_name: str) -> pd.DataFrame:
    """Turn a JSON GCC timeseries into a per-date DataFrame.

    Args:
        data: Decoded JSON in any shape ``normalize_records`` understands.
        value_name: Name of the value column in the result.

    Returns:
        DataFrame with columns ``date`` and ``value_name``, one row per date
        (same-date values averaged), sorted by date; empty with those
        columns when no record has both a date and a value.
    """
    value_fields = ("greenness_index", "gcc_90", "gcc", "value", "mean", "site_value")
    samples = []
    for entry in normalize_records(data):
        if not isinstance(entry, dict):
            continue
        when = entry_date(entry)
        reading = value_from_record(entry, value_fields)
        if when is None or reading is None:
            continue
        samples.append({"date": when, value_name: reading})

    if not samples:
        return pd.DataFrame(columns=["date", value_name])
    per_date = pd.DataFrame(samples).groupby("date", as_index=False)[value_name].mean()
    return per_date.sort_values("date")
|
||||
|
||||
|
||||
def compute_archive_density(s2_data: Any | None) -> tuple[int | None, int | None]:
    """Count S2 acquisitions that pass each cleaning strategy.

    Returns:
        ``(n_aggressive, n_nonaggressive)``, or ``(None, None)`` when there
        is no S2 data or it contains no records.
    """
    if s2_data is None:
        return None, None
    records = [entry for entry in normalize_records(s2_data) if isinstance(entry, dict)]
    if not records:
        return None, None

    def passing(strategy: str) -> int:
        return len([entry for entry in records if acquisition_passes(entry, strategy)])

    aggressive = passing("aggressive")
    nonaggressive = passing("nonaggressive")
    return aggressive, nonaggressive
|
||||
|
||||
|
||||
def compute_coherence(s2_data: Any | None, s3_data: Any | None) -> tuple[int | None, float | None, float | None]:
    """Correlate S2 and S3 GCC on nearest-date matched acquisitions.

    Each S2 date is paired with the nearest S3 date within
    ``MATCH_TOLERANCE_DAYS``; S2 dates without a partner are dropped.

    Returns:
        ``(n_matched, pearson_r, p_value)``. All three are ``None`` when an
        input is missing; the count is ``0`` when either series is empty;
        ``r`` and ``p`` are ``None`` with fewer than two pairs or when the
        statistic is not finite.
    """
    if s2_data is None or s3_data is None:
        return None, None, None

    s2_frame = s2_gcc_series(s2_data)
    s3_frame = gcc_timeseries(s3_data, "s3_gcc")
    if s2_frame.empty or s3_frame.empty:
        return 0, None, None

    window = pd.Timedelta(days=MATCH_TOLERANCE_DAYS)
    paired = pd.merge_asof(
        s2_frame.sort_values("date"),
        s3_frame.sort_values("date"),
        on="date",
        direction="nearest",
        tolerance=window,
    )
    paired = paired.dropna(subset=["s2_gcc", "s3_gcc"])

    pair_count = int(len(paired))
    if pair_count < 2:
        return pair_count, None, None

    s2_values = paired["s2_gcc"].to_numpy()
    s3_values = paired["s3_gcc"].to_numpy()
    r_value, p_value = pearsonr(s2_values, s3_values)
    return pair_count, jsonable_float(r_value), jsonable_float(p_value)
|
||||
|
||||
|
||||
def phenocam_series(data: Any | None) -> pd.DataFrame:
    """Build a per-date PhenoCam GCC series from decoded JSON.

    Returns:
        DataFrame with columns ``date`` and ``gcc``, one row per date
        (same-date values averaged), sorted by date; empty with those
        columns when ``data`` is ``None`` or holds no dated values.
    """
    empty = pd.DataFrame(columns=["date", "gcc"])
    if data is None:
        return empty

    value_fields = ("gcc_90", "greenness_index", "gcc", "gcc_mean", "value")
    samples = []
    for entry in normalize_records(data):
        # A non-dict record carries no date, so it can never contribute.
        if not isinstance(entry, dict):
            continue
        when = entry_date(entry)
        reading = value_from_record(entry, value_fields)
        if when is None or reading is None:
            continue
        samples.append({"date": when, "gcc": reading})

    if not samples:
        return empty
    per_date = pd.DataFrame(samples).groupby("date", as_index=False)["gcc"].mean()
    return per_date.sort_values("date")
|
||||
|
||||
|
||||
def compute_snr_from_phenocam(phenocam_data: Any | None) -> float | None:
    """Estimate the PhenoCam GCC signal-to-noise ratio.

    SNR is the GCC amplitude (max - min) divided by the RMSE of the residual
    around a cubic ``UnivariateSpline`` fitted over day offsets.

    Args:
        phenocam_data: Decoded PhenoCam JSON, or ``None``.

    Returns:
        The ratio as a finite float, or ``None`` when there are fewer than
        five dated values, the fit fails, or the ratio is undefined or
        non-finite. Non-finite values are rejected because the results are
        later written as strict JSON.
    """
    series = phenocam_series(phenocam_data)
    if len(series) < 5:
        return None
    x = (series["date"] - series["date"].min()).dt.days.to_numpy(dtype=float)
    y = series["gcc"].to_numpy(dtype=float)
    if len(np.unique(x)) < 5:
        return None
    try:
        # NOTE(review): the smoothing factor is left at SciPy's default
        # (s = number of points), which is documented as appropriate when
        # the noise standard deviation is about 1 - confirm that this is the
        # intended amount of smoothing for GCC-scale values.
        spline = UnivariateSpline(x, y, k=3)
        residual = y - spline(x)
    except Exception as exc:
        print(f"[WARN] Could not fit PhenoCam smoothing spline: {exc}")
        return None
    rmse = float(np.sqrt(np.mean(residual**2)))
    amplitude = float(np.max(y) - np.min(y))
    if not math.isfinite(rmse) or rmse <= 0:
        return None
    snr = amplitude / rmse
    return snr if math.isfinite(snr) else None
|
||||
|
||||
|
||||
def compute_snr(metrics_data: Any | None, phenocam_data: Any | None) -> float | None:
    """Return the site's SNR, preferring a value already stored in metrics.

    Falls back to estimating it from the PhenoCam series when the metrics
    hold no numeric SNR.
    """
    stored = find_numeric_snr(metrics_data)
    return stored if stored is not None else compute_snr_from_phenocam(phenocam_data)
|
||||
|
||||
|
||||
def compute_site(site_root: Path) -> dict[str, Any]:
    """Compute all suitability indicators for one site directory.

    Inputs that cannot be located are passed on as ``None``, which the
    indicator functions report as ``None`` results.

    Args:
        site_root: Directory holding the site's pipeline outputs.

    Returns:
        Dict with ``snr``, ``snr_pass``, ``n_s2_aggressive``,
        ``n_s2_nonaggressive``, ``coherence_n_matched``,
        ``coherence_pearson_r`` and ``coherence_p_value``.
    """

    def load_optional(path: Path | None) -> Any | None:
        # A missing input is simply None; no placeholder path is ever opened.
        return None if path is None else load_json(path)

    s2_data = load_optional(find_s2_preselection(site_root))
    s3_data = load_optional(find_s3_timeseries(site_root))
    metrics_data = load_optional(find_metrics(site_root))
    phenocam_data = load_optional(find_phenocam(site_root))

    snr = compute_snr(metrics_data, phenocam_data)
    n_s2_aggressive, n_s2_nonaggressive = compute_archive_density(s2_data)
    n_matched, pearson_r, p_value = compute_coherence(s2_data, s3_data)

    return {
        "snr": snr,
        "snr_pass": None if snr is None else snr >= SNR_THRESHOLD,
        "n_s2_aggressive": n_s2_aggressive,
        "n_s2_nonaggressive": n_s2_nonaggressive,
        "coherence_n_matched": n_matched,
        "coherence_pearson_r": pearson_r,
        "coherence_p_value": p_value,
    }
|
||||
|
||||
|
||||
def print_summary(results: dict[str, dict[str, Any]]) -> None:
    """Print the per-site indicators as an aligned plain-text table.

    Missing values are shown as ``null``, counts as integers, booleans as
    ``true``/``false`` and other numbers with four significant digits.

    Args:
        results: Indicator dict per site name, as built by the main routine.
    """
    print("\nSuitability summary")
    if not results:
        print("(no sites found)")
        return

    columns = [
        ("site", "site"),
        ("snr", "snr"),
        ("snr_pass", "pass"),
        ("n_s2_aggressive", "n_s2_agg"),
        ("n_s2_nonaggressive", "n_s2_nonagg"),
        ("coherence_n_matched", "n_match"),
        ("coherence_pearson_r", "pearson_r"),
        ("coherence_p_value", "p_value"),
    ]
    titles = [title for _, title in columns]
    value_keys = [key for key, _ in columns[1:]]

    def render(value: Any, key: str) -> str:
        if value is None:
            return "null"
        if key.startswith("n_") or key == "coherence_n_matched":
            return str(int(value))
        # bool must be tested before the generic number branch.
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return f"{float(value):.4g}"
        return str(value)

    table = [
        [site] + [render(values.get(key), key) for key in value_keys]
        for site, values in results.items()
    ]
    # Each column is as wide as its longest cell or its title.
    widths = [
        max(len(title), *(len(cell) for cell in cells))
        for title, cells in zip(titles, zip(*table))
    ]

    def emit(cells: list[str]) -> None:
        print(" ".join(cell.ljust(width) for cell, width in zip(cells, widths)))

    emit(titles)
    print(" ".join("-" * width for width in widths))
    for line in table:
        emit(line)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: screen every site under ``--base-dir``.

    Prints the discovered JSON structure of the best-populated site,
    computes the indicators for all sites, writes them to ``OUTPUT_NAME``
    inside the base directory and prints a summary table.

    Returns:
        ``0`` on success, ``1`` when the base directory does not exist (the
        output file could not be written there).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--base-dir",
        required=True,
        type=Path,
        help="Pipeline output root containing one subdirectory per site.",
    )
    args = parser.parse_args()

    base_dir = args.base_dir.expanduser().resolve()
    if not base_dir.is_dir():
        # Without this check the run would end in a traceback when the
        # output file is opened for writing.
        print(f"[ERROR] Base directory does not exist or is not a directory: {base_dir}")
        return 1

    site_roots = find_site_roots(base_dir)
    if site_roots:
        run_discovery(*choose_discovery_site(site_roots))
    else:
        print(f"[WARN] No site directories found under {base_dir}")

    results = {site_name: compute_site(site_root) for site_name, site_root in site_roots}
    output_path = base_dir / OUTPUT_NAME
    with output_path.open("w", encoding="utf-8") as f:
        # allow_nan=False keeps the file strict JSON.
        json.dump(results, f, indent=2, allow_nan=False)
        f.write("\n")
    print_summary(results)
    print(f"\nWrote {output_path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
@ -1,397 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Fusion Viewer</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
|
||||
<script src="common.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
|
||||
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
|
||||
.selectors { margin-bottom: 20px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { margin: 0 0 5px 0; font-size: 22px; }
|
||||
.season-row { padding-bottom: 15px; }
|
||||
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
|
||||
.download-links { margin-left: 10px; font-size: 14px; }
|
||||
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
|
||||
.download-links a:hover { text-decoration: underline; }
|
||||
#dateSlider { width: 100%; margin: 15px 0; }
|
||||
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
|
||||
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
|
||||
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
|
||||
#fusionMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
|
||||
.leaflet-image-layer { image-rendering: pixelated; }
|
||||
.leaflet-control-attribution { display: none; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header-sticky">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html" class="active">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="siteName">Innsbruck</h1>
|
||||
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
<label>Strategy:</label>
|
||||
<select id="strategySelect">
|
||||
<option value="aggressive">Aggressive</option>
|
||||
<option value="nonaggressive">Non-aggressive</option>
|
||||
</select>
|
||||
<label>Sigma:</label>
|
||||
<select id="sigmaSelect">
|
||||
<option value="20">σ=20</option>
|
||||
<option value="30">σ=30</option>
|
||||
</select>
|
||||
<label>Mode:</label>
|
||||
<select id="fusionModeSelect" title="BtI = reflectance fusion; ItB = GCC fusion">
|
||||
<option value="bti">BtI (REFL)</option>
|
||||
<option value="itb">ItB (GCC)</option>
|
||||
</select>
|
||||
</div>
|
||||
<input type="range" id="dateSlider" min="0" max="365" value="0">
|
||||
<div id="dateDisplay">2024-01-01</div>
|
||||
</div>
|
||||
<div class="map-label" id="mapLabelFusion">Fusion RGB (closest available)</div>
|
||||
<div id="mapDate" class="map-date"></div>
|
||||
<div id="fusionMap"></div>
|
||||
<div id="plots">
|
||||
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
|
||||
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
|
||||
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
|
||||
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
|
||||
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
|
||||
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
|
||||
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");
|
||||
|
||||
let siteName = "innsbruck", season = "2024";
|
||||
let strategy = "aggressive", sigma = "20", fusionMode = "bti";
|
||||
let sitePosition = [47.116171, 11.320308];
|
||||
let start = new Date(2024, 0, 1);
|
||||
let availableSiteSeasons = {};
|
||||
let fusionMap = null, overlay = null, marker = null;
|
||||
let ndviTs = [], gccTs = [], bandsTs = [];
|
||||
const BANDS = [{key:"b02",color:"#0066ff"},{key:"b03",color:"#00aa00"},{key:"b04",color:"#cc0000"},{key:"b8a",color:"#9900cc"}];
|
||||
const urlParams = new URLSearchParams(location.search);
|
||||
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
|
||||
|
||||
const fmtDate = (d) => `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
|
||||
const dateFromDays = (days) => fmtDate(new Date(start.getTime() + days * 86400000));
|
||||
// Convert a "YYYY-MM-DD" string into a whole-day offset from `start`.
// Both instants are built in local time, so a daylight-saving switch between
// them leaves the difference an hour short of (or beyond) a whole number of
// days. Rounding instead of flooring keeps the day index right in that case.
// NOTE(review): assumes `start` is a local midnight, as it is where this
// script initialises it; confirm for any later reassignment.
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  return Math.round((new Date(y, m - 1, d) - start) / 86400000);
};
|
||||
|
||||
// Name of the prepared-data folder for the selected strategy; Index-then-Blend
// mode uses the "_itb" variant of that folder.
function getPreparedBase() {
  const suffix = fusionMode === "itb" ? "_itb" : "";
  return `prepared_${strategy}${suffix}`;
}
|
||||
|
||||
// Relative URL of the fusion output folder for the current site, season,
// strategy, mode and sigma.
function getFusionDir() {
  const parts = ["data", siteName, season, getPreparedBase(), getFusionTimeseriesDir()];
  return parts.join("/");
}
|
||||
|
||||
/** Fusion sub-directory name: "fusion_sigma30" when sigma is "30", else "fusion". */
function getFusionTimeseriesDir() {
  if (sigma === "30") return "fusion_sigma30";
  return "fusion";
}
|
||||
|
||||
/**
 * Fetch the time series for the current selection into ndviTs / gccTs /
 * bandsTs, then redraw the plots and refresh the download links.
 *
 * In "itb" mode only the GCC series is requested. A non-OK response yields an
 * empty series; any thrown error empties all three.
 */
async function loadTimeseries() {
  const sub = getFusionTimeseriesDir();
  const base = `data/${siteName}/${season}/${getPreparedBase()}`;
  const fetchSeries = (product) =>
    fetch(`${base}/${product}/${sub}/timeseries.json`).then((r) => (r.ok ? r.json() : []));

  try {
    if (fusionMode === "itb") {
      const gcc = await fetchSeries("gcc");
      ndviTs = [];
      gccTs = gcc;
      bandsTs = [];
    } else {
      const [ndvi, gcc, bands] = await Promise.all([
        fetchSeries("ndvi"),
        fetchSeries("gcc"),
        fetchSeries("bands"),
      ]);
      ndviTs = ndvi;
      gccTs = gcc;
      bandsTs = bands;
    }
  } catch {
    ndviTs = [];
    gccTs = [];
    bandsTs = [];
  }

  drawPlots();
  updateDownloadLinks();
}
|
||||
|
||||
/**
 * Draw one time series on a canvas, with a cursor at the slider date.
 *
 * @param canvasId id of the target canvas; nothing happens if it is missing
 * @param data     array of records that have a `date` and possibly `key`
 * @param key      property to plot; records where it is null/undefined are skipped
 * @param color    stroke colour of the series line
 */
function drawPlot(canvasId, data, key, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;
  const w = canvas.width;
  const h = canvas.height;
  const pad = 30;
  const plotW = w - pad * 2;
  const plotH = h - pad * 2;

  const samples = data.filter((row) => row[key] != null);
  if (!samples.length) {
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.fillStyle = "#999";
    ctx.font = "12px sans-serif";
    ctx.fillText("No data", pad, pad + plotH / 2);
    return;
  }

  // Axis ranges; a zero span falls back to 1 so the scaling never divides by 0.
  const times = samples.map((row) => new Date(row.date));
  const values = samples.map((row) => row[key]);
  const minD = new Date(Math.min(...times));
  const maxD = new Date(Math.max(...times));
  const minV = Math.min(...values);
  const maxV = Math.max(...values);
  const dRange = maxD - minD || 1;
  const vRange = maxV - minV || 1;
  const x = (d) => pad + ((new Date(d) - minD) / dRange) * plotW;
  const y = (v) => pad + plotH - ((v - minV) / vRange) * plotH;

  // Axes.
  ctx.clearRect(0, 0, w, h);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, pad + plotH);
  ctx.lineTo(pad + plotW, pad + plotH);
  ctx.stroke();

  // Min/max value labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
  ctx.fillText(maxV.toFixed(3), 2, pad + 3);

  // Series polyline.
  ctx.strokeStyle = color;
  ctx.beginPath();
  samples.forEach((row, i) => {
    const px = x(row.date);
    const py = y(row[key]);
    if (i) {
      ctx.lineTo(px, py);
    } else {
      ctx.moveTo(px, py);
    }
  });
  ctx.stroke();

  // Cursor at the slider date.
  const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const xPos = x(curDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(xPos, pad);
  ctx.lineTo(xPos, pad + plotH);
  ctx.stroke();

  // Label the sample nearest in time to the cursor (ties keep the earlier one).
  const gapTo = (row) => Math.abs(new Date(row.date) - new Date(curDate));
  let closest = samples[0];
  for (let i = 1; i < samples.length; i++) {
    if (gapTo(samples[i]) < gapTo(closest)) closest = samples[i];
  }
  if (closest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(closest[key].toFixed(3), xPos + 5, y(closest[key]) - 5);
  }
}
|
||||
|
||||
/** Redraw the NDVI and GCC plots and one plot per entry in BANDS. */
function drawPlots() {
  drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
  drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
  for (const band of BANDS) {
    drawPlot(`plot_${band.key}`, bandsTs, band.key, band.color);
  }
}
|
||||
|
||||
/**
 * Rebuild the download links for the current selection.
 *
 * "itb" mode offers only the GCC time-series JSON; otherwise the exported
 * JSON and CSV are offered with a descriptive download file name.
 *
 * The links are built with DOM APIs rather than an HTML string because
 * `strategy` is taken from the page URL in init(); interpolating it into
 * innerHTML would let a crafted link inject markup.
 */
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;
  const sub = getFusionTimeseriesDir();
  const prep = `data/${siteName}/${season}/${getPreparedBase()}`;

  const makeLink = (label, href, downloadName) => {
    const a = document.createElement("a");
    a.setAttribute("href", href);
    if (downloadName != null) a.setAttribute("download", downloadName);
    a.textContent = label;
    return a;
  };

  // Drop the previous links before appending the new ones.
  el.textContent = "";

  if (fusionMode === "itb") {
    el.appendChild(makeLink("[GCC JSON]", `${prep}/gcc/${sub}/timeseries.json`));
    return;
  }

  const base = `${prep}/export/${sub}`;
  const name = `${siteName}_${season}_fusion_${strategy}_${sub}`;
  el.appendChild(makeLink("[JSON]", `${base}/timeseries.json`, `${name}.json`));
  el.appendChild(makeLink("[CSV]", `${base}/timeseries.csv`, `${name}.csv`));
}
|
||||
|
||||
/**
 * Find the fused raster closest to a date within the current season.
 *
 * Probes the requested day, then days at increasing distance (earlier before
 * later), with HEAD requests and returns the first file name that exists
 * ("GCC_YYYYMMDD.tif" in "itb" mode, "REFL_YYYYMMDD.tif" otherwise).
 *
 * All arithmetic is done in UTC: the candidate day and the season bounds
 * (1 January to 31 December of `season`) use the same clock, so the first and
 * last day of the season are not skipped depending on the viewer's time zone.
 *
 * @param dateStr date as YYYY-MM-DD
 * @returns the file name, or null if nothing is found or dateStr is invalid
 */
async function findFusionFile(dateStr) {
  const DAY_MS = 86400000;
  const [y, m, d] = String(dateStr).split("-").map(Number);
  const targetMs = Date.UTC(y, m - 1, d);
  if (Number.isNaN(targetMs)) return null;

  const year = parseInt(season);
  const seasonStartMs = Date.UTC(year, 0, 1);
  const seasonEndMs = Date.UTC(year, 11, 31);
  const prefix = fusionMode === "itb" ? "GCC_" : "REFL_";
  // Resolve the directory once so one search never mixes two selections.
  const dir = getFusionDir();

  for (let offset = 0; offset <= 365; offset++) {
    for (const sign of offset === 0 ? [0] : [-1, 1]) {
      const ms = targetMs + sign * offset * DAY_MS;
      if (ms < seasonStartMs || ms > seasonEndMs) continue;
      const ds = new Date(ms).toISOString().split("T")[0].replace(/-/g, "");
      const filename = `${prefix}${ds}.tif`;
      try {
        const res = await fetch(`${dir}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Best effort: a failed probe counts as "not available"; keep searching.
      }
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Reproject the two corner points of a bounding box to EPSG:4326.
 *
 * @param bbox    four numbers; (bbox[0], bbox[1]) and (bbox[2], bbox[3]) are the corners
 * @param fromCRS CRS code of the input coordinates
 * @returns [[c1[1], c1[0]], [c2[1], c2[0]]] - each reprojected corner with its
 *          two components swapped
 */
function transformBounds(bbox, fromCRS) {
  const reprojectSwapped = (a, b) => {
    const p = proj4(fromCRS, "EPSG:4326", [a, b]);
    return [p[1], p[0]];
  };
  const first = reprojectSwapped(bbox[0], bbox[1]);
  const second = reprojectSwapped(bbox[2], bbox[3]);
  return [first, second];
}
|
||||
|
||||
/**
 * Download a fused raster and prepare it for display as an image overlay.
 *
 * @param filename file name inside the current fusion directory
 * @returns {dataUrl, bounds, dateStr}, where dateStr is the file name without
 *          its "REFL_"/"GCC_" prefix and ".tif" suffix
 * @throws Error when the server answers with a non-OK status, so that an
 *         error page is never handed to the GeoTIFF decoder
 */
async function loadGeotiff(filename) {
  const path = `${getFusionDir()}/${filename}`;
  const res = await fetch(path);
  if (!res.ok) throw new Error(`Failed to fetch ${path}: HTTP ${res.status}`);
  const buf = await res.arrayBuffer();
  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
  // EPSG:4326 boxes only need their components reordered; others are reprojected.
  const bounds =
    crsCode === "EPSG:4326"
      ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
      : transformBounds(bbox, crsCode);
  const dateStr = filename.replace(/^(REFL|GCC)_/, "").replace(".tif", "");
  return { dataUrl, bounds, dateStr };
}
|
||||
|
||||
/**
 * Show the fused raster closest to the slider date on the map.
 *
 * The overlay and the date label are cleared when no raster is found, the map
 * does not exist yet, or the raster fails to load.
 *
 * Calls may overlap (the slider fires while earlier lookups are still
 * running). Each call takes a ticket and stops after an await if a newer call
 * has started, so a slow, outdated lookup cannot overwrite a newer result.
 */
async function updateMap() {
  const ticket = (updateMap.latestTicket = (updateMap.latestTicket || 0) + 1);
  const isStale = () => ticket !== updateMap.latestTicket;

  const clearOverlay = () => {
    if (overlay && fusionMap) fusionMap.removeLayer(overlay);
    overlay = null;
    document.getElementById("mapDate").textContent = "";
  };

  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const filename = await findFusionFile(dateStr);
  if (isStale()) return;
  if (!filename || !fusionMap) {
    clearOverlay();
    return;
  }

  try {
    const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
    if (isStale()) return;
    if (overlay) fusionMap.removeLayer(overlay);
    overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(fusionMap);
    fusionMap.fitBounds(bounds);
    document.getElementById("mapDate").textContent =
      `${ds.slice(0, 4)}-${ds.slice(4, 6)}-${ds.slice(6, 8)}`;
  } catch (e) {
    if (isStale()) return;
    clearOverlay();
  }
}
|
||||
|
||||
/**
 * Check with a HEAD request whether the preselection file of a site/season exists.
 * Resolves to false when the request itself fails.
 */
async function probeDataExists(sitename, s) {
  const url = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
  try {
    const response = await fetch(url, { method: "HEAD" });
    return response.ok;
  } catch {
    return false;
  }
}
|
||||
|
||||
/** Look up a feature in window.sitesData by its `properties.sitename`; undefined if absent. */
function getSiteBySitename(sn) {
  const features = window.sitesData?.features;
  return features?.find((feature) => feature.properties?.sitename === sn);
}
|
||||
|
||||
/**
 * Switch the page to another site and season.
 *
 * Updates the selection state and slider origin, recentres the map, refreshes
 * the headings and slider range, writes site/season/mode to the URL, applies a
 * `date` URL parameter to the slider if present, then reloads the time series
 * and the map overlay.
 */
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const year = parseInt(season);
  start = new Date(year, 0, 1);

  // GeoJSON coordinates are [lon, lat]; sitePosition is kept as [lat, lon].
  const site = getSiteBySitename(newSite);
  const coords = site?.geometry?.coordinates;
  if (coords) {
    const [lon, lat] = coords;
    sitePosition = [lat, lon];
  }
  if (fusionMap) {
    fusionMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }

  document.getElementById("siteName").textContent = site?.properties?.description || newSite;
  document.getElementById("season").textContent = season;

  // The slider runs from 1 January (0) to 31 December of the season.
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(year, 11, 31);
  slider.max = Math.ceil((yearEnd - start) / 86400000);

  const params = new URLSearchParams(location.search);
  params.set("site", siteName);
  params.set("season", season);
  params.set("mode", fusionMode);
  history.replaceState({}, "", `?${params}`);

  const urlDate = params.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value));

  await loadTimeseries();
  await updateMap();
}
|
||||
|
||||
/**
 * Page start-up: load the site catalogue, find which site/season pairs have
 * data, fill the selectors, read the initial selection from the URL, create
 * the map, wire the selector handlers, and load the initial site and season.
 *
 * The strategy/sigma/mode handlers update the *current* query string. They
 * previously serialised the `urlParams` snapshot taken at load time, which
 * dropped the site, season and mode that setSiteSeason() had written.
 */
async function init() {
  // Site catalogue; a missing or unreadable file degrades to an empty list.
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  // Keep only the seasons whose preselection file answers the probe.
  for (const feature of window.sitesData.features || []) {
    const sn = feature.properties?.sitename;
    if (!sn) continue;
    const candidates = feature.properties?.seasons
      ? Object.keys(feature.properties.seasons).sort()
      : [];
    const withData = [];
    for (const s of candidates) {
      if (await probeDataExists(sn, s)) withData.push(s);
    }
    if (withData.length) availableSiteSeasons[sn] = withData;
  }

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const seasonOptions = (seasons) =>
    seasons.map((s) => `<option value="${s}">${s}</option>`).join("");

  // With no data at all, fall back to a single default site and season.
  siteSelect.innerHTML = "";
  (availableSites.length ? availableSites.sort() : ["innsbruck"]).forEach((sn) => {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  });

  // Initial selection: URL values win when they name something available.
  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite =
    urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason =
    urlSeason && initialSeasons.includes(urlSeason) ? urlSeason : initialSeasons[0] || "2024";

  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptions(initialSeasons);
  seasonSelect.value = initialSeason;

  strategy = urlParams.get("strategy") || "aggressive";
  sigma = urlParams.get("sigma") || "20";
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("strategySelect").value = strategy;
  document.getElementById("sigmaSelect").value = sigma;
  document.getElementById("fusionModeSelect").value = fusionMode;

  const refreshMapLabel = () => {
    const label = document.getElementById("mapLabelFusion");
    if (!label) return;
    label.textContent =
      fusionMode === "itb"
        ? "Fusion GCC grayscale (closest available)"
        : "Fusion RGB (closest available)";
  };
  refreshMapLabel();

  // Map centred on the initial site (GeoJSON stores [lon, lat]).
  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  fusionMap = L.map("fusionMap", { zoomControl: false })
    .setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, {
    icon: L.divIcon({
      className: "site-marker",
      html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>",
      iconSize: [8, 8],
    }),
  }).addTo(fusionMap);

  // Write one parameter into the query string as it is now, keeping the rest.
  const setUrlParam = (name, value) => {
    const params = new URLSearchParams(location.search);
    params.set(name, value);
    history.replaceState({}, "", `?${params}`);
  };
  const reloadData = () => {
    loadTimeseries();
    updateMap();
  };

  siteSelect.addEventListener("change", function () {
    const sn = this.value;
    const seasons = availableSiteSeasons[sn] || [];
    seasonSelect.innerHTML = seasonOptions(seasons);
    seasonSelect.value = seasons[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function () {
    setSiteSeason(siteSelect.value, this.value);
  });
  document.getElementById("strategySelect").addEventListener("change", function () {
    strategy = this.value;
    setUrlParam("strategy", strategy);
    reloadData();
  });
  document.getElementById("sigmaSelect").addEventListener("change", function () {
    sigma = this.value;
    setUrlParam("sigma", sigma);
    reloadData();
  });
  document.getElementById("fusionModeSelect").addEventListener("change", function () {
    fusionMode = this.value;
    setUrlParam("mode", fusionMode);
    refreshMapLabel();
    reloadData();
  });

  await setSiteSeason(initialSite, initialSeason);
}
|
||||
|
||||
// Slider moves: refresh the date label, the plot cursors and the map overlay.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const days = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(days);
  drawPlots();
  updateMap();
});

// Start the page.
init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,284 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Gap validation</title>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
|
||||
.selectors { margin-bottom: 18px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { font-size: 22px; margin-top: 0; }
|
||||
h2 { font-size: 16px; margin-top: 22px; color: #333; }
|
||||
h2:first-of-type { margin-top: 8px; }
|
||||
table { border-collapse: collapse; width: 100%; font-size: 12px; margin-bottom: 14px; }
|
||||
th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; vertical-align: top; }
|
||||
th { background: #f5f5f5; }
|
||||
td.num { text-align: right; font-variant-numeric: tabular-nums; }
|
||||
td.paths { font-size: 11px; word-break: break-all; color: #444; max-width: 420px; }
|
||||
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
|
||||
padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
|
||||
.intro code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
|
||||
.section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; line-height: 1.45; }
|
||||
.empty { color: #666; font-style: italic; }
|
||||
.err { color: #a00; }
|
||||
details.meta { font-size: 12px; margin-top: 12px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
|
||||
details.meta summary { cursor: pointer; font-weight: 600; }
|
||||
details.meta pre { margin: 8px 0 0; overflow: auto; font-size: 11px; max-height: 200px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html" class="active">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="pageTitle">Gap validation</h1>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
</div>
|
||||
<div id="content"></div>
|
||||
</div>
|
||||
<script>
|
||||
// Current selection; init() replaces these defaults.
let siteName = "innsbruck";
let season = "2024";
// sitename -> seasons that have a gap-validation summary; filled by init().
let availableSiteSeasons = {};
// Query string of the page, kept in sync by load().
const urlParams = new URLSearchParams(location.search);
|
||||
|
||||
/**
 * Check with a HEAD request whether a site/season has a gap-validation summary.
 * Resolves to false when the request itself fails.
 */
async function probeSummary(sn, s) {
  const url = `data/${sn}/${s}/validation/gap_validation_summary.json`;
  try {
    const response = await fetch(url, { method: "HEAD" });
    return response.ok;
  } catch {
    return false;
  }
}
|
||||
|
||||
/** Format a finite number with `d` decimals (default 4); anything else becomes "—". */
function fmt(v, d = 4) {
  const isFiniteNumber = typeof v === "number" && Number.isFinite(v);
  return isFiniteNumber ? v.toFixed(d) : "—";
}
|
||||
|
||||
/** Format a finite number rounded to the nearest integer; anything else becomes "—". */
function fmtInt(v) {
  const isFiniteNumber = typeof v === "number" && Number.isFinite(v);
  return isFiniteNumber ? String(Math.round(v)) : "—";
}
|
||||
|
||||
/**
 * Render the "Whittaker crossover" section of a summary as an HTML string.
 *
 * Uses the entry of `summary.whittaker_crossover` keyed by `summary.scenario`,
 * or the first entry if that one is missing. Returns "" when there is none.
 */
function crossoverBlock(summary) {
  const scenario = summary.scenario;
  const byScenario = summary.whittaker_crossover || {};
  const wc = (scenario && byScenario[scenario]) || Object.values(byScenario)[0];
  if (!wc) return "";

  const firstGap = wc.first_gap_days_fusion_nse_below_whittaker;
  const parts = [
    `<h2>Whittaker crossover (NSE<sub>S2</sub>)</h2>`,
    `<p class="section-note">${wc.whittaker_definition || ""}</p>`,
    `<p class="section-note"><b>First gap length (days)</b> where fusion NSE<sub>S2</sub> < Whittaker NSE<sub>S2</sub> (strict): <b>${firstGap ?? "—"}</b> (none if fusion never falls below).</p>`,
  ];

  // Per-gap comparison table, only when rows are present.
  const rows = wc.by_gap || [];
  if (rows.length) {
    parts.push(`<table><tr><th>Gap days</th><th class="num">NSE<sub>S2</sub> fusion</th><th class="num">NSE<sub>S2</sub> Whittaker</th></tr>`);
    for (const row of rows) {
      parts.push(`<tr><td>${row.gap_days}</td><td class="num">${fmt(row.nse_s2_fusion, 3)}</td><td class="num">${fmt(row.nse_s2_whittaker, 3)}</td></tr>`);
    }
    parts.push(`</table>`);
  }
  return parts.join("");
}
|
||||
|
||||
/**
 * Render the gap manifest as an HTML section with one table row per entry.
 * Returns "" when the manifest is missing or has no entries.
 */
function manifestTable(manifest) {
  const entries = manifest?.entries;
  if (!entries?.length) return "";

  const rows = [];
  for (const entry of manifest.entries) {
    const windowText = `${entry.window_start} → ${entry.window_end}`;
    rows.push(`<tr><td>${entry.transition || "—"}</td><td>${entry.gap_days}</td><td>${entry.prediction_date}</td><td>${windowText}</td><td>${entry.withheld_s2_filename || "—"}</td></tr>`);
  }

  return [
    `<h2>Gap manifest</h2>`,
    `<p class="section-note">From <code>data/${siteName}/${season}/validation/gap_manifest.json</code>. Midpoint rule: ${manifest.entries[0]?.midpoint_rule || "—"}.</p>`,
    `<table><tr><th>Transition</th><th>Gap days</th><th>Prediction</th><th>Window</th><th>Withheld S2</th></tr>`,
    ...rows,
    `</table>`,
  ].join("");
}
|
||||
|
||||
/**
 * Render the per-gap spatial metrics of a summary as an HTML table.
 *
 * Rows with an `error` show the message across the metric columns. Text taken
 * from the summary (transition, dates, file names, error messages, paths) is
 * HTML-escaped so that characters such as "<" in an error message cannot
 * break the table markup.
 *
 * @param results array of result records (summary.results)
 * @returns HTML string; a short notice when there are no rows
 */
function resultsTable(results) {
  if (!results?.length) return `<p class="empty">No result rows in summary.</p>`;

  const esc = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  // 13 columns in total.
  const head = [
    `<tr>`,
    `<th>Transition</th><th>Gap</th><th>Prediction</th><th>Withheld REFL</th>`,
    `<th class="num">RMSE<br><span style="font-weight:normal">gap</span></th>`,
    `<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">gap</span></th>`,
    `<th class="num">RMSE<br><span style="font-weight:normal">no gap</span></th>`,
    `<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">no gap</span></th>`,
    `<th class="num">ΔRMSE</th><th class="num">ΔNSE</th>`,
    `<th class="num">NSE<sub>S2</sub><br><span style="font-weight:normal">Whitt.</span></th>`,
    `<th class="num">n</th>`,
    `<th>Paths / error</th>`,
    `</tr>`,
  ].join("\n");

  const parts = [head];
  for (const r of results) {
    if (r.error) {
      // 2 cells + colspan 10 + 1 cell = 13, so the path lands under "Paths / error".
      parts.push(
        `<tr><td>${esc(r.transition ?? "—")}</td><td>${esc(r.gap_days ?? "—")}</td><td colspan="10" class="err">${esc(r.error)}</td><td class="paths">${esc(r.fused_gap_path || "")}</td></tr>`
      );
      continue;
    }
    const g = r.spatial?.gap || {};
    const ng = r.spatial?.no_gap || {};
    const wh = r.spatial?.whittaker || {};
    const dRm = r.spatial?.delta_rmse;
    const dNs = r.spatial?.delta_nse;
    const p = r.paths || {};
    const pathNote = [p.fused_gap, p.fused_no_gap, p.withheld_s2_refl]
      .filter(Boolean)
      .map(esc)
      .join("<br>");
    parts.push(
      [
        `<tr>`,
        `<td>${esc(r.transition || "—")}</td>`,
        `<td>${esc(r.gap_days)}</td>`,
        `<td>${esc(r.prediction_date || "—")}</td>`,
        `<td style="font-size:11px">${esc(r.withheld_s2_filename || "—")}</td>`,
        `<td class="num">${fmt(g.rmse)}</td>`,
        `<td class="num">${fmt(g.nse_s2, 3)}</td>`,
        `<td class="num">${fmt(ng.rmse)}</td>`,
        `<td class="num">${fmt(ng.nse_s2, 3)}</td>`,
        `<td class="num">${fmt(dRm)}</td>`,
        `<td class="num">${fmt(dNs, 3)}</td>`,
        `<td class="num">${fmt(wh.nse_s2, 3)}</td>`,
        `<td class="num">${fmtInt(g.n_pixels)}</td>`,
        `<td class="paths">${pathNote}</td>`,
        `</tr>`,
      ].join("\n")
    );
  }
  return `<table>${parts.join("")}</table>`;
}
|
||||
|
||||
/**
 * Render a collapsible "Run metadata" block with the git commit and the
 * space-joined command line of a summary. Returns "" when both are absent.
 */
function metaDetails(summary) {
  const { command_line: cmd, git_commit: git } = summary;
  if (!cmd && !git) return "";

  const pieces = [`<details class="meta"><summary>Run metadata</summary>`];
  if (git) pieces.push(`<p>Git: <code>${git}</code></p>`);
  if (cmd?.length) pieces.push(`<pre>${cmd.map((arg) => String(arg)).join(" ")}</pre>`);
  pieces.push(`</details>`);
  return pieces.join("");
}
|
||||
|
||||
/**
 * Fill #content from a loaded summary and manifest.
 *
 * Without a summary, shows an error with the command to produce one, followed
 * by the manifest table if a manifest is available. With a summary, shows the
 * intro, the spatial metrics table, the Whittaker crossover section, the run
 * metadata and the manifest table.
 */
async function render(summary, manifest) {
  const el = document.getElementById("content");

  if (!summary) {
    el.innerHTML =
      `<p class="err">Could not load <code>data/${siteName}/${season}/validation/gap_validation_summary.json</code>.</p>` +
      "\n" +
      `<p class="section-note">From <code>processing/</code>: <code>python -m gap_validation.run --site ${siteName} --season ${season} --lat LAT --lon LON</code> (see <code>--help</code>). Serve from <code>processing/</code>: <code>python3 -m http.server 8000</code> → <code>/webapp/gap_validation.html</code> (<code>webapp/data</code> → <code>../data</code>).</p>`;
    if (manifest?.entries) el.innerHTML += manifestTable(manifest);
    return;
  }

  const scen = summary.scenario || "—";
  const sn = summary.site_name ?? siteName;
  const se = summary.season ?? season;

  const sections = [
    [
      `<div class="intro">`,
      `Tier-2 withheld S2, spatial GCC vs withheld scene, NSE<sub>S2</sub>, and Whittaker comparison.`,
      `Summary: <code>data/${sn}/${se}/validation/gap_validation_summary.json</code>.`,
      `Scenario in this file: <b>${scen}</b> (one run overwrites; re-run CLI for other strategy/σ/mode).`,
      `</div>`,
    ].join("\n"),
    `<h2>Spatial metrics (per gap length)</h2>`,
    `<p class="section-note">Reference = GCC from withheld S2 REFL (bilinear to fusion grid). Prediction = fused GCC. ΔRMSE = RMSE<sub>gap</sub> − RMSE<sub>no gap</sub>; ΔNSE = NSE<sub>no gap</sub> − NSE<sub>gap</sub>.</p>`,
    resultsTable(summary.results),
    crossoverBlock(summary),
    metaDetails(summary),
  ];
  if (manifest?.entries) sections.push(manifestTable(manifest));
  el.innerHTML = sections.join("");
}
|
||||
|
||||
/**
 * Load the summary and the manifest of the current site/season, render them,
 * then update the page title and the `site` / `season` URL parameters.
 * A file that is missing or fails to load is passed on as null.
 */
async function load() {
  // Resolve to the parsed JSON, or null on a non-OK response or any error.
  const fetchJsonOrNull = async (url) => {
    try {
      const response = await fetch(url);
      return response.ok ? await response.json() : null;
    } catch {
      return null;
    }
  };

  const summary = await fetchJsonOrNull(
    `data/${siteName}/${season}/validation/gap_validation_summary.json`
  );
  const manifest = await fetchJsonOrNull(
    `data/${siteName}/${season}/validation/gap_manifest.json`
  );
  await render(summary, manifest);

  const site = window.sitesData?.features?.find(
    (feature) => feature.properties?.sitename === siteName
  );
  const label = site?.properties?.description || siteName;
  document.getElementById("pageTitle").textContent = label + " — gap validation — " + season;

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  history.replaceState({}, "", `?${urlParams}`);
}
|
||||
|
||||
/**
 * Page start-up: load the site catalogue, keep the site/season pairs that
 * have a gap-validation summary, fill the selectors, pick the initial
 * selection (URL values win when available), wire the selector handlers and
 * load the initial view.
 */
async function init() {
  // Site catalogue; a missing or unreadable file degrades to an empty list.
  try {
    const response = await fetch("data/sites.geojson");
    window.sitesData = response.ok ? await response.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  for (const feature of window.sitesData.features || []) {
    const sn = feature.properties?.sitename;
    if (!sn) continue;
    const candidates = feature.properties?.seasons
      ? Object.keys(feature.properties.seasons).sort()
      : [];
    const withData = [];
    for (const s of candidates) {
      if (await probeSummary(sn, s)) withData.push(s);
    }
    if (withData.length) availableSiteSeasons[sn] = withData;
  }

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  siteSelect.innerHTML = "";
  // With no data at all, fall back to a single default site and season.
  const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
  for (const sn of listedSites) {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  }

  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  let initialSite = availableSites[0] || "innsbruck";
  if (urlSite && availableSiteSeasons[urlSite]) initialSite = urlSite;
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  let initialSeason = initialSeasons[0] || "2024";
  if (urlSeason && initialSeasons.includes(urlSeason)) initialSeason = urlSeason;

  const seasonOptions = (seasons) =>
    seasons.map((s) => `<option value="${s}">${s}</option>`).join("");

  siteSelect.value = initialSite;
  document.getElementById("seasonSelect").innerHTML = seasonOptions(initialSeasons);
  document.getElementById("seasonSelect").value = initialSeason;
  siteName = initialSite;
  season = initialSeason;

  siteSelect.addEventListener("change", (event) => {
    const sn = event.currentTarget.value;
    const seasons = availableSiteSeasons[sn] || [];
    const seasonSelect = document.getElementById("seasonSelect");
    seasonSelect.innerHTML = seasonOptions(seasons);
    seasonSelect.value = seasons[0] || "2024";
    siteName = sn;
    season = seasonSelect.value;
    load();
  });
  document.getElementById("seasonSelect").addEventListener("change", (event) => {
    season = event.currentTarget.value;
    load();
  });

  await load();
}
|
||||
init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
1789
webapp/index.html
1789
webapp/index.html
File diff suppressed because it is too large
Load diff
|
|
@ -1,367 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Metrics</title>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
|
||||
.selectors { margin-bottom: 20px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { font-size: 22px; }
|
||||
h2 { font-size: 16px; margin-top: 24px; color: #333; }
|
||||
h2:first-of-type { margin-top: 8px; }
|
||||
h3 { font-size: 14px; margin: 14px 0 6px 0; color: #444; font-weight: 600; }
|
||||
table { border-collapse: collapse; width: 100%; font-size: 13px; margin-bottom: 12px; }
|
||||
th, td { border: 1px solid #ccc; padding: 6px 8px; text-align: left; }
|
||||
th { background: #f5f5f5; }
|
||||
td.num { text-align: right; font-variant-numeric: tabular-nums; }
|
||||
.fusion-block table { margin-bottom: 4px; }
|
||||
.fusion-block table + table { margin-top: 12px; }
|
||||
.section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
|
||||
.section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
|
||||
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
|
||||
padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
|
||||
.intro-short { margin-bottom: 0; }
|
||||
details.definitions { margin-top: 28px; font-size: 13px; border: 1px solid #e5e5e5; border-radius: 4px; padding: 8px 12px; background: #fafafa; }
|
||||
details.definitions summary { cursor: pointer; font-weight: 600; color: #333; }
|
||||
details.definitions ul { margin: 8px 0 0 18px; padding: 0; }
|
||||
details.definitions li { margin-bottom: 4px; }
|
||||
.scenario-key { font-size: 11px; color: #666; font-weight: normal; }
|
||||
.empty { color: #666; font-style: italic; }
|
||||
.err { color: #a00; }
|
||||
details.how-read {
|
||||
font-size: 12px; color: #333; line-height: 1.5; max-width: 820px; margin: 0 0 18px 0;
|
||||
padding: 8px 12px 10px; border: 1px solid #ccd; border-radius: 4px; background: #f8fafc;
|
||||
}
|
||||
details.how-read summary {
|
||||
cursor: pointer; font-weight: 600; font-size: 13px; color: #111; margin-bottom: 0;
|
||||
}
|
||||
details.how-read ol { margin: 10px 0 0; padding-left: 1.35rem; }
|
||||
details.how-read li { margin-bottom: 7px; }
|
||||
details.how-read li:last-child { margin-bottom: 0; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html" class="active">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="siteName">Metrics</h1>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
</div>
|
||||
<div id="content"></div>
|
||||
</div>
|
||||
<script>
|
||||
/** Metric columns shown in the UI; pearson_r, rmse, mae, n_samples remain in metrics.json only. */
const DISPLAY_METRIC_COLS = ["r_squared", "nrmse", "nse_pc"];

/** Column heading for each displayed metric. */
const DISPLAY_METRIC_LABELS = {
  r_squared: "R² vs mean",
  nrmse: "nRMSE",
  nse_pc: "NSE_PC",
};

/** Fusion scenario rows as [scenario key, strategy label, sigma], in display order. */
const FUSION_BTI_ROWS = [
  ["aggressive", "Aggressive"],
  ["nonaggressive", "Non-aggressive"],
].flatMap(([slug, label]) => [20, 30].map((sig) => [`${slug}_sigma${sig}`, label, sig]));
|
||||
|
||||
/** Read metric column `c` from `m`; "nse_pc" falls back to `m.nse` when `m.nse_pc` is null or undefined. */
function mv(m, c) {
  if (c !== "nse_pc") return m[c];
  return m.nse_pc ?? m.nse;
}
|
||||
/**
 * Format a metric for display: 3 decimals for r_squared and nse_pc, 4 for
 * nrmse, fmt() for any other column. Non-number values become "—".
 */
function fmtMetric(col, v) {
  if (typeof v !== "number") return "—";
  switch (col) {
    case "r_squared":
    case "nse_pc":
      return v.toFixed(3);
    case "nrmse":
      return v.toFixed(4);
    default:
      return fmt(v);
  }
}
|
||||
// Current selection (defaults).
let siteName = "innsbruck";
let season = "2024";
// sitename -> list of seasons.
let availableSiteSeasons = {};
// Query string of the page when the script started.
const urlParams = new URLSearchParams(location.search);
|
||||
|
||||
/**
 * Check with a HEAD request whether a site/season has a metrics.json.
 * Resolves to false when the request itself fails.
 */
async function probeMetrics(sn, s) {
  const url = `data/${sn}/${s}/metrics.json`;
  try {
    const response = await fetch(url, { method: "HEAD" });
    return response.ok;
  } catch {
    return false;
  }
}
|
||||
|
||||
/** Format a number: integers as-is, others with 4 decimals; non-numbers become "—". */
function fmt(v) {
  if (typeof v !== "number") return "—";
  if (Number.isInteger(v)) return String(v);
  return v.toFixed(4);
}
|
||||
|
||||
/**
 * Extract `m.residual_vs_phenocam.mean` as a finite number.
 *
 * Accepts numbers and non-blank numeric strings. Returns null when the
 * scenario, the residual object or the mean is missing or not finite.
 * JSON `null`, empty strings and booleans are treated as missing; a plain
 * Number() coercion would turn them into 0 or 1 and report a fake residual.
 */
function fusionMeanResidual(m) {
  const raw = m?.residual_vs_phenocam?.mean;
  if (typeof raw === "number") {
    return Number.isFinite(raw) ? raw : null;
  }
  if (typeof raw === "string" && raw.trim() !== "") {
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}
|
||||
|
||||
/**
 * Build the <tr> strings for one fusion sub-table.
 *
 * @param temporal          object mapping scenario keys to metric objects
 * @param keysWithLabels    [key, strategy label, sigma] triples, in row order
 * @param includeMeanResid  whether to append a mean-residual cell to each row
 * @returns array of row HTML strings; scenarios absent from `temporal` are skipped
 */
function fusionSubTableRows(temporal, keysWithLabels, includeMeanResid) {
  return keysWithLabels
    .filter(([key]) => temporal[key])
    .map(([key, stratLabel, sig]) => {
      const scenario = temporal[key];
      const metricCells = DISPLAY_METRIC_COLS
        .map((c) => `<td class="num">${fmtMetric(c, mv(scenario, c))}</td>`)
        .join("");
      let meanCell = "";
      if (includeMeanResid) {
        const resid = fusionMeanResidual(scenario);
        meanCell = `<td class="num">${resid !== null ? resid.toFixed(3) : "—"}</td>`;
      }
      return `<tr><td>${stratLabel}, σ=${sig} <span class="scenario-key">(${key})</span></td>${metricCells}${meanCell}</tr>`;
    });
}
|
||||
|
||||
/**
 * Render the fusion section: one table for Bands-then-Index scenarios and one
 * for Index-then-Bands scenarios (same keys with an "_itb" suffix).
 * The "Mean resid." column is shown in both tables as soon as any scenario
 * carries a mean residual. Returns a placeholder paragraph when there is
 * nothing to show.
 */
function fusionTables(temporal) {
  if (!temporal || typeof temporal !== "object") {
    return `<p class="empty">No fusion temporal data</p>`;
  }

  const itbRows = FUSION_BTI_ROWS.map(([key, label, sigma]) => [`${key}_itb`, label, sigma]);
  const showMean = [...FUSION_BTI_ROWS, ...itbRows].some(
    ([key]) => fusionMeanResidual(temporal[key]) !== null
  );

  const btiBody = fusionSubTableRows(temporal, FUSION_BTI_ROWS, showMean);
  const itbBody = fusionSubTableRows(temporal, itbRows, showMean);
  if (btiBody.length === 0 && itbBody.length === 0) {
    return `<p class="empty">No fusion scenarios in temporal</p>`;
  }

  const metricThs = DISPLAY_METRIC_COLS
    .map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`)
    .join("");
  const meanTh = showMean ? `<th class="num">Mean resid.</th>` : "";
  const head = `<tr><th>Setting</th>${metricThs}${meanTh}</tr>`;

  const sections = [];
  if (btiBody.length) {
    sections.push(`<h3>Bands-then-Index (BtI)</h3><table>${head}${btiBody.join("")}</table>`);
  }
  if (itbBody.length) {
    sections.push(`<h3>Index-then-Bands (ItB)</h3><table>${head}${itbBody.join("")}</table>`);
  }
  return `<div class="fusion-block">${sections.join("")}</div>`;
}
|
||||
|
||||
/**
 * Render the baseline metrics table.
 * Returns only <table>…</table>, or an empty string when `b` is not an object
 * or none of the expected baselines is present (no heading is emitted).
 */
function baselineTable(b) {
  if (!b || typeof b !== "object") return "";

  // [row label, metrics object] pairs in display order.
  const candidates = [["S2 GCC (all acquisitions)", b.s2]];
  for (const strat of ["aggressive", "nonaggressive"]) {
    candidates.push(
      [`S3 composite GCC (${strat})`, b.s3?.[strat]],
      [`S2 GCC cloud-screened (${strat})`, b.s2_cloudfree?.[strat]],
      [`S2 Whittaker λ=400 (${strat})`, b.s2_whittaker_lambda400?.[strat]]
    );
  }

  const rows = candidates
    .filter(([, m]) => m && typeof m === "object")
    .map(([label, m]) => {
      const cells = DISPLAY_METRIC_COLS
        .map((c) => `<td class="num">${fmtMetric(c, mv(m, c))}</td>`)
        .join("");
      return `<tr><td>${label}</td>${cells}</tr>`;
    });
  if (!rows.length) return "";

  const metricThs = DISPLAY_METRIC_COLS
    .map((c) => `<th class="num">${DISPLAY_METRIC_LABELS[c]}</th>`)
    .join("");
  const head = `<tr><th>Baseline</th>${metricThs}</tr>`;
  return `<table>${head}${rows.join("")}</table>`;
}
|
||||
|
||||
/**
 * Format `v` with 3 decimals, or return an em dash when it is not a finite value.
 *
 * Accepts numbers and non-blank numeric strings. JSON `null`, empty strings
 * and booleans are treated as missing: Number() would coerce them to 0 or 1
 * and print a misleading "0.000" for a value that does not exist.
 */
function fmtFixed3(v) {
  const usable = typeof v === "number" || (typeof v === "string" && v.trim() !== "");
  if (!usable) return "—";
  const n = Number(v);
  return Number.isFinite(n) ? n.toFixed(3) : "—";
}
|
||||
|
||||
/**
 * Render the "Summaries" section from `data.derived`.
 *
 * Emits a ΔNSE_PC table (σ20 minus σ30, per mode and strategy) when
 * `delta_nse_pc_sigma20_minus_sigma30` is present, and a paired BtI/ItB
 * mean-residual table when `bti_vs_itb_mean_residual` has rows.
 * Returns an empty string when neither is available.
 */
function derivedSection(d) {
  if (!d) return "";
  const dn = d.delta_nse_pc_sigma20_minus_sigma30;
  const paired = d.bti_vs_itb_mean_residual || [];
  if (!dn && !paired.length) return "";

  // A delta is "present" only if it is a finite number or numeric string.
  // null must not count: Number(null) is 0, which would hide the hint below.
  const isPresent = (v) =>
    (typeof v === "number" || (typeof v === "string" && v.trim() !== "")) &&
    Number.isFinite(Number(v));

  let h = `<h2>Summaries</h2>`;
  h += `<p class="section-note">Same numbers as Fusion, condensed. First table: which σ fits PhenoCam better (NSE_PC only). Second: mean bias BtI vs ItB.</p>`;

  if (dn) {
    h += `<p class="section-note"><b>ΔNSE_PC</b> = NSE_PC(σ20) − NSE_PC(σ30). <b>+</b> → σ20 better. <b>−</b> → σ30 better.</p>`;
    h += `<table><tr><th>Mode</th><th>Strategy</th><th class="num">ΔNSE_PC</th></tr>`;
    let anyDelta = false;
    for (const mode of ["bti", "itb"]) {
      for (const strat of ["aggressive", "nonaggressive"]) {
        const v = dn[mode]?.[strat];
        if (isPresent(v)) anyDelta = true;
        h += `<tr><td>${mode.toUpperCase()}</td><td>${strat}</td><td class="num">${fmtFixed3(v)}</td></tr>`;
      }
    }
    h += `</table>`;
    if (!anyDelta) {
      // Every cell is empty: tell the user how to regenerate the deltas.
      h += `<p class="section-note">ΔNSE_PC needs both σ20 and σ30 fusion rows in <code>temporal</code> (BtI and ItB). Re-run <code>metrics_stats</code>.</p>`;
    }
  }

  if (paired.length) {
    h += `<p class="section-note">Mean(fused − PhenoCam) per row. <b>+</b> / <b>−</b> = average over / under PhenoCam. Closer to <b>0</b> in a column = less bias for that workflow.</p>`;
    h += `<table><tr><th>Strategy</th><th>σ</th><th class="num">Mean residual BtI</th><th class="num">Mean residual ItB</th></tr>`;
    for (const row of paired) {
      h += `<tr><td>${row.strategy}</td><td>${row.sigma}</td><td class="num">${fmtFixed3(row.mean_residual_bti)}</td><td class="num">${fmtFixed3(row.mean_residual_itb)}</td></tr>`;
    }
    h += `</table>`;
  }
  return h;
}
|
||||
|
||||
/** Return the collapsible "How to read" help block as an HTML string. */
function howToReadBlock() {
  const lines = [
    `<details class="how-read">`,
    `<summary>How to read</summary>`,
    `<ol>`,
    `<li>All scores are satellite or fusion <b>GCC</b> vs <b>PhenoCam GCC</b> at the site 3×3 window, <b>same calendar days</b> only. Extra stats: <code>metrics.json</code>.</li>`,
    `<li><b>R² vs mean</b> and <b>NSE_PC</b> are the same value (1 − SS<sub>res</sub>/SS<sub>tot</sub> vs predicting mean PhenoCam each day); not (Pearson <i>r</i>)²; can be negative. Higher = better. <b>nRMSE</b>: lower = better.</li>`,
    `<li><b>Fusion:</b> same row number in BtI and in ItB = same screening + same σ — compare left/right. Down one block = change screening or σ.</li>`,
    `<li><b>Mean resid.</b> (if present): mean(fused − PhenoCam). Sign = average bias; use R² vs mean / nRMSE / NSE_PC for overall fit.</li>`,
    `<li><b>Summaries:</b> ΔNSE_PC = NSE at σ20 minus NSE at σ30 (+ means σ20 wins). Paired table: closer to 0 = less mean bias.</li>`,
    `</ol>`,
    `</details>`,
  ];
  return lines.join("\n");
}
|
||||
|
||||
/** Return the collapsible "Definitions" glossary as an HTML string. */
function definitionsDetails() {
  const lines = [
    `<details class="definitions">`,
    `<summary>Definitions</summary>`,
    `<ul>`,
    `<li><b>BtI</b>: fuse reflectance bands, then GCC.</li>`,
    `<li><b>ItB</b>: GCC on S2 and S3, then fuse GCC.</li>`,
    `<li><b>Scenario</b>: screening (<code>aggressive</code> / <code>nonaggressive</code>) × σ (20 / 30 days).</li>`,
    `<li><a href="phenology.html">Phenology</a> — PhenoCam SOS/EOS (TIMESAT).</li>`,
    `<li><b>R² vs mean</b> — coefficient of determination vs a constant mean(PhenoCam) baseline; JSON key <code>r_squared</code>; duplicates <code>nse_pc</code>. Not (Pearson <i>r</i>)².</li>`,
    `<li><code>metrics.json</code> — also Pearson <i>r</i>, RMSE, MAE, <code>n_samples</code>.</li>`,
    `</ul>`,
    `</details>`,
  ];
  return lines.join("\n");
}
|
||||
|
||||
/**
 * Render a parsed metrics.json into #content.
 * A falsy `data` shows a load-error message. Otherwise the page is built in
 * order: intro, help block, PhenoCam stats, baselines, fusion tables,
 * summaries, definitions.
 */
function render(data) {
  const el = document.getElementById("content");
  if (!data) {
    el.innerHTML = `<p class="err">Could not load metrics.json</p>`;
    return;
  }

  const parts = [];
  parts.push(
    [
      `<div class="intro intro-short">`,
      `GCC at the 3×3 site window vs PhenoCam. Sections: PhenoCam → baselines → fusion (BtI, then ItB) → summaries.`,
      `<code>data/${siteName}/${season}/metrics.json</code>`,
      `</div>`,
    ].join("\n")
  );
  parts.push(howToReadBlock());

  const stats = data.phenocam_stats;
  if (stats) {
    parts.push(`<h2>PhenoCam (ground truth)</h2>`);
    parts.push(`<p class="section-note">Camera ROI GCC (not compared to itself). Dates / SOS–EOS: <a href="phenology.html">Phenology</a>.</p>`);
    parts.push(`<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`);
    const cells = [stats.mean, stats.std, stats.min, stats.max, stats.n_samples]
      .map((value) => `<td class="num">${fmt(value)}</td>`)
      .join("");
    parts.push(`${cells}</tr></table>`);
  }

  const baselineTbl = baselineTable(data.baseline);
  if (baselineTbl) {
    parts.push(`<h2>Baselines (vs PhenoCam)</h2>`);
    parts.push(`<p class="section-note">Same columns as fusion (vs PhenoCam). Higher R² vs mean / NSE_PC, lower nRMSE = better. S3 = coarse-only; Whittaker = smoothed S2-only.</p>`);
    parts.push(baselineTbl);
  }

  parts.push(`<h2>Fusion (vs PhenoCam)</h2>`);
  parts.push(`<p class="section-note">BtI block vs ItB block: same row = same screening + σ. Within a block: four EFAST combinations.</p>`);
  parts.push(fusionTables(data.temporal || {}));

  parts.push(derivedSection(data.derived));
  parts.push(definitionsDetails());

  const html = parts.join("");
  el.innerHTML = html || `<p class="empty">Empty metrics file</p>`;
}
|
||||
|
||||
/**
 * Fetch metrics.json for the current site/season and render it, then update
 * the page heading and write the selection back into the URL query string.
 * Any failure while fetching, parsing or rendering falls back to render(null).
 */
async function load() {
  try {
    const response = await fetch(`data/${siteName}/${season}/metrics.json`);
    const payload = response.ok ? await response.json() : null;
    render(payload);
  } catch {
    render(null);
  }

  const features = window.sitesData?.features;
  const site = features?.find((f) => f.properties?.sitename === siteName);
  const title = site?.properties?.description || siteName;
  document.getElementById("siteName").textContent = title + " — " + season;

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  history.replaceState({}, "", `?${urlParams}`);
}
|
||||
|
||||
/**
 * Page bootstrap: load sites.geojson, probe which site/season pairs have a
 * metrics.json, fill the two selectors, pick the initial selection (URL
 * parameters win when they refer to available data), wire the change handlers
 * and load the first metrics file.
 */
async function init() {
  try {
    const response = await fetch("data/sites.geojson");
    window.sitesData = response.ok ? await response.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  // Probe sequentially; only seasons with a metrics.json are offered.
  for (const feature of window.sitesData.features || []) {
    const name = feature.properties?.sitename;
    if (!name) continue;
    const declared = feature.properties?.seasons
      ? Object.keys(feature.properties.seasons).sort()
      : [];
    const found = [];
    for (const s of declared) {
      if (await probeMetrics(name, s)) found.push(s);
    }
    if (found.length) availableSiteSeasons[name] = found;
  }

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const seasonOptions = (seasons) =>
    seasons.map((s) => `<option value="${s}">${s}</option>`).join("");

  siteSelect.innerHTML = "";
  // Fall back to a single default site when nothing was discovered.
  const listed = availableSites.length ? availableSites.sort() : ["innsbruck"];
  for (const name of listed) {
    const opt = document.createElement("option");
    opt.value = name;
    opt.textContent = name;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[name]) availableSiteSeasons[name] = ["2024"];
  }

  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite =
    urlSite && availableSiteSeasons[urlSite] ? urlSite : availableSites[0] || "innsbruck";
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason =
    urlSeason && initialSeasons.includes(urlSeason) ? urlSeason : initialSeasons[0] || "2024";

  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptions(initialSeasons);
  seasonSelect.value = initialSeason;
  siteName = initialSite;
  season = initialSeason;

  siteSelect.addEventListener("change", () => {
    const chosen = siteSelect.value;
    const seasons = availableSiteSeasons[chosen] || [];
    seasonSelect.innerHTML = seasonOptions(seasons);
    seasonSelect.value = seasons[0] || "2024";
    siteName = chosen;
    season = seasonSelect.value;
    load();
  });
  seasonSelect.addEventListener("change", () => {
    season = seasonSelect.value;
    load();
  });

  await load();
}

init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,146 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Phenology</title>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 900px; margin: 0 auto; padding: 20px; }
|
||||
h1 { font-size: 22px; margin-top: 0; }
|
||||
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
|
||||
padding: 10px 12px; border-radius: 4px; margin-bottom: 16px; line-height: 1.5; }
|
||||
table { border-collapse: collapse; width: 100%; font-size: 13px; }
|
||||
th, td { border: 1px solid #ccc; padding: 8px 10px; text-align: left; }
|
||||
th { background: #f5f5f5; }
|
||||
td.num { text-align: center; font-variant-numeric: tabular-nums; }
|
||||
td.site { font-weight: 500; }
|
||||
a.rowlink { color: #0066cc; text-decoration: none; }
|
||||
a.rowlink:hover { text-decoration: underline; }
|
||||
.empty { color: #666; }
|
||||
.err { color: #a00; }
|
||||
.loading { color: #666; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html" class="active">Phenology</a>
|
||||
</div>
|
||||
<h1>PhenoCam phenology (50% amplitude)</h1>
|
||||
<p class="intro">
|
||||
Green-up and green-down dates from <code>data/&lt;site&gt;/&lt;season&gt;/raw/phenocam/phenocam_phenology.json</code>
|
||||
(TIMESAT on PhenoCam GCC). Site/season rows match <code>data/sites.geojson</code>.
|
||||
Run <code>python phenology_timesat.py --all</code> or the pipeline to generate missing JSON files.
|
||||
</p>
|
||||
<p id="status" class="loading">Loading…</p>
|
||||
<div id="tableWrap"></div>
|
||||
</div>
|
||||
<script>
|
||||
/**
 * Escape a value for safe insertion into HTML text or a double-quoted attribute.
 * The value is stringified first. `&` is replaced before the other characters
 * so the entities produced here are not escaped a second time.
 */
function escapeHtml(s) {
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
||||
|
||||
/**
 * Cell content for a phenology date: an em-dash placeholder span when the
 * value is null, undefined or empty, otherwise the HTML-escaped value.
 */
function cellDate(v) {
  const missing = v == null || v === "";
  return missing ? "<span class='empty'>—</span>" : escapeHtml(v);
}
|
||||
|
||||
/**
 * Fetch phenocam_phenology.json for one site/season.
 * Resolves to { ok, up, down }: `up`/`down` are the 50 % green-up/green-down
 * dates (null when absent); `ok` is false when the file could not be fetched
 * or parsed.
 */
async function loadPhenologyRow(site, season) {
  const unavailable = () => ({ ok: false, up: null, down: null });
  const url = `data/${site}/${season}/raw/phenocam/phenocam_phenology.json`;
  try {
    const response = await fetch(url);
    if (!response.ok) {
      return unavailable();
    }
    const phenology = await response.json();
    const up = phenology.green_up_50pct_date ?? null;
    const down = phenology.green_down_50pct_date ?? null;
    return { ok: true, up, down };
  } catch {
    return unavailable();
  }
}
|
||||
|
||||
/**
 * Build the phenology table: one row per site/season listed in
 * data/sites.geojson, with green-up / green-down dates fetched in parallel.
 * Shows an error in #status when sites.geojson cannot be loaded.
 */
async function main() {
  const status = document.getElementById("status");
  const wrap = document.getElementById("tableWrap");

  let features = [];
  try {
    const res = await fetch("data/sites.geojson");
    if (!res.ok) throw new Error("Could not load sites.geojson");
    const g = await res.json();
    features = g.features || [];
  } catch {
    status.className = "err";
    status.textContent = "Failed to load data/sites.geojson.";
    return;
  }

  // Flatten features into one entry per (site, season).
  const rows = [];
  for (const f of features) {
    const site = f.properties && f.properties.sitename;
    if (!site) continue;
    const desc = (f.properties && f.properties.description) || site;
    const seasons = f.properties && f.properties.seasons
      ? Object.keys(f.properties.seasons).sort()
      : [];
    for (const season of seasons) {
      rows.push({ site, season, desc });
    }
  }
  rows.sort((a, b) => a.site.localeCompare(b.site) || a.season.localeCompare(b.season));

  const results = await Promise.all(
    rows.map((r) =>
      loadPhenologyRow(r.site, r.season).then((phen) => ({ ...r, ...phen }))
    )
  );

  const head =
    "<thead><tr>" +
    "<th>Site</th><th>Season</th><th>Description</th>" +
    "<th>Green-up</th><th>Green-down</th>" +
    "</tr></thead>";

  const body = results
    .map((r) => {
      const q = new URLSearchParams();
      q.set("site", r.site);
      q.set("season", r.season);
      const viewer = `index.html?${q.toString()}`;
      // Every value interpolated into markup is escaped, including the
      // season key and the href (its "&" separators must become entities).
      return (
        "<tr>" +
        `<td class="site"><a class="rowlink" href="${escapeHtml(viewer)}">${escapeHtml(r.site)}</a></td>` +
        `<td class="num">${escapeHtml(r.season)}</td>` +
        `<td>${escapeHtml(r.desc)}</td>` +
        `<td class="num">${cellDate(r.up)}</td>` +
        `<td class="num">${cellDate(r.down)}</td>` +
        "</tr>"
      );
    })
    .join("");

  status.textContent = "";
  status.className = "";
  wrap.innerHTML = "<table>" + head + "<tbody>" + body + "</tbody></table>";
}

main();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,390 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Postprocessed Viewer</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
|
||||
<script src="common.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
|
||||
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
|
||||
.selectors { margin-bottom: 20px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { margin: 0 0 5px 0; font-size: 22px; }
|
||||
.season-row { padding-bottom: 15px; }
|
||||
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
|
||||
.download-links { margin-left: 10px; font-size: 14px; }
|
||||
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
|
||||
.download-links a:hover { text-decoration: underline; }
|
||||
#dateSlider { width: 100%; margin: 15px 0; }
|
||||
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
|
||||
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
|
||||
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
|
||||
#postprocessedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
|
||||
.leaflet-image-layer { image-rendering: pixelated; }
|
||||
.leaflet-control-attribution { display: none; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header-sticky">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html" class="active">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="siteName">Innsbruck</h1>
|
||||
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
<label>Strategy:</label>
|
||||
<select id="strategySelect">
|
||||
<option value="aggressive">Aggressive</option>
|
||||
<option value="nonaggressive">Non-aggressive</option>
|
||||
</select>
|
||||
<label>Sigma:</label>
|
||||
<select id="sigmaSelect">
|
||||
<option value="20">σ=20</option>
|
||||
<option value="30">σ=30</option>
|
||||
</select>
|
||||
<label>Source:</label>
|
||||
<select id="sourceSelect">
|
||||
<option value="s2">S2</option>
|
||||
<option value="fusion">Fusion</option>
|
||||
<option value="s3">S3</option>
|
||||
</select>
|
||||
<label>Mode:</label>
|
||||
<select id="fusionModeSelect" title="BtI vs ItB processed paths">
|
||||
<option value="bti">BtI</option>
|
||||
<option value="itb">ItB</option>
|
||||
</select>
|
||||
</div>
|
||||
<input type="range" id="dateSlider" min="0" max="365" value="0">
|
||||
<div id="dateDisplay">2024-01-01</div>
|
||||
</div>
|
||||
<div class="map-label">Postprocessed RGB (closest available)</div>
|
||||
<div id="mapDate" class="map-date"></div>
|
||||
<div id="postprocessedMap"></div>
|
||||
<div id="plots">
|
||||
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
|
||||
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
|
||||
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
|
||||
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
|
||||
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
|
||||
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
// Projection definitions used when converting GeoTIFF bounds to lat/lon.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// Current selection (defaults until init() applies URL parameters).
let siteName = "innsbruck";
let season = "2024";
let strategy = "aggressive";
let sigma = "20";
let source = "s2";
let fusionMode = "bti";

// Site marker position as [lat, lon].
let sitePosition = [47.116171, 11.320308];
// Day 0 of the date slider (January 1 of the selected season).
let start = new Date(2024, 0, 1);
// sitename -> seasons for which data was found.
let availableSiteSeasons = {};

// Leaflet objects.
let postprocessedMap = null;
let overlay = null;
let marker = null;

// Time series backing the plots.
let ndviTs = [];
let gccTs = [];
let bandsTs = [];

// Band plots: time-series key and line colour.
const BANDS = [
  { key: "b02", color: "#0066ff" },
  { key: "b03", color: "#00aa00" },
  { key: "b04", color: "#cc0000" },
  { key: "b8a", color: "#9900cc" },
];

const urlParams = new URLSearchParams(location.search);
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
|
||||
|
||||
/** Format a Date as local-time "YYYY-MM-DD". */
const fmtDate = (d) => {
  const month = String(d.getMonth() + 1).padStart(2, "0");
  const day = String(d.getDate()).padStart(2, "0");
  return `${d.getFullYear()}-${month}-${day}`;
};

/**
 * Convert a slider offset (days since `start`) to "YYYY-MM-DD".
 * Uses calendar arithmetic rather than adding 24 h blocks of milliseconds,
 * so a daylight-saving shift between `start` and the target day cannot move
 * the result onto the previous date.
 */
const dateFromDays = (days) =>
  fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));

/**
 * Convert "YYYY-MM-DD" to whole days since `start`.
 * Both dates are compared as UTC calendar days: the difference of two local
 * midnights is one hour short of a whole day during daylight-saving time,
 * which made a floor-based result one day too small.
 */
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  const startDay = Date.UTC(start.getFullYear(), start.getMonth(), start.getDate());
  return Math.round((Date.UTC(y, m - 1, d) - startDay) / 86400000);
};
|
||||
|
||||
/**
 * Directory holding processed outputs for the current selection:
 * data/<site>/<season>/processed_<strategy>[_itb]_sigma<sigma>,
 * where the "_itb" part is present only in ItB mode.
 */
function getProcessedPath() {
  const modeTag = fusionMode === "itb" ? "_itb" : "";
  return `data/${siteName}/${season}/processed_${strategy}${modeTag}_sigma${sigma}`;
}
|
||||
|
||||
/**
 * Load the NDVI, GCC and band time series for the current selection in
 * parallel, then redraw the plots and refresh the download links.
 * A missing file yields an empty series; any fetch/parse error clears all three.
 */
async function loadTimeseries() {
  const base = getProcessedPath();
  const fetchSeries = (product) =>
    fetch(`${base}/${product}/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : []));

  try {
    [ndviTs, gccTs, bandsTs] = await Promise.all([
      fetchSeries("ndvi"),
      fetchSeries("gcc"),
      fetchSeries("bands"),
    ]);
  } catch {
    ndviTs = [];
    gccTs = [];
    bandsTs = [];
  }

  drawPlots();
  updateDownloadLinks();
}
|
||||
|
||||
/**
 * Draw one time-series line plot on a canvas.
 *
 * @param canvasId  id of the <canvas> element (no-op when it does not exist)
 * @param data      array of records with a `date` and the value under `key`
 * @param key       property to plot; records where it is null/undefined are skipped
 * @param color     stroke colour of the series line
 *
 * Also draws the axes, min/max labels, a red cursor at the slider date and
 * the value of the sample closest to that date.
 */
function drawPlot(canvasId, data, key, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");

  // Assigning width/height resizes the drawing buffer to the laid-out width.
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;
  const width = canvas.width;
  const height = canvas.height;
  const pad = 30;
  const plotW = width - pad * 2;
  const plotH = height - pad * 2;

  const samples = data.filter((row) => row[key] != null);
  if (!samples.length) {
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.fillStyle = "#999";
    ctx.font = "12px sans-serif";
    ctx.fillText("No data", pad, pad + plotH / 2);
    return;
  }

  const times = samples.map((row) => new Date(row.date));
  const values = samples.map((row) => row[key]);
  const minD = new Date(Math.min(...times));
  const maxD = new Date(Math.max(...times));
  const minV = Math.min(...values);
  const maxV = Math.max(...values);
  // Fall back to 1 so a single sample or a flat series does not divide by zero.
  const dRange = maxD - minD || 1;
  const vRange = maxV - minV || 1;
  const toX = (d) => pad + ((new Date(d) - minD) / dRange) * plotW;
  const toY = (v) => pad + plotH - ((v - minV) / vRange) * plotH;

  ctx.clearRect(0, 0, width, height);

  // Axes.
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, pad + plotH);
  ctx.lineTo(pad + plotW, pad + plotH);
  ctx.stroke();

  // Value range labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minV.toFixed(3), 2, pad + plotH + 10);
  ctx.fillText(maxV.toFixed(3), 2, pad + 3);

  // Series line.
  ctx.strokeStyle = color;
  ctx.beginPath();
  samples.forEach((row, index) => {
    const px = toX(row.date);
    const py = toY(row[key]);
    if (index) {
      ctx.lineTo(px, py);
    } else {
      ctx.moveTo(px, py);
    }
  });
  ctx.stroke();

  // Cursor at the currently selected slider date.
  const curDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const xPos = toX(curDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(xPos, pad);
  ctx.lineTo(xPos, pad + plotH);
  ctx.stroke();

  // Label the sample nearest to the cursor.
  const cursorTime = new Date(curDate);
  const closest = samples.reduce((best, row) =>
    Math.abs(new Date(row.date) - cursorTime) < Math.abs(new Date(best.date) - cursorTime) ? row : best
  );
  if (closest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(closest[key].toFixed(3), xPos + 5, toY(closest[key]) - 5);
  }
}
|
||||
|
||||
/** Redraw every plot: NDVI, GCC, then one plot per entry of BANDS. */
function drawPlots() {
  drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
  drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
  for (const { key, color } of BANDS) {
    drawPlot(`plot_${key}`, bandsTs, key, color);
  }
}
|
||||
|
||||
/**
 * Refresh the download links next to the season heading.
 * ItB mode links only the GCC time-series JSON; otherwise JSON and CSV
 * exports are linked with a descriptive download filename.
 */
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;

  const root = getProcessedPath();
  let links;
  if (fusionMode === "itb") {
    links = `<a href="${root}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
  } else {
    const base = `${root}/export/${source}`;
    const name = `${siteName}_${season}_postprocessed_${strategy}_sigma${sigma}_${source}`;
    const jsonLink = `<a href="${base}/timeseries.json" download="${name}.json">[JSON]</a>`;
    const csvLink = `<a href="${base}/timeseries.csv" download="${name}.csv">[CSV]</a>`;
    links = jsonLink + csvLink;
  }
  el.innerHTML = links;
}
|
||||
|
||||
/**
 * Find the processed GeoTIFF closest in time to `dateStr` ("YYYY-MM-DD").
 *
 * Candidate days are probed with HEAD requests, nearest first (the day
 * itself, then ±1, ±2, …), and restricted to the selected season's calendar
 * year. Returns the file name ("YYYYMMDD_0.geotiff"), or null when nothing
 * is found or `dateStr` is not a valid date.
 *
 * Date-only ISO strings parse as UTC midnight and file names are derived via
 * toISOString(), so the season bounds are also expressed in UTC. Comparing
 * against local-midnight bounds dropped Jan 1 or Dec 31 depending on the
 * viewer's UTC offset.
 */
async function findProcessedFile(dateStr) {
  const DAY_MS = 86400000;
  const targetMs = new Date(dateStr).getTime();
  if (Number.isNaN(targetMs)) return null;

  const year = parseInt(season, 10);
  const seasonStart = Date.UTC(year, 0, 1);
  const seasonEnd = Date.UTC(year, 11, 31);

  for (let offset = 0; offset <= 365; offset++) {
    for (const dir of offset === 0 ? [0] : [-1, 1]) {
      const candidateMs = targetMs + dir * offset * DAY_MS;
      if (candidateMs < seasonStart || candidateMs > seasonEnd) continue;
      const ds = new Date(candidateMs).toISOString().split("T")[0].replace(/-/g, "");
      const filename = `${ds}_0.geotiff`;
      try {
        const res = await fetch(`${getProcessedPath()}/${source}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Best effort: a failed probe just means this day is skipped.
      }
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Reproject a [minX, minY, maxX, maxY] bbox from `fromCRS` to EPSG:4326 and
 * return it as [[south, west], [north, east]] (lat before lon).
 */
function transformBounds(bbox, fromCRS) {
  const [minX, minY, maxX, maxY] = bbox;
  const [west, south] = proj4(fromCRS, "EPSG:4326", [minX, minY]);
  const [east, north] = proj4(fromCRS, "EPSG:4326", [maxX, maxY]);
  return [
    [south, west],
    [north, east],
  ];
}
|
||||
|
||||
/**
 * Download a processed GeoTIFF and prepare it for display as an image overlay.
 * Resolves to { dataUrl, bounds, dateStr }: `bounds` are lat/lon corner pairs
 * and `dateStr` is the file name without its "_0.geotiff" suffix.
 */
async function loadGeotiff(filename) {
  const url = `${getProcessedPath()}/${source}/${filename}`;
  const response = await fetch(url);
  const buffer = await response.arrayBuffer();
  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buffer);

  let bounds;
  if (crsCode === "EPSG:4326") {
    // Already geographic: only swap each corner to [lat, lon].
    bounds = [[bbox[1], bbox[0]], [bbox[3], bbox[2]]];
  } else {
    bounds = transformBounds(bbox, crsCode);
  }

  const dateStr = filename.replace("_0.geotiff", "");
  return { dataUrl, bounds, dateStr };
}
|
||||
|
||||
/**
 * Show the processed image closest to the slider date on the map.
 * When no file is found, the map is not created yet, or loading fails, the
 * current overlay and the date caption are cleared.
 */
async function updateMap() {
  const slider = document.getElementById("dateSlider");
  const caption = document.getElementById("mapDate");

  const clearOverlay = () => {
    if (overlay) {
      postprocessedMap.removeLayer(overlay);
      overlay = null;
    }
    caption.textContent = "";
  };

  const wanted = dateFromDays(parseInt(slider.value));
  const filename = await findProcessedFile(wanted);
  if (!filename || !postprocessedMap) {
    clearOverlay();
    return;
  }

  try {
    const tile = await loadGeotiff(filename);
    if (overlay) postprocessedMap.removeLayer(overlay);
    overlay = L.imageOverlay(tile.dataUrl, tile.bounds, { opacity: 0.95 }).addTo(postprocessedMap);
    postprocessedMap.fitBounds(tile.bounds);
    const ds = tile.dateStr;
    caption.textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
  } catch {
    clearOverlay();
  }
}
|
||||
|
||||
/**
 * Check with a HEAD request whether metrics.json exists for the given
 * site/season; it serves as the marker that the pair has data.
 * Resolves to false on any fetch error.
 */
async function probeDataExists(sitename, s) {
  const url = `data/${sitename}/${s}/metrics.json`;
  let exists = false;
  try {
    const response = await fetch(url, { method: "HEAD" });
    exists = response.ok;
  } catch {
    exists = false;
  }
  return exists;
}
|
||||
|
||||
/**
 * Look up the GeoJSON feature whose `properties.sitename` equals `sn`.
 * Returns undefined when the sites data is not loaded or has no match.
 */
function getSiteBySitename(sn) {
  const features = window.sitesData?.features;
  return features?.find((feature) => feature.properties?.sitename === sn);
}
|
||||
|
||||
/**
 * Switch the viewer to another site/season: update state, map view, headings,
 * slider range and URL, then reload the time series and the map image.
 * A `date` URL parameter, when present, positions the slider.
 */
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const year = parseInt(season);
  start = new Date(year, 0, 1);

  const site = getSiteBySitename(newSite);
  const coords = site?.geometry?.coordinates;
  if (coords) {
    // GeoJSON stores [lon, lat]; Leaflet wants [lat, lon].
    const [lon, lat] = coords;
    sitePosition = [lat, lon];
  }
  if (postprocessedMap) {
    postprocessedMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }

  document.getElementById("siteName").textContent = site?.properties?.description || newSite;
  document.getElementById("season").textContent = season;

  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(year, 11, 31);
  slider.max = Math.ceil((yearEnd - start) / 86400000);

  const params = new URLSearchParams(location.search);
  params.set("site", siteName);
  params.set("season", season);
  params.set("mode", fusionMode);
  history.replaceState({}, "", `?${params}`);

  const urlDate = params.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value));

  await loadTimeseries();
  await updateMap();
}
|
||||
|
||||
/**
 * Bootstrap the page: load the site catalogue, find out which site/season
 * combinations have data, populate the selectors from the URL query string,
 * create the Leaflet map and wire up all selector handlers, then render the
 * initial site/season.
 *
 * Availability probes are independent HEAD requests, so they are issued
 * concurrently instead of one after another.
 *
 * @returns {Promise<void>} Resolves after the first render has completed.
 */
async function init() {
  // Site catalogue; a missing or unreadable file degrades to "no sites".
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  // Probe every advertised site/season concurrently; results are applied in
  // catalogue order so the resulting object matches a sequential scan.
  const namedFeatures = (window.sitesData.features || []).filter((f) => f.properties?.sitename);
  const probed = await Promise.all(namedFeatures.map(async (f) => {
    const sn = f.properties.sitename;
    const advertised = f.properties.seasons ? Object.keys(f.properties.seasons).sort() : [];
    const found = await Promise.all(advertised.map((s) => probeDataExists(sn, s)));
    return [sn, advertised.filter((_, i) => found[i])];
  }));
  for (const [sn, withData] of probed) {
    if (withData.length) availableSiteSeasons[sn] = withData;
  }

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const seasonOptions = (list) => list.map((s) => `<option value="${s}">${s}</option>`).join("");

  // Fall back to a single default site/season when nothing was found.
  siteSelect.innerHTML = "";
  const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
  for (const sn of listedSites) {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  }

  // URL parameters win when they name something that actually has data.
  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");

  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptions(initialSeasons);
  seasonSelect.value = initialSeason;

  strategy = urlParams.get("strategy") || "aggressive";
  sigma = urlParams.get("sigma") || "20";
  source = urlParams.get("source") || "s2";
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("strategySelect").value = strategy;
  document.getElementById("sigmaSelect").value = sigma;
  document.getElementById("sourceSelect").value = source;
  document.getElementById("fusionModeSelect").value = fusionMode;

  // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  postprocessedMap = L.map("postprocessedMap", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(postprocessedMap);

  // Changing the site repopulates the season list and selects its first entry.
  siteSelect.addEventListener("change", function () {
    const sn = this.value;
    const seas = availableSiteSeasons[sn] || [];
    seasonSelect.innerHTML = seasonOptions(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function () {
    setSiteSeason(siteSelect.value, this.value);
  });

  // The remaining selectors all do the same thing: store the value, mirror it
  // into the URL and reload plots and map.
  const bindParam = (selectId, paramName, assign) => {
    document.getElementById(selectId).addEventListener("change", function () {
      assign(this.value);
      urlParams.set(paramName, this.value);
      history.replaceState({}, "", `?${urlParams}`);
      loadTimeseries();
      updateMap();
    });
  };
  bindParam("strategySelect", "strategy", (v) => { strategy = v; });
  bindParam("sigmaSelect", "sigma", (v) => { sigma = v; });
  bindParam("sourceSelect", "source", (v) => { source = v; });
  bindParam("fusionModeSelect", "mode", (v) => { fusionMode = v; });

  await setSiteSeason(initialSite, initialSeason);
}
|
||||
|
||||
// Scrubbing the date slider updates the date label, then redraws the plots
// and the map for the newly selected day.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const dayIndex = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(dayIndex);
  drawPlots();
  updateMap();
});

// Kick off page initialisation.
init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,379 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Prepared S2/S3 Viewer</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
|
||||
<script src="common.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
|
||||
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
|
||||
.selectors { margin-bottom: 20px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { margin: 0 0 5px 0; font-size: 22px; }
|
||||
.season-row { padding-bottom: 15px; }
|
||||
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
|
||||
.download-links { margin-left: 10px; font-size: 14px; }
|
||||
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
|
||||
.download-links a:hover { text-decoration: underline; }
|
||||
#dateSlider { width: 100%; margin: 15px 0; }
|
||||
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
|
||||
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
|
||||
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
|
||||
#preparedMap { height: 500px; border: 1px solid #ccc; margin-top: 10px; }
|
||||
.leaflet-image-layer { image-rendering: pixelated; }
|
||||
.leaflet-control-attribution { display: none; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header-sticky">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html">Pre-selection</a>
|
||||
<a href="prepared.html" class="active">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="siteName">Innsbruck</h1>
|
||||
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
<label>Strategy:</label>
|
||||
<select id="strategySelect">
|
||||
<option value="aggressive">Aggressive</option>
|
||||
<option value="nonaggressive">Non-aggressive</option>
|
||||
</select>
|
||||
<label>Source:</label>
|
||||
<select id="sourceSelect">
|
||||
<option value="s2">S2</option>
|
||||
<option value="s3">S3</option>
|
||||
</select>
|
||||
<label>Mode:</label>
|
||||
<select id="fusionModeSelect" title="BtI = REFL/composite; ItB = GCC rasters">
|
||||
<option value="bti">BtI</option>
|
||||
<option value="itb">ItB</option>
|
||||
</select>
|
||||
</div>
|
||||
<input type="range" id="dateSlider" min="0" max="365" value="0">
|
||||
<div id="dateDisplay">2024-01-01</div>
|
||||
</div>
|
||||
<div class="map-label" id="mapLabel">Prepared RGB (closest available)</div>
|
||||
<div id="mapDate" class="map-date"></div>
|
||||
<div id="preparedMap"></div>
|
||||
<div id="plots">
|
||||
<div class="plot-label">NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>
|
||||
<div class="plot-label">GCC</div><canvas id="plot_gcc" class="plot"></canvas>
|
||||
<div class="plot-label">B02 (Blue)</div><canvas id="plot_b02" class="plot"></canvas>
|
||||
<div class="plot-label">B03 (Green)</div><canvas id="plot_b03" class="plot"></canvas>
|
||||
<div class="plot-label">B04 (Red)</div><canvas id="plot_b04" class="plot"></canvas>
|
||||
<div class="plot-label">B8A (NIR)</div><canvas id="plot_b8a" class="plot"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
// Coordinate systems registered with proj4 for reprojecting raster bounds.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// Current selection (overwritten from the URL query string during init).
let siteName = "innsbruck";
let season = "2024";
let strategy = "aggressive";
let source = "s2";
let fusionMode = "bti";

// Map centre as [lat, lon] and the first day of the selected season.
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);

// sitename -> list of seasons for which data was found.
let availableSiteSeasons = {};

// Leaflet objects, created in init().
let preparedMap = null;
let overlay = null;
let marker = null;

// Timeseries backing the plots.
let ndviTs = [];
let gccTs = [];
let bandsTs = [];

// Band plots: timeseries key and stroke colour.
const BANDS = [
  ["b02", "#0066ff"],
  ["b03", "#00aa00"],
  ["b04", "#cc0000"],
  ["b8a", "#9900cc"],
].map(([key, color]) => ({ key, color }));

const urlParams = new URLSearchParams(location.search);
const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
|
||||
|
||||
/**
 * Format a Date as `YYYY-MM-DD` using its local calendar fields.
 *
 * @param {Date} d Date to format.
 * @returns {string} Zero-padded local date string.
 */
const fmtDate = (d) => {
  const pad2 = (n) => String(n).padStart(2, "0");
  return `${d.getFullYear()}-${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}`;
};

/**
 * Convert a slider offset (days since the season start) to `YYYY-MM-DD`.
 *
 * Uses calendar arithmetic rather than adding multiples of 24h to the start
 * timestamp: local days are 23h or 25h long across a DST change, so the
 * millisecond form can land on the previous calendar day.
 *
 * @param {number} days Whole days after `start`.
 * @returns {string} Local date string.
 */
const dateFromDays = (days) =>
  fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));

/**
 * Convert a `YYYY-MM-DD` string to whole days since the season start.
 *
 * Both dates are projected onto UTC midnights so the difference is an exact
 * multiple of 24h; subtracting local midnights and flooring is one day short
 * whenever the target date is in DST and the start is not.
 *
 * @param {string} dateStr Date in `YYYY-MM-DD` form.
 * @returns {number} Day offset from `start` (NaN for unparseable input).
 */
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  const targetUtc = Date.UTC(y, m - 1, d);
  const startUtc = Date.UTC(start.getFullYear(), start.getMonth(), start.getDate());
  return Math.round((targetUtc - startUtc) / 86400000);
};
|
||||
|
||||
/**
 * Build the data directory for the current site, season, strategy and mode.
 *
 * @returns {string} `data/<site>/<season>/prepared_<strategy>` with an
 *   `_itb` suffix when the fusion mode is "itb".
 */
function getPreparedPath() {
  let preparedDir = `prepared_${strategy}`;
  if (fusionMode === "itb") preparedDir += "_itb";
  return `data/${siteName}/${season}/${preparedDir}`;
}
|
||||
|
||||
/**
 * Fetch the NDVI, GCC and band timeseries for the current selection, then
 * redraw the plots and refresh the download links.
 *
 * A non-OK response yields an empty series; if any request rejects, all
 * three series are reset to empty.
 *
 * @returns {Promise<void>}
 */
async function loadTimeseries() {
  const root = getPreparedPath();
  const fetchSeries = (kind) =>
    fetch(`${root}/${kind}/${source}/timeseries.json`).then((r) => (r.ok ? r.json() : []));

  try {
    [ndviTs, gccTs, bandsTs] = await Promise.all([
      fetchSeries("ndvi"),
      fetchSeries("gcc"),
      fetchSeries("bands"),
    ]);
  } catch {
    ndviTs = [];
    gccTs = [];
    bandsTs = [];
  }

  drawPlots();
  updateDownloadLinks();
}
|
||||
|
||||
/**
 * Draw one timeseries as a line plot on a canvas, with min/max labels, a red
 * cursor at the slider's current date and the value of the sample closest to
 * that date.
 *
 * @param {string} canvasId Id of the target canvas element; a missing element
 *   makes this a no-op.
 * @param {Array<object>} data Rows carrying a `date` and the value under `key`.
 * @param {string} key Property to plot; rows where it is null/undefined are skipped.
 * @param {string} color Stroke colour of the series line.
 */
function drawPlot(canvasId, data, key, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");

  // Match the backing store to the rendered width; height is fixed.
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const pad = 30;
  const w = canvas.width;
  const h = canvas.height;
  const plotW = w - pad * 2;
  const plotH = h - pad * 2;
  const baseline = pad + plotH;

  const pts = data.filter((row) => row[key] != null);
  if (pts.length === 0) {
    ctx.clearRect(0, 0, w, h);
    ctx.fillStyle = "#999";
    ctx.font = "12px sans-serif";
    ctx.fillText("No data", pad, pad + plotH / 2);
    return;
  }

  // Work in epoch milliseconds; "|| 1" avoids dividing by zero when all
  // samples share one date or one value.
  const toMs = (d) => new Date(d).getTime();
  const times = pts.map((row) => toMs(row.date));
  const vals = pts.map((row) => row[key]);
  const minT = Math.min(...times);
  const maxT = Math.max(...times);
  const minV = Math.min(...vals);
  const maxV = Math.max(...vals);
  const tSpan = maxT - minT || 1;
  const vSpan = maxV - minV || 1;
  const xOf = (d) => pad + ((toMs(d) - minT) / tSpan) * plotW;
  const yOf = (v) => baseline - ((v - minV) / vSpan) * plotH;

  // Axes.
  ctx.clearRect(0, 0, w, h);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, baseline);
  ctx.lineTo(pad + plotW, baseline);
  ctx.stroke();

  // Value range labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minV.toFixed(3), 2, baseline + 10);
  ctx.fillText(maxV.toFixed(3), 2, pad + 3);

  // Series line.
  ctx.strokeStyle = color;
  ctx.beginPath();
  pts.forEach((row, i) => {
    const px = xOf(row.date);
    const py = yOf(row[key]);
    if (i === 0) ctx.moveTo(px, py);
    else ctx.lineTo(px, py);
  });
  ctx.stroke();

  // Cursor at the slider's date.
  const cursorDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const cursorX = xOf(cursorDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(cursorX, pad);
  ctx.lineTo(cursorX, baseline);
  ctx.stroke();

  // Label the sample nearest to the cursor (first one wins on ties).
  const cursorMs = toMs(cursorDate);
  let nearest = pts[0];
  let nearestGap = Math.abs(times[0] - cursorMs);
  for (let i = 1; i < pts.length; i++) {
    const gap = Math.abs(times[i] - cursorMs);
    if (gap < nearestGap) {
      nearest = pts[i];
      nearestGap = gap;
    }
  }
  ctx.fillStyle = "#f00";
  ctx.font = "bold 10px sans-serif";
  ctx.fillText(nearest[key].toFixed(3), cursorX + 5, yOf(nearest[key]) - 5);
}
|
||||
|
||||
/**
 * Redraw every plot on the page: NDVI, GCC and one plot per entry in BANDS.
 */
function drawPlots() {
  drawPlot("plot_ndvi", ndviTs, "ndvi", "#2d7a3e");
  drawPlot("plot_gcc", gccTs, "greenness_index", "#00aa00");
  for (const { key, color } of BANDS) {
    drawPlot(`plot_${key}`, bandsTs, key, color);
  }
}
|
||||
|
||||
/**
 * Rebuild the download links next to the season heading.
 *
 * In "itb" mode only the GCC timeseries JSON is linked; otherwise JSON and
 * CSV exports are linked with a descriptive download filename.
 */
function updateDownloadLinks() {
  const el = document.getElementById("downloadLinks");
  if (!el) return;

  const root = getPreparedPath();
  if (fusionMode === "itb") {
    el.innerHTML = `<a href="${root}/gcc/${source}/timeseries.json">[GCC JSON]</a>`;
    return;
  }

  const base = `${root}/export/${source}`;
  const name = `${siteName}_${season}_prepared_${strategy}_${source}`;
  el.innerHTML = ["json", "csv"]
    .map((ext) => `<a href="${base}/timeseries.${ext}" download="${name}.${ext}">[${ext.toUpperCase()}]</a>`)
    .join("");
}
|
||||
|
||||
/**
 * Find the prepared raster closest to a date by probing candidate filenames
 * with HEAD requests, fanning out one day at a time in both directions.
 *
 * `dateStr` is a date-only ISO string, which JavaScript parses as UTC
 * midnight, and filenames are derived with `toISOString()` (also UTC). The
 * season bounds are therefore built with `Date.UTC` too; comparing against
 * local-midnight bounds excluded Dec 31 east of UTC and Jan 1 west of it.
 *
 * Note that S2 filenames are probed with a fixed `S2A_MSIL2A_` prefix.
 *
 * @param {string} dateStr Target date as `YYYY-MM-DD`.
 * @returns {Promise<string|null>} Filename of the nearest existing raster
 *   within the season, or null when none is found or the date is invalid.
 */
async function findPreparedFile(dateStr) {
  const DAY_MS = 86400000;
  const targetMs = new Date(dateStr).getTime();
  if (Number.isNaN(targetMs)) return null;

  const year = parseInt(season, 10);
  const seasonStart = Date.UTC(year, 0, 1);
  const seasonEnd = Date.UTC(year, 11, 31);

  // Snapshot the selection so one search probes a single consistent directory
  // even if a selector changes while requests are in flight.
  const dirUrl = `${getPreparedPath()}/${source}`;
  const nameFor = (ds) => {
    if (source !== "s2") return `composite_${ds}.tif`;
    return fusionMode === "itb" ? `S2A_MSIL2A_${ds}_GCC.tif` : `S2A_MSIL2A_${ds}_REFL.tif`;
  };
  const filenameFor = ((sourceNow, modeNow) => (ds) => {
    if (sourceNow !== "s2") return `composite_${ds}.tif`;
    return modeNow === "itb" ? `S2A_MSIL2A_${ds}_GCC.tif` : `S2A_MSIL2A_${ds}_REFL.tif`;
  })(source, fusionMode);
  void nameFor;

  for (let offset = 0; offset <= 365; offset++) {
    let anyInSeason = false;
    for (const sign of offset === 0 ? [0] : [-1, 1]) {
      const candidateMs = targetMs + sign * offset * DAY_MS;
      if (candidateMs < seasonStart || candidateMs > seasonEnd) continue;
      anyInSeason = true;

      const ds = new Date(candidateMs).toISOString().split("T")[0].replace(/-/g, "");
      const filename = filenameFor(ds);
      try {
        const res = await fetch(`${dirUrl}/${filename}`, { method: "HEAD" });
        if (res.ok) return filename;
      } catch {
        // Network failure on one probe: treat as "not there" and keep searching.
      }
    }
    // Once both directions have left the season, larger offsets cannot re-enter it.
    if (offset > 0 && !anyInSeason) break;
  }
  return null;
}
|
||||
|
||||
/**
 * Reproject a raster bounding box into Leaflet `[[lat, lon], [lat, lon]]`
 * bounds (south-west corner first).
 *
 * WGS84 UTM codes (EPSG:326zz north, EPSG:327zz south, zones 01-60) that
 * have not been registered with proj4 yet are registered on demand, so
 * rasters outside the one statically defined zone can be placed as well.
 *
 * @param {number[]} bbox `[minX, minY, maxX, maxY]` in `fromCRS` units.
 * @param {string} fromCRS Source CRS identifier such as "EPSG:32632".
 * @returns {number[][]} Corner pairs in latitude/longitude order.
 */
function transformBounds(bbox, fromCRS) {
  const utm = /^EPSG:32([67])(\d{2})$/.exec(fromCRS);
  if (utm && !proj4.defs(fromCRS)) {
    const zone = Number(utm[2]);
    if (zone >= 1 && zone <= 60) {
      const south = utm[1] === "7" ? " +south" : "";
      proj4.defs(fromCRS, `+proj=utm +zone=${zone}${south} +datum=WGS84 +units=m +no_defs`);
    }
  }

  // proj4 returns [lon, lat]; Leaflet wants [lat, lon].
  const toLatLng = (x, y) => {
    const p = proj4(fromCRS, "EPSG:4326", [x, y]);
    return [p[1], p[0]];
  };
  return [toLatLng(bbox[0], bbox[1]), toLatLng(bbox[2], bbox[3])];
}
|
||||
|
||||
/**
 * Download a prepared GeoTIFF and turn it into an image overlay payload.
 *
 * @param {string} filename Raster filename inside the current prepared
 *   directory for the selected source.
 * @returns {Promise<{dataUrl: string, bounds: number[][], dateStr: string}>}
 *   `dataUrl` and bounds as produced from `geotiffToCanvasDataUrl`, plus the
 *   first 8-digit run of the filename (empty string when there is none).
 * @throws {Error} When the HTTP response is not OK; decoding errors from
 *   `geotiffToCanvasDataUrl` propagate unchanged.
 */
async function loadGeotiff(filename) {
  const path = `${getPreparedPath()}/${source}/${filename}`;

  // Fail loudly on HTTP errors instead of handing an error page to the decoder.
  const res = await fetch(path);
  if (!res.ok) throw new Error(`Failed to fetch ${path}: HTTP ${res.status}`);
  const buf = await res.arrayBuffer();

  const { dataUrl, bbox, crsCode } = await geotiffToCanvasDataUrl(buf);
  // Geographic rasters only need their axes swapped; anything else is reprojected.
  const bounds = crsCode === "EPSG:4326"
    ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
    : transformBounds(bbox, crsCode);

  const m = filename.match(/(\d{8})/);
  const dateStr = m ? m[1] : "";
  return { dataUrl, bounds, dateStr };
}
|
||||
|
||||
/**
 * Show the prepared raster closest to the slider's date on the map.
 *
 * The function is called on every slider input event, so several calls can
 * be in flight at once. Each call takes a ticket and gives up after an await
 * if a newer call has started, which keeps a slow, older request from
 * replacing the overlay of a newer one.
 *
 * @returns {Promise<void>} Resolves when this call has finished or was superseded.
 */
async function updateMap() {
  const ticket = (updateMap.latestTicket = (updateMap.latestTicket || 0) + 1);
  const superseded = () => ticket !== updateMap.latestTicket;

  // Remove the current overlay (if any) and blank the date caption.
  const clearOverlay = () => {
    if (overlay && preparedMap) {
      preparedMap.removeLayer(overlay);
      overlay = null;
    }
    document.getElementById("mapDate").textContent = "";
  };

  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value, 10));
  const filename = await findPreparedFile(dateStr);
  if (superseded()) return;

  if (!filename || !preparedMap) {
    clearOverlay();
    return;
  }

  try {
    const { dataUrl, bounds, dateStr: ds } = await loadGeotiff(filename);
    if (superseded()) return;
    if (overlay) preparedMap.removeLayer(overlay);
    overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(preparedMap);
    preparedMap.fitBounds(bounds);
    document.getElementById("mapDate").textContent = `${ds.slice(0,4)}-${ds.slice(4,6)}-${ds.slice(6,8)}`;
  } catch (e) {
    if (superseded()) return;
    // Best effort: keep the page usable, but leave a trace for debugging.
    console.warn(`Could not display ${filename}`, e);
    clearOverlay();
  }
}
|
||||
|
||||
/**
 * Report whether the S2 pre-selection file is published for a site/season.
 *
 * Sends a HEAD request only; a network failure is treated the same as a
 * missing file.
 *
 * @param {string} sitename Site directory name under `data/`.
 * @param {string} s Season directory name.
 * @returns {Promise<boolean>} True when the server answers with an OK status.
 */
async function probeDataExists(sitename, s) {
  const markerUrl = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
  let response;
  try {
    response = await fetch(markerUrl, { method: "HEAD" });
  } catch {
    return false;
  }
  return response.ok;
}
|
||||
|
||||
/**
 * Look up a GeoJSON feature in `window.sitesData` by its `sitename` property.
 *
 * @param {string} sn Site name to match.
 * @returns {object|undefined} The first matching feature, or undefined when
 *   the catalogue is not loaded or has no such site.
 */
function getSiteBySitename(sn) {
  const features = window.sitesData?.features;
  if (features == null) return undefined;
  return features.find((feature) => feature.properties?.sitename === sn);
}
|
||||
|
||||
/**
 * Switch the viewer to another site/season and refresh everything derived
 * from it: globals, map view and marker, headings, slider range, URL query
 * string, timeseries plots and the map overlay.
 *
 * The query string is written through the shared `urlParams` object. The
 * selector handlers also serialise `urlParams`, so writing into a private
 * copy here would let the next selector change drop `site`/`season` from the
 * URL again.
 *
 * @param {string} newSite Site name (key into the site catalogue).
 * @param {string} newSeason Season as a year string, e.g. "2024".
 * @returns {Promise<void>} Resolves after plots and map have been refreshed.
 */
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  const year = parseInt(season, 10);
  start = new Date(year, 0, 1);

  // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (preparedMap) {
    preparedMap.setView(sitePosition, 12);
    if (marker) marker.setLatLng(sitePosition);
  }

  document.getElementById("siteName").textContent = site?.properties?.description || newSite;
  document.getElementById("season").textContent = season;

  // Slider covers Jan 1 .. Dec 31 of the season; round() tolerates a UTC-offset
  // difference between the two local midnights.
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(year, 11, 31);
  slider.max = Math.round((yearEnd - start) / 86400000);

  urlParams.set("site", siteName);
  urlParams.set("season", season);
  urlParams.set("mode", fusionMode);
  history.replaceState({}, "", `?${urlParams}`);

  // An explicit ?date= in the URL positions the slider.
  const urlDate = urlParams.get("date");
  if (urlDate) slider.value = daysFromDate(urlDate);
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));

  await loadTimeseries();
  await updateMap();
}
|
||||
|
||||
/**
 * Bootstrap the page: load the site catalogue, find out which site/season
 * combinations have data, populate the selectors from the URL query string,
 * create the Leaflet map and wire up all selector handlers, then render the
 * initial site/season.
 *
 * Availability probes are independent HEAD requests, so they are issued
 * concurrently instead of one after another.
 *
 * @returns {Promise<void>} Resolves after the first render has completed.
 */
async function init() {
  // Site catalogue; a missing or unreadable file degrades to "no sites".
  try {
    const res = await fetch("data/sites.geojson");
    window.sitesData = res.ok ? await res.json() : { features: [] };
  } catch {
    window.sitesData = { features: [] };
  }

  // Probe every advertised site/season concurrently; results are applied in
  // catalogue order so the resulting object matches a sequential scan.
  const namedFeatures = (window.sitesData.features || []).filter((f) => f.properties?.sitename);
  const probed = await Promise.all(namedFeatures.map(async (f) => {
    const sn = f.properties.sitename;
    const advertised = f.properties.seasons ? Object.keys(f.properties.seasons).sort() : [];
    const found = await Promise.all(advertised.map((s) => probeDataExists(sn, s)));
    return [sn, advertised.filter((_, i) => found[i])];
  }));
  for (const [sn, withData] of probed) {
    if (withData.length) availableSiteSeasons[sn] = withData;
  }

  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const seasonOptions = (list) => list.map((s) => `<option value="${s}">${s}</option>`).join("");

  // Fall back to a single default site/season when nothing was found.
  siteSelect.innerHTML = "";
  const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
  for (const sn of listedSites) {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  }

  // URL parameters win when they name something that actually has data.
  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");
  const initialSite = (urlSite && availableSiteSeasons[urlSite]) ? urlSite : (availableSites[0] || "innsbruck");
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  const initialSeason = (urlSeason && initialSeasons.includes(urlSeason)) ? urlSeason : (initialSeasons[0] || "2024");

  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptions(initialSeasons);
  seasonSelect.value = initialSeason;

  strategy = urlParams.get("strategy") || "aggressive";
  source = urlParams.get("source") || "s2";
  fusionMode = urlParams.get("mode") === "itb" ? "itb" : "bti";
  document.getElementById("strategySelect").value = strategy;
  document.getElementById("sourceSelect").value = source;
  document.getElementById("fusionModeSelect").value = fusionMode;

  // The map caption depends on the fusion mode.
  const refreshMapLabel = () => {
    const ml = document.getElementById("mapLabel");
    if (ml) ml.textContent = fusionMode === "itb" ? "Prepared GCC grayscale / S3 (closest available)" : "Prepared RGB (closest available)";
  };
  refreshMapLabel();

  // GeoJSON stores [lon, lat]; Leaflet expects [lat, lon].
  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  preparedMap = L.map("preparedMap", { zoomControl: false }).setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  marker = L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(preparedMap);

  // Changing the site repopulates the season list and selects its first entry.
  siteSelect.addEventListener("change", function () {
    const sn = this.value;
    const seas = availableSiteSeasons[sn] || [];
    seasonSelect.innerHTML = seasonOptions(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });
  seasonSelect.addEventListener("change", function () {
    setSiteSeason(siteSelect.value, this.value);
  });

  // The remaining selectors all do the same thing: store the value, mirror it
  // into the URL and reload plots and map.
  const bindParam = (selectId, paramName, assign) => {
    document.getElementById(selectId).addEventListener("change", function () {
      assign(this.value);
      urlParams.set(paramName, this.value);
      history.replaceState({}, "", `?${urlParams}`);
      loadTimeseries();
      updateMap();
    });
  };
  bindParam("strategySelect", "strategy", (v) => { strategy = v; });
  bindParam("sourceSelect", "source", (v) => { source = v; });
  bindParam("fusionModeSelect", "mode", (v) => { fusionMode = v; refreshMapLabel(); });

  await setSiteSeason(initialSite, initialSeason);
}
|
||||
|
||||
// Scrubbing the date slider updates the date label, then redraws the plots
// and the map for the newly selected day.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const dayIndex = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(dayIndex);
  drawPlots();
  updateMap();
});

// Kick off page initialisation.
init();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,541 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>S2 Band Reflectance Timeseries</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" />
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/geotiff@2.0.7/dist-browser/geotiff.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/proj4@2.9.0/dist/proj4.js"></script>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; }
|
||||
.nav { margin-bottom: 15px; font-size: 14px; }
|
||||
.nav a { margin-right: 12px; color: #0066cc; text-decoration: none; }
|
||||
.nav a:hover { text-decoration: underline; }
|
||||
.nav a.active { font-weight: bold; }
|
||||
.container { max-width: 1400px; margin: 0 auto; padding: 20px; }
|
||||
.header-sticky { position: sticky; top: 0; background: white; z-index: 1000; border-bottom: 1px solid #ccc; padding-bottom: 20px; margin-bottom: 20px; }
|
||||
.selectors { margin-bottom: 20px; }
|
||||
.selectors select { padding: 5px 10px; font-size: 14px; margin-right: 15px; }
|
||||
h1 { margin: 0 0 5px 0; font-size: 22px; }
|
||||
.season-row { padding-bottom: 15px; }
|
||||
h2 { margin: 0; font-size: 16px; color: #666; display: inline; }
|
||||
.download-links { margin-left: 10px; font-size: 14px; }
|
||||
.download-links a { margin-right: 8px; color: #0066cc; text-decoration: none; }
|
||||
.download-links a:hover { text-decoration: underline; }
|
||||
.plot { width: 100%; height: 100px; border: 1px solid #ccc; margin-bottom: 15px; }
|
||||
.plot-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
#dateSlider { width: 100%; margin: 15px 0; }
|
||||
#dateDisplay { text-align: center; font-size: 14px; color: #666; }
|
||||
.map-label { font-size: 12px; margin-bottom: 3px; color: #666; }
|
||||
.map-date { font-size: 11px; margin-top: 3px; color: #999; }
|
||||
#s2map { height: 400px; border: 1px solid #ccc; margin-top: 10px; }
|
||||
.leaflet-image-layer { image-rendering: pixelated; }
|
||||
.leaflet-control-attribution { display: none; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header-sticky">
|
||||
<div class="nav">
|
||||
<a href="index.html">Full</a>
|
||||
<a href="preselection.html" class="active">Pre-selection</a>
|
||||
<a href="prepared.html">Prepared</a>
|
||||
<a href="fusion.html">Fusion</a>
|
||||
<a href="postprocessed.html">Postprocessed</a>
|
||||
<a href="metrics.html">Metrics</a>
|
||||
<a href="gap_validation.html">Gap validation</a>
|
||||
<a href="phenology.html">Phenology</a>
|
||||
</div>
|
||||
<h1 id="siteName">Innsbruck</h1>
|
||||
<div class="season-row"><h2 id="season">2024</h2><span class="download-links" id="downloadLinks"></span></div>
|
||||
<div class="selectors">
|
||||
<label>Site:</label>
|
||||
<select id="siteSelect"></select>
|
||||
<label>Season:</label>
|
||||
<select id="seasonSelect"></select>
|
||||
<label>Source:</label>
|
||||
<select id="sourceSelect">
|
||||
<option value="s2">S2</option>
|
||||
<option value="s3">S3</option>
|
||||
</select>
|
||||
<label>Exclusion:</label>
|
||||
<select id="exclusionSelect">
|
||||
<option value="none">None</option>
|
||||
<option value="aggressive">Aggressive</option>
|
||||
<option value="nonaggressive">Non-aggressive</option>
|
||||
</select>
|
||||
</div>
|
||||
<input type="range" id="dateSlider" min="0" max="365" value="0">
|
||||
<div id="dateDisplay">2024-01-01</div>
|
||||
</div>
|
||||
<div class="map-label" id="mapLabel">S2 RGB (closest available)</div>
|
||||
<div id="s2rgbdate" class="map-date"></div>
|
||||
<div id="s2map"></div>
|
||||
<div id="bandPlots"></div>
|
||||
</div>
|
||||
<script>
|
||||
// Coordinate systems registered with proj4 for reprojecting raster bounds.
proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
proj4.defs("EPSG:4326", "+proj=longlat +datum=WGS84 +no_defs");

// Band plots: timeseries key, display label and stroke colour.
const BANDS = [
  ["b02", "B02 (Blue)", "#0066ff"],
  ["b03", "B03 (Green)", "#00aa00"],
  ["b04", "B04 (Red)", "#cc0000"],
  ["b8a", "B8A (NIR)", "#9900cc"],
].map(([key, label, color]) => ({ key, label, color }));

// Current selection.
let siteName = "innsbruck";
let season = "2024";
let source = "s2";
let exclusion = "none";

// Map centre as [lat, lon] and the first day of the selected season.
let sitePosition = [47.116171, 11.320308];
let start = new Date(2024, 0, 1);

// Timeseries backing the plots.
let timeseries = [];
let gccTimeseries = [];
let ndviTimeseries = [];

// sitename -> list of seasons for which data was found.
let availableSiteSeasons = {};

// Leaflet objects.
let s2Map = null;
let s2Overlay = null;
let s2Marker = null;

const urlParams = new URLSearchParams(location.search);
|
||||
|
||||
/**
 * Apply the currently selected exclusion mode to a timeseries.
 *
 * @param {Array<object>} arr Rows that may carry `excluded_aggressive` /
 *   `excluded_nonaggressive` flags.
 * @returns {Array<object>} `arr` itself when exclusion is "none"; otherwise a
 *   new array without the rows flagged for the selected mode.
 */
function filteredTimeseries(arr) {
  switch (exclusion) {
    case "none":
      return arr;
    case "aggressive":
      return arr.filter((row) => !row.excluded_aggressive);
    default:
      return arr.filter((row) => !row.excluded_nonaggressive);
  }
}
|
||||
|
||||
/**
 * Format a Date as `YYYY-MM-DD` using its local calendar fields.
 *
 * @param {Date} d Date to format.
 * @returns {string} Zero-padded local date string.
 */
function fmtDate(d) {
  const pad2 = (n) => String(n).padStart(2, "0");
  return `${d.getFullYear()}-${pad2(d.getMonth() + 1)}-${pad2(d.getDate())}`;
}

/**
 * Convert a slider offset (days since the season start) to `YYYY-MM-DD`.
 *
 * Uses calendar arithmetic rather than adding multiples of 24h to the start
 * timestamp: local days are 23h or 25h long across a DST change, so the
 * millisecond form can land on the previous calendar day.
 *
 * @param {number} days Whole days after `start`.
 * @returns {string} Local date string.
 */
const dateFromDays = (days) =>
  fmtDate(new Date(start.getFullYear(), start.getMonth(), start.getDate() + days));

/**
 * Convert a `YYYY-MM-DD` string to whole days since the season start.
 *
 * Both dates are projected onto UTC midnights so the difference is an exact
 * multiple of 24h; subtracting local midnights and flooring is one day short
 * whenever the target date is in DST and the start is not.
 *
 * @param {string} dateStr Date in `YYYY-MM-DD` form.
 * @returns {number} Day offset from `start` (NaN for unparseable input).
 */
const daysFromDate = (dateStr) => {
  const [y, m, d] = dateStr.split("-").map(Number);
  const targetUtc = Date.UTC(y, m - 1, d);
  const startUtc = Date.UTC(start.getFullYear(), start.getMonth(), start.getDate());
  return Math.round((targetUtc - startUtc) / 86400000);
};
|
||||
|
||||
/**
 * Draw one band of the (exclusion-filtered) timeseries as a line plot, with
 * min/max labels, a tick on the time axis for every sample, a red cursor at
 * the slider's current date and the value of the sample closest to it.
 *
 * @param {string} canvasId Id of the target canvas element; a missing element
 *   makes this a no-op.
 * @param {string} bandKey Row property to plot; rows where it is
 *   null/undefined are skipped.
 * @param {string} bandLabel Display label of the band (not used for drawing).
 * @param {string} color Stroke colour of the series line.
 */
function drawBandPlot(canvasId, bandKey, bandLabel, color) {
  const canvas = document.getElementById(canvasId);
  if (!canvas) return;
  const ctx = canvas.getContext("2d");

  // Match the backing store to the rendered width; height is fixed.
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const pad = 30;
  const w = canvas.width;
  const h = canvas.height;
  const plotW = w - pad * 2;
  const plotH = h - pad * 2;
  const axisY = pad + plotH;

  const rows = filteredTimeseries(timeseries).filter((row) => row[bandKey] != null);
  if (rows.length === 0) return;

  // Work in epoch milliseconds; "|| 1" avoids dividing by zero when all
  // samples share one date or one value.
  const toMs = (d) => new Date(d).getTime();
  const times = rows.map((row) => toMs(row.date));
  const values = rows.map((row) => row[bandKey]);
  const minT = Math.min(...times);
  const maxT = Math.max(...times);
  const tSpan = maxT - minT || 1;
  const minVal = Math.min(...values);
  const maxVal = Math.max(...values);
  const vSpan = maxVal - minVal || 1;
  const xOf = (d) => pad + ((toMs(d) - minT) / tSpan) * plotW;
  const yOf = (v) => axisY - ((v - minVal) / vSpan) * plotH;

  // Axes.
  ctx.clearRect(0, 0, w, h);
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, axisY);
  ctx.lineTo(pad + plotW, axisY);
  ctx.stroke();

  // Value range labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minVal.toFixed(4), 2, axisY + 10);
  ctx.fillText(maxVal.toFixed(4), 2, pad + 3);

  // Series line.
  ctx.strokeStyle = color;
  ctx.beginPath();
  rows.forEach((row, i) => {
    const px = xOf(row.date);
    const py = yOf(row[bandKey]);
    if (i === 0) ctx.moveTo(px, py);
    else ctx.lineTo(px, py);
  });
  ctx.stroke();

  // One small tick on the time axis per sample.
  ctx.fillStyle = "#888";
  rows.forEach((row) => {
    ctx.fillRect(xOf(row.date) - 1, axisY - 1, 2, 2);
  });

  // Cursor at the slider's date.
  const currentDate = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const cursorX = xOf(currentDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(cursorX, pad);
  ctx.lineTo(cursorX, axisY);
  ctx.stroke();

  // Label the sample nearest to the cursor (first one wins on ties).
  const cursorMs = toMs(currentDate);
  let nearest = rows[0];
  let nearestGap = Math.abs(times[0] - cursorMs);
  for (let i = 1; i < rows.length; i++) {
    const gap = Math.abs(times[i] - cursorMs);
    if (gap < nearestGap) {
      nearest = rows[i];
      nearestGap = gap;
    }
  }
  ctx.fillStyle = "#f00";
  ctx.font = "bold 10px sans-serif";
  ctx.fillText(nearest[bandKey].toFixed(4), cursorX + 5, yOf(nearest[bandKey]) - 5);
}
|
||||
|
||||
/**
 * Draw the NDVI time series on the "plot_ndvi" canvas.
 *
 * Renders the axes, the min/max value labels, the NDVI polyline, one tick per
 * observation on the date axis, and a red cursor at the date currently
 * selected on the "dateSlider" input, labelled with the NDVI value of the
 * observation closest to that date. Returns without drawing when the canvas
 * is missing or when no entry with an NDVI value is left after
 * filteredTimeseries().
 */
function drawNdviPlot() {
  const canvas = document.getElementById("plot_ndvi");
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const pad = 30;
  const w = canvas.width;
  const h = canvas.height;
  const plotW = w - pad * 2;
  const plotH = h - pad * 2;

  const data = filteredTimeseries(ndviTimeseries).filter((entry) => entry.ndvi != null);
  if (data.length === 0) return;

  // Work in epoch milliseconds so dates can be scaled like plain numbers.
  const toMs = (d) => new Date(d).getTime();
  const stamps = data.map((entry) => toMs(entry.date));
  const values = data.map((entry) => entry.ndvi);
  const firstMs = Math.min(...stamps);
  const lastMs = Math.max(...stamps);
  const spanMs = lastMs - firstMs || 1; // avoid dividing by zero for a single date
  const minVal = Math.min(...values);
  const maxVal = Math.max(...values);
  const valRange = maxVal - minVal || 1; // avoid dividing by zero for a flat series

  const toX = (d) => pad + ((toMs(d) - firstMs) / spanMs) * plotW;
  const toY = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;

  ctx.clearRect(0, 0, w, h);

  // Left and bottom axes.
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, pad + plotH);
  ctx.lineTo(pad + plotW, pad + plotH);
  ctx.stroke();

  // Value-range labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minVal.toFixed(3), 2, pad + plotH + 10);
  ctx.fillText(maxVal.toFixed(3), 2, pad + 3);

  // NDVI polyline.
  ctx.strokeStyle = "#2d7a3e";
  ctx.beginPath();
  for (let i = 0; i < data.length; i++) {
    const px = toX(data[i].date);
    const py = toY(data[i].ndvi);
    if (i === 0) {
      ctx.moveTo(px, py);
    } else {
      ctx.lineTo(px, py);
    }
  }
  ctx.stroke();

  // One small tick on the date axis per observation.
  ctx.fillStyle = "#888";
  const axisY = pad + plotH;
  data.forEach((entry) => {
    ctx.fillRect(toX(entry.date) - 1, axisY - 1, 2, 2);
  });

  // Vertical cursor at the slider's current date.
  const sliderDays = parseInt(document.getElementById("dateSlider").value);
  const currentDate = dateFromDays(sliderDays);
  const xPos = toX(currentDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(xPos, pad);
  ctx.lineTo(xPos, pad + plotH);
  ctx.stroke();

  // Label the cursor with the value of the nearest observation (first wins on ties).
  const currentMs = toMs(currentDate);
  let closest = data[0];
  for (let i = 1; i < data.length; i++) {
    const candidate = data[i];
    if (Math.abs(toMs(candidate.date) - currentMs) < Math.abs(toMs(closest.date) - currentMs)) {
      closest = candidate;
    }
  }
  if (closest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(closest.ndvi.toFixed(3), xPos + 5, toY(closest.ndvi) - 5);
  }
}
|
||||
|
||||
/**
 * Draw the GCC (greenness index) time series on the "plot_gcc" canvas.
 *
 * Renders the axes, the min/max value labels, the greenness polyline, one
 * tick per observation on the date axis, and a red cursor at the date
 * currently selected on the "dateSlider" input, labelled with the greenness
 * value of the observation closest to that date. Returns without drawing when
 * the canvas is missing or when no entry with a greenness_index is left after
 * filteredTimeseries().
 */
function drawGccPlot() {
  const canvas = document.getElementById("plot_gcc");
  if (!canvas) return;
  const ctx = canvas.getContext("2d");
  canvas.width = canvas.offsetWidth;
  canvas.height = 100;

  const pad = 30;
  const w = canvas.width;
  const h = canvas.height;
  const plotW = w - pad * 2;
  const plotH = h - pad * 2;

  const data = filteredTimeseries(gccTimeseries).filter((entry) => entry.greenness_index != null);
  if (data.length === 0) return;

  // Work in epoch milliseconds so dates can be scaled like plain numbers.
  const toMs = (d) => new Date(d).getTime();
  const stamps = data.map((entry) => toMs(entry.date));
  const values = data.map((entry) => entry.greenness_index);
  const firstMs = Math.min(...stamps);
  const lastMs = Math.max(...stamps);
  const spanMs = lastMs - firstMs || 1; // avoid dividing by zero for a single date
  const minVal = Math.min(...values);
  const maxVal = Math.max(...values);
  const valRange = maxVal - minVal || 1; // avoid dividing by zero for a flat series

  const toX = (d) => pad + ((toMs(d) - firstMs) / spanMs) * plotW;
  const toY = (v) => pad + plotH - ((v - minVal) / valRange) * plotH;

  ctx.clearRect(0, 0, w, h);

  // Left and bottom axes.
  ctx.strokeStyle = "#ccc";
  ctx.beginPath();
  ctx.moveTo(pad, pad);
  ctx.lineTo(pad, pad + plotH);
  ctx.lineTo(pad + plotW, pad + plotH);
  ctx.stroke();

  // Value-range labels.
  ctx.fillStyle = "#000";
  ctx.font = "9px sans-serif";
  ctx.fillText(minVal.toFixed(3), 2, pad + plotH + 10);
  ctx.fillText(maxVal.toFixed(3), 2, pad + 3);

  // Greenness polyline.
  ctx.strokeStyle = "#00aa00";
  ctx.beginPath();
  for (let i = 0; i < data.length; i++) {
    const px = toX(data[i].date);
    const py = toY(data[i].greenness_index);
    if (i === 0) {
      ctx.moveTo(px, py);
    } else {
      ctx.lineTo(px, py);
    }
  }
  ctx.stroke();

  // One small tick on the date axis per observation.
  ctx.fillStyle = "#888";
  const axisY = pad + plotH;
  data.forEach((entry) => {
    ctx.fillRect(toX(entry.date) - 1, axisY - 1, 2, 2);
  });

  // Vertical cursor at the slider's current date.
  const sliderDays = parseInt(document.getElementById("dateSlider").value);
  const currentDate = dateFromDays(sliderDays);
  const xPos = toX(currentDate);
  ctx.strokeStyle = "#f00";
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(xPos, pad);
  ctx.lineTo(xPos, pad + plotH);
  ctx.stroke();

  // Label the cursor with the value of the nearest observation (first wins on ties).
  const currentMs = toMs(currentDate);
  let closest = data[0];
  for (let i = 1; i < data.length; i++) {
    const candidate = data[i];
    if (Math.abs(toMs(candidate.date) - currentMs) < Math.abs(toMs(closest.date) - currentMs)) {
      closest = candidate;
    }
  }
  if (closest) {
    ctx.fillStyle = "#f00";
    ctx.font = "bold 10px sans-serif";
    ctx.fillText(closest.greenness_index.toFixed(3), xPos + 5, toY(closest.greenness_index) - 5);
  }
}
|
||||
|
||||
/**
 * Redraw every plot: NDVI first, then GCC, then one plot per entry in BANDS
 * (each drawn into the canvas whose id is "plot_" + the band key).
 */
function drawAllPlots() {
  drawNdviPlot();
  drawGccPlot();
  for (const band of BANDS) {
    drawBandPlot(`plot_${band.key}`, band.key, band.label, band.color);
  }
}
|
||||
|
||||
/**
 * Compute the green chromatic coordinate b03 / (b02 + b03 + b04) of an entry.
 *
 * @param {{b02: number, b03: number, b04: number}} entry - Record carrying the
 *   three band values read by this function.
 * @returns {number|null} The ratio, or null when any band is missing or not a
 *   finite number, or when the band sum is not positive.
 */
function computeGcc(entry) {
  const { b02, b03, b04 } = entry;
  // A null band would otherwise coerce to 0 and yield a misleading GCC of 0
  // (null / sum === 0) instead of marking the sample as unavailable.
  if (![b02, b03, b04].every((v) => Number.isFinite(v))) return null;
  const total = b02 + b03 + b04;
  return total > 0 ? b03 / total : null;
}
|
||||
|
||||
/**
 * Load the preselection time series for the current site, season and source,
 * then rebuild the dependent UI.
 *
 * Reads the source from the "sourceSelect" element (falling back to "s2"),
 * fetches `<source>_preselection.json`, and stores the result in the
 * `timeseries`, `ndviTimeseries` and `gccTimeseries` globals (all empty when
 * the request or parsing fails). Afterwards it refreshes the map label, the
 * download links, the plot canvases, the slider range and the date display,
 * redraws all plots and triggers an imagery update.
 */
async function loadTimeseries() {
  const rawBase = `data/${siteName}/${season}/raw`;
  const preselectionDir = `${rawBase}/preselection`;
  source = document.getElementById("sourceSelect")?.value || "s2";

  try {
    const response = await fetch(`${preselectionDir}/${source}_preselection.json`);
    let rows = [];
    if (response.ok) {
      rows = await response.json();
    }
    // GCC is derived client-side; rows without a computable value are dropped.
    const rowsWithGcc = rows
      .map((row) => ({ ...row, greenness_index: computeGcc(row) }))
      .filter((row) => row.greenness_index != null);
    timeseries = rows;
    ndviTimeseries = rows;
    gccTimeseries = rowsWithGcc;
  } catch {
    // Best effort: any fetch/parse problem leaves the page with empty series.
    timeseries = [];
    ndviTimeseries = [];
    gccTimeseries = [];
  }

  const srcLabel = source.toUpperCase();
  document.getElementById("mapLabel").textContent = `${srcLabel} RGB (closest available)`;

  const jsonUrl = `${preselectionDir}/${source}_preselection.json`;
  const csvUrl = `${preselectionDir}/${source}_preselection.csv`;
  const downloadMarkup = [
    `<a href="${jsonUrl}" download="${siteName}_${season}_${source}_preselection.json" target="_blank">[JSON]</a>`,
    `<a href="${csvUrl}" download="${siteName}_${season}_${source}_preselection.csv" target="_blank">[CSV]</a>`,
  ];
  document.getElementById("downloadLinks").innerHTML = downloadMarkup.join("");

  const plotMarkup = [
    `<div class="plot-label">${srcLabel} NDVI</div><canvas id="plot_ndvi" class="plot"></canvas>`,
    `<div class="plot-label">${srcLabel} GCC (Greenness Index)</div><canvas id="plot_gcc" class="plot"></canvas>`,
  ];
  for (const band of BANDS) {
    plotMarkup.push(`<div class="plot-label">${band.label}</div><canvas id="plot_${band.key}" class="plot"></canvas>`);
  }
  document.getElementById("bandPlots").innerHTML = plotMarkup.join("");

  // The slider spans from `start` to 31 December of the season year, in days.
  const slider = document.getElementById("dateSlider");
  const yearEnd = new Date(parseInt(season), 11, 31);
  slider.max = Math.ceil((yearEnd - start) / 86400000);

  drawAllPlots();
  document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value));
  updateS2Imagery();
}
|
||||
|
||||
/**
 * Check with a HEAD request whether the S2 preselection JSON exists for a
 * site/season pair.
 *
 * @param {string} sitename - Site directory name under `data/`.
 * @param {string} s - Season directory name under the site.
 * @returns {Promise<boolean>} The response's `ok` flag, or false when the
 *   request throws.
 */
async function probeDataExists(sitename, s) {
  let found;
  try {
    const url = `data/${sitename}/${s}/raw/preselection/s2_preselection.json`;
    const response = await fetch(url, { method: "HEAD" });
    found = response.ok;
  } catch {
    found = false;
  }
  return found;
}
|
||||
|
||||
/**
 * Look up a site feature in `window.sitesData` by its `properties.sitename`.
 *
 * @param {string} sitename - Name to match.
 * @returns {object|undefined} The first matching feature, or undefined when
 *   the site data is not loaded or nothing matches.
 */
function getSiteBySitename(sitename) {
  const features = window.sitesData?.features;
  return features?.find((feature) => feature.properties?.sitename === sitename);
}
|
||||
|
||||
/**
 * Switch the page to another site/season and reload everything that depends
 * on it.
 *
 * Updates the `siteName`, `season`, `start` and `sitePosition` globals, moves
 * the map and marker, refreshes the header texts, writes the selection into
 * the URL, reloads the time series and finally applies a `date` URL parameter
 * to the slider when one is present.
 *
 * @param {string} newSite - Sitename to activate.
 * @param {string} newSeason - Season (year) to activate.
 * @returns {Promise<void>}
 */
async function setSiteSeason(newSite, newSeason) {
  siteName = newSite;
  season = newSeason;
  start = new Date(parseInt(season, 10), 0, 1);

  const site = getSiteBySitename(newSite);
  if (site?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; the map position is kept as [lat, lon].
    const [lon, lat] = site.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  if (s2Map) {
    s2Map.setView(sitePosition, 12);
    if (s2Marker) s2Marker.setLatLng(sitePosition);
  }

  document.getElementById("siteName").textContent = (site?.properties?.description || newSite);
  document.getElementById("season").textContent = season;

  const params = new URLSearchParams(location.search);
  params.set("site", siteName);
  params.set("season", season);
  // Keep the shared urlParams in step: the source/exclusion handlers rewrite
  // the URL from it and would otherwise restore a stale site/season.
  urlParams.set("site", siteName);
  urlParams.set("season", season);
  history.replaceState({}, "", `?${params}`);

  await loadTimeseries();

  const urlDate = params.get("date");
  if (urlDate) {
    // loadTimeseries() has already rendered with the previous slider value
    // (the slider range is only known after it ran), so refresh everything
    // that depends on the slider once the URL date has been applied.
    const slider = document.getElementById("dateSlider");
    slider.value = daysFromDate(urlDate);
    document.getElementById("dateDisplay").textContent = dateFromDays(parseInt(slider.value, 10));
    drawAllPlots();
    updateS2Imagery();
  }
}
|
||||
|
||||
/**
 * Bootstrap the page.
 *
 * Loads `data/sites.geojson`, probes which site/season pairs actually have
 * data, fills the site and season selectors, restores source/exclusion from
 * the URL, creates the Leaflet map with its site marker, wires the change
 * handlers of the four selectors, and finally activates the initial
 * site/season. Falls back to site "innsbruck" and season "2024" when nothing
 * is available.
 *
 * @returns {Promise<void>}
 */
async function init() {
  // --- Site catalogue -----------------------------------------------------
  let sites = { features: [] };
  try {
    const res = await fetch("data/sites.geojson");
    if (res.ok) {
      sites = await res.json();
    }
  } catch {
    sites = { features: [] };
  }
  window.sitesData = sites;

  // --- Probe which seasons have data (sequentially, one request at a time) -
  for (const feature of window.sitesData.features || []) {
    const sn = feature.properties?.sitename;
    if (!sn) continue;
    const seasonsMap = feature.properties?.seasons;
    const candidateSeasons = seasonsMap ? Object.keys(seasonsMap).sort() : [];
    const withData = [];
    for (const s of candidateSeasons) {
      const exists = await probeDataExists(sn, s);
      if (exists) withData.push(s);
    }
    if (withData.length > 0) availableSiteSeasons[sn] = withData;
  }

  // --- Site selector ------------------------------------------------------
  const availableSites = Object.keys(availableSiteSeasons);
  const siteSelect = document.getElementById("siteSelect");
  const seasonSelect = document.getElementById("seasonSelect");
  const sourceSelect = document.getElementById("sourceSelect");
  const exclusionSelect = document.getElementById("exclusionSelect");

  siteSelect.innerHTML = "";
  const listedSites = availableSites.length ? availableSites.sort() : ["innsbruck"];
  for (const sn of listedSites) {
    const opt = document.createElement("option");
    opt.value = sn;
    opt.textContent = sn;
    siteSelect.appendChild(opt);
    // Only the fallback site can lack an entry here.
    if (!availableSiteSeasons[sn]) availableSiteSeasons[sn] = ["2024"];
  }

  const seasonOptionsHtml = (seasons) =>
    seasons.map((s) => `<option value="${s}">${s}</option>`).join("");

  // --- Initial selection from the URL ---------------------------------------
  const urlSite = urlParams.get("site");
  const urlSeason = urlParams.get("season");

  let initialSite = availableSites[0] || "innsbruck";
  if (urlSite && availableSiteSeasons[urlSite]) {
    initialSite = urlSite;
  }
  const initialSeasons = availableSiteSeasons[initialSite] || [];
  let initialSeason = initialSeasons[0] || "2024";
  if (urlSeason && initialSeasons.includes(urlSeason)) {
    initialSeason = urlSeason;
  }

  siteSelect.value = initialSite;
  seasonSelect.innerHTML = seasonOptionsHtml(initialSeasons);
  seasonSelect.value = initialSeason;
  sourceSelect.value = urlParams.get("source") || "s2";
  exclusion = urlParams.get("exclusion") || "none";
  exclusionSelect.value = exclusion;

  // --- Map ------------------------------------------------------------------
  const initSite = getSiteBySitename(initialSite);
  if (initSite?.geometry?.coordinates) {
    // GeoJSON stores [lon, lat]; the map position is kept as [lat, lon].
    const [lon, lat] = initSite.geometry.coordinates;
    sitePosition = [lat, lon];
  }
  const osmUrl = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png";
  s2Map = L.map("s2map", { zoomControl: false })
    .setView(sitePosition, 12)
    .addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 0.4 }));
  const markerIcon = L.divIcon({
    className: "site-marker",
    html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>",
    iconSize: [8, 8],
  });
  s2Marker = L.marker(sitePosition, { icon: markerIcon }).addTo(s2Map);

  // --- Selector handlers ----------------------------------------------------
  siteSelect.addEventListener("change", () => {
    const sn = siteSelect.value;
    const seas = availableSiteSeasons[sn] || [];
    seasonSelect.innerHTML = seasonOptionsHtml(seas);
    seasonSelect.value = seas[0] || "2024";
    setSiteSeason(sn, seasonSelect.value);
  });

  seasonSelect.addEventListener("change", () => {
    setSiteSeason(siteSelect.value, seasonSelect.value);
  });

  sourceSelect.addEventListener("change", async () => {
    source = sourceSelect.value;
    urlParams.set("source", source);
    history.replaceState({}, "", `?${urlParams}`);
    await loadTimeseries();
  });

  exclusionSelect.addEventListener("change", () => {
    exclusion = exclusionSelect.value;
    urlParams.set("exclusion", exclusion);
    history.replaceState({}, "", `?${urlParams}`);
    drawAllPlots();
    updateS2Imagery();
  });

  await setSiteSeason(initialSite, initialSeason);
}
|
||||
|
||||
// Moving the date slider updates the date text, redraws every plot (to move
// the cursor) and refreshes the imagery overlay.
document.getElementById("dateSlider").addEventListener("input", (event) => {
  const days = parseInt(event.currentTarget.value);
  document.getElementById("dateDisplay").textContent = dateFromDays(days);
  drawAllPlots();
  updateS2Imagery();
});
|
||||
|
||||
/**
 * Find the image filename whose observation date is nearest to `dateStr`.
 *
 * Only entries of the filtered `timeseries` that carry a `filename` are
 * considered; on equal distance the earlier entry in the list wins.
 *
 * @param {string} dateStr - Target date, parsed with `new Date()`.
 * @returns {string|null} The nearest entry's filename, or null when no entry
 *   has one.
 */
function closestFilename(dateStr) {
  const target = new Date(dateStr);
  const candidates = filteredTimeseries(timeseries).filter((entry) => entry.filename);
  if (candidates.length === 0) return null;

  const distance = (entry) => Math.abs(new Date(entry.date) - target);
  let best = candidates[0];
  for (let i = 1; i < candidates.length; i++) {
    if (distance(candidates[i]) < distance(best)) {
      best = candidates[i];
    }
  }
  return best.filename;
}
|
||||
|
||||
/**
 * Convert a bounding box from `fromCRS` into EPSG:4326 corner pairs.
 *
 * Only the two corners (bbox[0], bbox[1]) and (bbox[2], bbox[3]) are
 * projected with proj4.
 *
 * @param {number[]} bbox - Box as [x0, y0, x1, y1] in `fromCRS`.
 * @param {string} fromCRS - Source CRS identifier understood by proj4.
 * @returns {number[][]} [[y, x], [y, x]] of the two projected corners, i.e.
 *   each proj4 result with its two components swapped.
 */
function transformBounds(bbox, fromCRS) {
  const projectCorner = (x, y) => {
    const projected = proj4(fromCRS, "EPSG:4326", [x, y]);
    return [projected[1], projected[0]];
  };
  const southWest = projectCorner(bbox[0], bbox[1]);
  const northEast = projectCorner(bbox[2], bbox[3]);
  return [southWest, northEast];
}
|
||||
|
||||
/**
 * Fetch a GeoTIFF of the current site/season/source and turn its first three
 * bands into an RGBA image plus map bounds.
 *
 * Bands 0, 1, 2 are used as blue, green, red. Each band is stretched linearly
 * between its smallest and largest positive value to 0..255. Pixels whose
 * three stretched channels are all 0 become fully transparent.
 *
 * @param {string} filename - File name inside `data/<site>/<season>/raw/<source>/`.
 * @returns {Promise<{dataUrl: string, bounds: number[][]}>} PNG data URL of
 *   the rendered image and its bounds as two [lat, lon]-ordered corners.
 * @throws {Error} When the HTTP request fails, the file has fewer than three
 *   bands, or decoding/projection fails.
 */
async function loadS2Geotiff(filename) {
  const path = `data/${siteName}/${season}/raw/${source}/${filename}`;
  const response = await fetch(path);
  // Fail with a clear message instead of handing an error page to the decoder.
  if (!response.ok) {
    throw new Error(`Failed to fetch ${path}: HTTP ${response.status}`);
  }
  const tiff = await GeoTIFF.fromArrayBuffer(await response.arrayBuffer());
  const image = await tiff.getImage();
  const rasters = await image.readRasters();
  if (rasters.length < 3) {
    throw new Error(`Expected at least 3 bands in ${path}, found ${rasters.length}`);
  }
  const width = image.getWidth(), height = image.getHeight();
  const bbox = image.getBoundingBox();

  // Missing geo keys fall back to EPSG:4326 rather than "EPSG:undefined".
  const geoKeys = image.getGeoKeys() || {};
  let crsCode = "EPSG:4326";
  if (geoKeys.ProjectedCSTypeGeoKey) {
    crsCode = `EPSG:${geoKeys.ProjectedCSTypeGeoKey}`;
  } else if (geoKeys.GeographicTypeGeoKey && geoKeys.GeographicTypeGeoKey !== 4326) {
    crsCode = `EPSG:${geoKeys.GeographicTypeGeoKey}`;
  }

  // Linear stretch of one band to 0..255 based on its positive, non-NaN values.
  const normalize = (band) => {
    let min = Infinity, max = -Infinity;
    for (const v of band) {
      if (v > 0) { // also false for NaN
        if (v < min) min = v;
        if (v > max) max = v;
      }
    }
    // No usable pixel at all: render the band as 0 so it can become transparent.
    if (min > max) return Array.from(band, () => 0);
    const range = max - min || 1;
    return Array.from(band, (v) =>
      Number.isNaN(v) ? 0 : Math.max(0, Math.min(255, ((v - min) / range) * 255))
    );
  };
  const [bN, gN, rN] = [rasters[0], rasters[1], rasters[2]].map(normalize);

  const canvas = Object.assign(document.createElement("canvas"), { width, height });
  const ctx = canvas.getContext("2d");
  ctx.imageSmoothingEnabled = false;
  const imgData = ctx.createImageData(width, height);
  for (let i = 0; i < rN.length; i++) {
    const idx = i * 4;
    if (rN[i] === 0 && gN[i] === 0 && bN[i] === 0) {
      imgData.data[idx + 3] = 0; // transparent
    } else {
      imgData.data[idx] = rN[i];
      imgData.data[idx + 1] = gN[i];
      imgData.data[idx + 2] = bN[i];
      imgData.data[idx + 3] = 255;
    }
  }
  ctx.putImageData(imgData, 0, 0);

  const bounds = crsCode === "EPSG:4326"
    ? [[bbox[1], bbox[0]], [bbox[3], bbox[2]]]
    : transformBounds(bbox, crsCode);
  return { dataUrl: canvas.toDataURL(), bounds };
}
|
||||
|
||||
/**
 * Show the image closest to the slider's current date as an overlay on the map.
 *
 * Removes the overlay and clears the "s2rgbdate" text when no image is
 * available, the map does not exist yet, or loading fails. Calls may overlap
 * (the slider fires one per input event); only the most recent call is
 * allowed to touch the map, so a slow older load can no longer replace a
 * newer image.
 *
 * @returns {Promise<void>}
 */
async function updateS2Imagery() {
  // Per-function request counter: every call invalidates all earlier ones.
  const requestId = (updateS2Imagery.latestRequest || 0) + 1;
  updateS2Imagery.latestRequest = requestId;
  const isStale = () => requestId !== updateS2Imagery.latestRequest;

  const clearOverlay = () => {
    if (s2Overlay && s2Map) s2Map.removeLayer(s2Overlay);
    s2Overlay = null;
    document.getElementById("s2rgbdate").textContent = "";
  };

  const dateStr = dateFromDays(parseInt(document.getElementById("dateSlider").value));
  const filename = closestFilename(dateStr);
  if (!filename || !s2Map) {
    clearOverlay();
    return;
  }

  try {
    const { dataUrl, bounds } = await loadS2Geotiff(filename);
    if (isStale()) return; // a newer request owns the map now
    if (s2Overlay) s2Map.removeLayer(s2Overlay);
    s2Overlay = L.imageOverlay(dataUrl, bounds, { opacity: 0.95 }).addTo(s2Map);
    s2Map.fitBounds(bounds);
    // The text before the first "_" is displayed as YYYY-MM-DD.
    const d = filename.split("_")[0];
    document.getElementById("s2rgbdate").textContent = `${d.slice(0,4)}-${d.slice(4,6)}-${d.slice(6,8)}`;
  } catch (e) {
    if (isStale()) return;
    // Still best effort, but leave a trace instead of failing silently.
    console.warn(`Could not load imagery "${filename}":`, e);
    clearOverlay();
  }
}
|
||||
|
||||
// Start the page; report a failed bootstrap instead of leaving an unhandled
// promise rejection.
init().catch((err) => {
  console.error("Initialisation failed:", err);
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Add table
Add a link
Reference in a new issue