foo
This commit is contained in:
parent
d50a0fcbb1
commit
94f910d978
3 changed files with 227 additions and 35 deletions
159
metrics_stats.py
159
metrics_stats.py
|
|
@ -3,13 +3,24 @@
|
|||
import json
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from scipy import sparse
|
||||
from scipy.sparse.linalg import spsolve
|
||||
from scipy.stats import pearsonr
|
||||
import rasterio
|
||||
from rasterio.warp import transform as transform_coords
|
||||
|
||||
from metrics_indices import BLUE_BAND, GREEN_BAND, RED_BAND
|
||||
|
||||
WHITTAKER_LAMBDA_DAYS_SQ = 400.0
|
||||
|
||||
|
||||
def _norm_date_key(s):
|
||||
if s is None:
|
||||
return None
|
||||
t = str(s).strip()
|
||||
return t.split("T")[0][:10] if "T" in t else t[:10]
|
||||
|
||||
|
||||
def load_timeseries(filepath):
|
||||
"""Load JSON timeseries and return dict mapping date -> value."""
|
||||
|
|
@ -22,14 +33,24 @@ def load_timeseries(filepath):
|
|||
|
||||
def match_dates(fusion_ts, phenocam_ts):
|
||||
"""Match dates between timeseries, return aligned numpy arrays (filter None values)."""
|
||||
common_dates = set(fusion_ts.keys()) & set(phenocam_ts.keys())
|
||||
|
||||
def _bundle(m):
|
||||
out = {}
|
||||
for k, v in m.items():
|
||||
nk = _norm_date_key(k)
|
||||
if nk and nk not in out:
|
||||
out[nk] = v
|
||||
return out
|
||||
|
||||
fa, pa = _bundle(fusion_ts), _bundle(phenocam_ts)
|
||||
common_dates = set(fa) & set(pa)
|
||||
fusion_vals = []
|
||||
phenocam_vals = []
|
||||
dates = []
|
||||
|
||||
for date in sorted(common_dates):
|
||||
fusion_val = fusion_ts[date]
|
||||
phenocam_val = phenocam_ts[date]
|
||||
fusion_val = fa[date]
|
||||
phenocam_val = pa[date]
|
||||
if fusion_val is not None and phenocam_val is not None:
|
||||
fusion_vals.append(fusion_val)
|
||||
phenocam_vals.append(phenocam_val)
|
||||
|
|
@ -94,19 +115,21 @@ def nse(y_true, y_pred):
|
|||
|
||||
|
||||
def calculate_temporal_metrics(fusion_ts, phenocam_ts):
|
||||
"""Calculate all 6 temporal metrics."""
|
||||
"""Temporal metrics vs PhenoCam (nse_pc; nse is the same value)."""
|
||||
fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts)
|
||||
|
||||
if len(fusion_vals) < 2:
|
||||
return None
|
||||
|
||||
n_pc = nse(phenocam_vals, fusion_vals)
|
||||
metrics = {
|
||||
"pearson_r": pearson_correlation(phenocam_vals, fusion_vals),
|
||||
"r_squared": r_squared(phenocam_vals, fusion_vals),
|
||||
"rmse": rmse(phenocam_vals, fusion_vals),
|
||||
"mae": mae(phenocam_vals, fusion_vals),
|
||||
"nrmse": nrmse(phenocam_vals, fusion_vals),
|
||||
"nse": nse(phenocam_vals, fusion_vals),
|
||||
"nse_pc": n_pc,
|
||||
"nse": n_pc,
|
||||
"n_samples": len(fusion_vals),
|
||||
"date_range": {"start": dates[0], "end": dates[-1]} if dates else None,
|
||||
}
|
||||
|
|
@ -129,6 +152,59 @@ def calculate_phenocam_stats(phenocam_ts):
|
|||
}
|
||||
|
||||
|
||||
def _s2_kept_date_set(base: Path, strategy: str) -> set:
|
||||
path = base / "raw" / "preselection" / "s2_preselection.json"
|
||||
if not path.exists():
|
||||
return set()
|
||||
with open(path) as f:
|
||||
rows = json.load(f)
|
||||
key = f"excluded_{strategy}"
|
||||
out = set()
|
||||
for e in rows:
|
||||
if e.get(key):
|
||||
continue
|
||||
nk = _norm_date_key(e.get("date"))
|
||||
if nk:
|
||||
out.add(nk)
|
||||
return out
|
||||
|
||||
|
||||
def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
|
||||
"""Daily Whittaker (weights 1 at obs); returns {YYYY-MM-DD: z}."""
|
||||
pairs = [
|
||||
(_norm_date_key(d), float(v))
|
||||
for d, v in zip(obs_dates, obs_values)
|
||||
if v is not None and _norm_date_key(d)
|
||||
]
|
||||
if len(pairs) < 2:
|
||||
return {}
|
||||
days = sorted({p[0] for p in pairs})
|
||||
t0 = datetime.strptime(days[0], "%Y-%m-%d").date()
|
||||
t1 = datetime.strptime(days[-1], "%Y-%m-%d").date()
|
||||
n = (t1 - t0).days + 1
|
||||
if n < n_min:
|
||||
return {}
|
||||
|
||||
w = np.zeros(n)
|
||||
y = np.zeros(n)
|
||||
for dk, val in pairs:
|
||||
i = (datetime.strptime(dk, "%Y-%m-%d").date() - t0).days
|
||||
if 0 <= i < n:
|
||||
w[i] = 1.0
|
||||
y[i] = val
|
||||
|
||||
D = sparse.diags(
|
||||
[1.0, -2.0, 1.0], [0, 1, 2], shape=(n - 2, n), format="csc", dtype=np.float64
|
||||
)
|
||||
H = D.T @ D
|
||||
Wm = sparse.diags(w.astype(np.float64), format="csc")
|
||||
z = spsolve(Wm + lam * H, w * y)
|
||||
out = {}
|
||||
for i in range(n):
|
||||
out[(t0 + timedelta(days=i)).isoformat()] = float(z[i])
|
||||
return out
|
||||
|
||||
|
||||
def _get_spatial_stats_from_raster(raster_file, site_position):
|
||||
"""Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window."""
|
||||
try:
|
||||
|
|
@ -316,15 +392,52 @@ def calculate_all_metrics(season, site_name, site_position):
|
|||
if phenocam_stats:
|
||||
results["phenocam_stats"] = phenocam_stats
|
||||
|
||||
# Calculate S2 baseline metrics once (S2 data is identical across scenarios)
|
||||
s2_ts_path = (
|
||||
base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json"
|
||||
)
|
||||
s2_ts = load_timeseries(s2_ts_path)
|
||||
baseline = {}
|
||||
s2_ts = {}
|
||||
for sub in ("processed_aggressive_sigma20", "processed_nonaggressive_sigma20"):
|
||||
p = base / sub / "gcc" / "s2" / "timeseries.json"
|
||||
if p.exists():
|
||||
s2_ts = load_timeseries(p)
|
||||
if s2_ts:
|
||||
break
|
||||
if s2_ts:
|
||||
s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts)
|
||||
if s2_metrics:
|
||||
results["baseline"] = {"s2": s2_metrics}
|
||||
m0 = calculate_temporal_metrics(s2_ts, phenocam_ts)
|
||||
if m0:
|
||||
baseline["s2"] = m0
|
||||
for strategy in ("aggressive", "nonaggressive"):
|
||||
kept = _s2_kept_date_set(base, strategy)
|
||||
filtered = [
|
||||
(k, v)
|
||||
for k, v in sorted(
|
||||
s2_ts.items(), key=lambda x: _norm_date_key(x[0]) or ""
|
||||
)
|
||||
if _norm_date_key(k) in kept and v is not None
|
||||
]
|
||||
if not filtered:
|
||||
continue
|
||||
sub_ts = dict(filtered)
|
||||
mcf = calculate_temporal_metrics(sub_ts, phenocam_ts)
|
||||
if mcf:
|
||||
baseline.setdefault("s2_cloudfree", {})[strategy] = mcf
|
||||
obs_d, obs_v = zip(*filtered)
|
||||
smooth = _whittaker_smooth_dict(obs_d, obs_v, WHITTAKER_LAMBDA_DAYS_SQ)
|
||||
if smooth:
|
||||
mw = calculate_temporal_metrics(smooth, phenocam_ts)
|
||||
if mw:
|
||||
baseline.setdefault("s2_whittaker_lambda400", {})[strategy] = mw
|
||||
|
||||
for strategy in ("aggressive", "nonaggressive"):
|
||||
p = base / f"processed_{strategy}_sigma20" / "gcc" / "s3" / "timeseries.json"
|
||||
if not p.exists():
|
||||
continue
|
||||
s3_ts = load_timeseries(p)
|
||||
if s3_ts:
|
||||
m3 = calculate_temporal_metrics(s3_ts, phenocam_ts)
|
||||
if m3:
|
||||
baseline.setdefault("s3", {})[strategy] = m3
|
||||
|
||||
if baseline:
|
||||
results["baseline"] = baseline
|
||||
|
||||
# Calculate fusion metrics for each scenario
|
||||
for strategy in ["aggressive", "nonaggressive"]:
|
||||
|
|
@ -378,15 +491,17 @@ def calculate_all_metrics(season, site_name, site_position):
|
|||
if spatial_metrics:
|
||||
results["spatial"][scenario_name] = spatial_metrics
|
||||
|
||||
# Add summary
|
||||
# Add summary (primary: NSE vs PhenoCam; R² kept for comparison)
|
||||
if results["temporal"]:
|
||||
best_temporal = max(
|
||||
results["temporal"].items(),
|
||||
key=lambda x: x[1].get("r_squared", -1)
|
||||
if x[1].get("r_squared") is not None
|
||||
else -1,
|
||||
)
|
||||
results["summary"] = {"best_temporal_scenario": best_temporal[0]}
|
||||
ti = list(results["temporal"].items())
|
||||
|
||||
def _score(k):
|
||||
return lambda x: x[1].get(k) if x[1].get(k) is not None else float("-inf")
|
||||
|
||||
results["summary"] = {
|
||||
"best_temporal_scenario": max(ti, key=_score("nse_pc"))[0],
|
||||
"best_temporal_scenario_by_r2": max(ti, key=_score("r_squared"))[0],
|
||||
}
|
||||
|
||||
if results["spatial"]:
|
||||
best_spatial = max(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue