Ran linter.
This commit is contained in:
parent
49ad6bcaab
commit
d29754e4a5
6 changed files with 186 additions and 84 deletions
177
5-metrics.py
177
5-metrics.py
|
|
@ -72,7 +72,9 @@ WHITTAKER_LAMBDA = 400.0
|
|||
SPATIAL_CV_HALF_M = 150
|
||||
|
||||
# PhenoCam archive image URL pattern
|
||||
PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
|
||||
PHENOCAM_IMAGE_URL = (
|
||||
"https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -133,7 +135,9 @@ def _date_from_s2_tif(path: Path) -> str | None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
|
||||
def _whittaker_smooth(
|
||||
values: list[float | None], lam: float = WHITTAKER_LAMBDA
|
||||
) -> list[float | None]:
|
||||
"""Penalised least-squares smoother (Whittaker, 2nd-order differences).
|
||||
|
||||
Masked (None) values are filled via the smooth and then re-set to None in
|
||||
|
|
@ -211,9 +215,7 @@ def _parse_phenocam_csv(
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _moving_average(
|
||||
series: list[dict], value_key: str, window: int
|
||||
) -> list[dict]:
|
||||
def _moving_average(series: list[dict], value_key: str, window: int) -> list[dict]:
|
||||
"""Compute centred moving average; returns new list with ``_smooth`` suffix key."""
|
||||
if not series:
|
||||
return []
|
||||
|
|
@ -221,11 +223,13 @@ def _moving_average(
|
|||
half = window // 2
|
||||
smoothed = []
|
||||
for i, pt in enumerate(series):
|
||||
chunk = [v for v in vals[max(0, i - half): i + half + 1] if v is not None]
|
||||
smoothed.append({
|
||||
"date": pt["date"],
|
||||
value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None,
|
||||
})
|
||||
chunk = [v for v in vals[max(0, i - half) : i + half + 1] if v is not None]
|
||||
smoothed.append(
|
||||
{
|
||||
"date": pt["date"],
|
||||
value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None,
|
||||
}
|
||||
)
|
||||
return smoothed
|
||||
|
||||
|
||||
|
|
@ -237,8 +241,10 @@ MATCH_TOLERANCE_DAYS = 5
|
|||
|
||||
|
||||
def compute_metrics(
|
||||
ref: list[dict], ref_key: str,
|
||||
pred: list[dict], pred_key: str,
|
||||
ref: list[dict],
|
||||
ref_key: str,
|
||||
pred: list[dict],
|
||||
pred_key: str,
|
||||
) -> dict | None:
|
||||
"""Compute NSE, RMSE, nRMSE, Pearson r between pred and ref.
|
||||
|
||||
|
|
@ -246,7 +252,9 @@ def compute_metrics(
|
|||
``MATCH_TOLERANCE_DAYS``. Returns a dict or ``None`` if fewer than
|
||||
2 matched pairs exist.
|
||||
"""
|
||||
ref_lookup: dict[str, float] = {p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None}
|
||||
ref_lookup: dict[str, float] = {
|
||||
p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None
|
||||
}
|
||||
if not ref_lookup:
|
||||
return None
|
||||
|
||||
|
|
@ -257,9 +265,16 @@ def compute_metrics(
|
|||
v = pt.get(pred_key)
|
||||
if v is None:
|
||||
continue
|
||||
nearest = min(ref_dates, key=lambda d: abs((
|
||||
np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D")))
|
||||
gap = abs((np.datetime64(pt["date"]) - np.datetime64(nearest)) / np.timedelta64(1, "D"))
|
||||
nearest = min(
|
||||
ref_dates,
|
||||
key=lambda d: abs(
|
||||
(np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D")
|
||||
),
|
||||
)
|
||||
gap = abs(
|
||||
(np.datetime64(pt["date"]) - np.datetime64(nearest))
|
||||
/ np.timedelta64(1, "D")
|
||||
)
|
||||
if gap <= MATCH_TOLERANCE_DAYS and nearest in ref_lookup:
|
||||
obs.append(ref_lookup[nearest])
|
||||
sim.append(v)
|
||||
|
|
@ -281,7 +296,13 @@ def compute_metrics(
|
|||
def _r4(v: float | None) -> float | None:
|
||||
return round(v, 4) if v is not None else None
|
||||
|
||||
return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(float(r))}
|
||||
return {
|
||||
"n": len(obs),
|
||||
"rmse": _r4(rmse),
|
||||
"nrmse": _r4(nrmse),
|
||||
"nse": _r4(nse),
|
||||
"r": _r4(float(r)),
|
||||
}
|
||||
|
||||
|
||||
S2_BAND_NAMES = ["B02", "B03", "B04"]
|
||||
|
|
@ -294,7 +315,9 @@ def _read_multiband_center(
|
|||
"""Return 3×3 mean per band at (lat, lon). Keys are ``band_names``, values float or None."""
|
||||
try:
|
||||
with rasterio.open(path) as src:
|
||||
transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
|
||||
transformer = Transformer.from_crs(
|
||||
CRS.from_epsg(4326), src.crs, always_xy=True
|
||||
)
|
||||
x, y = transformer.transform(lon, lat)
|
||||
row, col = rowcol(src.transform, x, y)
|
||||
h, w = src.height, src.width
|
||||
|
|
@ -353,7 +376,9 @@ def _read_footprint_stats(
|
|||
"""
|
||||
try:
|
||||
with rasterio.open(path) as src:
|
||||
transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
|
||||
transformer = Transformer.from_crs(
|
||||
CRS.from_epsg(4326), src.crs, always_xy=True
|
||||
)
|
||||
x, y = transformer.transform(lon, lat)
|
||||
res = abs(src.transform.a) # pixel size in CRS units (metres for UTM)
|
||||
half_px = max(1, int(round(half_m / res)))
|
||||
|
|
@ -392,7 +417,11 @@ def compute_covariates(
|
|||
means.append(m)
|
||||
stds.append(s)
|
||||
|
||||
spatial_gcc_cv = round(float(np.mean([s / m for s, m in zip(stds, means)])), 4) if means else None
|
||||
spatial_gcc_cv = (
|
||||
round(float(np.mean([s / m for s, m in zip(stds, means)])), 4)
|
||||
if means
|
||||
else None
|
||||
)
|
||||
spatial_gcc_std = round(float(np.mean(stds)), 4) if stds else None
|
||||
|
||||
# S2 temporal gap statistics
|
||||
|
|
@ -406,13 +435,13 @@ def compute_covariates(
|
|||
s2_max_gap = None
|
||||
|
||||
return {
|
||||
"spatial_gcc_cv": spatial_gcc_cv,
|
||||
"spatial_gcc_std": spatial_gcc_std,
|
||||
"s2_scene_count": len(s2_series),
|
||||
"s2_mean_gap_days": s2_mean_gap,
|
||||
"s2_max_gap_days": s2_max_gap,
|
||||
"spatial_gcc_cv": spatial_gcc_cv,
|
||||
"spatial_gcc_std": spatial_gcc_std,
|
||||
"s2_scene_count": len(s2_series),
|
||||
"s2_mean_gap_days": s2_mean_gap,
|
||||
"s2_max_gap_days": s2_max_gap,
|
||||
"s3_composite_count": len(s3_series),
|
||||
"n_gcc_points": n_gcc_points,
|
||||
"n_gcc_points": n_gcc_points,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -521,8 +550,22 @@ def export_site(
|
|||
]
|
||||
|
||||
# Band reflectance timeseries (multi-band center-pixel)
|
||||
bands_s2 = _multiband_series(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands")
|
||||
bands_s3 = _multiband_series(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands")
|
||||
bands_s2 = _multiband_series(
|
||||
sorted(s2_refl_dir.glob("*_REFL.tif")),
|
||||
_date_from_s2_tif,
|
||||
lat,
|
||||
lon,
|
||||
S2_BAND_NAMES,
|
||||
f"{site} S2 bands",
|
||||
)
|
||||
bands_s3 = _multiband_series(
|
||||
sorted(s3_comp_dir.glob("composite_*.tif")),
|
||||
_date_from_gcc_tif,
|
||||
lat,
|
||||
lon,
|
||||
S3_BAND_NAMES,
|
||||
f"{site} S3 bands",
|
||||
)
|
||||
|
||||
# --- Per-metric JSON outputs ---
|
||||
_write_json(out_dir / "gcc_phenocam.json", phenocam_series)
|
||||
|
|
@ -545,16 +588,40 @@ def export_site(
|
|||
s2_valid_dates = {p["date"].replace("-", "") for p in s2_series}
|
||||
s3_valid_dates = {p["date"].replace("-", "") for p in s3_series}
|
||||
|
||||
s2_refl = [r for r in _raster_index(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root)
|
||||
if r["date"] in s2_valid_dates]
|
||||
s3_comp = [r for r in _raster_index(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
|
||||
if r["date"] in s3_valid_dates]
|
||||
s2_gcc = [r for r in _raster_index(sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root)
|
||||
if r["date"] in s2_valid_dates]
|
||||
s3_gcc = [r for r in _raster_index(sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
|
||||
if r["date"] in s3_valid_dates]
|
||||
bti_refl = _raster_index(sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root)
|
||||
itb_gcc = _raster_index(sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root)
|
||||
s2_refl = [
|
||||
r
|
||||
for r in _raster_index(
|
||||
sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root
|
||||
)
|
||||
if r["date"] in s2_valid_dates
|
||||
]
|
||||
s3_comp = [
|
||||
r
|
||||
for r in _raster_index(
|
||||
sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root
|
||||
)
|
||||
if r["date"] in s3_valid_dates
|
||||
]
|
||||
s2_gcc = [
|
||||
r
|
||||
for r in _raster_index(
|
||||
sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root
|
||||
)
|
||||
if r["date"] in s2_valid_dates
|
||||
]
|
||||
s3_gcc = [
|
||||
r
|
||||
for r in _raster_index(
|
||||
sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root
|
||||
)
|
||||
if r["date"] in s3_valid_dates
|
||||
]
|
||||
bti_refl = _raster_index(
|
||||
sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root
|
||||
)
|
||||
itb_gcc = _raster_index(
|
||||
sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root
|
||||
)
|
||||
|
||||
_write_json(out_dir / "rasters_s2_refl.json", s2_refl)
|
||||
_write_json(out_dir / "rasters_s3_composite.json", s3_comp)
|
||||
|
|
@ -564,19 +631,27 @@ def export_site(
|
|||
_write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc)
|
||||
|
||||
# --- Site covariates (heterogeneity + observation density) ---
|
||||
_write_json(out_dir / "covariates.json", compute_covariates(
|
||||
s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon
|
||||
))
|
||||
_write_json(
|
||||
out_dir / "covariates.json",
|
||||
compute_covariates(s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon),
|
||||
)
|
||||
|
||||
# --- Validation metrics vs PhenoCam gcc_90 ---
|
||||
_write_json(out_dir / "metrics.json", {
|
||||
"bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"),
|
||||
"itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"),
|
||||
"s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker, "gcc"),
|
||||
"s3_smooth": compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"),
|
||||
"s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"),
|
||||
"s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"),
|
||||
})
|
||||
_write_json(
|
||||
out_dir / "metrics.json",
|
||||
{
|
||||
"bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"),
|
||||
"itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"),
|
||||
"s2_whittaker": compute_metrics(
|
||||
phenocam_series, "gcc_90", s2_whittaker, "gcc"
|
||||
),
|
||||
"s3_smooth": compute_metrics(
|
||||
phenocam_series, "gcc_90", s3_smooth_series, "gcc"
|
||||
),
|
||||
"s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"),
|
||||
"s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"),
|
||||
},
|
||||
)
|
||||
|
||||
# Remove legacy bundled outputs if present
|
||||
for legacy in ("timeseries.json", "rasters.json"):
|
||||
|
|
@ -682,7 +757,9 @@ def main() -> None:
|
|||
print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}")
|
||||
for site, meta in tqdm(fusion_sites.items(), desc="Sites"):
|
||||
out_dir = out_base / str(year) / site
|
||||
ok = export_site(site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points"))
|
||||
ok = export_site(
|
||||
site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points")
|
||||
)
|
||||
if ok:
|
||||
print(f" ✓ {site}")
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue