Ran linter.

This commit is contained in:
Felix Delattre 2026-06-11 16:33:14 +02:00
parent 49ad6bcaab
commit d29754e4a5
6 changed files with 186 additions and 84 deletions

View file

@ -72,7 +72,9 @@ WHITTAKER_LAMBDA = 400.0
SPATIAL_CV_HALF_M = 150
# PhenoCam archive image URL pattern
PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
PHENOCAM_IMAGE_URL = (
"https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}"
)
# ---------------------------------------------------------------------------
@ -133,7 +135,9 @@ def _date_from_s2_tif(path: Path) -> str | None:
# ---------------------------------------------------------------------------
def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]:
def _whittaker_smooth(
values: list[float | None], lam: float = WHITTAKER_LAMBDA
) -> list[float | None]:
"""Penalised least-squares smoother (Whittaker, 2nd-order differences).
Masked (None) values are filled via the smooth and then re-set to None in
@ -211,9 +215,7 @@ def _parse_phenocam_csv(
# ---------------------------------------------------------------------------
def _moving_average(
series: list[dict], value_key: str, window: int
) -> list[dict]:
def _moving_average(series: list[dict], value_key: str, window: int) -> list[dict]:
"""Compute centred moving average; returns new list with ``_smooth`` suffix key."""
if not series:
return []
@ -221,11 +223,13 @@ def _moving_average(
half = window // 2
smoothed = []
for i, pt in enumerate(series):
chunk = [v for v in vals[max(0, i - half): i + half + 1] if v is not None]
smoothed.append({
"date": pt["date"],
value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None,
})
chunk = [v for v in vals[max(0, i - half) : i + half + 1] if v is not None]
smoothed.append(
{
"date": pt["date"],
value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None,
}
)
return smoothed
@ -237,8 +241,10 @@ MATCH_TOLERANCE_DAYS = 5
def compute_metrics(
ref: list[dict], ref_key: str,
pred: list[dict], pred_key: str,
ref: list[dict],
ref_key: str,
pred: list[dict],
pred_key: str,
) -> dict | None:
"""Compute NSE, RMSE, nRMSE, Pearson r between pred and ref.
@ -246,7 +252,9 @@ def compute_metrics(
``MATCH_TOLERANCE_DAYS``. Returns a dict or ``None`` if fewer than
2 matched pairs exist.
"""
ref_lookup: dict[str, float] = {p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None}
ref_lookup: dict[str, float] = {
p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None
}
if not ref_lookup:
return None
@ -257,9 +265,16 @@ def compute_metrics(
v = pt.get(pred_key)
if v is None:
continue
nearest = min(ref_dates, key=lambda d: abs((
np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D")))
gap = abs((np.datetime64(pt["date"]) - np.datetime64(nearest)) / np.timedelta64(1, "D"))
nearest = min(
ref_dates,
key=lambda d: abs(
(np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D")
),
)
gap = abs(
(np.datetime64(pt["date"]) - np.datetime64(nearest))
/ np.timedelta64(1, "D")
)
if gap <= MATCH_TOLERANCE_DAYS and nearest in ref_lookup:
obs.append(ref_lookup[nearest])
sim.append(v)
@ -281,7 +296,13 @@ def compute_metrics(
def _r4(v: float | None) -> float | None:
return round(v, 4) if v is not None else None
return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(float(r))}
return {
"n": len(obs),
"rmse": _r4(rmse),
"nrmse": _r4(nrmse),
"nse": _r4(nse),
"r": _r4(float(r)),
}
S2_BAND_NAMES = ["B02", "B03", "B04"]
@ -294,7 +315,9 @@ def _read_multiband_center(
"""Return 3×3 mean per band at (lat, lon). Keys are ``band_names``, values float or None."""
try:
with rasterio.open(path) as src:
transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
transformer = Transformer.from_crs(
CRS.from_epsg(4326), src.crs, always_xy=True
)
x, y = transformer.transform(lon, lat)
row, col = rowcol(src.transform, x, y)
h, w = src.height, src.width
@ -353,7 +376,9 @@ def _read_footprint_stats(
"""
try:
with rasterio.open(path) as src:
transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
transformer = Transformer.from_crs(
CRS.from_epsg(4326), src.crs, always_xy=True
)
x, y = transformer.transform(lon, lat)
res = abs(src.transform.a) # pixel size in CRS units (metres for UTM)
half_px = max(1, int(round(half_m / res)))
@ -392,7 +417,11 @@ def compute_covariates(
means.append(m)
stds.append(s)
spatial_gcc_cv = round(float(np.mean([s / m for s, m in zip(stds, means)])), 4) if means else None
spatial_gcc_cv = (
round(float(np.mean([s / m for s, m in zip(stds, means)])), 4)
if means
else None
)
spatial_gcc_std = round(float(np.mean(stds)), 4) if stds else None
# S2 temporal gap statistics
@ -406,13 +435,13 @@ def compute_covariates(
s2_max_gap = None
return {
"spatial_gcc_cv": spatial_gcc_cv,
"spatial_gcc_std": spatial_gcc_std,
"s2_scene_count": len(s2_series),
"s2_mean_gap_days": s2_mean_gap,
"s2_max_gap_days": s2_max_gap,
"spatial_gcc_cv": spatial_gcc_cv,
"spatial_gcc_std": spatial_gcc_std,
"s2_scene_count": len(s2_series),
"s2_mean_gap_days": s2_mean_gap,
"s2_max_gap_days": s2_max_gap,
"s3_composite_count": len(s3_series),
"n_gcc_points": n_gcc_points,
"n_gcc_points": n_gcc_points,
}
@ -521,8 +550,22 @@ def export_site(
]
# Band reflectance timeseries (multi-band center-pixel)
bands_s2 = _multiband_series(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands")
bands_s3 = _multiband_series(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands")
bands_s2 = _multiband_series(
sorted(s2_refl_dir.glob("*_REFL.tif")),
_date_from_s2_tif,
lat,
lon,
S2_BAND_NAMES,
f"{site} S2 bands",
)
bands_s3 = _multiband_series(
sorted(s3_comp_dir.glob("composite_*.tif")),
_date_from_gcc_tif,
lat,
lon,
S3_BAND_NAMES,
f"{site} S3 bands",
)
# --- Per-metric JSON outputs ---
_write_json(out_dir / "gcc_phenocam.json", phenocam_series)
@ -545,16 +588,40 @@ def export_site(
s2_valid_dates = {p["date"].replace("-", "") for p in s2_series}
s3_valid_dates = {p["date"].replace("-", "") for p in s3_series}
s2_refl = [r for r in _raster_index(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root)
if r["date"] in s2_valid_dates]
s3_comp = [r for r in _raster_index(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
if r["date"] in s3_valid_dates]
s2_gcc = [r for r in _raster_index(sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root)
if r["date"] in s2_valid_dates]
s3_gcc = [r for r in _raster_index(sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root)
if r["date"] in s3_valid_dates]
bti_refl = _raster_index(sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root)
itb_gcc = _raster_index(sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root)
s2_refl = [
r
for r in _raster_index(
sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root
)
if r["date"] in s2_valid_dates
]
s3_comp = [
r
for r in _raster_index(
sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root
)
if r["date"] in s3_valid_dates
]
s2_gcc = [
r
for r in _raster_index(
sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root
)
if r["date"] in s2_valid_dates
]
s3_gcc = [
r
for r in _raster_index(
sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root
)
if r["date"] in s3_valid_dates
]
bti_refl = _raster_index(
sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root
)
itb_gcc = _raster_index(
sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root
)
_write_json(out_dir / "rasters_s2_refl.json", s2_refl)
_write_json(out_dir / "rasters_s3_composite.json", s3_comp)
@ -564,19 +631,27 @@ def export_site(
_write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc)
# --- Site covariates (heterogeneity + observation density) ---
_write_json(out_dir / "covariates.json", compute_covariates(
s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon
))
_write_json(
out_dir / "covariates.json",
compute_covariates(s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon),
)
# --- Validation metrics vs PhenoCam gcc_90 ---
_write_json(out_dir / "metrics.json", {
"bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"),
"itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"),
"s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker, "gcc"),
"s3_smooth": compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"),
"s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"),
"s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"),
})
_write_json(
out_dir / "metrics.json",
{
"bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"),
"itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"),
"s2_whittaker": compute_metrics(
phenocam_series, "gcc_90", s2_whittaker, "gcc"
),
"s3_smooth": compute_metrics(
phenocam_series, "gcc_90", s3_smooth_series, "gcc"
),
"s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"),
"s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"),
},
)
# Remove legacy bundled outputs if present
for legacy in ("timeseries.json", "rasters.json"):
@ -682,7 +757,9 @@ def main() -> None:
print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}")
for site, meta in tqdm(fusion_sites.items(), desc="Sites"):
out_dir = out_base / str(year) / site
ok = export_site(site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points"))
ok = export_site(
site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points")
)
if ok:
print(f"{site}")
else: