diff --git a/1-phenocam.py b/1-phenocam.py index 9276080..89ca797 100644 --- a/1-phenocam.py +++ b/1-phenocam.py @@ -81,7 +81,9 @@ def load_candidate_cameras( sitename = str(camera["Sitename"]) if site_filter is not None and sitename not in site_filter: continue - if not _overlaps_year(camera.get("date_first"), camera.get("date_last"), evaluation_year): + if not _overlaps_year( + camera.get("date_first"), camera.get("date_last"), evaluation_year + ): continue cameras.append(dict(camera)) cameras.sort(key=lambda item: str(item["Sitename"])) @@ -129,7 +131,9 @@ def download_site( csv_url = roi.get("one_day_summary") if roi else None if csv_url: - download_one_day_csv(csv_url, site_csv_path(cache_dir, evaluation_year, sitename)) + download_one_day_csv( + csv_url, site_csv_path(cache_dir, evaluation_year, sitename) + ) return sitename @@ -199,7 +203,9 @@ def write_manifest( cache_dir: Path, evaluation_year: int, ) -> None: - rel_sites_dir = sites_dir(cache_dir, evaluation_year).relative_to(output_path.parent) + rel_sites_dir = sites_dir(cache_dir, evaluation_year).relative_to( + output_path.parent + ) payload = { "evaluation_year": evaluation_year, "count": len(sitenames), diff --git a/2-phenocam-screening.py b/2-phenocam-screening.py index 954f44e..5848ae9 100644 --- a/2-phenocam-screening.py +++ b/2-phenocam-screening.py @@ -203,7 +203,9 @@ def screen_site( calculations["snr"] = snr if snr is None or snr < snr_threshold: calculations["failing_gate"] = "snr" - calculations["reason"] = "insufficient_snr" if snr is not None else "snr_undefined" + calculations["reason"] = ( + "insufficient_snr" if snr is not None else "snr_undefined" + ) return {"response": response, "calculations": calculations} calculations["passed_gates"].append("snr") @@ -331,10 +333,7 @@ def print_summary(results: list[dict[str, Any]], evaluation_year: int) -> None: print(f" after_{gate}: {after}, fail_at_{gate}: {fails}") print("\nPer-site table") - print( - f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} " - f"{'status':>6} gate reason" - ) + print(f"{'site':<24} {'n':>4} {'mon':>3} {'snr':>6} {'status':>6} gate reason") print("-" * 72) for row in sorted( results, diff --git a/3-sentinel-data.py b/3-sentinel-data.py index a5571da..82b4b72 100644 --- a/3-sentinel-data.py +++ b/3-sentinel-data.py @@ -388,10 +388,14 @@ def download_s2_window( with rasterio.Env(**_GDAL_COG_ENV): with ThreadPoolExecutor(max_workers=max_workers) as pool: futures = { - pool.submit(_process_item, item, bbox, bands, output_dir, ratio): item.id + pool.submit( + _process_item, item, bbox, bands, output_dir, ratio + ): item.id for item in items } - with tqdm(total=len(futures), unit="granule", desc="S2 COG window read") as pbar: + with tqdm( + total=len(futures), unit="granule", desc="S2 COG window read" + ) as pbar: for fut in as_completed(futures): msg = fut.result() if msg: @@ -461,9 +465,7 @@ def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int: n = date_counts.get(date_str, 0) date_counts[date_str] = n + 1 - raw = np.stack( - [nc.variables[b][t_idx, :, :] for b in S3_BANDS], axis=0 - ) + raw = np.stack([nc.variables[b][t_idx, :, :] for b in S3_BANDS], axis=0) stacked = ( np.ma.filled(raw, fill_value=np.nan).astype("float32") / S3_REFLECTANCE_SCALE @@ -523,7 +525,9 @@ def _download_with_retry(datacube: Any, nc_path: Path) -> None: delay *= 2 else: print(f"[S3-OEO] All {_S3_DOWNLOAD_RETRIES} download attempts failed") - raise RuntimeError(f"S3 download failed after {_S3_DOWNLOAD_RETRIES} attempts") from last_exc + raise RuntimeError( + f"S3 download failed after {_S3_DOWNLOAD_RETRIES} attempts" + ) from last_exc def download_s3_openeo( @@ -592,9 +596,7 @@ def _import_distance_to_clouds(): return distance_to_clouds except ImportError as exc: - raise ImportError( - "efast not found. Install with: uv sync" - ) from exc + raise ImportError("efast not found. Install with: uv sync") from exc def _normalize_s2_grid(s2_dir: Path) -> None: @@ -663,9 +665,7 @@ def _import_s3_processing(): return s3_processing except ImportError as exc: - raise ImportError( - "efast not found. Install with: uv sync" - ) from exc + raise ImportError("efast not found. Install with: uv sync") from exc def _reproject_s3_composites_to_s2_grid( @@ -912,7 +912,9 @@ def main(argv: list[str] | None = None) -> int: sitename = site["sitename"] site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename if args.skip_downloaded and site_dir.exists(): - print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename} — skipping (directory exists)") + print( + f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename} — skipping (directory exists)" + ) continue print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}") summary = process_site(sitename, site["lat"], site["lon"], year) diff --git a/4-fusion.py b/4-fusion.py index 22f5873..cdcc4bc 100644 --- a/4-fusion.py +++ b/4-fusion.py @@ -61,9 +61,7 @@ def _import_efast(): return efast_module except ImportError as exc: - raise ImportError( - "efast not found. Install with: uv sync" - ) from exc + raise ImportError("efast not found. Install with: uv sync") from exc # --------------------------------------------------------------------------- @@ -174,7 +172,9 @@ def fuse_site(sitename: str, year: int) -> dict[str, Any]: s2_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s2" s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3" - gcc_s3_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "gcc_s3" + gcc_s3_dir = ( + DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "gcc_s3" + ) base = DATA_DIR / "fusion" / str(year) / sitename if not s2_dir.is_dir() or not any(s2_dir.glob("*_REFL.tif")): @@ -228,11 +228,15 @@ def fuse_site(sitename: str, year: int) -> dict[str, Any]: print(f"[{sitename}] ItB: fusing GCC over {len(fusion_dates)} dates...") for date in fusion_dates: - efast.fusion(date, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **_fusion_kwargs) + efast.fusion( + date, gcc_s3_dir, s2_dir, itb_fusion, product="GCC", **_fusion_kwargs + ) # --- BtI: fuse reflectance (3-band, matching S2 B02/B03/B04), then derive GCC --- # S3 composites have 4 bands; strip band 4 (Oa17/NIR) so shapes match S2 REFL. - s3_rgb_dir = DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3_rgb" + s3_rgb_dir = ( + DATA_DIR / "sentinel_data" / str(year) / sitename / "prepared" / "s3_rgb" + ) s3_rgb_dir.mkdir(parents=True, exist_ok=True) for p in sorted(s3_dir.glob("composite_*.tif")): out = s3_rgb_dir / p.name @@ -250,7 +254,9 @@ def fuse_site(sitename: str, year: int) -> dict[str, Any]: print(f"[{sitename}] BtI: fusing REFL over {len(fusion_dates)} dates...") for date in fusion_dates: - efast.fusion(date, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **_fusion_kwargs) + efast.fusion( + date, s3_rgb_dir, s2_dir, bti_fusion, product="REFL", **_fusion_kwargs + ) print(f"[{sitename}] BtI: deriving GCC from fused REFL...") compute_gcc_from_refl(bti_fusion, bti_gcc) @@ -318,7 +324,9 @@ def main(argv: list[str] | None = None) -> int: for i, sitename in enumerate(sites, 1): fusion_dir = DATA_DIR / "fusion" / str(year) / sitename if args.skip_blended and fusion_dir.exists(): - print(f"[Fusion] ({i}/{len(sites)}) {sitename} — skipping (fusion directory exists)") + print( + f"[Fusion] ({i}/{len(sites)}) {sitename} — skipping (fusion directory exists)" + ) continue print(f"[Fusion] ({i}/{len(sites)}) {sitename}") summary = fuse_site(sitename, year) diff --git a/5-metrics.py b/5-metrics.py index c049850..25be963 100644 --- a/5-metrics.py +++ b/5-metrics.py @@ -72,7 +72,9 @@ WHITTAKER_LAMBDA = 400.0 SPATIAL_CV_HALF_M = 150 # PhenoCam archive image URL pattern -PHENOCAM_IMAGE_URL = "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}" +PHENOCAM_IMAGE_URL = ( + "https://phenocam.nau.edu/data/archive/{site}/{year}/{month}/{filename}" +) # --------------------------------------------------------------------------- @@ -133,7 +135,9 @@ def _date_from_s2_tif(path: Path) -> str | None: # --------------------------------------------------------------------------- -def _whittaker_smooth(values: list[float | None], lam: float = WHITTAKER_LAMBDA) -> list[float | None]: +def _whittaker_smooth( + values: list[float | None], lam: float = WHITTAKER_LAMBDA +) -> list[float | None]: """Penalised least-squares smoother (Whittaker, 2nd-order differences). Masked (None) values are filled via the smooth and then re-set to None in @@ -211,9 +215,7 @@ def _parse_phenocam_csv( # --------------------------------------------------------------------------- -def _moving_average( - series: list[dict], value_key: str, window: int -) -> list[dict]: +def _moving_average(series: list[dict], value_key: str, window: int) -> list[dict]: """Compute centred moving average; returns new list with ``_smooth`` suffix key.""" if not series: return [] @@ -221,11 +223,13 @@ def _moving_average( half = window // 2 smoothed = [] for i, pt in enumerate(series): - chunk = [v for v in vals[max(0, i - half): i + half + 1] if v is not None] - smoothed.append({ - "date": pt["date"], - value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None, - }) + chunk = [v for v in vals[max(0, i - half) : i + half + 1] if v is not None] + smoothed.append( + { + "date": pt["date"], + value_key + "_smooth": (sum(chunk) / len(chunk)) if chunk else None, + } + ) return smoothed @@ -237,8 +241,10 @@ MATCH_TOLERANCE_DAYS = 5 def compute_metrics( - ref: list[dict], ref_key: str, - pred: list[dict], pred_key: str, + ref: list[dict], + ref_key: str, + pred: list[dict], + pred_key: str, ) -> dict | None: """Compute NSE, RMSE, nRMSE, Pearson r between pred and ref. @@ -246,7 +252,9 @@ def compute_metrics( ``MATCH_TOLERANCE_DAYS``. Returns a dict or ``None`` if fewer than 2 matched pairs exist. """ - ref_lookup: dict[str, float] = {p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None} + ref_lookup: dict[str, float] = { + p["date"]: p[ref_key] for p in ref if p.get(ref_key) is not None + } if not ref_lookup: return None @@ -257,9 +265,16 @@ def compute_metrics( v = pt.get(pred_key) if v is None: continue - nearest = min(ref_dates, key=lambda d: abs(( - np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D"))) - gap = abs((np.datetime64(pt["date"]) - np.datetime64(nearest)) / np.timedelta64(1, "D")) + nearest = min( + ref_dates, + key=lambda d: abs( + (np.datetime64(pt["date"]) - np.datetime64(d)) / np.timedelta64(1, "D") + ), + ) + gap = abs( + (np.datetime64(pt["date"]) - np.datetime64(nearest)) + / np.timedelta64(1, "D") + ) if gap <= MATCH_TOLERANCE_DAYS and nearest in ref_lookup: obs.append(ref_lookup[nearest]) sim.append(v) @@ -281,7 +296,13 @@ def compute_metrics( def _r4(v: float | None) -> float | None: return round(v, 4) if v is not None else None - return {"n": len(obs), "rmse": _r4(rmse), "nrmse": _r4(nrmse), "nse": _r4(nse), "r": _r4(float(r))} + return { + "n": len(obs), + "rmse": _r4(rmse), + "nrmse": _r4(nrmse), + "nse": _r4(nse), + "r": _r4(float(r)), + } S2_BAND_NAMES = ["B02", "B03", "B04"] @@ -294,7 +315,9 @@ def _read_multiband_center( """Return 3×3 mean per band at (lat, lon). Keys are ``band_names``, values float or None.""" try: with rasterio.open(path) as src: - transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True) + transformer = Transformer.from_crs( + CRS.from_epsg(4326), src.crs, always_xy=True + ) x, y = transformer.transform(lon, lat) row, col = rowcol(src.transform, x, y) h, w = src.height, src.width @@ -353,7 +376,9 @@ def _read_footprint_stats( """ try: with rasterio.open(path) as src: - transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True) + transformer = Transformer.from_crs( + CRS.from_epsg(4326), src.crs, always_xy=True + ) x, y = transformer.transform(lon, lat) res = abs(src.transform.a) # pixel size in CRS units (metres for UTM) half_px = max(1, int(round(half_m / res))) @@ -392,7 +417,11 @@ def compute_covariates( means.append(m) stds.append(s) - spatial_gcc_cv = round(float(np.mean([s / m for s, m in zip(stds, means)])), 4) if means else None + spatial_gcc_cv = ( + round(float(np.mean([s / m for s, m in zip(stds, means)])), 4) + if means + else None + ) spatial_gcc_std = round(float(np.mean(stds)), 4) if stds else None # S2 temporal gap statistics @@ -406,13 +435,13 @@ def compute_covariates( s2_max_gap = None return { - "spatial_gcc_cv": spatial_gcc_cv, - "spatial_gcc_std": spatial_gcc_std, - "s2_scene_count": len(s2_series), - "s2_mean_gap_days": s2_mean_gap, - "s2_max_gap_days": s2_max_gap, + "spatial_gcc_cv": spatial_gcc_cv, + "spatial_gcc_std": spatial_gcc_std, + "s2_scene_count": len(s2_series), + "s2_mean_gap_days": s2_mean_gap, + "s2_max_gap_days": s2_max_gap, "s3_composite_count": len(s3_series), - "n_gcc_points": n_gcc_points, + "n_gcc_points": n_gcc_points, } @@ -521,8 +550,22 @@ def export_site( ] # Band reflectance timeseries (multi-band center-pixel) - bands_s2 = _multiband_series(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, lat, lon, S2_BAND_NAMES, f"{site} S2 bands") - bands_s3 = _multiband_series(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, lat, lon, S3_BAND_NAMES, f"{site} S3 bands") + bands_s2 = _multiband_series( + sorted(s2_refl_dir.glob("*_REFL.tif")), + _date_from_s2_tif, + lat, + lon, + S2_BAND_NAMES, + f"{site} S2 bands", + ) + bands_s3 = _multiband_series( + sorted(s3_comp_dir.glob("composite_*.tif")), + _date_from_gcc_tif, + lat, + lon, + S3_BAND_NAMES, + f"{site} S3 bands", + ) # --- Per-metric JSON outputs --- _write_json(out_dir / "gcc_phenocam.json", phenocam_series) @@ -545,16 +588,40 @@ def export_site( s2_valid_dates = {p["date"].replace("-", "") for p in s2_series} s3_valid_dates = {p["date"].replace("-", "") for p in s3_series} - s2_refl = [r for r in _raster_index(sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root) - if r["date"] in s2_valid_dates] - s3_comp = [r for r in _raster_index(sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root) - if r["date"] in s3_valid_dates] - s2_gcc = [r for r in _raster_index(sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root) - if r["date"] in s2_valid_dates] - s3_gcc = [r for r in _raster_index(sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root) - if r["date"] in s3_valid_dates] - bti_refl = _raster_index(sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root) - itb_gcc = _raster_index(sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root) + s2_refl = [ + r + for r in _raster_index( + sorted(s2_refl_dir.glob("*_REFL.tif")), _date_from_s2_tif, rel_root + ) + if r["date"] in s2_valid_dates + ] + s3_comp = [ + r + for r in _raster_index( + sorted(s3_comp_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root + ) + if r["date"] in s3_valid_dates + ] + s2_gcc = [ + r + for r in _raster_index( + sorted(s2_gcc_dir.glob("*_GCC.tif")), _date_from_s2_tif, rel_root + ) + if r["date"] in s2_valid_dates + ] + s3_gcc = [ + r + for r in _raster_index( + sorted(s3_gcc_dir.glob("composite_*.tif")), _date_from_gcc_tif, rel_root + ) + if r["date"] in s3_valid_dates + ] + bti_refl = _raster_index( + sorted(bti_refl_dir.glob("REFL_*.tif")), _date_from_gcc_tif, rel_root + ) + itb_gcc = _raster_index( + sorted(itb_gcc_dir.glob("GCC_*.tif")), _date_from_gcc_tif, rel_root + ) _write_json(out_dir / "rasters_s2_refl.json", s2_refl) _write_json(out_dir / "rasters_s3_composite.json", s3_comp) @@ -564,19 +631,27 @@ def export_site( _write_json(out_dir / "rasters_fusion_itb_gcc.json", itb_gcc) # --- Site covariates (heterogeneity + observation density) --- - _write_json(out_dir / "covariates.json", compute_covariates( - s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon - )) + _write_json( + out_dir / "covariates.json", + compute_covariates(s2_gcc_paths, s2_series, s3_series, n_gcc_points, lat, lon), + ) # --- Validation metrics vs PhenoCam gcc_90 --- - _write_json(out_dir / "metrics.json", { - "bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"), - "itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"), - "s2_whittaker": compute_metrics(phenocam_series, "gcc_90", s2_whittaker, "gcc"), - "s3_smooth": compute_metrics(phenocam_series, "gcc_90", s3_smooth_series, "gcc"), - "s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"), - "s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"), - }) + _write_json( + out_dir / "metrics.json", + { + "bti": compute_metrics(phenocam_series, "gcc_90", bti_series, "gcc"), + "itb": compute_metrics(phenocam_series, "gcc_90", itb_series, "gcc"), + "s2_whittaker": compute_metrics( + phenocam_series, "gcc_90", s2_whittaker, "gcc" + ), + "s3_smooth": compute_metrics( + phenocam_series, "gcc_90", s3_smooth_series, "gcc" + ), + "s2": compute_metrics(phenocam_series, "gcc_90", s2_series, "gcc"), + "s3": compute_metrics(phenocam_series, "gcc_90", s3_series, "gcc"), + }, + ) # Remove legacy bundled outputs if present for legacy in ("timeseries.json", "rasters.json"): @@ -682,7 +757,9 @@ def main() -> None: print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}") for site, meta in tqdm(fusion_sites.items(), desc="Sites"): out_dir = out_base / str(year) / site - ok = export_site(site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points")) + ok = export_site( + site, year, meta["lat"], meta["lon"], out_dir, meta.get("n_gcc_points") + ) if ok: print(f" ✓ {site}") else: diff --git a/fix-tile-boundary.py b/fix-tile-boundary.py index dff958e..abfc1e1 100644 --- a/fix-tile-boundary.py +++ b/fix-tile-boundary.py @@ -121,12 +121,16 @@ def repair(site_dir: Path) -> None: refl_path.unlink(missing_ok=True) n_removed += 1 - print(f"[repair] {name}: removed {n_removed} minority-shape file-sets (kept {ref_shape[0]}×{ref_shape[1]})") + print( + f"[repair] {name}: removed {n_removed} minority-shape file-sets (kept {ref_shape[0]}×{ref_shape[1]})" + ) # --- 3. Remove stale GCC files from prepared/s2 --------------------------- gcc_removed = sum(1 for f in s2_dir.glob("*_GCC.tif") if f.unlink() or True) if gcc_removed: - print(f"[repair] {name}: removed {gcc_removed} stale GCC files from prepared/s2") + print( + f"[repair] {name}: removed {gcc_removed} stale GCC files from prepared/s2" + ) # --- 4. Wipe stale S3 composites ------------------------------------------ for d in (s3_out, gcc_s3_out): @@ -136,15 +140,21 @@ def repair(site_dir: Path) -> None: # --- 5. Regenerate S3 composites with the correct reference --------------- if not s3_raw.exists() or not any(s3_raw.glob("S3*.tif")): - print(f"[repair] {name}: WARNING — no raw S3 data in {s3_raw}; skipping S3 regeneration.") + print( + f"[repair] {name}: WARNING — no raw S3 data in {s3_raw}; skipping S3 regeneration." + ) return s2_refl_path = next(iter(sorted(s2_dir.glob("*_REFL.tif"))), None) if s2_refl_path is None: - print(f"[repair] {name}: WARNING — no REFL files left; cannot regenerate S3 composites.") + print( + f"[repair] {name}: WARNING — no REFL files left; cannot regenerate S3 composites." + ) return - print(f"[repair] {name}: regenerating S3 composites (reference: {s2_refl_path.name})...") + print( + f"[repair] {name}: regenerating S3 composites (reference: {s2_refl_path.name})..." + ) step3 = _load_step3() s3_out.mkdir(parents=True, exist_ok=True) step3._prepare_s3(s3_raw, s2_refl_path, s3_out)