From d7b18baf2e2e6dda107980c0750fc8ba01c73f92 Mon Sep 17 00:00:00 2001
From: Felix Delattre
Date: Thu, 11 Jun 2026 18:03:34 +0200
Subject: [PATCH] Improved single sites metrics calculation.

---
 5-metrics.py    | 15 ++++++++++++---
 run-pipeline.py | 13 ++++++-------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/5-metrics.py b/5-metrics.py
index d4a833e..9658797 100644
--- a/5-metrics.py
+++ b/5-metrics.py
@@ -756,8 +756,6 @@ def main() -> None:
     # Export per-site data for the requested year
     year_sites = manifest["sites"].get(str(year), {})
     fusion_sites = {s: m for s, m in year_sites.items() if m["has_fusion"]}
-    if filter_site:
-        fusion_sites = {s: m for s, m in fusion_sites.items() if s == filter_site}
 
     print(f"Exporting {len(fusion_sites)} site(s) with fusion data for {year}")
     for site, meta in tqdm(fusion_sites.items(), desc="Sites"):
@@ -771,7 +769,18 @@ def main() -> None:
             print(f" ✗ {site} — no fusion data found")
 
     manifest_path = out_base / "manifest.json"
-    manifest_path.write_text(json.dumps(manifest, separators=(",", ":")))
+    if filter_site and manifest_path.is_file():
+        # Merge: update only the filtered site(s); preserve all other entries.
+        existing: dict = json.loads(manifest_path.read_text())
+        for year_key, year_sites_new in manifest["sites"].items():
+            existing.setdefault("sites", {}).setdefault(year_key, {}).update(
+                year_sites_new
+            )
+        all_years = sorted(set(existing.get("years", [])) | set(manifest["years"]))
+        existing["years"] = all_years
+        manifest_path.write_text(json.dumps(existing, separators=(",", ":")))
+    else:
+        manifest_path.write_text(json.dumps(manifest, separators=(",", ":")))
     print(f"Manifest written → {manifest_path}")
 
 
diff --git a/run-pipeline.py b/run-pipeline.py
index 3c18dc9..2067396 100644
--- a/run-pipeline.py
+++ b/run-pipeline.py
@@ -1,11 +1,9 @@
 """Pipeline wrapper: run steps 1 → 2 → 3 → 4 → 5.
 
 Steps 1 and 2 run once for the whole year (skipped when their output already
-exists). Steps 3 and 4 run site-by-site for every PASS site from
-``data/phenocam_screening/{year}.json``; a site is skipped when
-``data/metrics/{year}/{site}/metrics.json`` already exists. Step 5 always
-runs once at the end without ``--site`` so that ``manifest.json`` is written
-with all processed sites (not just the last one).
+exists). Steps 3–5 run site-by-site for every PASS site from
+``data/phenocam_screening/{year}.json``; a site is skipped entirely when
+``data/metrics/{year}/{site}/metrics.json`` already exists.
 
 Any failure stops the run immediately. Fix the issue and re-run — completed
 steps and sites are skipped automatically.
@@ -13,7 +11,7 @@ steps and sites are skipped automatically.
 CLI:
 
 - ``--evaluation-year`` (default 2025)
-- ``--site`` single site to run steps 3–4 for (default: all PASS sites)
+- ``--site`` single site to run steps 3–5 for (default: all PASS sites)
 """
 
 from __future__ import annotations
@@ -38,6 +36,7 @@ GLOBAL_STEPS: list[tuple[str, Path]] = [
 PER_SITE_STEPS = [
     "3-sentinel-data.py",
     "4-fusion.py",
+    "5-metrics.py",
 ]
 
 
@@ -101,7 +100,7 @@ def main(argv: list[str] | None = None) -> int:
                 f"[pipeline] {script} failed (exit {rc}); cannot continue"
             )
 
-    # --- Per-site steps (steps 3–5) ---
+    # --- Per-site steps (steps 3, 4, 5) ---
     sites = _load_pass_sites(year)
     if args.site:
         if args.site not in sites: