diff --git a/README.md b/README.md index 63852f1..3d64297 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,22 @@ End-to-end pipeline from selecting sites from the global [PhenoCam Network](http ## Quick start +### Run pipeline wrapper (recommended) + +```bash +uv sync +uv run python run-pipeline.py --evaluation-year 2025 +``` + +Runs all five steps in order. Steps 1 and 2 are skipped when their output already exists. Each site in steps 3–5 is skipped when `data/metrics/{year}/{site}/metrics.json` is present. Any failure stops the run immediately, so one can fix the issue and re-run; completed work is never repeated. + +```bash +# single site (steps 1 and 2 still skip if already done) +uv run python run-pipeline.py --evaluation-year 2025 --site ICOSFR-Fon1 +``` + +### Step by step + ```bash uv sync uv run python 1-phenocam.py --evaluation-year 2025 diff --git a/run-pipeline.py b/run-pipeline.py new file mode 100644 index 0000000..db2c493 --- /dev/null +++ b/run-pipeline.py @@ -0,0 +1,142 @@ +"""Pipeline wrapper: run steps 1 → 2 → 3 → 4 → 5. + +Steps 1 and 2 run once for the whole year (skipped when their output already +exists). Steps 3–5 run site-by-site for every PASS site from +``data/phenocam_screening/{year}.json``; a site is skipped entirely when +``data/metrics/{year}/{site}/metrics.json`` already exists. + +Any failure stops the run immediately. Fix the issue and re-run — completed +steps and sites are skipped automatically. + +CLI: + +- ``--evaluation-year`` (default 2025) +- ``--site`` single site to run steps 3–5 for (default: all PASS sites) +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path +from typing import Any + +DATA_DIR = Path("data") +DEFAULT_YEAR = 2025 + +# Steps 1 and 2 are global (once per year); steps 3–5 repeat per site. +GLOBAL_STEPS: list[tuple[str, Path]] = [ + # (script, skip-if-this-file-exists) + ("1-phenocam.py", Path("phenocam/{year}.json")), + ("2-phenocam-screening.py", Path("phenocam_screening/{year}.json")), +] + +PER_SITE_STEPS = [ + "3-sentinel-data.py", + "4-fusion.py", + "5-metrics.py", +] + + +def _load_pass_sites(year: int) -> list[str]: + path = DATA_DIR / "phenocam_screening" / f"{year}.json" + if not path.is_file(): + raise SystemExit(f"Step-2 screening manifest not found: {path}") + payload: dict[str, Any] = json.loads(path.read_text(encoding="utf-8")) + sites = [] + for row in payload.get("sites", []): + if row.get("calculations", {}).get("status") != "PASS": + continue + name = row.get("response", {}).get("camera", {}).get("Sitename") + if name: + sites.append(str(name)) + return sorted(sites) + + +def _is_site_complete(year: int, site: str) -> bool: + return (DATA_DIR / "metrics" / str(year) / site / "metrics.json").is_file() + + +def _run_global_step(step: str, year: int) -> int: + result = subprocess.run( + [sys.executable, step, "--evaluation-year", str(year)], + check=False, + ) + return result.returncode + + +def _run_site_step(step: str, year: int, site: str) -> int: + result = subprocess.run( + [sys.executable, step, "--evaluation-year", str(year), "--site", site], + check=False, + ) + return result.returncode + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR) + parser.add_argument( + "--site", + type=str, + default=None, + help="Single sitename to process (default: all PASS sites)", + ) + args = parser.parse_args(argv) + year = args.evaluation_year + + # --- Global steps (steps 1 and 2) --- + for script, marker_template in GLOBAL_STEPS: + marker = DATA_DIR / str(marker_template).replace("{year}", str(year)) + if marker.is_file(): + print(f"[pipeline] {script} — skipping (output already exists)") + else: + print(f"[pipeline] Running {script}...") + rc = _run_global_step(script, year) + if rc != 0: + raise SystemExit( + f"[pipeline] {script} failed (exit {rc}); cannot continue" + ) + + # --- Per-site steps (steps 3–5) --- + sites = _load_pass_sites(year) + if args.site: + if args.site not in sites: + raise SystemExit( + f"Site '{args.site}' not found in step-2 PASS sites for {year}" + ) + sites = [args.site] + + n_total = len(sites) + skipped: list[str] = [] + succeeded: list[str] = [] + + print(f"[pipeline] {n_total} PASS site(s) for {year}") + + for i, site in enumerate(sites, 1): + prefix = f"[pipeline] ({i}/{n_total}) {site}" + + if _is_site_complete(year, site): + print(f"{prefix} — skipping (already complete)") + skipped.append(site) + continue + + print(f"{prefix}") + for step in PER_SITE_STEPS: + rc = _run_site_step(step, year, site) + if rc != 0: + raise SystemExit(f"[pipeline] {step} failed for {site} (exit {rc})") + + succeeded.append(site) + + print( + f"\n[pipeline] Done: {len(skipped)} skipped, {len(succeeded)} succeeded" + f" (of {n_total} total)" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())