143 lines
4.4 KiB
Python
143 lines
4.4 KiB
Python
"""Pipeline wrapper: run steps 1 → 2 → 3 → 4 → 5.
|
||
|
||
Steps 1 and 2 run once for the whole year (skipped when their output already
|
||
exists). Steps 3 and 4 run site-by-site for every PASS site from
|
||
``data/phenocam_screening/{year}.json``; a site is skipped when
|
||
``data/metrics/{year}/{site}/metrics.json`` already exists. Step 5 always
|
||
runs once at the end without ``--site`` so that ``manifest.json`` is written
|
||
with all processed sites (not just the last one).
|
||
|
||
Any failure stops the run immediately. Fix the issue and re-run — completed
|
||
steps and sites are skipped automatically.
|
||
|
||
CLI:
|
||
|
||
- ``--evaluation-year`` (default 2025)
|
||
- ``--site`` single site to run steps 3–4 for (default: all PASS sites)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import subprocess
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
# Root directory for pipeline inputs and outputs; a relative path, so it is
# resolved against the current working directory.
DATA_DIR = Path("data")
# Evaluation year used when ``--evaluation-year`` is not given.
DEFAULT_YEAR = 2025

# Steps 1 and 2 are global (once per year); steps 3–4 repeat per site.
# Each entry pairs a script with the output file (relative to DATA_DIR) whose
# presence means the step is skipped; "{year}" is replaced with the
# evaluation year before the check.
# NOTE(review): step 5, described in the module docstring, has no entry in
# either list below — confirm where it is meant to be launched.
GLOBAL_STEPS: list[tuple[str, Path]] = [
    # (script, skip-if-this-file-exists)
    ("1-phenocam.py", Path("phenocam/{year}.json")),
    ("2-phenocam-screening.py", Path("phenocam_screening/{year}.json")),
]

# Scripts run in this order for each site, each invoked with ``--site``.
PER_SITE_STEPS: list[str] = [
    "3-sentinel-data.py",
    "4-fusion.py",
]
|
||
|
||
|
||
def _load_pass_sites(year: int, *, data_dir: Path | None = None) -> list[str]:
    """Return the sorted, unique names of sites that passed step-2 screening.

    Reads ``<data_dir>/phenocam_screening/{year}.json`` and, for every entry
    of its top-level ``sites`` list whose ``calculations.status`` equals
    ``"PASS"``, collects ``response.camera.Sitename``.

    Args:
        year: Evaluation year selecting the manifest file.
        data_dir: Root data directory; defaults to the module-level
            ``DATA_DIR`` when omitted.

    Returns:
        Sorted list of distinct site names. Entries without a usable name,
        or whose nested sections are missing or ``null``, are ignored.

    Raises:
        SystemExit: If the manifest is missing, cannot be decoded as JSON,
            or does not have the expected top-level shape.
    """
    root = DATA_DIR if data_dir is None else data_dir
    path = root / "phenocam_screening" / f"{year}.json"
    if not path.is_file():
        raise SystemExit(f"Step-2 screening manifest not found: {path}")

    try:
        payload: Any = json.loads(path.read_text(encoding="utf-8"))
    except ValueError as err:
        # Covers both json.JSONDecodeError and UnicodeDecodeError; report it
        # the same way as every other pipeline failure instead of a traceback.
        raise SystemExit(
            f"Step-2 screening manifest is not valid JSON: {path} ({err})"
        ) from err
    if not isinstance(payload, dict):
        raise SystemExit(f"Step-2 screening manifest is not a JSON object: {path}")

    rows = payload.get("sites") or []
    if not isinstance(rows, list):
        raise SystemExit(f"Step-2 screening manifest has a non-list 'sites': {path}")

    def _section(node: Any, key: str) -> dict[str, Any]:
        # A JSON ``null`` (or any non-object) section is treated as empty so
        # that chained lookups never hit ``None.get``.
        value = node.get(key) if isinstance(node, dict) else None
        return value if isinstance(value, dict) else {}

    # A set drops duplicate names so each site is processed and counted once.
    names: set[str] = set()
    for row in rows:
        if _section(row, "calculations").get("status") != "PASS":
            continue
        name = _section(_section(row, "response"), "camera").get("Sitename")
        if name:
            names.add(str(name))
    return sorted(names)
|
||
|
||
|
||
def _is_site_complete(year: int, site: str, *, data_dir: Path | None = None) -> bool:
    """Return True when the metrics file for *site* in *year* already exists.

    The file checked is ``<data_dir>/metrics/{year}/{site}/metrics.json``;
    only its existence as a regular file is tested, not its contents.

    Args:
        year: Evaluation year.
        site: Site name, used as a directory name.
        data_dir: Root data directory; defaults to the module-level
            ``DATA_DIR`` when omitted.
    """
    root = DATA_DIR if data_dir is None else data_dir
    metrics_file = root / "metrics" / str(year) / site / "metrics.json"
    return metrics_file.is_file()
|
||
|
||
|
||
def _run_global_step(step: str, year: int) -> int:
    """Run a year-wide step script and return its exit code.

    The script is launched with the current Python interpreter as
    ``<step> --evaluation-year <year>``. A non-zero exit does not raise;
    the caller decides what to do with the returned code.
    """
    command = [sys.executable, step, "--evaluation-year", str(year)]
    completed = subprocess.run(command, check=False)
    return completed.returncode
|
||
|
||
|
||
def _run_site_step(step: str, year: int, site: str) -> int:
    """Run a per-site step script and return its exit code.

    The script is launched with the current Python interpreter as
    ``<step> --evaluation-year <year> --site <site>``. A non-zero exit does
    not raise; the caller decides what to do with the returned code.
    """
    command = [
        sys.executable,
        step,
        "--evaluation-year",
        str(year),
        "--site",
        site,
    ]
    completed = subprocess.run(command, check=False)
    return completed.returncode
|
||
|
||
|
||
def main(argv: list[str] | None = None) -> int:
    """Run the pipeline for one evaluation year.

    Parses ``--evaluation-year`` and ``--site``, runs every script in
    ``GLOBAL_STEPS`` whose marker file is absent, then runs the scripts in
    ``PER_SITE_STEPS`` for each selected PASS site that is not yet complete.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when every step that ran succeeded.

    Raises:
        SystemExit: As soon as any step exits non-zero, or when ``--site``
            names a site that is not among the PASS sites for the year.

    NOTE(review): the module docstring describes a final step 5, but this
    function only launches the scripts listed in ``GLOBAL_STEPS`` and
    ``PER_SITE_STEPS`` — confirm whether step 5 should be invoked here.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
    cli.add_argument(
        "--site",
        type=str,
        default=None,
        help="Single sitename to process (default: all PASS sites)",
    )
    options = cli.parse_args(argv)
    year = options.evaluation_year

    # Year-wide steps: a step is skipped when its marker file already exists.
    for script, marker_template in GLOBAL_STEPS:
        marker = DATA_DIR / str(marker_template).replace("{year}", str(year))
        if marker.is_file():
            print(f"[pipeline] {script} — skipping (output already exists)")
            continue
        print(f"[pipeline] Running {script}...")
        exit_code = _run_global_step(script, year)
        if exit_code != 0:
            raise SystemExit(
                f"[pipeline] {script} failed (exit {exit_code}); cannot continue"
            )

    # Site selection: every PASS site, or just the one requested via --site.
    pass_sites = _load_pass_sites(year)
    requested = options.site
    if requested and requested not in pass_sites:
        raise SystemExit(
            f"Site '{requested}' not found in step-2 PASS sites for {year}"
        )
    targets = [requested] if requested else pass_sites

    total = len(targets)
    skipped_count = 0
    succeeded_count = 0

    print(f"[pipeline] {total} PASS site(s) for {year}")

    # Per-site steps: run each script in PER_SITE_STEPS, stopping on failure.
    for position, site in enumerate(targets, start=1):
        prefix = f"[pipeline] ({position}/{total}) {site}"

        if _is_site_complete(year, site):
            print(f"{prefix} — skipping (already complete)")
            skipped_count += 1
            continue

        print(prefix)
        for step in PER_SITE_STEPS:
            exit_code = _run_site_step(step, year, site)
            if exit_code != 0:
                raise SystemExit(
                    f"[pipeline] {step} failed for {site} (exit {exit_code})"
                )

        succeeded_count += 1

    print(
        f"\n[pipeline] Done: {skipped_count} skipped, {succeeded_count} succeeded"
        f" (of {total} total)"
    )
    return 0
|
||
|
||
|
||
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|