"""Step 1: download worldwide PhenoCam sites for a calendar year. Inputs (``data/``): none — queries the PhenoCam API. Outputs (``data/``, ``{year}`` = ``--evaluation-year``): - ``phenocam/{year}.json`` — site list manifest - ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata - ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter). Next step: :mod:`2-phenocam-screening`. """ from __future__ import annotations import argparse import json import sys from datetime import date from pathlib import Path from typing import Any import requests PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing" if str(PROCESSING_DIR) not in sys.path: sys.path.insert(0, str(PROCESSING_DIR)) from acquisition_phenocam import PHENOCAM_API # noqa: E402 from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date # noqa: E402 EVALUATION_YEAR = 2025 HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1" ONE_DAY_CSV_SUFFIX = "_1day.csv" def check_phenocam_host() -> None: try: response = requests.get(HOST_PROBE, timeout=30) response.raise_for_status() except requests.RequestException as exc: raise RuntimeError( f"PhenoCam API unreachable (phenocam.nau.edu): " f"{exc.__class__.__name__}: {exc}" ) from exc def _overlaps_year(first: str | None, last: str | None, season: int) -> bool: start = _parse_iso_date(first) end = _parse_iso_date(last) if start is None or end is None: return False return start <= date(season, 12, 31) and end >= date(season, 1, 1) def sites_dir(cache_dir: Path, evaluation_year: int) -> Path: return cache_dir / "phenocam" / str(evaluation_year) def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path: return sites_dir(cache_dir, evaluation_year) / f"{sitename}.json" def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path: return sites_dir(cache_dir, evaluation_year) / f"{sitename}{ONE_DAY_CSV_SUFFIX}" def load_candidate_cameras( evaluation_year: int, *, site_filter: set[str] | None = None, active_only: bool = False, limit: int | None = None, ) -> list[dict[str, Any]]: cameras: list[dict[str, Any]] = [] for camera in _paginate_cameras(): if active_only and not camera.get("active"): continue sitename = str(camera["Sitename"]) if site_filter is not None and sitename not in site_filter: continue if not _overlaps_year(camera.get("date_first"), camera.get("date_last"), evaluation_year): continue cameras.append(dict(camera)) cameras.sort(key=lambda item: str(item["Sitename"])) if limit is not None: cameras = cameras[:limit] return cameras def fetch_roi_record(site_name: str) -> dict[str, Any] | None: rois: list[dict[str, Any]] = [] url = f"{PHENOCAM_API}/roilists/" params: dict[str, Any] | None = {"site": site_name} while url: response = requests.get(url, params=params, timeout=60) response.raise_for_status() payload = response.json() rois.extend( item for item in payload.get("results", []) if item.get("site") == site_name ) url = payload.get("next") params = None if rois: break return dict(rois[0]) if rois else None def download_one_day_csv(csv_url: str, output_path: Path) -> None: response = requests.get(csv_url, timeout=60) response.raise_for_status() output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(response.text, encoding="utf-8") def download_site( camera: dict[str, Any], evaluation_year: int, cache_dir: Path, ) -> str: sitename = str(camera["Sitename"]) roi = fetch_roi_record(sitename) payload = {"response": {"camera": camera, "roi": roi}} json_path = site_json_path(cache_dir, evaluation_year, sitename) json_path.parent.mkdir(parents=True, exist_ok=True) json_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") csv_url = roi.get("one_day_summary") if roi else None if csv_url: download_one_day_csv(csv_url, site_csv_path(cache_dir, evaluation_year, sitename)) return sitename def load_or_download_site( camera: dict[str, Any], evaluation_year: int, cache_dir: Path, *, refresh: bool, ) -> str: sitename = str(camera["Sitename"]) json_path = site_json_path(cache_dir, evaluation_year, sitename) csv_path = site_csv_path(cache_dir, evaluation_year, sitename) if not refresh and json_path.is_file(): if not csv_path.is_file(): payload = json.loads(json_path.read_text(encoding="utf-8")) roi = payload.get("response", {}).get("roi") or {} csv_url = roi.get("one_day_summary") if csv_url: download_one_day_csv(csv_url, csv_path) return sitename return download_site(camera, evaluation_year, cache_dir) def run_download( *, cache_dir: Path, evaluation_year: int, active_only: bool = False, site_filter: set[str] | None = None, limit: int | None = None, refresh: bool = False, ) -> list[str]: check_phenocam_host() candidates = load_candidate_cameras( evaluation_year, site_filter=site_filter, active_only=active_only, limit=limit, ) print( f"[PhenoCam-1] {len(candidates)} candidate(s) with archive overlap for " f"{evaluation_year}" ) sitenames: list[str] = [] for index, camera in enumerate(candidates, start=1): sitename = str(camera["Sitename"]) print( f"[PhenoCam-1] ({index}/{len(candidates)}) {sitename} " f"({float(camera['Lat']):.4f}, {float(camera['Lon']):.4f})" ) sitenames.append( load_or_download_site( camera, evaluation_year, cache_dir, refresh=refresh, ) ) return sorted(sitenames) def write_manifest( sitenames: list[str], output_path: Path, cache_dir: Path, evaluation_year: int, ) -> None: rel_sites_dir = sites_dir(cache_dir, evaluation_year).relative_to(output_path.parent) payload = { "evaluation_year": evaluation_year, "count": len(sitenames), "sites_dir": rel_sites_dir.as_posix(), "sites": sitenames, } output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") print(f"[PhenoCam-1] Wrote {output_path}") def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--cache-dir", type=Path, default=Path("data"), help="Base directory for per-site files and manifest", ) parser.add_argument( "--evaluation-year", type=int, default=EVALUATION_YEAR, help=f"Calendar year to download (default: {EVALUATION_YEAR})", ) parser.add_argument( "--active-only", action="store_true", help="Restrict candidates to cameras marked active in the API", ) parser.add_argument( "--limit", type=int, default=None, help="Process only the first N candidate sites (testing)", ) parser.add_argument( "--sites", type=str, default=None, help="Comma-separated sitenames to download (testing)", ) parser.add_argument( "--refresh", action="store_true", help="Re-download sites even when cache files exist", ) parser.add_argument( "--output-json", type=Path, default=None, help="Manifest output path (default: data/phenocam/{year}.json)", ) args = parser.parse_args(argv) site_filter = None if args.sites: site_filter = {name.strip() for name in args.sites.split(",") if name.strip()} sitenames = run_download( cache_dir=args.cache_dir, evaluation_year=args.evaluation_year, active_only=args.active_only, site_filter=site_filter, limit=args.limit, refresh=args.refresh, ) manifest_path = args.output_json or ( args.cache_dir / "phenocam" / f"{args.evaluation_year}.json" ) write_manifest(sitenames, manifest_path, args.cache_dir, args.evaluation_year) return 0 if __name__ == "__main__": raise SystemExit(main())