284 lines
8.6 KiB
Python
284 lines
8.6 KiB
Python
"""Step 1: download worldwide PhenoCam sites for a calendar year.
|
|
|
|
Inputs (``data/``): none — queries the PhenoCam API.
|
|
|
|
Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
|
|
|
- ``phenocam/{year}.json`` — site list manifest
|
|
- ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata
|
|
- ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV
|
|
|
|
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter).
|
|
|
|
Next step: :mod:`2-phenocam-screening`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from datetime import date
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
# Sibling ``processing`` directory, one level above this script's folder.
# It is put at the front of ``sys.path`` (once) before the project-local
# imports that follow -- presumably that is where those modules live.
PROCESSING_DIR = Path(__file__).resolve().parents[1].joinpath("processing")
if sys.path.count(str(PROCESSING_DIR)) == 0:
    sys.path.insert(0, str(PROCESSING_DIR))
|
|
|
|
from acquisition_phenocam import PHENOCAM_API # noqa: E402
|
|
from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date # noqa: E402
|
|
|
|
# Default value of the ``--evaluation-year`` command-line option.
EVALUATION_YEAR: int = 2025

# URL requested by ``check_phenocam_host`` to verify the API is reachable.
HOST_PROBE: str = "https://phenocam.nau.edu/api/cameras/?limit=1"

# Appended to a sitename to form the per-site ``one_day_summary`` CSV filename.
ONE_DAY_CSV_SUFFIX: str = "_1day.csv"
|
|
|
|
|
|
def check_phenocam_host() -> None:
    """Fail fast when the PhenoCam API cannot be reached.

    Issues a single GET to ``HOST_PROBE`` with a 30 second timeout. Both
    transport failures and HTTP error statuses count as "unreachable".

    Raises:
        RuntimeError: if the probe fails; the underlying
            ``requests.RequestException`` is chained as the cause.
    """
    try:
        requests.get(HOST_PROBE, timeout=30).raise_for_status()
    except requests.RequestException as exc:
        reason = f"{exc.__class__.__name__}: {exc}"
        raise RuntimeError(
            "PhenoCam API unreachable (phenocam.nau.edu): " + reason
        ) from exc
|
|
|
|
|
|
def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
    """Return whether the range ``first``..``last`` touches year ``season``.

    Both bounds are passed through ``_parse_iso_date``. If either comes back
    as ``None`` the range is treated as unknown and the answer is ``False``.
    Otherwise the closed range must intersect 1 January - 31 December of
    ``season``.
    """
    archive_start = _parse_iso_date(first)
    archive_end = _parse_iso_date(last)
    if archive_start is None or archive_end is None:
        return False
    # The range misses the year only if it starts after the year ends or
    # ends before the year begins.
    return not (
        archive_start > date(season, 12, 31) or archive_end < date(season, 1, 1)
    )
|
|
|
|
|
|
def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
    """Return ``<cache_dir>/phenocam/<evaluation_year>``, the per-site folder."""
    return cache_dir.joinpath("phenocam", str(evaluation_year))
|
|
|
|
|
|
def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the ``<sitename>.json`` file inside the year folder."""
    year_dir = sites_dir(cache_dir, evaluation_year)
    return year_dir.joinpath(f"{sitename}.json")
|
|
|
|
|
|
def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the site's one-day CSV inside the year folder.

    The filename is ``sitename`` followed by ``ONE_DAY_CSV_SUFFIX``.
    """
    year_dir = sites_dir(cache_dir, evaluation_year)
    return year_dir.joinpath(f"{sitename}{ONE_DAY_CSV_SUFFIX}")
|
|
|
|
|
|
def load_candidate_cameras(
    evaluation_year: int,
    *,
    site_filter: set[str] | None = None,
    active_only: bool = False,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Collect the camera records whose archive overlaps ``evaluation_year``.

    Args:
        evaluation_year: Calendar year the ``date_first``..``date_last`` range
            of a camera must overlap.
        site_filter: When given, only cameras whose ``Sitename`` is in this
            set are kept.
        active_only: When true, cameras with a falsy ``active`` field are
            dropped.
        limit: When given, the sorted result is sliced to ``[:limit]``.

    Returns:
        Shallow copies of the matching camera dicts, sorted by ``Sitename``.

    Raises:
        KeyError: if a camera that passes the ``active`` check has no
            ``Sitename`` key.
    """

    def _is_candidate(camera: dict[str, Any]) -> bool:
        if active_only and not camera.get("active"):
            return False
        # Read unconditionally so a record without a Sitename fails loudly.
        sitename = str(camera["Sitename"])
        if site_filter is not None and sitename not in site_filter:
            return False
        return _overlaps_year(
            camera.get("date_first"), camera.get("date_last"), evaluation_year
        )

    selected = sorted(
        (dict(camera) for camera in _paginate_cameras() if _is_candidate(camera)),
        key=lambda entry: str(entry["Sitename"]),
    )
    return selected if limit is None else selected[:limit]
|
|
|
|
|
|
def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
    """Return the first ROI-list record the API reports for ``site_name``.

    Walks the paginated ``{PHENOCAM_API}/roilists/`` endpoint, following each
    page's ``next`` link, and stops at the first page that contains at least
    one record whose ``site`` field equals ``site_name``.

    Returns:
        A shallow copy of the first matching record, or ``None`` when no page
        contains a match.

    Raises:
        requests.RequestException: if a request fails or returns an HTTP
            error status.
    """
    next_url = f"{PHENOCAM_API}/roilists/"
    # The site query is sent with the first request only; requests to a
    # page's ``next`` URL are made without extra params.
    query: dict[str, Any] | None = {"site": site_name}
    while next_url:
        response = requests.get(next_url, params=query, timeout=60)
        response.raise_for_status()
        page = response.json()
        matches = [
            entry for entry in page.get("results", []) if entry.get("site") == site_name
        ]
        if matches:
            return dict(matches[0])
        next_url = page.get("next")
        query = None
    return None
|
|
|
|
|
|
def download_one_day_csv(csv_url: str, output_path: Path) -> None:
    """Download the CSV at ``csv_url`` and store it at ``output_path``.

    The response body is saved byte-for-byte. Going through ``response.text``
    and ``Path.write_text`` would re-decode the payload with the charset
    guessed from the HTTP headers and translate newlines on Windows, either
    of which can silently alter the file.

    The data is written to a sibling ``*.part`` file first and then renamed
    over ``output_path``, so an interrupted run does not leave a truncated
    CSV at the final path.

    Args:
        csv_url: URL of the CSV to fetch.
        output_path: Destination file; parent directories are created.

    Raises:
        requests.RequestException: if the request fails or returns an HTTP
            error status.
        OSError: if the file cannot be written.
    """
    response = requests.get(csv_url, timeout=60)
    response.raise_for_status()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    partial_path = output_path.with_name(output_path.name + ".part")
    partial_path.write_bytes(response.content)
    partial_path.replace(output_path)
|
|
|
|
|
|
def download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
) -> str:
    """Fetch and store the metadata JSON and one-day CSV for one camera.

    The ROI record is looked up with ``fetch_roi_record``. The camera and ROI
    are written together as ``{"response": {"camera": ..., "roi": ...}}`` to
    the site's JSON path. The CSV is downloaded afterwards, and only when the
    ROI carries a non-empty ``one_day_summary`` URL.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    roi = fetch_roi_record(sitename)

    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"response": {"camera": camera, "roi": roi}}, indent=2)
    metadata_path.write_text(document + "\n", encoding="utf-8")

    csv_url = None if not roi else roi.get("one_day_summary")
    if not csv_url:
        return sitename
    download_one_day_csv(
        csv_url, site_csv_path(cache_dir, evaluation_year, sitename)
    )
    return sitename
|
|
|
|
|
|
def load_or_download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
    *,
    refresh: bool,
) -> str:
    """Reuse cached files for a camera when possible, otherwise download them.

    Behavior:
        * ``refresh`` is true or the site JSON is missing: full download via
          ``download_site``.
        * JSON and CSV are both present: nothing is fetched.
        * JSON is present but the CSV is missing: the CSV URL is read from the
          cached ``response.roi.one_day_summary`` field and only the CSV is
          downloaded. If there is no URL, nothing is fetched.
        * The cached JSON cannot be parsed or has no ``response`` object (for
          example a file truncated by an interrupted run): the site is
          downloaded again instead of raising.

    Returns:
        The camera's ``Sitename`` as a string.
    """
    sitename = str(camera["Sitename"])
    json_path = site_json_path(cache_dir, evaluation_year, sitename)
    csv_path = site_csv_path(cache_dir, evaluation_year, sitename)

    if refresh or not json_path.is_file():
        return download_site(camera, evaluation_year, cache_dir)
    if csv_path.is_file():
        return sitename

    try:
        payload = json.loads(json_path.read_text(encoding="utf-8"))
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; either one means the cache entry is unusable.
        payload = None
    cached = payload.get("response") if isinstance(payload, dict) else None
    if not isinstance(cached, dict):
        print(
            f"[PhenoCam-1] Cached metadata for {sitename} is unreadable; "
            f"re-downloading"
        )
        return download_site(camera, evaluation_year, cache_dir)

    roi = cached.get("roi") or {}
    csv_url = roi.get("one_day_summary")
    if csv_url:
        download_one_day_csv(csv_url, csv_path)
    return sitename
|
|
|
|
|
|
def run_download(
    *,
    cache_dir: Path,
    evaluation_year: int,
    active_only: bool = False,
    site_filter: set[str] | None = None,
    limit: int | None = None,
    refresh: bool = False,
) -> list[str]:
    """Download, or reuse from cache, every candidate site for a year.

    Checks that the API is reachable, selects candidate cameras with
    ``load_candidate_cameras``, then processes them one by one with
    ``load_or_download_site``, printing a progress line per site.

    Args:
        cache_dir: Base directory for the per-site files.
        evaluation_year: Calendar year the camera archives must overlap.
        active_only: Passed through to ``load_candidate_cameras``.
        site_filter: Passed through to ``load_candidate_cameras``.
        limit: Passed through to ``load_candidate_cameras``.
        refresh: Passed through to ``load_or_download_site``.

    Returns:
        The processed sitenames in sorted order.

    Raises:
        RuntimeError: if the API probe fails.
    """
    check_phenocam_host()
    candidates = load_candidate_cameras(
        evaluation_year,
        site_filter=site_filter,
        active_only=active_only,
        limit=limit,
    )
    total = len(candidates)
    print(
        f"[PhenoCam-1] {total} candidate(s) with archive overlap for "
        f"{evaluation_year}"
    )

    processed: list[str] = []
    for position, camera in enumerate(candidates, start=1):
        label = str(camera["Sitename"])
        latitude = float(camera["Lat"])
        longitude = float(camera["Lon"])
        print(
            f"[PhenoCam-1] ({position}/{total}) {label} "
            f"({latitude:.4f}, {longitude:.4f})"
        )
        processed.append(
            load_or_download_site(
                camera, evaluation_year, cache_dir, refresh=refresh
            )
        )
    processed.sort()
    return processed
|
|
|
|
|
|
def write_manifest(
    sitenames: list[str],
    output_path: Path,
    cache_dir: Path,
    evaluation_year: int,
) -> None:
    """Write the site-list manifest JSON to ``output_path``.

    The manifest holds ``evaluation_year``, ``count``, ``sites`` and
    ``sites_dir``, the location of the per-site folder expressed relative to
    the manifest's own directory (POSIX separators).

    ``Path.relative_to`` only works when the manifest directory is a lexical
    ancestor of the sites folder. A custom ``output_path`` elsewhere on disk
    would raise ``ValueError`` after all downloads had already finished. In
    that case the relative path is computed with ``os.path.relpath`` on the
    resolved paths (which may contain ``..``). If no relative path exists at
    all (e.g. different drives on Windows) the absolute sites folder is
    recorded instead.

    Args:
        sitenames: Site names to list in the manifest.
        output_path: Manifest file to write; parent directories are created.
        cache_dir: Base cache directory the sites folder is derived from.
        evaluation_year: Year recorded in the manifest and used for the
            sites folder name.
    """
    import os  # local import: only needed for the relpath fallback below

    year_dir = sites_dir(cache_dir, evaluation_year)
    manifest_dir = output_path.parent
    try:
        rel_sites_dir = year_dir.relative_to(manifest_dir)
    except ValueError:
        try:
            rel_sites_dir = Path(
                os.path.relpath(year_dir.resolve(), manifest_dir.resolve())
            )
        except ValueError:
            rel_sites_dir = year_dir.resolve()

    payload = {
        "evaluation_year": evaluation_year,
        "count": len(sitenames),
        "sites_dir": rel_sites_dir.as_posix(),
        "sites": sitenames,
    }
    manifest_dir.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-1] Wrote {output_path}")
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser; the module docstring is the description."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--cache-dir",
        type=Path,
        default=Path("data"),
        help="Base directory for per-site files and manifest",
    )
    parser.add_argument(
        "--evaluation-year",
        type=int,
        default=EVALUATION_YEAR,
        help=f"Calendar year to download (default: {EVALUATION_YEAR})",
    )
    parser.add_argument(
        "--active-only",
        action="store_true",
        help="Restrict candidates to cameras marked active in the API",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Process only the first N candidate sites (testing)",
    )
    parser.add_argument(
        "--sites",
        type=str,
        default=None,
        help="Comma-separated sitenames to download (testing)",
    )
    parser.add_argument(
        "--refresh",
        action="store_true",
        help="Re-download sites even when cache files exist",
    )
    parser.add_argument(
        "--output-json",
        type=Path,
        default=None,
        help="Manifest output path (default: data/phenocam/{year}.json)",
    )
    return parser


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: download the sites, then write the manifest.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``0`` once the manifest has been written.
    """
    args = _build_parser().parse_args(argv)

    # ``--sites`` is a comma-separated list; blank entries are discarded.
    requested: set[str] | None = None
    if args.sites:
        stripped = (name.strip() for name in args.sites.split(","))
        requested = {name for name in stripped if name}

    sitenames = run_download(
        cache_dir=args.cache_dir,
        evaluation_year=args.evaluation_year,
        active_only=args.active_only,
        site_filter=requested,
        limit=args.limit,
        refresh=args.refresh,
    )

    manifest_path = args.output_json
    if not manifest_path:
        manifest_path = args.cache_dir / "phenocam" / f"{args.evaluation_year}.json"
    write_manifest(sitenames, manifest_path, args.cache_dir, args.evaluation_year)
    return 0
|
|
|
|
|
|
# Script entry point: the return value of ``main`` becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|