Switching horses.
This commit is contained in:
parent
25cbd97662
commit
e3e14027fc
51 changed files with 5078 additions and 11678 deletions
278
1-phenocam.py
Normal file
278
1-phenocam.py
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
"""Step 1: download worldwide PhenoCam sites for a calendar year.
|
||||
|
||||
Inputs (``data/``): none — queries the PhenoCam API.
|
||||
|
||||
Outputs (``data/``, ``{year}`` = ``--evaluation-year``):
|
||||
|
||||
- ``phenocam/{year}.json`` — site list manifest
|
||||
- ``phenocam/{year}/{sitename}.json`` — camera + ROI metadata
|
||||
- ``phenocam/{year}/{sitename}_1day.csv`` — ``one_day_summary`` GCC CSV
|
||||
|
||||
CLI: ``--evaluation-year`` (default 2025), ``--sites`` (optional comma-separated filter), ``--cache-dir``, ``--active-only``, ``--limit``, ``--refresh``, ``--output-json``.
|
||||
|
||||
Next step: :mod:`2-phenocam-screening`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
# ``processing/`` lives next to this script's parent directory; put it on
# ``sys.path`` (once) so the acquisition helpers imported below resolve.
PROCESSING_DIR = Path(__file__).resolve().parents[1] / "processing"
if not any(entry == str(PROCESSING_DIR) for entry in sys.path):
    sys.path.insert(0, str(PROCESSING_DIR))
|
||||
|
||||
from acquisition_phenocam import PHENOCAM_API # noqa: E402
|
||||
from acquisition_phenocam_all_europe import _paginate_cameras, _parse_iso_date # noqa: E402
|
||||
|
||||
# Default value for the ``--evaluation-year`` CLI option.
EVALUATION_YEAR = 2025

# One-record camera listing requested by ``check_phenocam_host`` as a
# reachability probe.
HOST_PROBE = "https://phenocam.nau.edu/api/cameras/?limit=1"

# Filename suffix appended to the sitename for the per-site daily-summary CSV.
ONE_DAY_CSV_SUFFIX = "_1day.csv"
|
||||
|
||||
|
||||
def check_phenocam_host() -> None:
    """Fail fast when the PhenoCam API cannot be reached.

    Sends a single GET to ``HOST_PROBE`` (30 s timeout) and checks the HTTP
    status of the reply.

    Raises:
        RuntimeError: if the request or its status check raises any
            ``requests.RequestException``; the original error is chained
            as the cause.
    """
    try:
        requests.get(HOST_PROBE, timeout=30).raise_for_status()
    except requests.RequestException as error:
        reason = f"{error.__class__.__name__}: {error}"
        raise RuntimeError(
            f"PhenoCam API unreachable (phenocam.nau.edu): {reason}"
        ) from error
|
||||
|
||||
|
||||
def _overlaps_year(first: str | None, last: str | None, season: int) -> bool:
    """Tell whether an archive date span touches calendar year ``season``.

    Args:
        first: First archive date, as accepted by ``_parse_iso_date``.
        last: Last archive date, as accepted by ``_parse_iso_date``.
        season: Calendar year to test against.

    Returns:
        ``True`` when the span ``first``..``last`` intersects
        1 January - 31 December of ``season`` (bounds inclusive); ``False``
        when it does not, or when ``_parse_iso_date`` yields ``None`` for
        either bound.
    """
    archive_start = _parse_iso_date(first)
    archive_end = _parse_iso_date(last)
    if archive_start is None or archive_end is None:
        return False
    year_end = date(season, 12, 31)
    year_start = date(season, 1, 1)
    # Two closed intervals intersect iff each starts before the other ends.
    return archive_start <= year_end and archive_end >= year_start
|
||||
|
||||
|
||||
def sites_dir(cache_dir: Path, evaluation_year: int) -> Path:
    """Return the per-year site directory ``<cache_dir>/phenocam/<evaluation_year>``.

    Only builds the path; nothing is created on disk.
    """
    return cache_dir.joinpath("phenocam", f"{evaluation_year}")
|
||||
|
||||
|
||||
def site_json_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the camera + ROI metadata file ``<sitename>.json``.

    The file sits inside the directory returned by ``sites_dir`` for the
    same ``cache_dir`` and ``evaluation_year``.
    """
    year_dir = sites_dir(cache_dir, evaluation_year)
    filename = f"{sitename}.json"
    return year_dir / filename
|
||||
|
||||
|
||||
def site_csv_path(cache_dir: Path, evaluation_year: int, sitename: str) -> Path:
    """Return the path of the site's daily-summary CSV.

    The filename is ``sitename`` followed by ``ONE_DAY_CSV_SUFFIX``, inside
    the directory returned by ``sites_dir``.
    """
    year_dir = sites_dir(cache_dir, evaluation_year)
    filename = f"{sitename}{ONE_DAY_CSV_SUFFIX}"
    return year_dir / filename
|
||||
|
||||
|
||||
def load_candidate_cameras(
    evaluation_year: int,
    *,
    site_filter: set[str] | None = None,
    active_only: bool = False,
    limit: int | None = None,
) -> list[dict[str, Any]]:
    """Collect camera records whose archive overlaps ``evaluation_year``.

    Args:
        evaluation_year: Calendar year the camera's ``date_first`` ..
            ``date_last`` span must overlap.
        site_filter: When given, keep only cameras whose ``Sitename`` is in
            this set.
        active_only: When true, drop cameras whose ``active`` field is falsy.
        limit: When given, keep only the first ``limit`` cameras after
            sorting by sitename.

    Returns:
        Shallow copies of the matching camera records, sorted by
        ``Sitename``.
    """

    def _is_candidate(record: dict[str, Any]) -> bool:
        # Checks run in the same order as the filters are documented above.
        if active_only and not record.get("active"):
            return False
        name = str(record["Sitename"])
        if site_filter is not None and name not in site_filter:
            return False
        return _overlaps_year(
            record.get("date_first"), record.get("date_last"), evaluation_year
        )

    selected = sorted(
        (dict(record) for record in _paginate_cameras() if _is_candidate(record)),
        key=lambda item: str(item["Sitename"]),
    )
    return selected if limit is None else selected[:limit]
|
||||
|
||||
|
||||
def fetch_roi_record(site_name: str) -> dict[str, Any] | None:
    """Fetch the first ROI-list record belonging to ``site_name``.

    Pages through ``{PHENOCAM_API}/roilists/`` and stops at the first page
    that contains at least one entry whose ``site`` equals ``site_name``.

    Args:
        site_name: Site whose ROI record is wanted.

    Returns:
        A shallow copy of the first matching entry, or ``None`` when no page
        contains one.

    Raises:
        requests.HTTPError: if any page responds with an error status.
    """
    next_url = f"{PHENOCAM_API}/roilists/"
    # The ``site`` query is sent only with the first request; follow-up pages
    # use the ``next`` URL from the previous payload as-is.
    query: dict[str, Any] | None = {"site": site_name}
    while next_url:
        response = requests.get(next_url, params=query, timeout=60)
        response.raise_for_status()
        page = response.json()
        # Entries are re-checked locally for an exact ``site`` match.
        matches = [
            entry for entry in page.get("results", []) if entry.get("site") == site_name
        ]
        if matches:
            return dict(matches[0])
        next_url = page.get("next")
        query = None
    return None
|
||||
|
||||
|
||||
def download_one_day_csv(csv_url: str, output_path: Path) -> None:
    """Download a daily-summary CSV and store it at ``output_path``.

    The body is first written to a sibling ``<name>.part`` file and then
    renamed onto ``output_path``. An interrupted write (disk full, process
    killed) therefore never leaves a truncated file at ``output_path`` that a
    later "file exists" cache check would mistake for a complete download.

    Args:
        csv_url: URL of the CSV to fetch.
        output_path: Destination file; parent directories are created.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        OSError: if the file cannot be written or renamed.
    """
    response = requests.get(csv_url, timeout=60)
    response.raise_for_status()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        partial_path.write_text(response.text, encoding="utf-8")
        # Rename within the same directory so the final path only ever
        # appears with complete content.
        partial_path.replace(output_path)
    except BaseException:
        # Do not leave a stray partial file behind; re-raise the original error.
        if partial_path.exists():
            partial_path.unlink()
        raise
|
||||
|
||||
|
||||
def download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
) -> str:
    """Fetch and store the metadata JSON and daily CSV for one camera.

    Writes ``{"response": {"camera": ..., "roi": ...}}`` to the site's JSON
    path, then downloads the CSV referenced by the ROI record's
    ``one_day_summary`` field when that field is present and non-empty.

    Args:
        camera: Camera record; must contain ``Sitename``.
        evaluation_year: Year used to build the output paths.
        cache_dir: Base directory for the output files.

    Returns:
        The camera's sitename.
    """
    sitename = str(camera["Sitename"])
    roi = fetch_roi_record(sitename)

    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps({"response": {"camera": camera, "roi": roi}}, indent=2)
    metadata_path.write_text(document + "\n", encoding="utf-8")

    # No ROI record, or one without a summary link: metadata only.
    summary_url = (roi or {}).get("one_day_summary")
    if not summary_url:
        return sitename
    download_one_day_csv(summary_url, site_csv_path(cache_dir, evaluation_year, sitename))
    return sitename
|
||||
|
||||
|
||||
def load_or_download_site(
    camera: dict[str, Any],
    evaluation_year: int,
    cache_dir: Path,
    *,
    refresh: bool,
) -> str:
    """Reuse cached site files when possible, otherwise download them.

    A full download happens when ``refresh`` is true or the site's JSON file
    is missing. When the JSON exists but the CSV does not, only the CSV is
    fetched, using the ``one_day_summary`` URL stored in the cached JSON (if
    there is one).

    Args:
        camera: Camera record; must contain ``Sitename``.
        evaluation_year: Year used to build the cache paths.
        cache_dir: Base directory of the cache.
        refresh: Force a full re-download even when cache files exist.

    Returns:
        The camera's sitename.
    """
    sitename = str(camera["Sitename"])
    metadata_path = site_json_path(cache_dir, evaluation_year, sitename)
    summary_path = site_csv_path(cache_dir, evaluation_year, sitename)

    if refresh or not metadata_path.is_file():
        return download_site(camera, evaluation_year, cache_dir)

    if summary_path.is_file():
        return sitename

    # Metadata is cached but the CSV is not: recover the link from the cache.
    cached = json.loads(metadata_path.read_text(encoding="utf-8"))
    cached_roi = cached.get("response", {}).get("roi") or {}
    summary_url = cached_roi.get("one_day_summary")
    if summary_url:
        download_one_day_csv(summary_url, summary_path)
    return sitename
|
||||
|
||||
|
||||
def run_download(
    *,
    cache_dir: Path,
    evaluation_year: int,
    active_only: bool = False,
    site_filter: set[str] | None = None,
    limit: int | None = None,
    refresh: bool = False,
) -> list[str]:
    """Download (or reuse from cache) every candidate site for one year.

    Checks that the API host responds, selects the candidate cameras, then
    processes them one by one while printing progress lines.

    Args:
        cache_dir: Base directory for the per-site files.
        evaluation_year: Calendar year to download.
        active_only: Passed through to ``load_candidate_cameras``.
        site_filter: Passed through to ``load_candidate_cameras``.
        limit: Passed through to ``load_candidate_cameras``.
        refresh: Re-download sites even when cache files exist.

    Returns:
        The processed sitenames in sorted order.
    """
    check_phenocam_host()
    candidates = load_candidate_cameras(
        evaluation_year,
        site_filter=site_filter,
        active_only=active_only,
        limit=limit,
    )
    total = len(candidates)
    print(f"[PhenoCam-1] {total} candidate(s) with archive overlap for {evaluation_year}")

    finished: list[str] = []
    for position, camera in enumerate(candidates, start=1):
        name = str(camera["Sitename"])
        latitude = float(camera["Lat"])
        longitude = float(camera["Lon"])
        print(f"[PhenoCam-1] ({position}/{total}) {name} ({latitude:.4f}, {longitude:.4f})")
        finished.append(
            load_or_download_site(camera, evaluation_year, cache_dir, refresh=refresh)
        )
    finished.sort()
    return finished
|
||||
|
||||
|
||||
def write_manifest(
    sitenames: list[str],
    output_path: Path,
    cache_dir: Path,
    evaluation_year: int,
) -> None:
    """Write the site-list manifest JSON for one evaluation year.

    The manifest records the year, the number of sites, the site names and
    ``sites_dir``: the per-site directory expressed relative to the
    manifest's own directory (POSIX separators).

    A manifest placed outside the cache tree (custom ``--output-json``) no
    longer aborts with ``ValueError``: the reference then uses ``..``
    components, or an absolute path if no relative path can be formed.

    Args:
        sitenames: Site names to list in the manifest.
        output_path: Manifest file to write; parent directories are created.
        cache_dir: Base cache directory the per-site files live under.
        evaluation_year: Year the manifest describes.
    """
    import os  # needed only for the relpath fallback below

    site_files_dir = sites_dir(cache_dir, evaluation_year)
    manifest_dir = output_path.parent
    try:
        # Common case: the manifest sits in an ancestor of the sites directory.
        sites_dir_ref = site_files_dir.relative_to(manifest_dir).as_posix()
    except ValueError:
        try:
            # Manifest lives elsewhere: express the location with ``..`` parts.
            sites_dir_ref = Path(os.path.relpath(site_files_dir, manifest_dir)).as_posix()
        except ValueError:
            # No relative path exists (e.g. different drives on Windows).
            sites_dir_ref = site_files_dir.resolve().as_posix()

    payload = {
        "evaluation_year": evaluation_year,
        "count": len(sitenames),
        "sites_dir": sites_dir_ref,
        "sites": sitenames,
    }
    manifest_dir.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"[PhenoCam-1] Wrote {output_path}")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, download the selected sites, write the manifest.

    Args:
        argv: Arguments to parse; ``None`` is passed through to
            ``ArgumentParser.parse_args``.

    Returns:
        ``0`` after the manifest has been written.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # (flag, ``add_argument`` keyword options), registered in this order.
    option_table: tuple[tuple[str, dict[str, Any]], ...] = (
        (
            "--cache-dir",
            {
                "type": Path,
                "default": Path("data"),
                "help": "Base directory for per-site files and manifest",
            },
        ),
        (
            "--evaluation-year",
            {
                "type": int,
                "default": EVALUATION_YEAR,
                "help": f"Calendar year to download (default: {EVALUATION_YEAR})",
            },
        ),
        (
            "--active-only",
            {
                "action": "store_true",
                "help": "Restrict candidates to cameras marked active in the API",
            },
        ),
        (
            "--limit",
            {
                "type": int,
                "default": None,
                "help": "Process only the first N candidate sites (testing)",
            },
        ),
        (
            "--sites",
            {
                "type": str,
                "default": None,
                "help": "Comma-separated sitenames to download (testing)",
            },
        ),
        (
            "--refresh",
            {
                "action": "store_true",
                "help": "Re-download sites even when cache files exist",
            },
        ),
        (
            "--output-json",
            {
                "type": Path,
                "default": None,
                "help": "Manifest output path (default: data/phenocam/{year}.json)",
            },
        ),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args(argv)

    # ``--sites a, b,,c`` -> {"a", "b", "c"}; no flag (or empty) -> no filter.
    if args.sites:
        stripped = (piece.strip() for piece in args.sites.split(","))
        site_filter = {piece for piece in stripped if piece}
    else:
        site_filter = None

    sitenames = run_download(
        cache_dir=args.cache_dir,
        evaluation_year=args.evaluation_year,
        active_only=args.active_only,
        site_filter=site_filter,
        limit=args.limit,
        refresh=args.refresh,
    )

    if args.output_json:
        manifest_path = args.output_json
    else:
        manifest_path = args.cache_dir / "phenocam" / f"{args.evaluation_year}.json"
    write_manifest(sitenames, manifest_path, args.cache_dir, args.evaluation_year)
    return 0
|
||||
|
||||
|
||||
# Script entry point: the return value of ``main`` becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue