This commit is contained in:
Felix Delattre 2026-04-19 18:52:37 +02:00
parent bc7830822b
commit d50a0fcbb1

View file

@ -1,15 +1,42 @@
"""PhenoCam acquisition from PhenoCam Network API.""" """PhenoCam acquisition from PhenoCam Network API."""
import csv import csv
import json import json
import requests import requests
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from io import StringIO
PHENOCAM_API = "https://phenocam.nau.edu/api" PHENOCAM_API = "https://phenocam.nau.edu/api"
def _phenocam_summary_gcc_value(row, use_mean_fallback: bool):
"""Extract daily GCC from a one-day summary row.
Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth).
Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present.
With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``.
"""
if not use_mean_fallback:
oflag = row.get("outlierflag_gcc_90")
if oflag is not None and str(oflag).strip() in ("1", "1.0"):
return None
raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90")
if raw is None:
return None
text = str(raw).strip()
if not text or text.upper() == "NA":
return None
try:
val = float(text)
except ValueError:
return None
if val <= -9998.0:
return None
return val
def _find_start_offset(site_name, start_dt, total_count): def _find_start_offset(site_name, start_dt, total_count):
"""Binary search to find approximate offset for start date.""" """Binary search to find approximate offset for start date."""
low, high = 0, total_count - 1 low, high = 0, total_count - 1
@ -20,7 +47,7 @@ def _find_start_offset(site_name, start_dt, total_count):
response = requests.get( response = requests.get(
f"{PHENOCAM_API}/middayimages/", f"{PHENOCAM_API}/middayimages/",
params={"site": site_name, "limit": limit, "offset": mid}, params={"site": site_name, "limit": limit, "offset": mid},
timeout=30 timeout=30,
) )
response.raise_for_status() response.raise_for_status()
results = response.json().get("results", []) results = response.json().get("results", [])
@ -65,7 +92,7 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None)
response = requests.get( response = requests.get(
f"{PHENOCAM_API}/middayimages/", f"{PHENOCAM_API}/middayimages/",
params={"site": site_name, "limit": 1}, params={"site": site_name, "limit": 1},
timeout=30 timeout=30,
) )
response.raise_for_status() response.raise_for_status()
total_count = response.json().get("count", 0) total_count = response.json().get("count", 0)
@ -74,7 +101,9 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None)
print(f"[PhenoCam] No images found for site '{site_name}'") print(f"[PhenoCam] No images found for site '{site_name}'")
return return
print(f"[PhenoCam] Found {total_count} total images, estimating start offset...") print(
f"[PhenoCam] Found {total_count} total images, estimating start offset..."
)
start_offset = _find_start_offset(site_name, start_dt, total_count) start_offset = _find_start_offset(site_name, start_dt, total_count)
url = f"{PHENOCAM_API}/middayimages/" url = f"{PHENOCAM_API}/middayimages/"
@ -114,7 +143,9 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None)
params = None params = None
page += 1 page += 1
if page % 50 == 0: if page % 50 == 0:
print(f"[PhenoCam] Processed {page} pages, found {len(images)} images in range...") print(
f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..."
)
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
if e.response.status_code == 404: if e.response.status_code == 404:
print(f"[PhenoCam] Site '{site_name}' not found") print(f"[PhenoCam] Site '{site_name}' not found")
@ -176,7 +207,9 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
r = requests.get(url, params=params, timeout=30) r = requests.get(url, params=params, timeout=30)
r.raise_for_status() r.raise_for_status()
data = r.json() data = r.json()
rois.extend([roi for roi in data.get("results", []) if roi["site"] == site_name]) rois.extend(
[roi for roi in data.get("results", []) if roi["site"] == site_name]
)
url = data.get("next") url = data.get("next")
params = None params = None
if len(rois) > 0: if len(rois) > 0:
@ -186,7 +219,7 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
return return
csv_url = rois[0].get("one_day_summary") csv_url = rois[0].get("one_day_summary")
if not csv_url: if not csv_url:
print(f"[PhenoCam-GI] No CSV data URL found for ROI") print("[PhenoCam-GI] No CSV data URL found for ROI")
return return
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print(f"[PhenoCam-GI] Error fetching ROIs: {e}") print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
@ -196,8 +229,17 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
try: try:
csv_r = requests.get(csv_url, timeout=30) csv_r = requests.get(csv_url, timeout=30)
csv_r.raise_for_status() csv_r.raise_for_status()
lines = [l for l in csv_r.text.split('\n') if l and not l.startswith('#')] lines = [
line for line in csv_r.text.split("\n") if line and not line.startswith("#")
]
reader = csv.DictReader(lines) reader = csv.DictReader(lines)
fieldnames = reader.fieldnames or ()
use_mean_fallback = "gcc_90" not in fieldnames
if use_mean_fallback:
print(
"[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)"
)
timeseries = [] timeseries = []
for row in reader: for row in reader:
try: try:
@ -206,9 +248,11 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
continue continue
date = datetime.strptime(date_str, "%Y-%m-%d") date = datetime.strptime(date_str, "%Y-%m-%d")
if start_dt <= date <= end_dt: if start_dt <= date <= end_dt:
gcc = row.get("gcc_mean") gcc = _phenocam_summary_gcc_value(row, use_mean_fallback)
if gcc and gcc != "NA": if gcc is not None:
timeseries.append({"date": date.isoformat(), "greenness_index": float(gcc)}) timeseries.append(
{"date": date.isoformat(), "greenness_index": gcc}
)
except (ValueError, KeyError): except (ValueError, KeyError):
continue continue
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
@ -229,4 +273,6 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
writer.writeheader() writer.writeheader()
writer.writerows(timeseries) writer.writerows(timeseries)
print(f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)") print(
f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)"
)