From d50a0fcbb100b2b08a4c3ade44db75238a8c746e Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Sun, 19 Apr 2026 18:52:37 +0200 Subject: [PATCH] gcc_90 --- acquisition_phenocam.py | 70 ++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/acquisition_phenocam.py b/acquisition_phenocam.py index 200f944..ff4ba9c 100644 --- a/acquisition_phenocam.py +++ b/acquisition_phenocam.py @@ -1,15 +1,42 @@ """PhenoCam acquisition from PhenoCam Network API.""" + import csv import json import requests from pathlib import Path from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed -from io import StringIO PHENOCAM_API = "https://phenocam.nau.edu/api" +def _phenocam_summary_gcc_value(row, use_mean_fallback: bool): + """Extract daily GCC from a one-day summary row. + + Prefers **gcc_90** (90th percentile; matches PhenoCam gcc90 / thesis ground truth). + Skips rows flagged as outliers in ``outlierflag_gcc_90`` when present. + With ``use_mean_fallback``, uses ``gcc_mean`` for legacy CSVs missing ``gcc_90``. + """ + if not use_mean_fallback: + oflag = row.get("outlierflag_gcc_90") + if oflag is not None and str(oflag).strip() in ("1", "1.0"): + return None + + raw = row.get("gcc_mean" if use_mean_fallback else "gcc_90") + if raw is None: + return None + text = str(raw).strip() + if not text or text.upper() == "NA": + return None + try: + val = float(text) + except ValueError: + return None + if val <= -9998.0: + return None + return val + + def _find_start_offset(site_name, start_dt, total_count): """Binary search to find approximate offset for start date.""" low, high = 0, total_count - 1 @@ -20,7 +47,7 @@ def _find_start_offset(site_name, start_dt, total_count): response = requests.get( f"{PHENOCAM_API}/middayimages/", params={"site": site_name, "limit": limit, "offset": mid}, - timeout=30 + timeout=30, ) response.raise_for_status() results = response.json().get("results", []) @@ -65,7 +92,7 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None) response = requests.get( f"{PHENOCAM_API}/middayimages/", params={"site": site_name, "limit": 1}, - timeout=30 + timeout=30, ) response.raise_for_status() total_count = response.json().get("count", 0) @@ -74,7 +101,9 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None) print(f"[PhenoCam] No images found for site '{site_name}'") return - print(f"[PhenoCam] Found {total_count} total images, estimating start offset...") + print( + f"[PhenoCam] Found {total_count} total images, estimating start offset..." + ) start_offset = _find_start_offset(site_name, start_dt, total_count) url = f"{PHENOCAM_API}/middayimages/" @@ -114,7 +143,9 @@ def _download_phenocam_images(season, site_position, site_name, date_range=None) params = None page += 1 if page % 50 == 0: - print(f"[PhenoCam] Processed {page} pages, found {len(images)} images in range...") + print( + f"[PhenoCam] Processed {page} pages, found {len(images)} images in range..." + ) except requests.exceptions.HTTPError as e: if e.response.status_code == 404: print(f"[PhenoCam] Site '{site_name}' not found") @@ -176,7 +207,9 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None): r = requests.get(url, params=params, timeout=30) r.raise_for_status() data = r.json() - rois.extend([roi for roi in data.get("results", []) if roi["site"] == site_name]) + rois.extend( + [roi for roi in data.get("results", []) if roi["site"] == site_name] + ) url = data.get("next") params = None if len(rois) > 0: @@ -186,7 +219,7 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None): return csv_url = rois[0].get("one_day_summary") if not csv_url: - print(f"[PhenoCam-GI] No CSV data URL found for ROI") + print("[PhenoCam-GI] No CSV data URL found for ROI") return except requests.exceptions.RequestException as e: print(f"[PhenoCam-GI] Error fetching ROIs: {e}") @@ -196,8 +229,17 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None): try: csv_r = requests.get(csv_url, timeout=30) csv_r.raise_for_status() - lines = [l for l in csv_r.text.split('\n') if l and not l.startswith('#')] + lines = [ + line for line in csv_r.text.split("\n") if line and not line.startswith("#") + ] reader = csv.DictReader(lines) + fieldnames = reader.fieldnames or () + use_mean_fallback = "gcc_90" not in fieldnames + if use_mean_fallback: + print( + "[PhenoCam-GI] Warning: gcc_90 not in summary CSV; using gcc_mean (legacy export)" + ) + timeseries = [] for row in reader: try: @@ -206,9 +248,11 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None): continue date = datetime.strptime(date_str, "%Y-%m-%d") if start_dt <= date <= end_dt: - gcc = row.get("gcc_mean") - if gcc and gcc != "NA": - timeseries.append({"date": date.isoformat(), "greenness_index": float(gcc)}) + gcc = _phenocam_summary_gcc_value(row, use_mean_fallback) + if gcc is not None: + timeseries.append( + {"date": date.isoformat(), "greenness_index": gcc} + ) except (ValueError, KeyError): continue except requests.exceptions.RequestException as e: @@ -229,4 +273,6 @@ def _download_phenocam_gcc(season, site_position, site_name, date_range=None): writer.writeheader() writer.writerows(timeseries) - print(f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)") + print( + f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)" + )