# Listing metadata: 232 lines, 8 KiB, Python
"""PhenoCam acquisition from PhenoCam Network API."""
|
|
import csv
|
|
import json
|
|
import requests
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from io import StringIO
|
|
|
|
PHENOCAM_API = "https://phenocam.nau.edu/api"
|
|
|
|
|
|
def _find_start_offset(site_name, start_dt, total_count, margin=100):
    """Estimate the listing offset of the first midday image on/after ``start_dt``.

    Binary-searches the ``middayimages`` endpoint: each probe requests a single
    record (``limit=1``) at the candidate offset and compares its ``imgdate``
    with ``start_dt``. The search only makes sense if the API lists records in
    ascending date order -- NOTE(review): confirm against the API.

    The loop stops as soon as the interval collapses, so no probe is repeated
    once the position is known, and no request is made at all when the
    listing has zero or one record.

    Args:
        site_name: Site identifier sent as the ``site`` query parameter.
        start_dt: ``datetime`` marking the start of the wanted range.
        total_count: Number of records the API reports for the site.
        margin: Number of records to back off from the located position, so
            an imprecise search result still starts before ``start_dt``.

    Returns:
        A non-negative offset to start paging from.

    Raises:
        requests.exceptions.HTTPError: If a probe returns an error status.
    """
    # An empty listing has nothing to search; avoid probing offset -1.
    if total_count <= 0:
        return 0

    low, high = 0, total_count - 1
    while low < high:
        mid = (low + high) // 2
        response = requests.get(
            f"{PHENOCAM_API}/middayimages/",
            params={"site": site_name, "limit": 1, "offset": mid},
            timeout=30,
        )
        response.raise_for_status()

        results = response.json().get("results", [])
        if not results:
            break

        mid_date_str = results[0].get("imgdate", "")
        if not mid_date_str:
            break

        try:
            mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
        except ValueError:
            # Unparseable date: give up refining and use the best bound so far.
            break

        if mid_date < start_dt:
            low = mid + 1
        else:
            high = mid

    return max(0, low - margin)
|
|
|
|
|
|
def download_phenocam(season, site_position, site_name, date_range=None):
    """Run both PhenoCam download steps for one site: images, then GCC series.

    All arguments are forwarded unchanged to each step. The image step runs
    first; if it raises, the GCC step is not attempted.
    """
    steps = (_download_phenocam_images, _download_phenocam_gcc)
    for run_step in steps:
        run_step(season, site_position, site_name, date_range)
|
|
|
|
|
|
def _download_phenocam_images(season, site_position, site_name, date_range=None):
    """Download the midday PhenoCam images of one site within a date range.

    Images are saved as ``data/<site_name>/<season>/raw/phenocam/<YYYYMMDD>.jpg``.
    Files that already exist are skipped, so the function can be re-run to
    resume an interrupted download.

    Args:
        season: Year used for the output path and, when ``date_range`` is not
            given, for the default range ``<season>-01-01/<season>-12-31``.
        site_position: ``(lat, lon)`` pair; only used in the progress message.
        site_name: Site identifier sent as the ``site`` query parameter.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string.

    Raises:
        ValueError: If the date range is malformed. This is checked before
            any directory is created or any request is sent.
        requests.exceptions.HTTPError: If listing fails with a status other
            than 404 (a 404 is reported and the function returns).
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"

    # Validate the range first so bad input has no filesystem/network effects.
    start_date, end_date = datetime_range.split("/")
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")

    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")

    try:
        # A one-record request is enough to learn the total record count.
        response = requests.get(
            f"{PHENOCAM_API}/middayimages/",
            params={"site": site_name, "limit": 1},
            timeout=30,
        )
        response.raise_for_status()
        total_count = response.json().get("count", 0)

        if total_count == 0:
            print(f"[PhenoCam] No images found for site '{site_name}'")
            return

        print(f"[PhenoCam] Found {total_count} total images, estimating start offset...")
        start_offset = _find_start_offset(site_name, start_dt, total_count)

        url = f"{PHENOCAM_API}/middayimages/"
        params = {"site": site_name, "offset": start_offset}

        print(f"[PhenoCam] Fetching image list from offset {start_offset}...")
        images = []
        page = 1
        max_pages = 500
        past_end_date = False

        while url and page <= max_pages and not past_end_date:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])

            if not results:
                break

            for img in results:
                img_date_str = img.get("imgdate", "")
                if not img_date_str:
                    continue
                try:
                    img_date = datetime.strptime(img_date_str, "%Y-%m-%d")
                except ValueError:
                    continue
                if img_date > end_dt:
                    # Stop paging at the first record past the range end.
                    past_end_date = True
                    break
                if start_dt <= img_date:
                    images.append(img)

            if not past_end_date:
                url = data.get("next")
                # The "next" URL already carries its own query string.
                params = None
                page += 1
                if page % 50 == 0:
                    print(f"[PhenoCam] Processed {page} pages, found {len(images)} images in range...")

        # Make a truncated listing visible instead of silently dropping images.
        if url and page > max_pages and not past_end_date:
            print(f"[PhenoCam] Warning: stopped after {max_pages} pages; image list may be incomplete")
    except requests.exceptions.HTTPError as e:
        if e.response is not None and e.response.status_code == 404:
            print(f"[PhenoCam] Site '{site_name}' not found")
            return
        raise

    print(f"[PhenoCam] Found {len(images)} images")

    def _download_image(img):
        """Fetch one image record; return a status message or None if unusable."""
        date_str = img.get("imgdate", "").replace("-", "")
        if not date_str:
            return None

        filepath = output_dir / f"{date_str}.jpg"
        if filepath.exists():
            return f"Skipped {date_str}.jpg (exists)"

        img_path = img.get("imgpath")
        if not img_path:
            return None

        img_url = f"https://phenocam.nau.edu{img_path}"
        # Write to a sidecar file and rename, so an interrupted write never
        # leaves a truncated .jpg that later runs would skip as "exists".
        tmp_path = filepath.with_name(f"{filepath.name}.part")
        try:
            img_response = requests.get(img_url, timeout=30)
            img_response.raise_for_status()
            tmp_path.write_bytes(img_response.content)
            tmp_path.replace(filepath)
            return f"Saved {date_str}.jpg"
        except Exception as e:
            # Deliberately best-effort: one failed image must not abort the batch.
            try:
                tmp_path.unlink()
            except OSError:
                pass  # no partial file was created
            return f"Error downloading {date_str}: {e}"

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(_download_image, img) for img in images]
        for future in as_completed(futures):
            result = future.result()
            if result:
                print(f"[PhenoCam] {result}")

    print("[PhenoCam] Completed")
|
|
|
|
|
|
def _download_phenocam_gcc(season, site_position, site_name, date_range=None):
    """Fetch the greenness-index time series of one site and save JSON and CSV.

    Pages through the ``roilists`` endpoint until a page yields at least one
    ROI whose ``site`` equals ``site_name``, downloads the CSV referenced by
    the first such ROI's ``one_day_summary`` field, keeps the rows whose
    ``date`` falls inside the range and whose ``gcc_mean`` is present and not
    ``"NA"``, and writes them to ``phenocam_gcc.json`` and ``phenocam_gcc.csv``
    under ``data/<site_name>/<season>/raw/phenocam/``.

    Each output record is ``{"date": <ISO string>, "greenness_index": <float>}``.
    The date is the ISO form of a midnight ``datetime``, so it includes a
    ``T00:00:00`` time part.

    Request failures are reported on stdout and end the function without
    writing files.

    Args:
        season: Year used for the output path and, when ``date_range`` is not
            given, for the default range ``<season>-01-01/<season>-12-31``.
        site_position: Unused here; accepted so the signature matches the
            image downloader.
        site_name: Site identifier sent as the ``site`` query parameter.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string.

    Raises:
        ValueError: If the date range is malformed. This is checked before
            any directory is created or any request is sent.
    """
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"

    # Validate the range first so bad input has no filesystem/network effects.
    start_date, end_date = datetime_range.split("/")
    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")

    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam")
    output_dir.mkdir(parents=True, exist_ok=True)
    json_path = output_dir / "phenocam_gcc.json"
    csv_path = output_dir / "phenocam_gcc.csv"

    print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")

    # Get ROIs for site (paginate until a page yields a matching ROI)
    try:
        url = f"{PHENOCAM_API}/roilists/"
        params = {"site": site_name}
        rois = []
        while url and not rois:
            r = requests.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            # .get() so a record without a "site" key is skipped, not fatal.
            rois.extend(
                roi for roi in data.get("results", []) if roi.get("site") == site_name
            )
            url = data.get("next")
            # The "next" URL already carries its own query string.
            params = None
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
        return

    if not rois:
        print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
        return

    csv_url = rois[0].get("one_day_summary")
    if not csv_url:
        print("[PhenoCam-GI] No CSV data URL found for ROI")
        return

    # Fetch CSV data
    try:
        csv_r = requests.get(csv_url, timeout=30)
        csv_r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"[PhenoCam-GI] Error fetching CSV: {e}")
        return

    # splitlines() copes with CRLF payloads; "#" lines are header comments.
    lines = [
        line for line in csv_r.text.splitlines() if line and not line.startswith("#")
    ]

    timeseries = []
    for row in csv.DictReader(lines):
        date_str = row.get("date")
        if not date_str:
            continue
        try:
            date = datetime.strptime(date_str, "%Y-%m-%d")
            if not (start_dt <= date <= end_dt):
                continue
            gcc = row.get("gcc_mean")
            if gcc and gcc != "NA":
                timeseries.append(
                    {"date": date.isoformat(), "greenness_index": float(gcc)}
                )
        except ValueError:
            # Bad date or non-numeric gcc_mean: skip the row.
            continue

    timeseries.sort(key=lambda entry: entry["date"])

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(timeseries, f, indent=2)

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["date", "greenness_index"])
        writer.writeheader()
        writer.writerows(timeseries)

    print(f"[PhenoCam-GI] Saved: {json_path} and {csv_path} ({len(timeseries)} entries)")
|