Renaming.

2026-02-20 21:57:42 +01:00 · 2026-02-20 21:57:42 +01:00 · 3919b8e871
commit 3919b8e871
parent f9da4aef7d
12 changed files with 953 additions and 203 deletions
--- a/download_phenocam.py
+++ b/download_phenocam.py
@@ -1,216 +0,0 @@
-import csv
-import json
-import requests
-from pathlib import Path
-from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from io import StringIO
-
-PHENOCAM_API = "https://phenocam.nau.edu/api"
-
-
-def _find_start_offset(site_name, start_dt, total_count):
-    """Binary search to find approximate offset for start date."""
-    low, high = 0, total_count - 1
-    limit = 1
-    
-    for _ in range(15):
-        mid = (low + high) // 2
-        response = requests.get(
-            f"{PHENOCAM_API}/middayimages/",
-            params={"site": site_name, "limit": limit, "offset": mid},
-            timeout=30
-        )
-        response.raise_for_status()
-        results = response.json().get("results", [])
-        if not results:
-            break
-        
-        mid_date_str = results[0].get("imgdate", "")
-        if not mid_date_str:
-            break
-        
-        try:
-            mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d")
-            if mid_date < start_dt:
-                low = mid + 1
-            else:
-                high = mid
-        except ValueError:
-            break
-    
-    return max(0, low - 100)
-
-
-def download_phenocam(season, site_position, site_name, date_range=None):
-    lat, lon = site_position
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_dir = Path(f"data/{site_name}/{season}/raw/phenocam/")
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    print(f"[PhenoCam] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
-
-    start_date, end_date = datetime_range.split("/")
-    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
-    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
-    try:
-        response = requests.get(
-            f"{PHENOCAM_API}/middayimages/",
-            params={"site": site_name, "limit": 1},
-            timeout=30
-        )
-        response.raise_for_status()
-        total_count = response.json().get("count", 0)
-        
-        if total_count == 0:
-            print(f"[PhenoCam] No images found for site '{site_name}'")
-            return
-        
-        print(f"[PhenoCam] Found {total_count} total images, estimating start offset...")
-        start_offset = _find_start_offset(site_name, start_dt, total_count)
-        
-        url = f"{PHENOCAM_API}/middayimages/"
-        params = {"site": site_name, "offset": start_offset}
-        
-        print(f"[PhenoCam] Fetching image list from offset {start_offset}...")
-        images = []
-        page = 1
-        max_pages = 500
-        past_end_date = False
-        
-        while url and page <= max_pages and not past_end_date:
-            response = requests.get(url, params=params, timeout=30)
-            response.raise_for_status()
-            data = response.json()
-            results = data.get("results", [])
-            
-            if not results:
-                break
-            
-            for img in results:
-                img_date_str = img.get("imgdate", "")
-                if not img_date_str:
-                    continue
-                try:
-                    img_date = datetime.strptime(img_date_str, "%Y-%m-%d")
-                    if img_date > end_dt:
-                        past_end_date = True
-                        break
-                    if start_dt <= img_date <= end_dt:
-                        images.append(img)
-                except ValueError:
-                    continue
-            
-            if url and not past_end_date:
-                url = data.get("next")
-                params = None
-                page += 1
-                if page % 50 == 0:
-                    print(f"[PhenoCam] Processed {page} pages, found {len(images)} images in range...")
-    except requests.exceptions.HTTPError as e:
-        if e.response.status_code == 404:
-            print(f"[PhenoCam] Site '{site_name}' not found")
-            return
-        raise
-
-    print(f"[PhenoCam] Found {len(images)} images")
-
-    def _download_image(img):
-        date_str = img.get("imgdate", "").replace("-", "")
-        if not date_str:
-            return None
-        
-        filepath = output_dir / f"{date_str}.jpg"
-        if filepath.exists():
-            return f"Skipped {date_str}.jpg (exists)"
-        
-        img_path = img.get("imgpath")
-        if not img_path:
-            return None
-        
-        img_url = f"https://phenocam.nau.edu{img_path}"
-        try:
-            img_response = requests.get(img_url, timeout=30)
-            img_response.raise_for_status()
-            filepath.write_bytes(img_response.content)
-            return f"Saved {date_str}.jpg"
-        except Exception as e:
-            return f"Error downloading {date_str}: {e}"
-
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        futures = [executor.submit(_download_image, img) for img in images]
-        for future in as_completed(futures):
-            result = future.result()
-            if result:
-                print(f"[PhenoCam] {result}")
-
-    print("[PhenoCam] Completed")
-
-
-def download_phenocam_greenness(season, site_position, site_name, date_range=None):
-    """Fetch greenness-index time series from PhenoCam API."""
-    datetime_range = date_range or f"{season}-01-01/{season}-12-31"
-    output_file = Path(f"data/{site_name}/{season}/raw/phenocam/timeseries.json")
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    
-    start_date, end_date = datetime_range.split("/")
-    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
-    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-    
-    print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}")
-    
-    # Get ROIs for site (paginate through results)
-    try:
-        url = f"{PHENOCAM_API}/roilists/"
-        params = {"site": site_name}
-        rois = []
-        while url:
-            r = requests.get(url, params=params, timeout=30)
-            r.raise_for_status()
-            data = r.json()
-            rois.extend([roi for roi in data.get("results", []) if roi["site"] == site_name])
-            url = data.get("next")
-            params = None
-            if len(rois) > 0:
-                break
-        if not rois:
-            print(f"[PhenoCam-GI] No ROIs found for site '{site_name}'")
-            return
-        csv_url = rois[0].get("one_day_summary")
-        if not csv_url:
-            print(f"[PhenoCam-GI] No CSV data URL found for ROI")
-            return
-    except requests.exceptions.RequestException as e:
-        print(f"[PhenoCam-GI] Error fetching ROIs: {e}")
-        return
-    
-    # Fetch CSV data
-    try:
-        csv_r = requests.get(csv_url, timeout=30)
-        csv_r.raise_for_status()
-        lines = [l for l in csv_r.text.split('\n') if l and not l.startswith('#')]
-        reader = csv.DictReader(lines)
-        timeseries = []
-        for row in reader:
-            try:
-                date_str = row.get("date")
-                if not date_str:
-                    continue
-                date = datetime.strptime(date_str, "%Y-%m-%d")
-                if start_dt <= date <= end_dt:
-                    gcc = row.get("gcc_mean")
-                    if gcc and gcc != "NA":
-                        timeseries.append({"date": date.isoformat(), "greenness_index": float(gcc)})
-            except (ValueError, KeyError):
-                continue
-    except requests.exceptions.RequestException as e:
-        print(f"[PhenoCam-GI] Error fetching CSV: {e}")
-        return
-    
-    timeseries.sort(key=lambda x: x["date"])
-    with open(output_file, "w") as f:
-        json.dump(timeseries, f, indent=2)
-    
-    print(f"[PhenoCam-GI] Saved: {output_file} ({len(timeseries)} entries)")
-