diff --git a/download_phenocam.py b/acquisition_phenocam.py similarity index 97% rename from download_phenocam.py rename to acquisition_phenocam.py index f352539..7fed9d7 100644 --- a/download_phenocam.py +++ b/acquisition_phenocam.py @@ -1,3 +1,4 @@ +"""PhenoCam acquisition from PhenoCam Network API.""" import csv import json import requests @@ -13,7 +14,7 @@ def _find_start_offset(site_name, start_dt, total_count): """Binary search to find approximate offset for start date.""" low, high = 0, total_count - 1 limit = 1 - + for _ in range(15): mid = (low + high) // 2 response = requests.get( @@ -25,11 +26,11 @@ def _find_start_offset(site_name, start_dt, total_count): results = response.json().get("results", []) if not results: break - + mid_date_str = results[0].get("imgdate", "") if not mid_date_str: break - + try: mid_date = datetime.strptime(mid_date_str, "%Y-%m-%d") if mid_date < start_dt: @@ -38,7 +39,7 @@ def _find_start_offset(site_name, start_dt, total_count): high = mid except ValueError: break - + return max(0, low - 100) @@ -62,32 +63,32 @@ def download_phenocam(season, site_position, site_name, date_range=None): ) response.raise_for_status() total_count = response.json().get("count", 0) - + if total_count == 0: print(f"[PhenoCam] No images found for site '{site_name}'") return - + print(f"[PhenoCam] Found {total_count} total images, estimating start offset...") start_offset = _find_start_offset(site_name, start_dt, total_count) - + url = f"{PHENOCAM_API}/middayimages/" params = {"site": site_name, "offset": start_offset} - + print(f"[PhenoCam] Fetching image list from offset {start_offset}...") images = [] page = 1 max_pages = 500 past_end_date = False - + while url and page <= max_pages and not past_end_date: response = requests.get(url, params=params, timeout=30) response.raise_for_status() data = response.json() results = data.get("results", []) - + if not results: break - + for img in results: img_date_str = img.get("imgdate", "") if not img_date_str: @@ -101,7 +102,7 @@ def download_phenocam(season, site_position, site_name, date_range=None): images.append(img) except ValueError: continue - + if url and not past_end_date: url = data.get("next") params = None @@ -120,15 +121,15 @@ def download_phenocam(season, site_position, site_name, date_range=None): date_str = img.get("imgdate", "").replace("-", "") if not date_str: return None - + filepath = output_dir / f"{date_str}.jpg" if filepath.exists(): return f"Skipped {date_str}.jpg (exists)" - + img_path = img.get("imgpath") if not img_path: return None - + img_url = f"https://phenocam.nau.edu{img_path}" try: img_response = requests.get(img_url, timeout=30) @@ -153,13 +154,13 @@ def download_phenocam_greenness(season, site_position, site_name, date_range=Non datetime_range = date_range or f"{season}-01-01/{season}-12-31" output_file = Path(f"data/{site_name}/{season}/raw/phenocam/timeseries.json") output_file.parent.mkdir(parents=True, exist_ok=True) - + start_date, end_date = datetime_range.split("/") start_dt = datetime.strptime(start_date, "%Y-%m-%d") end_dt = datetime.strptime(end_date, "%Y-%m-%d") - + print(f"[PhenoCam-GI] Fetching greenness-index time series: {site_name}, {season}") - + # Get ROIs for site (paginate through results) try: url = f"{PHENOCAM_API}/roilists/" @@ -184,7 +185,7 @@ def download_phenocam_greenness(season, site_position, site_name, date_range=Non except requests.exceptions.RequestException as e: print(f"[PhenoCam-GI] Error fetching ROIs: {e}") return - + # Fetch CSV data try: csv_r = requests.get(csv_url, timeout=30) @@ -207,10 +208,9 @@ def download_phenocam_greenness(season, site_position, site_name, date_range=Non except requests.exceptions.RequestException as e: print(f"[PhenoCam-GI] Error fetching CSV: {e}") return - + timeseries.sort(key=lambda x: x["date"]) with open(output_file, "w") as f: json.dump(timeseries, f, indent=2) - - print(f"[PhenoCam-GI] Saved: {output_file} ({len(timeseries)} entries)") + print(f"[PhenoCam-GI] Saved: {output_file} ({len(timeseries)} entries)") diff --git a/download_s2.py b/acquisition_s2.py similarity index 75% rename from download_s2.py rename to acquisition_s2.py index 6ce399d..59aae0a 100644 --- a/download_s2.py +++ b/acquisition_s2.py @@ -1,12 +1,16 @@ +"""Sentinel-2-MSI acquisition from AWS Element84 Earth Search (STAC catalog).""" +import numpy as np import rasterio import xml.etree.ElementTree as ET import requests from pathlib import Path -from rasterio.warp import transform_geom +from rasterio.crs import CRS +from rasterio.warp import Resampling, calculate_default_transform, reproject, transform_geom from rasterio.windows import from_bounds, transform as window_transform from pystac_client import Client -BBOX_SIZE = 0.011 +BBOX_SIZE = 0.016 +TARGET_CRS = CRS.from_epsg(32632) def _get_bbox(lon, lat): @@ -128,10 +132,41 @@ def download_s2(season, site_position, site_name, date_range=None): band_data[band_idx] = data[0] if profile and len(band_data) == len(bands): - stacked = [band_data[i] for i in sorted(band_data.keys())] + stacked = np.array([band_data[i] for i in sorted(band_data.keys())]) band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())] viewing_angle = _extract_viewing_angle(item) + if profile["crs"] != TARGET_CRS: + src_transform = profile["transform"] + src_height, src_width = profile["height"], profile["width"] + left, bottom, right, top = rasterio.transform.array_bounds( + src_height, src_width, src_transform + ) + dst_transform, dst_width, dst_height = calculate_default_transform( + profile["crs"], TARGET_CRS, src_width, src_height, + left=left, bottom=bottom, right=right, top=top, + ) + reprojected = np.empty( + (len(stacked), dst_height, dst_width), dtype=stacked.dtype + ) + for i in range(len(stacked)): + reproject( + source=stacked[i], + destination=reprojected[i], + src_transform=src_transform, + src_crs=profile["crs"], + dst_transform=dst_transform, + dst_crs=TARGET_CRS, + resampling=Resampling.bilinear, + ) + stacked = reprojected + profile.update({ + "crs": TARGET_CRS, + "transform": dst_transform, + "width": dst_width, + "height": dst_height, + }) + with rasterio.open(filepath, "w", **profile) as dst: for i, data in enumerate(stacked, 1): dst.write(data, i) diff --git a/download_s3.py b/acquisition_s3.py similarity index 98% rename from download_s3.py rename to acquisition_s3.py index 4ad9409..a2e4efd 100644 --- a/download_s3.py +++ b/acquisition_s3.py @@ -1,3 +1,4 @@ +"""Sentinel-3-OLCI acquisition from Copernicus Data Space OpenEO API.""" import os import time from pathlib import Path diff --git a/fusion.py b/fusion.py new file mode 100644 index 0000000..ce69513 --- /dev/null +++ b/fusion.py @@ -0,0 +1,76 @@ +"""EFAST fusion: S2/S3 reflectance fusion for four scenarios.""" +from pathlib import Path +from datetime import datetime, timedelta + +from preselection import detect_clouds +from preparation import ( + prepare_s2, + prepare_s3, + _get_base_dir, + RESOLUTION_RATIO, +) + + +def _import_efast(): + """Lazy import of efast to avoid import errors when not using efast functions.""" + try: + import efast + return efast + except ImportError: + raise ImportError( + "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git" + ) + + +def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None, date_range=None): + lat, lon = site_position + datetime_range = date_range or f"{season}-01-01/{season}-12-31" + + efast_base_dir = _get_base_dir(season, site_name, cleaning_strategy) + s2_output_dir = efast_base_dir / "s2" + s3_output_dir = efast_base_dir / "s3" + fusion_output_dir = efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion") + + fusion_output_dir.mkdir(parents=True, exist_ok=True) + print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}") + + efast = _import_efast() + + start_str, end_str = datetime_range.split("/") + start_date = datetime.strptime(start_str, "%Y-%m-%d") + end_date = datetime.strptime(end_str, "%Y-%m-%d") + + current_date = start_date + while current_date <= end_date: + date_str = current_date.strftime("%Y%m%d") + output_file = fusion_output_dir / f"REFL_{date_str}.tif" + try: + kwargs = { + "product": "REFL", + "max_days": 30, + "date_position": 2, + "minimum_acquisition_importance": 0.0, + "ratio": RESOLUTION_RATIO, + } + if sigma is not None: + kwargs["sigma"] = sigma + efast.fusion(current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs) + print( + f"[EFAST] Saved: {output_file}" + if output_file.exists() + else f"[EFAST] No output for {date_str} (insufficient nearby data)" + ) + except Exception as e: + print(f"[EFAST] Error processing {date_str}: {e}") + current_date += timedelta(days=1) + + print("[EFAST] Completed") + + +def run_all_efast_scenarios(season, site_position, site_name, sigma_value=30, date_range=None): + for strategy in ["aggressive", "nonaggressive"]: + detect_clouds(season, site_name, cleaning_strategy=strategy) + prepare_s2(season, site_position, site_name, cleaning_strategy=strategy, date_range=date_range) + prepare_s3(season, site_position, site_name, cleaning_strategy=strategy, date_range=date_range) + run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=None, date_range=date_range) + run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma_value, date_range=date_range) diff --git a/generate_indexes.py b/metrics_indices.py similarity index 86% rename from generate_indexes.py rename to metrics_indices.py index e081005..d9c4d68 100644 --- a/generate_indexes.py +++ b/metrics_indices.py @@ -1,3 +1,4 @@ +"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs.""" import json import numpy as np import rasterio @@ -70,37 +71,37 @@ def _get_ndvi_from_original(input_file, site_position): with rasterio.open(input_file) as src: if src.count < 4: return None - + red = src.read(RED_BAND).astype(np.float32) nir = src.read(NIR_BAND).astype(np.float32) - + lon, lat = site_position[1], site_position[0] x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) - + if not ( src.bounds.left <= x[0] <= src.bounds.right and src.bounds.bottom <= y[0] <= src.bounds.top ): return None - + row, col = src.index(x[0], y[0]) if row < 0 or row >= src.height or col < 0 or col >= src.width: return None - + # Extract 3x3 window with boundary handling r0, r1 = max(0, row - 1), min(src.height, row + 2) c0, c1 = max(0, col - 1), min(src.width, col + 2) red_window = red[r0:r1, c0:c1] nir_window = nir[r0:r1, c0:c1] - + # Calculate NDVI for each pixel in window mask = (red_window > 0) & (nir_window > 0) & ~np.isnan(red_window) & ~np.isnan(nir_window) if not np.any(mask): return None - + ndvi_window = np.zeros_like(red_window, dtype=np.float32) ndvi_window[mask] = (nir_window[mask] - red_window[mask]) / (nir_window[mask] + red_window[mask]) - + # Return mean of valid NDVI values valid_ndvi = ndvi_window[mask] return float(np.mean(valid_ndvi)) if len(valid_ndvi) > 0 else None @@ -115,7 +116,7 @@ def _create_timeseries_for_dir(input_dir, output_dir, site_position, source_name for input_file in sorted(input_dir.glob(pattern)): if "DIST_CLOUD" in input_file.name: continue - + filename = input_file.name parts = filename.replace(".geotiff", "").split("_") date_str = None @@ -307,31 +308,31 @@ def _get_gcc_from_original(input_file, site_position): with rasterio.open(input_file) as src: if src.count < 3: return None - + blue = src.read(BLUE_BAND).astype(np.float32) green = src.read(GREEN_BAND).astype(np.float32) red = src.read(RED_BAND).astype(np.float32) - + lon, lat = site_position[1], site_position[0] x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) - + if not ( src.bounds.left <= x[0] <= src.bounds.right and src.bounds.bottom <= y[0] <= src.bounds.top ): return None - + row, col = src.index(x[0], y[0]) if row < 0 or row >= src.height or col < 0 or col >= src.width: return None - + # Extract 3x3 window with boundary handling r0, r1 = max(0, row - 1), min(src.height, row + 2) c0, c1 = max(0, col - 1), min(src.width, col + 2) blue_window = blue[r0:r1, c0:c1] green_window = green[r0:r1, c0:c1] red_window = red[r0:r1, c0:c1] - + # Calculate GCC for each pixel in window total = red_window + green_window + blue_window mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) @@ -340,10 +341,10 @@ def _get_gcc_from_original(input_file, site_position): if negative_pixels > 0: print(f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)") return None - + gcc_window = np.zeros_like(green_window, dtype=np.float32) gcc_window[mask] = green_window[mask] / total[mask] - + # Return mean of valid GCC values valid_gcc = gcc_window[mask] return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None @@ -358,7 +359,7 @@ def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_ for input_file in sorted(input_dir.glob(pattern)): if "DIST_CLOUD" in input_file.name: continue - + filename = input_file.name parts = filename.replace(".geotiff", "").split("_") date_str = None @@ -451,3 +452,60 @@ def create_gcc_timeseries_post_process(season, site_position, site_name): input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/") output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/") _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}") + + +def _get_bands_from_original(input_file, site_position): + """Extract mean B02, B03, B04, B8A from 3x3 window at site. Returns dict or None.""" + try: + with rasterio.open(input_file) as src: + if src.count < 4: + return None + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + row, col = src.index(x[0], y[0]) + r0, r1 = max(0, row - 1), min(src.height, row + 2) + c0, c1 = max(0, col - 1), min(src.width, col + 2) + bands = [src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32) for i in range(4)] + mask = ~np.any([np.isnan(b) for b in bands], axis=0) + mask &= np.all([b > 0 for b in bands], axis=0) + if not np.any(mask): + return None + return { + "b02": float(np.mean(bands[0][mask])), + "b03": float(np.mean(bands[1][mask])), + "b04": float(np.mean(bands[2][mask])), + "b8a": float(np.mean(bands[3][mask])), + } + except Exception: + return None + + +def _create_s2_bands_timeseries_for_dir(input_dir, output_dir, site_position): + print(f"[S2-BANDS] Creating timeseries.json...") + timeseries = [] + for f in sorted(input_dir.glob("*.geotiff")): + date_str = f.stem.split("_")[0] + if len(date_str) != 8 or not date_str.isdigit(): + continue + date = datetime.strptime(date_str, "%Y%m%d").isoformat() + bands = _get_bands_from_original(f, site_position) + timeseries.append({"date": date, "filename": f.name, **(bands or {})}) + timeseries.sort(key=lambda x: x["date"]) + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2)) + print(f"[S2-BANDS] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)") + + +def create_s2_bands_timeseries_post_process(season, site_position, site_name): + for strategy in ["aggressive", "nonaggressive"]: + for sigma in [20, 30]: + processed_dir = f"processed_{strategy}_sigma{sigma}" + input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/s2/") + output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/s2_bands/") + if input_dir.exists(): + _create_s2_bands_timeseries_for_dir(input_dir, output_dir, site_position) diff --git a/calculate_metrics.py b/metrics_stats.py similarity index 94% rename from calculate_metrics.py rename to metrics_stats.py index bc189af..c770a35 100644 --- a/calculate_metrics.py +++ b/metrics_stats.py @@ -1,4 +1,4 @@ -"""Calculate metrics comparing fusion-derived GCC with phenocam GCC ground truth.""" +"""Metrics and statistics: temporal/spatial metrics and PhenoCam stats.""" import json import numpy as np from pathlib import Path @@ -7,7 +7,7 @@ from scipy.stats import pearsonr import rasterio from rasterio.warp import transform as transform_coords -from generate_indexes import BLUE_BAND, GREEN_BAND, RED_BAND +from metrics_indices import BLUE_BAND, GREEN_BAND, RED_BAND def load_timeseries(filepath): @@ -25,7 +25,7 @@ def match_dates(fusion_ts, phenocam_ts): fusion_vals = [] phenocam_vals = [] dates = [] - + for date in sorted(common_dates): fusion_val = fusion_ts[date] phenocam_val = phenocam_ts[date] @@ -33,7 +33,7 @@ def match_dates(fusion_ts, phenocam_ts): fusion_vals.append(fusion_val) phenocam_vals.append(phenocam_val) dates.append(date) - + return np.array(fusion_vals), np.array(phenocam_vals), dates @@ -95,10 +95,10 @@ def nse(y_true, y_pred): def calculate_temporal_metrics(fusion_ts, phenocam_ts): """Calculate all 6 temporal metrics.""" fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts) - + if len(fusion_vals) < 2: return None - + metrics = { "pearson_r": pearson_correlation(phenocam_vals, fusion_vals), "r_squared": r_squared(phenocam_vals, fusion_vals), @@ -117,7 +117,7 @@ def calculate_phenocam_stats(phenocam_ts): values = [v for v in phenocam_ts.values() if v is not None] if len(values) == 0: return None - + vals = np.array(values) return { "mean": float(np.mean(vals)), @@ -134,44 +134,44 @@ def _get_spatial_stats_from_raster(raster_file, site_position): with rasterio.open(raster_file) as src: if src.count < 3: return None - + blue = src.read(BLUE_BAND).astype(np.float32) green = src.read(GREEN_BAND).astype(np.float32) red = src.read(RED_BAND).astype(np.float32) - + lon, lat = site_position[1], site_position[0] x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) - + if not ( src.bounds.left <= x[0] <= src.bounds.right and src.bounds.bottom <= y[0] <= src.bounds.top ): return None - + row, col = src.index(x[0], y[0]) if row < 0 or row >= src.height or col < 0 or col >= src.width: return None - + # Extract 3x3 window with boundary handling r0, r1 = max(0, row - 1), min(src.height, row + 2) c0, c1 = max(0, col - 1), min(src.width, col + 2) blue_window = blue[r0:r1, c0:c1] green_window = green[r0:r1, c0:c1] red_window = red[r0:r1, c0:c1] - + # Calculate GCC for each pixel in window total = red_window + green_window + blue_window mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) if not np.any(mask): return None - + gcc_window = np.zeros_like(green_window, dtype=np.float32) gcc_window[mask] = green_window[mask] / total[mask] valid_gcc = gcc_window[mask] - + if len(valid_gcc) == 0: return None - + return { "mean": float(np.mean(valid_gcc)), "std": float(np.std(valid_gcc)), @@ -187,15 +187,15 @@ def calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position): fusion_raster_dir = Path(fusion_raster_dir) if not fusion_raster_dir.exists(): return None - + spatial_means = [] phenocam_vals = [] - + # Process each fusion raster file for raster_file in sorted(fusion_raster_dir.glob("*.geotiff")): if "DIST_CLOUD" in raster_file.name: continue - + # Extract date from filename parts = raster_file.stem.split("_") date_str = None @@ -203,35 +203,35 @@ def calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position): if len(part) == 8 and part.isdigit(): date_str = part break - + if not date_str: continue - + # Convert to ISO format for matching try: date = datetime.strptime(date_str, "%Y%m%d").isoformat() except ValueError: continue - + # Get phenocam value for this date phenocam_val = phenocam_ts.get(date) if phenocam_val is None: continue - + # Extract spatial statistics stats = _get_spatial_stats_from_raster(raster_file, site_position) if stats is None: continue - + spatial_means.append(stats["mean"]) phenocam_vals.append(phenocam_val) - + if len(spatial_means) < 2: return None - + spatial_means = np.array(spatial_means) phenocam_vals = np.array(phenocam_vals) - + return { "pearson_r": pearson_correlation(phenocam_vals, spatial_means), "r_squared": r_squared(phenocam_vals, spatial_means), @@ -243,24 +243,24 @@ def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position """Calculate metrics for one scenario.""" base = Path(f"data/{site_name}/{season}") processed_dir = f"processed_{strategy}_sigma{sigma}" - + # Load timeseries fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json" phenocam_ts_path = base / "raw" / "phenocam" / "timeseries.json" - + fusion_ts = load_timeseries(fusion_ts_path) phenocam_ts = load_timeseries(phenocam_ts_path) - + if not fusion_ts or not phenocam_ts: return None, None - + # Calculate temporal metrics temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts) - + # Calculate spatial metrics fusion_raster_dir = base / processed_dir / "fusion" spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) - + return temporal_metrics, spatial_metrics @@ -268,20 +268,20 @@ def calculate_all_metrics(season, site_name, site_position): """Calculate metrics for all 4 scenarios and save to JSON.""" results = {"temporal": {}, "spatial": {}} base = Path(f"data/{site_name}/{season}") - + # Load phenocam timeseries once (same for all scenarios) phenocam_ts_path = base / "raw" / "phenocam" / "timeseries.json" phenocam_ts = load_timeseries(phenocam_ts_path) - + if not phenocam_ts: print("[METRICS] Warning: No phenocam data found") return results - + # Calculate phenocam stats phenocam_stats = calculate_phenocam_stats(phenocam_ts) if phenocam_stats: results["phenocam_stats"] = phenocam_stats - + # Calculate S2 baseline metrics once (S2 data is identical across scenarios) s2_ts_path = base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json" s2_ts = load_timeseries(s2_ts_path) @@ -289,34 +289,34 @@ def calculate_all_metrics(season, site_name, site_position): s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts) if s2_metrics: results["baseline"] = {"s2": s2_metrics} - + # Calculate fusion metrics for each scenario for strategy in ["aggressive", "nonaggressive"]: for sigma in [20, 30]: scenario_name = f"{strategy}_sigma{sigma}" print(f"[METRICS] Calculating metrics for {scenario_name}...") - + processed_dir = f"processed_{strategy}_sigma{sigma}" - + # Load fusion timeseries fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json" fusion_ts = load_timeseries(fusion_ts_path) - + if not fusion_ts: print(f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping") continue - + # Calculate temporal metrics temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts) if temporal_metrics: results["temporal"][scenario_name] = temporal_metrics - + # Calculate spatial metrics fusion_raster_dir = base / processed_dir / "fusion" spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) if spatial_metrics: results["spatial"][scenario_name] = spatial_metrics - + # Add summary if results["temporal"]: best_temporal = max( @@ -324,7 +324,7 @@ def calculate_all_metrics(season, site_name, site_position): key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1 ) results["summary"] = {"best_temporal_scenario": best_temporal[0]} - + if results["spatial"]: best_spatial = max( results["spatial"].items(), @@ -333,38 +333,38 @@ def calculate_all_metrics(season, site_name, site_position): if "summary" not in results: results["summary"] = {} results["summary"]["best_spatial_scenario"] = best_spatial[0] - + # Save results output_path = Path(f"data/{site_name}/{season}/metrics.json") output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: json.dump(results, f, indent=2) print(f"[METRICS] Saved results to {output_path}") - + return results def main(): """Standalone script entry point.""" import sys - + if len(sys.argv) < 4: - print("Usage: calculate_metrics.py ") - print("Example: calculate_metrics.py 2024 innsbruck 47.116171 11.320308") + print("Usage: metrics_stats.py ") + print("Example: metrics_stats.py 2024 innsbruck 47.116171 11.320308") sys.exit(1) - + season = int(sys.argv[1]) site_name = sys.argv[2] site_position = (float(sys.argv[3]), float(sys.argv[4])) - + results = calculate_all_metrics(season, site_name, site_position) - + # Save results output_path = Path(f"data/{site_name}/{season}/metrics.json") output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: json.dump(results, f, indent=2) - + print(f"[METRICS] Saved results to {output_path}") diff --git a/post_process.py b/postprocessing.py similarity index 96% rename from post_process.py rename to postprocessing.py index 1e1fb48..eed4bc0 100644 --- a/post_process.py +++ b/postprocessing.py @@ -1,3 +1,4 @@ +"""Post-processing: crop fusion/S2/S3 to valid pixels.""" from pathlib import Path import numpy as np import rasterio @@ -12,16 +13,16 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress prepared = base / f"prepared_{cleaning_strategy}" processed_dir = f"processed_{cleaning_strategy}_sigma{sigma}" if sigma else f"processed_{cleaning_strategy}_sigma20" processed = base / processed_dir - + s2_prep = prepared / "s2" s3_prep = prepared / "s3" fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion") - + for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: output_dir.mkdir(parents=True, exist_ok=True) - + print(f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}") - + # Crop fusion to valid data and get dimensions fusion_dims = {} for fusion_file in fusion_prep.glob("REFL_*.tif"): @@ -49,7 +50,7 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress dst.write(data_crop) fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile) print(f"[PROCESS] Cropped fusion: {output_file}") - + # Crop S2 and S3 to fusion size for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items(): window = windows.Window(c0, r0, w, h) @@ -91,7 +92,7 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress with rasterio.open(output_file, "w", **p2) as dst: dst.write(data) print(f"[PROCESS] Cropped: {output_file}") - + print("[PROCESS] Completed") @@ -100,3 +101,8 @@ def process_all_scenarios(season, site_position, site_name): for strategy in ["aggressive", "nonaggressive"]: for sigma in [None, 30]: process_cropped(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma) + + +# Aliases +postprocess = process_cropped +postprocess_all_scenarios = process_all_scenarios diff --git a/call_efast.py b/preparation.py similarity index 69% rename from call_efast.py rename to preparation.py index c9ec727..ee698cc 100644 --- a/call_efast.py +++ b/preparation.py @@ -1,7 +1,7 @@ +"""Data preparation: S2/S3 preprocessing for fusion.""" import json import shutil from pathlib import Path -from datetime import datetime, timedelta from collections import defaultdict import numpy as np import rasterio @@ -9,24 +9,20 @@ from rasterio.warp import Resampling from rasterio.vrt import WarpedVRT from rasterio import shutil as rio_shutil +RESOLUTION_RATIO = 21 -def _import_efast(): - """Lazy import of efast to avoid import errors when not using efast functions.""" + +def _import_distance_to_clouds(): + """Lazy import of efast.distance_to_clouds.""" try: - import efast from efast.s2_processing import distance_to_clouds - from efast.s3_processing import reproject_and_crop_s3 - - return efast, distance_to_clouds, reproject_and_crop_s3 + return distance_to_clouds except ImportError: raise ImportError( "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git" ) -RESOLUTION_RATIO = 21 - - def _load_clouds(clouds_file): clouds = {"s2": set(), "s3": set()} if clouds_file.exists(): @@ -100,12 +96,7 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", temp_normalized = s2_output_dir / f"temp_{s2_file.name}" with rasterio.open(s2_file) as src: - pb = src.tags().get("PROCESSING_BASELINE", "") - data = src.read().astype("float32") - mask_nodata = data == 0 - data = (data - 1000) / 10000.0 if pb >= "04.00" else data / 10000.0 - data = np.maximum(data, 0) - data[mask_nodata] = 0 + data = src.read().astype("float32") / 10000.0 profile = src.profile.copy() profile.update({"dtype": "float32", "nodata": 0}) with rasterio.open(temp_normalized, "w", **profile) as dst: @@ -116,7 +107,7 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", ) temp_normalized.unlink() - _, distance_to_clouds, _ = _import_efast() + distance_to_clouds = _import_distance_to_clouds() distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO) @@ -200,59 +191,3 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", rio_shutil.copy(vrt, outfile, **profile) shutil.rmtree(temp_composite_dir) - - -def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None, date_range=None): - lat, lon = site_position - datetime_range = date_range or f"{season}-01-01/{season}-12-31" - - efast_base_dir = _get_base_dir(season, site_name, cleaning_strategy) - s2_output_dir = efast_base_dir / "s2" - s3_output_dir = efast_base_dir / "s3" - fusion_output_dir = efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion") - - fusion_output_dir.mkdir(parents=True, exist_ok=True) - print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}") - - efast, _, _ = _import_efast() - - start_str, end_str = datetime_range.split("/") - start_date = datetime.strptime(start_str, "%Y-%m-%d") - end_date = datetime.strptime(end_str, "%Y-%m-%d") - - current_date = start_date - while current_date <= end_date: - date_str = current_date.strftime("%Y%m%d") - output_file = fusion_output_dir / f"REFL_{date_str}.tif" - try: - kwargs = { - "product": "REFL", - "max_days": 30, - "date_position": 2, - "minimum_acquisition_importance": 0.0, - "ratio": RESOLUTION_RATIO, - } - if sigma is not None: - kwargs["sigma"] = sigma - efast.fusion(current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs) - print( - f"[EFAST] Saved: {output_file}" - if output_file.exists() - else f"[EFAST] No output for {date_str} (insufficient nearby data)" - ) - except Exception as e: - print(f"[EFAST] Error processing {date_str}: {e}") - current_date += timedelta(days=1) - - print("[EFAST] Completed") - - -def run_all_efast_scenarios(season, site_position, site_name, sigma_value=30, date_range=None): - from clouds import detect_clouds - - for strategy in ["aggressive", "nonaggressive"]: - detect_clouds(season, site_name, cleaning_strategy=strategy) - prepare_s2(season, site_position, site_name, cleaning_strategy=strategy, date_range=date_range) - prepare_s3(season, site_position, site_name, cleaning_strategy=strategy, date_range=date_range) - run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=None, date_range=date_range) - run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma_value, date_range=date_range) diff --git a/clouds.py b/preselection.py similarity index 90% rename from clouds.py rename to preselection.py index ec6b85c..9de5f4d 100644 --- a/clouds.py +++ b/preselection.py @@ -1,3 +1,4 @@ +"""Pre-selection: NDVI-based cloud/flaw filtering for S2 and S3 data.""" import json from pathlib import Path from datetime import datetime @@ -8,6 +9,7 @@ THRESHOLDS = {"aggressive": {"threshold": 0.3, "delta": 0.15}, "nonaggressive": def detect_clouds(season, site_name, cleaning_strategy="aggressive"): + """Filter cloud-covered/flawed S2 and S3 files using NDVI thresholds.""" output_file = Path(f"data/{site_name}/{season}/clouds_{cleaning_strategy}.json") clouds = {"s2": [], "s3": []} thresholds = THRESHOLDS[cleaning_strategy] @@ -61,3 +63,7 @@ def detect_clouds(season, site_name, cleaning_strategy="aggressive"): json.dump(clouds, f, indent=2) print(f"[CLOUDS] Saved: {output_file}") + + +# Alias for backward compatibility +preselect = detect_clouds diff --git a/run.py b/run.py index 7720ccc..7f645c7 100644 --- a/run.py +++ b/run.py @@ -1,31 +1,28 @@ -from call_efast import run_all_efast_scenarios -from post_process import process_all_scenarios -from generate_indexes import ( - generate_ndvi_raw, +from fusion import run_all_efast_scenarios +from postprocessing import process_all_scenarios +from metrics_indices import ( create_ndvi_timeseries_raw, - generate_ndvi_post_process, create_ndvi_timeseries_post_process, - generate_gcc_post_process, create_gcc_timeseries_post_process, + create_s2_bands_timeseries_post_process, ) -from download_s2 import download_s2 -from download_s3 import download_s3 -from download_phenocam import download_phenocam, download_phenocam_greenness -from clouds import detect_clouds -from calculate_metrics import calculate_all_metrics +from acquisition_s2 import download_s2 +from acquisition_s3 import download_s3 +from acquisition_phenocam import download_phenocam, download_phenocam_greenness +from metrics_stats import calculate_all_metrics def run_pipeline(season, site_position, site_name): """Run pipeline (downloads + EFAST fusion + post-process + metrics).""" try: # Download steps (needed for new site/season) - download_s2(season, site_position, site_name) - download_s3(season, site_position, site_name) - download_phenocam(season, site_position, site_name) - download_phenocam_greenness(season, site_position, site_name) + #download_s2(season, site_position, site_name) + #download_s3(season, site_position, site_name) + #download_phenocam(season, site_position, site_name) + #download_phenocam_greenness(season, site_position, site_name) - print(f"Generating NDVI for raw data: {site_name}, {season}") - create_ndvi_timeseries_raw(season, site_position, site_name) + #print(f"Generating NDVI for raw data: {site_name}, {season}") + #create_ndvi_timeseries_raw(season, site_position, site_name) print(f"Running EFAST fusion for all scenarios: {site_name}, {season}") run_all_efast_scenarios(season, site_position, site_name) @@ -39,6 +36,9 @@ def run_pipeline(season, site_position, site_name): print(f"Generating GCC for final outputs: {site_name}, {season}") create_gcc_timeseries_post_process(season, site_position, site_name) + print(f"Generating S2 band timeseries: {site_name}, {season}") + create_s2_bands_timeseries_post_process(season, site_position, site_name) + print(f"Calculating metrics: {site_name}, {season}") calculate_all_metrics(season, site_name, site_position) @@ -48,6 +48,8 @@ def run_pipeline(season, site_position, site_name): if __name__ == "__main__": + + run_pipeline(2024, (47.116171, 11.320308), "innsbruck") # forthgr - FORTH Heraklion Greece, Agriculture, 2024 # sites.geojson: lon=25.0743, lat=35.3045 - run_pipeline(2024, (35.3045, 25.0743), "forthgr") + #run_pipeline(2024, (35.3045, 25.0743), "forthgr") diff --git a/webapp/index.html b/webapp/index.html index 1d860b2..3e40604 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -11,6 +11,9 @@ .slider-container { position: sticky; top: 0; background: white; padding: 20px; z-index: 1000; border-bottom: 1px solid #ccc; } .scenario-selector { margin-bottom: 10px; } .scenario-selector select { padding: 5px 10px; font-size: 14px; } + .site-selector { margin-bottom: 10px; } + .site-selector select { padding: 5px 10px; font-size: 14px; } + .site-selector label { margin-right: 5px; } .container { max-width: 1400px; margin: 0 auto; padding: 20px; } .header { display: flex; gap: 20px; margin-bottom: 20px; border-bottom: 1px solid #ccc; padding-top: 10px;padding-bottom: 20px;} .header-col { flex: 1; } @@ -60,6 +63,12 @@
+
+ + + + +
+ + + + +
+ +
2024-01-01
+
S2 RGB (closest available)
+
+
+
+ + + +