From b6fcc48016346db514171977a2db43d7bbc443ec Mon Sep 17 00:00:00 2001
From: Felix Delattre <felix@delattre.de>
Date: Sun, 15 Feb 2026 16:54:31 +0100
Subject: [PATCH] foo

---
 calculate_metrics.py | 372 +++++++++++++++++++++++++++++++++++++++++++
 run.py               |  17 +-
 webapp/index.html    |  69 ++++++++
 3 files changed, 452 insertions(+), 6 deletions(-)
 create mode 100644 calculate_metrics.py

diff --git a/calculate_metrics.py b/calculate_metrics.py
new file mode 100644
index 0000000..bc189af
--- /dev/null
+++ b/calculate_metrics.py
@@ -0,0 +1,372 @@
+"""Calculate metrics comparing fusion-derived GCC with phenocam GCC ground truth."""
+import json
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+from scipy.stats import pearsonr
+import rasterio
+from rasterio.warp import transform as transform_coords
+
+from generate_indexes import BLUE_BAND, GREEN_BAND, RED_BAND
+
+
+def load_timeseries(filepath):
+    """Load JSON timeseries and return dict mapping date -> value."""
+    if not Path(filepath).exists():
+        return {}
+    with open(filepath) as f:
+        data = json.load(f)
+    return {item["date"]: item.get("greenness_index") for item in data}
+
+
+def match_dates(fusion_ts, phenocam_ts):
+    """Match dates between timeseries, return aligned numpy arrays (filter None values)."""
+    common_dates = set(fusion_ts.keys()) & set(phenocam_ts.keys())
+    fusion_vals = []
+    phenocam_vals = []
+    dates = []
+    
+    for date in sorted(common_dates):
+        fusion_val = fusion_ts[date]
+        phenocam_val = phenocam_ts[date]
+        if fusion_val is not None and phenocam_val is not None:
+            fusion_vals.append(fusion_val)
+            phenocam_vals.append(phenocam_val)
+            dates.append(date)
+    
+    return np.array(fusion_vals), np.array(phenocam_vals), dates
+
+
+def pearson_correlation(y_true, y_pred):
+    """Calculate Pearson correlation coefficient r."""
+    if len(y_true) < 2 or np.std(y_true) == 0 or np.std(y_pred) == 0:
+        return None
+    r, _ = pearsonr(y_true, y_pred)
+    return float(r)
+
+
+def r_squared(y_true, y_pred):
+    """Calculate coefficient of determination R²."""
+    if len(y_true) < 2 or np.std(y_true) == 0:
+        return None
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    if ss_tot == 0:
+        return None
+    return float(1 - (ss_res / ss_tot))
+
+
+def rmse(y_true, y_pred):
+    """Calculate Root Mean Square Error."""
+    if len(y_true) == 0:
+        return None
+    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))
+
+
+def mae(y_true, y_pred):
+    """Calculate Mean Absolute Error."""
+    if len(y_true) == 0:
+        return None
+    return float(np.mean(np.abs(y_true - y_pred)))
+
+
+def nrmse(y_true, y_pred):
+    """Calculate normalized RMSE (RMSE / mean(y_true))."""
+    if len(y_true) == 0:
+        return None
+    mean_val = np.mean(y_true)
+    if mean_val == 0:
+        return None
+    rmse_val = rmse(y_true, y_pred)
+    return float(rmse_val / mean_val) if rmse_val is not None else None
+
+
+def nse(y_true, y_pred):
+    """Calculate Nash-Sutcliffe Efficiency."""
+    if len(y_true) < 2:
+        return None
+    numerator = np.sum((y_true - y_pred) ** 2)
+    denominator = np.sum((y_true - np.mean(y_true)) ** 2)
+    if denominator == 0:
+        return None
+    return float(1 - (numerator / denominator))
+
+
+def calculate_temporal_metrics(fusion_ts, phenocam_ts):
+    """Calculate all 6 temporal metrics."""
+    fusion_vals, phenocam_vals, dates = match_dates(fusion_ts, phenocam_ts)
+    
+    if len(fusion_vals) < 2:
+        return None
+    
+    metrics = {
+        "pearson_r": pearson_correlation(phenocam_vals, fusion_vals),
+        "r_squared": r_squared(phenocam_vals, fusion_vals),
+        "rmse": rmse(phenocam_vals, fusion_vals),
+        "mae": mae(phenocam_vals, fusion_vals),
+        "nrmse": nrmse(phenocam_vals, fusion_vals),
+        "nse": nse(phenocam_vals, fusion_vals),
+        "n_samples": len(fusion_vals),
+        "date_range": {"start": dates[0], "end": dates[-1]} if dates else None,
+    }
+    return metrics
+
+
+def calculate_phenocam_stats(phenocam_ts):
+    """Calculate phenocam summary statistics."""
+    values = [v for v in phenocam_ts.values() if v is not None]
+    if len(values) == 0:
+        return None
+    
+    vals = np.array(values)
+    return {
+        "mean": float(np.mean(vals)),
+        "std": float(np.std(vals)),
+        "min": float(np.min(vals)),
+        "max": float(np.max(vals)),
+        "n_samples": len(vals),
+    }
+
+
+def _get_spatial_stats_from_raster(raster_file, site_position):
+    """Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window."""
+    try:
+        with rasterio.open(raster_file) as src:
+            if src.count < 3:
+                return None
+            
+            blue = src.read(BLUE_BAND).astype(np.float32)
+            green = src.read(GREEN_BAND).astype(np.float32)
+            red = src.read(RED_BAND).astype(np.float32)
+            
+            lon, lat = site_position[1], site_position[0]
+            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
+            
+            if not (
+                src.bounds.left <= x[0] <= src.bounds.right
+                and src.bounds.bottom <= y[0] <= src.bounds.top
+            ):
+                return None
+            
+            row, col = src.index(x[0], y[0])
+            if row < 0 or row >= src.height or col < 0 or col >= src.width:
+                return None
+            
+            # Extract 3x3 window with boundary handling
+            r0, r1 = max(0, row - 1), min(src.height, row + 2)
+            c0, c1 = max(0, col - 1), min(src.width, col + 2)
+            blue_window = blue[r0:r1, c0:c1]
+            green_window = green[r0:r1, c0:c1]
+            red_window = red[r0:r1, c0:c1]
+            
+            # Calculate GCC for each pixel in window
+            total = red_window + green_window + blue_window
+            mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0)
+            if not np.any(mask):
+                return None
+            
+            gcc_window = np.zeros_like(green_window, dtype=np.float32)
+            gcc_window[mask] = green_window[mask] / total[mask]
+            valid_gcc = gcc_window[mask]
+            
+            if len(valid_gcc) == 0:
+                return None
+            
+            return {
+                "mean": float(np.mean(valid_gcc)),
+                "std": float(np.std(valid_gcc)),
+                "min": float(np.min(valid_gcc)),
+                "max": float(np.max(valid_gcc)),
+            }
+    except Exception:
+        return None
+
+
+def calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position):
+    """Calculate r and R² on spatial statistics."""
+    fusion_raster_dir = Path(fusion_raster_dir)
+    if not fusion_raster_dir.exists():
+        return None
+    
+    spatial_means = []
+    phenocam_vals = []
+    
+    # Process each fusion raster file
+    for raster_file in sorted(fusion_raster_dir.glob("*.geotiff")):
+        if "DIST_CLOUD" in raster_file.name:
+            continue
+        
+        # Extract date from filename
+        parts = raster_file.stem.split("_")
+        date_str = None
+        for part in parts:
+            if len(part) == 8 and part.isdigit():
+                date_str = part
+                break
+        
+        if not date_str:
+            continue
+        
+        # Convert to ISO format for matching
+        try:
+            date = datetime.strptime(date_str, "%Y%m%d").isoformat()
+        except ValueError:
+            continue
+        
+        # Get phenocam value for this date
+        phenocam_val = phenocam_ts.get(date)
+        if phenocam_val is None:
+            continue
+        
+        # Extract spatial statistics
+        stats = _get_spatial_stats_from_raster(raster_file, site_position)
+        if stats is None:
+            continue
+        
+        spatial_means.append(stats["mean"])
+        phenocam_vals.append(phenocam_val)
+    
+    if len(spatial_means) < 2:
+        return None
+    
+    spatial_means = np.array(spatial_means)
+    phenocam_vals = np.array(phenocam_vals)
+    
+    return {
+        "pearson_r": pearson_correlation(phenocam_vals, spatial_means),
+        "r_squared": r_squared(phenocam_vals, spatial_means),
+        "n_samples": len(spatial_means),
+    }
+
+
+def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position):
+    """Calculate metrics for one scenario."""
+    base = Path(f"data/{site_name}/{season}")
+    processed_dir = f"processed_{strategy}_sigma{sigma}"
+    
+    # Load timeseries
+    fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
+    phenocam_ts_path = base / "raw" / "phenocam" / "timeseries.json"
+    
+    fusion_ts = load_timeseries(fusion_ts_path)
+    phenocam_ts = load_timeseries(phenocam_ts_path)
+    
+    if not fusion_ts or not phenocam_ts:
+        return None, None
+    
+    # Calculate temporal metrics
+    temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
+    
+    # Calculate spatial metrics
+    fusion_raster_dir = base / processed_dir / "fusion"
+    spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position)
+    
+    return temporal_metrics, spatial_metrics
+
+
+def calculate_all_metrics(season, site_name, site_position):
+    """Calculate metrics for all 4 scenarios and save to JSON."""
+    results = {"temporal": {}, "spatial": {}}
+    base = Path(f"data/{site_name}/{season}")
+    
+    # Load phenocam timeseries once (same for all scenarios)
+    phenocam_ts_path = base / "raw" / "phenocam" / "timeseries.json"
+    phenocam_ts = load_timeseries(phenocam_ts_path)
+    
+    if not phenocam_ts:
+        print("[METRICS] Warning: No phenocam data found")
+        return results
+    
+    # Calculate phenocam stats
+    phenocam_stats = calculate_phenocam_stats(phenocam_ts)
+    if phenocam_stats:
+        results["phenocam_stats"] = phenocam_stats
+    
+    # Calculate S2 baseline metrics once (S2 data is identical across scenarios)
+    s2_ts_path = base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json"
+    s2_ts = load_timeseries(s2_ts_path)
+    if s2_ts:
+        s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts)
+        if s2_metrics:
+            results["baseline"] = {"s2": s2_metrics}
+    
+    # Calculate fusion metrics for each scenario
+    for strategy in ["aggressive", "nonaggressive"]:
+        for sigma in [20, 30]:
+            scenario_name = f"{strategy}_sigma{sigma}"
+            print(f"[METRICS] Calculating metrics for {scenario_name}...")
+            
+            processed_dir = f"processed_{strategy}_sigma{sigma}"
+            
+            # Load fusion timeseries
+            fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
+            fusion_ts = load_timeseries(fusion_ts_path)
+            
+            if not fusion_ts:
+                print(f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping")
+                continue
+            
+            # Calculate temporal metrics
+            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
+            if temporal_metrics:
+                results["temporal"][scenario_name] = temporal_metrics
+            
+            # Calculate spatial metrics
+            fusion_raster_dir = base / processed_dir / "fusion"
+            spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position)
+            if spatial_metrics:
+                results["spatial"][scenario_name] = spatial_metrics
+    
+    # Add summary
+    if results["temporal"]:
+        best_temporal = max(
+            results["temporal"].items(),
+            key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1
+        )
+        results["summary"] = {"best_temporal_scenario": best_temporal[0]}
+    
+    if results["spatial"]:
+        best_spatial = max(
+            results["spatial"].items(),
+            key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1
+        )
+        if "summary" not in results:
+            results["summary"] = {}
+        results["summary"]["best_spatial_scenario"] = best_spatial[0]
+    
+    # Save results
+    output_path = Path(f"data/{site_name}/{season}/metrics.json")
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"[METRICS] Saved results to {output_path}")
+    
+    return results
+
+
+def main():
+    """Standalone script entry point."""
+    import sys
+    
+    if len(sys.argv) < 4:
+        print("Usage: calculate_metrics.py <season> <site_name> <lat> <lon>")
+        print("Example: calculate_metrics.py 2024 innsbruck 47.116171 11.320308")
+        sys.exit(1)
+    
+    season = int(sys.argv[1])
+    site_name = sys.argv[2]
+    site_position = (float(sys.argv[3]), float(sys.argv[4]))
+    
+    results = calculate_all_metrics(season, site_name, site_position)
+    
+    # Save results
+    output_path = Path(f"data/{site_name}/{season}/metrics.json")
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w") as f:
+        json.dump(results, f, indent=2)
+    
+    print(f"[METRICS] Saved results to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/run.py b/run.py
index 090ff33..26f9e64 100644
--- a/run.py
+++ b/run.py
@@ -12,6 +12,7 @@ from download_s2 import download_s2
 from download_s3 import download_s3
 from download_phenocam import download_phenocam, download_phenocam_greenness
 from clouds import detect_clouds
+from calculate_metrics import calculate_all_metrics
 
 
 def run_pipeline(season, site_position, site_name):
@@ -20,7 +21,7 @@ def run_pipeline(season, site_position, site_name):
         # download_s2(season, site_position, site_name)
         # download_s3(season, site_position, site_name)
         # download_phenocam(season, site_position, site_name)
-        #download_phenocam_greenness(season, site_position, site_name)
+        # download_phenocam_greenness(season, site_position, site_name)
 
         # print(f"Generating NDVI for raw data: {site_name}, {season}")
         # create_ndvi_timeseries_raw(season, site_position, site_name)
@@ -29,12 +30,16 @@ def run_pipeline(season, site_position, site_name):
         # run_all_efast_scenarios(season, site_position, site_name)
 
         # print(f"Post-processing data: {site_name}, {season}")
-        # process_all_scenarios(season, site_position, site_name)  # Already done
+        # process_all_scenarios(season, site_position, site_name)
+        
         # print(f"Generating NDVI for final outputs: {site_name}, {season}")
-        # create_ndvi_timeseries_post_process(season, site_position, site_name)  # Already done
-        print(f"Generating GCC for final outputs: {site_name}, {season}")
-        # generate_gcc_post_process(season, site_position, site_name)  # No-op function
-        create_gcc_timeseries_post_process(season, site_position, site_name)
+        # create_ndvi_timeseries_post_process(season, site_position, site_name)
+        
+        # print(f"Generating GCC for final outputs: {site_name}, {season}")
+        # create_gcc_timeseries_post_process(season, site_position, site_name)
+
+        print(f"Calculating metrics: {site_name}, {season}")
+        calculate_all_metrics(season, site_name, site_position)
 
     except Exception as e:
         print(f"Error: {e}")
diff --git a/webapp/index.html b/webapp/index.html
index b581afb..1d860b2 100644
--- a/webapp/index.html
+++ b/webapp/index.html
@@ -109,6 +109,10 @@
             <div class="combined-plot-label">GCC Comparison: All Scenarios</div>
             <canvas id="scenariosgcctimeseries" class="combined-plot-canvas"></canvas>
         </div>
+        <div class="combined-plot">
+            <div class="combined-plot-label">Metrics Comparison: All Scenarios</div>
+            <div id="metricsTable" style="overflow-x: auto; margin-top: 10px;"></div>
+        </div>
     </div>
     <script>
         proj4.defs("EPSG:32632", "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs");
@@ -133,6 +137,7 @@
         }
         
         let allScenariosGCC = {};
+        let metricsData = null;
         const siteMap = L.map("sitemap", { zoomControl: false }).setView(sitePosition, 4).addLayer(L.tileLayer(osmUrl, { attribution: "OpenStreetMap", opacity: 1 }));
         L.marker(sitePosition, { icon: L.divIcon({ className: "site-marker", html: "<div style='width:8px;height:8px;background:red;border:2px solid white;border-radius:50%;box-shadow:0 0 2px rgba(0,0,0,0.5);'></div>", iconSize: [8, 8] }) }).addTo(siteMap);
         const maps = {
@@ -195,11 +200,18 @@
             const scenarioData = await Promise.all(scenarioPromises);
             scenarios.forEach((s, i) => { allScenariosGCC[s.name] = scenarioData[i]; });
             
+            // Load metrics
+            try {
+                const metricsRes = await fetch(`../data/${siteName}/${season}/metrics.json`);
+                if (metricsRes.ok) metricsData = await metricsRes.json();
+            } catch {}
+            
             drawTimeseries();
             drawGreennessTimeseries();
             drawPhenocamGreennessTimeseries();
             drawCombinedGreennessTimeseries();
             drawScenariosComparison();
+            drawMetricsTable();
         }
         
         function drawGreennessTimeseries() {
@@ -512,6 +524,62 @@
             });
         }
         
+        function drawMetricsTable() {
+            const container = document.getElementById("metricsTable");
+            if (!metricsData || !metricsData.temporal) {
+                container.innerHTML = "<p style='color:#666; font-size:12px;'>Metrics not available. Run calculate_metrics.py to generate.</p>";
+                return;
+            }
+            
+            const scenarios = ["aggressive_sigma20", "aggressive_sigma30", "nonaggressive_sigma20", "nonaggressive_sigma30"];
+            const scenarioNames = ["Aggressive σ20", "Aggressive σ30", "Non-aggressive σ20", "Non-aggressive σ30"];
+            const metrics = ["pearson_r", "r_squared", "rmse", "mae", "nrmse", "nse"];
+            const metricLabels = { pearson_r: "r", r_squared: "R²", rmse: "RMSE", mae: "MAE", nrmse: "nRMSE", nse: "NSE" };
+            
+            let html = "<table style='width:100%; border-collapse:collapse; font-size:11px;'>";
+            html += "<thead><tr style='background:#f5f5f5; border-bottom:2px solid #ccc;'>";
+            html += "<th style='padding:8px; text-align:left;'>Scenario</th>";
+            metrics.forEach(m => html += `<th style='padding:8px; text-align:right;'>${metricLabels[m]}</th>`);
+            html += "</tr></thead><tbody>";
+            
+            // Add S2 baseline row
+            if (metricsData.baseline && metricsData.baseline.s2) {
+                const data = metricsData.baseline.s2;
+                html += `<tr style='border-bottom:2px solid #ccc; background:#f9f9f9;'>`;
+                html += `<td style='padding:6px 8px; font-weight:600;'>S2 (baseline)</td>`;
+                metrics.forEach(m => {
+                    const val = data[m];
+                    const fmt = val !== null && val !== undefined ? (m === "pearson_r" || m === "r_squared" || m === "nse" ? val.toFixed(3) : val.toFixed(4)) : "—";
+                    html += `<td style='padding:6px 8px; text-align:right; font-family:monospace;'>${fmt}</td>`;
+                });
+                html += "</tr>";
+            }
+            
+            // Add fusion scenario rows
+            scenarios.forEach((scenario, i) => {
+                const data = metricsData.temporal[scenario];
+                if (!data) return;
+                html += `<tr style='border-bottom:1px solid #eee;'>`;
+                html += `<td style='padding:6px 8px; font-weight:500;'>${scenarioNames[i]}</td>`;
+                metrics.forEach(m => {
+                    const val = data[m];
+                    const fmt = val !== null && val !== undefined ? (m === "pearson_r" || m === "r_squared" || m === "nse" ? val.toFixed(3) : val.toFixed(4)) : "—";
+                    html += `<td style='padding:6px 8px; text-align:right; font-family:monospace;'>${fmt}</td>`;
+                });
+                html += "</tr>";
+            });
+            
+            html += "</tbody></table>";
+            
+            // Add phenocam stats info if available
+            if (metricsData.phenocam_stats) {
+                const stats = metricsData.phenocam_stats;
+                html += `<p style='margin-top:10px; font-size:10px; color:#666;'>PhenoCam stats: mean=${stats.mean.toFixed(3)}, std=${stats.std.toFixed(3)}, n=${stats.n_samples}</p>`;
+            }
+            
+            container.innerHTML = html;
+        }
+        
         function drawTimeseries() {
             const currentDate = dateFromDays(parseInt(slider.value));
             for (const source of ["s2", "fusion", "s3"]) {
@@ -730,6 +798,7 @@
             drawPhenocamGreennessTimeseries();
             drawCombinedGreennessTimeseries();
             drawScenariosComparison();
+            drawMetricsTable();
             for (const source of ["s2", "fusion", "s3"]) {
                 const filename = await findFile(date, source);
                 if (filename) {