foo

2026-04-20 09:06:10 +02:00 · 2026-04-20 09:06:10 +02:00 · de25bad733
commit de25bad733
parent 94f910d978
2 changed files with 40 additions and 235 deletions
--- a/metrics_stats.py
+++ b/metrics_stats.py
@ -1,4 +1,4 @@
-"""Metrics and statistics: temporal/spatial metrics and PhenoCam stats."""
+"""Metrics and statistics: temporal metrics and PhenoCam stats."""
 import json
 import numpy as np
@ -7,10 +7,6 @@ from datetime import datetime, timedelta
 from scipy import sparse
 from scipy.sparse.linalg import spsolve
 from scipy.stats import pearsonr
 import rasterio
 from rasterio.warp import transform as transform_coords
 from metrics_indices import BLUE_BAND, GREEN_BAND, RED_BAND
 WHITTAKER_LAMBDA_DAYS_SQ = 400.0
@ -205,178 +201,10 @@ def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
    return out
 def _get_spatial_stats_from_raster(raster_file, site_position):
    """Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window."""
    try:
        with rasterio.open(raster_file) as src:
            if src.count == 1:
                g = src.read(1).astype(np.float32)
                lon, lat = site_position[1], site_position[0]
                x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
                if not (
                    src.bounds.left <= x[0] <= src.bounds.right
                    and src.bounds.bottom <= y[0] <= src.bounds.top
                ):
                    return None
                row, col = src.index(x[0], y[0])
                if row < 0 or row >= src.height or col < 0 or col >= src.width:
                    return None
                r0, r1 = max(0, row - 1), min(src.height, row + 2)
                c0, c1 = max(0, col - 1), min(src.width, col + 2)
                win = g[r0:r1, c0:c1]
                mask = np.isfinite(win) & (win > 0)
                if not np.any(mask):
                    return None
                valid = win[mask]
                return {
                    "mean": float(np.mean(valid)),
                    "std": float(np.std(valid)),
                    "min": float(np.min(valid)),
                    "max": float(np.max(valid)),
                }
            if src.count < 3:
                return None
            blue = src.read(BLUE_BAND).astype(np.float32)
            green = src.read(GREEN_BAND).astype(np.float32)
            red = src.read(RED_BAND).astype(np.float32)
            lon, lat = site_position[1], site_position[0]
            x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
            if not (
                src.bounds.left <= x[0] <= src.bounds.right
                and src.bounds.bottom <= y[0] <= src.bounds.top
            ):
                return None
            row, col = src.index(x[0], y[0])
            if row < 0 or row >= src.height or col < 0 or col >= src.width:
                return None
            # Extract 3x3 window with boundary handling
            r0, r1 = max(0, row - 1), min(src.height, row + 2)
            c0, c1 = max(0, col - 1), min(src.width, col + 2)
            blue_window = blue[r0:r1, c0:c1]
            green_window = green[r0:r1, c0:c1]
            red_window = red[r0:r1, c0:c1]
            # Calculate GCC for each pixel in window
            total = red_window + green_window + blue_window
            mask = (
                (total > 0)
                & ~np.isnan(total)
                & (blue_window >= 0)
                & (green_window >= 0)
                & (red_window >= 0)
            )
            if not np.any(mask):
                return None
            gcc_window = np.zeros_like(green_window, dtype=np.float32)
            gcc_window[mask] = green_window[mask] / total[mask]
            valid_gcc = gcc_window[mask]
            if len(valid_gcc) == 0:
                return None
            return {
                "mean": float(np.mean(valid_gcc)),
                "std": float(np.std(valid_gcc)),
                "min": float(np.min(valid_gcc)),
                "max": float(np.max(valid_gcc)),
            }
    except Exception:
        return None
 def calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position):
    """Calculate r and R² on spatial statistics."""
    fusion_raster_dir = Path(fusion_raster_dir)
    if not fusion_raster_dir.exists():
        return None
    spatial_means = []
    phenocam_vals = []
    # Process each fusion raster file
    for raster_file in sorted(fusion_raster_dir.glob("*.geotiff")):
        if "DIST_CLOUD" in raster_file.name:
            continue
        # Extract date from filename
        parts = raster_file.stem.split("_")
        date_str = None
        for part in parts:
            if len(part) == 8 and part.isdigit():
                date_str = part
                break
        if not date_str:
            continue
        # Convert to ISO format for matching
        try:
            date = datetime.strptime(date_str, "%Y%m%d").isoformat()
        except ValueError:
            continue
        # Get phenocam value for this date
        phenocam_val = phenocam_ts.get(date)
        if phenocam_val is None:
            continue
        # Extract spatial statistics
        stats = _get_spatial_stats_from_raster(raster_file, site_position)
        if stats is None:
            continue
        spatial_means.append(stats["mean"])
        phenocam_vals.append(phenocam_val)
    if len(spatial_means) < 2:
        return None
    spatial_means = np.array(spatial_means)
    phenocam_vals = np.array(phenocam_vals)
    return {
        "pearson_r": pearson_correlation(phenocam_vals, spatial_means),
        "r_squared": r_squared(phenocam_vals, spatial_means),
        "n_samples": len(spatial_means),
    }
 def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position):
    """Calculate metrics for one scenario."""
    base = Path(f"data/{site_name}/{season}")
    processed_dir = f"processed_{strategy}_sigma{sigma}"
    # Load timeseries
    fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
    phenocam_ts_path = base / "raw" / "phenocam" / "phenocam_gcc.json"
    fusion_ts = load_timeseries(fusion_ts_path)
    phenocam_ts = load_timeseries(phenocam_ts_path)
    if not fusion_ts or not phenocam_ts:
        return None, None
    # Calculate temporal metrics
    temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
    # Calculate spatial metrics
    fusion_raster_dir = base / processed_dir / "fusion"
    spatial_metrics = calculate_spatial_metrics(
        fusion_raster_dir, phenocam_ts, site_position
    )
    return temporal_metrics, spatial_metrics
 def calculate_all_metrics(season, site_name, site_position):
    """Calculate metrics for all 4 scenarios and save to JSON."""
-    results = {"temporal": {}, "spatial": {}}
+    del site_position
    results = {"temporal": {}}
    base = Path(f"data/{site_name}/{season}")
    # Load phenocam timeseries once (same for all scenarios)
@ -457,19 +285,10 @@ def calculate_all_metrics(season, site_name, site_position):
                )
                continue
            # Calculate temporal metrics
            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
            if temporal_metrics:
                results["temporal"][scenario_name] = temporal_metrics
            # Calculate spatial metrics
            fusion_raster_dir = base / processed_dir / "fusion"
            spatial_metrics = calculate_spatial_metrics(
                fusion_raster_dir, phenocam_ts, site_position
            )
            if spatial_metrics:
                results["spatial"][scenario_name] = spatial_metrics
    for strategy in ["aggressive", "nonaggressive"]:
        for sigma in [20, 30]:
            scenario_name = f"{strategy}_sigma{sigma}_itb"
@ -484,35 +303,6 @@ def calculate_all_metrics(season, site_name, site_position):
            temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
            if temporal_metrics:
                results["temporal"][scenario_name] = temporal_metrics
            fusion_raster_dir = base / processed_dir / "fusion"
            spatial_metrics = calculate_spatial_metrics(
                fusion_raster_dir, phenocam_ts, site_position
            )
            if spatial_metrics:
                results["spatial"][scenario_name] = spatial_metrics
    # Add summary (primary: NSE vs PhenoCam; R² kept for comparison)
    if results["temporal"]:
        ti = list(results["temporal"].items())
        def _score(k):
            return lambda x: x[1].get(k) if x[1].get(k) is not None else float("-inf")
        results["summary"] = {
            "best_temporal_scenario": max(ti, key=_score("nse_pc"))[0],
            "best_temporal_scenario_by_r2": max(ti, key=_score("r_squared"))[0],
        }
    if results["spatial"]:
        best_spatial = max(
            results["spatial"].items(),
            key=lambda x: x[1].get("r_squared", -1)
            if x[1].get("r_squared") is not None
            else -1,
        )
        if "summary" not in results:
            results["summary"] = {}
        results["summary"]["best_spatial_scenario"] = best_spatial[0]
    # Save results
    output_path = Path(f"data/{site_name}/{season}/metrics.json")
--- a/webapp/metrics.html
+++ b/webapp/metrics.html
@ -19,6 +19,12 @@
        th { background: #f5f5f5; }
        td.num { text-align: right; font-variant-numeric: tabular-nums; }
        .compare-note { font-size: 12px; color: #555; margin: 0 0 8px 0; max-width: 720px; }
        .section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
        .section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
        .intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
                 padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
        .intro ul { margin: 6px 0 0 18px; padding: 0; }
        .intro li { margin-bottom: 2px; }
        .empty { color: #666; font-style: italic; }
        .err { color: #a00; }
    </style>
@ -44,8 +50,6 @@
    </div>
    <script>
        const METRIC_COLS = ["pearson_r", "r_squared", "rmse", "mae", "nrmse", "nse_pc", "n_samples"];
        /** Spatial fusion metrics in metrics.json (no RMSE block at site level). */
        const SPATIAL_METRIC_COLS = ["pearson_r", "r_squared", "n_samples"];
        function mv(m, c) {
            return c === "nse_pc" ? (m.nse_pc ?? m.nse) : m[c];
        }
@ -66,8 +70,9 @@
        }
        function tableSection(title, obj) {
            const heading = title ? `<h2>${title}</h2>` : "";
            if (!obj || typeof obj !== "object" || !Object.keys(obj).length) {
-                return `<h2>${title}</h2><p class="empty">No data</p>`;
+                return `${heading}<p class="empty">No data</p>`;
            }
            const keys = Object.keys(obj).sort();
            let head = `<tr><th>Scenario</th>${METRIC_COLS.map((c) => `<th>${c}</th>`).join("")}</tr>`;
@ -75,7 +80,7 @@
                const m = obj[k] || {};
                return `<tr><td>${k}</td>${METRIC_COLS.map((c) => `<td class="num">${fmt(mv(m, c))}</td>`).join("")}</tr>`;
            }).join("");
-            return `<h2>${title}</h2><table>${head}${rows}</table>`;
+            return `${heading}<table>${head}${rows}</table>`;
        }
        /** Pair BtI keys (`aggressive_sigma20`) with ItB (`aggressive_sigma20_itb`). */
@ -121,7 +126,9 @@
                    return `<tr><td>${p.label}</td>${cells}</tr>`;
                })
                .join("");
-            return `<h2>${title}</h2><p class="compare-note">${blurb}</p><table>${head}${rows}</table>`;
+            const heading = title ? `<h2>${title}</h2>` : "";
            const note = blurb ? `<p class="compare-note">${blurb}</p>` : "";
            return `${heading}${note}<table>${head}${rows}</table>`;
        }
        function baselineSection(b) {
@ -149,28 +156,36 @@
                return;
            }
            let html = "";
            html += `
                <div class="intro">
                    All metrics compare a greenness index (GCC) from satellite products against PhenoCam
                    ground-truth GCC at the site's 3×3 pixel window.
                    <ul>
                        <li><b>BtI</b> (<i>Bands-then-Index</i>): fuse S2/S3 reflectance, then compute GCC from the fused bands.</li>
                        <li><b>ItB</b> (<i>Index-then-Bands</i>): compute GCC from S2 and S3 first, then fuse the GCC rasters.</li>
                        <li>Scenarios combine a cloud-screening <b>strategy</b> (<code>aggressive</code> / <code>nonaggressive</code>)
                            and an EFAST fusion <b>σ</b> (<code>sigma20</code> / <code>sigma30</code>).</li>
                    </ul>
                </div>`;
            if (data.phenocam_stats) {
-                html += `<h2>PhenoCam</h2><table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
+                html += `<h2>PhenoCam</h2>`;
                html += `<p class="section-note">Summary statistics of the PhenoCam GCC timeseries used as ground truth for this site and season.</p>`;
                html += `<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
                const p = data.phenocam_stats;
                html += `<td class="num">${fmt(p.mean)}</td><td class="num">${fmt(p.std)}</td><td class="num">${fmt(p.min)}</td><td class="num">${fmt(p.max)}</td><td class="num">${fmt(p.n_samples)}</td></tr></table>`;
            }
-            html += baselineSection(data.baseline);
+            const baselineHtml = baselineSection(data.baseline);
-            html += btiItbCompareSection(
+            if (baselineHtml) {
-                "Temporal: BtI vs ItB (paired)",
+                html += `<h2>Baselines (temporal vs PhenoCam)</h2>`;
-                data.temporal,
+                html += `<p class="section-note">Reference GCC series <i>before</i> any fusion: raw S2 (all dates and cloud-screened per strategy), S3 composite per strategy, and a Whittaker-smoothed S2 series (λ=400). Useful to see what fusion has to beat.</p>`;
-                "Δ = ItB − BtI. For Pearson r, R², and NSE (%), positive Δ means ItB is higher. For RMSE, MAE, and NRMSE, negative Δ means ItB is better (lower error)."
+                html += baselineHtml.replace(/^<h2>[^<]*<\/h2>/, "");
            );
            html += tableSection("Temporal (vs PhenoCam)", data.temporal);
            html += btiItbCompareSection(
                "Spatial: BtI vs ItB (paired)",
                data.spatial,
                "Δ = ItB − BtI. For Pearson r and R², positive Δ means ItB is higher.",
                SPATIAL_METRIC_COLS
            );
            html += tableSection("Spatial (3×3 fusion mean vs PhenoCam)", data.spatial);
            if (data.summary) {
                html += `<h2>Summary</h2><pre style="font-size:13px;background:#f9f9f9;padding:10px;">${JSON.stringify(data.summary, null, 2)}</pre>`;
            }
            html += `<h2>Temporal: BtI vs ItB (paired)</h2>`;
            html += `<p class="section-note">Per scenario (same strategy + σ), BtI and ItB side-by-side with <b>Δ = ItB − BtI</b>. Positive Δ is better for Pearson r, R², and NSE; negative Δ is better for RMSE, MAE, and NRMSE.</p>`;
            html += btiItbCompareSection("", data.temporal, "") || `<p class="empty">No paired scenarios</p>`;
            html += `<h2>Temporal (vs PhenoCam)</h2>`;
            html += `<p class="section-note">Per-scenario agreement between the fusion GCC <b>timeseries</b> at the site 3×3 window and the PhenoCam GCC timeseries, across all matched dates. Scenarios ending in <code>_itb</code> are Index-then-Bands; the others are Bands-then-Index.</p>`;
            html += tableSection("", data.temporal);
            el.innerHTML = html || `<p class="empty">Empty metrics file</p>`;
        }