foo
This commit is contained in:
parent
94f910d978
commit
de25bad733
2 changed files with 40 additions and 235 deletions
216
metrics_stats.py
216
metrics_stats.py
|
|
@ -1,4 +1,4 @@
|
||||||
"""Metrics and statistics: temporal/spatial metrics and PhenoCam stats."""
|
"""Metrics and statistics: temporal metrics and PhenoCam stats."""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
@ -7,10 +7,6 @@ from datetime import datetime, timedelta
|
||||||
from scipy import sparse
|
from scipy import sparse
|
||||||
from scipy.sparse.linalg import spsolve
|
from scipy.sparse.linalg import spsolve
|
||||||
from scipy.stats import pearsonr
|
from scipy.stats import pearsonr
|
||||||
import rasterio
|
|
||||||
from rasterio.warp import transform as transform_coords
|
|
||||||
|
|
||||||
from metrics_indices import BLUE_BAND, GREEN_BAND, RED_BAND
|
|
||||||
|
|
||||||
WHITTAKER_LAMBDA_DAYS_SQ = 400.0
|
WHITTAKER_LAMBDA_DAYS_SQ = 400.0
|
||||||
|
|
||||||
|
|
@ -205,178 +201,10 @@ def _whittaker_smooth_dict(obs_dates, obs_values, lam: float, n_min: int = 3):
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _site_window_bounds(src, site_position):
    """Return ((r0, r1), (c0, c1)) for the 3x3 window around the site, or None.

    ``site_position`` is indexed as ``[0] -> lat`` and ``[1] -> lon`` and is
    interpreted as EPSG:4326 before being reprojected into the raster CRS.
    None is returned when the site falls outside the raster bounds or maps to
    a pixel outside the raster grid. The window is clipped at the raster
    edges, so it can be smaller than 3x3.
    """
    lat, lon = site_position[0], site_position[1]
    xs, ys = transform_coords("EPSG:4326", src.crs, [lon], [lat])
    x, y = xs[0], ys[0]

    bounds = src.bounds
    if not (bounds.left <= x <= bounds.right and bounds.bottom <= y <= bounds.top):
        return None

    row, col = src.index(x, y)
    if not (0 <= row < src.height and 0 <= col < src.width):
        return None

    rows = (max(0, row - 1), min(src.height, row + 2))
    cols = (max(0, col - 1), min(src.width, col + 2))
    return rows, cols


def _summarize_window(values):
    """Return mean/std/min/max of a non-empty 1-D array as plain floats."""
    return {
        "mean": float(np.mean(values)),
        "std": float(np.std(values)),
        "min": float(np.min(values)),
        "max": float(np.max(values)),
    }


def _get_spatial_stats_from_raster(raster_file, site_position):
    """Extract GCC statistics (mean, std, min, max) in a 3x3 window at the site.

    Single-band rasters are treated as already containing GCC; only finite,
    strictly positive pixels are used. Rasters with at least three bands have
    GCC computed per pixel as ``green / (red + green + blue)`` using
    ``RED_BAND``, ``GREEN_BAND`` and ``BLUE_BAND``; pixels with a non-positive
    or NaN band sum, or any negative band value, are ignored.

    Args:
        raster_file: Path of the raster to open with rasterio.
        site_position: Sequence indexed as ``(lat, lon)`` in EPSG:4326.

    Returns:
        A dict with keys ``mean``, ``std``, ``min`` and ``max``, or None when
        the raster has an unsupported band count, the site lies outside the
        raster, no valid pixel exists in the window, or any error occurs
        while reading.
    """
    # Local import: only the window helper is needed, and only here.
    from rasterio.windows import Window

    try:
        with rasterio.open(raster_file) as src:
            # Two-band (or empty) rasters are not supported.
            if src.count != 1 and src.count < 3:
                return None

            # Locate the site first so that only the window is read from disk
            # instead of loading whole bands for a handful of pixels.
            located = _site_window_bounds(src, site_position)
            if located is None:
                return None
            window = Window.from_slices(*located)

            def read_band(band):
                return src.read(band, window=window).astype(np.float32)

            if src.count == 1:
                gcc = read_band(1)
                valid = np.isfinite(gcc) & (gcc > 0)
                if not np.any(valid):
                    return None
                return _summarize_window(gcc[valid])

            blue = read_band(BLUE_BAND)
            green = read_band(GREEN_BAND)
            red = read_band(RED_BAND)

            total = red + green + blue
            valid = (
                (total > 0)
                & ~np.isnan(total)
                & (blue >= 0)
                & (green >= 0)
                & (red >= 0)
            )
            if not np.any(valid):
                return None
            return _summarize_window(green[valid] / total[valid])
    except Exception:
        # Deliberate best-effort: an unreadable or malformed raster is
        # reported to the caller as "no statistics" rather than aborting.
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def _raster_date_key(stem):
    """Return the ISO datetime string for the first valid YYYYMMDD token in ``stem``.

    The stem is split on underscores and every 8-digit token is tried in
    order, so a numeric token that is not a real calendar date does not hide
    a valid date token that follows it. Returns None when no token parses.
    """
    for token in stem.split("_"):
        if len(token) != 8 or not token.isdigit():
            continue
        try:
            return datetime.strptime(token, "%Y%m%d").isoformat()
        except ValueError:
            continue
    return None


def calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position):
    """Calculate Pearson r and R² between PhenoCam values and raster window means.

    Every ``*.geotiff`` file in ``fusion_raster_dir`` (files whose name
    contains ``DIST_CLOUD`` are skipped) is matched to a PhenoCam value by the
    date embedded in its file name, and the mean of the 3x3 window around the
    site is paired with that value.

    Args:
        fusion_raster_dir: Directory holding the fusion rasters.
        phenocam_ts: Mapping looked up with ISO datetime strings of the form
            produced by ``datetime.isoformat()`` (e.g. ``2024-05-01T00:00:00``).
        site_position: Site coordinates passed through to
            ``_get_spatial_stats_from_raster``.

    Returns:
        A dict with ``pearson_r``, ``r_squared`` and ``n_samples``, or None
        when the directory does not exist or fewer than two rasters could be
        paired with a PhenoCam value.
    """
    fusion_raster_dir = Path(fusion_raster_dir)
    if not fusion_raster_dir.exists():
        return None

    spatial_means = []
    phenocam_vals = []

    for raster_file in sorted(fusion_raster_dir.glob("*.geotiff")):
        if "DIST_CLOUD" in raster_file.name:
            continue

        date_key = _raster_date_key(raster_file.stem)
        if date_key is None:
            continue

        phenocam_val = phenocam_ts.get(date_key)
        if phenocam_val is None:
            continue

        stats = _get_spatial_stats_from_raster(raster_file, site_position)
        if stats is None:
            continue

        spatial_means.append(stats["mean"])
        phenocam_vals.append(phenocam_val)

    # A correlation needs at least two paired samples.
    if len(spatial_means) < 2:
        return None

    spatial_means = np.array(spatial_means)
    phenocam_vals = np.array(phenocam_vals)

    return {
        "pearson_r": pearson_correlation(phenocam_vals, spatial_means),
        "r_squared": r_squared(phenocam_vals, spatial_means),
        "n_samples": len(spatial_means),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position):
    """Compute temporal and spatial metrics for a single fusion scenario.

    The scenario directory is ``data/<site_name>/<season>/processed_<strategy>_sigma<sigma>``.
    The fusion GCC timeseries is loaded from ``gcc/fusion/timeseries.json``
    inside it, and the PhenoCam timeseries from
    ``raw/phenocam/phenocam_gcc.json`` under the season directory.

    Returns:
        A ``(temporal_metrics, spatial_metrics)`` tuple, or ``(None, None)``
        when either timeseries is missing or empty.
    """
    season_root = Path(f"data/{site_name}/{season}")
    scenario_root = season_root / f"processed_{strategy}_sigma{sigma}"

    fusion_ts = load_timeseries(scenario_root / "gcc" / "fusion" / "timeseries.json")
    phenocam_ts = load_timeseries(season_root / "raw" / "phenocam" / "phenocam_gcc.json")

    # Both series are required; without either one nothing can be compared.
    if not (fusion_ts and phenocam_ts):
        return None, None

    temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
    spatial_metrics = calculate_spatial_metrics(
        scenario_root / "fusion", phenocam_ts, site_position
    )
    return temporal_metrics, spatial_metrics
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_all_metrics(season, site_name, site_position):
|
def calculate_all_metrics(season, site_name, site_position):
|
||||||
"""Calculate metrics for all 4 scenarios and save to JSON."""
|
"""Calculate metrics for all 4 scenarios and save to JSON."""
|
||||||
results = {"temporal": {}, "spatial": {}}
|
del site_position
|
||||||
|
results = {"temporal": {}}
|
||||||
base = Path(f"data/{site_name}/{season}")
|
base = Path(f"data/{site_name}/{season}")
|
||||||
|
|
||||||
# Load phenocam timeseries once (same for all scenarios)
|
# Load phenocam timeseries once (same for all scenarios)
|
||||||
|
|
@ -457,19 +285,10 @@ def calculate_all_metrics(season, site_name, site_position):
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Calculate temporal metrics
|
|
||||||
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
|
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
|
||||||
if temporal_metrics:
|
if temporal_metrics:
|
||||||
results["temporal"][scenario_name] = temporal_metrics
|
results["temporal"][scenario_name] = temporal_metrics
|
||||||
|
|
||||||
# Calculate spatial metrics
|
|
||||||
fusion_raster_dir = base / processed_dir / "fusion"
|
|
||||||
spatial_metrics = calculate_spatial_metrics(
|
|
||||||
fusion_raster_dir, phenocam_ts, site_position
|
|
||||||
)
|
|
||||||
if spatial_metrics:
|
|
||||||
results["spatial"][scenario_name] = spatial_metrics
|
|
||||||
|
|
||||||
for strategy in ["aggressive", "nonaggressive"]:
|
for strategy in ["aggressive", "nonaggressive"]:
|
||||||
for sigma in [20, 30]:
|
for sigma in [20, 30]:
|
||||||
scenario_name = f"{strategy}_sigma{sigma}_itb"
|
scenario_name = f"{strategy}_sigma{sigma}_itb"
|
||||||
|
|
@ -484,35 +303,6 @@ def calculate_all_metrics(season, site_name, site_position):
|
||||||
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
|
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
|
||||||
if temporal_metrics:
|
if temporal_metrics:
|
||||||
results["temporal"][scenario_name] = temporal_metrics
|
results["temporal"][scenario_name] = temporal_metrics
|
||||||
fusion_raster_dir = base / processed_dir / "fusion"
|
|
||||||
spatial_metrics = calculate_spatial_metrics(
|
|
||||||
fusion_raster_dir, phenocam_ts, site_position
|
|
||||||
)
|
|
||||||
if spatial_metrics:
|
|
||||||
results["spatial"][scenario_name] = spatial_metrics
|
|
||||||
|
|
||||||
# Add summary (primary: NSE vs PhenoCam; R² kept for comparison)
|
|
||||||
if results["temporal"]:
|
|
||||||
ti = list(results["temporal"].items())
|
|
||||||
|
|
||||||
def _score(k):
|
|
||||||
return lambda x: x[1].get(k) if x[1].get(k) is not None else float("-inf")
|
|
||||||
|
|
||||||
results["summary"] = {
|
|
||||||
"best_temporal_scenario": max(ti, key=_score("nse_pc"))[0],
|
|
||||||
"best_temporal_scenario_by_r2": max(ti, key=_score("r_squared"))[0],
|
|
||||||
}
|
|
||||||
|
|
||||||
if results["spatial"]:
|
|
||||||
best_spatial = max(
|
|
||||||
results["spatial"].items(),
|
|
||||||
key=lambda x: x[1].get("r_squared", -1)
|
|
||||||
if x[1].get("r_squared") is not None
|
|
||||||
else -1,
|
|
||||||
)
|
|
||||||
if "summary" not in results:
|
|
||||||
results["summary"] = {}
|
|
||||||
results["summary"]["best_spatial_scenario"] = best_spatial[0]
|
|
||||||
|
|
||||||
# Save results
|
# Save results
|
||||||
output_path = Path(f"data/{site_name}/{season}/metrics.json")
|
output_path = Path(f"data/{site_name}/{season}/metrics.json")
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,12 @@
|
||||||
th { background: #f5f5f5; }
|
th { background: #f5f5f5; }
|
||||||
td.num { text-align: right; font-variant-numeric: tabular-nums; }
|
td.num { text-align: right; font-variant-numeric: tabular-nums; }
|
||||||
.compare-note { font-size: 12px; color: #555; margin: 0 0 8px 0; max-width: 720px; }
|
.compare-note { font-size: 12px; color: #555; margin: 0 0 8px 0; max-width: 720px; }
|
||||||
|
.section-note { font-size: 12px; color: #555; margin: -6px 0 8px 0; max-width: 720px; line-height: 1.45; }
|
||||||
|
.section-note code { background: #f1f1f1; padding: 1px 4px; border-radius: 3px; font-size: 11px; }
|
||||||
|
.intro { font-size: 13px; color: #333; background: #fafafa; border: 1px solid #e5e5e5;
|
||||||
|
padding: 10px 12px; border-radius: 4px; margin-bottom: 18px; line-height: 1.5; }
|
||||||
|
.intro ul { margin: 6px 0 0 18px; padding: 0; }
|
||||||
|
.intro li { margin-bottom: 2px; }
|
||||||
.empty { color: #666; font-style: italic; }
|
.empty { color: #666; font-style: italic; }
|
||||||
.err { color: #a00; }
|
.err { color: #a00; }
|
||||||
</style>
|
</style>
|
||||||
|
|
@ -44,8 +50,6 @@
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
const METRIC_COLS = ["pearson_r", "r_squared", "rmse", "mae", "nrmse", "nse_pc", "n_samples"];
|
const METRIC_COLS = ["pearson_r", "r_squared", "rmse", "mae", "nrmse", "nse_pc", "n_samples"];
|
||||||
/** Spatial fusion metrics in metrics.json (no RMSE block at site level). */
|
|
||||||
const SPATIAL_METRIC_COLS = ["pearson_r", "r_squared", "n_samples"];
|
|
||||||
function mv(m, c) {
|
function mv(m, c) {
|
||||||
return c === "nse_pc" ? (m.nse_pc ?? m.nse) : m[c];
|
return c === "nse_pc" ? (m.nse_pc ?? m.nse) : m[c];
|
||||||
}
|
}
|
||||||
|
|
@ -66,8 +70,9 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
function tableSection(title, obj) {
|
function tableSection(title, obj) {
|
||||||
|
const heading = title ? `<h2>${title}</h2>` : "";
|
||||||
if (!obj || typeof obj !== "object" || !Object.keys(obj).length) {
|
if (!obj || typeof obj !== "object" || !Object.keys(obj).length) {
|
||||||
return `<h2>${title}</h2><p class="empty">No data</p>`;
|
return `${heading}<p class="empty">No data</p>`;
|
||||||
}
|
}
|
||||||
const keys = Object.keys(obj).sort();
|
const keys = Object.keys(obj).sort();
|
||||||
let head = `<tr><th>Scenario</th>${METRIC_COLS.map((c) => `<th>${c}</th>`).join("")}</tr>`;
|
let head = `<tr><th>Scenario</th>${METRIC_COLS.map((c) => `<th>${c}</th>`).join("")}</tr>`;
|
||||||
|
|
@ -75,7 +80,7 @@
|
||||||
const m = obj[k] || {};
|
const m = obj[k] || {};
|
||||||
return `<tr><td>${k}</td>${METRIC_COLS.map((c) => `<td class="num">${fmt(mv(m, c))}</td>`).join("")}</tr>`;
|
return `<tr><td>${k}</td>${METRIC_COLS.map((c) => `<td class="num">${fmt(mv(m, c))}</td>`).join("")}</tr>`;
|
||||||
}).join("");
|
}).join("");
|
||||||
return `<h2>${title}</h2><table>${head}${rows}</table>`;
|
return `${heading}<table>${head}${rows}</table>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Pair BtI keys (`aggressive_sigma20`) with ItB (`aggressive_sigma20_itb`). */
|
/** Pair BtI keys (`aggressive_sigma20`) with ItB (`aggressive_sigma20_itb`). */
|
||||||
|
|
@ -121,7 +126,9 @@
|
||||||
return `<tr><td>${p.label}</td>${cells}</tr>`;
|
return `<tr><td>${p.label}</td>${cells}</tr>`;
|
||||||
})
|
})
|
||||||
.join("");
|
.join("");
|
||||||
return `<h2>${title}</h2><p class="compare-note">${blurb}</p><table>${head}${rows}</table>`;
|
const heading = title ? `<h2>${title}</h2>` : "";
|
||||||
|
const note = blurb ? `<p class="compare-note">${blurb}</p>` : "";
|
||||||
|
return `${heading}${note}<table>${head}${rows}</table>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function baselineSection(b) {
|
function baselineSection(b) {
|
||||||
|
|
@ -149,28 +156,36 @@
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let html = "";
|
let html = "";
|
||||||
|
html += `
|
||||||
|
<div class="intro">
|
||||||
|
All metrics compare a greenness index (GCC) from satellite products against PhenoCam
|
||||||
|
ground-truth GCC at the site's 3×3 pixel window.
|
||||||
|
<ul>
|
||||||
|
<li><b>BtI</b> (<i>Bands-then-Index</i>): fuse S2/S3 reflectance, then compute GCC from the fused bands.</li>
|
||||||
|
<li><b>ItB</b> (<i>Index-then-Bands</i>): compute GCC from S2 and S3 first, then fuse the GCC rasters.</li>
|
||||||
|
<li>Scenarios combine a cloud-screening <b>strategy</b> (<code>aggressive</code> / <code>nonaggressive</code>)
|
||||||
|
and an EFAST fusion <b>σ</b> (<code>sigma20</code> / <code>sigma30</code>).</li>
|
||||||
|
</ul>
|
||||||
|
</div>`;
|
||||||
if (data.phenocam_stats) {
|
if (data.phenocam_stats) {
|
||||||
html += `<h2>PhenoCam</h2><table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
|
html += `<h2>PhenoCam</h2>`;
|
||||||
|
html += `<p class="section-note">Summary statistics of the PhenoCam GCC timeseries used as ground truth for this site and season.</p>`;
|
||||||
|
html += `<table><tr><th>mean</th><th>std</th><th>min</th><th>max</th><th>n</th></tr><tr>`;
|
||||||
const p = data.phenocam_stats;
|
const p = data.phenocam_stats;
|
||||||
html += `<td class="num">${fmt(p.mean)}</td><td class="num">${fmt(p.std)}</td><td class="num">${fmt(p.min)}</td><td class="num">${fmt(p.max)}</td><td class="num">${fmt(p.n_samples)}</td></tr></table>`;
|
html += `<td class="num">${fmt(p.mean)}</td><td class="num">${fmt(p.std)}</td><td class="num">${fmt(p.min)}</td><td class="num">${fmt(p.max)}</td><td class="num">${fmt(p.n_samples)}</td></tr></table>`;
|
||||||
}
|
}
|
||||||
html += baselineSection(data.baseline);
|
const baselineHtml = baselineSection(data.baseline);
|
||||||
html += btiItbCompareSection(
|
if (baselineHtml) {
|
||||||
"Temporal: BtI vs ItB (paired)",
|
html += `<h2>Baselines (temporal vs PhenoCam)</h2>`;
|
||||||
data.temporal,
|
html += `<p class="section-note">Reference GCC series <i>before</i> any fusion: raw S2 (all dates and cloud-screened per strategy), S3 composite per strategy, and a Whittaker-smoothed S2 series (λ=400). Useful to see what fusion has to beat.</p>`;
|
||||||
"Δ = ItB − BtI. For Pearson r, R², and NSE (%), positive Δ means ItB is higher. For RMSE, MAE, and NRMSE, negative Δ means ItB is better (lower error)."
|
html += baselineHtml.replace(/^<h2>[^<]*<\/h2>/, "");
|
||||||
);
|
|
||||||
html += tableSection("Temporal (vs PhenoCam)", data.temporal);
|
|
||||||
html += btiItbCompareSection(
|
|
||||||
"Spatial: BtI vs ItB (paired)",
|
|
||||||
data.spatial,
|
|
||||||
"Δ = ItB − BtI. For Pearson r and R², positive Δ means ItB is higher.",
|
|
||||||
SPATIAL_METRIC_COLS
|
|
||||||
);
|
|
||||||
html += tableSection("Spatial (3×3 fusion mean vs PhenoCam)", data.spatial);
|
|
||||||
if (data.summary) {
|
|
||||||
html += `<h2>Summary</h2><pre style="font-size:13px;background:#f9f9f9;padding:10px;">${JSON.stringify(data.summary, null, 2)}</pre>`;
|
|
||||||
}
|
}
|
||||||
|
html += `<h2>Temporal: BtI vs ItB (paired)</h2>`;
|
||||||
|
html += `<p class="section-note">Per scenario (same strategy + σ), BtI and ItB side-by-side with <b>Δ = ItB − BtI</b>. Positive Δ is better for Pearson r, R², and NSE; negative Δ is better for RMSE, MAE, and NRMSE.</p>`;
|
||||||
|
html += btiItbCompareSection("", data.temporal, "") || `<p class="empty">No paired scenarios</p>`;
|
||||||
|
html += `<h2>Temporal (vs PhenoCam)</h2>`;
|
||||||
|
html += `<p class="section-note">Per-scenario agreement between the fusion GCC <b>timeseries</b> at the site 3×3 window and the PhenoCam GCC timeseries, across all matched dates. Scenarios ending in <code>_itb</code> are Index-then-Bands; the others are Bands-then-Index.</p>`;
|
||||||
|
html += tableSection("", data.temporal);
|
||||||
el.innerHTML = html || `<p class="empty">Empty metrics file</p>`;
|
el.innerHTML = html || `<p class="empty">Empty metrics file</p>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue