Added dropped sites.

This commit is contained in:
Felix Delattre 2026-06-17 12:04:27 +02:00
parent f188dd38ab
commit d55ee31e8d
2 changed files with 229 additions and 98 deletions

View file

@ -6,7 +6,8 @@ Inputs (``data/``, ``{year}`` = ``--evaluation-year``):
Outputs (``data/statistics_fusion_order/``):
- ``{year}.json`` paired Wilcoxon + t-test summary for NSE, RMSE, nRMSE, r
- ``{year}.json`` paired Wilcoxon + t-test summary for NSE, RMSE, nRMSE, r;
includes ``dropped_sites`` (union) and per-metric ``dropped_sites`` lists
CLI:
@ -50,48 +51,48 @@ def _r4(v: float | None) -> float | None:
return round(v, 4) if v is not None else None
def _load_site_metrics(year: int) -> list[tuple[str, dict[str, Any]]]:
    """Return ``(sitename, metrics.json payload)`` for every site under ``{year}``.

    Sites are the sub-directories of ``DATA_DIR / "metrics" / str(year)``,
    visited in sorted order; the directory name is used as the site name.
    Entries that are not directories, and directories without a
    ``metrics.json`` file, are skipped.

    Args:
        year: Evaluation year; selects the ``metrics/{year}`` directory.

    Returns:
        One ``(sitename, payload)`` tuple per site that has a
        ``metrics.json``. An empty list when the year directory is missing.

    Raises:
        json.JSONDecodeError: If a ``metrics.json`` file is not valid JSON.
    """
    metrics_dir = DATA_DIR / "metrics" / str(year)
    if not metrics_dir.is_dir():
        return []

    payloads: list[tuple[str, dict[str, Any]]] = []
    for site_dir in sorted(metrics_dir.iterdir()):
        if not site_dir.is_dir():
            continue
        path = site_dir / "metrics.json"
        if not path.is_file():
            continue
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding when decoding the file.
        payloads.append((site_dir.name, json.loads(path.read_text(encoding="utf-8"))))
    return payloads
def collect_pairs(
    site_metrics: list[tuple[str, dict[str, Any]]], metric: str
) -> tuple[list[float], list[float], list[str]]:
    """Return paired BtI / ItB values for ``metric`` and dropped site names.

    A site contributes a pair only when its payload has dict-valued ``"bti"``
    and ``"itb"`` entries and both hold a finite numeric value for ``metric``.
    Every other site is reported by name instead of being counted silently.

    Args:
        site_metrics: ``(sitename, payload)`` tuples, one per site.
        metric: Key looked up inside the ``"bti"`` and ``"itb"`` dicts.

    Returns:
        ``(bti_vals, itb_vals, dropped_sites)``. The two value lists have the
        same length and are index-aligned, one entry per retained site in
        input order. ``dropped_sites`` lists the excluded sites in input order.
    """
    import math  # local import: only needed for the finiteness check below

    bti_vals: list[float] = []
    itb_vals: list[float] = []
    dropped_sites: list[str] = []

    for site, payload in site_metrics:
        # A payload that is not a JSON object cannot hold either order; treat
        # it like a missing section instead of failing on ``.get``.
        if not isinstance(payload, dict):
            dropped_sites.append(site)
            continue

        bti = payload.get("bti")
        itb = payload.get("itb")
        if not isinstance(bti, dict) or not isinstance(itb, dict):
            dropped_sites.append(site)
            continue

        try:
            # ``float(None)`` raises TypeError, so a missing metric is
            # handled here together with non-numeric values.
            bti_v = float(bti.get(metric))
            itb_v = float(itb.get(metric))
        except (TypeError, ValueError):
            dropped_sites.append(site)
            continue

        # ``json.loads`` accepts NaN / Infinity; such values would make the
        # paired statistics meaningless, so the site is dropped and reported.
        if not (math.isfinite(bti_v) and math.isfinite(itb_v)):
            dropped_sites.append(site)
            continue

        bti_vals.append(bti_v)
        itb_vals.append(itb_v)

    return bti_vals, itb_vals, dropped_sites
def _better_order(
@ -226,16 +227,20 @@ def main() -> None:
)
metrics_out: dict[str, Any] = {}
all_dropped: set[str] = set()
for metric in METRICS:
bti_vals, itb_vals, n_dropped = collect_pairs(site_metrics, metric)
bti_vals, itb_vals, dropped_sites = collect_pairs(site_metrics, metric)
summary = paired_test(bti_vals, itb_vals, metric, alpha)
summary["n_dropped"] = n_dropped
summary["n_dropped"] = len(dropped_sites)
summary["dropped_sites"] = dropped_sites
all_dropped.update(dropped_sites)
metrics_out[metric] = summary
payload = {
"year": year,
"alpha": alpha,
"n_sites_total": n_sites_total,
"dropped_sites": sorted(all_dropped),
"metrics": metrics_out,
}