Foo
This commit is contained in:
parent
77e1488830
commit
374be6865d
19 changed files with 1276 additions and 64 deletions
1
gap_validation/__init__.py
Normal file
1
gap_validation/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Synthetic gap and withheld-S2 validation (outputs under data/.../validation/)."""
|
||||
4
gap_validation/__main__.py
Normal file
4
gap_validation/__main__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
from gap_validation.run import main
|
||||
|
||||
# Script entry point for the package's __main__ module: hand off to the CLI
# ``main`` imported from gap_validation.run.
if __name__ == "__main__":
    main()
|
||||
152
gap_validation/calendar.py
Normal file
152
gap_validation/calendar.py
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
"""Gap windows and nearest S2 acquisition (manifest inputs)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from phenology_timesat import phenocam_phenology_path
|
||||
|
||||
REFL_DATE_RE = re.compile(r"S2A_MSIL2A_(\d{8})_REFL\.tif$")
|
||||
|
||||
|
||||
def validation_dir(site_name: str, season: int) -> Path:
    """Return the relative validation output directory for a site/season.

    The path is ``data/<site_name>/<season>/validation``; it is only built
    here, not created on disk.
    """
    relative = f"data/{site_name}/{season}/validation"
    return Path(relative)
|
||||
|
||||
|
||||
def phenology_midpoint(
    site_name: str, season: int, phenology_path: Path | None = None
) -> date:
    """Pick the fusion gap midpoint for ``season``.

    Preference order: the ``green_up_50pct_date`` from the phenology JSON if it
    falls inside the calendar year ``season``, else ``green_down_50pct_date``
    under the same condition, else July 1 of ``season``.

    Args:
        site_name: Site identifier, used to locate the default phenology file.
        season: Calendar year the midpoint must fall in.
        phenology_path: Optional explicit JSON path; when omitted the path is
            obtained from ``phenocam_phenology_path(site_name, season)``.

    Returns:
        The chosen midpoint date. Any problem with the file (missing,
        unreadable, undecodable, not valid JSON, not a JSON object) yields the
        July 1 fallback rather than an exception.
    """
    fallback = date(season, 7, 1)
    path = phenology_path or phenocam_phenology_path(site_name, season)
    if not path.is_file():
        return fallback
    try:
        rec = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a file that is not valid UTF-8).
        return fallback
    if not isinstance(rec, dict):
        # Valid JSON but not an object (e.g. a list): no date fields to read.
        return fallback

    year_start, year_end = date(season, 1, 1), date(season, 12, 31)

    def _parse(s) -> date | None:
        """Parse the leading YYYY-MM-DD of ``s``; None if unusable or out of season."""
        if not s or not isinstance(s, str):
            return None
        try:
            d = datetime.strptime(s[:10], "%Y-%m-%d").date()
        except ValueError:
            return None
        return d if year_start <= d <= year_end else None

    for key in ("green_up_50pct_date", "green_down_50pct_date"):
        parsed = _parse(rec.get(key))
        if parsed is not None:
            return parsed
    return fallback
|
||||
|
||||
|
||||
def centered_window(mid: date, gap_days: int, season: int) -> tuple[date, date]:
    """Return an inclusive ``(start, end)`` window of ``gap_days`` days around ``mid``.

    ``gap_days // 2`` days are placed before ``mid`` and the remainder after
    it. If the window spills over either end of calendar year ``season`` it is
    shifted back inside the year, and clamped at the opposite year boundary.
    """
    first_day, last_day = date(season, 1, 1), date(season, 12, 31)
    days_before = gap_days // 2
    days_after = gap_days - 1 - days_before
    start = mid - timedelta(days=days_before)
    end = mid + timedelta(days=days_after)

    if start < first_day:
        # Push the window forward by the amount it undershoots January 1.
        undershoot = first_day - start
        start = first_day
        end = min(last_day, end + undershoot)
    if end > last_day:
        # Pull the window back by the amount it overshoots December 31.
        overshoot = end - last_day
        end = last_day
        start = max(first_day, start - overshoot)
    return start, end
|
||||
|
||||
|
||||
def list_s2_refl_dates(prepared_s2: Path) -> list[tuple[date, str]]:
    """Return ``(acquisition_date, filename)`` pairs for ``*REFL.tif``, sorted by date.

    Only names matching ``REFL_DATE_RE`` are considered. A name whose 8-digit
    token is not a real calendar date is skipped instead of aborting the whole
    listing. A missing directory yields an empty list.

    Args:
        prepared_s2: Directory holding the prepared S2 rasters.

    Returns:
        Pairs ordered by date; files sharing a date keep filename order.
    """
    out: list[tuple[date, str]] = []
    if not prepared_s2.is_dir():
        return out
    # Iterate in name order so the stable date sort below breaks ties by name.
    for p in sorted(prepared_s2.glob("*REFL.tif")):
        m = REFL_DATE_RE.search(p.name)
        if not m:
            continue
        try:
            d = datetime.strptime(m.group(1), "%Y%m%d").date()
        except ValueError:
            # Eight digits that do not form a valid date (e.g. month 13).
            continue
        out.append((d, p.name))
    out.sort(key=lambda pair: pair[0])
    return out
|
||||
|
||||
|
||||
def nearest_s2_acquisition(
    prediction: date, pairs: list[tuple[date, str]]
) -> tuple[date, str] | None:
    """Return the pair whose date is closest (in whole days) to ``prediction``.

    Returns None for an empty ``pairs``. When several pairs are equally close,
    the one that appears first in ``pairs`` wins.
    """
    closest = None
    closest_gap = None
    for candidate in pairs:
        gap = abs((candidate[0] - prediction).days)
        # Strict "<" keeps the earliest candidate on ties.
        if closest_gap is None or gap < closest_gap:
            closest, closest_gap = candidate, gap
    return closest
|
||||
|
||||
|
||||
def build_manifest_entries(
    site_name: str,
    season: int,
    gap_lengths: tuple[int, ...] = (15, 30, 60, 90),
    s2_calendar_strategy: str = "aggressive",
) -> list[dict]:
    """Build one manifest entry per gap length.

    Every entry shares the same midpoint (from ``phenology_midpoint``), uses
    that midpoint as the prediction date, and withholds the S2 acquisition
    nearest to it in ``data/<site>/<season>/prepared_<strategy>/s2``. Only the
    window bounds and ``gap_days`` differ between entries. If no S2 acquisition
    is found, the withheld fields are None.
    """
    mid = phenology_midpoint(site_name, season)
    s2_dir = Path(f"data/{site_name}/{season}/prepared_{s2_calendar_strategy}/s2")
    # The nearest acquisition depends only on the midpoint, so look it up once.
    nearest = nearest_s2_acquisition(mid, list_s2_refl_dates(s2_dir))
    if nearest is None:
        withheld_date = withheld_filename = None
    else:
        withheld_date, withheld_filename = nearest[0].isoformat(), nearest[1]
    mid_iso = mid.isoformat()

    def _entry(gap_days: int) -> dict:
        """Assemble the manifest record for a single gap length."""
        window_start, window_end = centered_window(mid, gap_days, season)
        return {
            "gap_days": gap_days,
            "midpoint_rule": "green_up_50pct else green_down_50pct else July01",
            "midpoint_date": mid_iso,
            "window_start": window_start.isoformat(),
            "window_end": window_end.isoformat(),
            "prediction_date": mid_iso,
            "withheld_s2_date": withheld_date,
            "withheld_s2_filename": withheld_filename,
        }

    return [_entry(gap_days) for gap_days in gap_lengths]
|
||||
|
||||
|
||||
def write_manifest(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    s2_calendar_strategy: str = "aggressive",
) -> Path:
    """Write ``gap_manifest.json`` into the validation directory and return its path.

    The directory is created if needed. The payload records the site, season,
    position, calendar strategy, and the entries from
    ``build_manifest_entries`` (default gap lengths).
    """
    target_dir = validation_dir(site_name, season)
    target_dir.mkdir(parents=True, exist_ok=True)
    manifest_path = target_dir / "gap_manifest.json"

    entries = build_manifest_entries(
        site_name, season, s2_calendar_strategy=s2_calendar_strategy
    )
    payload = {
        "site_name": site_name,
        "season": season,
        "site_position_lat_lon": list(site_position),
        "s2_calendar_strategy": s2_calendar_strategy,
        "entries": entries,
    }
    serialized = json.dumps(payload, indent=2) + "\n"
    manifest_path.write_text(serialized, encoding="utf-8")
    return manifest_path
|
||||
|
||||
|
||||
def load_manifest(site_name: str, season: int) -> dict:
    """Load and parse ``gap_manifest.json`` from the validation directory.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
    """
    manifest_path = validation_dir(site_name, season) / "gap_manifest.json"
    if manifest_path.is_file():
        return json.loads(manifest_path.read_text(encoding="utf-8"))
    raise FileNotFoundError(f"Missing manifest: {manifest_path}")
|
||||
113
gap_validation/fusion_masked.py
Normal file
113
gap_validation/fusion_masked.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""EFAST with symlinked S2 dir (withhold one acquisition); outputs under validation/."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from fusion import run_efast, run_efast_itb
|
||||
from preparation import _get_base_dir, _get_itb_base_dir
|
||||
|
||||
from gap_validation.s2_mask_dir import build_masked_s2_dir_bti, build_masked_s2_dir_itb
|
||||
|
||||
|
||||
def prepared_s3_dir(season: int, site_name: str, strategy: str) -> Path:
    """Return the ``s3`` subdirectory of the base directory given by ``_get_base_dir``."""
    base_dir = _get_base_dir(season, site_name, strategy)
    return base_dir / "s3"
|
||||
|
||||
|
||||
def validation_fusion_dir(
    site_name: str,
    season: int,
    gap_days: int,
    strategy: str,
    sigma: int | None,
    mode: str,
) -> Path:
    """Return ``data/.../validation/fusion/gap_{n}/{strategy}_sigma{20|30}_{bti|itb}/``.

    Any ``sigma`` other than 30 (including None) is labelled as 20 in the
    directory name. The path is only built, not created.
    """
    if sigma == 30:
        sigma_label = 30
    else:
        sigma_label = 20
    validation_root = Path(f"data/{site_name}/{season}/validation")
    return validation_root.joinpath(
        "fusion",
        f"gap_{gap_days}",
        f"{strategy}_sigma{sigma_label}_{mode}",
    )
|
||||
|
||||
|
||||
def run_masked_fusion_one_date(
    season: int,
    site_position: tuple[float, float],
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    prediction_date_iso: str,
    withheld_yyyymmdd: str,
    fusion_output_dir: Path,
) -> Path:
    """Run EFAST for one prediction date with one S2 acquisition withheld.

    A temporary S2 directory is populated by ``build_masked_s2_dir_bti`` /
    ``build_masked_s2_dir_itb`` (omitting ``withheld_yyyymmdd``) and passed to
    ``run_efast`` / ``run_efast_itb`` with a single-day date range.

    Args:
        season: Season (year) forwarded to the fusion runner.
        site_position: Site position tuple forwarded to the fusion runner.
        site_name: Site identifier.
        strategy: Cleaning strategy; selects the prepared input tree.
        sigma: Sigma forwarded unchanged to the fusion runner.
        mode: ``"bti"`` or ``"itb"``.
        prediction_date_iso: ISO date string; only its first 10 characters
            (YYYY-MM-DD) are used.
        withheld_yyyymmdd: Acquisition day (YYYYMMDD) to leave out.
        fusion_output_dir: Output directory; created if missing.

    Returns:
        ``fusion_output_dir``.

    Raises:
        ValueError: If ``mode`` is neither ``"bti"`` nor ``"itb"``. Raised
            before any directory is created.
    """
    # Validate first so a bad mode leaves no output or temp directory behind.
    if mode not in ("bti", "itb"):
        raise ValueError(f"mode must be bti or itb, got {mode!r}")

    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    day = prediction_date_iso[:10]
    date_range = f"{day}/{day}"  # same start and end day
    s3_dir = prepared_s3_dir(season, site_name, strategy)

    # The temp dir (and the masked S2 dir inside it) is removed on exit.
    with TemporaryDirectory(prefix="gapval_s2_") as tmp:
        tmp_s2 = Path(tmp) / "s2"
        if mode == "bti":
            prep_s2 = _get_base_dir(season, site_name, strategy) / "s2"
            build_masked_s2_dir_bti(prep_s2, withheld_yyyymmdd, tmp_s2)
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=s3_dir,
                fusion_output_dir=fusion_output_dir,
            )
        else:
            # mode == "itb": inputs come from the ItB prepared tree.
            prep_s2 = _get_itb_base_dir(season, site_name, strategy) / "s2"
            s3_itb = _get_itb_base_dir(season, site_name, strategy) / "s3"
            build_masked_s2_dir_itb(prep_s2, withheld_yyyymmdd, tmp_s2)
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=sigma,
                date_range=date_range,
                s2_output_dir=tmp_s2,
                s3_output_dir=s3_itb,
                fusion_output_dir=fusion_output_dir,
            )

    return fusion_output_dir
|
||||
|
||||
|
||||
def production_fusion_path(
    season: int,
    site_name: str,
    strategy: str,
    sigma: int | None,
    mode: str,
    yyyymmdd: str,
) -> Path:
    """Return the single-date fused raster path in the normal prepared tree (no-gap baseline).

    ``mode == "bti"`` resolves to ``REFL_<yyyymmdd>.tif`` under ``_get_base_dir``;
    any other mode resolves to ``GCC_<yyyymmdd>.tif`` under ``_get_itb_base_dir``.
    The subdirectory is ``fusion_sigma<sigma>`` when ``sigma`` is truthy, else
    ``fusion``. The file is not checked for existence.
    """
    if mode == "bti":
        tree_root = _get_base_dir(season, site_name, strategy)
        stem = "REFL"
    else:
        tree_root = _get_itb_base_dir(season, site_name, strategy)
        stem = "GCC"
    fusion_subdir = "fusion" if not sigma else f"fusion_sigma{sigma}"
    return tree_root / fusion_subdir / f"{stem}_{yyyymmdd}.tif"
|
||||
|
||||
|
||||
def withheld_s2_refl_path(
    season: int, site_name: str, strategy: str, withheld_filename: str | None
) -> Path | None:
    """Return the withheld S2 file under ``<base>/s2`` if it exists, else None.

    None is also returned when ``withheld_filename`` is empty or None.
    """
    if withheld_filename:
        candidate = _get_base_dir(season, site_name, strategy) / "s2" / withheld_filename
        if candidate.is_file():
            return candidate
    return None
|
||||
290
gap_validation/run.py
Normal file
290
gap_validation/run.py
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
"""Tier-2 gap validation CLI: manifest, masked EFAST, spatial ``nse_s2``, Whittaker crossover."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from gap_validation.calendar import load_manifest, validation_dir, write_manifest
|
||||
from gap_validation.fusion_masked import (
|
||||
production_fusion_path,
|
||||
run_masked_fusion_one_date,
|
||||
validation_fusion_dir,
|
||||
withheld_s2_refl_path,
|
||||
)
|
||||
from gap_validation.spatial_metrics import evaluate_gap_vs_withheld
|
||||
from gap_validation.whittaker_compare import first_gap_where_fusion_below_whittaker
|
||||
|
||||
|
||||
def _ymd_from_iso(iso_d: str) -> str:
    """Convert the leading ``YYYY-MM-DD`` of ``iso_d`` to ``YYYYMMDD``.

    Raises ValueError (from ``strptime``) if the first 10 characters are not a
    valid ISO date.
    """
    parsed = datetime.strptime(iso_d[:10], "%Y-%m-%d")
    return parsed.strftime("%Y%m%d")
|
||||
|
||||
|
||||
def _yyyymmdd_from_withheld_filename(fn: str) -> str | None:
    """Return the first underscore-separated token of ``fn`` that is exactly 8 digits.

    Every ``.tif`` occurrence is removed before splitting. The token is not
    checked for being a real calendar date. Returns None if no token qualifies.
    """
    tokens = fn.replace(".tif", "").split("_")
    return next(
        (tok for tok in tokens if len(tok) == 8 and tok.isdigit()),
        None,
    )
|
||||
|
||||
|
||||
def _withheld_iso(entry: dict) -> str | None:
    """Return the withheld acquisition day of a manifest entry as ``YYYY-MM-DD``.

    ``withheld_s2_date`` is preferred (its first 10 characters are returned
    as-is). Otherwise the day is derived from the 8-digit token found in
    ``withheld_s2_filename``. Returns None when neither source is usable.
    """
    recorded = entry.get("withheld_s2_date")
    if isinstance(recorded, str) and len(recorded) >= 10:
        return recorded[:10]

    filename = entry.get("withheld_s2_filename")
    if not (filename and isinstance(filename, str)):
        return None

    ymd = _yyyymmdd_from_withheld_filename(filename)
    if ymd:
        return datetime.strptime(ymd, "%Y%m%d").date().isoformat()
    return None
|
||||
|
||||
|
||||
def _fused_file(fusion_dir: Path, mode: str, ymd: str) -> Path:
    """Return the fused raster path in ``fusion_dir`` for day ``ymd``.

    The filename is ``REFL_<ymd>.tif`` for mode ``"bti"`` and ``GCC_<ymd>.tif``
    for any other mode.
    """
    if mode == "bti":
        prefix = "REFL"
    else:
        prefix = "GCC"
    return fusion_dir / f"{prefix}_{ymd}.tif"
|
||||
|
||||
|
||||
def _scenario_key(strategy: str, sigma: int | None, mode: str) -> str:
    """Return the scenario label ``<strategy>_sigma<20|30>_<mode>``.

    Any ``sigma`` other than 30 (including None) is labelled 20.
    """
    sigma_label = 20 if sigma != 30 else 30
    return "_".join((strategy, f"sigma{sigma_label}", mode))
|
||||
|
||||
|
||||
def _git_rev() -> str | None:
    """Return the current git commit hash, or None if it cannot be determined.

    ``git rev-parse HEAD`` is run from the directory two levels above this
    file. Failure to launch git or a non-zero exit status yields None.
    """
    try:
        return subprocess.check_output(
            ["git", "rev-parse", "HEAD"],
            cwd=Path(__file__).resolve().parent.parent,
            text=True,
            # The failure is handled below; keep git's "fatal: ..." message
            # out of the caller's stderr.
            stderr=subprocess.DEVNULL,
        ).strip()
    except (OSError, subprocess.CalledProcessError):
        return None
|
||||
|
||||
|
||||
def run_validation(
    site_name: str,
    season: int,
    site_position: tuple[float, float],
    strategy: str,
    sigma: int | None,
    mode: str,
    *,
    skip_manifest: bool,
    skip_fusion: bool,
    write_manifest_only: bool,
    gap_days_filter: list[int] | None,
    s2_calendar_strategy: str,
) -> Path:
    """Run the gap validation for one scenario and write a JSON summary.

    Steps: optionally (re)write the manifest, load it, then for each manifest
    entry run the masked fusion (unless skipped), score the fused raster
    against the withheld S2 acquisition, and collect a Whittaker comparison
    row. All per-entry problems are recorded as ``"error"`` results rather
    than raised.

    Args:
        site_name: Site identifier (used in ``data/<site>/<season>`` paths).
        season: Season (year).
        site_position: Position tuple stored in the manifest and forwarded to
            the fusion runner.
        strategy: Cleaning strategy for fusion inputs and the Whittaker series.
        sigma: Sigma passed to fusion; reported as 30 only when equal to 30,
            otherwise as 20.
        mode: Fusion mode string (``"bti"`` or ``"itb"``).
        skip_manifest: Do not rewrite ``gap_manifest.json``.
        skip_fusion: Do not run fusion; existing rasters are scored.
        write_manifest_only: Return right after the manifest step.
        gap_days_filter: If non-empty, only entries with these ``gap_days``.
        s2_calendar_strategy: Prepared tree used when building the manifest.

    Returns:
        Path to ``gap_validation_summary.json``, or to ``gap_manifest.json``
        when ``write_manifest_only`` is set.

    Raises:
        FileNotFoundError: From ``load_manifest`` if no manifest exists.
    """
    base = Path(f"data/{site_name}/{season}")
    vdir = validation_dir(site_name, season)
    vdir.mkdir(parents=True, exist_ok=True)

    if not skip_manifest:
        write_manifest(
            site_name, season, site_position, s2_calendar_strategy=s2_calendar_strategy
        )
    if write_manifest_only:
        # NOTE: with skip_manifest also set, this path is returned without
        # having been (re)written here.
        return vdir / "gap_manifest.json"

    manifest = load_manifest(site_name, season)
    entries = manifest["entries"]
    if gap_days_filter:
        entries = [e for e in entries if e.get("gap_days") in gap_days_filter]

    results: list[dict] = []
    for entry in entries:
        gap_days = entry["gap_days"]
        pred = entry["prediction_date"]
        fn = entry.get("withheld_s2_filename")
        if not fn:
            # The manifest found no S2 acquisition to withhold.
            results.append(
                {
                    "gap_days": gap_days,
                    "error": "no_withheld_s2_filename",
                    "entry": entry,
                }
            )
            continue
        ymd = _ymd_from_iso(pred)
        wh_ymd = _yyyymmdd_from_withheld_filename(fn)
        if not wh_ymd:
            results.append(
                {
                    "gap_days": gap_days,
                    "error": "could_not_parse_withheld_yyyymmdd",
                    "withheld_s2_filename": fn,
                }
            )
            continue
        # Prefer the date recorded in the entry; otherwise rebuild it from the
        # 8-digit token of the filename.
        withheld_iso = (
            _withheld_iso(entry) or f"{wh_ymd[:4]}-{wh_ymd[4:6]}-{wh_ymd[6:8]}"
        )

        fusion_out = validation_fusion_dir(
            site_name, season, gap_days, strategy, sigma, mode
        )
        if not skip_fusion:
            run_masked_fusion_one_date(
                season,
                site_position,
                site_name,
                strategy,
                sigma,
                mode,
                pred,
                wh_ymd,
                fusion_out,
            )

        fused_gap = _fused_file(fusion_out, mode, ymd)
        prod = production_fusion_path(season, site_name, strategy, sigma, mode, ymd)
        wh_path = withheld_s2_refl_path(season, site_name, strategy, fn)
        if wh_path is None or not fused_gap.is_file():
            # Either the withheld raster or the gap-fused raster is missing.
            results.append(
                {
                    "gap_days": gap_days,
                    "prediction_date": pred,
                    "withheld_s2_filename": fn,
                    "scenario": {
                        "strategy": strategy,
                        "sigma": 30 if sigma == 30 else 20,
                        "mode": mode,
                    },
                    "error": "missing_withheld_refl_or_fused_gap",
                    "fused_gap_path": str(fused_gap),
                }
            )
            continue

        # The production (no-gap) raster is optional; None skips that baseline.
        spatial = evaluate_gap_vs_withheld(
            wh_path,
            fused_gap,
            prod if prod.is_file() else None,
            mode,
            whittaker_context=(base, strategy, pred, withheld_iso),
        )
        # Either section may be absent from ``spatial``; fall back to None.
        fusion_nse = (spatial.get("gap") or {}).get("nse_s2")
        wh_nse = (spatial.get("whittaker") or {}).get("nse_s2")
        results.append(
            {
                "gap_days": gap_days,
                "prediction_date": pred,
                "withheld_s2_filename": fn,
                "scenario": {
                    "strategy": strategy,
                    "sigma": 30 if sigma == 30 else 20,
                    "mode": mode,
                },
                "paths": {
                    "fused_gap": str(fused_gap),
                    "fused_no_gap": str(prod) if prod.is_file() else None,
                    "withheld_s2_refl": str(wh_path),
                },
                "spatial": spatial,
                "whittaker_crossover_row": {
                    "gap_days": gap_days,
                    "nse_s2_fusion": fusion_nse,
                    "nse_s2_whittaker": wh_nse,
                },
            }
        )

    scenario = _scenario_key(strategy, sigma, mode)
    # Only successfully scored entries carry a crossover row.
    crossover_rows = [
        r["whittaker_crossover_row"]
        for r in results
        if isinstance(r.get("whittaker_crossover_row"), dict)
    ]
    summary = {
        "site_name": site_name,
        "season": season,
        "scenario": scenario,
        "command_line": sys.argv,
        "git_commit": _git_rev(),
        "manifest": str(vdir / "gap_manifest.json"),
        "results": results,
        "whittaker_crossover": {
            scenario: {
                "metric": "nse_s2_spatial_vs_withheld_s2_gcc",
                "whittaker_definition": (
                    "Whittaker λ=400 d² on cloud-screened S2 GCC from s2_preselection.json; "
                    "withheld acquisition removed from the fit; prediction is a spatially constant "
                    "field at the smoothed GCC(prediction_date), compared to withheld S2 GCC on the "
                    "same valid mask as fusion (aligned with baseline.s2_whittaker_lambda400 spirit)."
                ),
                "first_gap_days_fusion_nse_below_whittaker": first_gap_where_fusion_below_whittaker(
                    crossover_rows,
                    fusion_key="nse_s2_fusion",
                    whittaker_key="nse_s2_whittaker",
                ),
                "by_gap": crossover_rows,
            }
        },
    }
    out_path = vdir / "gap_validation_summary.json"
    out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
    return out_path
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line, run ``run_validation``, and print the returned path.

    ``--sigma 20`` (the default) is passed on as ``None``; ``--sigma 30`` is
    passed on as 30.
    """
    parser = argparse.ArgumentParser(
        description="Tier-2 withheld-S2 gap validation (outputs under data/.../validation/)."
    )

    # Site and season selection.
    parser.add_argument("--site", required=True)
    parser.add_argument("--season", type=int, required=True)
    parser.add_argument("--lat", type=float, required=True)
    parser.add_argument("--lon", type=float, required=True)

    # Fusion scenario.
    parser.add_argument(
        "--strategy", default="aggressive", choices=["aggressive", "nonaggressive"]
    )
    parser.add_argument("--sigma", type=int, default=20, choices=[20, 30])
    parser.add_argument("--mode", default="bti", choices=["bti", "itb"])
    parser.add_argument(
        "--gap-days",
        type=int,
        action="append",
        metavar="N",
        help="Restrict to gap length(s); repeatable (default: all manifest lengths).",
    )

    # Workflow switches.
    parser.add_argument("--skip-manifest", action="store_true")
    parser.add_argument(
        "--skip-fusion",
        action="store_true",
        help="Reuse existing validation fusion rasters.",
    )
    parser.add_argument(
        "--write-manifest-only",
        action="store_true",
        help="Write gap_manifest.json and exit (no EFAST).",
    )
    parser.add_argument(
        "--s2-calendar-strategy",
        default="aggressive",
        choices=["aggressive", "nonaggressive"],
        help="Which prepared_*/s2 tree is used to pick nearest S2 for withholding.",
    )

    opts = parser.parse_args()
    sigma_kw = None if opts.sigma != 30 else 30
    result_path = run_validation(
        opts.site,
        opts.season,
        (opts.lat, opts.lon),
        opts.strategy,
        sigma_kw,
        opts.mode,
        skip_manifest=opts.skip_manifest,
        skip_fusion=opts.skip_fusion,
        write_manifest_only=opts.write_manifest_only,
        gap_days_filter=opts.gap_days,
        s2_calendar_strategy=opts.s2_calendar_strategy,
    )
    print(result_path)
|
||||
|
||||
|
||||
# Allow running this module directly as a script; delegates to the CLI ``main``.
if __name__ == "__main__":
    main()
|
||||
51
gap_validation/s2_mask_dir.py
Normal file
51
gap_validation/s2_mask_dir.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""Symlink prepared S2 into a temp dir, omitting one acquisition (REFL + DIST_CLOUD)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Acquisition calendar day in prepared S2 names (BtI REFL/DIST; ItB GCC/DIST).
|
||||
S2_PREP_DATE_RE = re.compile(r"_(\d{8})_(?:REFL|GCC|DIST_CLOUD)\.tif$", re.IGNORECASE)
|
||||
|
||||
|
||||
def yyyymmdd_in_name(name: str) -> str | None:
    """Return the 8-digit day captured by ``S2_PREP_DATE_RE`` in ``name``, or None."""
    found = S2_PREP_DATE_RE.search(name)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
def build_masked_s2_dir(
    prepared_s2: Path, withheld_yyyymmdd: str, dest: Path, patterns: tuple[str, ...]
) -> int:
    """Symlink files from ``prepared_s2`` into ``dest``, leaving out one acquisition day.

    For each glob in ``patterns``, every regular file or symlink whose name
    does not carry ``withheld_yyyymmdd`` (per ``yyyymmdd_in_name``) is linked
    into ``dest`` under the same name, pointing at the resolved source. An
    existing entry with that name is replaced. ``dest`` is created if needed.

    Returns:
        The number of symlinks created.
    """
    dest.mkdir(parents=True, exist_ok=True)
    created = 0
    for glob_pattern in patterns:
        for source in sorted(prepared_s2.glob(glob_pattern)):
            if not (source.is_file() or source.is_symlink()):
                continue  # directories and other non-file entries
            if yyyymmdd_in_name(source.name) == withheld_yyyymmdd:
                continue  # this is the withheld acquisition
            target = dest / source.name
            # is_symlink() also catches dangling links that exists() misses.
            if target.exists() or target.is_symlink():
                target.unlink()
            target.symlink_to(source.resolve())
            created += 1
    return created
|
||||
|
||||
|
||||
def build_masked_s2_dir_bti(
    prepared_s2: Path, withheld_yyyymmdd: str, dest: Path
) -> int:
    """Build the masked S2 dir for BtI: links ``*REFL.tif`` and ``*DIST_CLOUD.tif``.

    Returns the number of symlinks created by ``build_masked_s2_dir``.
    """
    bti_patterns = ("*REFL.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, withheld_yyyymmdd, dest, bti_patterns)
|
||||
|
||||
|
||||
def build_masked_s2_dir_itb(
    prepared_s2: Path, withheld_yyyymmdd: str, dest: Path
) -> int:
    """Build the masked S2 dir for ItB: links ``*GCC.tif`` and ``*DIST_CLOUD.tif``.

    Returns the number of symlinks created by ``build_masked_s2_dir``.
    """
    itb_patterns = ("*GCC.tif", "*DIST_CLOUD.tif")
    return build_masked_s2_dir(prepared_s2, withheld_yyyymmdd, dest, itb_patterns)
|
||||
187
gap_validation/spatial_metrics.py
Normal file
187
gap_validation/spatial_metrics.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
"""Per-pixel GCC vs withheld S2; NSE (nse_s2); no-gap baseline; deltas."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.warp import reproject, Resampling
|
||||
from scipy.stats import pearsonr
|
||||
|
||||
# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
|
||||
VALID_REFL_THRESHOLD = 0.001
|
||||
|
||||
|
||||
def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Compute ``green / (red + green + blue)`` per element as float32.

    The arithmetic is done in float64. Elements where any band is non-finite
    or the band sum is not strictly positive come out as NaN.
    """
    green64 = green.astype(np.float64)
    total = red.astype(np.float64) + green64 + blue.astype(np.float64)
    usable = (
        np.isfinite(blue)
        & np.isfinite(green)
        & np.isfinite(red)
        & np.isfinite(total)
        & (total > 0)
    )
    gcc = np.full_like(blue, np.nan, dtype=np.float64)
    # Divide only where usable; everything else keeps the NaN fill.
    np.divide(green64, total, out=gcc, where=usable)
    return gcc.astype(np.float32)
|
||||
|
||||
|
||||
def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
    """Read fused GCC as float32 together with a copy of the raster profile.

    A raster with at least 4 bands is treated as reflectance: bands 1, 2, 3
    are used as blue, green, red and converted with ``_gcc_from_rgb``.
    Otherwise band 1 is returned as the GCC directly.
    """
    with rasterio.open(fusion_path) as src:
        profile = src.profile.copy()
        if src.count < 4:
            gcc = src.read(1).astype(np.float32)
        else:
            blue, green, red = (
                src.read(band).astype(np.float32) for band in (1, 2, 3)
            )
            gcc = _gcc_from_rgb(blue, green, red)
    return gcc, profile
|
||||
|
||||
|
||||
def warp_refl_bands_to_grid(
    refl_path: Path,
    height: int,
    width: int,
    transform,
    crs,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Reproject bands 1-3 of ``refl_path`` onto the target grid (bilinear).

    The target grid is defined by ``height``, ``width``, ``transform`` and
    ``crs``. Returns three float32 arrays of shape ``(height, width)`` for
    bands 1, 2, 3, which callers treat as blue, green, red.
    """
    warped = [np.empty((height, width), dtype=np.float32) for _ in range(3)]
    with rasterio.open(refl_path) as src:
        for band_index, destination in enumerate(warped, start=1):
            reproject(
                source=rasterio.band(src, band_index),
                destination=destination,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=crs,
                resampling=Resampling.bilinear,
            )
    blue, green, red = warped
    return blue, green, red
|
||||
|
||||
|
||||
def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
    """Return a boolean valid-pixel mask for a fused raster.

    For mode ``"itb"`` or rasters with fewer than 4 bands, a pixel is valid
    when band 1 is finite and above ``VALID_REFL_THRESHOLD``. Otherwise a
    pixel is valid when every band is finite and the per-pixel maximum across
    bands is above the threshold.
    """
    with rasterio.open(fusion_path) as src:
        single_band = mode == "itb" or src.count < 4
        if not single_band:
            stack = src.read().astype(np.float32)
            all_finite = np.isfinite(stack).all(axis=0)
            brightest = np.nanmax(stack, axis=0)
            return all_finite & (brightest > VALID_REFL_THRESHOLD)
        band = src.read(1).astype(np.float32)
        return np.isfinite(band) & (band > VALID_REFL_THRESHOLD)
|
||||
|
||||
|
||||
def spatial_scores(
    y_true_gcc: np.ndarray,
    y_pred_gcc: np.ndarray,
    mask: np.ndarray,
) -> dict:
    """Score predicted GCC against true GCC over the masked pixels.

    Returns a dict with ``n_pixels``, ``rmse``, ``mae``, ``mean_bias``
    (prediction minus truth), ``pearson_r`` and ``nse_s2`` (Nash–Sutcliffe
    efficiency relative to the spatial mean of the truth). With fewer than two
    masked pixels only ``n_pixels`` is returned. ``nse_s2`` is None when the
    truth has zero variance; ``pearson_r`` is None when either field is
    constant.
    """
    truth = y_true_gcc[mask].astype(np.float64).ravel()
    pred = y_pred_gcc[mask].astype(np.float64).ravel()
    count = int(truth.size)
    if count < 2:
        return {"n_pixels": count}

    squared_error = (truth - pred) ** 2
    rmse = float(np.sqrt(np.mean(squared_error)))
    mae = float(np.mean(np.abs(truth - pred)))
    mean_bias = float(np.mean(pred - truth))

    truth_mean = float(np.mean(truth))
    truth_spread = float(np.sum((truth - truth_mean) ** 2))
    if truth_spread > 0:
        nse_s2 = float(1.0 - np.sum(squared_error) / truth_spread)
    else:
        nse_s2 = None

    # Pearson r is undefined when either side has no variance.
    both_vary = np.std(truth) > 0 and np.std(pred) > 0
    pearson = float(pearsonr(truth, pred)[0]) if both_vary else None

    return {
        "n_pixels": count,
        "rmse": rmse,
        "mae": mae,
        "mean_bias": mean_bias,
        "pearson_r": pearson,
        "nse_s2": nse_s2,
    }
|
||||
|
||||
|
||||
def withheld_gcc_on_fusion_grid(
    withheld_refl_path: Path, fused_path: Path
) -> tuple[np.ndarray, np.ndarray, dict]:
    """Return ``(y_true, y_pred, profile)`` on the grid of ``fused_path``.

    ``y_pred`` is the GCC read from ``fused_path``. ``y_true`` is the GCC of
    the withheld S2 reflectance after warping it to the fused raster's shape,
    transform and CRS. ``profile`` is the fused raster's profile.
    """
    y_pred, profile = read_fused_gcc(fused_path)
    rows, cols = y_pred.shape
    blue, green, red = warp_refl_bands_to_grid(
        withheld_refl_path, rows, cols, profile["transform"], profile["crs"]
    )
    y_true = _gcc_from_rgb(blue, green, red)
    return y_true, y_pred, profile
|
||||
|
||||
|
||||
def common_valid_mask(
    yt: np.ndarray,
    y_gap: np.ndarray,
    y_nogap: np.ndarray | None,
    fused_gap_path: Path,
    mode: str,
) -> np.ndarray:
    """Return the mask of pixels usable for every compared field.

    A pixel is kept when it passes ``valid_mask_fused`` for the gap raster and
    when the truth GCC, the gap GCC and (if given) the no-gap GCC are all
    finite and above ``VALID_REFL_THRESHOLD``.
    """
    fields = [yt, y_gap]
    if y_nogap is not None:
        fields.append(y_nogap)

    combined = valid_mask_fused(fused_gap_path, mode)
    for field in fields:
        combined = combined & np.isfinite(field) & (field > VALID_REFL_THRESHOLD)
    return combined
|
||||
|
||||
|
||||
def evaluate_gap_vs_withheld(
    withheld_refl_path: Path,
    fused_gap_path: Path,
    fused_nogap_path: Path | None,
    mode: str,
    *,
    whittaker_context: tuple[Path, str, str, str] | None = None,
) -> dict:
    """Spatial metrics for gap and no-gap fusion, their deltas, and an optional Whittaker baseline.

    ``delta_rmse`` = RMSE_gap − RMSE_no_gap; ``delta_nse`` = NSE_no_gap − NSE_gap
    (higher gap loss → positive delta_nse).

    Args:
        withheld_refl_path: Withheld S2 reflectance raster (the truth).
        fused_gap_path: Fused raster produced with the acquisition withheld.
        fused_nogap_path: Optional fused raster from the normal run. It is
            used only if the file exists and its array has the same shape as
            the gap raster.
        mode: Fusion mode, forwarded to the valid-mask logic.
        whittaker_context: Optional ``(base, strategy, prediction_iso,
            withheld_iso)`` forwarded to ``whittaker_gcc_on_gap_masked_series``.

    Returns:
        Dict with ``"gap"`` scores; ``"no_gap"``, ``"delta_rmse"`` and
        ``"delta_nse"`` when the baseline is usable; ``"no_gap_skipped"`` with
        a reason when the baseline was dropped for a shape mismatch; and
        ``"whittaker"`` when a Whittaker value is available.
    """
    yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)
    out: dict = {}

    y_nogap = None
    if fused_nogap_path is not None and fused_nogap_path.is_file():
        y_nogap, _ = read_fused_gcc(fused_nogap_path)
        if y_nogap.shape != y_gap.shape:
            # The baseline is read as-is (no regridding). A different shape
            # would otherwise fail in the mask builder with a NumPy broadcast
            # error, so skip the baseline and record why.
            # NOTE(review): equal shapes are assumed to mean the same grid;
            # transform and CRS are not compared here.
            out["no_gap_skipped"] = (
                f"grid_shape_mismatch: no_gap {tuple(y_nogap.shape)} "
                f"vs gap {tuple(y_gap.shape)}"
            )
            y_nogap = None

    mask = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
    out["gap"] = spatial_scores(yt, y_gap, mask)
    if y_nogap is not None:
        out["no_gap"] = spatial_scores(yt, y_nogap, mask)
        g, ng = out["gap"], out["no_gap"]
        if g.get("rmse") is not None and ng.get("rmse") is not None:
            out["delta_rmse"] = float(g["rmse"] - ng["rmse"])
        if g.get("nse_s2") is not None and ng.get("nse_s2") is not None:
            out["delta_nse"] = float(ng["nse_s2"] - g["nse_s2"])

    if whittaker_context is not None:
        # Imported at call time, as in the original implementation.
        from gap_validation.whittaker_compare import whittaker_gcc_on_gap_masked_series

        base, strategy, prediction_iso, withheld_iso = whittaker_context
        wgcc = whittaker_gcc_on_gap_masked_series(
            base, strategy, prediction_iso, withheld_iso
        )
        if wgcc is not None:
            out["whittaker"] = constant_field_scores(yt, float(wgcc), mask)
    return out
|
||||
|
||||
|
||||
def constant_field_scores(
    y_true_gcc: np.ndarray, scalar: float, mask: np.ndarray
) -> dict:
    """Score a spatially constant prediction ``scalar`` against ``y_true_gcc``.

    A float32 field filled with ``scalar`` and shaped like ``y_true_gcc`` is
    scored with ``spatial_scores`` over ``mask``.
    """
    constant_prediction = np.full_like(y_true_gcc, scalar, dtype=np.float32)
    return spatial_scores(y_true_gcc, constant_prediction, mask)
|
||||
64
gap_validation/whittaker_compare.py
Normal file
64
gap_validation/whittaker_compare.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""Whittaker S2 GCC (λ=400 d²) as a spatial constant vs withheld S2 GCC; crossover vs fusion nse_s2."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_stats import (
|
||||
WHITTAKER_LAMBDA_DAYS_SQ,
|
||||
_norm_date_key,
|
||||
_s2_gcc_series_from_preselection,
|
||||
_whittaker_smooth_dict,
|
||||
)
|
||||
|
||||
|
||||
def whittaker_gcc_on_gap_masked_series(
    base: Path,
    strategy: str,
    prediction_iso: str,
    withheld_iso: str,
    lam: float = WHITTAKER_LAMBDA_DAYS_SQ,
) -> float | None:
    """Whittaker-smoothed S2 GCC at the prediction date, fitted **without** the withheld day.

    The series comes from ``_s2_gcc_series_from_preselection(base)``. A date is
    kept when it has a flags entry, its flag for the chosen strategy (index 0
    for ``"aggressive"``, index 1 otherwise) is falsy, and it is not the
    withheld day. The smoother therefore never sees the target acquisition.

    Returns None when either date key cannot be normalised, the series is
    empty, fewer than two observations remain, or the smoothed result has no
    value at the prediction date.
    """
    pred_key = _norm_date_key(prediction_iso)
    withheld_key = _norm_date_key(withheld_iso)
    if not (pred_key and withheld_key):
        return None

    series, flags = _s2_gcc_series_from_preselection(base)
    if not series:
        return None

    # NOTE(review): a truthy flag presumably marks the date as screened out
    # under that strategy; confirm against metrics_stats.
    flag_column = 1 if strategy != "aggressive" else 0
    observations = []
    for day, gcc in series.items():
        if day not in flags:
            continue
        if flags[day][flag_column]:
            continue
        if _norm_date_key(day) == withheld_key:
            continue
        observations.append((day, gcc))
    observations.sort()

    if len(observations) < 2:
        return None
    days, values = zip(*observations)
    smoothed = _whittaker_smooth_dict(days, values, lam)
    return smoothed.get(pred_key)
|
||||
|
||||
|
||||
def first_gap_where_fusion_below_whittaker(
    rows: list[dict],
    *,
    fusion_key: str = "nse_s2",
    whittaker_key: str = "nse_s2",
) -> int | None:
    """Return the smallest ``gap_days`` whose fusion metric is strictly below Whittaker's.

    Rows where either metric is missing or None are ignored. Returns None if
    no remaining row satisfies ``row[fusion_key] < row[whittaker_key]``. With
    both keys left at the shared default the comparison can never be true, so
    callers are expected to pass distinct keys.
    """
    comparable = sorted(
        (
            row
            for row in rows
            if row.get(fusion_key) is not None and row.get(whittaker_key) is not None
        ),
        key=lambda row: row["gap_days"],
    )
    return next(
        (
            int(row["gap_days"])
            for row in comparable
            if row[fusion_key] < row[whittaker_key]
        ),
        None,
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue