foo
This commit is contained in:
parent
be17f64aa2
commit
e3af4bf2f4
5 changed files with 333 additions and 32 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -44,5 +44,4 @@ dist/
|
|||
Thumbs.db
|
||||
|
||||
AGENTS.md
|
||||
METHODOLOGY.md
|
||||
.vibe
|
||||
263
fusion_phenology.py
Normal file
263
fusion_phenology.py
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
"""
|
||||
No-gap EFAST fusion GCC: TIMESAT green-up / green-down (50 % seasonal amplitude).
|
||||
|
||||
Reads daily ``gcc/fusion/timeseries.json`` under each ``processed_*`` scenario
|
||||
directory, runs the same TIMESAT stack as :mod:`phenology_timesat`, and writes
|
||||
``data/{site}/{season}/fusion_phenology.json`` with per-scenario transition dates
|
||||
and day offsets vs.\ PhenoCam ``phenocam_phenology.json``.
|
||||
|
||||
Gap-degraded fusion dates remain in ``validation/gap_phenology_offsets.json``
|
||||
(:mod:`gap_validation.phenology_offsets`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metrics_stats import _norm_date_key, load_timeseries
|
||||
from phenology_timesat import (
|
||||
_timesat as _timesat_pkg,
|
||||
build_yraw_three_years,
|
||||
iter_sites_seasons_from_sites_geojson,
|
||||
phenocam_phenology_path,
|
||||
run_timesat_phenology_from_yraw,
|
||||
)
|
||||
|
||||
FUSION_SCENARIO_KEYS: tuple[str, ...] = (
|
||||
"aggressive_sigma20",
|
||||
"aggressive_sigma30",
|
||||
"nonaggressive_sigma20",
|
||||
"nonaggressive_sigma30",
|
||||
"aggressive_sigma20_itb",
|
||||
"aggressive_sigma30_itb",
|
||||
"nonaggressive_sigma20_itb",
|
||||
"nonaggressive_sigma30_itb",
|
||||
)
|
||||
|
||||
|
||||
def fusion_phenology_path(site_name: str, season: int) -> Path:
|
||||
return Path(f"data/{site_name}/{season}/fusion_phenology.json")
|
||||
|
||||
|
||||
def parse_scenario_key(key: str) -> tuple[str, int, str]:
|
||||
"""``aggressive_sigma20`` / ``nonaggressive_sigma30_itb`` → (strategy, sigma, mode)."""
|
||||
mode = "itb" if key.endswith("_itb") else "bti"
|
||||
base = key.replace("_itb", "")
|
||||
m = re.match(r"^(aggressive|nonaggressive)_sigma(\d+)$", base)
|
||||
if not m:
|
||||
raise ValueError(f"Cannot parse scenario key: {key!r}")
|
||||
return m.group(1), int(m.group(2)), mode
|
||||
|
||||
|
||||
def fusion_gcc_timeseries_path(site_name: str, season: int, scenario_key: str) -> Path:
|
||||
strategy, sigma, mode = parse_scenario_key(scenario_key)
|
||||
if mode == "bti":
|
||||
processed = f"processed_{strategy}_sigma{sigma}"
|
||||
else:
|
||||
processed = f"processed_{strategy}_itb_sigma{sigma}"
|
||||
return Path(f"data/{site_name}/{season}/{processed}/gcc/fusion/timeseries.json")
|
||||
|
||||
|
||||
def fusion_gcc_by_date(ts_path: Path) -> dict[str, float]:
|
||||
"""YYYY-MM-DD → GCC from fusion ``timeseries.json``."""
|
||||
raw = load_timeseries(ts_path)
|
||||
out: dict[str, float] = {}
|
||||
for k, v in raw.items():
|
||||
nk = _norm_date_key(k)
|
||||
if nk and v is not None:
|
||||
try:
|
||||
fv = float(v)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if fv == fv: # finite
|
||||
out[nk] = fv
|
||||
return out
|
||||
|
||||
|
||||
def timesat_transitions_from_by_date(
|
||||
by_date: dict[str, float], season: int
|
||||
) -> dict[str, str | float | None]:
|
||||
"""Run TIMESAT on fusion GCC; return transition dates for *season*."""
|
||||
if len(by_date) < 10:
|
||||
return {
|
||||
"green_up_50pct_date": None,
|
||||
"green_down_50pct_date": None,
|
||||
"timesat_input": None,
|
||||
"n_values": len(by_date),
|
||||
}
|
||||
y1, y2, y3 = season - 1, season, season + 1
|
||||
yraw, stack_mode = build_yraw_three_years(by_date, y1, y2, y3)
|
||||
out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
|
||||
return {
|
||||
"green_up_50pct_date": out.get("green_up_50pct_date"),
|
||||
"green_down_50pct_date": out.get("green_down_50pct_date"),
|
||||
"timesat_input": stack_mode,
|
||||
"n_values": len(by_date),
|
||||
}
|
||||
|
||||
|
||||
def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
|
||||
if not iso_a or not iso_b:
|
||||
return None
|
||||
try:
|
||||
a = datetime.strptime(iso_a[:10], "%Y-%m-%d").date()
|
||||
b = datetime.strptime(iso_b[:10], "%Y-%m-%d").date()
|
||||
return abs((a - b).days)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _offsets_vs_reference(
|
||||
fused: dict[str, str | float | None], reference: dict
|
||||
) -> dict[str, int | None]:
|
||||
ref_up = reference.get("green_up_50pct_date")
|
||||
ref_dn = reference.get("green_down_50pct_date")
|
||||
fup = fused.get("green_up_50pct_date")
|
||||
fdn = fused.get("green_down_50pct_date")
|
||||
return {
|
||||
"abs_day_offset_green_up": _day_offset(fup, ref_up),
|
||||
"abs_day_offset_green_down": _day_offset(fdn, ref_dn),
|
||||
}
|
||||
|
||||
|
||||
def compute_fusion_phenology_for_site(
|
||||
site_name: str,
|
||||
season: int,
|
||||
*,
|
||||
scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
|
||||
) -> dict:
|
||||
ref_path = phenocam_phenology_path(site_name, season)
|
||||
reference = (
|
||||
json.loads(ref_path.read_text(encoding="utf-8")) if ref_path.is_file() else {}
|
||||
)
|
||||
scenarios: dict[str, dict] = {}
|
||||
for key in scenario_keys:
|
||||
ts_path = fusion_gcc_timeseries_path(site_name, season, key)
|
||||
if not ts_path.is_file():
|
||||
scenarios[key] = {
|
||||
"workflow": parse_scenario_key(key)[2],
|
||||
"missing_timeseries": str(ts_path),
|
||||
}
|
||||
continue
|
||||
by_date = fusion_gcc_by_date(ts_path)
|
||||
fused = timesat_transitions_from_by_date(by_date, season)
|
||||
strategy, sigma, mode = parse_scenario_key(key)
|
||||
scenarios[key] = {
|
||||
"workflow": mode,
|
||||
"strategy": strategy,
|
||||
"sigma": sigma,
|
||||
"timeseries_path": str(ts_path),
|
||||
**fused,
|
||||
**_offsets_vs_reference(fused, reference),
|
||||
}
|
||||
return {
|
||||
"site_name": site_name,
|
||||
"season": season,
|
||||
"reference": {
|
||||
"source": str(ref_path) if ref_path.is_file() else None,
|
||||
"green_up_50pct_date": reference.get("green_up_50pct_date"),
|
||||
"green_down_50pct_date": reference.get("green_down_50pct_date"),
|
||||
},
|
||||
"scenarios": scenarios,
|
||||
}
|
||||
|
||||
|
||||
def write_fusion_phenology_for_site(
|
||||
site_name: str,
|
||||
season: int,
|
||||
*,
|
||||
scenario_keys: tuple[str, ...] = FUSION_SCENARIO_KEYS,
|
||||
) -> Path | None:
|
||||
if _timesat_pkg is None:
|
||||
out = fusion_phenology_path(site_name, season)
|
||||
print(
|
||||
f"[Fusion phenology] Skipped (no timesat); would write {out}. "
|
||||
"pip install timesat"
|
||||
)
|
||||
return None
|
||||
payload = compute_fusion_phenology_for_site(
|
||||
site_name, season, scenario_keys=scenario_keys
|
||||
)
|
||||
out = fusion_phenology_path(site_name, season)
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
out.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
||||
n_ok = sum(
|
||||
1
|
||||
for s in payload["scenarios"].values()
|
||||
if s.get("green_up_50pct_date") or s.get("green_down_50pct_date")
|
||||
)
|
||||
print(
|
||||
f"[Fusion phenology] Wrote {out} ({n_ok}/{len(scenario_keys)} scenarios with "
|
||||
f"≥1 transition date)"
|
||||
)
|
||||
return out
|
||||
|
||||
|
||||
def write_fusion_phenology_all(
|
||||
*,
|
||||
sites_geojson: str | Path = "data/sites.geojson",
|
||||
seasons: dict[str, int] | None = None,
|
||||
) -> int:
|
||||
if seasons:
|
||||
pairs = sorted((s, seasons[s]) for s in seasons.keys())
|
||||
else:
|
||||
pairs = iter_sites_seasons_from_sites_geojson(sites_geojson)
|
||||
n = 0
|
||||
for site, season in pairs:
|
||||
print(f"=== {site} {season} ===")
|
||||
if write_fusion_phenology_for_site(site, season):
|
||||
n += 1
|
||||
print(f"[Fusion phenology] Processed {n} site/season pair(s).")
|
||||
return n
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(
|
||||
description="TIMESAT transitions on no-gap EFAST fusion GCC timeseries."
|
||||
)
|
||||
ap.add_argument("--site", type=str, default=None)
|
||||
ap.add_argument("--season", type=int, default=None)
|
||||
ap.add_argument(
|
||||
"--all",
|
||||
action="store_true",
|
||||
help="All sites in data/sites.geojson (use PRIMARY_SEASON when --primary-only).",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--primary-only",
|
||||
action="store_true",
|
||||
help="With --all: only thesis primary seasons per site.",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--sites-geojson",
|
||||
type=Path,
|
||||
default=Path("data/sites.geojson"),
|
||||
)
|
||||
args = ap.parse_args()
|
||||
if _timesat_pkg is None:
|
||||
raise SystemExit("Install timesat: pip install timesat")
|
||||
|
||||
primary = {
|
||||
"forthgr": 2024,
|
||||
"innsbruck": 2024,
|
||||
"pitsalu": 2024,
|
||||
"vindeln2": 2023,
|
||||
"sunflowerjerez1": 2024,
|
||||
"institutekarnobat": 2024,
|
||||
}
|
||||
if args.all:
|
||||
write_fusion_phenology_all(
|
||||
sites_geojson=args.sites_geojson,
|
||||
seasons=primary if args.primary_only else None,
|
||||
)
|
||||
return
|
||||
if not args.site or args.season is None:
|
||||
raise SystemExit("Provide --site and --season, or use --all --primary-only")
|
||||
write_fusion_phenology_for_site(args.site, args.season)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -7,11 +7,8 @@ import json
|
|||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from phenology_timesat import (
|
||||
build_yraw_three_years,
|
||||
phenocam_phenology_path,
|
||||
run_timesat_phenology_from_yraw,
|
||||
)
|
||||
from fusion_phenology import timesat_transitions_from_by_date
|
||||
from phenology_timesat import phenocam_phenology_path
|
||||
|
||||
from gap_validation.batch_spatial import (
|
||||
PRIMARY_SEASON,
|
||||
|
|
@ -35,9 +32,7 @@ def _day_offset(iso_a: str | None, iso_b: str | None) -> int | None:
|
|||
|
||||
|
||||
def _timesat_transitions(by_date: dict[str, float], season: int) -> dict[str, str | None]:
|
||||
y1, y2, y3 = season - 1, season, season + 1
|
||||
yraw, _mode = build_yraw_three_years(by_date, y1, y2, y3)
|
||||
out = run_timesat_phenology_from_yraw(yraw, (y1, y2, y3))
|
||||
out = timesat_transitions_from_by_date(by_date, season)
|
||||
return {
|
||||
"green_up": out.get("green_up_50pct_date"),
|
||||
"green_down": out.get("green_down_50pct_date"),
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ from scipy.stats import pearsonr
|
|||
|
||||
# Match postprocessing valid mask on reflectance (METH / postprocessing.py).
|
||||
VALID_REFL_THRESHOLD = 0.001
|
||||
GCC_DENOM_EPS = 1e-3
|
||||
MAX_REPORTED_NSE_S2 = 20.0
|
||||
|
||||
|
||||
def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.ndarray:
|
||||
|
|
@ -18,15 +20,27 @@ def _gcc_from_rgb(blue: np.ndarray, green: np.ndarray, red: np.ndarray) -> np.nd
|
|||
out = np.full_like(blue, np.nan, dtype=np.float64)
|
||||
m = (
|
||||
np.isfinite(t)
|
||||
& (t > 0)
|
||||
& (t >= GCC_DENOM_EPS)
|
||||
& np.isfinite(blue)
|
||||
& np.isfinite(green)
|
||||
& np.isfinite(red)
|
||||
& (blue > GCC_DENOM_EPS)
|
||||
& (green > GCC_DENOM_EPS)
|
||||
& (red > GCC_DENOM_EPS)
|
||||
)
|
||||
out[m] = green[m].astype(np.float64) / t[m]
|
||||
return out.astype(np.float32)
|
||||
|
||||
|
||||
def _positive_bgr_mask(fusion_path: Path) -> np.ndarray | None:
|
||||
"""Pixels with strictly positive blue, green, red (BtI REFL); None if not applicable."""
|
||||
with rasterio.open(fusion_path) as src:
|
||||
if src.count < 3:
|
||||
return None
|
||||
stacks = src.read(indexes=[1, 2, 3]).astype(np.float32)
|
||||
return np.isfinite(stacks).all(axis=0) & (stacks > GCC_DENOM_EPS).all(axis=0)
|
||||
|
||||
|
||||
def read_fused_gcc(fusion_path: Path) -> tuple[np.ndarray, dict]:
|
||||
"""Fused GCC: BtI from 4-band REFL or ItB single-band GCC."""
|
||||
with rasterio.open(fusion_path) as src:
|
||||
|
|
@ -73,8 +87,10 @@ def valid_mask_fused(fusion_path: Path, mode: str) -> np.ndarray:
|
|||
d = src.read(1).astype(np.float32)
|
||||
return np.isfinite(d) & (d > VALID_REFL_THRESHOLD)
|
||||
stacks = src.read().astype(np.float32)
|
||||
ok = np.isfinite(stacks).all(axis=0) & (
|
||||
np.nanmax(stacks, axis=0) > VALID_REFL_THRESHOLD
|
||||
with np.errstate(all="ignore"):
|
||||
mx = np.nanmax(stacks, axis=0)
|
||||
ok = np.isfinite(stacks).all(axis=0) & np.isfinite(mx) & (
|
||||
mx > VALID_REFL_THRESHOLD
|
||||
)
|
||||
return ok
|
||||
|
||||
|
|
@ -95,7 +111,11 @@ def spatial_scores(
|
|||
mae = float(np.mean(np.abs(yt - yp)))
|
||||
bias = float(np.mean(yp - yt))
|
||||
den = float(np.sum((yt - mean_t) ** 2))
|
||||
nse_s2 = float(1.0 - np.sum((yt - yp) ** 2) / den) if den > 0 else None
|
||||
nse_s2 = None
|
||||
if den > 0:
|
||||
raw = float(1.0 - np.sum((yt - yp) ** 2) / den)
|
||||
if abs(raw) <= MAX_REPORTED_NSE_S2:
|
||||
nse_s2 = raw
|
||||
r = None
|
||||
if np.std(yt) > 0 and np.std(yp) > 0:
|
||||
r = float(pearsonr(yt, yp)[0])
|
||||
|
|
@ -122,6 +142,28 @@ def withheld_gcc_on_fusion_grid(
|
|||
return yt, yp, prof
|
||||
|
||||
|
||||
def mask_gap_whittaker(
|
||||
yt: np.ndarray,
|
||||
y_gap: np.ndarray,
|
||||
fused_gap_path: Path,
|
||||
mode: str,
|
||||
) -> np.ndarray:
|
||||
"""Mask for gap fusion and Whittaker vs withheld S2 (does not require no-gap fusion)."""
|
||||
m = (
|
||||
valid_mask_fused(fused_gap_path, mode)
|
||||
& np.isfinite(yt)
|
||||
& np.isfinite(y_gap)
|
||||
& (yt > VALID_REFL_THRESHOLD)
|
||||
& (yt <= 1.0)
|
||||
& (y_gap > VALID_REFL_THRESHOLD)
|
||||
& (y_gap <= 1.0)
|
||||
)
|
||||
pos = _positive_bgr_mask(fused_gap_path)
|
||||
if pos is not None:
|
||||
m &= pos
|
||||
return m
|
||||
|
||||
|
||||
def common_valid_mask(
|
||||
yt: np.ndarray,
|
||||
y_gap: np.ndarray,
|
||||
|
|
@ -129,16 +171,14 @@ def common_valid_mask(
|
|||
fused_gap_path: Path,
|
||||
mode: str,
|
||||
) -> np.ndarray:
|
||||
"""Shared finite mask: truth GCC, gap/nogap preds, and fusion valid-data rules."""
|
||||
m = (
|
||||
valid_mask_fused(fused_gap_path, mode)
|
||||
& np.isfinite(yt)
|
||||
& np.isfinite(y_gap)
|
||||
& (yt > VALID_REFL_THRESHOLD)
|
||||
& (y_gap > VALID_REFL_THRESHOLD)
|
||||
)
|
||||
"""Mask including no-gap fusion when computing gap-vs-no-gap deltas (internal QA)."""
|
||||
m = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
|
||||
if y_nogap is not None:
|
||||
m &= np.isfinite(y_nogap) & (y_nogap > VALID_REFL_THRESHOLD)
|
||||
m &= (
|
||||
np.isfinite(y_nogap)
|
||||
& (y_nogap > VALID_REFL_THRESHOLD)
|
||||
& (y_nogap <= 1.0)
|
||||
)
|
||||
return m
|
||||
|
||||
|
||||
|
|
@ -150,18 +190,20 @@ def evaluate_gap_vs_withheld(
|
|||
*,
|
||||
whittaker_context: tuple[Path, str, str, str, str, str] | None = None,
|
||||
) -> dict:
|
||||
"""Spatial metrics for gap and no-gap; deltas; optional Whittaker constant-field vs same mask.
|
||||
"""Spatial metrics for gap and no-gap; optional Whittaker constant-field vs withheld S2.
|
||||
|
||||
``delta_rmse`` = RMSE_gap − RMSE_no_gap; ``delta_nse`` = NSE_no_gap − NSE_gap (higher gap loss → positive delta_nse).
|
||||
``delta_rmse`` / ``delta_nse`` compare gap vs no-gap fusion on a shared mask (QA only;
|
||||
``delta_nse`` = NSE_no_gap − NSE_gap, not exported to thesis tables).
|
||||
"""
|
||||
yt, y_gap, _prof = withheld_gcc_on_fusion_grid(withheld_refl_path, fused_gap_path)
|
||||
y_nogap = None
|
||||
if fused_nogap_path is not None and fused_nogap_path.is_file():
|
||||
y_nogap, _ = read_fused_gcc(fused_nogap_path)
|
||||
mask = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
|
||||
out: dict = {"gap": spatial_scores(yt, y_gap, mask)}
|
||||
mask_gw = mask_gap_whittaker(yt, y_gap, fused_gap_path, mode)
|
||||
out: dict = {"gap": spatial_scores(yt, y_gap, mask_gw)}
|
||||
if y_nogap is not None:
|
||||
out["no_gap"] = spatial_scores(yt, y_nogap, mask)
|
||||
mask_full = common_valid_mask(yt, y_gap, y_nogap, fused_gap_path, mode)
|
||||
out["no_gap"] = spatial_scores(yt, y_nogap, mask_full)
|
||||
g, ng = out["gap"], out["no_gap"]
|
||||
if g.get("rmse") is not None and ng.get("rmse") is not None:
|
||||
out["delta_rmse"] = float(g["rmse"] - ng["rmse"])
|
||||
|
|
@ -180,7 +222,7 @@ def evaluate_gap_vs_withheld(
|
|||
window_end_iso=w1,
|
||||
)
|
||||
if wgcc is not None:
|
||||
out["whittaker"] = constant_field_scores(yt, float(wgcc), mask)
|
||||
out["whittaker"] = constant_field_scores(yt, float(wgcc), mask_gw)
|
||||
return out
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from rasterio.vrt import WarpedVRT
|
|||
from rasterio import shutil as rio_shutil
|
||||
|
||||
RESOLUTION_RATIO = 21
|
||||
# Centred temporal MA on S3 LR stack (METHODOLOGY §5.4.3); odd ≥3, or 1 to disable.
|
||||
# Centred temporal MA on S3 LR stack (thesis/Method.tex, sec:data_preparation); odd ≥3, or 1 to disable.
|
||||
S3_MOVING_AVERAGE_WINDOW_DAYS = 5
|
||||
|
||||
|
||||
|
|
@ -79,7 +79,9 @@ def _get_itb_base_dir(season, site_name, cleaning_strategy):
|
|||
|
||||
|
||||
def _compute_gcc_from_refl_array(blue, green, red):
|
||||
total = red.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
|
||||
total = (
|
||||
blue.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
|
||||
)
|
||||
mask = (total > 0) & np.isfinite(total)
|
||||
gcc = np.zeros_like(green, dtype=np.float32)
|
||||
gcc[mask] = green[mask].astype(np.float32) / total[mask]
|
||||
|
|
@ -90,8 +92,8 @@ def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
|
|||
dst_s2_dir.mkdir(parents=True, exist_ok=True)
|
||||
for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
|
||||
dst = dst_s2_dir / src.name
|
||||
if dst.exists():
|
||||
continue
|
||||
if dst.is_symlink() or dst.exists():
|
||||
dst.unlink(missing_ok=True)
|
||||
try:
|
||||
dst.symlink_to(src.resolve())
|
||||
except OSError:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue