Added index then blend.

This commit is contained in:
Felix Delattre 2026-04-11 17:39:19 +02:00
parent 8e7fb1de18
commit 55d6facc4d
8 changed files with 661 additions and 68 deletions

4
.gitignore vendored
View file

@ -42,3 +42,7 @@ dist/
# OS
.DS_Store
Thumbs.db
AGENTS.md
METHODOLOGY.md
.vibe

109
fusion.py
View file

@ -1,13 +1,15 @@
"""EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
from datetime import datetime, timedelta
from preparation import _get_base_dir, RESOLUTION_RATIO
from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
def _import_efast():
"""Lazy import of efast to avoid import errors when not using efast functions."""
try:
import efast
return efast
except ImportError:
raise ImportError(
@ -15,7 +17,14 @@ def _import_efast():
)
def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None, date_range=None):
def run_efast(
season,
site_position,
site_name,
cleaning_strategy="aggressive",
sigma=None,
date_range=None,
):
lat, lon = site_position
datetime_range = date_range or f"{season}-01-01/{season}-12-31"
@ -47,7 +56,9 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive",
}
if sigma is not None:
kwargs["sigma"] = sigma
efast.fusion(current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs)
efast.fusion(
current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
)
print(
f"[EFAST] Saved: {output_file}"
if output_file.exists()
@ -60,8 +71,94 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive",
print("[EFAST] Completed")
def run_all_efast_scenarios(season, site_position, site_name, sigma_value=30, date_range=None):
def run_all_efast_scenarios(
season, site_position, site_name, sigma_value=30, date_range=None
):
"""Run EFAST fusion for all 4 scenarios. Expects prepared_*/s2 and prepared_*/s3 to exist."""
for strategy in ["aggressive", "nonaggressive"]:
run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=None, date_range=date_range)
run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma_value, date_range=date_range)
run_efast(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=None,
date_range=date_range,
)
run_efast(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=sigma_value,
date_range=date_range,
)
def run_efast_itb(
season,
site_position,
site_name,
cleaning_strategy="aggressive",
sigma=None,
date_range=None,
):
lat, lon = site_position
datetime_range = date_range or f"{season}-01-01/{season}-12-31"
efast_base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
s2_output_dir = efast_base_dir / "s2"
s3_output_dir = efast_base_dir / "s3"
fusion_output_dir = efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")
fusion_output_dir.mkdir(parents=True, exist_ok=True)
print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
efast = _import_efast()
start_str, end_str = datetime_range.split("/")
start_date = datetime.strptime(start_str, "%Y-%m-%d")
end_date = datetime.strptime(end_str, "%Y-%m-%d")
current_date = start_date
while current_date <= end_date:
date_str = current_date.strftime("%Y%m%d")
output_file = fusion_output_dir / f"GCC_{date_str}.tif"
try:
kwargs = {
"product": "GCC",
"max_days": 30,
"date_position": 2,
"minimum_acquisition_importance": 0.0,
"ratio": RESOLUTION_RATIO,
}
if sigma is not None:
kwargs["sigma"] = sigma
efast.fusion(
current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
)
print(
f"[EFAST-ITB] Saved: {output_file}"
if output_file.exists()
else f"[EFAST-ITB] No output for {date_str}"
)
except Exception as e:
print(f"[EFAST-ITB] Error {date_str}: {e}")
current_date += timedelta(days=1)
print("[EFAST-ITB] Completed")
def run_all_efast_itb_scenarios(
season, site_position, site_name, sigma_value=30, date_range=None
):
for strategy in ["aggressive", "nonaggressive"]:
run_efast_itb(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=None,
date_range=date_range,
)
run_efast_itb(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=sigma_value,
date_range=date_range,
)

View file

@ -1,4 +1,5 @@
"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
import json
import numpy as np
import rasterio
@ -67,7 +68,9 @@ def _get_ndvi_value(ndvi_file, site_position):
return None
def _create_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"):
def _create_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[NDVI-{source_name}] Creating timeseries.json...")
timeseries = []
@ -196,13 +199,23 @@ def create_ndvi_timeseries_post_process(season, site_position, site_name):
processed_dir = f"processed_{strategy}_sigma{sigma}"
for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/")
output_dir = Path(
f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/"
)
_create_timeseries_for_dir(
input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}"
input_dir,
output_dir,
site_position,
f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
)
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/")
_create_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}")
_create_timeseries_for_dir(
input_dir,
output_dir,
site_position,
f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
)
def _calculate_and_write_gcc(input_file, output_file):
@ -261,6 +274,25 @@ def _get_gcc_from_original(input_file, site_position):
"""Calculate GCC directly from original file without creating GeoTIFF."""
try:
with rasterio.open(input_file) as src:
if src.count == 1:
g = src.read(1).astype(np.float32)
lon, lat = site_position[1], site_position[0]
x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
if not (
src.bounds.left <= x[0] <= src.bounds.right
and src.bounds.bottom <= y[0] <= src.bounds.top
):
return None
row, col = src.index(x[0], y[0])
if row < 0 or row >= src.height or col < 0 or col >= src.width:
return None
r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2)
win = g[r0:r1, c0:c1]
mask = np.isfinite(win) & (win > 0)
if not np.any(mask):
return None
return float(np.mean(win[mask]))
if src.count < 3:
return None
@ -290,11 +322,21 @@ def _get_gcc_from_original(input_file, site_position):
# Calculate GCC for each pixel in window
total = red_window + green_window + blue_window
mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0)
mask = (
(total > 0)
& ~np.isnan(total)
& (blue_window >= 0)
& (green_window >= 0)
& (red_window >= 0)
)
if not np.any(mask):
negative_pixels = np.sum((blue_window < 0) | (green_window < 0) | (red_window < 0))
negative_pixels = np.sum(
(blue_window < 0) | (green_window < 0) | (red_window < 0)
)
if negative_pixels > 0:
print(f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)")
print(
f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)"
)
return None
gcc_window = np.zeros_like(green_window, dtype=np.float32)
@ -303,11 +345,13 @@ def _get_gcc_from_original(input_file, site_position):
# Return mean of valid GCC values
valid_gcc = gcc_window[mask]
return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None
except Exception as e:
except Exception:
return None
def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"):
def _create_gcc_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[GCC-{source_name}] Creating timeseries.json...")
timeseries = []
@ -342,7 +386,9 @@ def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_
f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
)
timeseries.append({"date": date, "filename": filename, "greenness_index": gcc_value})
timeseries.append(
{"date": date, "filename": filename, "greenness_index": gcc_value}
)
timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True)
@ -400,13 +446,41 @@ def create_gcc_timeseries_post_process(season, site_position, site_name):
processed_dir = f"processed_{strategy}_sigma{sigma}"
for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/")
output_dir = Path(
f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/"
)
_create_gcc_timeseries_for_dir(
input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}"
input_dir,
output_dir,
site_position,
f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
)
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/")
_create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}")
_create_gcc_timeseries_for_dir(
input_dir,
output_dir,
site_position,
f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
)
itb_dir = f"processed_{strategy}_itb_sigma{sigma}"
base_itb = Path(f"data/{site_name}/{season}/{itb_dir}")
if not base_itb.exists():
continue
for source in ["s2", "s3"]:
inp, out = base_itb / source, base_itb / "gcc" / source
_create_gcc_timeseries_for_dir(
inp,
out,
site_position,
f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}",
)
_create_gcc_timeseries_for_dir(
base_itb / "fusion",
base_itb / "gcc" / "fusion",
site_position,
f"POST-ITB-FUSION-{strategy}-σ{sigma}",
)
def _get_bands_from_original(input_file, site_position):
@ -425,7 +499,10 @@ def _get_bands_from_original(input_file, site_position):
row, col = src.index(x[0], y[0])
r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2)
bands = [src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32) for i in range(4)]
bands = [
src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32)
for i in range(4)
]
mask = ~np.any([np.isnan(b) for b in bands], axis=0)
mask &= np.all([b > 0 for b in bands], axis=0)
if not np.any(mask):
@ -440,7 +517,9 @@ def _get_bands_from_original(input_file, site_position):
return None
def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"):
def _create_bands_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[BANDS-{source_name}] Creating timeseries.json...")
timeseries = []
for f in sorted(input_dir.glob(pattern)):
@ -456,11 +535,14 @@ def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, sourc
timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True)
(output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
print(f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)")
print(
f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
)
def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
"""Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv."""
def load(p):
p = Path(p)
if not p.exists():
@ -469,6 +551,7 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
return json.loads((p / "timeseries.json").read_text())
except Exception:
return []
ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)}
gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)}
bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)}
@ -482,12 +565,16 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
export_dir.mkdir(parents=True, exist_ok=True)
(export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))
cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]
def esc(v):
s = str(v) if v is not None else ""
return f'"{s}"' if "," in s or '"' in s else s
rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
(export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
print(f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)")
print(
f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
)
def create_prepared_fusion_timeseries(season, site_position, site_name):
@ -497,17 +584,86 @@ def create_prepared_fusion_timeseries(season, site_position, site_name):
for source in ["s2", "s3"]:
inp = base / source
if inp.exists():
_create_timeseries_for_dir(inp, base / "ndvi" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif")
_create_gcc_timeseries_for_dir(inp, base / "gcc" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif")
_create_bands_timeseries_for_dir(inp, base / "bands" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif")
_write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source)
_create_timeseries_for_dir(
inp,
base / "ndvi" / source,
site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_create_gcc_timeseries_for_dir(
inp,
base / "gcc" / source,
site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_create_bands_timeseries_for_dir(
inp,
base / "bands" / source,
site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_write_export(
base / "ndvi" / source,
base / "gcc" / source,
base / "bands" / source,
base / "export" / source,
)
for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
inp = base / fusion_sub
if inp.exists():
_create_timeseries_for_dir(inp, base / "ndvi" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif")
_create_gcc_timeseries_for_dir(inp, base / "gcc" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif")
_create_bands_timeseries_for_dir(inp, base / "bands" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif")
_write_export(base / "ndvi" / fusion_sub, base / "gcc" / fusion_sub, base / "bands" / fusion_sub, base / "export" / fusion_sub)
_create_timeseries_for_dir(
inp,
base / "ndvi" / fusion_sub,
site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_create_gcc_timeseries_for_dir(
inp,
base / "gcc" / fusion_sub,
site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_create_bands_timeseries_for_dir(
inp,
base / "bands" / fusion_sub,
site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_write_export(
base / "ndvi" / fusion_sub,
base / "gcc" / fusion_sub,
base / "bands" / fusion_sub,
base / "export" / fusion_sub,
)
itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb")
if not itb.exists():
continue
for source in ["s2", "s3"]:
inp = itb / source
if inp.exists():
_create_gcc_timeseries_for_dir(
inp,
itb / "gcc" / source,
site_position,
f"PREPARED-ITB-{source.upper()}-{strategy}",
"*.tif",
)
for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
inp = itb / fusion_sub
if inp.exists():
_create_gcc_timeseries_for_dir(
inp,
itb / "gcc" / fusion_sub,
site_position,
f"FUSION-ITB-{strategy}-σ{sig or 20}",
"*.tif",
)
def create_bands_timeseries_post_process(season, site_position, site_name):
@ -518,5 +674,16 @@ def create_bands_timeseries_post_process(season, site_position, site_name):
for source in ["s2", "s3", "fusion"]:
inp, out = base / source, base / "bands" / source
if inp.exists():
_create_bands_timeseries_for_dir(inp, out, site_position, f"POST-{source.upper()}-{strategy}-σ{sigma}", "*.geotiff")
_write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source)
_create_bands_timeseries_for_dir(
inp,
out,
site_position,
f"POST-{source.upper()}-{strategy}-σ{sigma}",
"*.geotiff",
)
_write_export(
base / "ndvi" / source,
base / "gcc" / source,
base / "bands" / source,
base / "export" / source,
)

View file

@ -1,4 +1,5 @@
"""Metrics and statistics: temporal/spatial metrics and PhenoCam stats."""
import json
import numpy as np
from pathlib import Path
@ -132,6 +133,31 @@ def _get_spatial_stats_from_raster(raster_file, site_position):
"""Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window."""
try:
with rasterio.open(raster_file) as src:
if src.count == 1:
g = src.read(1).astype(np.float32)
lon, lat = site_position[1], site_position[0]
x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
if not (
src.bounds.left <= x[0] <= src.bounds.right
and src.bounds.bottom <= y[0] <= src.bounds.top
):
return None
row, col = src.index(x[0], y[0])
if row < 0 or row >= src.height or col < 0 or col >= src.width:
return None
r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2)
win = g[r0:r1, c0:c1]
mask = np.isfinite(win) & (win > 0)
if not np.any(mask):
return None
valid = win[mask]
return {
"mean": float(np.mean(valid)),
"std": float(np.std(valid)),
"min": float(np.min(valid)),
"max": float(np.max(valid)),
}
if src.count < 3:
return None
@ -161,7 +187,13 @@ def _get_spatial_stats_from_raster(raster_file, site_position):
# Calculate GCC for each pixel in window
total = red_window + green_window + blue_window
mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0)
mask = (
(total > 0)
& ~np.isnan(total)
& (blue_window >= 0)
& (green_window >= 0)
& (red_window >= 0)
)
if not np.any(mask):
return None
@ -259,7 +291,9 @@ def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position
# Calculate spatial metrics
fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position)
spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
return temporal_metrics, spatial_metrics
@ -283,7 +317,9 @@ def calculate_all_metrics(season, site_name, site_position):
results["phenocam_stats"] = phenocam_stats
# Calculate S2 baseline metrics once (S2 data is identical across scenarios)
s2_ts_path = base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json"
s2_ts_path = (
base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json"
)
s2_ts = load_timeseries(s2_ts_path)
if s2_ts:
s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts)
@ -303,7 +339,9 @@ def calculate_all_metrics(season, site_name, site_position):
fusion_ts = load_timeseries(fusion_ts_path)
if not fusion_ts:
print(f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping")
print(
f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping"
)
continue
# Calculate temporal metrics
@ -313,7 +351,30 @@ def calculate_all_metrics(season, site_name, site_position):
# Calculate spatial metrics
fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position)
spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
if spatial_metrics:
results["spatial"][scenario_name] = spatial_metrics
for strategy in ["aggressive", "nonaggressive"]:
for sigma in [20, 30]:
scenario_name = f"{strategy}_sigma{sigma}_itb"
processed_dir = f"processed_{strategy}_itb_sigma{sigma}"
fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
fusion_ts = load_timeseries(fusion_ts_path)
if not fusion_ts:
print(
f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping"
)
continue
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
if temporal_metrics:
results["temporal"][scenario_name] = temporal_metrics
fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
if spatial_metrics:
results["spatial"][scenario_name] = spatial_metrics
@ -321,14 +382,18 @@ def calculate_all_metrics(season, site_name, site_position):
if results["temporal"]:
best_temporal = max(
results["temporal"].items(),
key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1
key=lambda x: x[1].get("r_squared", -1)
if x[1].get("r_squared") is not None
else -1,
)
results["summary"] = {"best_temporal_scenario": best_temporal[0]}
if results["spatial"]:
best_spatial = max(
results["spatial"].items(),
key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1
key=lambda x: x[1].get("r_squared", -1)
if x[1].get("r_squared") is not None
else -1,
)
if "summary" not in results:
results["summary"] = {}

View file

@ -1,17 +1,23 @@
"""Post-processing: crop fusion/S2/S3 to valid pixels."""
from pathlib import Path
import numpy as np
import rasterio
from rasterio import windows
from rasterio.warp import reproject, Resampling
from rasterio.io import MemoryFile
def process_cropped(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None):
def process_cropped(
season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
"""Crop fusion to valid data, then crop S2/S3 to match."""
base = Path(f"data/{site_name}/{season}")
prepared = base / f"prepared_{cleaning_strategy}"
processed_dir = f"processed_{cleaning_strategy}_sigma{sigma}" if sigma else f"processed_{cleaning_strategy}_sigma20"
processed_dir = (
f"processed_{cleaning_strategy}_sigma{sigma}"
if sigma
else f"processed_{cleaning_strategy}_sigma20"
)
processed = base / processed_dir
s2_prep = prepared / "s2"
@ -21,7 +27,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
output_dir.mkdir(parents=True, exist_ok=True)
print(f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}")
print(
f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
)
# Crop fusion to valid data and get dimensions
fusion_dims = {}
@ -48,11 +56,29 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
output_file = processed / "fusion" / f"{date_str}_0.geotiff"
with rasterio.open(output_file, "w", **p) as dst:
dst.write(data_crop)
fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile)
fusion_dims[date_str] = (
c0,
r0,
w,
h,
transform,
src.transform,
src.crs,
src.profile,
)
print(f"[PROCESS] Cropped fusion: {output_file}")
# Crop S2 and S3 to fusion size
for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items():
for date_str, (
c0,
r0,
w,
h,
transform,
fusion_transform,
crs,
fusion_profile,
) in fusion_dims.items():
window = windows.Window(c0, r0, w, h)
# S2
for s2_file in s2_prep.glob("*REFL.tif"):
@ -61,7 +87,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
with rasterio.open(s2_file) as src:
data = src.read(window=window)
p2 = src.profile.copy()
p2.update({"width": w, "height": h, "transform": transform, "crs": crs})
p2.update(
{"width": w, "height": h, "transform": transform, "crs": crs}
)
with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data)
print(f"[PROCESS] Cropped: {output_file}")
@ -83,7 +111,7 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
src_crs=src.crs,
dst_transform=fusion_transform,
dst_crs=crs,
resampling=Resampling.nearest
resampling=Resampling.nearest,
)
# Crop using same window
data = resampled.read(window=window)
@ -96,11 +124,135 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
print("[PROCESS] Completed")
def process_cropped_itb(
season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
base = Path(f"data/{site_name}/{season}")
prepared = base / f"prepared_{cleaning_strategy}_itb"
processed_dir = (
f"processed_{cleaning_strategy}_itb_sigma{sigma}"
if sigma
else f"processed_{cleaning_strategy}_itb_sigma20"
)
processed = base / processed_dir
s2_prep = prepared / "s2"
s3_prep = prepared / "s3"
fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
output_dir.mkdir(parents=True, exist_ok=True)
print(
f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
)
fusion_dims = {}
for fusion_file in fusion_prep.glob("GCC_*.tif"):
date_str = fusion_file.stem.split("_")[1]
with rasterio.open(fusion_file) as src:
data = src.read()
valid = ~np.isnan(data) & (data > 0.001)
rows = np.any(valid, axis=(0, 2))
cols = np.any(valid, axis=(0, 1))
row_idx = np.where(rows)[0]
col_idx = np.where(cols)[0]
if len(row_idx) == 0 or len(col_idx) == 0:
print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)")
continue
r0, r1 = row_idx[0], row_idx[-1]
c0, c1 = col_idx[0], col_idx[-1]
w, h = c1 - c0 + 1, r1 - r0 + 1
window = windows.Window(c0, r0, w, h)
data_crop = src.read(window=window)
transform = rasterio.windows.transform(window, src.transform)
p = src.profile.copy()
p.update({"width": w, "height": h, "transform": transform})
output_file = processed / "fusion" / f"{date_str}_0.geotiff"
with rasterio.open(output_file, "w", **p) as dst:
dst.write(data_crop)
fusion_dims[date_str] = (
c0,
r0,
w,
h,
transform,
src.transform,
src.crs,
src.profile,
)
print(f"[PROCESS-ITB] Cropped fusion: {output_file}")
for date_str, (
c0,
r0,
w,
h,
transform,
fusion_transform,
crs,
fusion_profile,
) in fusion_dims.items():
window = windows.Window(c0, r0, w, h)
for s2_file in s2_prep.glob("*GCC.tif"):
parts = s2_file.stem.split("_")
if len(parts) > 2 and parts[2] == date_str:
output_file = processed / "s2" / f"{date_str}_0.geotiff"
with rasterio.open(s2_file) as src:
data = src.read(window=window)
p2 = src.profile.copy()
p2.update(
{"width": w, "height": h, "transform": transform, "crs": crs}
)
with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data)
print(f"[PROCESS-ITB] Cropped: {output_file}")
break
s3_file = s3_prep / f"composite_{date_str}.tif"
if s3_file.exists():
output_file = processed / "s3" / f"{date_str}_0.geotiff"
with rasterio.open(s3_file) as src:
temp_profile = fusion_profile.copy()
temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
with rasterio.MemoryFile() as memfile:
with memfile.open(**temp_profile) as resampled:
for i in range(1, src.count + 1):
reproject(
source=rasterio.band(src, i),
destination=rasterio.band(resampled, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=fusion_transform,
dst_crs=crs,
resampling=Resampling.nearest,
)
data = resampled.read(window=window)
p2 = resampled.profile.copy()
p2.update({"width": w, "height": h, "transform": transform})
with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data)
print(f"[PROCESS-ITB] Cropped: {output_file}")
print("[PROCESS-ITB] Completed")
def post_process_all_itb_scenarios(season, site_position, site_name):
for strategy in ["aggressive", "nonaggressive"]:
for sigma in [None, 30]:
process_cropped_itb(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=sigma,
)
def post_process_all_scenarios(season, site_position, site_name):
"""Crop fusion/S2/S3 to valid pixels for all 4 scenarios."""
for strategy in ["aggressive", "nonaggressive"]:
for sigma in [None, 30]:
process_cropped(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma)
process_cropped(
season,
site_position,
site_name,
cleaning_strategy=strategy,
sigma=sigma,
)
def post_process_timeseries(season, site_position, site_name):
@ -110,6 +262,7 @@ def post_process_timeseries(season, site_position, site_name):
create_gcc_timeseries_post_process,
create_bands_timeseries_post_process,
)
create_ndvi_timeseries_post_process(season, site_position, site_name)
create_gcc_timeseries_post_process(season, site_position, site_name)
create_bands_timeseries_post_process(season, site_position, site_name)

View file

@ -1,4 +1,5 @@
"""Data preparation: S2/S3 preprocessing for fusion."""
import json
import shutil
from pathlib import Path
@ -16,6 +17,7 @@ def _import_distance_to_clouds():
"""Lazy import of efast.distance_to_clouds."""
try:
from efast.s2_processing import distance_to_clouds
return distance_to_clouds
except ImportError:
raise ImportError(
@ -40,6 +42,76 @@ def _get_base_dir(season, site_name, cleaning_strategy):
return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/")
def _get_itb_base_dir(season, site_name, cleaning_strategy):
return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb")
def _compute_gcc_from_refl_array(blue, green, red):
total = red.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32)
mask = (total > 0) & np.isfinite(total)
gcc = np.zeros_like(green, dtype=np.float32)
gcc[mask] = green[mask].astype(np.float32) / total[mask]
return gcc
def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
dst_s2_dir.mkdir(parents=True, exist_ok=True)
for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
dst = dst_s2_dir / src.name
if dst.exists():
continue
try:
dst.symlink_to(src.resolve())
except OSError:
shutil.copy2(src, dst)
def prepare_s2_gcc_for_itb(
season, site_position, site_name, cleaning_strategy="aggressive"
):
base = _get_base_dir(season, site_name, cleaning_strategy)
itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
s2_prep = base / "s2"
itb_s2.mkdir(parents=True, exist_ok=True)
for refl in sorted(s2_prep.glob("*REFL.tif")):
out = itb_s2 / refl.name.replace("_REFL.tif", "_GCC.tif")
if out.exists():
continue
with rasterio.open(refl) as src:
if src.count < 4:
continue
b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4))
gcc = _compute_gcc_from_refl_array(b, g, r)
profile = src.profile.copy()
profile.update({"count": 1, "dtype": "float32", "nodata": 0})
with rasterio.open(out, "w", **profile) as dst:
dst.write(gcc, 1)
print(f"[S2-ITB] Saved {out.name}")
_link_dist_cloud_from_prepared(s2_prep, itb_s2)
def prepare_s3_gcc_for_itb(
season, site_position, site_name, cleaning_strategy="aggressive"
):
base = _get_base_dir(season, site_name, cleaning_strategy)
itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
itb_s3.mkdir(parents=True, exist_ok=True)
for comp in sorted((base / "s3").glob("composite_*.tif")):
out = itb_s3 / comp.name
if out.exists():
continue
with rasterio.open(comp) as src:
if src.count < 4:
continue
b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4))
gcc = _compute_gcc_from_refl_array(b, g, r)
profile = src.profile.copy()
profile.update({"count": 1, "dtype": "float32", "nodata": 0})
with rasterio.open(out, "w", **profile) as dst:
dst.write(gcc, 1)
print(f"[S3-ITB] Saved {out.name}")
def _reproject_raster_to_target(
src_path,
dst_path,
@ -90,7 +162,9 @@ def _rescale_dist_cloud_for_small_roi(s2_output_dir):
print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})")
def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None):
def prepare_s2(
season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
lat, lon = site_position
s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
@ -99,7 +173,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
clouds = _load_excluded(season, site_name, cleaning_strategy)
s2_output_dir.mkdir(parents=True, exist_ok=True)
print(f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}")
print(
f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
)
s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
if not s3_files:
@ -113,7 +189,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
for s2_file in sorted(s2_dir.glob("*.geotiff")):
if s2_file.name in clouds["s2"]:
print(f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})")
print(
f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
)
continue
date_str = s2_file.name.split("_")[0]
refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
@ -136,14 +214,16 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
temp_normalized.unlink()
print(f"[S2-PREP] Saved: {refl_dst}")
print(f"[S2-PREP] Computing distance-to-clouds...")
print("[S2-PREP] Computing distance-to-clouds...")
distance_to_clouds = _import_distance_to_clouds()
distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
_rescale_dist_cloud_for_small_roi(s2_output_dir)
print("[S2-PREP] Completed")
def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None):
def prepare_s3(
season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
lat, lon = site_position
s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
base_dir = _get_base_dir(season, site_name, cleaning_strategy)
@ -153,16 +233,22 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
clouds = _load_excluded(season, site_name, cleaning_strategy)
s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
print(f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}")
print(
f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
)
s3_by_date = defaultdict(list)
for s3_file in s3_dir.glob("*.geotiff"):
if s3_file.name not in clouds["s3"]:
s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
else:
print(f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})")
print(
f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
)
print(f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates")
print(
f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
)
temp_composite_dir = s3_preprocessed_dir / "temp_composites"
if temp_composite_dir.exists():
@ -187,7 +273,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
profile.update({"count": composite.shape[0], "dtype": "float32"})
with rasterio.open(composite_path, "w", **profile) as dst:
dst.write(composite)
print(f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged")
print(
f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
)
# Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds
# This ensures fusion covers the same area as S2 and dimensions match
@ -212,7 +300,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
height,
)
print(f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)...")
print(
f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)..."
)
# Reproject each S3 composite to match S2 REFL bounds
sen3_paths = sorted(temp_composite_dir.glob("*.tif"))

2
pyproject.toml Normal file
View file

@ -0,0 +1,2 @@
[tool.ruff.lint.per-file-ignores]
"run.py" = ["F401"]

23
run.py
View file

@ -1,10 +1,19 @@
from fusion import run_all_efast_scenarios
from postprocessing import post_process_all_scenarios, post_process_timeseries
from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
from postprocessing import (
post_process_all_scenarios,
post_process_all_itb_scenarios,
post_process_timeseries,
)
from acquisition_s2 import download_s2
from acquisition_s3 import download_s3
from acquisition_phenocam import download_phenocam
from preselection import create_timeseries
from preparation import prepare_s2, prepare_s3
from preparation import (
prepare_s2,
prepare_s3,
prepare_s2_gcc_for_itb,
prepare_s3_gcc_for_itb,
)
from metrics_indices import create_prepared_fusion_timeseries
from metrics_stats import calculate_all_metrics
@ -28,6 +37,13 @@ def run_pipeline(season, site_position, site_name):
# print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
# run_all_efast_scenarios(season, site_position, site_name)
# Index-then-Blend (ItB): GCC stacks, EFAST fusion with product=GCC
# for strategy in ["aggressive", "nonaggressive"]:
# prepare_s2_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy)
# prepare_s3_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy)
# run_all_efast_itb_scenarios(season, site_position, site_name)
# post_process_all_itb_scenarios(season, site_position, site_name)
print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
create_prepared_fusion_timeseries(season, site_position, site_name)
@ -51,4 +67,3 @@ if __name__ == "__main__":
run_pipeline(2023, (64.2437, 19.7673), "vindeln2")
run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1")
run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat")