Added index then blend.

This commit is contained in:
Felix Delattre 2026-04-11 17:39:19 +02:00
parent 8e7fb1de18
commit 55d6facc4d
8 changed files with 661 additions and 68 deletions

4
.gitignore vendored
View file

@ -42,3 +42,7 @@ dist/
# OS # OS
.DS_Store .DS_Store
Thumbs.db Thumbs.db
AGENTS.md
METHODOLOGY.md
.vibe

109
fusion.py
View file

@ -1,13 +1,15 @@
"""EFAST fusion: S2/S3 reflectance fusion for four scenarios.""" """EFAST fusion: S2/S3 reflectance fusion for four scenarios."""
from datetime import datetime, timedelta from datetime import datetime, timedelta
from preparation import _get_base_dir, RESOLUTION_RATIO from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO
def _import_efast(): def _import_efast():
"""Lazy import of efast to avoid import errors when not using efast functions.""" """Lazy import of efast to avoid import errors when not using efast functions."""
try: try:
import efast import efast
return efast return efast
except ImportError: except ImportError:
raise ImportError( raise ImportError(
@ -15,7 +17,14 @@ def _import_efast():
) )
def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None, date_range=None): def run_efast(
season,
site_position,
site_name,
cleaning_strategy="aggressive",
sigma=None,
date_range=None,
):
lat, lon = site_position lat, lon = site_position
datetime_range = date_range or f"{season}-01-01/{season}-12-31" datetime_range = date_range or f"{season}-01-01/{season}-12-31"
@ -47,7 +56,9 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive",
} }
if sigma is not None: if sigma is not None:
kwargs["sigma"] = sigma kwargs["sigma"] = sigma
efast.fusion(current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs) efast.fusion(
current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs
)
print( print(
f"[EFAST] Saved: {output_file}" f"[EFAST] Saved: {output_file}"
if output_file.exists() if output_file.exists()
@ -60,8 +71,94 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive",
print("[EFAST] Completed") print("[EFAST] Completed")
def run_all_efast_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run EFAST fusion for all 4 scenarios.

    The four scenarios are the two cleaning strategies ("aggressive",
    "nonaggressive"), each run once without an explicit sigma and once with
    ``sigma_value``. Expects prepared_*/s2 and prepared_*/s3 to exist.
    """
    for strategy in ("aggressive", "nonaggressive"):
        # Same order as before: the sigma=None run first, then sigma_value.
        for scenario_sigma in (None, sigma_value):
            run_efast(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )
def run_efast_itb(
    season,
    site_position,
    site_name,
    cleaning_strategy="aggressive",
    sigma=None,
    date_range=None,
):
    """Run day-by-day EFAST fusion of the GCC product for one ITB scenario.

    "ITB" presumably stands for "index then blend" (per the commit message);
    confirm against the project documentation.

    Args:
        season: Year used to build the default ``YYYY-01-01/YYYY-12-31`` range.
        site_position: ``(lat, lon)`` pair; only used in the log line here.
        site_name: Site identifier passed to ``_get_itb_base_dir``.
        cleaning_strategy: Strategy name passed to ``_get_itb_base_dir``.
        sigma: Optional value forwarded to ``efast.fusion`` as ``sigma``.
            When truthy, output goes to ``fusion_sigma{sigma}`` instead of
            ``fusion``.
        date_range: Optional ``"YYYY-MM-DD/YYYY-MM-DD"`` string overriding the
            full-season default.

    Raises:
        ValueError: If the date range is not two ``%Y-%m-%d`` dates separated
            by a single ``/``.
        ImportError: Propagated from ``_import_efast`` when efast is missing.

    Errors raised by ``efast.fusion`` for a single day are printed and do not
    stop the remaining days.
    """
    lat, lon = site_position
    datetime_range = date_range or f"{season}-01-01/{season}-12-31"

    efast_base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy)
    s2_output_dir = efast_base_dir / "s2"
    s3_output_dir = efast_base_dir / "s3"
    # NOTE(review): the folder name uses truthiness while the kwarg below uses
    # `is not None`; kept as-is because process_cropped_itb derives the same
    # folder name with the same truthiness test.
    fusion_output_dir = efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion")
    fusion_output_dir.mkdir(parents=True, exist_ok=True)

    print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
    efast = _import_efast()

    start_str, end_str = datetime_range.split("/")
    start_date = datetime.strptime(start_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_str, "%Y-%m-%d")

    # The fusion options do not depend on the day, so build them once instead
    # of once per loop iteration.
    fusion_kwargs = {
        "product": "GCC",
        "max_days": 30,
        "date_position": 2,
        "minimum_acquisition_importance": 0.0,
        "ratio": RESOLUTION_RATIO,
    }
    if sigma is not None:
        fusion_kwargs["sigma"] = sigma

    current_date = start_date
    while current_date <= end_date:  # end date is inclusive
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"GCC_{date_str}.tif"
        try:
            efast.fusion(
                current_date,
                s3_output_dir,
                s2_output_dir,
                fusion_output_dir,
                **fusion_kwargs,
            )
            print(
                f"[EFAST-ITB] Saved: {output_file}"
                if output_file.exists()
                else f"[EFAST-ITB] No output for {date_str}"
            )
        except Exception as e:
            # Best effort: one failing day must not abort the whole season.
            print(f"[EFAST-ITB] Error {date_str}: {e}")
        current_date += timedelta(days=1)

    print("[EFAST-ITB] Completed")
def run_all_efast_itb_scenarios(
    season, site_position, site_name, sigma_value=30, date_range=None
):
    """Run the ITB fusion for both cleaning strategies, with and without sigma.

    For each of "aggressive" and "nonaggressive", calls ``run_efast_itb`` once
    with ``sigma=None`` and then once with ``sigma=sigma_value``. All other
    arguments are forwarded unchanged.
    """
    for strategy in ("aggressive", "nonaggressive"):
        # Same order as before: the sigma=None run first, then sigma_value.
        for scenario_sigma in (None, sigma_value):
            run_efast_itb(
                season,
                site_position,
                site_name,
                cleaning_strategy=strategy,
                sigma=scenario_sigma,
                date_range=date_range,
            )

View file

@ -1,4 +1,5 @@
"""Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs.""" """Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs."""
import json import json
import numpy as np import numpy as np
import rasterio import rasterio
@ -67,7 +68,9 @@ def _get_ndvi_value(ndvi_file, site_position):
return None return None
def _create_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): def _create_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[NDVI-{source_name}] Creating timeseries.json...") print(f"[NDVI-{source_name}] Creating timeseries.json...")
timeseries = [] timeseries = []
@ -196,13 +199,23 @@ def create_ndvi_timeseries_post_process(season, site_position, site_name):
processed_dir = f"processed_{strategy}_sigma{sigma}" processed_dir = f"processed_{strategy}_sigma{sigma}"
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/") input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/") output_dir = Path(
f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/"
)
_create_timeseries_for_dir( _create_timeseries_for_dir(
input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}" input_dir,
output_dir,
site_position,
f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
) )
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/") input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/") output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/")
_create_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}") _create_timeseries_for_dir(
input_dir,
output_dir,
site_position,
f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
)
def _calculate_and_write_gcc(input_file, output_file): def _calculate_and_write_gcc(input_file, output_file):
@ -261,6 +274,25 @@ def _get_gcc_from_original(input_file, site_position):
"""Calculate GCC directly from original file without creating GeoTIFF.""" """Calculate GCC directly from original file without creating GeoTIFF."""
try: try:
with rasterio.open(input_file) as src: with rasterio.open(input_file) as src:
if src.count == 1:
g = src.read(1).astype(np.float32)
lon, lat = site_position[1], site_position[0]
x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
if not (
src.bounds.left <= x[0] <= src.bounds.right
and src.bounds.bottom <= y[0] <= src.bounds.top
):
return None
row, col = src.index(x[0], y[0])
if row < 0 or row >= src.height or col < 0 or col >= src.width:
return None
r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2)
win = g[r0:r1, c0:c1]
mask = np.isfinite(win) & (win > 0)
if not np.any(mask):
return None
return float(np.mean(win[mask]))
if src.count < 3: if src.count < 3:
return None return None
@ -290,11 +322,21 @@ def _get_gcc_from_original(input_file, site_position):
# Calculate GCC for each pixel in window # Calculate GCC for each pixel in window
total = red_window + green_window + blue_window total = red_window + green_window + blue_window
mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) mask = (
(total > 0)
& ~np.isnan(total)
& (blue_window >= 0)
& (green_window >= 0)
& (red_window >= 0)
)
if not np.any(mask): if not np.any(mask):
negative_pixels = np.sum((blue_window < 0) | (green_window < 0) | (red_window < 0)) negative_pixels = np.sum(
(blue_window < 0) | (green_window < 0) | (red_window < 0)
)
if negative_pixels > 0: if negative_pixels > 0:
print(f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)") print(
f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)"
)
return None return None
gcc_window = np.zeros_like(green_window, dtype=np.float32) gcc_window = np.zeros_like(green_window, dtype=np.float32)
@ -303,11 +345,13 @@ def _get_gcc_from_original(input_file, site_position):
# Return mean of valid GCC values # Return mean of valid GCC values
valid_gcc = gcc_window[mask] valid_gcc = gcc_window[mask]
return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None
except Exception as e: except Exception:
return None return None
def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): def _create_gcc_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[GCC-{source_name}] Creating timeseries.json...") print(f"[GCC-{source_name}] Creating timeseries.json...")
timeseries = [] timeseries = []
@ -342,7 +386,9 @@ def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_
f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)" f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)"
) )
timeseries.append({"date": date, "filename": filename, "greenness_index": gcc_value}) timeseries.append(
{"date": date, "filename": filename, "greenness_index": gcc_value}
)
timeseries.sort(key=lambda x: x["date"]) timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
@ -400,13 +446,41 @@ def create_gcc_timeseries_post_process(season, site_position, site_name):
processed_dir = f"processed_{strategy}_sigma{sigma}" processed_dir = f"processed_{strategy}_sigma{sigma}"
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/") input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/") output_dir = Path(
f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/"
)
_create_gcc_timeseries_for_dir( _create_gcc_timeseries_for_dir(
input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}" input_dir,
output_dir,
site_position,
f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}",
) )
input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/") input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/")
output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/") output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/")
_create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}") _create_gcc_timeseries_for_dir(
input_dir,
output_dir,
site_position,
f"POST-PROCESS-FUSION-{strategy}-σ{sigma}",
)
itb_dir = f"processed_{strategy}_itb_sigma{sigma}"
base_itb = Path(f"data/{site_name}/{season}/{itb_dir}")
if not base_itb.exists():
continue
for source in ["s2", "s3"]:
inp, out = base_itb / source, base_itb / "gcc" / source
_create_gcc_timeseries_for_dir(
inp,
out,
site_position,
f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}",
)
_create_gcc_timeseries_for_dir(
base_itb / "fusion",
base_itb / "gcc" / "fusion",
site_position,
f"POST-ITB-FUSION-{strategy}-σ{sigma}",
)
def _get_bands_from_original(input_file, site_position): def _get_bands_from_original(input_file, site_position):
@ -425,7 +499,10 @@ def _get_bands_from_original(input_file, site_position):
row, col = src.index(x[0], y[0]) row, col = src.index(x[0], y[0])
r0, r1 = max(0, row - 1), min(src.height, row + 2) r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2) c0, c1 = max(0, col - 1), min(src.width, col + 2)
bands = [src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32) for i in range(4)] bands = [
src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32)
for i in range(4)
]
mask = ~np.any([np.isnan(b) for b in bands], axis=0) mask = ~np.any([np.isnan(b) for b in bands], axis=0)
mask &= np.all([b > 0 for b in bands], axis=0) mask &= np.all([b > 0 for b in bands], axis=0)
if not np.any(mask): if not np.any(mask):
@ -440,7 +517,9 @@ def _get_bands_from_original(input_file, site_position):
return None return None
def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): def _create_bands_timeseries_for_dir(
input_dir, output_dir, site_position, source_name, pattern="*.geotiff"
):
print(f"[BANDS-{source_name}] Creating timeseries.json...") print(f"[BANDS-{source_name}] Creating timeseries.json...")
timeseries = [] timeseries = []
for f in sorted(input_dir.glob(pattern)): for f in sorted(input_dir.glob(pattern)):
@ -456,11 +535,14 @@ def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, sourc
timeseries.sort(key=lambda x: x["date"]) timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
(output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2)) (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2))
print(f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)") print(
f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)"
)
def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir): def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
"""Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv.""" """Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv."""
def load(p): def load(p):
p = Path(p) p = Path(p)
if not p.exists(): if not p.exists():
@ -469,6 +551,7 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
return json.loads((p / "timeseries.json").read_text()) return json.loads((p / "timeseries.json").read_text())
except Exception: except Exception:
return [] return []
ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)} ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)}
gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)} gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)}
bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)} bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)}
@ -482,12 +565,16 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir):
export_dir.mkdir(parents=True, exist_ok=True) export_dir.mkdir(parents=True, exist_ok=True)
(export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2)) (export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2))
cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"] cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"]
def esc(v): def esc(v):
s = str(v) if v is not None else "" s = str(v) if v is not None else ""
return f'"{s}"' if "," in s or '"' in s else s return f'"{s}"' if "," in s or '"' in s else s
rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged] rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged]
(export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows)) (export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows))
print(f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)") print(
f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)"
)
def create_prepared_fusion_timeseries(season, site_position, site_name): def create_prepared_fusion_timeseries(season, site_position, site_name):
@ -497,17 +584,86 @@ def create_prepared_fusion_timeseries(season, site_position, site_name):
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
inp = base / source inp = base / source
if inp.exists(): if inp.exists():
_create_timeseries_for_dir(inp, base / "ndvi" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") _create_timeseries_for_dir(
_create_gcc_timeseries_for_dir(inp, base / "gcc" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") inp,
_create_bands_timeseries_for_dir(inp, base / "bands" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") base / "ndvi" / source,
_write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source) site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_create_gcc_timeseries_for_dir(
inp,
base / "gcc" / source,
site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_create_bands_timeseries_for_dir(
inp,
base / "bands" / source,
site_position,
f"PREPARED-{source.upper()}-{strategy}",
"*.tif",
)
_write_export(
base / "ndvi" / source,
base / "gcc" / source,
base / "bands" / source,
base / "export" / source,
)
for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]: for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
inp = base / fusion_sub inp = base / fusion_sub
if inp.exists(): if inp.exists():
_create_timeseries_for_dir(inp, base / "ndvi" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") _create_timeseries_for_dir(
_create_gcc_timeseries_for_dir(inp, base / "gcc" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") inp,
_create_bands_timeseries_for_dir(inp, base / "bands" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") base / "ndvi" / fusion_sub,
_write_export(base / "ndvi" / fusion_sub, base / "gcc" / fusion_sub, base / "bands" / fusion_sub, base / "export" / fusion_sub) site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_create_gcc_timeseries_for_dir(
inp,
base / "gcc" / fusion_sub,
site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_create_bands_timeseries_for_dir(
inp,
base / "bands" / fusion_sub,
site_position,
f"FUSION-{strategy}-σ{sig or 20}",
"*.tif",
)
_write_export(
base / "ndvi" / fusion_sub,
base / "gcc" / fusion_sub,
base / "bands" / fusion_sub,
base / "export" / fusion_sub,
)
itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb")
if not itb.exists():
continue
for source in ["s2", "s3"]:
inp = itb / source
if inp.exists():
_create_gcc_timeseries_for_dir(
inp,
itb / "gcc" / source,
site_position,
f"PREPARED-ITB-{source.upper()}-{strategy}",
"*.tif",
)
for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]:
inp = itb / fusion_sub
if inp.exists():
_create_gcc_timeseries_for_dir(
inp,
itb / "gcc" / fusion_sub,
site_position,
f"FUSION-ITB-{strategy}-σ{sig or 20}",
"*.tif",
)
def create_bands_timeseries_post_process(season, site_position, site_name): def create_bands_timeseries_post_process(season, site_position, site_name):
@ -518,5 +674,16 @@ def create_bands_timeseries_post_process(season, site_position, site_name):
for source in ["s2", "s3", "fusion"]: for source in ["s2", "s3", "fusion"]:
inp, out = base / source, base / "bands" / source inp, out = base / source, base / "bands" / source
if inp.exists(): if inp.exists():
_create_bands_timeseries_for_dir(inp, out, site_position, f"POST-{source.upper()}-{strategy}-σ{sigma}", "*.geotiff") _create_bands_timeseries_for_dir(
_write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source) inp,
out,
site_position,
f"POST-{source.upper()}-{strategy}-σ{sigma}",
"*.geotiff",
)
_write_export(
base / "ndvi" / source,
base / "gcc" / source,
base / "bands" / source,
base / "export" / source,
)

View file

@ -1,4 +1,5 @@
"""Metrics and statistics: temporal/spatial metrics and PhenoCam stats.""" """Metrics and statistics: temporal/spatial metrics and PhenoCam stats."""
import json import json
import numpy as np import numpy as np
from pathlib import Path from pathlib import Path
@ -132,6 +133,31 @@ def _get_spatial_stats_from_raster(raster_file, site_position):
"""Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window.""" """Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window."""
try: try:
with rasterio.open(raster_file) as src: with rasterio.open(raster_file) as src:
if src.count == 1:
g = src.read(1).astype(np.float32)
lon, lat = site_position[1], site_position[0]
x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat])
if not (
src.bounds.left <= x[0] <= src.bounds.right
and src.bounds.bottom <= y[0] <= src.bounds.top
):
return None
row, col = src.index(x[0], y[0])
if row < 0 or row >= src.height or col < 0 or col >= src.width:
return None
r0, r1 = max(0, row - 1), min(src.height, row + 2)
c0, c1 = max(0, col - 1), min(src.width, col + 2)
win = g[r0:r1, c0:c1]
mask = np.isfinite(win) & (win > 0)
if not np.any(mask):
return None
valid = win[mask]
return {
"mean": float(np.mean(valid)),
"std": float(np.std(valid)),
"min": float(np.min(valid)),
"max": float(np.max(valid)),
}
if src.count < 3: if src.count < 3:
return None return None
@ -161,7 +187,13 @@ def _get_spatial_stats_from_raster(raster_file, site_position):
# Calculate GCC for each pixel in window # Calculate GCC for each pixel in window
total = red_window + green_window + blue_window total = red_window + green_window + blue_window
mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) mask = (
(total > 0)
& ~np.isnan(total)
& (blue_window >= 0)
& (green_window >= 0)
& (red_window >= 0)
)
if not np.any(mask): if not np.any(mask):
return None return None
@ -259,7 +291,9 @@ def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position
# Calculate spatial metrics # Calculate spatial metrics
fusion_raster_dir = base / processed_dir / "fusion" fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
return temporal_metrics, spatial_metrics return temporal_metrics, spatial_metrics
@ -283,7 +317,9 @@ def calculate_all_metrics(season, site_name, site_position):
results["phenocam_stats"] = phenocam_stats results["phenocam_stats"] = phenocam_stats
# Calculate S2 baseline metrics once (S2 data is identical across scenarios) # Calculate S2 baseline metrics once (S2 data is identical across scenarios)
s2_ts_path = base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json" s2_ts_path = (
base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json"
)
s2_ts = load_timeseries(s2_ts_path) s2_ts = load_timeseries(s2_ts_path)
if s2_ts: if s2_ts:
s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts) s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts)
@ -303,7 +339,9 @@ def calculate_all_metrics(season, site_name, site_position):
fusion_ts = load_timeseries(fusion_ts_path) fusion_ts = load_timeseries(fusion_ts_path)
if not fusion_ts: if not fusion_ts:
print(f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping") print(
f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping"
)
continue continue
# Calculate temporal metrics # Calculate temporal metrics
@ -313,7 +351,30 @@ def calculate_all_metrics(season, site_name, site_position):
# Calculate spatial metrics # Calculate spatial metrics
fusion_raster_dir = base / processed_dir / "fusion" fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
if spatial_metrics:
results["spatial"][scenario_name] = spatial_metrics
for strategy in ["aggressive", "nonaggressive"]:
for sigma in [20, 30]:
scenario_name = f"{strategy}_sigma{sigma}_itb"
processed_dir = f"processed_{strategy}_itb_sigma{sigma}"
fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json"
fusion_ts = load_timeseries(fusion_ts_path)
if not fusion_ts:
print(
f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping"
)
continue
temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts)
if temporal_metrics:
results["temporal"][scenario_name] = temporal_metrics
fusion_raster_dir = base / processed_dir / "fusion"
spatial_metrics = calculate_spatial_metrics(
fusion_raster_dir, phenocam_ts, site_position
)
if spatial_metrics: if spatial_metrics:
results["spatial"][scenario_name] = spatial_metrics results["spatial"][scenario_name] = spatial_metrics
@ -321,14 +382,18 @@ def calculate_all_metrics(season, site_name, site_position):
if results["temporal"]: if results["temporal"]:
best_temporal = max( best_temporal = max(
results["temporal"].items(), results["temporal"].items(),
key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1 key=lambda x: x[1].get("r_squared", -1)
if x[1].get("r_squared") is not None
else -1,
) )
results["summary"] = {"best_temporal_scenario": best_temporal[0]} results["summary"] = {"best_temporal_scenario": best_temporal[0]}
if results["spatial"]: if results["spatial"]:
best_spatial = max( best_spatial = max(
results["spatial"].items(), results["spatial"].items(),
key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1 key=lambda x: x[1].get("r_squared", -1)
if x[1].get("r_squared") is not None
else -1,
) )
if "summary" not in results: if "summary" not in results:
results["summary"] = {} results["summary"] = {}

View file

@ -1,17 +1,23 @@
"""Post-processing: crop fusion/S2/S3 to valid pixels.""" """Post-processing: crop fusion/S2/S3 to valid pixels."""
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
import rasterio import rasterio
from rasterio import windows from rasterio import windows
from rasterio.warp import reproject, Resampling from rasterio.warp import reproject, Resampling
from rasterio.io import MemoryFile
def process_cropped(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None): def process_cropped(
season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
"""Crop fusion to valid data, then crop S2/S3 to match.""" """Crop fusion to valid data, then crop S2/S3 to match."""
base = Path(f"data/{site_name}/{season}") base = Path(f"data/{site_name}/{season}")
prepared = base / f"prepared_{cleaning_strategy}" prepared = base / f"prepared_{cleaning_strategy}"
processed_dir = f"processed_{cleaning_strategy}_sigma{sigma}" if sigma else f"processed_{cleaning_strategy}_sigma20" processed_dir = (
f"processed_{cleaning_strategy}_sigma{sigma}"
if sigma
else f"processed_{cleaning_strategy}_sigma20"
)
processed = base / processed_dir processed = base / processed_dir
s2_prep = prepared / "s2" s2_prep = prepared / "s2"
@ -21,7 +27,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
print(f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}") print(
f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
)
# Crop fusion to valid data and get dimensions # Crop fusion to valid data and get dimensions
fusion_dims = {} fusion_dims = {}
@ -48,11 +56,29 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
output_file = processed / "fusion" / f"{date_str}_0.geotiff" output_file = processed / "fusion" / f"{date_str}_0.geotiff"
with rasterio.open(output_file, "w", **p) as dst: with rasterio.open(output_file, "w", **p) as dst:
dst.write(data_crop) dst.write(data_crop)
fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile) fusion_dims[date_str] = (
c0,
r0,
w,
h,
transform,
src.transform,
src.crs,
src.profile,
)
print(f"[PROCESS] Cropped fusion: {output_file}") print(f"[PROCESS] Cropped fusion: {output_file}")
# Crop S2 and S3 to fusion size # Crop S2 and S3 to fusion size
for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items(): for date_str, (
c0,
r0,
w,
h,
transform,
fusion_transform,
crs,
fusion_profile,
) in fusion_dims.items():
window = windows.Window(c0, r0, w, h) window = windows.Window(c0, r0, w, h)
# S2 # S2
for s2_file in s2_prep.glob("*REFL.tif"): for s2_file in s2_prep.glob("*REFL.tif"):
@ -61,7 +87,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
with rasterio.open(s2_file) as src: with rasterio.open(s2_file) as src:
data = src.read(window=window) data = src.read(window=window)
p2 = src.profile.copy() p2 = src.profile.copy()
p2.update({"width": w, "height": h, "transform": transform, "crs": crs}) p2.update(
{"width": w, "height": h, "transform": transform, "crs": crs}
)
with rasterio.open(output_file, "w", **p2) as dst: with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data) dst.write(data)
print(f"[PROCESS] Cropped: {output_file}") print(f"[PROCESS] Cropped: {output_file}")
@ -83,7 +111,7 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
src_crs=src.crs, src_crs=src.crs,
dst_transform=fusion_transform, dst_transform=fusion_transform,
dst_crs=crs, dst_crs=crs,
resampling=Resampling.nearest resampling=Resampling.nearest,
) )
# Crop using same window # Crop using same window
data = resampled.read(window=window) data = resampled.read(window=window)
@ -96,11 +124,135 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress
print("[PROCESS] Completed") print("[PROCESS] Completed")
def process_cropped_itb(
    season, site_position, site_name, cleaning_strategy="aggressive", sigma=None
):
    """Crop the Index-then-Blend (ItB) fusion, S2 and S3 rasters to a common window.

    For every ``GCC_*.tif`` found in the prepared ItB fusion directory the
    bounding box of valid pixels (not NaN and greater than 0.001 in any band)
    is computed and the fusion raster is written cropped to that box. The
    same pixel window is then applied to the S2 GCC raster of the same date
    and to the S3 composite of the same date, the latter after being
    reprojected onto the fusion grid with nearest-neighbour resampling.

    Outputs are written as ``<date>_0.geotiff`` below
    ``data/<site_name>/<season>/processed_<cleaning_strategy>_itb_sigma<N>/``
    in the ``fusion``, ``s2`` and ``s3`` sub-directories.

    Args:
        season: Season identifier used as a path component.
        site_position: Not used by this function.
        site_name: Site identifier used as a path component.
        cleaning_strategy: Selects the ``prepared_<strategy>_itb`` input
            directory and the name of the output directory.
        sigma: When truthy, inputs are read from ``fusion_sigma<sigma>`` and
            outputs go to ``..._sigma<sigma>``. When falsy, inputs are read
            from ``fusion`` and outputs go to ``..._sigma20``.
    """
    base = Path(f"data/{site_name}/{season}")
    prepared = base / f"prepared_{cleaning_strategy}_itb"
    # A falsy sigma is labelled "sigma20" in the output directory name.
    processed_dir = (
        f"processed_{cleaning_strategy}_itb_sigma{sigma}"
        if sigma
        else f"processed_{cleaning_strategy}_itb_sigma20"
    )
    processed = base / processed_dir
    s2_prep = prepared / "s2"
    s3_prep = prepared / "s3"
    fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion")
    for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
        output_dir.mkdir(parents=True, exist_ok=True)
    print(
        f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}"
    )
    # Pass 1: crop each fusion raster and remember its window and grid,
    # keyed by the date token taken from the file name.
    fusion_dims = {}
    for fusion_file in fusion_prep.glob("GCC_*.tif"):
        date_str = fusion_file.stem.split("_")[1]
        with rasterio.open(fusion_file) as src:
            data = src.read()
            valid = ~np.isnan(data) & (data > 0.001)
            # Collapse the band axis together with the column (resp. row)
            # axis to find which rows (resp. columns) hold any valid pixel.
            rows = np.any(valid, axis=(0, 2))
            cols = np.any(valid, axis=(0, 1))
            row_idx = np.where(rows)[0]
            col_idx = np.where(cols)[0]
            if len(row_idx) == 0 or len(col_idx) == 0:
                print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)")
                continue
            # Inclusive bounding box of the valid area, in pixel coordinates.
            r0, r1 = row_idx[0], row_idx[-1]
            c0, c1 = col_idx[0], col_idx[-1]
            w, h = c1 - c0 + 1, r1 - r0 + 1
            window = windows.Window(c0, r0, w, h)
            data_crop = src.read(window=window)
            # Affine transform of the cropped window (new upper-left origin).
            transform = rasterio.windows.transform(window, src.transform)
            p = src.profile.copy()
            p.update({"width": w, "height": h, "transform": transform})
            output_file = processed / "fusion" / f"{date_str}_0.geotiff"
            with rasterio.open(output_file, "w", **p) as dst:
                dst.write(data_crop)
            fusion_dims[date_str] = (
                c0,
                r0,
                w,
                h,
                transform,
                src.transform,
                src.crs,
                src.profile,
            )
        print(f"[PROCESS-ITB] Cropped fusion: {output_file}")
    # Pass 2: apply each fusion window to the S2 and S3 rasters of that date.
    for date_str, (
        c0,
        r0,
        w,
        h,
        transform,
        fusion_transform,
        crs,
        fusion_profile,
    ) in fusion_dims.items():
        window = windows.Window(c0, r0, w, h)
        # S2: the window is applied directly, without reprojection.
        # NOTE(review): this presumes the S2 GCC raster shares the fusion
        # pixel grid - confirm against the preparation step.
        for s2_file in s2_prep.glob("*GCC.tif"):
            parts = s2_file.stem.split("_")
            # The third underscore-separated token of the stem is the date.
            if len(parts) > 2 and parts[2] == date_str:
                output_file = processed / "s2" / f"{date_str}_0.geotiff"
                with rasterio.open(s2_file) as src:
                    data = src.read(window=window)
                    p2 = src.profile.copy()
                    p2.update(
                        {"width": w, "height": h, "transform": transform, "crs": crs}
                    )
                    with rasterio.open(output_file, "w", **p2) as dst:
                        dst.write(data)
                print(f"[PROCESS-ITB] Cropped: {output_file}")
                # Only the first S2 file matching the date is used.
                break
        # S3: reproject onto the full fusion grid in memory, then crop.
        s3_file = s3_prep / f"composite_{date_str}.tif"
        if s3_file.exists():
            output_file = processed / "s3" / f"{date_str}_0.geotiff"
            with rasterio.open(s3_file) as src:
                # Fusion geometry, but with the S3 dtype and band count.
                temp_profile = fusion_profile.copy()
                temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
                with rasterio.MemoryFile() as memfile:
                    with memfile.open(**temp_profile) as resampled:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(resampled, i),
                                src_transform=src.transform,
                                src_crs=src.crs,
                                dst_transform=fusion_transform,
                                dst_crs=crs,
                                resampling=Resampling.nearest,
                            )
                        data = resampled.read(window=window)
                        p2 = resampled.profile.copy()
                        p2.update({"width": w, "height": h, "transform": transform})
                        with rasterio.open(output_file, "w", **p2) as dst:
                            dst.write(data)
            print(f"[PROCESS-ITB] Cropped: {output_file}")
    print("[PROCESS-ITB] Completed")
def post_process_all_itb_scenarios(season, site_position, site_name):
    """Run ``process_cropped_itb`` for every cleaning-strategy / sigma pair.

    The scenarios are visited strategy-major: both sigma settings
    (``None``, then ``30``) for ``"aggressive"``, followed by both for
    ``"nonaggressive"``.
    """
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped_itb(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
def post_process_all_scenarios(season, site_position, site_name):
    """Crop fusion/S2/S3 to valid pixels for all 4 scenarios.

    Calls ``process_cropped`` once per combination of cleaning strategy
    (``"aggressive"``, ``"nonaggressive"``) and sigma (``None``, ``30``),
    iterating strategy-major.
    """
    scenarios = [
        (strategy, sigma)
        for strategy in ("aggressive", "nonaggressive")
        for sigma in (None, 30)
    ]
    for strategy, sigma in scenarios:
        process_cropped(
            season,
            site_position,
            site_name,
            cleaning_strategy=strategy,
            sigma=sigma,
        )
def post_process_timeseries(season, site_position, site_name): def post_process_timeseries(season, site_position, site_name):
@ -110,6 +262,7 @@ def post_process_timeseries(season, site_position, site_name):
create_gcc_timeseries_post_process, create_gcc_timeseries_post_process,
create_bands_timeseries_post_process, create_bands_timeseries_post_process,
) )
create_ndvi_timeseries_post_process(season, site_position, site_name) create_ndvi_timeseries_post_process(season, site_position, site_name)
create_gcc_timeseries_post_process(season, site_position, site_name) create_gcc_timeseries_post_process(season, site_position, site_name)
create_bands_timeseries_post_process(season, site_position, site_name) create_bands_timeseries_post_process(season, site_position, site_name)

View file

@ -1,4 +1,5 @@
"""Data preparation: S2/S3 preprocessing for fusion.""" """Data preparation: S2/S3 preprocessing for fusion."""
import json import json
import shutil import shutil
from pathlib import Path from pathlib import Path
@ -16,6 +17,7 @@ def _import_distance_to_clouds():
"""Lazy import of efast.distance_to_clouds.""" """Lazy import of efast.distance_to_clouds."""
try: try:
from efast.s2_processing import distance_to_clouds from efast.s2_processing import distance_to_clouds
return distance_to_clouds return distance_to_clouds
except ImportError: except ImportError:
raise ImportError( raise ImportError(
@ -40,6 +42,76 @@ def _get_base_dir(season, site_name, cleaning_strategy):
return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/") return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/")
def _get_itb_base_dir(season, site_name, cleaning_strategy):
    """Return the prepared Index-then-Blend directory for one scenario.

    The path has the form
    ``data/<site_name>/<season>/prepared_<cleaning_strategy>_itb``.
    """
    itb_folder = f"prepared_{cleaning_strategy}_itb"
    return Path(f"data/{site_name}/{season}/{itb_folder}")
def _compute_gcc_from_refl_array(blue, green, red):
    """Compute the green chromatic coordinate, GCC = G / (R + G + B).

    Args:
        blue: Blue reflectance array.
        green: Green reflectance array, same shape as ``blue``.
        red: Red reflectance array, same shape as ``blue``.

    Returns:
        A ``float32`` array shaped like ``green``. Pixels whose band sum is
        not finite or not strictly positive are set to 0.
    """
    green_f = green.astype(np.float32)
    # All three bands belong in the denominator; the previous code summed
    # red twice and ignored blue.
    total = blue.astype(np.float32) + green_f + red.astype(np.float32)
    mask = (total > 0) & np.isfinite(total)
    gcc = np.zeros_like(green, dtype=np.float32)
    gcc[mask] = green_f[mask] / total[mask]
    return gcc
def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir):
    """Expose the ``*DIST_CLOUD.tif`` rasters of ``src_s2_dir`` in ``dst_s2_dir``.

    Each matching file is symlinked into ``dst_s2_dir`` under the same name.
    If the symlink cannot be created (``OSError``), the file is copied
    instead. Destination entries that already resolve to a file are left
    untouched; dangling symlinks are replaced.

    Args:
        src_s2_dir: Directory holding the prepared S2 rasters.
        dst_s2_dir: Directory to populate; created if missing.
    """
    dst_s2_dir.mkdir(parents=True, exist_ok=True)
    for src in src_s2_dir.glob("*DIST_CLOUD.tif"):
        dst = dst_s2_dir / src.name
        if dst.exists():
            continue
        if dst.is_symlink():
            # exists() follows links, so a link whose target has vanished
            # lands here. Remove it: otherwise symlink_to() fails and the
            # copy fallback would write through the dead link.
            dst.unlink()
        try:
            dst.symlink_to(src.resolve())
        except OSError:
            shutil.copy2(src, dst)
def prepare_s2_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Write single-band GCC rasters derived from the prepared S2 REFL stacks.

    Every ``*REFL.tif`` in the prepared S2 directory with at least four
    bands is converted to a ``float32`` ``*_GCC.tif`` (nodata 0) in the ItB
    S2 directory, using bands 1-3 as blue, green and red. Outputs that
    already exist are kept. Afterwards the ``*DIST_CLOUD.tif`` rasters are
    linked (or copied) next to the GCC files. ``site_position`` is not used.
    """
    s2_prep = _get_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2"
    itb_s2.mkdir(parents=True, exist_ok=True)

    for refl_path in sorted(s2_prep.glob("*REFL.tif")):
        out = itb_s2 / refl_path.name.replace("_REFL.tif", "_GCC.tif")
        if out.exists():
            continue
        with rasterio.open(refl_path) as reader:
            if reader.count < 4:
                continue
            # Bands 1, 2, 3 are passed on as blue, green, red.
            blue, green, red = reader.read([1, 2, 3]).astype(np.float32)
            out_profile = reader.profile.copy()
        out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        gcc = _compute_gcc_from_refl_array(blue, green, red)
        with rasterio.open(out, "w", **out_profile) as writer:
            writer.write(gcc, 1)
        print(f"[S2-ITB] Saved {out.name}")

    _link_dist_cloud_from_prepared(s2_prep, itb_s2)
def prepare_s3_gcc_for_itb(
    season, site_position, site_name, cleaning_strategy="aggressive"
):
    """Write single-band GCC rasters derived from the prepared S3 composites.

    Every ``composite_*.tif`` in the prepared S3 directory with at least
    four bands is converted to a ``float32`` raster of the same file name
    (nodata 0) in the ItB S3 directory, using bands 1-3 as blue, green and
    red. Outputs that already exist are kept. ``site_position`` is not used.
    """
    s3_prep = _get_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3"
    itb_s3.mkdir(parents=True, exist_ok=True)

    for composite_path in sorted(s3_prep.glob("composite_*.tif")):
        out = itb_s3 / composite_path.name
        if out.exists():
            continue
        with rasterio.open(composite_path) as reader:
            if reader.count < 4:
                continue
            # Bands 1, 2, 3 are passed on as blue, green, red.
            blue, green, red = reader.read([1, 2, 3]).astype(np.float32)
            out_profile = reader.profile.copy()
        out_profile.update({"count": 1, "dtype": "float32", "nodata": 0})
        gcc = _compute_gcc_from_refl_array(blue, green, red)
        with rasterio.open(out, "w", **out_profile) as writer:
            writer.write(gcc, 1)
        print(f"[S3-ITB] Saved {out.name}")
def _reproject_raster_to_target( def _reproject_raster_to_target(
src_path, src_path,
dst_path, dst_path,
@ -90,7 +162,9 @@ def _rescale_dist_cloud_for_small_roi(s2_output_dir):
print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})") print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})")
def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): def prepare_s2(
season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
lat, lon = site_position lat, lon = site_position
s2_dir = Path(f"data/{site_name}/{season}/raw/s2/") s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
@ -99,7 +173,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
clouds = _load_excluded(season, site_name, cleaning_strategy) clouds = _load_excluded(season, site_name, cleaning_strategy)
s2_output_dir.mkdir(parents=True, exist_ok=True) s2_output_dir.mkdir(parents=True, exist_ok=True)
print(f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") print(
f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
)
s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]] s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
if not s3_files: if not s3_files:
@ -113,7 +189,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
for s2_file in sorted(s2_dir.glob("*.geotiff")): for s2_file in sorted(s2_dir.glob("*.geotiff")):
if s2_file.name in clouds["s2"]: if s2_file.name in clouds["s2"]:
print(f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})") print(
f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})"
)
continue continue
date_str = s2_file.name.split("_")[0] date_str = s2_file.name.split("_")[0]
refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif" refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
@ -136,14 +214,16 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive",
temp_normalized.unlink() temp_normalized.unlink()
print(f"[S2-PREP] Saved: {refl_dst}") print(f"[S2-PREP] Saved: {refl_dst}")
print(f"[S2-PREP] Computing distance-to-clouds...") print("[S2-PREP] Computing distance-to-clouds...")
distance_to_clouds = _import_distance_to_clouds() distance_to_clouds = _import_distance_to_clouds()
distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO) distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO)
_rescale_dist_cloud_for_small_roi(s2_output_dir) _rescale_dist_cloud_for_small_roi(s2_output_dir)
print("[S2-PREP] Completed") print("[S2-PREP] Completed")
def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): def prepare_s3(
season, site_position, site_name, cleaning_strategy="aggressive", date_range=None
):
lat, lon = site_position lat, lon = site_position
s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
base_dir = _get_base_dir(season, site_name, cleaning_strategy) base_dir = _get_base_dir(season, site_name, cleaning_strategy)
@ -153,16 +233,22 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
clouds = _load_excluded(season, site_name, cleaning_strategy) clouds = _load_excluded(season, site_name, cleaning_strategy)
s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
print(f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") print(
f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}"
)
s3_by_date = defaultdict(list) s3_by_date = defaultdict(list)
for s3_file in s3_dir.glob("*.geotiff"): for s3_file in s3_dir.glob("*.geotiff"):
if s3_file.name not in clouds["s3"]: if s3_file.name not in clouds["s3"]:
s3_by_date[s3_file.name.split("_")[0]].append(s3_file) s3_by_date[s3_file.name.split("_")[0]].append(s3_file)
else: else:
print(f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})") print(
f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})"
)
print(f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates") print(
f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates"
)
temp_composite_dir = s3_preprocessed_dir / "temp_composites" temp_composite_dir = s3_preprocessed_dir / "temp_composites"
if temp_composite_dir.exists(): if temp_composite_dir.exists():
@ -187,7 +273,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
profile.update({"count": composite.shape[0], "dtype": "float32"}) profile.update({"count": composite.shape[0], "dtype": "float32"})
with rasterio.open(composite_path, "w", **profile) as dst: with rasterio.open(composite_path, "w", **profile) as dst:
dst.write(composite) dst.write(composite)
print(f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged") print(
f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged"
)
# Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds
# This ensures fusion covers the same area as S2 and dimensions match # This ensures fusion covers the same area as S2 and dimensions match
@ -212,7 +300,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive",
height, height,
) )
print(f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)...") print(
f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)..."
)
# Reproject each S3 composite to match S2 REFL bounds # Reproject each S3 composite to match S2 REFL bounds
sen3_paths = sorted(temp_composite_dir.glob("*.tif")) sen3_paths = sorted(temp_composite_dir.glob("*.tif"))

2
pyproject.toml Normal file
View file

@ -0,0 +1,2 @@
[tool.ruff.lint.per-file-ignores]
"run.py" = ["F401"]

23
run.py
View file

@ -1,10 +1,19 @@
from fusion import run_all_efast_scenarios from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios
from postprocessing import post_process_all_scenarios, post_process_timeseries from postprocessing import (
post_process_all_scenarios,
post_process_all_itb_scenarios,
post_process_timeseries,
)
from acquisition_s2 import download_s2 from acquisition_s2 import download_s2
from acquisition_s3 import download_s3 from acquisition_s3 import download_s3
from acquisition_phenocam import download_phenocam from acquisition_phenocam import download_phenocam
from preselection import create_timeseries from preselection import create_timeseries
from preparation import prepare_s2, prepare_s3 from preparation import (
prepare_s2,
prepare_s3,
prepare_s2_gcc_for_itb,
prepare_s3_gcc_for_itb,
)
from metrics_indices import create_prepared_fusion_timeseries from metrics_indices import create_prepared_fusion_timeseries
from metrics_stats import calculate_all_metrics from metrics_stats import calculate_all_metrics
@ -28,6 +37,13 @@ def run_pipeline(season, site_position, site_name):
# print(f"Running EFAST fusion for all scenarios: {site_name}, {season}") # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}")
# run_all_efast_scenarios(season, site_position, site_name) # run_all_efast_scenarios(season, site_position, site_name)
# Index-then-Blend (ItB): GCC stacks, EFAST fusion with product=GCC
# for strategy in ["aggressive", "nonaggressive"]:
# prepare_s2_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy)
# prepare_s3_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy)
# run_all_efast_itb_scenarios(season, site_position, site_name)
# post_process_all_itb_scenarios(season, site_position, site_name)
print(f"Creating prepared/fusion timeseries: {site_name}, {season}") print(f"Creating prepared/fusion timeseries: {site_name}, {season}")
create_prepared_fusion_timeseries(season, site_position, site_name) create_prepared_fusion_timeseries(season, site_position, site_name)
@ -51,4 +67,3 @@ if __name__ == "__main__":
run_pipeline(2023, (64.2437, 19.7673), "vindeln2") run_pipeline(2023, (64.2437, 19.7673), "vindeln2")
run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1") run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1")
run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat") run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat")