Added efast.

This commit is contained in:
Felix Delattre 2025-12-22 10:10:23 +01:00
parent 607f577c6a
commit 83661762b3
6 changed files with 293 additions and 40 deletions

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.11.10

View file

@ -6,38 +6,45 @@ from datetime import datetime
def detect_clouds(year, site_name):
output_file = Path(f"data/{site_name}/{year}/clouds.json")
clouds = {"s2": [], "s3": []}
for source in ["s2", "s3"]:
timeseries_file = Path(f"data/{site_name}/{year}/ndvi/{source}/timeseries.json")
if not timeseries_file.exists():
print(f"[CLOUDS-{source.upper()}] No timeseries.json found")
continue
print(f"[CLOUDS-{source.upper()}] Processing {timeseries_file}...")
with open(timeseries_file) as f:
timeseries = json.load(f)
entries = [(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) for e in timeseries if e["ndvi"] is not None]
entries = [
(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
for e in timeseries
if e["ndvi"] is not None
]
for entry, entry_date in entries:
# Use 14-day window for seasonal context, require NDVI < 0.3 and >0.15 below max
window_ndvi = [e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14]
window_ndvi = [
e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14
]
if len(window_ndvi) < 3:
continue
max_ndvi = max(window_ndvi)
threshold = max_ndvi - 0.15
if entry["ndvi"] < threshold and entry["ndvi"] < 0.3:
clouds[source].append(entry["filename"])
print(f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files")
print(
f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files"
)
output_file.parent.mkdir(parents=True, exist_ok=True)
with open(output_file, "w") as f:
json.dump(clouds, f, indent=2)
print(f"[CLOUDS] Saved: {output_file}")
print(f"[CLOUDS] Saved: {output_file}")

View file

@ -5,7 +5,6 @@ import requests
from rasterio.warp import transform_geom
from rasterio.windows import from_bounds, transform as window_transform
from pystac_client import Client
from pathlib import Path
def download_s2(year, site_position, site_name, date_range=None):
@ -129,7 +128,9 @@ def download_s2(year, site_position, site_name, date_range=None):
# If not found, try averaging all bands
if viewing_angle is None:
angles = []
for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle"):
for angle_elem in root.findall(
".//Mean_Viewing_Incidence_Angle"
):
zenith_elem = angle_elem.find("ZENITH_ANGLE")
if zenith_elem is not None:
angles.append(abs(float(zenith_elem.text)))
@ -145,7 +146,11 @@ def download_s2(year, site_position, site_name, date_range=None):
if viewing_angle is not None:
dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle)
print(f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else f"[S2] Saved: {filepath}")
print(
f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)"
if viewing_angle
else f"[S2] Saved: {filepath}"
)
else:
print(f"[S2] Skipping {date}_{increment} (missing bands)")

235
efast.py Normal file
View file

@ -0,0 +1,235 @@
import json
import shutil
import importlib.util
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import rasterio
from rasterio.warp import Resampling
from rasterio.vrt import WarpedVRT
from scipy import ndimage
_this_file = Path(__file__).resolve()
_venv_lib = _this_file.parent.parent / "venv" / "lib"
_efast_pkg_path = None
if _venv_lib.exists():
for py_dir in _venv_lib.glob("python*"):
candidate = py_dir / "site-packages" / "efast" / "efast.py"
if candidate.exists():
_efast_pkg_path = candidate
break
if _efast_pkg_path and _efast_pkg_path.exists():
spec = importlib.util.spec_from_file_location(
"efast_fusion_module", _efast_pkg_path
)
efast_fusion = importlib.util.module_from_spec(spec)
spec.loader.exec_module(efast_fusion)
else:
import site
for site_pkg in site.getsitepackages():
candidate = Path(site_pkg) / "efast" / "efast.py"
if candidate.exists():
spec = importlib.util.spec_from_file_location(
"efast_fusion_module", candidate
)
efast_fusion = importlib.util.module_from_spec(spec)
spec.loader.exec_module(efast_fusion)
break
else:
raise ImportError(
"efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
)
def prepare_s2(year, site_position, site_name, date_range=None):
s2_dir = Path(f"data/{site_name}/{year}/s2/")
s3_dir = Path(f"data/{site_name}/{year}/s3/")
s2_output_dir = Path(f"data/{site_name}/{year}/efast/s2/")
clouds_file = Path(f"data/{site_name}/{year}/clouds.json")
clouds = {"s2": set(), "s3": set()}
if clouds_file.exists():
clouds_data = json.loads(clouds_file.read_text())
clouds["s2"] = set(clouds_data.get("s2", []))
clouds["s3"] = set(clouds_data.get("s3", []))
s2_output_dir.mkdir(parents=True, exist_ok=True)
s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
if not s3_files:
raise ValueError("No non-cloud S3 files found for reference bounds")
with rasterio.open(s3_files[0]) as s3_ref:
target_bounds = s3_ref.bounds
target_crs = s3_ref.crs
s3_width = s3_ref.width
s3_height = s3_ref.height
ratio = 21
s2_width = s3_width * ratio
s2_height = s3_height * ratio
for s2_file in s2_dir.glob("*.geotiff"):
if s2_file.name in clouds["s2"]:
continue
date_str = s2_file.name.split("_")[0]
refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
if not refl_dst.exists():
with rasterio.open(s2_file) as src:
data = src.read().astype("float32") / 10000.0
s2_res = (target_bounds.right - target_bounds.left) / s2_width
dst_transform = rasterio.transform.from_bounds(
target_bounds.left,
target_bounds.bottom,
target_bounds.right,
target_bounds.top,
s2_width,
s2_height,
)
reprojected_data, _ = rasterio.warp.reproject(
source=data,
destination=np.zeros(
(src.count, s2_height, s2_width), dtype=data.dtype
),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=dst_transform,
dst_crs=target_crs,
resampling=Resampling.cubic,
)
profile = src.profile.copy()
profile.update(
{
"dtype": "float32",
"nodata": 0,
"width": s2_width,
"height": s2_height,
"transform": dst_transform,
"crs": target_crs,
}
)
with rasterio.open(refl_dst, "w", **profile) as dst_file:
dst_file.write(reprojected_data)
dist_cloud_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif"
if not dist_cloud_dst.exists():
with rasterio.open(refl_dst) as src:
s2_hr = src.read(1)
mask = s2_hr == 0
distance_to_cloud_hr = ndimage.distance_transform_edt(~mask)
distance_to_cloud_hr = np.clip(distance_to_cloud_hr, 0, 255).astype(
"float32"
)
s3_res = (target_bounds.right - target_bounds.left) / s3_width
lr_transform = rasterio.transform.from_bounds(
target_bounds.left,
target_bounds.bottom,
target_bounds.right,
target_bounds.top,
s3_width,
s3_height,
)
distance_to_cloud_lr, _ = rasterio.warp.reproject(
source=distance_to_cloud_hr[np.newaxis, :, :],
destination=np.zeros(
(1, s3_height, s3_width), dtype=distance_to_cloud_hr.dtype
),
src_transform=src.transform,
src_crs=target_crs,
dst_transform=lr_transform,
dst_crs=target_crs,
resampling=Resampling.average,
)
distance_to_cloud_lr = distance_to_cloud_lr[0]
profile = src.profile.copy()
profile.update(
{
"count": 1,
"dtype": "float32",
"width": s3_width,
"height": s3_height,
"transform": lr_transform,
}
)
with rasterio.open(dist_cloud_dst, "w", **profile) as dst:
dst.write(distance_to_cloud_lr, 1)
def prepare_s3(year, site_position, site_name, date_range=None):
s3_dir = Path(f"data/{site_name}/{year}/s3/")
s3_preprocessed_dir = Path(f"data/{site_name}/{year}/efast/s3/")
clouds_file = Path(f"data/{site_name}/{year}/clouds.json")
clouds = {"s3": set()}
if clouds_file.exists():
clouds_data = json.loads(clouds_file.read_text())
clouds["s3"] = set(clouds_data.get("s3", []))
s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
for s3_file in s3_dir.glob("*.geotiff"):
if s3_file.name in clouds["s3"]:
continue
date_str = s3_file.name.split("_")[0]
output_path = s3_preprocessed_dir / f"composite_{date_str}.tif"
if output_path.exists():
continue
shutil.copy2(s3_file, output_path)
def run_efast(year, site_position, site_name, date_range=None):
lat, lon = site_position
datetime_range = date_range or f"{year}-01-01/{year}-12-31"
efast_base_dir = Path(f"data/{site_name}/{year}/efast/")
s2_output_dir = efast_base_dir / "s2"
s3_output_dir = efast_base_dir / "s3"
fusion_output_dir = efast_base_dir / "fusion"
fusion_output_dir.mkdir(parents=True, exist_ok=True)
print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {year}")
start_date = datetime.strptime(datetime_range.split("/")[0], "%Y-%m-%d")
end_date = datetime.strptime(datetime_range.split("/")[1], "%Y-%m-%d")
current_date = start_date
while current_date <= end_date:
date_str = current_date.strftime("%Y%m%d")
output_file = fusion_output_dir / f"REFL_{date_str}.tif"
if output_file.exists():
print(f"[EFAST] Skipping {date_str} (exists)")
current_date += timedelta(days=1)
continue
if list(s2_output_dir.glob(f"*{date_str}*REFL.tif")) and list(
s3_output_dir.glob(f"composite_{date_str}.tif")
):
try:
efast_fusion.fusion(
current_date,
s3_output_dir,
s2_output_dir,
fusion_output_dir,
product="REFL",
max_days=30,
date_position=2,
minimum_acquisition_importance=0.0,
ratio=21,
)
if output_file.exists():
print(f"[EFAST] Saved: {output_file}")
else:
print(f"[EFAST] No output for {date_str}")
except Exception as e:
print(f"[EFAST] Error processing {date_str}: {e}")
else:
print(f"[EFAST] Skipping {date_str} (insufficient data)")
current_date += timedelta(days=1)
print("[EFAST] Completed")

29
ndvi.py
View file

@ -1,4 +1,3 @@
import os
import json
import numpy as np
import rasterio
@ -50,17 +49,17 @@ def generate_ndvi(year, site_position, site_name):
dst.set_band_description(1, "NDVI")
print(f"[NDVI-{source.upper()}] Saved: {output_file}")
print(f"[NDVI-{source.upper()}] Completed")
def create_ndvi_timeseries(year, site_position, site_name):
for source in ["s2", "s3"]:
output_dir = Path(f"data/{site_name}/{year}/ndvi/{source}/")
print(f"[NDVI-{source.upper()}] Creating timeseries.json...")
timeseries = []
ndvi_files = sorted(output_dir.glob("*.geotiff"))
for ndvi_file in ndvi_files:
filename = ndvi_file.name
@ -69,7 +68,7 @@ def create_ndvi_timeseries(year, site_position, site_name):
date = datetime.strptime(date_str, "%Y%m%d").isoformat()
except ValueError:
date = date_str
ndvi_value = None
try:
with rasterio.open(ndvi_file) as src:
@ -81,17 +80,17 @@ def create_ndvi_timeseries(year, site_position, site_name):
if value != 0 and not np.isnan(value):
ndvi_value = value
except Exception as e:
print(f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}")
timeseries.append({
"date": date,
"filename": filename,
"ndvi": ndvi_value
})
print(
f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}"
)
timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
timeseries.sort(key=lambda x: x["date"])
timeseries_file = output_dir / "timeseries.json"
with open(timeseries_file, "w") as f:
json.dump(timeseries, f, indent=2)
print(f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)")
print(
f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)"
)

20
run.py
View file

@ -1,7 +1,4 @@
from download_s2 import download_s2
from download_s3 import download_s3
from ndvi import generate_ndvi, create_ndvi_timeseries
from clouds import detect_clouds
from efast import run_efast, prepare_s2, prepare_s3
year = 2024
site_position = (47.116171, 11.320308)
@ -17,6 +14,15 @@ site_name = "innsbruck"
# create_ndvi_timeseries(year, site_position, site_name)
# print("All NDVI generation completed")
print(f"Detecting clouds for {site_name}, {year}")
detect_clouds(year, site_name)
print("Cloud detection completed")
# print(f"Detecting clouds for {site_name}, {year}")
# detect_clouds(year, site_name)
# print("Cloud detection completed")
print(f"Preparing data for EFAST fusion for {site_name}, {year}")
prepare_s2(year, site_position, site_name)
prepare_s3(year, site_position, site_name)
print("Data preparation completed")
print(f"Running EFAST fusion for {site_name}, {year}")
run_efast(year, site_position, site_name)
print("EFAST fusion completed")