Added efast.

2025-12-22 10:10:23 +01:00 · 2025-12-22 10:10:23 +01:00 · 83661762b3
commit 83661762b3
parent 607f577c6a
6 changed files with 293 additions and 40 deletions
--- a/.python-version
+++ b/.python-version
@ -0,0 +1 @@
 3.11.10
--- a/clouds.py
+++ b/clouds.py
@ -6,38 +6,45 @@ from datetime import datetime
 def detect_clouds(year, site_name):
    output_file = Path(f"data/{site_name}/{year}/clouds.json")
    clouds = {"s2": [], "s3": []}
-    
+
    for source in ["s2", "s3"]:
        timeseries_file = Path(f"data/{site_name}/{year}/ndvi/{source}/timeseries.json")
        if not timeseries_file.exists():
            print(f"[CLOUDS-{source.upper()}] No timeseries.json found")
            continue
-        
+
        print(f"[CLOUDS-{source.upper()}] Processing {timeseries_file}...")
-        
+
        with open(timeseries_file) as f:
            timeseries = json.load(f)
-        
+
-        entries = [(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) for e in timeseries if e["ndvi"] is not None]
+        entries = [
-        
+            (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
            for e in timeseries
            if e["ndvi"] is not None
        ]
        for entry, entry_date in entries:
            # Use 14-day window for seasonal context, require NDVI < 0.3 and >0.15 below max
-            window_ndvi = [e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14]
+            window_ndvi = [
-            
+                e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14
            ]
            if len(window_ndvi) < 3:
                continue
-            
+
            max_ndvi = max(window_ndvi)
            threshold = max_ndvi - 0.15
-            
+
            if entry["ndvi"] < threshold and entry["ndvi"] < 0.3:
                clouds[source].append(entry["filename"])
-        
+
-        print(f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files")
+        print(
-    
+            f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files"
        )
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w") as f:
        json.dump(clouds, f, indent=2)
    print(f"[CLOUDS] Saved: {output_file}")
    print(f"[CLOUDS] Saved: {output_file}")
--- a/download_s2.py
+++ b/download_s2.py
@ -5,7 +5,6 @@ import requests
 from rasterio.warp import transform_geom
 from rasterio.windows import from_bounds, transform as window_transform
 from pystac_client import Client
 from pathlib import Path
 def download_s2(year, site_position, site_name, date_range=None):
@ -129,7 +128,9 @@ def download_s2(year, site_position, site_name, date_range=None):
                    # If not found, try averaging all bands
                    if viewing_angle is None:
                        angles = []
-                        for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle"):
+                        for angle_elem in root.findall(
                            ".//Mean_Viewing_Incidence_Angle"
                        ):
                            zenith_elem = angle_elem.find("ZENITH_ANGLE")
                            if zenith_elem is not None:
                                angles.append(abs(float(zenith_elem.text)))
@ -145,7 +146,11 @@ def download_s2(year, site_position, site_name, date_range=None):
                if viewing_angle is not None:
                    dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle)
-            print(f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else f"[S2] Saved: {filepath}")
+            print(
                f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)"
                if viewing_angle
                else f"[S2] Saved: {filepath}"
            )
        else:
            print(f"[S2] Skipping {date}_{increment} (missing bands)")
--- a/efast.py
+++ b/efast.py
@ -0,0 +1,235 @@
 import json
 import shutil
 import importlib.util
 from pathlib import Path
 from datetime import datetime, timedelta
 import numpy as np
 import rasterio
 from rasterio.warp import Resampling
 from rasterio.vrt import WarpedVRT
 from scipy import ndimage
 _this_file = Path(__file__).resolve()
 _venv_lib = _this_file.parent.parent / "venv" / "lib"
 _efast_pkg_path = None
 if _venv_lib.exists():
    for py_dir in _venv_lib.glob("python*"):
        candidate = py_dir / "site-packages" / "efast" / "efast.py"
        if candidate.exists():
            _efast_pkg_path = candidate
            break
 if _efast_pkg_path and _efast_pkg_path.exists():
    spec = importlib.util.spec_from_file_location(
        "efast_fusion_module", _efast_pkg_path
    )
    efast_fusion = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(efast_fusion)
 else:
    import site
    for site_pkg in site.getsitepackages():
        candidate = Path(site_pkg) / "efast" / "efast.py"
        if candidate.exists():
            spec = importlib.util.spec_from_file_location(
                "efast_fusion_module", candidate
            )
            efast_fusion = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(efast_fusion)
            break
    else:
        raise ImportError(
            "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
        )
 def prepare_s2(year, site_position, site_name, date_range=None):
    s2_dir = Path(f"data/{site_name}/{year}/s2/")
    s3_dir = Path(f"data/{site_name}/{year}/s3/")
    s2_output_dir = Path(f"data/{site_name}/{year}/efast/s2/")
    clouds_file = Path(f"data/{site_name}/{year}/clouds.json")
    clouds = {"s2": set(), "s3": set()}
    if clouds_file.exists():
        clouds_data = json.loads(clouds_file.read_text())
        clouds["s2"] = set(clouds_data.get("s2", []))
        clouds["s3"] = set(clouds_data.get("s3", []))
    s2_output_dir.mkdir(parents=True, exist_ok=True)
    s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]]
    if not s3_files:
        raise ValueError("No non-cloud S3 files found for reference bounds")
    with rasterio.open(s3_files[0]) as s3_ref:
        target_bounds = s3_ref.bounds
        target_crs = s3_ref.crs
        s3_width = s3_ref.width
        s3_height = s3_ref.height
        ratio = 21
        s2_width = s3_width * ratio
        s2_height = s3_height * ratio
    for s2_file in s2_dir.glob("*.geotiff"):
        if s2_file.name in clouds["s2"]:
            continue
        date_str = s2_file.name.split("_")[0]
        refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif"
        if not refl_dst.exists():
            with rasterio.open(s2_file) as src:
                data = src.read().astype("float32") / 10000.0
                s2_res = (target_bounds.right - target_bounds.left) / s2_width
                dst_transform = rasterio.transform.from_bounds(
                    target_bounds.left,
                    target_bounds.bottom,
                    target_bounds.right,
                    target_bounds.top,
                    s2_width,
                    s2_height,
                )
                reprojected_data, _ = rasterio.warp.reproject(
                    source=data,
                    destination=np.zeros(
                        (src.count, s2_height, s2_width), dtype=data.dtype
                    ),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=dst_transform,
                    dst_crs=target_crs,
                    resampling=Resampling.cubic,
                )
                profile = src.profile.copy()
                profile.update(
                    {
                        "dtype": "float32",
                        "nodata": 0,
                        "width": s2_width,
                        "height": s2_height,
                        "transform": dst_transform,
                        "crs": target_crs,
                    }
                )
                with rasterio.open(refl_dst, "w", **profile) as dst_file:
                    dst_file.write(reprojected_data)
        dist_cloud_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif"
        if not dist_cloud_dst.exists():
            with rasterio.open(refl_dst) as src:
                s2_hr = src.read(1)
                mask = s2_hr == 0
                distance_to_cloud_hr = ndimage.distance_transform_edt(~mask)
                distance_to_cloud_hr = np.clip(distance_to_cloud_hr, 0, 255).astype(
                    "float32"
                )
                s3_res = (target_bounds.right - target_bounds.left) / s3_width
                lr_transform = rasterio.transform.from_bounds(
                    target_bounds.left,
                    target_bounds.bottom,
                    target_bounds.right,
                    target_bounds.top,
                    s3_width,
                    s3_height,
                )
                distance_to_cloud_lr, _ = rasterio.warp.reproject(
                    source=distance_to_cloud_hr[np.newaxis, :, :],
                    destination=np.zeros(
                        (1, s3_height, s3_width), dtype=distance_to_cloud_hr.dtype
                    ),
                    src_transform=src.transform,
                    src_crs=target_crs,
                    dst_transform=lr_transform,
                    dst_crs=target_crs,
                    resampling=Resampling.average,
                )
                distance_to_cloud_lr = distance_to_cloud_lr[0]
                profile = src.profile.copy()
                profile.update(
                    {
                        "count": 1,
                        "dtype": "float32",
                        "width": s3_width,
                        "height": s3_height,
                        "transform": lr_transform,
                    }
                )
                with rasterio.open(dist_cloud_dst, "w", **profile) as dst:
                    dst.write(distance_to_cloud_lr, 1)
 def prepare_s3(year, site_position, site_name, date_range=None):
    s3_dir = Path(f"data/{site_name}/{year}/s3/")
    s3_preprocessed_dir = Path(f"data/{site_name}/{year}/efast/s3/")
    clouds_file = Path(f"data/{site_name}/{year}/clouds.json")
    clouds = {"s3": set()}
    if clouds_file.exists():
        clouds_data = json.loads(clouds_file.read_text())
        clouds["s3"] = set(clouds_data.get("s3", []))
    s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
    for s3_file in s3_dir.glob("*.geotiff"):
        if s3_file.name in clouds["s3"]:
            continue
        date_str = s3_file.name.split("_")[0]
        output_path = s3_preprocessed_dir / f"composite_{date_str}.tif"
        if output_path.exists():
            continue
        shutil.copy2(s3_file, output_path)
 def run_efast(year, site_position, site_name, date_range=None):
    lat, lon = site_position
    datetime_range = date_range or f"{year}-01-01/{year}-12-31"
    efast_base_dir = Path(f"data/{site_name}/{year}/efast/")
    s2_output_dir = efast_base_dir / "s2"
    s3_output_dir = efast_base_dir / "s3"
    fusion_output_dir = efast_base_dir / "fusion"
    fusion_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {year}")
    start_date = datetime.strptime(datetime_range.split("/")[0], "%Y-%m-%d")
    end_date = datetime.strptime(datetime_range.split("/")[1], "%Y-%m-%d")
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y%m%d")
        output_file = fusion_output_dir / f"REFL_{date_str}.tif"
        if output_file.exists():
            print(f"[EFAST] Skipping {date_str} (exists)")
            current_date += timedelta(days=1)
            continue
        if list(s2_output_dir.glob(f"*{date_str}*REFL.tif")) and list(
            s3_output_dir.glob(f"composite_{date_str}.tif")
        ):
            try:
                efast_fusion.fusion(
                    current_date,
                    s3_output_dir,
                    s2_output_dir,
                    fusion_output_dir,
                    product="REFL",
                    max_days=30,
                    date_position=2,
                    minimum_acquisition_importance=0.0,
                    ratio=21,
                )
                if output_file.exists():
                    print(f"[EFAST] Saved: {output_file}")
                else:
                    print(f"[EFAST] No output for {date_str}")
            except Exception as e:
                print(f"[EFAST] Error processing {date_str}: {e}")
        else:
            print(f"[EFAST] Skipping {date_str} (insufficient data)")
        current_date += timedelta(days=1)
    print("[EFAST] Completed")
--- a/ndvi.py
+++ b/ndvi.py
@ -1,4 +1,3 @@
 import os
 import json
 import numpy as np
 import rasterio
@ -50,17 +49,17 @@ def generate_ndvi(year, site_position, site_name):
                    dst.set_band_description(1, "NDVI")
            print(f"[NDVI-{source.upper()}] Saved: {output_file}")
-        
+
        print(f"[NDVI-{source.upper()}] Completed")
 def create_ndvi_timeseries(year, site_position, site_name):
    for source in ["s2", "s3"]:
        output_dir = Path(f"data/{site_name}/{year}/ndvi/{source}/")
-        
+
        print(f"[NDVI-{source.upper()}] Creating timeseries.json...")
        timeseries = []
-        
+
        ndvi_files = sorted(output_dir.glob("*.geotiff"))
        for ndvi_file in ndvi_files:
            filename = ndvi_file.name
@ -69,7 +68,7 @@ def create_ndvi_timeseries(year, site_position, site_name):
                date = datetime.strptime(date_str, "%Y%m%d").isoformat()
            except ValueError:
                date = date_str
-            
+
            ndvi_value = None
            try:
                with rasterio.open(ndvi_file) as src:
@ -81,17 +80,17 @@ def create_ndvi_timeseries(year, site_position, site_name):
                        if value != 0 and not np.isnan(value):
                            ndvi_value = value
            except Exception as e:
-                print(f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}")
+                print(
-            
+                    f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}"
-            timeseries.append({
+                )
-                "date": date,
+
-                "filename": filename,
+            timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
-                "ndvi": ndvi_value
+
            })
        timeseries.sort(key=lambda x: x["date"])
        timeseries_file = output_dir / "timeseries.json"
        with open(timeseries_file, "w") as f:
            json.dump(timeseries, f, indent=2)
-        
+
-        print(f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)")
+        print(
            f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)"
        )
--- a/run.py
+++ b/run.py
@ -1,7 +1,4 @@
-from download_s2 import download_s2
+from efast import run_efast, prepare_s2, prepare_s3
 from download_s3 import download_s3
 from ndvi import generate_ndvi, create_ndvi_timeseries
 from clouds import detect_clouds
 year = 2024
 site_position = (47.116171, 11.320308)
@ -17,6 +14,15 @@ site_name = "innsbruck"
 # create_ndvi_timeseries(year, site_position, site_name)
 # print("All NDVI generation completed")
-print(f"Detecting clouds for {site_name}, {year}")
+# print(f"Detecting clouds for {site_name}, {year}")
-detect_clouds(year, site_name)
+# detect_clouds(year, site_name)
-print("Cloud detection completed")
+# print("Cloud detection completed")
 print(f"Preparing data for EFAST fusion for {site_name}, {year}")
 prepare_s2(year, site_position, site_name)
 prepare_s3(year, site_position, site_name)
 print("Data preparation completed")
 print(f"Running EFAST fusion for {site_name}, {year}")
 run_efast(year, site_position, site_name)
 print("EFAST fusion completed")