diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..3e72aa6 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11.10 diff --git a/clouds.py b/clouds.py index 4c36616..a075bcc 100644 --- a/clouds.py +++ b/clouds.py @@ -6,38 +6,45 @@ from datetime import datetime def detect_clouds(year, site_name): output_file = Path(f"data/{site_name}/{year}/clouds.json") clouds = {"s2": [], "s3": []} - + for source in ["s2", "s3"]: timeseries_file = Path(f"data/{site_name}/{year}/ndvi/{source}/timeseries.json") if not timeseries_file.exists(): print(f"[CLOUDS-{source.upper()}] No timeseries.json found") continue - + print(f"[CLOUDS-{source.upper()}] Processing {timeseries_file}...") - + with open(timeseries_file) as f: timeseries = json.load(f) - - entries = [(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) for e in timeseries if e["ndvi"] is not None] - + + entries = [ + (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) + for e in timeseries + if e["ndvi"] is not None + ] + for entry, entry_date in entries: # Use 14-day window for seasonal context, require NDVI < 0.3 and >0.15 below max - window_ndvi = [e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14] - + window_ndvi = [ + e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14 + ] + if len(window_ndvi) < 3: continue - + max_ndvi = max(window_ndvi) threshold = max_ndvi - 0.15 - + if entry["ndvi"] < threshold and entry["ndvi"] < 0.3: clouds[source].append(entry["filename"]) - - print(f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files") - + + print( + f"[CLOUDS-{source.upper()}] Found {len(clouds[source])} cloud-covered files" + ) + output_file.parent.mkdir(parents=True, exist_ok=True) with open(output_file, "w") as f: json.dump(clouds, f, indent=2) - - print(f"[CLOUDS] Saved: {output_file}") + print(f"[CLOUDS] Saved: {output_file}") diff --git a/download_s2.py b/download_s2.py index 34dd37f..bcec0a4 100644 --- a/download_s2.py +++ b/download_s2.py @@ -5,7 +5,6 @@ import requests from rasterio.warp import transform_geom from rasterio.windows import from_bounds, transform as window_transform from pystac_client import Client -from pathlib import Path def download_s2(year, site_position, site_name, date_range=None): @@ -129,7 +128,9 @@ def download_s2(year, site_position, site_name, date_range=None): # If not found, try averaging all bands if viewing_angle is None: angles = [] - for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle"): + for angle_elem in root.findall( + ".//Mean_Viewing_Incidence_Angle" + ): zenith_elem = angle_elem.find("ZENITH_ANGLE") if zenith_elem is not None: angles.append(abs(float(zenith_elem.text))) @@ -145,7 +146,11 @@ def download_s2(year, site_position, site_name, date_range=None): if viewing_angle is not None: dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle) - print(f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else f"[S2] Saved: {filepath}") + print( + f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" + if viewing_angle + else f"[S2] Saved: {filepath}" + ) else: print(f"[S2] Skipping {date}_{increment} (missing bands)") diff --git a/efast.py b/efast.py new file mode 100644 index 0000000..39fb97a --- /dev/null +++ b/efast.py @@ -0,0 +1,235 @@ +import json +import shutil +import importlib.util +from pathlib import Path +from datetime import datetime, timedelta +import numpy as np +import rasterio +from rasterio.warp import Resampling +from rasterio.vrt import WarpedVRT +from scipy import ndimage + +_this_file = Path(__file__).resolve() +_venv_lib = _this_file.parent.parent / "venv" / "lib" +_efast_pkg_path = None +if _venv_lib.exists(): + for py_dir in _venv_lib.glob("python*"): + candidate = py_dir / "site-packages" / "efast" / "efast.py" + if candidate.exists(): + _efast_pkg_path = candidate + break + +if _efast_pkg_path and _efast_pkg_path.exists(): + spec = importlib.util.spec_from_file_location( + "efast_fusion_module", _efast_pkg_path + ) + efast_fusion = importlib.util.module_from_spec(spec) + spec.loader.exec_module(efast_fusion) +else: + import site + + for site_pkg in site.getsitepackages(): + candidate = Path(site_pkg) / "efast" / "efast.py" + if candidate.exists(): + spec = importlib.util.spec_from_file_location( + "efast_fusion_module", candidate + ) + efast_fusion = importlib.util.module_from_spec(spec) + spec.loader.exec_module(efast_fusion) + break + else: + raise ImportError( + "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git" + ) + + +def prepare_s2(year, site_position, site_name, date_range=None): + s2_dir = Path(f"data/{site_name}/{year}/s2/") + s3_dir = Path(f"data/{site_name}/{year}/s3/") + s2_output_dir = Path(f"data/{site_name}/{year}/efast/s2/") + clouds_file = Path(f"data/{site_name}/{year}/clouds.json") + + clouds = {"s2": set(), "s3": set()} + if clouds_file.exists(): + clouds_data = json.loads(clouds_file.read_text()) + clouds["s2"] = set(clouds_data.get("s2", [])) + clouds["s3"] = set(clouds_data.get("s3", [])) + + s2_output_dir.mkdir(parents=True, exist_ok=True) + + s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]] + if not s3_files: + raise ValueError("No non-cloud S3 files found for reference bounds") + + with rasterio.open(s3_files[0]) as s3_ref: + target_bounds = s3_ref.bounds + target_crs = s3_ref.crs + s3_width = s3_ref.width + s3_height = s3_ref.height + ratio = 21 + s2_width = s3_width * ratio + s2_height = s3_height * ratio + + for s2_file in s2_dir.glob("*.geotiff"): + if s2_file.name in clouds["s2"]: + continue + date_str = s2_file.name.split("_")[0] + + refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif" + if not refl_dst.exists(): + with rasterio.open(s2_file) as src: + data = src.read().astype("float32") / 10000.0 + s2_res = (target_bounds.right - target_bounds.left) / s2_width + dst_transform = rasterio.transform.from_bounds( + target_bounds.left, + target_bounds.bottom, + target_bounds.right, + target_bounds.top, + s2_width, + s2_height, + ) + reprojected_data, _ = rasterio.warp.reproject( + source=data, + destination=np.zeros( + (src.count, s2_height, s2_width), dtype=data.dtype + ), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=dst_transform, + dst_crs=target_crs, + resampling=Resampling.cubic, + ) + profile = src.profile.copy() + profile.update( + { + "dtype": "float32", + "nodata": 0, + "width": s2_width, + "height": s2_height, + "transform": dst_transform, + "crs": target_crs, + } + ) + with rasterio.open(refl_dst, "w", **profile) as dst_file: + dst_file.write(reprojected_data) + + dist_cloud_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif" + if not dist_cloud_dst.exists(): + with rasterio.open(refl_dst) as src: + s2_hr = src.read(1) + mask = s2_hr == 0 + distance_to_cloud_hr = ndimage.distance_transform_edt(~mask) + distance_to_cloud_hr = np.clip(distance_to_cloud_hr, 0, 255).astype( + "float32" + ) + + s3_res = (target_bounds.right - target_bounds.left) / s3_width + lr_transform = rasterio.transform.from_bounds( + target_bounds.left, + target_bounds.bottom, + target_bounds.right, + target_bounds.top, + s3_width, + s3_height, + ) + distance_to_cloud_lr, _ = rasterio.warp.reproject( + source=distance_to_cloud_hr[np.newaxis, :, :], + destination=np.zeros( + (1, s3_height, s3_width), dtype=distance_to_cloud_hr.dtype + ), + src_transform=src.transform, + src_crs=target_crs, + dst_transform=lr_transform, + dst_crs=target_crs, + resampling=Resampling.average, + ) + distance_to_cloud_lr = distance_to_cloud_lr[0] + + profile = src.profile.copy() + profile.update( + { + "count": 1, + "dtype": "float32", + "width": s3_width, + "height": s3_height, + "transform": lr_transform, + } + ) + with rasterio.open(dist_cloud_dst, "w", **profile) as dst: + dst.write(distance_to_cloud_lr, 1) + + +def prepare_s3(year, site_position, site_name, date_range=None): + s3_dir = Path(f"data/{site_name}/{year}/s3/") + s3_preprocessed_dir = Path(f"data/{site_name}/{year}/efast/s3/") + clouds_file = Path(f"data/{site_name}/{year}/clouds.json") + + clouds = {"s3": set()} + if clouds_file.exists(): + clouds_data = json.loads(clouds_file.read_text()) + clouds["s3"] = set(clouds_data.get("s3", [])) + + s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) + for s3_file in s3_dir.glob("*.geotiff"): + if s3_file.name in clouds["s3"]: + continue + date_str = s3_file.name.split("_")[0] + output_path = s3_preprocessed_dir / f"composite_{date_str}.tif" + if output_path.exists(): + continue + shutil.copy2(s3_file, output_path) + + +def run_efast(year, site_position, site_name, date_range=None): + lat, lon = site_position + datetime_range = date_range or f"{year}-01-01/{year}-12-31" + + efast_base_dir = Path(f"data/{site_name}/{year}/efast/") + s2_output_dir = efast_base_dir / "s2" + s3_output_dir = efast_base_dir / "s3" + fusion_output_dir = efast_base_dir / "fusion" + + fusion_output_dir.mkdir(parents=True, exist_ok=True) + + print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {year}") + + start_date = datetime.strptime(datetime_range.split("/")[0], "%Y-%m-%d") + end_date = datetime.strptime(datetime_range.split("/")[1], "%Y-%m-%d") + + current_date = start_date + while current_date <= end_date: + date_str = current_date.strftime("%Y%m%d") + + output_file = fusion_output_dir / f"REFL_{date_str}.tif" + if output_file.exists(): + print(f"[EFAST] Skipping {date_str} (exists)") + current_date += timedelta(days=1) + continue + + if list(s2_output_dir.glob(f"*{date_str}*REFL.tif")) and list( + s3_output_dir.glob(f"composite_{date_str}.tif") + ): + try: + efast_fusion.fusion( + current_date, + s3_output_dir, + s2_output_dir, + fusion_output_dir, + product="REFL", + max_days=30, + date_position=2, + minimum_acquisition_importance=0.0, + ratio=21, + ) + if output_file.exists(): + print(f"[EFAST] Saved: {output_file}") + else: + print(f"[EFAST] No output for {date_str}") + except Exception as e: + print(f"[EFAST] Error processing {date_str}: {e}") + else: + print(f"[EFAST] Skipping {date_str} (insufficient data)") + + current_date += timedelta(days=1) + + print("[EFAST] Completed") diff --git a/ndvi.py b/ndvi.py index 6b02d85..f12b62f 100644 --- a/ndvi.py +++ b/ndvi.py @@ -1,4 +1,3 @@ -import os import json import numpy as np import rasterio @@ -50,17 +49,17 @@ def generate_ndvi(year, site_position, site_name): dst.set_band_description(1, "NDVI") print(f"[NDVI-{source.upper()}] Saved: {output_file}") - + print(f"[NDVI-{source.upper()}] Completed") def create_ndvi_timeseries(year, site_position, site_name): for source in ["s2", "s3"]: output_dir = Path(f"data/{site_name}/{year}/ndvi/{source}/") - + print(f"[NDVI-{source.upper()}] Creating timeseries.json...") timeseries = [] - + ndvi_files = sorted(output_dir.glob("*.geotiff")) for ndvi_file in ndvi_files: filename = ndvi_file.name @@ -69,7 +68,7 @@ def create_ndvi_timeseries(year, site_position, site_name): date = datetime.strptime(date_str, "%Y%m%d").isoformat() except ValueError: date = date_str - + ndvi_value = None try: with rasterio.open(ndvi_file) as src: @@ -81,17 +80,17 @@ def create_ndvi_timeseries(year, site_position, site_name): if value != 0 and not np.isnan(value): ndvi_value = value except Exception as e: - print(f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}") - - timeseries.append({ - "date": date, - "filename": filename, - "ndvi": ndvi_value - }) - + print( + f"[NDVI-{source.upper()}] Warning: Could not sample {filename}: {e}" + ) + + timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) + timeseries.sort(key=lambda x: x["date"]) timeseries_file = output_dir / "timeseries.json" with open(timeseries_file, "w") as f: json.dump(timeseries, f, indent=2) - - print(f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)") + + print( + f"[NDVI-{source.upper()}] Saved: {timeseries_file} ({len(timeseries)} entries)" + ) diff --git a/run.py b/run.py index 7ffa6bc..ae58b5e 100644 --- a/run.py +++ b/run.py @@ -1,7 +1,4 @@ -from download_s2 import download_s2 -from download_s3 import download_s3 -from ndvi import generate_ndvi, create_ndvi_timeseries -from clouds import detect_clouds +from efast import run_efast, prepare_s2, prepare_s3 year = 2024 site_position = (47.116171, 11.320308) @@ -17,6 +14,15 @@ site_name = "innsbruck" # create_ndvi_timeseries(year, site_position, site_name) # print("All NDVI generation completed") -print(f"Detecting clouds for {site_name}, {year}") -detect_clouds(year, site_name) -print("Cloud detection completed") +# print(f"Detecting clouds for {site_name}, {year}") +# detect_clouds(year, site_name) +# print("Cloud detection completed") + +print(f"Preparing data for EFAST fusion for {site_name}, {year}") +prepare_s2(year, site_position, site_name) +prepare_s3(year, site_position, site_name) +print("Data preparation completed") + +print(f"Running EFAST fusion for {site_name}, {year}") +run_efast(year, site_position, site_name) +print("EFAST fusion completed")