diff --git a/call_efast.py b/call_efast.py index 54ade92..20543b5 100644 --- a/call_efast.py +++ b/call_efast.py @@ -148,8 +148,47 @@ def prepare_s3(season, site_position, site_name, date_range=None): with rasterio.open(composite_path, "w", **profile) as dst: dst.write(composite) - _, _, reproject_and_crop_s3 = _import_efast() - reproject_and_crop_s3(temp_composite_dir, s2_prepared_dir, s3_preprocessed_dir) + # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds + # This ensures fusion covers the same area as S2 and dimensions match + sen2_ref_paths = list(s2_prepared_dir.glob("*REFL.tif")) + if len(sen2_ref_paths) == 0: + raise ValueError(f"No REFL files found in {s2_prepared_dir}") + + # Get bounds from REFL file (full coverage, matches S2) + with rasterio.open(sen2_ref_paths[0]) as s2_ref: + target_bounds = s2_ref.bounds + target_crs = s2_ref.crs + s2_resolution = abs(s2_ref.transform[0]) + s3_resolution = s2_resolution * RESOLUTION_RATIO + width = int((target_bounds.right - target_bounds.left) / s3_resolution) + height = int((target_bounds.top - target_bounds.bottom) / s3_resolution) + s3_transform = rasterio.transform.from_bounds( + target_bounds.left, + target_bounds.bottom, + target_bounds.right, + target_bounds.top, + width, + height, + ) + + # Reproject each S3 composite to match S2 REFL bounds + sen3_paths = list(temp_composite_dir.glob("*.tif")) + for sen3_path in sen3_paths: + vrt_options = { + "transform": s3_transform, + "height": height, + "width": width, + "crs": target_crs, + "resampling": Resampling.cubic, + } + with rasterio.open(sen3_path) as s3_src: + with WarpedVRT(s3_src, **vrt_options) as vrt: + name = sen3_path.name + outfile = s3_preprocessed_dir / name + profile = vrt.profile.copy() + profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"}) + rio_shutil.copy(vrt, outfile, **profile) + shutil.rmtree(temp_composite_dir) diff --git a/download_s3.py b/download_s3.py index 7098dc1..4ad9409 100644 --- a/download_s3.py +++ b/download_s3.py @@ -1,4 +1,5 @@ import os +import time from pathlib import Path from datetime import datetime from dotenv import load_dotenv @@ -11,7 +12,7 @@ from rasterio.transform import from_bounds load_dotenv() -BBOX_SIZE = 0.011 +BBOX_SIZE = 0.016 # Larger than S2 to ensure full coverage including padded pixels def _get_bbox(lon, lat): @@ -131,10 +132,28 @@ def download_s3(season, site_position, site_name, date_range=None): ).resample_spatial(projection=32632) output_file = output_dir / "s3_data.nc" - print("[S3] Downloading NetCDF...") - datacube.download(str(output_file), format="NetCDF") + print(f"[S3] Downloading NetCDF to {output_file}...") + print(f"[S3] Temporal extent: {start_date} to {end_date}") + print(f"[S3] Spatial extent: {spatial_extent}") + print(f"[S3] Bands: {openeo_bands}") + print("[S3] This may take several minutes depending on data volume...") + + start_time = time.time() + try: + datacube.download(str(output_file), format="NetCDF") + elapsed = time.time() - start_time + print(f"[S3] Download completed in {elapsed:.1f} seconds") + except Exception as e: + elapsed = time.time() - start_time + print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}") + raise print("[S3] Processing NetCDF...") + process_start = time.time() _process_netcdf(output_file, output_dir, bands, openeo_bands) + process_elapsed = time.time() - process_start + print(f"[S3] Processing completed in {process_elapsed:.1f} seconds") + + print(f"[S3] Removing temporary NetCDF file...") os.remove(output_file) print("[S3] Completed") diff --git a/ndvi.py b/ndvi.py index bd3b588..15ebb5e 100644 --- a/ndvi.py +++ b/ndvi.py @@ -126,6 +126,10 @@ def _process_ndvi_files( return for geotiff_file in geotiff_files: + # Skip DIST_CLOUD files silently (single-band distance-to-clouds, not suitable for NDVI) + if "DIST_CLOUD" in geotiff_file.name: + continue + # Check if file has enough bands (need at least 4 for RED and NIR) try: with rasterio.open(geotiff_file) as src: diff --git a/run.py b/run.py index 16149c2..35f789b 100644 --- a/run.py +++ b/run.py @@ -14,14 +14,14 @@ def run_pipeline(season, site_position, site_name): try: # print(f"Downloading data for {site_name}, {season}") # download_s2(season, site_position, site_name) - # download_s3(season, site_position, site_name) + download_s3(season, site_position, site_name) # print(f"Generating NDVI for raw data: {site_name}, {season}") - # generate_ndvi_raw(season, site_position, site_name) - # create_ndvi_timeseries_raw(season, site_position, site_name) + generate_ndvi_raw(season, site_position, site_name) + create_ndvi_timeseries_raw(season, site_position, site_name) # print(f"Detecting clouds for {site_name}, {season}") - # detect_clouds(season, site_name) + detect_clouds(season, site_name) print(f"Preparing data for EFAST fusion for {site_name}, {season}") prepare_s2(season, site_position, site_name)