Larger s3 area.

This commit is contained in:
Felix Delattre 2026-01-11 02:20:17 +01:00
parent d925378ff4
commit cd1a7d0ab8
4 changed files with 71 additions and 9 deletions

View file

@ -148,8 +148,47 @@ def prepare_s3(season, site_position, site_name, date_range=None):
with rasterio.open(composite_path, "w", **profile) as dst:
dst.write(composite)
_, _, reproject_and_crop_s3 = _import_efast()
reproject_and_crop_s3(temp_composite_dir, s2_prepared_dir, s3_preprocessed_dir)
# Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds
# This ensures fusion covers the same area as S2 and dimensions match
sen2_ref_paths = list(s2_prepared_dir.glob("*REFL.tif"))
if len(sen2_ref_paths) == 0:
raise ValueError(f"No REFL files found in {s2_prepared_dir}")
# Get bounds from REFL file (full coverage, matches S2)
with rasterio.open(sen2_ref_paths[0]) as s2_ref:
target_bounds = s2_ref.bounds
target_crs = s2_ref.crs
s2_resolution = abs(s2_ref.transform[0])
s3_resolution = s2_resolution * RESOLUTION_RATIO
width = int((target_bounds.right - target_bounds.left) / s3_resolution)
height = int((target_bounds.top - target_bounds.bottom) / s3_resolution)
s3_transform = rasterio.transform.from_bounds(
target_bounds.left,
target_bounds.bottom,
target_bounds.right,
target_bounds.top,
width,
height,
)
# Reproject each S3 composite to match S2 REFL bounds
sen3_paths = list(temp_composite_dir.glob("*.tif"))
for sen3_path in sen3_paths:
vrt_options = {
"transform": s3_transform,
"height": height,
"width": width,
"crs": target_crs,
"resampling": Resampling.cubic,
}
with rasterio.open(sen3_path) as s3_src:
with WarpedVRT(s3_src, **vrt_options) as vrt:
name = sen3_path.name
outfile = s3_preprocessed_dir / name
profile = vrt.profile.copy()
profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"})
rio_shutil.copy(vrt, outfile, **profile)
shutil.rmtree(temp_composite_dir)

View file

@ -1,4 +1,5 @@
import os
import time
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
@ -11,7 +12,7 @@ from rasterio.transform import from_bounds
load_dotenv()
BBOX_SIZE = 0.011
BBOX_SIZE = 0.016 # Larger than S2 to ensure full coverage including padded pixels
def _get_bbox(lon, lat):
@ -131,10 +132,28 @@ def download_s3(season, site_position, site_name, date_range=None):
).resample_spatial(projection=32632)
output_file = output_dir / "s3_data.nc"
print("[S3] Downloading NetCDF...")
print(f"[S3] Downloading NetCDF to {output_file}...")
print(f"[S3] Temporal extent: {start_date} to {end_date}")
print(f"[S3] Spatial extent: {spatial_extent}")
print(f"[S3] Bands: {openeo_bands}")
print("[S3] This may take several minutes depending on data volume...")
start_time = time.time()
try:
datacube.download(str(output_file), format="NetCDF")
elapsed = time.time() - start_time
print(f"[S3] Download completed in {elapsed:.1f} seconds")
except Exception as e:
elapsed = time.time() - start_time
print(f"[S3] Download failed after {elapsed:.1f} seconds: {e}")
raise
print("[S3] Processing NetCDF...")
process_start = time.time()
_process_netcdf(output_file, output_dir, bands, openeo_bands)
process_elapsed = time.time() - process_start
print(f"[S3] Processing completed in {process_elapsed:.1f} seconds")
print(f"[S3] Removing temporary NetCDF file...")
os.remove(output_file)
print("[S3] Completed")

View file

@ -126,6 +126,10 @@ def _process_ndvi_files(
return
for geotiff_file in geotiff_files:
# Skip DIST_CLOUD files silently (single-band distance-to-clouds, not suitable for NDVI)
if "DIST_CLOUD" in geotiff_file.name:
continue
# Check if file has enough bands (need at least 4 for RED and NIR)
try:
with rasterio.open(geotiff_file) as src:

8
run.py
View file

@ -14,14 +14,14 @@ def run_pipeline(season, site_position, site_name):
try:
# print(f"Downloading data for {site_name}, {season}")
# download_s2(season, site_position, site_name)
# download_s3(season, site_position, site_name)
download_s3(season, site_position, site_name)
# print(f"Generating NDVI for raw data: {site_name}, {season}")
# generate_ndvi_raw(season, site_position, site_name)
# create_ndvi_timeseries_raw(season, site_position, site_name)
generate_ndvi_raw(season, site_position, site_name)
create_ndvi_timeseries_raw(season, site_position, site_name)
# print(f"Detecting clouds for {site_name}, {season}")
# detect_clouds(season, site_name)
detect_clouds(season, site_name)
print(f"Preparing data for EFAST fusion for {site_name}, {season}")
prepare_s2(season, site_position, site_name)