This commit is contained in:
Felix Delattre 2026-01-25 15:22:34 +01:00
parent 7a695cc089
commit 415af89c7d
4 changed files with 80 additions and 143 deletions

View file

@ -25,6 +25,11 @@ def detect_clouds(season, site_name):
with open(timeseries_file) as f: with open(timeseries_file) as f:
timeseries = json.load(f) timeseries = json.load(f)
# Flag entries with ndvi: None as outliers (bad/invalid data)
for e in timeseries:
if e.get("ndvi") is None:
clouds[source].append(e["filename"])
entries = [ entries = [
(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
for e in timeseries for e in timeseries

View file

@ -107,6 +107,7 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name):
timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
timeseries.sort(key=lambda x: x["date"]) timeseries.sort(key=lambda x: x["date"])
output_dir.mkdir(parents=True, exist_ok=True)
timeseries_file = output_dir / "timeseries.json" timeseries_file = output_dir / "timeseries.json"
with open(timeseries_file, "w") as f: with open(timeseries_file, "w") as f:
json.dump(timeseries, f, indent=2) json.dump(timeseries, f, indent=2)

View file

@ -1,59 +1,13 @@
from pathlib import Path from pathlib import Path
from datetime import datetime
import numpy as np import numpy as np
import rasterio import rasterio
from rasterio import windows from rasterio import windows
from rasterio.warp import reproject, Resampling
from rasterio.io import MemoryFile
def _crop_to_bounds(src_file, bounds, output_file, row_based_height=None):
"""Crop a raster file to given bounds and save."""
crop_left, crop_bottom, crop_right, crop_top, crop_crs = bounds
with rasterio.open(src_file) as src:
# Calculate window from bounds
window = windows.from_bounds(crop_left, crop_bottom, crop_right, crop_top, src.transform)
# Use row-based height if provided (for fusion), otherwise calculate from bounds
if row_based_height is not None:
col_off = int(round(window.col_off))
window = windows.Window(col_off, 0, src.width, row_based_height)
# Calculate bottom Y from row index
bottom_y = src.transform[5] + row_based_height * src.transform[4]
else:
pixel_size = abs(src.transform[0])
width = int(round((crop_right - crop_left) / pixel_size))
height = int(round((crop_top - crop_bottom) / pixel_size))
window = windows.Window(
int(round(window.col_off)), int(round(window.row_off)), width, height
)
bottom_y = crop_bottom
# Clip window to source bounds
src_window = windows.Window(0, 0, src.width, src.height)
window = window.intersection(src_window)
if not window or window.height <= 0 or window.width <= 0:
return False
data = src.read(window=window)
transform = rasterio.transform.from_bounds(
crop_left, bottom_y, crop_right, crop_top, window.width, window.height
)
profile = src.profile.copy()
profile.update({
"height": window.height,
"width": window.width,
"transform": transform,
"crs": crop_crs,
})
with rasterio.open(output_file, "w", **profile) as dst:
dst.write(data)
return True
def process_cropped(season, site_position, site_name): def process_cropped(season, site_position, site_name):
"""Crop prepared S2, S3, and fusion files to fusion valid data bounds.""" """Crop fusion to valid data, then crop S2/S3 to match."""
base = Path(f"data/{site_name}/{season}") base = Path(f"data/{site_name}/{season}")
prepared = base / "prepared" prepared = base / "prepared"
processed = base / "processed" processed = base / "processed"
@ -65,94 +19,71 @@ def process_cropped(season, site_position, site_name):
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
print(f"[PROCESS] Cropping files to fusion valid data bounds: {site_name}, {season}") print(f"[PROCESS] Processing files: {site_name}, {season}")
# Collect all available DIST_CLOUD files and their dates
dist_cloud_files = {}
for dist_cloud_file in s2_prep.glob("S2A_MSIL2A_*_DIST_CLOUD.tif"):
date_str = dist_cloud_file.stem.split("_")[2]
try:
date_obj = datetime.strptime(date_str, "%Y%m%d")
dist_cloud_files[date_obj] = dist_cloud_file
except ValueError:
continue
if not dist_cloud_files:
print("[PROCESS] Warning: No DIST_CLOUD files found. Cannot process fusion files.")
return
dist_cloud_dates = sorted(dist_cloud_files.keys())
def find_closest_dist_cloud(target_date_str):
"""Find the closest DIST_CLOUD file to the target date."""
try:
target_date = datetime.strptime(target_date_str, "%Y%m%d")
except ValueError:
return None
# Find closest date
closest_date = min(dist_cloud_dates, key=lambda d: abs((d - target_date).days))
return dist_cloud_files[closest_date]
# Determine valid bounds for each fusion file
fusion_bounds = {}
fusion_rows = {}
# Crop fusion to valid data and get dimensions
fusion_dims = {}
for fusion_file in fusion_prep.glob("REFL_*.tif"): for fusion_file in fusion_prep.glob("REFL_*.tif"):
date_str = fusion_file.stem.split("_")[1] date_str = fusion_file.stem.split("_")[1]
with rasterio.open(fusion_file) as src:
# Try exact date first, then find closest data = src.read()
dist_cloud = s2_prep / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif" valid = ~np.isnan(data) & (data > 0.001)
if not dist_cloud.exists(): rows = np.any(valid, axis=(0, 2))
dist_cloud = find_closest_dist_cloud(date_str) cols = np.any(valid, axis=(0, 1))
if dist_cloud is None: r0, r1 = np.where(rows)[0][[0, -1]]
continue c0, c1 = np.where(cols)[0][[0, -1]]
w, h = c1 - c0 + 1, r1 - r0 + 1
with rasterio.open(dist_cloud) as dist_src: window = windows.Window(c0, r0, w, h)
dist_bounds = dist_src.bounds data_crop = src.read(window=window)
dist_crs = dist_src.crs transform = rasterio.windows.transform(window, src.transform)
p = src.profile.copy()
# Find first valid row from bottom in fusion file p.update({"width": w, "height": h, "transform": transform})
with rasterio.open(fusion_file) as fusion_src:
data = fusion_src.read()
height = data.shape[1]
first_valid_row = height
for row_idx in range(height - 1, -1, -1):
if np.any(~np.isnan(data[:, row_idx, :]) & (data[:, row_idx, :] > 0.001)):
first_valid_row = row_idx
break
valid_bottom_y = (fusion_src.transform * (0, first_valid_row + 1))[1]
crop_bottom = max(dist_bounds.bottom, valid_bottom_y)
fusion_bounds[date_str] = (
dist_bounds.left, crop_bottom, dist_bounds.right, dist_bounds.top, dist_crs
)
fusion_rows[date_str] = first_valid_row
# Process S2 files
for refl_file in s2_prep.glob("*REFL.tif"):
date_str = refl_file.stem.split("_")[2]
if date_str in fusion_bounds:
output_file = processed / "s2" / f"{date_str}_0.geotiff"
if _crop_to_bounds(refl_file, fusion_bounds[date_str], output_file):
print(f"[PROCESS] Saved: {output_file}")
# Process S3 files
for s3_file in s3_prep.glob("composite_*.tif"):
date_str = s3_file.stem.split("_")[1]
if date_str in fusion_bounds:
output_file = processed / "s3" / f"{date_str}_0.geotiff"
if _crop_to_bounds(s3_file, fusion_bounds[date_str], output_file):
print(f"[PROCESS] Saved: {output_file}")
# Process fusion files (use row-based cropping)
for date_str, bounds in fusion_bounds.items():
fusion_file = fusion_prep / f"REFL_{date_str}.tif"
if fusion_file.exists():
output_file = processed / "fusion" / f"{date_str}_0.geotiff" output_file = processed / "fusion" / f"{date_str}_0.geotiff"
if _crop_to_bounds(fusion_file, bounds, output_file, row_based_height=fusion_rows[date_str] + 1): with rasterio.open(output_file, "w", **p) as dst:
print(f"[PROCESS] Saved: {output_file}") dst.write(data_crop)
fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile)
print(f"[PROCESS] Cropped fusion: {output_file}")
# Crop S2 and S3 to fusion size
for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items():
window = windows.Window(c0, r0, w, h)
# S2
for s2_file in s2_prep.glob("*REFL.tif"):
if s2_file.stem.split("_")[2] == date_str:
output_file = processed / "s2" / f"{date_str}_0.geotiff"
with rasterio.open(s2_file) as src:
data = src.read(window=window)
p2 = src.profile.copy()
p2.update({"width": w, "height": h, "transform": transform, "crs": crs})
with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data)
print(f"[PROCESS] Cropped: {output_file}")
# S3: resample to fusion pixel size, then crop
s3_file = s3_prep / f"composite_{date_str}.tif"
if s3_file.exists():
output_file = processed / "s3" / f"{date_str}_0.geotiff"
with rasterio.open(s3_file) as src:
# Resample to fusion pixel size
temp_profile = fusion_profile.copy()
temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
with rasterio.MemoryFile() as memfile:
with memfile.open(**temp_profile) as resampled:
for i in range(1, src.count + 1):
reproject(
source=rasterio.band(src, i),
destination=rasterio.band(resampled, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=fusion_transform,
dst_crs=crs,
resampling=Resampling.nearest
)
# Crop using same window
data = resampled.read(window=window)
p2 = resampled.profile.copy()
p2.update({"width": w, "height": h, "transform": transform})
with rasterio.open(output_file, "w", **p2) as dst:
dst.write(data)
print(f"[PROCESS] Cropped: {output_file}")
print("[PROCESS] Completed") print("[PROCESS] Completed")

18
run.py
View file

@ -21,21 +21,21 @@ def run_pipeline(season, site_position, site_name):
# print(f"Generating NDVI for raw data: {site_name}, {season}") # print(f"Generating NDVI for raw data: {site_name}, {season}")
# generate_ndvi_raw(season, site_position, site_name) # generate_ndvi_raw(season, site_position, site_name)
# create_ndvi_timeseries_raw(season, site_position, site_name) #create_ndvi_timeseries_raw(season, site_position, site_name)
# print(f"Detecting clouds for {site_name}, {season}") # print(f"Detecting clouds for {site_name}, {season}")
# detect_clouds(season, site_name) #detect_clouds(season, site_name)
# print(f"Preparing data for EFAST fusion for {site_name}, {season}") #print(f"Preparing data for EFAST fusion for {site_name}, {season}")
# prepare_s2(season, site_position, site_name) #prepare_s2(season, site_position, site_name)
# prepare_s3(season, site_position, site_name) #prepare_s3(season, site_position, site_name)
# print(f"Running EFAST fusion for {site_name}, {season}") #print(f"Running EFAST fusion for {site_name}, {season}")
# run_efast(season, site_position, site_name) #run_efast(season, site_position, site_name)
# print(f"Post-processing data: {site_name}, {season}") #print(f"Post-processing data: {site_name}, {season}")
process_cropped(season, site_position, site_name) process_cropped(season, site_position, site_name)
# print(f"Generating NDVI for final outputs: {site_name}, {season}") #print(f"Generating NDVI for final outputs: {site_name}, {season}")
generate_ndvi_post_process(season, site_position, site_name) generate_ndvi_post_process(season, site_position, site_name)
create_ndvi_timeseries_post_process(season, site_position, site_name) create_ndvi_timeseries_post_process(season, site_position, site_name)