From 415af89c7df1adf89d5f90efea8334baefdee057 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Sun, 25 Jan 2026 15:22:34 +0100 Subject: [PATCH] foo --- clouds.py | 5 ++ ndvi.py | 1 + post_process.py | 199 ++++++++++++++++-------------------------------- run.py | 18 ++--- 4 files changed, 80 insertions(+), 143 deletions(-) diff --git a/clouds.py b/clouds.py index a102d69..61f1a78 100644 --- a/clouds.py +++ b/clouds.py @@ -25,6 +25,11 @@ def detect_clouds(season, site_name): with open(timeseries_file) as f: timeseries = json.load(f) + # Flag entries with ndvi: None as outliers (bad/invalid data) + for e in timeseries: + if e.get("ndvi") is None: + clouds[source].append(e["filename"]) + entries = [ (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) for e in timeseries diff --git a/ndvi.py b/ndvi.py index 2877a3e..7c4a44a 100644 --- a/ndvi.py +++ b/ndvi.py @@ -107,6 +107,7 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name): timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) timeseries.sort(key=lambda x: x["date"]) + output_dir.mkdir(parents=True, exist_ok=True) timeseries_file = output_dir / "timeseries.json" with open(timeseries_file, "w") as f: json.dump(timeseries, f, indent=2) diff --git a/post_process.py b/post_process.py index 75b6414..bf7d2af 100644 --- a/post_process.py +++ b/post_process.py @@ -1,59 +1,13 @@ from pathlib import Path -from datetime import datetime import numpy as np import rasterio from rasterio import windows - - -def _crop_to_bounds(src_file, bounds, output_file, row_based_height=None): - """Crop a raster file to given bounds and save.""" - crop_left, crop_bottom, crop_right, crop_top, crop_crs = bounds - - with rasterio.open(src_file) as src: - # Calculate window from bounds - window = windows.from_bounds(crop_left, crop_bottom, crop_right, crop_top, src.transform) - - # Use row-based height if provided (for fusion), otherwise calculate from bounds - if row_based_height is not None: - col_off = int(round(window.col_off)) - window = windows.Window(col_off, 0, src.width, row_based_height) - # Calculate bottom Y from row index - bottom_y = src.transform[5] + row_based_height * src.transform[4] - else: - pixel_size = abs(src.transform[0]) - width = int(round((crop_right - crop_left) / pixel_size)) - height = int(round((crop_top - crop_bottom) / pixel_size)) - window = windows.Window( - int(round(window.col_off)), int(round(window.row_off)), width, height - ) - bottom_y = crop_bottom - - # Clip window to source bounds - src_window = windows.Window(0, 0, src.width, src.height) - window = window.intersection(src_window) - if not window or window.height <= 0 or window.width <= 0: - return False - - data = src.read(window=window) - transform = rasterio.transform.from_bounds( - crop_left, bottom_y, crop_right, crop_top, window.width, window.height - ) - - profile = src.profile.copy() - profile.update({ - "height": window.height, - "width": window.width, - "transform": transform, - "crs": crop_crs, - }) - - with rasterio.open(output_file, "w", **profile) as dst: - dst.write(data) - return True +from rasterio.warp import reproject, Resampling +from rasterio.io import MemoryFile def process_cropped(season, site_position, site_name): - """Crop prepared S2, S3, and fusion files to fusion valid data bounds.""" + """Crop fusion to valid data, then crop S2/S3 to match.""" base = Path(f"data/{site_name}/{season}") prepared = base / "prepared" processed = base / "processed" @@ -65,94 +19,71 @@ def process_cropped(season, site_position, site_name): for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: output_dir.mkdir(parents=True, exist_ok=True) - print(f"[PROCESS] Cropping files to fusion valid data bounds: {site_name}, {season}") - - # Collect all available DIST_CLOUD files and their dates - dist_cloud_files = {} - for dist_cloud_file in s2_prep.glob("S2A_MSIL2A_*_DIST_CLOUD.tif"): - date_str = dist_cloud_file.stem.split("_")[2] - try: - date_obj = datetime.strptime(date_str, "%Y%m%d") - dist_cloud_files[date_obj] = dist_cloud_file - except ValueError: - continue - - if not dist_cloud_files: - print("[PROCESS] Warning: No DIST_CLOUD files found. Cannot process fusion files.") - return - - dist_cloud_dates = sorted(dist_cloud_files.keys()) - - def find_closest_dist_cloud(target_date_str): - """Find the closest DIST_CLOUD file to the target date.""" - try: - target_date = datetime.strptime(target_date_str, "%Y%m%d") - except ValueError: - return None - - # Find closest date - closest_date = min(dist_cloud_dates, key=lambda d: abs((d - target_date).days)) - return dist_cloud_files[closest_date] - - # Determine valid bounds for each fusion file - fusion_bounds = {} - fusion_rows = {} + print(f"[PROCESS] Processing files: {site_name}, {season}") + # Crop fusion to valid data and get dimensions + fusion_dims = {} for fusion_file in fusion_prep.glob("REFL_*.tif"): date_str = fusion_file.stem.split("_")[1] - - # Try exact date first, then find closest - dist_cloud = s2_prep / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif" - if not dist_cloud.exists(): - dist_cloud = find_closest_dist_cloud(date_str) - if dist_cloud is None: - continue - - with rasterio.open(dist_cloud) as dist_src: - dist_bounds = dist_src.bounds - dist_crs = dist_src.crs - - # Find first valid row from bottom in fusion file - with rasterio.open(fusion_file) as fusion_src: - data = fusion_src.read() - height = data.shape[1] - - first_valid_row = height - for row_idx in range(height - 1, -1, -1): - if np.any(~np.isnan(data[:, row_idx, :]) & (data[:, row_idx, :] > 0.001)): - first_valid_row = row_idx - break - - valid_bottom_y = (fusion_src.transform * (0, first_valid_row + 1))[1] - crop_bottom = max(dist_bounds.bottom, valid_bottom_y) - - fusion_bounds[date_str] = ( - dist_bounds.left, crop_bottom, dist_bounds.right, dist_bounds.top, dist_crs - ) - fusion_rows[date_str] = first_valid_row - - # Process S2 files - for refl_file in s2_prep.glob("*REFL.tif"): - date_str = refl_file.stem.split("_")[2] - if date_str in fusion_bounds: - output_file = processed / "s2" / f"{date_str}_0.geotiff" - if _crop_to_bounds(refl_file, fusion_bounds[date_str], output_file): - print(f"[PROCESS] Saved: {output_file}") - - # Process S3 files - for s3_file in s3_prep.glob("composite_*.tif"): - date_str = s3_file.stem.split("_")[1] - if date_str in fusion_bounds: - output_file = processed / "s3" / f"{date_str}_0.geotiff" - if _crop_to_bounds(s3_file, fusion_bounds[date_str], output_file): - print(f"[PROCESS] Saved: {output_file}") - - # Process fusion files (use row-based cropping) - for date_str, bounds in fusion_bounds.items(): - fusion_file = fusion_prep / f"REFL_{date_str}.tif" - if fusion_file.exists(): + with rasterio.open(fusion_file) as src: + data = src.read() + valid = ~np.isnan(data) & (data > 0.001) + rows = np.any(valid, axis=(0, 2)) + cols = np.any(valid, axis=(0, 1)) + r0, r1 = np.where(rows)[0][[0, -1]] + c0, c1 = np.where(cols)[0][[0, -1]] + w, h = c1 - c0 + 1, r1 - r0 + 1 + window = windows.Window(c0, r0, w, h) + data_crop = src.read(window=window) + transform = rasterio.windows.transform(window, src.transform) + p = src.profile.copy() + p.update({"width": w, "height": h, "transform": transform}) output_file = processed / "fusion" / f"{date_str}_0.geotiff" - if _crop_to_bounds(fusion_file, bounds, output_file, row_based_height=fusion_rows[date_str] + 1): - print(f"[PROCESS] Saved: {output_file}") + with rasterio.open(output_file, "w", **p) as dst: + dst.write(data_crop) + fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile) + print(f"[PROCESS] Cropped fusion: {output_file}") + + # Crop S2 and S3 to fusion size + for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items(): + window = windows.Window(c0, r0, w, h) + # S2 + for s2_file in s2_prep.glob("*REFL.tif"): + if s2_file.stem.split("_")[2] == date_str: + output_file = processed / "s2" / f"{date_str}_0.geotiff" + with rasterio.open(s2_file) as src: + data = src.read(window=window) + p2 = src.profile.copy() + p2.update({"width": w, "height": h, "transform": transform, "crs": crs}) + with rasterio.open(output_file, "w", **p2) as dst: + dst.write(data) + print(f"[PROCESS] Cropped: {output_file}") + # S3: resample to fusion pixel size, then crop + s3_file = s3_prep / f"composite_{date_str}.tif" + if s3_file.exists(): + output_file = processed / "s3" / f"{date_str}_0.geotiff" + with rasterio.open(s3_file) as src: + # Resample to fusion pixel size + temp_profile = fusion_profile.copy() + temp_profile.update({"dtype": src.profile["dtype"], "count": src.count}) + with rasterio.MemoryFile() as memfile: + with memfile.open(**temp_profile) as resampled: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(resampled, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=fusion_transform, + dst_crs=crs, + resampling=Resampling.nearest + ) + # Crop using same window + data = resampled.read(window=window) + p2 = resampled.profile.copy() + p2.update({"width": w, "height": h, "transform": transform}) + with rasterio.open(output_file, "w", **p2) as dst: + dst.write(data) + print(f"[PROCESS] Cropped: {output_file}") print("[PROCESS] Completed") diff --git a/run.py b/run.py index 9935ffd..37bf7ac 100644 --- a/run.py +++ b/run.py @@ -21,21 +21,21 @@ def run_pipeline(season, site_position, site_name): # print(f"Generating NDVI for raw data: {site_name}, {season}") # generate_ndvi_raw(season, site_position, site_name) - # create_ndvi_timeseries_raw(season, site_position, site_name) + #create_ndvi_timeseries_raw(season, site_position, site_name) # print(f"Detecting clouds for {site_name}, {season}") - # detect_clouds(season, site_name) + #detect_clouds(season, site_name) - # print(f"Preparing data for EFAST fusion for {site_name}, {season}") - # prepare_s2(season, site_position, site_name) - # prepare_s3(season, site_position, site_name) + #print(f"Preparing data for EFAST fusion for {site_name}, {season}") + #prepare_s2(season, site_position, site_name) + #prepare_s3(season, site_position, site_name) - # print(f"Running EFAST fusion for {site_name}, {season}") - # run_efast(season, site_position, site_name) + #print(f"Running EFAST fusion for {site_name}, {season}") + #run_efast(season, site_position, site_name) - # print(f"Post-processing data: {site_name}, {season}") + #print(f"Post-processing data: {site_name}, {season}") process_cropped(season, site_position, site_name) - # print(f"Generating NDVI for final outputs: {site_name}, {season}") + #print(f"Generating NDVI for final outputs: {site_name}, {season}") generate_ndvi_post_process(season, site_position, site_name) create_ndvi_timeseries_post_process(season, site_position, site_name)