From 6bbaa4b3eb5dbf1ce27b1534a49e9740eda19cf3 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Sat, 27 Dec 2025 10:25:17 +0100 Subject: [PATCH] Refactored a bit. --- clouds.py | 22 ++++-- download_s2.py | 178 ++++++++++++++++++++++------------------------ download_s3.py | 121 ++++++++++++++++--------------- efast.py | 134 ++++++++++++++++------------------ ndvi.py | 168 +++++++++++++++++++++---------------------- requirements.txt | 1 + run.py | 29 ++++---- webapp/index.html | 1 + 8 files changed, 323 insertions(+), 331 deletions(-) diff --git a/clouds.py b/clouds.py index cf5db4f..a102d69 100644 --- a/clouds.py +++ b/clouds.py @@ -2,13 +2,20 @@ import json from pathlib import Path from datetime import datetime +WINDOW_DAYS = 14 +NDVI_THRESHOLD = 0.3 +NDVI_DELTA = 0.15 +MIN_WINDOW_SIZE = 3 + def detect_clouds(season, site_name): output_file = Path(f"data/{site_name}/{season}/clouds.json") clouds = {"s2": [], "s3": []} for source in ["s2", "s3"]: - timeseries_file = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/timeseries.json") + timeseries_file = Path( + f"data/{site_name}/{season}/raw/ndvi/{source}/timeseries.json" + ) if not timeseries_file.exists(): print(f"[CLOUDS-{source.upper()}] No timeseries.json found") continue @@ -21,22 +28,23 @@ def detect_clouds(season, site_name): entries = [ (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) for e in timeseries - if e["ndvi"] is not None + if e.get("ndvi") is not None ] for entry, entry_date in entries: - # Use 14-day window for seasonal context, require NDVI < 0.3 and >0.15 below max window_ndvi = [ - e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14 + e["ndvi"] + for e, d in entries + if abs((d - entry_date).days) <= WINDOW_DAYS ] - if len(window_ndvi) < 3: + if len(window_ndvi) < MIN_WINDOW_SIZE: continue max_ndvi = max(window_ndvi) - threshold = max_ndvi - 0.15 + threshold = max_ndvi - NDVI_DELTA - if entry["ndvi"] < threshold and entry["ndvi"] < 0.3: + if entry["ndvi"] < threshold and entry["ndvi"] < NDVI_THRESHOLD: clouds[source].append(entry["filename"]) print( diff --git a/download_s2.py b/download_s2.py index f6e4311..a9a5797 100644 --- a/download_s2.py +++ b/download_s2.py @@ -1,28 +1,76 @@ -import os import rasterio import xml.etree.ElementTree as ET import requests +from pathlib import Path from rasterio.warp import transform_geom from rasterio.windows import from_bounds, transform as window_transform from pystac_client import Client +BBOX_SIZE = 0.011 + + +def _get_bbox(lon, lat): + half = BBOX_SIZE / 2 + return [lon - half, lat - half, lon + half, lat + half] + + +def _get_window_for_bbox(src, bbox): + bbox_geom = { + "type": "Polygon", + "coordinates": [ + [ + [bbox[0], bbox[1]], + [bbox[2], bbox[1]], + [bbox[2], bbox[3]], + [bbox[0], bbox[3]], + [bbox[0], bbox[1]], + ] + ], + } + bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom) + coords = bbox_transformed["coordinates"][0] + x_coords = [c[0] for c in coords[:4]] + y_coords = [c[1] for c in coords[:4]] + bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)] + src_bounds = src.bounds + intersect_bbox = [ + max(bbox_crs[0], src_bounds.left), + max(bbox_crs[1], src_bounds.bottom), + min(bbox_crs[2], src_bounds.right), + min(bbox_crs[3], src_bounds.top), + ] + return from_bounds(*intersect_bbox, src.transform) + + +def _extract_viewing_angle(item): + if "granule_metadata" not in item.assets: + return None + try: + xml_url = item.assets["granule_metadata"].href + xml_resp = requests.get(xml_url, timeout=10) + xml_resp.raise_for_status() + root = ET.fromstring(xml_resp.content) + angles = [ + abs(float(zenith_elem.text)) + for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle") + if (zenith_elem := angle_elem.find("ZENITH_ANGLE")) is not None + ] + return angles[0] if angles else None + except Exception as e: + print(f"[S2] Warning: Could not extract viewing angle: {e}") + return None + def download_s2(season, site_position, site_name, date_range=None): lat, lon = site_position datetime_range = date_range or f"{season}-01-01/{season}-12-31" - output_dir = f"data/{site_name}/{season}/raw/s2/" + output_dir = Path(f"data/{site_name}/{season}/raw/s2/") print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}") - bbox_size = 0.011 - bbox = [ - lon - bbox_size / 2, - lat - bbox_size / 2, - lon + bbox_size / 2, - lat + bbox_size / 2, - ] + bbox = _get_bbox(lon, lat) bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"} - os.makedirs(output_dir, exist_ok=True) + output_dir.mkdir(parents=True, exist_ok=True) print("[S2] Connecting to STAC catalog...") client = Client.open("https://earth-search.aws.element84.com/v1") @@ -46,8 +94,8 @@ def download_s2(season, site_position, site_name, date_range=None): print(f"[S2] Found {len(items_by_key)} unique items") for (date, increment), item in items_by_key.items(): - filepath = os.path.join(output_dir, f"{date}_{increment}.geotiff") - if os.path.exists(filepath): + filepath = output_dir / f"{date}_{increment}.geotiff" + if filepath.exists(): print(f"[S2] Skipping {date}_{increment}.geotiff (exists)") continue @@ -56,88 +104,33 @@ def download_s2(season, site_position, site_name, date_range=None): profile = None for band_name, asset_name in bands.items(): - if asset_name in item.assets: - asset = item.assets[asset_name] - with rasterio.open(asset.href) as src: - bbox_geom = { - "type": "Polygon", - "coordinates": [ - [ - [bbox[0], bbox[1]], - [bbox[2], bbox[1]], - [bbox[2], bbox[3]], - [bbox[0], bbox[3]], - [bbox[0], bbox[1]], - ] - ], + if asset_name not in item.assets: + continue + asset = item.assets[asset_name] + with rasterio.open(asset.href) as src: + window = _get_window_for_bbox(src, bbox) + if window.height <= 0 or window.width <= 0: + continue + data = src.read(window=window) + new_transform = window_transform(window, src.transform) + if profile is None: + profile = { + "driver": "GTiff", + "height": window.height, + "width": window.width, + "count": len(bands), + "dtype": data.dtype, + "crs": src.crs, + "transform": new_transform, + "compress": "lzw", } - bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom) - coords = bbox_transformed["coordinates"][0] - x_coords = [c[0] for c in coords[:4]] - y_coords = [c[1] for c in coords[:4]] - bbox_crs = [ - min(x_coords), - min(y_coords), - max(x_coords), - max(y_coords), - ] - src_bounds = src.bounds - intersect_bbox = [ - max(bbox_crs[0], src_bounds.left), - max(bbox_crs[1], src_bounds.bottom), - min(bbox_crs[2], src_bounds.right), - min(bbox_crs[3], src_bounds.top), - ] - window = from_bounds(*intersect_bbox, src.transform) - if window.height > 0 and window.width > 0: - data = src.read(window=window) - new_transform = window_transform(window, src.transform) - if profile is None: - profile = { - "driver": "GTiff", - "height": window.height, - "width": window.width, - "count": len(bands), - "dtype": data.dtype, - "crs": src.crs, - "transform": new_transform, - "compress": "lzw", - } - band_idx = list(bands.keys()).index(band_name) - band_data[band_idx] = data[0] + band_idx = list(bands.keys()).index(band_name) + band_data[band_idx] = data[0] if profile and len(band_data) == len(bands): stacked = [band_data[i] for i in sorted(band_data.keys())] band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())] - - # Extract viewing angle from granule metadata XML - viewing_angle = None - if "granule_metadata" in item.assets: - try: - xml_url = item.assets["granule_metadata"].href - xml_resp = requests.get(xml_url, timeout=10) - xml_resp.raise_for_status() - root = ET.fromstring(xml_resp.content) - # Find Mean_Viewing_Incidence_Angle ZENITH_ANGLE - for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle"): - if angle_elem.get("bandId") == "0": # Use first band or average - zenith_elem = angle_elem.find("ZENITH_ANGLE") - if zenith_elem is not None: - viewing_angle = abs(float(zenith_elem.text)) - break - # If not found, try averaging all bands - if viewing_angle is None: - angles = [] - for angle_elem in root.findall( - ".//Mean_Viewing_Incidence_Angle" - ): - zenith_elem = angle_elem.find("ZENITH_ANGLE") - if zenith_elem is not None: - angles.append(abs(float(zenith_elem.text))) - if angles: - viewing_angle = sum(angles) / len(angles) - except Exception as e: - print(f"[S2] Warning: Could not extract viewing angle: {e}") + viewing_angle = _extract_viewing_angle(item) with rasterio.open(filepath, "w", **profile) as dst: for i, data in enumerate(stacked, 1): @@ -146,11 +139,10 @@ def download_s2(season, site_position, site_name, date_range=None): if viewing_angle is not None: dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle) - print( - f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" - if viewing_angle - else f"[S2] Saved: {filepath}" + angle_msg = ( + f" (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else "" ) + print(f"[S2] Saved: {filepath}{angle_msg}") else: print(f"[S2] Skipping {date}_{increment} (missing bands)") diff --git a/download_s3.py b/download_s3.py index 6920e65..7098dc1 100644 --- a/download_s3.py +++ b/download_s3.py @@ -11,6 +11,66 @@ from rasterio.transform import from_bounds load_dotenv() +BBOX_SIZE = 0.011 + + +def _get_bbox(lon, lat): + half = BBOX_SIZE / 2 + return [lon - half, lat - half, lon + half, lat + half] + + +def _process_netcdf(nc_file, output_dir, bands, openeo_bands): + with netCDF4.Dataset(str(nc_file), "r") as nc: + times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units) + x_coords = nc.variables["x"][:] + y_coords = nc.variables["y"][:] + band_vars = sorted( + [v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()] + ) + band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars] + + transform = from_bounds( + float(x_coords.min()), + float(y_coords.min()), + float(x_coords.max()), + float(y_coords.max()), + len(x_coords), + len(y_coords), + ) + + print(f"[S3] Found {len(times)} time steps") + date_counts = {} + for t_idx, time_val in enumerate(times): + dt = ( + time_val + if isinstance(time_val, datetime) + else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units) + ) + date_str = dt.strftime("%Y%m%d") + increment = date_counts.get(date_str, 0) + date_counts[date_str] = increment + 1 + + band_data = [nc.variables[b][t_idx, :, :] for b in band_vars] + stacked = np.stack(band_data, axis=0) + + output_path = output_dir / f"{date_str}_{increment}.geotiff" + with rasterio.open( + output_path, + "w", + driver="GTiff", + height=len(y_coords), + width=len(x_coords), + count=len(band_data), + dtype=stacked.dtype, + crs="EPSG:32632", + transform=transform, + compress="lzw", + ) as dst: + dst.write(stacked) + for i, band_name in enumerate(band_names, 1): + dst.set_band_description(i, band_name) + print(f"[S3] Saved: {output_path}") + def download_s3(season, site_position, site_name, date_range=None): lat, lon = site_position @@ -19,13 +79,7 @@ def download_s3(season, site_position, site_name, date_range=None): print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}") - bbox_size = 0.011 - bbox = [ - lon - bbox_size / 2, - lat - bbox_size / 2, - lon + bbox_size / 2, - lat + bbox_size / 2, - ] + bbox = _get_bbox(lon, lat) bands = { "SDR_Oa04": "blue", "SDR_Oa06": "green", @@ -81,57 +135,6 @@ def download_s3(season, site_position, site_name, date_range=None): datacube.download(str(output_file), format="NetCDF") print("[S3] Processing NetCDF...") - nc = netCDF4.Dataset(str(output_file), "r") - times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units) - x_coords = nc.variables["x"][:] - y_coords = nc.variables["y"][:] - band_vars = sorted( - [v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()] - ) - band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars] - - transform = from_bounds( - float(x_coords.min()), - float(y_coords.min()), - float(x_coords.max()), - float(y_coords.max()), - len(x_coords), - len(y_coords), - ) - - print(f"[S3] Found {len(times)} time steps") - date_counts = {} - for t_idx, time_val in enumerate(times): - dt = ( - time_val - if isinstance(time_val, datetime) - else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units) - ) - date_str = dt.strftime("%Y%m%d") - increment = date_counts.get(date_str, 0) - date_counts[date_str] = increment + 1 - - band_data = [nc.variables[b][t_idx, :, :] for b in band_vars] - stacked = np.stack(band_data, axis=0) - - output_path = output_dir / f"{date_str}_{increment}.geotiff" - with rasterio.open( - output_path, - "w", - driver="GTiff", - height=len(y_coords), - width=len(x_coords), - count=len(band_data), - dtype=stacked.dtype, - crs="EPSG:32632", - transform=transform, - compress="lzw", - ) as dst: - dst.write(stacked) - for i, band_name in enumerate(band_names, 1): - dst.set_band_description(i, band_name) - print(f"[S3] Saved: {output_path}") - - nc.close() + _process_netcdf(output_file, output_dir, bands, openeo_bands) os.remove(output_file) print("[S3] Completed") diff --git a/efast.py b/efast.py index 021622a..8263038 100644 --- a/efast.py +++ b/efast.py @@ -6,28 +6,16 @@ from datetime import datetime, timedelta import numpy as np import rasterio from rasterio.warp import Resampling -from rasterio.vrt import WarpedVRT from scipy import ndimage -_this_file = Path(__file__).resolve() -_venv_lib = _this_file.parent.parent / "venv" / "lib" -_efast_pkg_path = None -if _venv_lib.exists(): - for py_dir in _venv_lib.glob("python*"): - candidate = py_dir / "site-packages" / "efast" / "efast.py" - if candidate.exists(): - _efast_pkg_path = candidate - break +RESOLUTION_RATIO = 21 -if _efast_pkg_path and _efast_pkg_path.exists(): - spec = importlib.util.spec_from_file_location( - "efast_fusion_module", _efast_pkg_path - ) - efast_fusion = importlib.util.module_from_spec(spec) - spec.loader.exec_module(efast_fusion) -else: +try: + import efast as efast_fusion +except ImportError: import site + efast_fusion = None for site_pkg in site.getsitepackages(): candidate = Path(site_pkg) / "efast" / "efast.py" if candidate.exists(): @@ -37,23 +25,51 @@ else: efast_fusion = importlib.util.module_from_spec(spec) spec.loader.exec_module(efast_fusion) break - else: + if efast_fusion is None: raise ImportError( "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git" ) +def _load_clouds(clouds_file): + clouds = {"s2": set(), "s3": set()} + if clouds_file.exists(): + clouds_data = json.loads(clouds_file.read_text()) + clouds["s2"] = set(clouds_data.get("s2", [])) + clouds["s3"] = set(clouds_data.get("s3", [])) + return clouds + + +def _reproject_to_target( + data, src_transform, src_crs, target_bounds, target_crs, width, height, resampling +): + dst_transform = rasterio.transform.from_bounds( + target_bounds.left, + target_bounds.bottom, + target_bounds.right, + target_bounds.top, + width, + height, + ) + reprojected, _ = rasterio.warp.reproject( + source=data, + destination=np.zeros((data.shape[0], height, width), dtype=data.dtype), + src_transform=src_transform, + src_crs=src_crs, + dst_transform=dst_transform, + dst_crs=target_crs, + resampling=resampling, + ) + return reprojected, dst_transform + + def prepare_s2(season, site_position, site_name, date_range=None): s2_dir = Path(f"data/{site_name}/{season}/raw/s2/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") s2_output_dir = Path(f"data/{site_name}/{season}/prepared/s2/") clouds_file = Path(f"data/{site_name}/{season}/clouds.json") - clouds = {"s2": set(), "s3": set()} - if clouds_file.exists(): - clouds_data = json.loads(clouds_file.read_text()) - clouds["s2"] = set(clouds_data.get("s2", [])) - clouds["s3"] = set(clouds_data.get("s3", [])) + clouds = _load_clouds(clouds_file) s2_output_dir.mkdir(parents=True, exist_ok=True) @@ -66,9 +82,8 @@ def prepare_s2(season, site_position, site_name, date_range=None): target_crs = s3_ref.crs s3_width = s3_ref.width s3_height = s3_ref.height - ratio = 21 - s2_width = s3_width * ratio - s2_height = s3_height * ratio + s2_width = s3_width * RESOLUTION_RATIO + s2_height = s3_height * RESOLUTION_RATIO for s2_file in s2_dir.glob("*.geotiff"): if s2_file.name in clouds["s2"]: @@ -79,25 +94,15 @@ def prepare_s2(season, site_position, site_name, date_range=None): if not refl_dst.exists(): with rasterio.open(s2_file) as src: data = src.read().astype("float32") / 10000.0 - s2_res = (target_bounds.right - target_bounds.left) / s2_width - dst_transform = rasterio.transform.from_bounds( - target_bounds.left, - target_bounds.bottom, - target_bounds.right, - target_bounds.top, + reprojected_data, dst_transform = _reproject_to_target( + data, + src.transform, + src.crs, + target_bounds, + target_crs, s2_width, s2_height, - ) - reprojected_data, _ = rasterio.warp.reproject( - source=data, - destination=np.zeros( - (src.count, s2_height, s2_width), dtype=data.dtype - ), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=dst_transform, - dst_crs=target_crs, - resampling=Resampling.cubic, + Resampling.cubic, ) profile = src.profile.copy() profile.update( @@ -118,30 +123,19 @@ def prepare_s2(season, site_position, site_name, date_range=None): with rasterio.open(refl_dst) as src: s2_hr = src.read(1) mask = s2_hr == 0 - distance_to_cloud_hr = ndimage.distance_transform_edt(~mask) - distance_to_cloud_hr = np.clip(distance_to_cloud_hr, 0, 255).astype( - "float32" - ) + distance_to_cloud_hr = np.clip( + ndimage.distance_transform_edt(~mask), 0, 255 + ).astype("float32") - s3_res = (target_bounds.right - target_bounds.left) / s3_width - lr_transform = rasterio.transform.from_bounds( - target_bounds.left, - target_bounds.bottom, - target_bounds.right, - target_bounds.top, + distance_to_cloud_lr, lr_transform = _reproject_to_target( + distance_to_cloud_hr[np.newaxis, :, :], + src.transform, + src.crs, + target_bounds, + target_crs, s3_width, s3_height, - ) - distance_to_cloud_lr, _ = rasterio.warp.reproject( - source=distance_to_cloud_hr[np.newaxis, :, :], - destination=np.zeros( - (1, s3_height, s3_width), dtype=distance_to_cloud_hr.dtype - ), - src_transform=src.transform, - src_crs=target_crs, - dst_transform=lr_transform, - dst_crs=target_crs, - resampling=Resampling.average, + Resampling.average, ) distance_to_cloud_lr = distance_to_cloud_lr[0] @@ -164,10 +158,7 @@ def prepare_s3(season, site_position, site_name, date_range=None): s3_preprocessed_dir = Path(f"data/{site_name}/{season}/prepared/s3/") clouds_file = Path(f"data/{site_name}/{season}/clouds.json") - clouds = {"s3": set()} - if clouds_file.exists(): - clouds_data = json.loads(clouds_file.read_text()) - clouds["s3"] = set(clouds_data.get("s3", [])) + clouds = _load_clouds(clouds_file) s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) for s3_file in s3_dir.glob("*.geotiff"): @@ -193,8 +184,9 @@ def run_efast(season, site_position, site_name, date_range=None): print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}") - start_date = datetime.strptime(datetime_range.split("/")[0], "%Y-%m-%d") - end_date = datetime.strptime(datetime_range.split("/")[1], "%Y-%m-%d") + start_str, end_str = datetime_range.split("/") + start_date = datetime.strptime(start_str, "%Y-%m-%d") + end_date = datetime.strptime(end_str, "%Y-%m-%d") current_date = start_date while current_date <= end_date: @@ -216,7 +208,7 @@ def run_efast(season, site_position, site_name, date_range=None): max_days=30, date_position=2, minimum_acquisition_importance=0.0, - ratio=21, + ratio=RESOLUTION_RATIO, ) if output_file.exists(): print(f"[EFAST] Saved: {output_file}") diff --git a/ndvi.py b/ndvi.py index def1f73..ed95520 100644 --- a/ndvi.py +++ b/ndvi.py @@ -5,12 +5,14 @@ from rasterio.warp import transform as transform_coords from pathlib import Path from datetime import datetime +RED_BAND = 3 +NIR_BAND = 4 + def _calculate_and_write_ndvi(input_file, output_file): - """Calculate NDVI from red and NIR bands and write to output file.""" with rasterio.open(input_file) as src: - red = src.read(3).astype(np.float32) - nir = src.read(4).astype(np.float32) + red = src.read(RED_BAND).astype(np.float32) + nir = src.read(NIR_BAND).astype(np.float32) mask = (red > 0) & (nir > 0) ndvi = np.zeros_like(red, dtype=np.float32) @@ -31,13 +33,26 @@ def _calculate_and_write_ndvi(input_file, output_file): dst.set_band_description(1, "NDVI") +def _get_ndvi_value(ndvi_file, site_position): + try: + with rasterio.open(ndvi_file) as src: + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + samples = list(src.sample([(x[0], y[0])])) + if samples: + value = float(samples[0][0]) + if value != 0 and not np.isnan(value): + return value + except Exception: + pass + return None + + def _create_timeseries_for_dir(output_dir, site_position, source_name): - """Create timeseries.json for NDVI files in the given directory.""" print(f"[NDVI-{source_name}] Creating timeseries.json...") timeseries = [] - ndvi_files = sorted(output_dir.glob("*.geotiff")) - for ndvi_file in ndvi_files: + for ndvi_file in sorted(output_dir.glob("*.geotiff")): filename = ndvi_file.name date_str = filename.split("_")[0] try: @@ -45,18 +60,9 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name): except ValueError: date = date_str - ndvi_value = None - try: - with rasterio.open(ndvi_file) as src: - lon, lat = site_position[1], site_position[0] - x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) - samples = list(src.sample([(x[0], y[0])])) - if samples: - value = float(samples[0][0]) - if value != 0 and not np.isnan(value): - ndvi_value = value - except Exception as e: - print(f"[NDVI-{source_name}] Warning: Could not sample {filename}: {e}") + ndvi_value = _get_ndvi_value(ndvi_file, site_position) + if ndvi_value is None: + print(f"[NDVI-{source_name}] Warning: Could not sample {filename}") timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) @@ -68,30 +74,35 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name): print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)") +def _process_ndvi_files( + input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None +): + output_dir.mkdir(parents=True, exist_ok=True) + print(f"[NDVI-{source_name}] Processing {input_dir}...") + + geotiff_files = sorted(input_dir.glob(pattern)) + if not geotiff_files: + print(f"[NDVI-{source_name}] No files found") + return + + for geotiff_file in geotiff_files: + output_file = output_dir / ( + output_namer(geotiff_file) if output_namer else geotiff_file.name + ) + + if output_file.exists(): + print(f"[NDVI-{source_name}] Skipping {geotiff_file.name} (exists)") + continue + + _calculate_and_write_ndvi(geotiff_file, output_file) + print(f"[NDVI-{source_name}] Saved: {output_file}") + + def generate_ndvi_raw(season, site_position, site_name): for source in ["s2", "s3"]: input_dir = Path(f"data/{site_name}/{season}/raw/{source}/") output_dir = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/") - output_dir.mkdir(parents=True, exist_ok=True) - - print(f"[NDVI-{source.upper()}] Processing {input_dir}...") - - geotiff_files = sorted(input_dir.glob("*.geotiff")) - if not geotiff_files: - print(f"[NDVI-{source.upper()}] No files found") - continue - - for geotiff_file in geotiff_files: - output_file = output_dir / geotiff_file.name - - if output_file.exists(): - print(f"[NDVI-{source.upper()}] Skipping {geotiff_file.name} (exists)") - continue - - _calculate_and_write_ndvi(geotiff_file, output_file) - print(f"[NDVI-{source.upper()}] Saved: {output_file}") - - print(f"[NDVI-{source.upper()}] Completed") + _process_ndvi_files(input_dir, output_dir, source.upper()) def create_ndvi_timeseries_raw(season, site_position, site_name): @@ -100,67 +111,50 @@ def create_ndvi_timeseries_raw(season, site_position, site_name): _create_timeseries_for_dir(output_dir, site_position, source.upper()) +def _get_output_name_prepared(geotiff_file): + if geotiff_file.suffix == ".tif": + if "REFL" in geotiff_file.stem: + date_str = geotiff_file.stem.split("_")[1] + return f"{date_str}_ndvi.geotiff" + return geotiff_file.name.replace(".tif", ".geotiff") + return geotiff_file.name + + +def _fusion_namer(f): + date_str = f.stem.split("_")[1] + return f"{date_str}_ndvi.geotiff" + + def generate_ndvi_prepared(season, site_position, site_name): for source in ["s2", "s3"]: input_dir = Path(f"data/{site_name}/{season}/prepared/{source}/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/") - output_dir.mkdir(parents=True, exist_ok=True) - - print(f"[NDVI-PREPARED-{source.upper()}] Processing {input_dir}...") - - geotiff_files = sorted(input_dir.glob("*.geotiff")) + sorted(input_dir.glob("*.tif")) - if not geotiff_files: - print(f"[NDVI-PREPARED-{source.upper()}] No files found") - continue - - for geotiff_file in geotiff_files: - if geotiff_file.suffix == ".tif": - if "REFL" in geotiff_file.stem: - date_str = geotiff_file.stem.split("_")[1] - output_file = output_dir / f"{date_str}_ndvi.geotiff" - else: - output_file = output_dir / geotiff_file.name.replace(".tif", ".geotiff") - else: - output_file = output_dir / geotiff_file.name - - if output_file.exists(): - print(f"[NDVI-PREPARED-{source.upper()}] Skipping {geotiff_file.name} (exists)") - continue - - _calculate_and_write_ndvi(geotiff_file, output_file) - print(f"[NDVI-PREPARED-{source.upper()}] Saved: {output_file}") - - print(f"[NDVI-PREPARED-{source.upper()}] Completed") + for pattern in ["*.geotiff", "*.tif"]: + _process_ndvi_files( + input_dir, + output_dir, + f"PREPARED-{source.upper()}", + pattern=pattern, + output_namer=_get_output_name_prepared, + ) input_dir = Path(f"data/{site_name}/{season}/prepared/fusion/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/") - output_dir.mkdir(parents=True, exist_ok=True) - - print(f"[NDVI-FUSION] Processing {input_dir}...") - - geotiff_files = sorted(input_dir.glob("REFL_*.tif")) - if not geotiff_files: - print(f"[NDVI-FUSION] No files found") - return - - for geotiff_file in geotiff_files: - date_str = geotiff_file.stem.split("_")[1] - output_file = output_dir / f"{date_str}_ndvi.geotiff" - - if output_file.exists(): - print(f"[NDVI-FUSION] Skipping {geotiff_file.name} (exists)") - continue - - _calculate_and_write_ndvi(geotiff_file, output_file) - print(f"[NDVI-FUSION] Saved: {output_file}") - - print(f"[NDVI-FUSION] Completed") + _process_ndvi_files( + input_dir, + output_dir, + "FUSION", + pattern="REFL_*.tif", + output_namer=_fusion_namer, + ) def create_ndvi_timeseries_prepared(season, site_position, site_name): for source in ["s2", "s3"]: output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/") - _create_timeseries_for_dir(output_dir, site_position, f"PREPARED-{source.upper()}") + _create_timeseries_for_dir( + output_dir, site_position, f"PREPARED-{source.upper()}" + ) output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/") _create_timeseries_for_dir(output_dir, site_position, "FUSION") diff --git a/requirements.txt b/requirements.txt index 46a3b7f..fa9fdc0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,6 @@ python-dotenv netCDF4 numpy requests +scipy ruff pre-commit diff --git a/run.py b/run.py index bef89ec..073a0de 100644 --- a/run.py +++ b/run.py @@ -14,25 +14,26 @@ def run_pipeline(season, site_position, site_name): try: print(f"Downloading data for {site_name}, {season}") download_s2(season, site_position, site_name) - download_s3(season, site_position, site_name) + # download_s3(season, site_position, site_name) - print(f"Generating NDVI for raw data: {site_name}, {season}") - generate_ndvi_raw(season, site_position, site_name) - create_ndvi_timeseries_raw(season, site_position, site_name) + # print(f"Generating NDVI for raw data: {site_name}, {season}") + # generate_ndvi_raw(season, site_position, site_name) + # create_ndvi_timeseries_raw(season, site_position, site_name) - print(f"Detecting clouds for {site_name}, {season}") - detect_clouds(season, site_name) + # print(f"Detecting clouds for {site_name}, {season}") + # detect_clouds(season, site_name) - print(f"Preparing data for EFAST fusion for {site_name}, {season}") - prepare_s2(season, site_position, site_name) - prepare_s3(season, site_position, site_name) + # print(f"Preparing data for EFAST fusion for {site_name}, {season}") + # prepare_s2(season, site_position, site_name) + # prepare_s3(season, site_position, site_name) - print(f"Running EFAST fusion for {site_name}, {season}") - run_efast(season, site_position, site_name) + # print(f"Running EFAST fusion for {site_name}, {season}") + # run_efast(season, site_position, site_name) + + # print(f"Generating NDVI for prepared outputs: {site_name}, {season}") + # generate_ndvi_prepared(season, site_position, site_name) + # create_ndvi_timeseries_prepared(season, site_position, site_name) - print(f"Generating NDVI for prepared outputs: {site_name}, {season}") - generate_ndvi_prepared(season, site_position, site_name) - create_ndvi_timeseries_prepared(season, site_position, site_name) except Exception as e: print(f"Error: {e}") raise diff --git a/webapp/index.html b/webapp/index.html index e3be82b..0e60244 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -21,6 +21,7 @@ .map-date { font-size: 11px; margin-top: 5px; color: #999; } .map { height: 500px; border: 1px solid #ccc; } .leaflet-image-layer { image-rendering: pixelated; } + .leaflet-control-attribution { display: none; }