Refactored a bit.

This commit is contained in:
Felix Delattre 2025-12-27 10:25:17 +01:00
parent 290c8f8c57
commit 6bbaa4b3eb
8 changed files with 323 additions and 331 deletions

View file

@ -2,13 +2,20 @@ import json
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
WINDOW_DAYS = 14
NDVI_THRESHOLD = 0.3
NDVI_DELTA = 0.15
MIN_WINDOW_SIZE = 3
def detect_clouds(season, site_name): def detect_clouds(season, site_name):
output_file = Path(f"data/{site_name}/{season}/clouds.json") output_file = Path(f"data/{site_name}/{season}/clouds.json")
clouds = {"s2": [], "s3": []} clouds = {"s2": [], "s3": []}
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
timeseries_file = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/timeseries.json") timeseries_file = Path(
f"data/{site_name}/{season}/raw/ndvi/{source}/timeseries.json"
)
if not timeseries_file.exists(): if not timeseries_file.exists():
print(f"[CLOUDS-{source.upper()}] No timeseries.json found") print(f"[CLOUDS-{source.upper()}] No timeseries.json found")
continue continue
@ -21,22 +28,23 @@ def detect_clouds(season, site_name):
entries = [ entries = [
(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00"))) (e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
for e in timeseries for e in timeseries
if e["ndvi"] is not None if e.get("ndvi") is not None
] ]
for entry, entry_date in entries: for entry, entry_date in entries:
# Use 14-day window for seasonal context, require NDVI < 0.3 and >0.15 below max
window_ndvi = [ window_ndvi = [
e["ndvi"] for e, d in entries if abs((d - entry_date).days) <= 14 e["ndvi"]
for e, d in entries
if abs((d - entry_date).days) <= WINDOW_DAYS
] ]
if len(window_ndvi) < 3: if len(window_ndvi) < MIN_WINDOW_SIZE:
continue continue
max_ndvi = max(window_ndvi) max_ndvi = max(window_ndvi)
threshold = max_ndvi - 0.15 threshold = max_ndvi - NDVI_DELTA
if entry["ndvi"] < threshold and entry["ndvi"] < 0.3: if entry["ndvi"] < threshold and entry["ndvi"] < NDVI_THRESHOLD:
clouds[source].append(entry["filename"]) clouds[source].append(entry["filename"])
print( print(

View file

@ -1,28 +1,76 @@
import os
import rasterio import rasterio
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from pathlib import Path
from rasterio.warp import transform_geom from rasterio.warp import transform_geom
from rasterio.windows import from_bounds, transform as window_transform from rasterio.windows import from_bounds, transform as window_transform
from pystac_client import Client from pystac_client import Client
BBOX_SIZE = 0.011
def _get_bbox(lon, lat):
half = BBOX_SIZE / 2
return [lon - half, lat - half, lon + half, lat + half]
def _get_window_for_bbox(src, bbox):
bbox_geom = {
"type": "Polygon",
"coordinates": [
[
[bbox[0], bbox[1]],
[bbox[2], bbox[1]],
[bbox[2], bbox[3]],
[bbox[0], bbox[3]],
[bbox[0], bbox[1]],
]
],
}
bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom)
coords = bbox_transformed["coordinates"][0]
x_coords = [c[0] for c in coords[:4]]
y_coords = [c[1] for c in coords[:4]]
bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)]
src_bounds = src.bounds
intersect_bbox = [
max(bbox_crs[0], src_bounds.left),
max(bbox_crs[1], src_bounds.bottom),
min(bbox_crs[2], src_bounds.right),
min(bbox_crs[3], src_bounds.top),
]
return from_bounds(*intersect_bbox, src.transform)
def _extract_viewing_angle(item):
if "granule_metadata" not in item.assets:
return None
try:
xml_url = item.assets["granule_metadata"].href
xml_resp = requests.get(xml_url, timeout=10)
xml_resp.raise_for_status()
root = ET.fromstring(xml_resp.content)
angles = [
abs(float(zenith_elem.text))
for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle")
if (zenith_elem := angle_elem.find("ZENITH_ANGLE")) is not None
]
return angles[0] if angles else None
except Exception as e:
print(f"[S2] Warning: Could not extract viewing angle: {e}")
return None
def download_s2(season, site_position, site_name, date_range=None): def download_s2(season, site_position, site_name, date_range=None):
lat, lon = site_position lat, lon = site_position
datetime_range = date_range or f"{season}-01-01/{season}-12-31" datetime_range = date_range or f"{season}-01-01/{season}-12-31"
output_dir = f"data/{site_name}/{season}/raw/s2/" output_dir = Path(f"data/{site_name}/{season}/raw/s2/")
print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}") print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
bbox_size = 0.011 bbox = _get_bbox(lon, lat)
bbox = [
lon - bbox_size / 2,
lat - bbox_size / 2,
lon + bbox_size / 2,
lat + bbox_size / 2,
]
bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"} bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
os.makedirs(output_dir, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
print("[S2] Connecting to STAC catalog...") print("[S2] Connecting to STAC catalog...")
client = Client.open("https://earth-search.aws.element84.com/v1") client = Client.open("https://earth-search.aws.element84.com/v1")
@ -46,8 +94,8 @@ def download_s2(season, site_position, site_name, date_range=None):
print(f"[S2] Found {len(items_by_key)} unique items") print(f"[S2] Found {len(items_by_key)} unique items")
for (date, increment), item in items_by_key.items(): for (date, increment), item in items_by_key.items():
filepath = os.path.join(output_dir, f"{date}_{increment}.geotiff") filepath = output_dir / f"{date}_{increment}.geotiff"
if os.path.exists(filepath): if filepath.exists():
print(f"[S2] Skipping {date}_{increment}.geotiff (exists)") print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
continue continue
@ -56,40 +104,13 @@ def download_s2(season, site_position, site_name, date_range=None):
profile = None profile = None
for band_name, asset_name in bands.items(): for band_name, asset_name in bands.items():
if asset_name in item.assets: if asset_name not in item.assets:
continue
asset = item.assets[asset_name] asset = item.assets[asset_name]
with rasterio.open(asset.href) as src: with rasterio.open(asset.href) as src:
bbox_geom = { window = _get_window_for_bbox(src, bbox)
"type": "Polygon", if window.height <= 0 or window.width <= 0:
"coordinates": [ continue
[
[bbox[0], bbox[1]],
[bbox[2], bbox[1]],
[bbox[2], bbox[3]],
[bbox[0], bbox[3]],
[bbox[0], bbox[1]],
]
],
}
bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom)
coords = bbox_transformed["coordinates"][0]
x_coords = [c[0] for c in coords[:4]]
y_coords = [c[1] for c in coords[:4]]
bbox_crs = [
min(x_coords),
min(y_coords),
max(x_coords),
max(y_coords),
]
src_bounds = src.bounds
intersect_bbox = [
max(bbox_crs[0], src_bounds.left),
max(bbox_crs[1], src_bounds.bottom),
min(bbox_crs[2], src_bounds.right),
min(bbox_crs[3], src_bounds.top),
]
window = from_bounds(*intersect_bbox, src.transform)
if window.height > 0 and window.width > 0:
data = src.read(window=window) data = src.read(window=window)
new_transform = window_transform(window, src.transform) new_transform = window_transform(window, src.transform)
if profile is None: if profile is None:
@ -109,35 +130,7 @@ def download_s2(season, site_position, site_name, date_range=None):
if profile and len(band_data) == len(bands): if profile and len(band_data) == len(bands):
stacked = [band_data[i] for i in sorted(band_data.keys())] stacked = [band_data[i] for i in sorted(band_data.keys())]
band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())] band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
viewing_angle = _extract_viewing_angle(item)
# Extract viewing angle from granule metadata XML
viewing_angle = None
if "granule_metadata" in item.assets:
try:
xml_url = item.assets["granule_metadata"].href
xml_resp = requests.get(xml_url, timeout=10)
xml_resp.raise_for_status()
root = ET.fromstring(xml_resp.content)
# Find Mean_Viewing_Incidence_Angle ZENITH_ANGLE
for angle_elem in root.findall(".//Mean_Viewing_Incidence_Angle"):
if angle_elem.get("bandId") == "0": # Use first band or average
zenith_elem = angle_elem.find("ZENITH_ANGLE")
if zenith_elem is not None:
viewing_angle = abs(float(zenith_elem.text))
break
# If not found, try averaging all bands
if viewing_angle is None:
angles = []
for angle_elem in root.findall(
".//Mean_Viewing_Incidence_Angle"
):
zenith_elem = angle_elem.find("ZENITH_ANGLE")
if zenith_elem is not None:
angles.append(abs(float(zenith_elem.text)))
if angles:
viewing_angle = sum(angles) / len(angles)
except Exception as e:
print(f"[S2] Warning: Could not extract viewing angle: {e}")
with rasterio.open(filepath, "w", **profile) as dst: with rasterio.open(filepath, "w", **profile) as dst:
for i, data in enumerate(stacked, 1): for i, data in enumerate(stacked, 1):
@ -146,11 +139,10 @@ def download_s2(season, site_position, site_name, date_range=None):
if viewing_angle is not None: if viewing_angle is not None:
dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle) dst.update_tags(VIEWING_ZENITH_ANGLE=viewing_angle)
print( angle_msg = (
f"[S2] Saved: {filepath} (viewing angle: {viewing_angle:.2f}°)" f" (viewing angle: {viewing_angle:.2f}°)" if viewing_angle else ""
if viewing_angle
else f"[S2] Saved: {filepath}"
) )
print(f"[S2] Saved: {filepath}{angle_msg}")
else: else:
print(f"[S2] Skipping {date}_{increment} (missing bands)") print(f"[S2] Skipping {date}_{increment} (missing bands)")

View file

@ -11,6 +11,66 @@ from rasterio.transform import from_bounds
load_dotenv() load_dotenv()
BBOX_SIZE = 0.011
def _get_bbox(lon, lat):
half = BBOX_SIZE / 2
return [lon - half, lat - half, lon + half, lat + half]
def _process_netcdf(nc_file, output_dir, bands, openeo_bands):
with netCDF4.Dataset(str(nc_file), "r") as nc:
times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
x_coords = nc.variables["x"][:]
y_coords = nc.variables["y"][:]
band_vars = sorted(
[v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
)
band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
transform = from_bounds(
float(x_coords.min()),
float(y_coords.min()),
float(x_coords.max()),
float(y_coords.max()),
len(x_coords),
len(y_coords),
)
print(f"[S3] Found {len(times)} time steps")
date_counts = {}
for t_idx, time_val in enumerate(times):
dt = (
time_val
if isinstance(time_val, datetime)
else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
)
date_str = dt.strftime("%Y%m%d")
increment = date_counts.get(date_str, 0)
date_counts[date_str] = increment + 1
band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
stacked = np.stack(band_data, axis=0)
output_path = output_dir / f"{date_str}_{increment}.geotiff"
with rasterio.open(
output_path,
"w",
driver="GTiff",
height=len(y_coords),
width=len(x_coords),
count=len(band_data),
dtype=stacked.dtype,
crs="EPSG:32632",
transform=transform,
compress="lzw",
) as dst:
dst.write(stacked)
for i, band_name in enumerate(band_names, 1):
dst.set_band_description(i, band_name)
print(f"[S3] Saved: {output_path}")
def download_s3(season, site_position, site_name, date_range=None): def download_s3(season, site_position, site_name, date_range=None):
lat, lon = site_position lat, lon = site_position
@ -19,13 +79,7 @@ def download_s3(season, site_position, site_name, date_range=None):
print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}") print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
bbox_size = 0.011 bbox = _get_bbox(lon, lat)
bbox = [
lon - bbox_size / 2,
lat - bbox_size / 2,
lon + bbox_size / 2,
lat + bbox_size / 2,
]
bands = { bands = {
"SDR_Oa04": "blue", "SDR_Oa04": "blue",
"SDR_Oa06": "green", "SDR_Oa06": "green",
@ -81,57 +135,6 @@ def download_s3(season, site_position, site_name, date_range=None):
datacube.download(str(output_file), format="NetCDF") datacube.download(str(output_file), format="NetCDF")
print("[S3] Processing NetCDF...") print("[S3] Processing NetCDF...")
nc = netCDF4.Dataset(str(output_file), "r") _process_netcdf(output_file, output_dir, bands, openeo_bands)
times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
x_coords = nc.variables["x"][:]
y_coords = nc.variables["y"][:]
band_vars = sorted(
[v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
)
band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
transform = from_bounds(
float(x_coords.min()),
float(y_coords.min()),
float(x_coords.max()),
float(y_coords.max()),
len(x_coords),
len(y_coords),
)
print(f"[S3] Found {len(times)} time steps")
date_counts = {}
for t_idx, time_val in enumerate(times):
dt = (
time_val
if isinstance(time_val, datetime)
else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
)
date_str = dt.strftime("%Y%m%d")
increment = date_counts.get(date_str, 0)
date_counts[date_str] = increment + 1
band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
stacked = np.stack(band_data, axis=0)
output_path = output_dir / f"{date_str}_{increment}.geotiff"
with rasterio.open(
output_path,
"w",
driver="GTiff",
height=len(y_coords),
width=len(x_coords),
count=len(band_data),
dtype=stacked.dtype,
crs="EPSG:32632",
transform=transform,
compress="lzw",
) as dst:
dst.write(stacked)
for i, band_name in enumerate(band_names, 1):
dst.set_band_description(i, band_name)
print(f"[S3] Saved: {output_path}")
nc.close()
os.remove(output_file) os.remove(output_file)
print("[S3] Completed") print("[S3] Completed")

134
efast.py
View file

@ -6,28 +6,16 @@ from datetime import datetime, timedelta
import numpy as np import numpy as np
import rasterio import rasterio
from rasterio.warp import Resampling from rasterio.warp import Resampling
from rasterio.vrt import WarpedVRT
from scipy import ndimage from scipy import ndimage
_this_file = Path(__file__).resolve() RESOLUTION_RATIO = 21
_venv_lib = _this_file.parent.parent / "venv" / "lib"
_efast_pkg_path = None
if _venv_lib.exists():
for py_dir in _venv_lib.glob("python*"):
candidate = py_dir / "site-packages" / "efast" / "efast.py"
if candidate.exists():
_efast_pkg_path = candidate
break
if _efast_pkg_path and _efast_pkg_path.exists(): try:
spec = importlib.util.spec_from_file_location( import efast as efast_fusion
"efast_fusion_module", _efast_pkg_path except ImportError:
)
efast_fusion = importlib.util.module_from_spec(spec)
spec.loader.exec_module(efast_fusion)
else:
import site import site
efast_fusion = None
for site_pkg in site.getsitepackages(): for site_pkg in site.getsitepackages():
candidate = Path(site_pkg) / "efast" / "efast.py" candidate = Path(site_pkg) / "efast" / "efast.py"
if candidate.exists(): if candidate.exists():
@ -37,23 +25,51 @@ else:
efast_fusion = importlib.util.module_from_spec(spec) efast_fusion = importlib.util.module_from_spec(spec)
spec.loader.exec_module(efast_fusion) spec.loader.exec_module(efast_fusion)
break break
else: if efast_fusion is None:
raise ImportError( raise ImportError(
"efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git" "efast package not found. Install with: pip install git+https://github.com/DHI-GRAS/efast.git"
) )
def _load_clouds(clouds_file):
clouds = {"s2": set(), "s3": set()}
if clouds_file.exists():
clouds_data = json.loads(clouds_file.read_text())
clouds["s2"] = set(clouds_data.get("s2", []))
clouds["s3"] = set(clouds_data.get("s3", []))
return clouds
def _reproject_to_target(
data, src_transform, src_crs, target_bounds, target_crs, width, height, resampling
):
dst_transform = rasterio.transform.from_bounds(
target_bounds.left,
target_bounds.bottom,
target_bounds.right,
target_bounds.top,
width,
height,
)
reprojected, _ = rasterio.warp.reproject(
source=data,
destination=np.zeros((data.shape[0], height, width), dtype=data.dtype),
src_transform=src_transform,
src_crs=src_crs,
dst_transform=dst_transform,
dst_crs=target_crs,
resampling=resampling,
)
return reprojected, dst_transform
def prepare_s2(season, site_position, site_name, date_range=None): def prepare_s2(season, site_position, site_name, date_range=None):
s2_dir = Path(f"data/{site_name}/{season}/raw/s2/") s2_dir = Path(f"data/{site_name}/{season}/raw/s2/")
s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/")
s2_output_dir = Path(f"data/{site_name}/{season}/prepared/s2/") s2_output_dir = Path(f"data/{site_name}/{season}/prepared/s2/")
clouds_file = Path(f"data/{site_name}/{season}/clouds.json") clouds_file = Path(f"data/{site_name}/{season}/clouds.json")
clouds = {"s2": set(), "s3": set()} clouds = _load_clouds(clouds_file)
if clouds_file.exists():
clouds_data = json.loads(clouds_file.read_text())
clouds["s2"] = set(clouds_data.get("s2", []))
clouds["s3"] = set(clouds_data.get("s3", []))
s2_output_dir.mkdir(parents=True, exist_ok=True) s2_output_dir.mkdir(parents=True, exist_ok=True)
@ -66,9 +82,8 @@ def prepare_s2(season, site_position, site_name, date_range=None):
target_crs = s3_ref.crs target_crs = s3_ref.crs
s3_width = s3_ref.width s3_width = s3_ref.width
s3_height = s3_ref.height s3_height = s3_ref.height
ratio = 21 s2_width = s3_width * RESOLUTION_RATIO
s2_width = s3_width * ratio s2_height = s3_height * RESOLUTION_RATIO
s2_height = s3_height * ratio
for s2_file in s2_dir.glob("*.geotiff"): for s2_file in s2_dir.glob("*.geotiff"):
if s2_file.name in clouds["s2"]: if s2_file.name in clouds["s2"]:
@ -79,25 +94,15 @@ def prepare_s2(season, site_position, site_name, date_range=None):
if not refl_dst.exists(): if not refl_dst.exists():
with rasterio.open(s2_file) as src: with rasterio.open(s2_file) as src:
data = src.read().astype("float32") / 10000.0 data = src.read().astype("float32") / 10000.0
s2_res = (target_bounds.right - target_bounds.left) / s2_width reprojected_data, dst_transform = _reproject_to_target(
dst_transform = rasterio.transform.from_bounds( data,
target_bounds.left, src.transform,
target_bounds.bottom, src.crs,
target_bounds.right, target_bounds,
target_bounds.top, target_crs,
s2_width, s2_width,
s2_height, s2_height,
) Resampling.cubic,
reprojected_data, _ = rasterio.warp.reproject(
source=data,
destination=np.zeros(
(src.count, s2_height, s2_width), dtype=data.dtype
),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=dst_transform,
dst_crs=target_crs,
resampling=Resampling.cubic,
) )
profile = src.profile.copy() profile = src.profile.copy()
profile.update( profile.update(
@ -118,30 +123,19 @@ def prepare_s2(season, site_position, site_name, date_range=None):
with rasterio.open(refl_dst) as src: with rasterio.open(refl_dst) as src:
s2_hr = src.read(1) s2_hr = src.read(1)
mask = s2_hr == 0 mask = s2_hr == 0
distance_to_cloud_hr = ndimage.distance_transform_edt(~mask) distance_to_cloud_hr = np.clip(
distance_to_cloud_hr = np.clip(distance_to_cloud_hr, 0, 255).astype( ndimage.distance_transform_edt(~mask), 0, 255
"float32" ).astype("float32")
)
s3_res = (target_bounds.right - target_bounds.left) / s3_width distance_to_cloud_lr, lr_transform = _reproject_to_target(
lr_transform = rasterio.transform.from_bounds( distance_to_cloud_hr[np.newaxis, :, :],
target_bounds.left, src.transform,
target_bounds.bottom, src.crs,
target_bounds.right, target_bounds,
target_bounds.top, target_crs,
s3_width, s3_width,
s3_height, s3_height,
) Resampling.average,
distance_to_cloud_lr, _ = rasterio.warp.reproject(
source=distance_to_cloud_hr[np.newaxis, :, :],
destination=np.zeros(
(1, s3_height, s3_width), dtype=distance_to_cloud_hr.dtype
),
src_transform=src.transform,
src_crs=target_crs,
dst_transform=lr_transform,
dst_crs=target_crs,
resampling=Resampling.average,
) )
distance_to_cloud_lr = distance_to_cloud_lr[0] distance_to_cloud_lr = distance_to_cloud_lr[0]
@ -164,10 +158,7 @@ def prepare_s3(season, site_position, site_name, date_range=None):
s3_preprocessed_dir = Path(f"data/{site_name}/{season}/prepared/s3/") s3_preprocessed_dir = Path(f"data/{site_name}/{season}/prepared/s3/")
clouds_file = Path(f"data/{site_name}/{season}/clouds.json") clouds_file = Path(f"data/{site_name}/{season}/clouds.json")
clouds = {"s3": set()} clouds = _load_clouds(clouds_file)
if clouds_file.exists():
clouds_data = json.loads(clouds_file.read_text())
clouds["s3"] = set(clouds_data.get("s3", []))
s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) s3_preprocessed_dir.mkdir(parents=True, exist_ok=True)
for s3_file in s3_dir.glob("*.geotiff"): for s3_file in s3_dir.glob("*.geotiff"):
@ -193,8 +184,9 @@ def run_efast(season, site_position, site_name, date_range=None):
print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}") print(f"[EFAST] Starting fusion: {site_name} ({lat:.6f}, {lon:.6f}), {season}")
start_date = datetime.strptime(datetime_range.split("/")[0], "%Y-%m-%d") start_str, end_str = datetime_range.split("/")
end_date = datetime.strptime(datetime_range.split("/")[1], "%Y-%m-%d") start_date = datetime.strptime(start_str, "%Y-%m-%d")
end_date = datetime.strptime(end_str, "%Y-%m-%d")
current_date = start_date current_date = start_date
while current_date <= end_date: while current_date <= end_date:
@ -216,7 +208,7 @@ def run_efast(season, site_position, site_name, date_range=None):
max_days=30, max_days=30,
date_position=2, date_position=2,
minimum_acquisition_importance=0.0, minimum_acquisition_importance=0.0,
ratio=21, ratio=RESOLUTION_RATIO,
) )
if output_file.exists(): if output_file.exists():
print(f"[EFAST] Saved: {output_file}") print(f"[EFAST] Saved: {output_file}")

174
ndvi.py
View file

@ -5,12 +5,14 @@ from rasterio.warp import transform as transform_coords
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
RED_BAND = 3
NIR_BAND = 4
def _calculate_and_write_ndvi(input_file, output_file): def _calculate_and_write_ndvi(input_file, output_file):
"""Calculate NDVI from red and NIR bands and write to output file."""
with rasterio.open(input_file) as src: with rasterio.open(input_file) as src:
red = src.read(3).astype(np.float32) red = src.read(RED_BAND).astype(np.float32)
nir = src.read(4).astype(np.float32) nir = src.read(NIR_BAND).astype(np.float32)
mask = (red > 0) & (nir > 0) mask = (red > 0) & (nir > 0)
ndvi = np.zeros_like(red, dtype=np.float32) ndvi = np.zeros_like(red, dtype=np.float32)
@ -31,21 +33,7 @@ def _calculate_and_write_ndvi(input_file, output_file):
dst.set_band_description(1, "NDVI") dst.set_band_description(1, "NDVI")
def _create_timeseries_for_dir(output_dir, site_position, source_name): def _get_ndvi_value(ndvi_file, site_position):
"""Create timeseries.json for NDVI files in the given directory."""
print(f"[NDVI-{source_name}] Creating timeseries.json...")
timeseries = []
ndvi_files = sorted(output_dir.glob("*.geotiff"))
for ndvi_file in ndvi_files:
filename = ndvi_file.name
date_str = filename.split("_")[0]
try:
date = datetime.strptime(date_str, "%Y%m%d").isoformat()
except ValueError:
date = date_str
ndvi_value = None
try: try:
with rasterio.open(ndvi_file) as src: with rasterio.open(ndvi_file) as src:
lon, lat = site_position[1], site_position[0] lon, lat = site_position[1], site_position[0]
@ -54,9 +42,27 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name):
if samples: if samples:
value = float(samples[0][0]) value = float(samples[0][0])
if value != 0 and not np.isnan(value): if value != 0 and not np.isnan(value):
ndvi_value = value return value
except Exception as e: except Exception:
print(f"[NDVI-{source_name}] Warning: Could not sample {filename}: {e}") pass
return None
def _create_timeseries_for_dir(output_dir, site_position, source_name):
print(f"[NDVI-{source_name}] Creating timeseries.json...")
timeseries = []
for ndvi_file in sorted(output_dir.glob("*.geotiff")):
filename = ndvi_file.name
date_str = filename.split("_")[0]
try:
date = datetime.strptime(date_str, "%Y%m%d").isoformat()
except ValueError:
date = date_str
ndvi_value = _get_ndvi_value(ndvi_file, site_position)
if ndvi_value is None:
print(f"[NDVI-{source_name}] Warning: Could not sample {filename}")
timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
@ -68,30 +74,35 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name):
print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)") print(f"[NDVI-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)")
def _process_ndvi_files(
input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None
):
output_dir.mkdir(parents=True, exist_ok=True)
print(f"[NDVI-{source_name}] Processing {input_dir}...")
geotiff_files = sorted(input_dir.glob(pattern))
if not geotiff_files:
print(f"[NDVI-{source_name}] No files found")
return
for geotiff_file in geotiff_files:
output_file = output_dir / (
output_namer(geotiff_file) if output_namer else geotiff_file.name
)
if output_file.exists():
print(f"[NDVI-{source_name}] Skipping {geotiff_file.name} (exists)")
continue
_calculate_and_write_ndvi(geotiff_file, output_file)
print(f"[NDVI-{source_name}] Saved: {output_file}")
def generate_ndvi_raw(season, site_position, site_name): def generate_ndvi_raw(season, site_position, site_name):
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/raw/{source}/") input_dir = Path(f"data/{site_name}/{season}/raw/{source}/")
output_dir = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/") output_dir = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/")
output_dir.mkdir(parents=True, exist_ok=True) _process_ndvi_files(input_dir, output_dir, source.upper())
print(f"[NDVI-{source.upper()}] Processing {input_dir}...")
geotiff_files = sorted(input_dir.glob("*.geotiff"))
if not geotiff_files:
print(f"[NDVI-{source.upper()}] No files found")
continue
for geotiff_file in geotiff_files:
output_file = output_dir / geotiff_file.name
if output_file.exists():
print(f"[NDVI-{source.upper()}] Skipping {geotiff_file.name} (exists)")
continue
_calculate_and_write_ndvi(geotiff_file, output_file)
print(f"[NDVI-{source.upper()}] Saved: {output_file}")
print(f"[NDVI-{source.upper()}] Completed")
def create_ndvi_timeseries_raw(season, site_position, site_name): def create_ndvi_timeseries_raw(season, site_position, site_name):
@ -100,67 +111,50 @@ def create_ndvi_timeseries_raw(season, site_position, site_name):
_create_timeseries_for_dir(output_dir, site_position, source.upper()) _create_timeseries_for_dir(output_dir, site_position, source.upper())
def _get_output_name_prepared(geotiff_file):
if geotiff_file.suffix == ".tif":
if "REFL" in geotiff_file.stem:
date_str = geotiff_file.stem.split("_")[1]
return f"{date_str}_ndvi.geotiff"
return geotiff_file.name.replace(".tif", ".geotiff")
return geotiff_file.name
def _fusion_namer(f):
date_str = f.stem.split("_")[1]
return f"{date_str}_ndvi.geotiff"
def generate_ndvi_prepared(season, site_position, site_name): def generate_ndvi_prepared(season, site_position, site_name):
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
input_dir = Path(f"data/{site_name}/{season}/prepared/{source}/") input_dir = Path(f"data/{site_name}/{season}/prepared/{source}/")
output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/")
output_dir.mkdir(parents=True, exist_ok=True) for pattern in ["*.geotiff", "*.tif"]:
_process_ndvi_files(
print(f"[NDVI-PREPARED-{source.upper()}] Processing {input_dir}...") input_dir,
output_dir,
geotiff_files = sorted(input_dir.glob("*.geotiff")) + sorted(input_dir.glob("*.tif")) f"PREPARED-{source.upper()}",
if not geotiff_files: pattern=pattern,
print(f"[NDVI-PREPARED-{source.upper()}] No files found") output_namer=_get_output_name_prepared,
continue )
for geotiff_file in geotiff_files:
if geotiff_file.suffix == ".tif":
if "REFL" in geotiff_file.stem:
date_str = geotiff_file.stem.split("_")[1]
output_file = output_dir / f"{date_str}_ndvi.geotiff"
else:
output_file = output_dir / geotiff_file.name.replace(".tif", ".geotiff")
else:
output_file = output_dir / geotiff_file.name
if output_file.exists():
print(f"[NDVI-PREPARED-{source.upper()}] Skipping {geotiff_file.name} (exists)")
continue
_calculate_and_write_ndvi(geotiff_file, output_file)
print(f"[NDVI-PREPARED-{source.upper()}] Saved: {output_file}")
print(f"[NDVI-PREPARED-{source.upper()}] Completed")
input_dir = Path(f"data/{site_name}/{season}/prepared/fusion/") input_dir = Path(f"data/{site_name}/{season}/prepared/fusion/")
output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/")
output_dir.mkdir(parents=True, exist_ok=True) _process_ndvi_files(
input_dir,
print(f"[NDVI-FUSION] Processing {input_dir}...") output_dir,
"FUSION",
geotiff_files = sorted(input_dir.glob("REFL_*.tif")) pattern="REFL_*.tif",
if not geotiff_files: output_namer=_fusion_namer,
print(f"[NDVI-FUSION] No files found") )
return
for geotiff_file in geotiff_files:
date_str = geotiff_file.stem.split("_")[1]
output_file = output_dir / f"{date_str}_ndvi.geotiff"
if output_file.exists():
print(f"[NDVI-FUSION] Skipping {geotiff_file.name} (exists)")
continue
_calculate_and_write_ndvi(geotiff_file, output_file)
print(f"[NDVI-FUSION] Saved: {output_file}")
print(f"[NDVI-FUSION] Completed")
def create_ndvi_timeseries_prepared(season, site_position, site_name): def create_ndvi_timeseries_prepared(season, site_position, site_name):
for source in ["s2", "s3"]: for source in ["s2", "s3"]:
output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/{source}/")
_create_timeseries_for_dir(output_dir, site_position, f"PREPARED-{source.upper()}") _create_timeseries_for_dir(
output_dir, site_position, f"PREPARED-{source.upper()}"
)
output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/") output_dir = Path(f"data/{site_name}/{season}/prepared/ndvi/fusion/")
_create_timeseries_for_dir(output_dir, site_position, "FUSION") _create_timeseries_for_dir(output_dir, site_position, "FUSION")

View file

@ -5,5 +5,6 @@ python-dotenv
netCDF4 netCDF4
numpy numpy
requests requests
scipy
ruff ruff
pre-commit pre-commit

29
run.py
View file

@ -14,25 +14,26 @@ def run_pipeline(season, site_position, site_name):
try: try:
print(f"Downloading data for {site_name}, {season}") print(f"Downloading data for {site_name}, {season}")
download_s2(season, site_position, site_name) download_s2(season, site_position, site_name)
download_s3(season, site_position, site_name) # download_s3(season, site_position, site_name)
print(f"Generating NDVI for raw data: {site_name}, {season}") # print(f"Generating NDVI for raw data: {site_name}, {season}")
generate_ndvi_raw(season, site_position, site_name) # generate_ndvi_raw(season, site_position, site_name)
create_ndvi_timeseries_raw(season, site_position, site_name) # create_ndvi_timeseries_raw(season, site_position, site_name)
print(f"Detecting clouds for {site_name}, {season}") # print(f"Detecting clouds for {site_name}, {season}")
detect_clouds(season, site_name) # detect_clouds(season, site_name)
print(f"Preparing data for EFAST fusion for {site_name}, {season}") # print(f"Preparing data for EFAST fusion for {site_name}, {season}")
prepare_s2(season, site_position, site_name) # prepare_s2(season, site_position, site_name)
prepare_s3(season, site_position, site_name) # prepare_s3(season, site_position, site_name)
print(f"Running EFAST fusion for {site_name}, {season}") # print(f"Running EFAST fusion for {site_name}, {season}")
run_efast(season, site_position, site_name) # run_efast(season, site_position, site_name)
# print(f"Generating NDVI for prepared outputs: {site_name}, {season}")
# generate_ndvi_prepared(season, site_position, site_name)
# create_ndvi_timeseries_prepared(season, site_position, site_name)
print(f"Generating NDVI for prepared outputs: {site_name}, {season}")
generate_ndvi_prepared(season, site_position, site_name)
create_ndvi_timeseries_prepared(season, site_position, site_name)
except Exception as e: except Exception as e:
print(f"Error: {e}") print(f"Error: {e}")
raise raise

View file

@ -21,6 +21,7 @@
.map-date { font-size: 11px; margin-top: 5px; color: #999; } .map-date { font-size: 11px; margin-top: 5px; color: #999; }
.map { height: 500px; border: 1px solid #ccc; } .map { height: 500px; border: 1px solid #ccc; }
.leaflet-image-layer { image-rendering: pixelated; } .leaflet-image-layer { image-rendering: pixelated; }
.leaflet-control-attribution { display: none; }
</style> </style>
</head> </head>
<body> <body>