From 41f363220fe7ef6c963dfc3498f7ab36c807a4cf Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Mon, 26 Jan 2026 09:36:00 +0100 Subject: [PATCH] Added plot of timeseries with sentinel-2 and fusion. --- ndvi.py | 285 +++++++++++++++++++++++++++++++++++++++------- run.py | 7 +- webapp/index.html | 276 +++++++++++++++++++++++++++----------------- 3 files changed, 421 insertions(+), 147 deletions(-) diff --git a/ndvi.py b/ndvi.py index 7c4a44a..ecc2376 100644 --- a/ndvi.py +++ b/ndvi.py @@ -7,6 +7,8 @@ from datetime import datetime RED_BAND = 3 NIR_BAND = 4 +BLUE_BAND = 1 +GREEN_BAND = 2 def _calculate_and_write_ndvi(input_file, output_file): @@ -62,21 +64,53 @@ def _get_ndvi_value(ndvi_file, site_position): return None -def _create_timeseries_for_dir(output_dir, site_position, source_name): +def _get_ndvi_from_original(input_file, site_position): + """Calculate NDVI directly from original file without creating GeoTIFF.""" + try: + with rasterio.open(input_file) as src: + if src.count < 4: + return None + + red = src.read(RED_BAND).astype(np.float32) + nir = src.read(NIR_BAND).astype(np.float32) + + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + + row, col = src.index(x[0], y[0]) + if row < 0 or row >= src.height or col < 0 or col >= src.width: + return None + + r_val = float(red[row, col]) + n_val = float(nir[row, col]) + + if r_val <= 0 or n_val <= 0 or np.isnan(r_val) or np.isnan(n_val): + return None + + ndvi = (n_val - r_val) / (n_val + r_val) + return ndvi if not np.isnan(ndvi) else None + except Exception as e: + return None + + +def _create_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): print(f"[NDVI-{source_name}] Creating timeseries.json...") timeseries = [] - for ndvi_file in sorted(output_dir.glob("*.geotiff")): - filename = ndvi_file.name - # Extract date from filename - # Format examples: - # - YYYYMMDD_ndvi.geotiff -> date is at [0] - # - YYYYMMDD_0.geotiff -> date is at [0] - # - composite_YYYYMMDD.geotiff -> date is at [1] + for input_file in sorted(input_dir.glob(pattern)): + if "DIST_CLOUD" in input_file.name: + continue + + filename = input_file.name parts = filename.replace(".geotiff", "").split("_") date_str = None - # Try to find a date pattern (8 digits) for part in parts: if len(part) == 8 and part.isdigit(): date_str = part @@ -88,21 +122,17 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name): except ValueError: date = date_str else: - # Fallback: use first part (for old MSIL2A_ndvi.geotiff files) date_str = parts[0] date = date_str print( f"[NDVI-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'" ) - ndvi_value = _get_ndvi_value(ndvi_file, site_position) + ndvi_value = _get_ndvi_from_original(input_file, site_position) if ndvi_value is None: print( f"[NDVI-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)" ) - # Note: 0 is a valid NDVI value (no vegetation), so we keep it - # The _get_ndvi_value function now properly distinguishes between - # valid 0 values and nodata values timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value}) @@ -154,16 +184,15 @@ def _process_ndvi_files( def generate_ndvi_raw(season, site_position, site_name): - for source in ["s2", "s3"]: - input_dir = Path(f"data/{site_name}/{season}/raw/{source}/") - output_dir = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/") - _process_ndvi_files(input_dir, output_dir, source.upper()) + # No longer creating NDVI GeoTIFF files, only timeseries + pass def create_ndvi_timeseries_raw(season, site_position, site_name): for source in ["s2", "s3"]: + input_dir = Path(f"data/{site_name}/{season}/raw/{source}/") output_dir = Path(f"data/{site_name}/{season}/raw/ndvi/{source}/") - _create_timeseries_for_dir(output_dir, site_position, source.upper()) + _create_timeseries_for_dir(input_dir, output_dir, site_position, source.upper()) def _get_output_name_prepared(geotiff_file): @@ -192,34 +221,208 @@ def _fusion_namer(f): def generate_ndvi_post_process(season, site_position, site_name): - for source in ["s2", "s3"]: - input_dir = Path(f"data/{site_name}/{season}/processed/{source}/") - output_dir = Path(f"data/{site_name}/{season}/processed/ndvi/{source}/") - _process_ndvi_files( - input_dir, - output_dir, - f"POST-PROCESS-{source.upper()}", - pattern="*.geotiff", - output_namer=lambda f: f.name.replace(".geotiff", "_ndvi.geotiff"), - ) - - input_dir = Path(f"data/{site_name}/{season}/processed/fusion/") - output_dir = Path(f"data/{site_name}/{season}/processed/ndvi/fusion/") - _process_ndvi_files( - input_dir, - output_dir, - "POST-PROCESS-FUSION", - pattern="*.geotiff", - output_namer=lambda f: f.name.replace(".geotiff", "_ndvi.geotiff"), - ) + # No longer creating NDVI GeoTIFF files, only timeseries + pass def create_ndvi_timeseries_post_process(season, site_position, site_name): for source in ["s2", "s3"]: + input_dir = Path(f"data/{site_name}/{season}/processed/{source}/") output_dir = Path(f"data/{site_name}/{season}/processed/ndvi/{source}/") _create_timeseries_for_dir( - output_dir, site_position, f"POST-PROCESS-{source.upper()}" + input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}" ) + input_dir = Path(f"data/{site_name}/{season}/processed/fusion/") output_dir = Path(f"data/{site_name}/{season}/processed/ndvi/fusion/") - _create_timeseries_for_dir(output_dir, site_position, "POST-PROCESS-FUSION") + _create_timeseries_for_dir(input_dir, output_dir, site_position, "POST-PROCESS-FUSION") + + +def _calculate_and_write_gcc(input_file, output_file): + with rasterio.open(input_file) as src: + blue = src.read(BLUE_BAND).astype(np.float32) + green = src.read(GREEN_BAND).astype(np.float32) + red = src.read(RED_BAND).astype(np.float32) + + total = red + green + blue + mask = total > 0 + gcc = np.zeros_like(green, dtype=np.float32) + gcc[mask] = green[mask] / total[mask] + + profile = src.profile.copy() + profile.update( + { + "count": 1, + "dtype": "float32", + "nodata": 0, + "compress": "lzw", + } + ) + + with rasterio.open(output_file, "w", **profile) as dst: + dst.write(gcc, 1) + dst.set_band_description(1, "GCC") + + +def _get_gcc_value(gcc_file, site_position): + try: + with rasterio.open(gcc_file) as src: + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + + samples = list(src.sample([(x[0], y[0])])) + if samples: + value = float(samples[0][0]) + if src.nodata is not None and value == src.nodata: + return None + if np.isnan(value): + return None + return value + except Exception as e: + print(f"Error sampling {gcc_file.name}: {e}") + pass + return None + + +def _get_gcc_from_original(input_file, site_position): + """Calculate GCC directly from original file without creating GeoTIFF.""" + try: + with rasterio.open(input_file) as src: + if src.count < 3: + return None + + blue = src.read(BLUE_BAND).astype(np.float32) + green = src.read(GREEN_BAND).astype(np.float32) + red = src.read(RED_BAND).astype(np.float32) + + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + + row, col = src.index(x[0], y[0]) + if row < 0 or row >= src.height or col < 0 or col >= src.width: + return None + + b_val = float(blue[row, col]) + g_val = float(green[row, col]) + r_val = float(red[row, col]) + + total = r_val + g_val + b_val + if total <= 0 or np.isnan(total): + return None + + gcc = g_val / total + return gcc if not np.isnan(gcc) else None + except Exception as e: + return None + + +def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): + print(f"[GCC-{source_name}] Creating timeseries.json...") + timeseries = [] + + for input_file in sorted(input_dir.glob(pattern)): + if "DIST_CLOUD" in input_file.name: + continue + + filename = input_file.name + parts = filename.replace(".geotiff", "").split("_") + date_str = None + + for part in parts: + if len(part) == 8 and part.isdigit(): + date_str = part + break + + if date_str: + try: + date = datetime.strptime(date_str, "%Y%m%d").isoformat() + except ValueError: + date = date_str + else: + date_str = parts[0] + date = date_str + print( + f"[GCC-{source_name}] Warning: Could not extract date from {filename}, using '{date_str}'" + ) + + gcc_value = _get_gcc_from_original(input_file, site_position) + if gcc_value is None: + print( + f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)" + ) + + timeseries.append({"date": date, "filename": filename, "greenness_index": gcc_value}) + + timeseries.sort(key=lambda x: x["date"]) + output_dir.mkdir(parents=True, exist_ok=True) + timeseries_file = output_dir / "timeseries.json" + with open(timeseries_file, "w") as f: + json.dump(timeseries, f, indent=2) + + print(f"[GCC-{source_name}] Saved: {timeseries_file} ({len(timeseries)} entries)") + + +def _process_gcc_files( + input_dir, output_dir, source_name, pattern="*.geotiff", output_namer=None +): + output_dir.mkdir(parents=True, exist_ok=True) + print(f"[GCC-{source_name}] Processing {input_dir}...") + + geotiff_files = sorted(input_dir.glob(pattern)) + if not geotiff_files: + print(f"[GCC-{source_name}] No files found") + return + + for geotiff_file in geotiff_files: + if "DIST_CLOUD" in geotiff_file.name: + continue + + try: + with rasterio.open(geotiff_file) as src: + if src.count < 3: + print( + f"[GCC-{source_name}] Skipping {geotiff_file.name} (only {src.count} band(s), need 3+)" + ) + continue + except Exception as e: + print( + f"[GCC-{source_name}] Skipping {geotiff_file.name} (error reading: {e})" + ) + continue + + output_file = output_dir / ( + output_namer(geotiff_file) if output_namer else geotiff_file.name + ) + + _calculate_and_write_gcc(geotiff_file, output_file) + print(f"[GCC-{source_name}] Saved: {output_file}") + + +def generate_gcc_post_process(season, site_position, site_name): + # No longer creating GCC GeoTIFF files, only timeseries + pass + + +def create_gcc_timeseries_post_process(season, site_position, site_name): + for source in ["s2", "s3"]: + input_dir = Path(f"data/{site_name}/{season}/processed/{source}/") + output_dir = Path(f"data/{site_name}/{season}/processed/gcc/{source}/") + _create_gcc_timeseries_for_dir( + input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}" + ) + + input_dir = Path(f"data/{site_name}/{season}/processed/fusion/") + output_dir = Path(f"data/{site_name}/{season}/processed/gcc/fusion/") + _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, "POST-PROCESS-FUSION") diff --git a/run.py b/run.py index d4b74d8..0261e01 100644 --- a/run.py +++ b/run.py @@ -5,6 +5,8 @@ from ndvi import ( create_ndvi_timeseries_raw, generate_ndvi_post_process, create_ndvi_timeseries_post_process, + generate_gcc_post_process, + create_gcc_timeseries_post_process, ) from download_s2 import download_s2 from download_s3 import download_s3 @@ -18,7 +20,7 @@ def run_pipeline(season, site_position, site_name): # download_s2(season, site_position, site_name) # download_s3(season, site_position, site_name) # download_phenocam(season, site_position, site_name) - download_phenocam_greenness(season, site_position, site_name) + #download_phenocam_greenness(season, site_position, site_name) # print(f"Generating NDVI for raw data: {site_name}, {season}") # generate_ndvi_raw(season, site_position, site_name) @@ -39,6 +41,9 @@ def run_pipeline(season, site_position, site_name): #print(f"Generating NDVI for final outputs: {site_name}, {season}") #generate_ndvi_post_process(season, site_position, site_name) #create_ndvi_timeseries_post_process(season, site_position, site_name) + #print(f"Generating GCC for final outputs: {site_name}, {season}") + generate_gcc_post_process(season, site_position, site_name) + create_gcc_timeseries_post_process(season, site_position, site_name) except Exception as e: print(f"Error: {e}") diff --git a/webapp/index.html b/webapp/index.html index 49574f0..d8b4dd0 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -29,6 +29,9 @@ .map-label { font-size: 12px; margin-bottom: 5px; color: #666; } .map-date { font-size: 11px; margin-top: 5px; color: #999; } .map { height: 500px; border: 1px solid #ccc; } + .combined-plot { margin-top: 20px; } + .combined-plot-label { font-size: 12px; margin-bottom: 5px; color: #666; } + .combined-plot-canvas { width: 100%; height: 120px; border: 1px solid #ccc; } .leaflet-image-layer { image-rendering: pixelated; } .leaflet-control-attribution { display: none; } @@ -60,36 +63,37 @@

S2

NDVI Timeseries
+
Greenness Index Timeseries
+
RGB Composite
-
NDVI
-
-

Fusion

NDVI Timeseries
+
Greenness Index Timeseries
+
RGB Composite
-
NDVI
-
-

S3

NDVI Timeseries
+
Greenness Index Timeseries
+
RGB Composite
-
NDVI
-
-
+
+
Greenness Index Timeseries (S2 & Fusion)
+ +