diff --git a/.gitignore b/.gitignore index 624743d..3a62807 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,7 @@ dist/ # OS .DS_Store Thumbs.db + +AGENTS.md +METHODOLOGY.md +.vibe \ No newline at end of file diff --git a/fusion.py b/fusion.py index 75a5fb5..22dda00 100644 --- a/fusion.py +++ b/fusion.py @@ -1,13 +1,15 @@ """EFAST fusion: S2/S3 reflectance fusion for four scenarios.""" + from datetime import datetime, timedelta -from preparation import _get_base_dir, RESOLUTION_RATIO +from preparation import _get_base_dir, _get_itb_base_dir, RESOLUTION_RATIO def _import_efast(): """Lazy import of efast to avoid import errors when not using efast functions.""" try: import efast + return efast except ImportError: raise ImportError( @@ -15,7 +17,14 @@ def _import_efast(): ) -def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None, date_range=None): +def run_efast( + season, + site_position, + site_name, + cleaning_strategy="aggressive", + sigma=None, + date_range=None, +): lat, lon = site_position datetime_range = date_range or f"{season}-01-01/{season}-12-31" @@ -47,7 +56,9 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", } if sigma is not None: kwargs["sigma"] = sigma - efast.fusion(current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs) + efast.fusion( + current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs + ) print( f"[EFAST] Saved: {output_file}" if output_file.exists() @@ -60,8 +71,94 @@ def run_efast(season, site_position, site_name, cleaning_strategy="aggressive", print("[EFAST] Completed") -def run_all_efast_scenarios(season, site_position, site_name, sigma_value=30, date_range=None): +def run_all_efast_scenarios( + season, site_position, site_name, sigma_value=30, date_range=None +): """Run EFAST fusion for all 4 scenarios. Expects prepared_*/s2 and prepared_*/s3 to exist.""" for strategy in ["aggressive", "nonaggressive"]: - run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=None, date_range=date_range) - run_efast(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma_value, date_range=date_range) + run_efast( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=None, + date_range=date_range, + ) + run_efast( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma_value, + date_range=date_range, + ) + + +def run_efast_itb( + season, + site_position, + site_name, + cleaning_strategy="aggressive", + sigma=None, + date_range=None, +): + lat, lon = site_position + datetime_range = date_range or f"{season}-01-01/{season}-12-31" + efast_base_dir = _get_itb_base_dir(season, site_name, cleaning_strategy) + s2_output_dir = efast_base_dir / "s2" + s3_output_dir = efast_base_dir / "s3" + fusion_output_dir = efast_base_dir / (f"fusion_sigma{sigma}" if sigma else "fusion") + fusion_output_dir.mkdir(parents=True, exist_ok=True) + print(f"[EFAST-ITB] Fusion GCC: {site_name} ({lat:.6f}, {lon:.6f}), {season}") + efast = _import_efast() + start_str, end_str = datetime_range.split("/") + start_date = datetime.strptime(start_str, "%Y-%m-%d") + end_date = datetime.strptime(end_str, "%Y-%m-%d") + current_date = start_date + while current_date <= end_date: + date_str = current_date.strftime("%Y%m%d") + output_file = fusion_output_dir / f"GCC_{date_str}.tif" + try: + kwargs = { + "product": "GCC", + "max_days": 30, + "date_position": 2, + "minimum_acquisition_importance": 0.0, + "ratio": RESOLUTION_RATIO, + } + if sigma is not None: + kwargs["sigma"] = sigma + efast.fusion( + current_date, s3_output_dir, s2_output_dir, fusion_output_dir, **kwargs + ) + print( + f"[EFAST-ITB] Saved: {output_file}" + if output_file.exists() + else f"[EFAST-ITB] No output for {date_str}" + ) + except Exception as e: + print(f"[EFAST-ITB] Error {date_str}: {e}") + current_date += timedelta(days=1) + print("[EFAST-ITB] Completed") + + +def run_all_efast_itb_scenarios( + season, site_position, site_name, sigma_value=30, date_range=None +): + for strategy in ["aggressive", "nonaggressive"]: + run_efast_itb( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=None, + date_range=date_range, + ) + run_efast_itb( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma_value, + date_range=date_range, + ) diff --git a/metrics_indices.py b/metrics_indices.py index c9095d0..f1c8b35 100644 --- a/metrics_indices.py +++ b/metrics_indices.py @@ -1,4 +1,5 @@ """Index generation: NDVI and GCC from S2/S3/fusion GeoTIFFs.""" + import json import numpy as np import rasterio @@ -67,7 +68,9 @@ def _get_ndvi_value(ndvi_file, site_position): return None -def _create_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): +def _create_timeseries_for_dir( + input_dir, output_dir, site_position, source_name, pattern="*.geotiff" +): print(f"[NDVI-{source_name}] Creating timeseries.json...") timeseries = [] @@ -196,13 +199,23 @@ def create_ndvi_timeseries_post_process(season, site_position, site_name): processed_dir = f"processed_{strategy}_sigma{sigma}" for source in ["s2", "s3"]: input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/") - output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/") + output_dir = Path( + f"data/{site_name}/{season}/{processed_dir}/ndvi/{source}/" + ) _create_timeseries_for_dir( - input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}" + input_dir, + output_dir, + site_position, + f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}", ) input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/") output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/ndvi/fusion/") - _create_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}") + _create_timeseries_for_dir( + input_dir, + output_dir, + site_position, + f"POST-PROCESS-FUSION-{strategy}-σ{sigma}", + ) def _calculate_and_write_gcc(input_file, output_file): @@ -261,6 +274,25 @@ def _get_gcc_from_original(input_file, site_position): """Calculate GCC directly from original file without creating GeoTIFF.""" try: with rasterio.open(input_file) as src: + if src.count == 1: + g = src.read(1).astype(np.float32) + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + row, col = src.index(x[0], y[0]) + if row < 0 or row >= src.height or col < 0 or col >= src.width: + return None + r0, r1 = max(0, row - 1), min(src.height, row + 2) + c0, c1 = max(0, col - 1), min(src.width, col + 2) + win = g[r0:r1, c0:c1] + mask = np.isfinite(win) & (win > 0) + if not np.any(mask): + return None + return float(np.mean(win[mask])) if src.count < 3: return None @@ -290,11 +322,21 @@ def _get_gcc_from_original(input_file, site_position): # Calculate GCC for each pixel in window total = red_window + green_window + blue_window - mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) + mask = ( + (total > 0) + & ~np.isnan(total) + & (blue_window >= 0) + & (green_window >= 0) + & (red_window >= 0) + ) if not np.any(mask): - negative_pixels = np.sum((blue_window < 0) | (green_window < 0) | (red_window < 0)) + negative_pixels = np.sum( + (blue_window < 0) | (green_window < 0) | (red_window < 0) + ) if negative_pixels > 0: - print(f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)") + print( + f"Warning: {input_file.name} excluded - all pixels have negative band values ({negative_pixels} negative pixels in window)" + ) return None gcc_window = np.zeros_like(green_window, dtype=np.float32) @@ -303,11 +345,13 @@ def _get_gcc_from_original(input_file, site_position): # Return mean of valid GCC values valid_gcc = gcc_window[mask] return float(np.mean(valid_gcc)) if len(valid_gcc) > 0 else None - except Exception as e: + except Exception: return None -def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): +def _create_gcc_timeseries_for_dir( + input_dir, output_dir, site_position, source_name, pattern="*.geotiff" +): print(f"[GCC-{source_name}] Creating timeseries.json...") timeseries = [] @@ -342,7 +386,9 @@ def _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, source_ f"[GCC-{source_name}] Warning: Could not sample {filename} (outside bounds or nodata)" ) - timeseries.append({"date": date, "filename": filename, "greenness_index": gcc_value}) + timeseries.append( + {"date": date, "filename": filename, "greenness_index": gcc_value} + ) timeseries.sort(key=lambda x: x["date"]) output_dir.mkdir(parents=True, exist_ok=True) @@ -400,13 +446,41 @@ def create_gcc_timeseries_post_process(season, site_position, site_name): processed_dir = f"processed_{strategy}_sigma{sigma}" for source in ["s2", "s3"]: input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/{source}/") - output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/") + output_dir = Path( + f"data/{site_name}/{season}/{processed_dir}/gcc/{source}/" + ) _create_gcc_timeseries_for_dir( - input_dir, output_dir, site_position, f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}" + input_dir, + output_dir, + site_position, + f"POST-PROCESS-{source.upper()}-{strategy}-σ{sigma}", ) input_dir = Path(f"data/{site_name}/{season}/{processed_dir}/fusion/") output_dir = Path(f"data/{site_name}/{season}/{processed_dir}/gcc/fusion/") - _create_gcc_timeseries_for_dir(input_dir, output_dir, site_position, f"POST-PROCESS-FUSION-{strategy}-σ{sigma}") + _create_gcc_timeseries_for_dir( + input_dir, + output_dir, + site_position, + f"POST-PROCESS-FUSION-{strategy}-σ{sigma}", + ) + itb_dir = f"processed_{strategy}_itb_sigma{sigma}" + base_itb = Path(f"data/{site_name}/{season}/{itb_dir}") + if not base_itb.exists(): + continue + for source in ["s2", "s3"]: + inp, out = base_itb / source, base_itb / "gcc" / source + _create_gcc_timeseries_for_dir( + inp, + out, + site_position, + f"POST-ITB-{source.upper()}-{strategy}-σ{sigma}", + ) + _create_gcc_timeseries_for_dir( + base_itb / "fusion", + base_itb / "gcc" / "fusion", + site_position, + f"POST-ITB-FUSION-{strategy}-σ{sigma}", + ) def _get_bands_from_original(input_file, site_position): @@ -425,7 +499,10 @@ def _get_bands_from_original(input_file, site_position): row, col = src.index(x[0], y[0]) r0, r1 = max(0, row - 1), min(src.height, row + 2) c0, c1 = max(0, col - 1), min(src.width, col + 2) - bands = [src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32) for i in range(4)] + bands = [ + src.read(i + 1, window=((r0, r1), (c0, c1))).astype(np.float32) + for i in range(4) + ] mask = ~np.any([np.isnan(b) for b in bands], axis=0) mask &= np.all([b > 0 for b in bands], axis=0) if not np.any(mask): @@ -440,7 +517,9 @@ def _get_bands_from_original(input_file, site_position): return None -def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, source_name, pattern="*.geotiff"): +def _create_bands_timeseries_for_dir( + input_dir, output_dir, site_position, source_name, pattern="*.geotiff" +): print(f"[BANDS-{source_name}] Creating timeseries.json...") timeseries = [] for f in sorted(input_dir.glob(pattern)): @@ -456,11 +535,14 @@ def _create_bands_timeseries_for_dir(input_dir, output_dir, site_position, sourc timeseries.sort(key=lambda x: x["date"]) output_dir.mkdir(parents=True, exist_ok=True) (output_dir / "timeseries.json").write_text(json.dumps(timeseries, indent=2)) - print(f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)") + print( + f"[BANDS-{source_name}] Saved: {output_dir / 'timeseries.json'} ({len(timeseries)} entries)" + ) def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir): """Merge ndvi, gcc, bands into combined timeseries.json and timeseries.csv.""" + def load(p): p = Path(p) if not p.exists(): @@ -469,6 +551,7 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir): return json.loads((p / "timeseries.json").read_text()) except Exception: return [] + ndvi = {str(t.get("date", ""))[:10]: t for t in load(ndvi_dir)} gcc = {str(t.get("date", ""))[:10]: t for t in load(gcc_dir)} bands = {str(t.get("date", ""))[:10]: t for t in load(bands_dir)} @@ -482,12 +565,16 @@ def _write_export(ndvi_dir, gcc_dir, bands_dir, export_dir): export_dir.mkdir(parents=True, exist_ok=True) (export_dir / "timeseries.json").write_text(json.dumps(merged, indent=2)) cols = ["date", "filename", "ndvi", "greenness_index", "b02", "b03", "b04", "b8a"] + def esc(v): s = str(v) if v is not None else "" return f'"{s}"' if "," in s or '"' in s else s + rows = [cols] + [[esc(r.get(c)) for c in cols] for r in merged] (export_dir / "timeseries.csv").write_text("\n".join(",".join(x) for x in rows)) - print(f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)") + print( + f"[EXPORT] Saved {export_dir / 'timeseries.json'} and timeseries.csv ({len(merged)} entries)" + ) def create_prepared_fusion_timeseries(season, site_position, site_name): @@ -497,17 +584,86 @@ def create_prepared_fusion_timeseries(season, site_position, site_name): for source in ["s2", "s3"]: inp = base / source if inp.exists(): - _create_timeseries_for_dir(inp, base / "ndvi" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") - _create_gcc_timeseries_for_dir(inp, base / "gcc" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") - _create_bands_timeseries_for_dir(inp, base / "bands" / source, site_position, f"PREPARED-{source.upper()}-{strategy}", "*.tif") - _write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source) + _create_timeseries_for_dir( + inp, + base / "ndvi" / source, + site_position, + f"PREPARED-{source.upper()}-{strategy}", + "*.tif", + ) + _create_gcc_timeseries_for_dir( + inp, + base / "gcc" / source, + site_position, + f"PREPARED-{source.upper()}-{strategy}", + "*.tif", + ) + _create_bands_timeseries_for_dir( + inp, + base / "bands" / source, + site_position, + f"PREPARED-{source.upper()}-{strategy}", + "*.tif", + ) + _write_export( + base / "ndvi" / source, + base / "gcc" / source, + base / "bands" / source, + base / "export" / source, + ) for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]: inp = base / fusion_sub if inp.exists(): - _create_timeseries_for_dir(inp, base / "ndvi" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") - _create_gcc_timeseries_for_dir(inp, base / "gcc" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") - _create_bands_timeseries_for_dir(inp, base / "bands" / fusion_sub, site_position, f"FUSION-{strategy}-σ{sig or 20}", "*.tif") - _write_export(base / "ndvi" / fusion_sub, base / "gcc" / fusion_sub, base / "bands" / fusion_sub, base / "export" / fusion_sub) + _create_timeseries_for_dir( + inp, + base / "ndvi" / fusion_sub, + site_position, + f"FUSION-{strategy}-σ{sig or 20}", + "*.tif", + ) + _create_gcc_timeseries_for_dir( + inp, + base / "gcc" / fusion_sub, + site_position, + f"FUSION-{strategy}-σ{sig or 20}", + "*.tif", + ) + _create_bands_timeseries_for_dir( + inp, + base / "bands" / fusion_sub, + site_position, + f"FUSION-{strategy}-σ{sig or 20}", + "*.tif", + ) + _write_export( + base / "ndvi" / fusion_sub, + base / "gcc" / fusion_sub, + base / "bands" / fusion_sub, + base / "export" / fusion_sub, + ) + itb = Path(f"data/{site_name}/{season}/prepared_{strategy}_itb") + if not itb.exists(): + continue + for source in ["s2", "s3"]: + inp = itb / source + if inp.exists(): + _create_gcc_timeseries_for_dir( + inp, + itb / "gcc" / source, + site_position, + f"PREPARED-ITB-{source.upper()}-{strategy}", + "*.tif", + ) + for sig, fusion_sub in [(None, "fusion"), (30, "fusion_sigma30")]: + inp = itb / fusion_sub + if inp.exists(): + _create_gcc_timeseries_for_dir( + inp, + itb / "gcc" / fusion_sub, + site_position, + f"FUSION-ITB-{strategy}-σ{sig or 20}", + "*.tif", + ) def create_bands_timeseries_post_process(season, site_position, site_name): @@ -518,5 +674,16 @@ def create_bands_timeseries_post_process(season, site_position, site_name): for source in ["s2", "s3", "fusion"]: inp, out = base / source, base / "bands" / source if inp.exists(): - _create_bands_timeseries_for_dir(inp, out, site_position, f"POST-{source.upper()}-{strategy}-σ{sigma}", "*.geotiff") - _write_export(base / "ndvi" / source, base / "gcc" / source, base / "bands" / source, base / "export" / source) + _create_bands_timeseries_for_dir( + inp, + out, + site_position, + f"POST-{source.upper()}-{strategy}-σ{sigma}", + "*.geotiff", + ) + _write_export( + base / "ndvi" / source, + base / "gcc" / source, + base / "bands" / source, + base / "export" / source, + ) diff --git a/metrics_stats.py b/metrics_stats.py index fa82389..5f8934e 100644 --- a/metrics_stats.py +++ b/metrics_stats.py @@ -1,4 +1,5 @@ """Metrics and statistics: temporal/spatial metrics and PhenoCam stats.""" + import json import numpy as np from pathlib import Path @@ -132,6 +133,31 @@ def _get_spatial_stats_from_raster(raster_file, site_position): """Extract spatial statistics (mean, std, min, max) from GCC raster in 3x3 window.""" try: with rasterio.open(raster_file) as src: + if src.count == 1: + g = src.read(1).astype(np.float32) + lon, lat = site_position[1], site_position[0] + x, y = transform_coords("EPSG:4326", src.crs, [lon], [lat]) + if not ( + src.bounds.left <= x[0] <= src.bounds.right + and src.bounds.bottom <= y[0] <= src.bounds.top + ): + return None + row, col = src.index(x[0], y[0]) + if row < 0 or row >= src.height or col < 0 or col >= src.width: + return None + r0, r1 = max(0, row - 1), min(src.height, row + 2) + c0, c1 = max(0, col - 1), min(src.width, col + 2) + win = g[r0:r1, c0:c1] + mask = np.isfinite(win) & (win > 0) + if not np.any(mask): + return None + valid = win[mask] + return { + "mean": float(np.mean(valid)), + "std": float(np.std(valid)), + "min": float(np.min(valid)), + "max": float(np.max(valid)), + } if src.count < 3: return None @@ -161,7 +187,13 @@ def _get_spatial_stats_from_raster(raster_file, site_position): # Calculate GCC for each pixel in window total = red_window + green_window + blue_window - mask = (total > 0) & ~np.isnan(total) & (blue_window >= 0) & (green_window >= 0) & (red_window >= 0) + mask = ( + (total > 0) + & ~np.isnan(total) + & (blue_window >= 0) + & (green_window >= 0) + & (red_window >= 0) + ) if not np.any(mask): return None @@ -259,7 +291,9 @@ def calculate_scenario_metrics(season, site_name, strategy, sigma, site_position # Calculate spatial metrics fusion_raster_dir = base / processed_dir / "fusion" - spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) + spatial_metrics = calculate_spatial_metrics( + fusion_raster_dir, phenocam_ts, site_position + ) return temporal_metrics, spatial_metrics @@ -283,7 +317,9 @@ def calculate_all_metrics(season, site_name, site_position): results["phenocam_stats"] = phenocam_stats # Calculate S2 baseline metrics once (S2 data is identical across scenarios) - s2_ts_path = base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json" + s2_ts_path = ( + base / "processed_aggressive_sigma20" / "gcc" / "s2" / "timeseries.json" + ) s2_ts = load_timeseries(s2_ts_path) if s2_ts: s2_metrics = calculate_temporal_metrics(s2_ts, phenocam_ts) @@ -303,7 +339,9 @@ def calculate_all_metrics(season, site_name, site_position): fusion_ts = load_timeseries(fusion_ts_path) if not fusion_ts: - print(f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping") + print( + f"[METRICS] Warning: Missing fusion data for {scenario_name}, skipping" + ) continue # Calculate temporal metrics @@ -313,7 +351,30 @@ def calculate_all_metrics(season, site_name, site_position): # Calculate spatial metrics fusion_raster_dir = base / processed_dir / "fusion" - spatial_metrics = calculate_spatial_metrics(fusion_raster_dir, phenocam_ts, site_position) + spatial_metrics = calculate_spatial_metrics( + fusion_raster_dir, phenocam_ts, site_position + ) + if spatial_metrics: + results["spatial"][scenario_name] = spatial_metrics + + for strategy in ["aggressive", "nonaggressive"]: + for sigma in [20, 30]: + scenario_name = f"{strategy}_sigma{sigma}_itb" + processed_dir = f"processed_{strategy}_itb_sigma{sigma}" + fusion_ts_path = base / processed_dir / "gcc" / "fusion" / "timeseries.json" + fusion_ts = load_timeseries(fusion_ts_path) + if not fusion_ts: + print( + f"[METRICS] Warning: Missing ItB fusion data for {scenario_name}, skipping" + ) + continue + temporal_metrics = calculate_temporal_metrics(fusion_ts, phenocam_ts) + if temporal_metrics: + results["temporal"][scenario_name] = temporal_metrics + fusion_raster_dir = base / processed_dir / "fusion" + spatial_metrics = calculate_spatial_metrics( + fusion_raster_dir, phenocam_ts, site_position + ) if spatial_metrics: results["spatial"][scenario_name] = spatial_metrics @@ -321,14 +382,18 @@ def calculate_all_metrics(season, site_name, site_position): if results["temporal"]: best_temporal = max( results["temporal"].items(), - key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1 + key=lambda x: x[1].get("r_squared", -1) + if x[1].get("r_squared") is not None + else -1, ) results["summary"] = {"best_temporal_scenario": best_temporal[0]} if results["spatial"]: best_spatial = max( results["spatial"].items(), - key=lambda x: x[1].get("r_squared", -1) if x[1].get("r_squared") is not None else -1 + key=lambda x: x[1].get("r_squared", -1) + if x[1].get("r_squared") is not None + else -1, ) if "summary" not in results: results["summary"] = {} diff --git a/postprocessing.py b/postprocessing.py index cb07eb3..a02293e 100644 --- a/postprocessing.py +++ b/postprocessing.py @@ -1,17 +1,23 @@ """Post-processing: crop fusion/S2/S3 to valid pixels.""" + from pathlib import Path import numpy as np import rasterio from rasterio import windows from rasterio.warp import reproject, Resampling -from rasterio.io import MemoryFile -def process_cropped(season, site_position, site_name, cleaning_strategy="aggressive", sigma=None): +def process_cropped( + season, site_position, site_name, cleaning_strategy="aggressive", sigma=None +): """Crop fusion to valid data, then crop S2/S3 to match.""" base = Path(f"data/{site_name}/{season}") prepared = base / f"prepared_{cleaning_strategy}" - processed_dir = f"processed_{cleaning_strategy}_sigma{sigma}" if sigma else f"processed_{cleaning_strategy}_sigma20" + processed_dir = ( + f"processed_{cleaning_strategy}_sigma{sigma}" + if sigma + else f"processed_{cleaning_strategy}_sigma20" + ) processed = base / processed_dir s2_prep = prepared / "s2" @@ -21,7 +27,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: output_dir.mkdir(parents=True, exist_ok=True) - print(f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}") + print( + f"[PROCESS] Processing files: {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}" + ) # Crop fusion to valid data and get dimensions fusion_dims = {} @@ -48,11 +56,29 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress output_file = processed / "fusion" / f"{date_str}_0.geotiff" with rasterio.open(output_file, "w", **p) as dst: dst.write(data_crop) - fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile) + fusion_dims[date_str] = ( + c0, + r0, + w, + h, + transform, + src.transform, + src.crs, + src.profile, + ) print(f"[PROCESS] Cropped fusion: {output_file}") # Crop S2 and S3 to fusion size - for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items(): + for date_str, ( + c0, + r0, + w, + h, + transform, + fusion_transform, + crs, + fusion_profile, + ) in fusion_dims.items(): window = windows.Window(c0, r0, w, h) # S2 for s2_file in s2_prep.glob("*REFL.tif"): @@ -61,7 +87,9 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress with rasterio.open(s2_file) as src: data = src.read(window=window) p2 = src.profile.copy() - p2.update({"width": w, "height": h, "transform": transform, "crs": crs}) + p2.update( + {"width": w, "height": h, "transform": transform, "crs": crs} + ) with rasterio.open(output_file, "w", **p2) as dst: dst.write(data) print(f"[PROCESS] Cropped: {output_file}") @@ -83,7 +111,7 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress src_crs=src.crs, dst_transform=fusion_transform, dst_crs=crs, - resampling=Resampling.nearest + resampling=Resampling.nearest, ) # Crop using same window data = resampled.read(window=window) @@ -96,11 +124,135 @@ def process_cropped(season, site_position, site_name, cleaning_strategy="aggress print("[PROCESS] Completed") +def process_cropped_itb( + season, site_position, site_name, cleaning_strategy="aggressive", sigma=None +): + base = Path(f"data/{site_name}/{season}") + prepared = base / f"prepared_{cleaning_strategy}_itb" + processed_dir = ( + f"processed_{cleaning_strategy}_itb_sigma{sigma}" + if sigma + else f"processed_{cleaning_strategy}_itb_sigma20" + ) + processed = base / processed_dir + s2_prep = prepared / "s2" + s3_prep = prepared / "s3" + fusion_prep = prepared / (f"fusion_sigma{sigma}" if sigma else "fusion") + for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]: + output_dir.mkdir(parents=True, exist_ok=True) + print( + f"[PROCESS-ITB] {site_name}, {season}, {cleaning_strategy}, sigma={sigma or 20}" + ) + fusion_dims = {} + for fusion_file in fusion_prep.glob("GCC_*.tif"): + date_str = fusion_file.stem.split("_")[1] + with rasterio.open(fusion_file) as src: + data = src.read() + valid = ~np.isnan(data) & (data > 0.001) + rows = np.any(valid, axis=(0, 2)) + cols = np.any(valid, axis=(0, 1)) + row_idx = np.where(rows)[0] + col_idx = np.where(cols)[0] + if len(row_idx) == 0 or len(col_idx) == 0: + print(f"[PROCESS-ITB] Skip {fusion_file.name} (no valid pixels)") + continue + r0, r1 = row_idx[0], row_idx[-1] + c0, c1 = col_idx[0], col_idx[-1] + w, h = c1 - c0 + 1, r1 - r0 + 1 + window = windows.Window(c0, r0, w, h) + data_crop = src.read(window=window) + transform = rasterio.windows.transform(window, src.transform) + p = src.profile.copy() + p.update({"width": w, "height": h, "transform": transform}) + output_file = processed / "fusion" / f"{date_str}_0.geotiff" + with rasterio.open(output_file, "w", **p) as dst: + dst.write(data_crop) + fusion_dims[date_str] = ( + c0, + r0, + w, + h, + transform, + src.transform, + src.crs, + src.profile, + ) + print(f"[PROCESS-ITB] Cropped fusion: {output_file}") + for date_str, ( + c0, + r0, + w, + h, + transform, + fusion_transform, + crs, + fusion_profile, + ) in fusion_dims.items(): + window = windows.Window(c0, r0, w, h) + for s2_file in s2_prep.glob("*GCC.tif"): + parts = s2_file.stem.split("_") + if len(parts) > 2 and parts[2] == date_str: + output_file = processed / "s2" / f"{date_str}_0.geotiff" + with rasterio.open(s2_file) as src: + data = src.read(window=window) + p2 = src.profile.copy() + p2.update( + {"width": w, "height": h, "transform": transform, "crs": crs} + ) + with rasterio.open(output_file, "w", **p2) as dst: + dst.write(data) + print(f"[PROCESS-ITB] Cropped: {output_file}") + break + s3_file = s3_prep / f"composite_{date_str}.tif" + if s3_file.exists(): + output_file = processed / "s3" / f"{date_str}_0.geotiff" + with rasterio.open(s3_file) as src: + temp_profile = fusion_profile.copy() + temp_profile.update({"dtype": src.profile["dtype"], "count": src.count}) + with rasterio.MemoryFile() as memfile: + with memfile.open(**temp_profile) as resampled: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(resampled, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=fusion_transform, + dst_crs=crs, + resampling=Resampling.nearest, + ) + data = resampled.read(window=window) + p2 = resampled.profile.copy() + p2.update({"width": w, "height": h, "transform": transform}) + with rasterio.open(output_file, "w", **p2) as dst: + dst.write(data) + print(f"[PROCESS-ITB] Cropped: {output_file}") + print("[PROCESS-ITB] Completed") + + +def post_process_all_itb_scenarios(season, site_position, site_name): + for strategy in ["aggressive", "nonaggressive"]: + for sigma in [None, 30]: + process_cropped_itb( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma, + ) + + def post_process_all_scenarios(season, site_position, site_name): """Crop fusion/S2/S3 to valid pixels for all 4 scenarios.""" for strategy in ["aggressive", "nonaggressive"]: for sigma in [None, 30]: - process_cropped(season, site_position, site_name, cleaning_strategy=strategy, sigma=sigma) + process_cropped( + season, + site_position, + site_name, + cleaning_strategy=strategy, + sigma=sigma, + ) def post_process_timeseries(season, site_position, site_name): @@ -110,6 +262,7 @@ def post_process_timeseries(season, site_position, site_name): create_gcc_timeseries_post_process, create_bands_timeseries_post_process, ) + create_ndvi_timeseries_post_process(season, site_position, site_name) create_gcc_timeseries_post_process(season, site_position, site_name) create_bands_timeseries_post_process(season, site_position, site_name) diff --git a/preparation.py b/preparation.py index f4791bc..1bcbcdc 100644 --- a/preparation.py +++ b/preparation.py @@ -1,4 +1,5 @@ """Data preparation: S2/S3 preprocessing for fusion.""" + import json import shutil from pathlib import Path @@ -16,6 +17,7 @@ def _import_distance_to_clouds(): """Lazy import of efast.distance_to_clouds.""" try: from efast.s2_processing import distance_to_clouds + return distance_to_clouds except ImportError: raise ImportError( @@ -40,6 +42,76 @@ def _get_base_dir(season, site_name, cleaning_strategy): return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}/") +def _get_itb_base_dir(season, site_name, cleaning_strategy): + return Path(f"data/{site_name}/{season}/prepared_{cleaning_strategy}_itb") + + +def _compute_gcc_from_refl_array(blue, green, red): + total = red.astype(np.float32) + green.astype(np.float32) + red.astype(np.float32) + mask = (total > 0) & np.isfinite(total) + gcc = np.zeros_like(green, dtype=np.float32) + gcc[mask] = green[mask].astype(np.float32) / total[mask] + return gcc + + +def _link_dist_cloud_from_prepared(src_s2_dir, dst_s2_dir): + dst_s2_dir.mkdir(parents=True, exist_ok=True) + for src in src_s2_dir.glob("*DIST_CLOUD.tif"): + dst = dst_s2_dir / src.name + if dst.exists(): + continue + try: + dst.symlink_to(src.resolve()) + except OSError: + shutil.copy2(src, dst) + + +def prepare_s2_gcc_for_itb( + season, site_position, site_name, cleaning_strategy="aggressive" +): + base = _get_base_dir(season, site_name, cleaning_strategy) + itb_s2 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s2" + s2_prep = base / "s2" + itb_s2.mkdir(parents=True, exist_ok=True) + for refl in sorted(s2_prep.glob("*REFL.tif")): + out = itb_s2 / refl.name.replace("_REFL.tif", "_GCC.tif") + if out.exists(): + continue + with rasterio.open(refl) as src: + if src.count < 4: + continue + b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4)) + gcc = _compute_gcc_from_refl_array(b, g, r) + profile = src.profile.copy() + profile.update({"count": 1, "dtype": "float32", "nodata": 0}) + with rasterio.open(out, "w", **profile) as dst: + dst.write(gcc, 1) + print(f"[S2-ITB] Saved {out.name}") + _link_dist_cloud_from_prepared(s2_prep, itb_s2) + + +def prepare_s3_gcc_for_itb( + season, site_position, site_name, cleaning_strategy="aggressive" +): + base = _get_base_dir(season, site_name, cleaning_strategy) + itb_s3 = _get_itb_base_dir(season, site_name, cleaning_strategy) / "s3" + itb_s3.mkdir(parents=True, exist_ok=True) + for comp in sorted((base / "s3").glob("composite_*.tif")): + out = itb_s3 / comp.name + if out.exists(): + continue + with rasterio.open(comp) as src: + if src.count < 4: + continue + b, g, r = (src.read(i).astype(np.float32) for i in range(1, 4)) + gcc = _compute_gcc_from_refl_array(b, g, r) + profile = src.profile.copy() + profile.update({"count": 1, "dtype": "float32", "nodata": 0}) + with rasterio.open(out, "w", **profile) as dst: + dst.write(gcc, 1) + print(f"[S3-ITB] Saved {out.name}") + + def _reproject_raster_to_target( src_path, dst_path, @@ -90,7 +162,9 @@ def _rescale_dist_cloud_for_small_roi(s2_output_dir): print(f"[S2-PREP] Rescaled DIST_CLOUD for {dc_path.name} (max was {d_max})") -def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): +def prepare_s2( + season, site_position, site_name, cleaning_strategy="aggressive", date_range=None +): lat, lon = site_position s2_dir = Path(f"data/{site_name}/{season}/raw/s2/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") @@ -99,7 +173,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", clouds = _load_excluded(season, site_name, cleaning_strategy) s2_output_dir.mkdir(parents=True, exist_ok=True) - print(f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") + print( + f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}" + ) s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]] if not s3_files: @@ -113,7 +189,9 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", for s2_file in sorted(s2_dir.glob("*.geotiff")): if s2_file.name in clouds["s2"]: - print(f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})") + print( + f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})" + ) continue date_str = s2_file.name.split("_")[0] refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif" @@ -136,14 +214,16 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", temp_normalized.unlink() print(f"[S2-PREP] Saved: {refl_dst}") - print(f"[S2-PREP] Computing distance-to-clouds...") + print("[S2-PREP] Computing distance-to-clouds...") distance_to_clouds = _import_distance_to_clouds() distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO) _rescale_dist_cloud_for_small_roi(s2_output_dir) print("[S2-PREP] Completed") -def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): +def prepare_s3( + season, site_position, site_name, cleaning_strategy="aggressive", date_range=None +): lat, lon = site_position s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") base_dir = _get_base_dir(season, site_name, cleaning_strategy) @@ -153,16 +233,22 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", clouds = _load_excluded(season, site_name, cleaning_strategy) s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) - print(f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") + print( + f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}" + ) s3_by_date = defaultdict(list) for s3_file in s3_dir.glob("*.geotiff"): if s3_file.name not in clouds["s3"]: s3_by_date[s3_file.name.split("_")[0]].append(s3_file) else: - print(f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})") + print( + f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})" + ) - print(f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates") + print( + f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates" + ) temp_composite_dir = s3_preprocessed_dir / "temp_composites" if temp_composite_dir.exists(): @@ -187,7 +273,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", profile.update({"count": composite.shape[0], "dtype": "float32"}) with rasterio.open(composite_path, "w", **profile) as dst: dst.write(composite) - print(f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged") + print( + f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged" + ) # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds # This ensures fusion covers the same area as S2 and dimensions match @@ -212,7 +300,9 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", height, ) - print(f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)...") + print( + f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)..." + ) # Reproject each S3 composite to match S2 REFL bounds sen3_paths = sorted(temp_composite_dir.glob("*.tif")) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a617c5a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.ruff.lint.per-file-ignores] +"run.py" = ["F401"] diff --git a/run.py b/run.py index a535f2e..ff5af95 100644 --- a/run.py +++ b/run.py @@ -1,10 +1,19 @@ -from fusion import run_all_efast_scenarios -from postprocessing import post_process_all_scenarios, post_process_timeseries +from fusion import run_all_efast_scenarios, run_all_efast_itb_scenarios +from postprocessing import ( + post_process_all_scenarios, + post_process_all_itb_scenarios, + post_process_timeseries, +) from acquisition_s2 import download_s2 from acquisition_s3 import download_s3 from acquisition_phenocam import download_phenocam from preselection import create_timeseries -from preparation import prepare_s2, prepare_s3 +from preparation import ( + prepare_s2, + prepare_s3, + prepare_s2_gcc_for_itb, + prepare_s3_gcc_for_itb, +) from metrics_indices import create_prepared_fusion_timeseries from metrics_stats import calculate_all_metrics @@ -12,10 +21,10 @@ from metrics_stats import calculate_all_metrics def run_pipeline(season, site_position, site_name): """Run pipeline.""" try: - #print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}") - #download_s2(season, site_position, site_name) - #download_s3(season, site_position, site_name) - #download_phenocam(season, site_position, site_name) + # print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}") + # download_s2(season, site_position, site_name) + # download_s3(season, site_position, site_name) + # download_phenocam(season, site_position, site_name) # print(f"Creating preselection timeseries: {site_name}, {season}") # create_timeseries(season, site_position, site_name) @@ -28,13 +37,20 @@ def run_pipeline(season, site_position, site_name): # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}") # run_all_efast_scenarios(season, site_position, site_name) + # Index-then-Blend (ItB): GCC stacks, EFAST fusion with product=GCC + # for strategy in ["aggressive", "nonaggressive"]: + # prepare_s2_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy) + # prepare_s3_gcc_for_itb(season, site_position, site_name, cleaning_strategy=strategy) + # run_all_efast_itb_scenarios(season, site_position, site_name) + # post_process_all_itb_scenarios(season, site_position, site_name) + print(f"Creating prepared/fusion timeseries: {site_name}, {season}") create_prepared_fusion_timeseries(season, site_position, site_name) print(f"Post-processing: {site_name}, {season}") # post_process_all_scenarios(season, site_position, site_name) post_process_timeseries(season, site_position, site_name) - + print(f"Calculating metrics: {site_name}, {season}") # calculate_all_metrics(season, site_name, site_position) @@ -51,4 +67,3 @@ if __name__ == "__main__": run_pipeline(2023, (64.2437, 19.7673), "vindeln2") run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1") run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat") -