diff --git a/preparation.py b/preparation.py index 02b99e6..02b2042 100644 --- a/preparation.py +++ b/preparation.py @@ -72,6 +72,7 @@ def _reproject_raster_to_target( def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): + lat, lon = site_position s2_dir = Path(f"data/{site_name}/{season}/raw/s2/") s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") s2_output_dir = _get_base_dir(season, site_name, cleaning_strategy) / "s2" @@ -79,6 +80,8 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", clouds = _load_excluded(season, site_name, cleaning_strategy) s2_output_dir.mkdir(parents=True, exist_ok=True) + print(f"[S2-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") + s3_files = [f for f in s3_dir.glob("*.geotiff") if f.name not in clouds["s3"]] if not s3_files: raise ValueError("No non-cloud S3 files found for reference bounds") @@ -89,14 +92,17 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", s2_width = s3_ref.width * RESOLUTION_RATIO s2_height = s3_ref.height * RESOLUTION_RATIO - for s2_file in s2_dir.glob("*.geotiff"): + for s2_file in sorted(s2_dir.glob("*.geotiff")): if s2_file.name in clouds["s2"]: + print(f"[S2-PREP] Skipping {s2_file.name} (excluded by {cleaning_strategy})") continue date_str = s2_file.name.split("_")[0] refl_dst = s2_output_dir / f"S2A_MSIL2A_{date_str}_REFL.tif" if refl_dst.exists(): + print(f"[S2-PREP] Skipping {s2_file.name} (exists)") continue + print(f"[S2-PREP] Processing {s2_file.name}...") temp_normalized = s2_output_dir / f"temp_{s2_file.name}" with rasterio.open(s2_file) as src: data = src.read().astype("float32") / 10000.0 @@ -109,12 +115,16 @@ def prepare_s2(season, site_position, site_name, cleaning_strategy="aggressive", temp_normalized, refl_dst, target_bounds, target_crs, s2_width, s2_height ) temp_normalized.unlink() + print(f"[S2-PREP] Saved: {refl_dst}") + print(f"[S2-PREP] Computing distance-to-clouds...") distance_to_clouds = _import_distance_to_clouds() distance_to_clouds(s2_output_dir, ratio=RESOLUTION_RATIO) + print("[S2-PREP] Completed") def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", date_range=None): + lat, lon = site_position s3_dir = Path(f"data/{site_name}/{season}/raw/s3/") base_dir = _get_base_dir(season, site_name, cleaning_strategy) s2_prepared_dir = base_dir / "s2" @@ -123,20 +133,27 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", clouds = _load_excluded(season, site_name, cleaning_strategy) s3_preprocessed_dir.mkdir(parents=True, exist_ok=True) + print(f"[S3-PREP] Starting preparation: {site_name} ({lat:.6f}, {lon:.6f}), {season}, strategy={cleaning_strategy}") + s3_by_date = defaultdict(list) for s3_file in s3_dir.glob("*.geotiff"): if s3_file.name not in clouds["s3"]: s3_by_date[s3_file.name.split("_")[0]].append(s3_file) + else: + print(f"[S3-PREP] Skipping {s3_file.name} (excluded by {cleaning_strategy})") + + print(f"[S3-PREP] Found {sum(len(v) for v in s3_by_date.values())} acquisitions across {len(s3_by_date)} dates") temp_composite_dir = s3_preprocessed_dir / "temp_composites" if temp_composite_dir.exists(): shutil.rmtree(temp_composite_dir) temp_composite_dir.mkdir() - for date_str, s3_files in s3_by_date.items(): + for date_str, s3_files in sorted(s3_by_date.items()): composite_path = temp_composite_dir / f"composite_{date_str}.tif" if len(s3_files) == 1: shutil.copy(s3_files[0], composite_path) + print(f"[S3-PREP] Composite {date_str}: 1 acquisition") else: s3_stack = [] for s3_file in s3_files: @@ -150,6 +167,7 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", profile.update({"count": composite.shape[0], "dtype": "float32"}) with rasterio.open(composite_path, "w", **profile) as dst: dst.write(composite) + print(f"[S3-PREP] Composite {date_str}: {len(s3_files)} acquisitions merged") # Reproject S3 to match S2 REFL bounds (full coverage) instead of DIST_CLOUD bounds # This ensures fusion covers the same area as S2 and dimensions match @@ -174,8 +192,10 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", height, ) + print(f"[S3-PREP] Reprojecting {len(list(temp_composite_dir.glob('*.tif')))} composites to S2 grid ({width}×{height} px)...") + # Reproject each S3 composite to match S2 REFL bounds - sen3_paths = list(temp_composite_dir.glob("*.tif")) + sen3_paths = sorted(temp_composite_dir.glob("*.tif")) for sen3_path in sen3_paths: vrt_options = { "transform": s3_transform, @@ -191,5 +211,7 @@ def prepare_s3(season, site_position, site_name, cleaning_strategy="aggressive", profile = vrt.profile.copy() profile.update({"dtype": "float32", "nodata": 0, "driver": "GTiff"}) rio_shutil.copy(vrt, outfile, **profile) + print(f"[S3-PREP] Saved: {outfile}") shutil.rmtree(temp_composite_dir) + print("[S3-PREP] Completed") diff --git a/run.py b/run.py index 217aa6d..cc824d9 100644 --- a/run.py +++ b/run.py @@ -9,22 +9,28 @@ from acquisition_s2 import download_s2 from acquisition_s3 import download_s3 from acquisition_phenocam import download_phenocam from preselection import create_timeseries +from preparation import prepare_s2, prepare_s3 # from metrics_stats import calculate_all_metrics def run_pipeline(season, site_position, site_name): - """Run pipeline (downloads + preselection).""" + """Run pipeline.""" try: - print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}") - download_s2(season, site_position, site_name) - download_s3(season, site_position, site_name) - download_phenocam(season, site_position, site_name) + #print(f"Downloading S2, S3, and PhenoCam: {site_name}, {season}") + #download_s2(season, site_position, site_name) + #download_s3(season, site_position, site_name) + #download_phenocam(season, site_position, site_name) - print(f"Creating preselection timeseries: {site_name}, {season}") - create_timeseries(season, site_position, site_name) + #print(f"Creating preselection timeseries: {site_name}, {season}") + #create_timeseries(season, site_position, site_name) - # print(f"Running EFAST fusion for all scenarios: {site_name}, {season}") - # run_all_efast_scenarios(season, site_position, site_name) + #print(f"Preparing S2 and S3 for fusion: {site_name}, {season}") + #for strategy in ["aggressive", "nonaggressive"]: + # prepare_s2(season, site_position, site_name, cleaning_strategy=strategy) + # prepare_s3(season, site_position, site_name, cleaning_strategy=strategy) + + print(f"Running EFAST fusion for all scenarios: {site_name}, {season}") + run_all_efast_scenarios(season, site_position, site_name) # print(f"Post-processing data: {site_name}, {season}") # process_all_scenarios(season, site_position, site_name) # print(f"Generating NDVI for final outputs: {site_name}, {season}") @@ -42,11 +48,11 @@ def run_pipeline(season, site_position, site_name): if __name__ == "__main__": - run_pipeline(2024, (35.3045, 25.0743), "forthgr") - #run_pipeline(2024, (47.116171, 11.320308), "innsbruck") - run_pipeline(2020, (47.116171, 11.320308), "innsbruck") - run_pipeline(2024, (58.5633, 24.3688), "pitsalu") - run_pipeline(2023, (64.2437, 19.7673), "vindeln2") - run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1") - run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat") + run_pipeline(2024, (47.116171, 11.320308), "innsbruck") + # run_pipeline(2024, (35.3045, 25.0743), "forthgr") + # run_pipeline(2020, (47.116171, 11.320308), "innsbruck") + # run_pipeline(2024, (58.5633, 24.3688), "pitsalu") + # run_pipeline(2023, (64.2437, 19.7673), "vindeln2") + # run_pipeline(2024, (36.7455, -6.0033), "sunflowerjerez1") + # run_pipeline(2024, (42.6558, 26.9837), "institutekarnobat")