From 60dbf932f8dff7736a02c4b1bce8c61f813e57d6 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Thu, 11 Jun 2026 16:36:08 +0200 Subject: [PATCH] Removed temporary fix-tile-boundary script. --- fix-tile-boundary.py | 195 ------------------------------------------- 1 file changed, 195 deletions(-) delete mode 100644 fix-tile-boundary.py diff --git a/fix-tile-boundary.py b/fix-tile-boundary.py deleted file mode 100644 index acf6f8a..0000000 --- a/fix-tile-boundary.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Detect and repair sites where S2 downloads span multiple MGRS tile extents. - -Sites on MGRS tile boundaries produce REFL files from two tiles with different -spatial extents (e.g. 16SDA and 16SDB). This breaks EFAST, which requires all -S2 files to share the same grid. This script: - - 1. Scans all downloaded sites for the given year. - 2. Reports any site where ``prepared/s2`` contains REFL files of mixed shapes. - 3. With ``--fix``: - - Removes the minority-shape REFL / DIST_CLOUD / GCC files. - - Deletes the stale ``prepared/s3`` and ``prepared/gcc_s3`` composites. - - Regenerates ``prepared/s3`` composites from the existing raw S3 data using - the largest-extent S2 tile as the reference grid. - -``prepared/gcc_s3`` is intentionally left empty — step 4 (``4-fusion.py``) -regenerates it on its next run. - -Usage:: - - uv run python fix-tile-boundary.py # detect only - uv run python fix-tile-boundary.py --fix # detect + repair - uv run python fix-tile-boundary.py --fix --evaluation-year 2024 - -Prior step: :mod:`3-sentinel-data`. -Next step after fixing: :mod:`4-fusion`. -""" - -from __future__ import annotations - -import argparse -import importlib.util -import shutil -import sys -from pathlib import Path - -import rasterio - -DATA_DIR = Path("data") -DEFAULT_YEAR = 2025 - - -# --------------------------------------------------------------------------- -# Detection -# --------------------------------------------------------------------------- - - -def _refl_shapes(s2_dir: Path) -> dict[tuple[int, int], list[Path]]: - """Return a mapping of shape → REFL file paths for a prepared/s2 directory.""" - shape_to_files: dict[tuple[int, int], list[Path]] = {} - for f in sorted(s2_dir.glob("*_REFL.tif")): - with rasterio.open(f) as src: - shape: tuple[int, int] = src.shape # type: ignore[assignment] - shape_to_files.setdefault(shape, []).append(f) - return shape_to_files - - -def detect(year: int) -> list[Path]: - """Return site directories whose prepared/s2 has mixed REFL shapes.""" - sentinel_dir = DATA_DIR / "sentinel_data" / str(year) - if not sentinel_dir.exists(): - print(f"[detect] No sentinel data found at {sentinel_dir}") - return [] - - bad_sites: list[Path] = [] - for site_dir in sorted(sentinel_dir.iterdir()): - s2_dir = site_dir / "prepared" / "s2" - if not s2_dir.exists(): - continue - shapes = _refl_shapes(s2_dir) - if len(shapes) > 1: - summary = ", ".join( - f"{s[0]}×{s[1]} ({len(fs)} files)" for s, fs in shapes.items() - ) - print(f"[detect] {site_dir.name}: mixed shapes — {summary}") - bad_sites.append(site_dir) - - if not bad_sites: - print("[detect] All sites OK — no mixed tile shapes found.") - return bad_sites - - -# --------------------------------------------------------------------------- -# Repair -# --------------------------------------------------------------------------- - - -def _load_step3(): - """Import helpers from 3-sentinel-data.py without executing its main().""" - spec = importlib.util.spec_from_file_location("step3", "3-sentinel-data.py") - mod = importlib.util.module_from_spec(spec) # type: ignore[arg-type] - spec.loader.exec_module(mod) # type: ignore[union-attr] - return mod - - -def repair(site_dir: Path) -> None: - """Remove minority-shape S2 files and regenerate S3 composites for one site.""" - s2_dir = site_dir / "prepared" / "s2" - s3_raw = site_dir / "raw" / "s3" - s3_out = site_dir / "prepared" / "s3" - gcc_s3_out = site_dir / "prepared" / "gcc_s3" - name = site_dir.name - - # --- 1. Identify reference shape (largest extent) ------------------------- - shapes = _refl_shapes(s2_dir) - if len(shapes) <= 1: - print(f"[repair] {name}: already consistent — nothing to do.") - return - - ref_shape = max(shapes.keys(), key=lambda s: s[0] * s[1]) - - # --- 2. Remove non-reference REFL + companions ---------------------------- - n_removed = 0 - for shape, files in shapes.items(): - if shape == ref_shape: - continue - for refl_path in files: - stem = refl_path.stem[: -len("_REFL")] - for companion in s2_dir.glob(f"{stem}_*.tif"): - companion.unlink() - refl_path.unlink(missing_ok=True) - n_removed += 1 - - print( - f"[repair] {name}: removed {n_removed} minority-shape file-sets (kept {ref_shape[0]}×{ref_shape[1]})" - ) - - # --- 3. Remove stale GCC files from prepared/s2 --------------------------- - gcc_removed = sum(1 for f in s2_dir.glob("*_GCC.tif") if f.unlink() or True) - if gcc_removed: - print( - f"[repair] {name}: removed {gcc_removed} stale GCC files from prepared/s2" - ) - - # --- 4. Wipe stale S3 composites ------------------------------------------ - for d in (s3_out, gcc_s3_out): - if d.exists(): - shutil.rmtree(d) - print(f"[repair] {name}: removed {d.relative_to(site_dir)}/") - - # --- 5. Regenerate S3 composites with the correct reference --------------- - if not s3_raw.exists() or not any(s3_raw.glob("S3*.tif")): - print( - f"[repair] {name}: WARNING — no raw S3 data in {s3_raw}; skipping S3 regeneration." - ) - return - - s2_refl_path = next(iter(sorted(s2_dir.glob("*_REFL.tif"))), None) - if s2_refl_path is None: - print( - f"[repair] {name}: WARNING — no REFL files left; cannot regenerate S3 composites." - ) - return - - print( - f"[repair] {name}: regenerating S3 composites (reference: {s2_refl_path.name})..." - ) - step3 = _load_step3() - s3_out.mkdir(parents=True, exist_ok=True) - step3._prepare_s3(s3_raw, s2_refl_path, s3_out) - n_composites = len(list(s3_out.glob("composite_*.tif"))) - print(f"[repair] {name}: wrote {n_composites} composites → ready for 4-fusion.py") - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR) - parser.add_argument( - "--fix", - action="store_true", - help="Actually repair detected sites (default: detect only)", - ) - args = parser.parse_args(argv) - - bad_sites = detect(args.evaluation_year) - if not bad_sites: - return 0 - - if not args.fix: - print(f"\nRun with --fix to repair {len(bad_sites)} site(s).") - return 0 - - print() - for site_dir in bad_sites: - repair(site_dir) - - return 0 - - -if __name__ == "__main__": - sys.exit(main())