Removed temporary fix-tile-boundary script.
This commit is contained in:
parent
3ac5d348d0
commit
60dbf932f8
1 changed files with 0 additions and 195 deletions
|
|
@ -1,195 +0,0 @@
|
|||
"""Detect and repair sites where S2 downloads span multiple MGRS tile extents.
|
||||
|
||||
Sites on MGRS tile boundaries produce REFL files from two tiles with different
|
||||
spatial extents (e.g. 16SDA and 16SDB). This breaks EFAST, which requires all
|
||||
S2 files to share the same grid. This script:
|
||||
|
||||
1. Scans all downloaded sites for the given year.
|
||||
2. Reports any site where ``prepared/s2`` contains REFL files of mixed shapes.
|
||||
3. With ``--fix``:
|
||||
- Removes the minority-shape REFL / DIST_CLOUD / GCC files.
|
||||
- Deletes the stale ``prepared/s3`` and ``prepared/gcc_s3`` composites.
|
||||
- Regenerates ``prepared/s3`` composites from the existing raw S3 data using
|
||||
the largest-extent S2 tile as the reference grid.
|
||||
|
||||
``prepared/gcc_s3`` is intentionally left empty — step 4 (``4-fusion.py``)
|
||||
regenerates it on its next run.
|
||||
|
||||
Usage::
|
||||
|
||||
uv run python fix-tile-boundary.py # detect only
|
||||
uv run python fix-tile-boundary.py --fix # detect + repair
|
||||
uv run python fix-tile-boundary.py --fix --evaluation-year 2024
|
||||
|
||||
Prior step: :mod:`3-sentinel-data`.
|
||||
Next step after fixing: :mod:`4-fusion`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib.util
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import rasterio
|
||||
|
||||
DATA_DIR = Path("data")
|
||||
DEFAULT_YEAR = 2025
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _refl_shapes(s2_dir: Path) -> dict[tuple[int, int], list[Path]]:
|
||||
"""Return a mapping of shape → REFL file paths for a prepared/s2 directory."""
|
||||
shape_to_files: dict[tuple[int, int], list[Path]] = {}
|
||||
for f in sorted(s2_dir.glob("*_REFL.tif")):
|
||||
with rasterio.open(f) as src:
|
||||
shape: tuple[int, int] = src.shape # type: ignore[assignment]
|
||||
shape_to_files.setdefault(shape, []).append(f)
|
||||
return shape_to_files
|
||||
|
||||
|
||||
def detect(year: int) -> list[Path]:
|
||||
"""Return site directories whose prepared/s2 has mixed REFL shapes."""
|
||||
sentinel_dir = DATA_DIR / "sentinel_data" / str(year)
|
||||
if not sentinel_dir.exists():
|
||||
print(f"[detect] No sentinel data found at {sentinel_dir}")
|
||||
return []
|
||||
|
||||
bad_sites: list[Path] = []
|
||||
for site_dir in sorted(sentinel_dir.iterdir()):
|
||||
s2_dir = site_dir / "prepared" / "s2"
|
||||
if not s2_dir.exists():
|
||||
continue
|
||||
shapes = _refl_shapes(s2_dir)
|
||||
if len(shapes) > 1:
|
||||
summary = ", ".join(
|
||||
f"{s[0]}×{s[1]} ({len(fs)} files)" for s, fs in shapes.items()
|
||||
)
|
||||
print(f"[detect] {site_dir.name}: mixed shapes — {summary}")
|
||||
bad_sites.append(site_dir)
|
||||
|
||||
if not bad_sites:
|
||||
print("[detect] All sites OK — no mixed tile shapes found.")
|
||||
return bad_sites
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Repair
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_step3():
|
||||
"""Import helpers from 3-sentinel-data.py without executing its main()."""
|
||||
spec = importlib.util.spec_from_file_location("step3", "3-sentinel-data.py")
|
||||
mod = importlib.util.module_from_spec(spec) # type: ignore[arg-type]
|
||||
spec.loader.exec_module(mod) # type: ignore[union-attr]
|
||||
return mod
|
||||
|
||||
|
||||
def repair(site_dir: Path) -> None:
|
||||
"""Remove minority-shape S2 files and regenerate S3 composites for one site."""
|
||||
s2_dir = site_dir / "prepared" / "s2"
|
||||
s3_raw = site_dir / "raw" / "s3"
|
||||
s3_out = site_dir / "prepared" / "s3"
|
||||
gcc_s3_out = site_dir / "prepared" / "gcc_s3"
|
||||
name = site_dir.name
|
||||
|
||||
# --- 1. Identify reference shape (largest extent) -------------------------
|
||||
shapes = _refl_shapes(s2_dir)
|
||||
if len(shapes) <= 1:
|
||||
print(f"[repair] {name}: already consistent — nothing to do.")
|
||||
return
|
||||
|
||||
ref_shape = max(shapes.keys(), key=lambda s: s[0] * s[1])
|
||||
|
||||
# --- 2. Remove non-reference REFL + companions ----------------------------
|
||||
n_removed = 0
|
||||
for shape, files in shapes.items():
|
||||
if shape == ref_shape:
|
||||
continue
|
||||
for refl_path in files:
|
||||
stem = refl_path.stem[: -len("_REFL")]
|
||||
for companion in s2_dir.glob(f"{stem}_*.tif"):
|
||||
companion.unlink()
|
||||
refl_path.unlink(missing_ok=True)
|
||||
n_removed += 1
|
||||
|
||||
print(
|
||||
f"[repair] {name}: removed {n_removed} minority-shape file-sets (kept {ref_shape[0]}×{ref_shape[1]})"
|
||||
)
|
||||
|
||||
# --- 3. Remove stale GCC files from prepared/s2 ---------------------------
|
||||
gcc_removed = sum(1 for f in s2_dir.glob("*_GCC.tif") if f.unlink() or True)
|
||||
if gcc_removed:
|
||||
print(
|
||||
f"[repair] {name}: removed {gcc_removed} stale GCC files from prepared/s2"
|
||||
)
|
||||
|
||||
# --- 4. Wipe stale S3 composites ------------------------------------------
|
||||
for d in (s3_out, gcc_s3_out):
|
||||
if d.exists():
|
||||
shutil.rmtree(d)
|
||||
print(f"[repair] {name}: removed {d.relative_to(site_dir)}/")
|
||||
|
||||
# --- 5. Regenerate S3 composites with the correct reference ---------------
|
||||
if not s3_raw.exists() or not any(s3_raw.glob("S3*.tif")):
|
||||
print(
|
||||
f"[repair] {name}: WARNING — no raw S3 data in {s3_raw}; skipping S3 regeneration."
|
||||
)
|
||||
return
|
||||
|
||||
s2_refl_path = next(iter(sorted(s2_dir.glob("*_REFL.tif"))), None)
|
||||
if s2_refl_path is None:
|
||||
print(
|
||||
f"[repair] {name}: WARNING — no REFL files left; cannot regenerate S3 composites."
|
||||
)
|
||||
return
|
||||
|
||||
print(
|
||||
f"[repair] {name}: regenerating S3 composites (reference: {s2_refl_path.name})..."
|
||||
)
|
||||
step3 = _load_step3()
|
||||
s3_out.mkdir(parents=True, exist_ok=True)
|
||||
step3._prepare_s3(s3_raw, s2_refl_path, s3_out)
|
||||
n_composites = len(list(s3_out.glob("composite_*.tif")))
|
||||
print(f"[repair] {name}: wrote {n_composites} composites → ready for 4-fusion.py")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--evaluation-year", type=int, default=DEFAULT_YEAR)
|
||||
parser.add_argument(
|
||||
"--fix",
|
||||
action="store_true",
|
||||
help="Actually repair detected sites (default: detect only)",
|
||||
)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
bad_sites = detect(args.evaluation_year)
|
||||
if not bad_sites:
|
||||
return 0
|
||||
|
||||
if not args.fix:
|
||||
print(f"\nRun with --fix to repair {len(bad_sites)} site(s).")
|
||||
return 0
|
||||
|
||||
print()
|
||||
for site_dir in bad_sites:
|
||||
repair(site_dir)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue