Added --skip-downloaded flag, S3 download retry with backoff, and GDAL HTTP tuning
This commit is contained in:
parent
c033f5f527
commit
8683624557
1 changed files with 55 additions and 14 deletions
|
|
@ -97,13 +97,15 @@ _SCL_ASSET = "scl"
|
||||||
_MIN_BBOX_HALF_DEG = 0.008
|
_MIN_BBOX_HALF_DEG = 0.008
|
||||||
|
|
||||||
_GDAL_COG_ENV = {
|
_GDAL_COG_ENV = {
|
||||||
"GDAL_HTTP_VERSION": "2",
|
# HTTP/1.1 avoids HTTP/2 multiplexing connection-reset cascades on S3.
|
||||||
|
"GDAL_HTTP_VERSION": "1.1",
|
||||||
"GDAL_HTTP_MERGE_CONSECUTIVE_RANGES": "YES",
|
"GDAL_HTTP_MERGE_CONSECUTIVE_RANGES": "YES",
|
||||||
"GDAL_HTTP_MULTIPLEX": "YES",
|
|
||||||
"GDAL_HTTP_TCP_KEEPALIVE": "YES",
|
"GDAL_HTTP_TCP_KEEPALIVE": "YES",
|
||||||
"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
|
"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
|
||||||
"CPL_VSIL_CURL_CACHE_SIZE": "200000000",
|
"CPL_VSIL_CURL_CACHE_SIZE": "200000000",
|
||||||
"GDAL_MAX_CONNECTIONS": "100",
|
# Built-in GDAL retries for 429/502/503/504 and transient resets.
|
||||||
|
"GDAL_HTTP_MAX_RETRY": "3",
|
||||||
|
"GDAL_HTTP_RETRY_DELAY": "0.5",
|
||||||
"AWS_NO_SIGN_REQUEST": "YES",
|
"AWS_NO_SIGN_REQUEST": "YES",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -373,7 +375,7 @@ def download_s2_window(
|
||||||
output_dir: Path,
|
output_dir: Path,
|
||||||
bands: list[str],
|
bands: list[str],
|
||||||
ratio: int = RESOLUTION_RATIO,
|
ratio: int = RESOLUTION_RATIO,
|
||||||
max_workers: int = 32,
|
max_workers: int = 12,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
|
"""Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
|
||||||
|
|
||||||
|
|
@ -491,6 +493,39 @@ def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
|
||||||
return written
|
return written
|
||||||
|
|
||||||
|
|
||||||
|
_S3_DOWNLOAD_RETRIES = 4
_S3_DOWNLOAD_BACKOFF = 30  # seconds; doubled on each retry


def _download_with_retry(
    datacube: Any,
    nc_path: Path,
    *,
    retries: int = _S3_DOWNLOAD_RETRIES,
    backoff: float = _S3_DOWNLOAD_BACKOFF,
) -> None:
    """Download *datacube* as NetCDF to *nc_path*, retrying on failure.

    Each attempt first removes any file already at *nc_path* and then calls
    ``datacube.download(str(nc_path), format="NetCDF")``. After a failed
    attempt the function sleeps and tries again; the sleep starts at
    *backoff* seconds and doubles after every failure.

    This helper does NOT re-authenticate: it only re-invokes ``download`` on
    the object it was given. If the session behind *datacube* has expired,
    the caller is responsible for refreshing it.

    Args:
        datacube: Object exposing ``download(path, format=...)``.
        nc_path: Destination file; overwritten if present.
        retries: Total number of attempts (must be >= 1).
        backoff: Seconds to wait after the first failure.

    Raises:
        ValueError: If *retries* is less than 1.
        RuntimeError: If every attempt failed; the last underlying exception
            is attached as ``__cause__``.
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    delay = backoff
    last_exc: Exception | None = None
    for attempt in range(1, retries + 1):
        try:
            # Drop a stale or partially written file from an earlier attempt
            # so it cannot be mistaken for a complete download.
            nc_path.unlink(missing_ok=True)
            datacube.download(str(nc_path), format="NetCDF")
            return
        # NOTE(review): every Exception is treated as transient here, so a
        # permanent failure also waits through the full backoff schedule.
        # Narrow this once the client's error types are confirmed.
        except Exception as exc:
            last_exc = exc
            if attempt < retries:
                print(
                    f"[S3-OEO] Download attempt {attempt} failed ({exc}); "
                    f"retrying in {delay}s..."
                )
                time.sleep(delay)
                delay *= 2
            else:
                print(f"[S3-OEO] All {retries} download attempts failed")

    # Best-effort cleanup: do not leave a truncated NetCDF behind for later
    # steps to pick up. A failure to delete must not mask the real error.
    try:
        nc_path.unlink(missing_ok=True)
    except OSError:
        pass
    raise RuntimeError(f"S3 download failed after {retries} attempts") from last_exc
|
||||||
|
|
||||||
|
|
||||||
def download_s3_openeo(
|
def download_s3_openeo(
|
||||||
start_date: datetime,
|
start_date: datetime,
|
||||||
end_date: datetime,
|
end_date: datetime,
|
||||||
|
|
@ -537,7 +572,7 @@ def download_s3_openeo(
|
||||||
nc_path = output_dir / "_s3_syn_l2.nc"
|
nc_path = output_dir / "_s3_syn_l2.nc"
|
||||||
print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
|
print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
datacube.download(str(nc_path), format="NetCDF")
|
_download_with_retry(datacube, nc_path)
|
||||||
print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
|
print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
|
||||||
|
|
||||||
print("[S3-OEO] Splitting into per-date GeoTIFFs...")
|
print("[S3-OEO] Splitting into per-date GeoTIFFs...")
|
||||||
|
|
@ -804,6 +839,11 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
default=None,
|
default=None,
|
||||||
help="Single sitename to process (default: all step-2 PASS sites)",
|
help="Single sitename to process (default: all step-2 PASS sites)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--skip-downloaded",
|
||||||
|
action="store_true",
|
||||||
|
help="Skip sites whose directory already exists under data/sentinel_data/{year}/",
|
||||||
|
)
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
year = args.evaluation_year
|
year = args.evaluation_year
|
||||||
|
|
||||||
|
|
@ -821,16 +861,17 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
|
print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
|
||||||
for i, site in enumerate(pass_sites, 1):
|
for i, site in enumerate(pass_sites, 1):
|
||||||
sitename = site["sitename"]
|
sitename = site["sitename"]
|
||||||
|
site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
|
||||||
|
if args.skip_downloaded and site_dir.exists():
|
||||||
|
print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename} — skipping (directory exists)")
|
||||||
|
continue
|
||||||
print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
|
print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
|
||||||
try:
|
summary = process_site(sitename, site["lat"], site["lon"], year)
|
||||||
summary = process_site(sitename, site["lat"], site["lon"], year)
|
print(
|
||||||
print(
|
f"[Sentinel-3] {sitename} done — "
|
||||||
f"[Sentinel-3] {sitename} done — "
|
f"{summary['s2_refl_count']} REFL, "
|
||||||
f"{summary['s2_refl_count']} REFL, "
|
f"{summary['s3_composite_count']} composites"
|
||||||
f"{summary['s3_composite_count']} composites"
|
)
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"[Sentinel-3] {sitename} FAILED: {exc}")
|
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue