diff --git a/3-sentinel-data.py b/3-sentinel-data.py
index 4ea88bf..4ed3090 100644
--- a/3-sentinel-data.py
+++ b/3-sentinel-data.py
@@ -97,13 +97,15 @@ _SCL_ASSET = "scl"
 _MIN_BBOX_HALF_DEG = 0.008
 
 _GDAL_COG_ENV = {
-    "GDAL_HTTP_VERSION": "2",
+    # HTTP/1.1 avoids HTTP/2 multiplexing connection-reset cascades on S3.
+    "GDAL_HTTP_VERSION": "1.1",
     "GDAL_HTTP_MERGE_CONSECUTIVE_RANGES": "YES",
-    "GDAL_HTTP_MULTIPLEX": "YES",
     "GDAL_HTTP_TCP_KEEPALIVE": "YES",
     "GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
     "CPL_VSIL_CURL_CACHE_SIZE": "200000000",
-    "GDAL_MAX_CONNECTIONS": "100",
+    # Built-in GDAL retries for 429/502/503/504 and transient resets.
+    "GDAL_HTTP_MAX_RETRY": "3",
+    "GDAL_HTTP_RETRY_DELAY": "0.5",
     "AWS_NO_SIGN_REQUEST": "YES",
 }
 
@@ -373,7 +375,7 @@ def download_s2_window(
     output_dir: Path,
     bands: list[str],
     ratio: int = RESOLUTION_RATIO,
-    max_workers: int = 32,
+    max_workers: int = 12,
 ) -> None:
     """Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
 
@@ -491,6 +493,39 @@ def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
     return written
 
 
+_S3_DOWNLOAD_RETRIES = 4
+_S3_DOWNLOAD_BACKOFF = 30  # seconds; doubled on each retry
+
+
+def _download_with_retry(datacube: Any, nc_path: Path) -> None:
+    """Download an OpenEO datacube to *nc_path*, retrying on any ``Exception``.
+
+    Makes at most ``_S3_DOWNLOAD_RETRIES`` attempts with exponential backoff
+    starting at ``_S3_DOWNLOAD_BACKOFF`` seconds; a file already at *nc_path* is
+    deleted before each attempt.  Raises ``RuntimeError`` if every attempt fails.
+    """
+    delay = _S3_DOWNLOAD_BACKOFF
+    last_exc: Exception | None = None
+    for attempt in range(1, _S3_DOWNLOAD_RETRIES + 1):
+        try:
+            if nc_path.exists():
+                nc_path.unlink()
+            datacube.download(str(nc_path), format="NetCDF")
+            return
+        except Exception as exc:
+            last_exc = exc
+            if attempt < _S3_DOWNLOAD_RETRIES:
+                print(
+                    f"[S3-OEO] Download attempt {attempt} failed ({exc}); "
+                    f"retrying in {delay}s..."
+                )
+                time.sleep(delay)
+                delay *= 2
+            else:
+                print(f"[S3-OEO] All {_S3_DOWNLOAD_RETRIES} download attempts failed")
+    raise RuntimeError(f"S3 download failed after {_S3_DOWNLOAD_RETRIES} attempts") from last_exc
+
+
 def download_s3_openeo(
     start_date: datetime,
     end_date: datetime,
@@ -537,7 +572,7 @@ def download_s3_openeo(
     nc_path = output_dir / "_s3_syn_l2.nc"
     print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
     t0 = time.time()
-    datacube.download(str(nc_path), format="NetCDF")
+    _download_with_retry(datacube, nc_path)
     print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
 
     print("[S3-OEO] Splitting into per-date GeoTIFFs...")
@@ -804,6 +839,11 @@ def main(argv: list[str] | None = None) -> int:
         default=None,
         help="Single sitename to process (default: all step-2 PASS sites)",
     )
+    parser.add_argument(
+        "--skip-downloaded",
+        action="store_true",
+        help="Skip sites whose directory already exists under data/sentinel_data/{year}/",
+    )
     args = parser.parse_args(argv)
 
     year = args.evaluation_year
@@ -821,16 +861,17 @@ def main(argv: list[str] | None = None) -> int:
     print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
     for i, site in enumerate(pass_sites, 1):
         sitename = site["sitename"]
+        site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
+        if args.skip_downloaded and site_dir.exists():
+            print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename} — skipping (directory exists)")
+            continue
         print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
-        try:
-            summary = process_site(sitename, site["lat"], site["lon"], year)
-            print(
-                f"[Sentinel-3] {sitename} done — "
-                f"{summary['s2_refl_count']} REFL, "
-                f"{summary['s3_composite_count']} composites"
-            )
-        except Exception as exc:
-            print(f"[Sentinel-3] {sitename} FAILED: {exc}")
+        summary = process_site(sitename, site["lat"], site["lon"], year)
+        print(
+            f"[Sentinel-3] {sitename} done — "
+            f"{summary['s2_refl_count']} REFL, "
+            f"{summary['s3_composite_count']} composites"
+        )
     return 0
 
 