Added --skip-downloaded option
This commit is contained in:
parent
c033f5f527
commit
8683624557
1 changed files with 55 additions and 14 deletions
|
|
@ -97,13 +97,15 @@ _SCL_ASSET = "scl"
|
|||
_MIN_BBOX_HALF_DEG = 0.008
|
||||
|
||||
_GDAL_COG_ENV = {
|
||||
"GDAL_HTTP_VERSION": "2",
|
||||
# HTTP/1.1 avoids HTTP/2 multiplexing connection-reset cascades on S3.
|
||||
"GDAL_HTTP_VERSION": "1.1",
|
||||
"GDAL_HTTP_MERGE_CONSECUTIVE_RANGES": "YES",
|
||||
"GDAL_HTTP_MULTIPLEX": "YES",
|
||||
"GDAL_HTTP_TCP_KEEPALIVE": "YES",
|
||||
"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
|
||||
"CPL_VSIL_CURL_CACHE_SIZE": "200000000",
|
||||
"GDAL_MAX_CONNECTIONS": "100",
|
||||
# Built-in GDAL retries for 429/502/503/504 and transient resets.
|
||||
"GDAL_HTTP_MAX_RETRY": "3",
|
||||
"GDAL_HTTP_RETRY_DELAY": "0.5",
|
||||
"AWS_NO_SIGN_REQUEST": "YES",
|
||||
}
|
||||
|
||||
|
|
@ -373,7 +375,7 @@ def download_s2_window(
|
|||
output_dir: Path,
|
||||
bands: list[str],
|
||||
ratio: int = RESOLUTION_RATIO,
|
||||
max_workers: int = 32,
|
||||
max_workers: int = 12,
|
||||
) -> None:
|
||||
"""Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
|
||||
|
||||
|
|
@ -491,6 +493,39 @@ def _netcdf_to_geotiffs(nc_path: Path, output_dir: Path, epsg: int) -> int:
|
|||
return written
|
||||
|
||||
|
||||
_S3_DOWNLOAD_RETRIES = 4
|
||||
_S3_DOWNLOAD_BACKOFF = 30 # seconds; doubled on each retry
|
||||
|
||||
|
||||
def _download_with_retry(datacube: Any, nc_path: Path) -> None:
    """Download an OpenEO datacube to *nc_path* as NetCDF, retrying on failure.

    Makes at most ``_S3_DOWNLOAD_RETRIES`` attempts in total. After every
    failed attempt except the last one it sleeps before trying again; the
    wait starts at ``_S3_DOWNLOAD_BACKOFF`` seconds and doubles each time.
    Any file already present at *nc_path* is removed before each attempt so
    a leftover from an earlier attempt is not reused.

    This helper does not re-authenticate: each attempt calls
    ``datacube.download`` on the same object.
    NOTE(review): if token expiry between attempts is a concern, credential
    refresh has to happen elsewhere -- confirm against the caller.

    Args:
        datacube: Object exposing ``download(path, format=...)``.
        nc_path: Destination path of the NetCDF file.

    Raises:
        RuntimeError: If every attempt fails. The exception raised by the
            last attempt is chained as ``__cause__``.
    """
    delay = _S3_DOWNLOAD_BACKOFF
    last_exc: Exception | None = None
    for attempt in range(1, _S3_DOWNLOAD_RETRIES + 1):
        try:
            # Kept inside the ``try`` so a failure to remove the old file is
            # retried like a failed download.
            nc_path.unlink(missing_ok=True)
            datacube.download(str(nc_path), format="NetCDF")
            return
        except Exception as exc:  # broad on purpose: error types are backend-specific
            last_exc = exc
            if attempt < _S3_DOWNLOAD_RETRIES:
                print(
                    f"[S3-OEO] Download attempt {attempt} failed ({exc}); "
                    f"retrying in {delay}s..."
                )
                time.sleep(delay)
                delay *= 2
            else:
                print(f"[S3-OEO] All {_S3_DOWNLOAD_RETRIES} download attempts failed")
    raise RuntimeError(
        f"S3 download failed after {_S3_DOWNLOAD_RETRIES} attempts"
    ) from last_exc
|
||||
|
||||
|
||||
def download_s3_openeo(
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
|
|
@ -537,7 +572,7 @@ def download_s3_openeo(
|
|||
nc_path = output_dir / "_s3_syn_l2.nc"
|
||||
print(f"[S3-OEO] Downloading NetCDF to {nc_path}...")
|
||||
t0 = time.time()
|
||||
datacube.download(str(nc_path), format="NetCDF")
|
||||
_download_with_retry(datacube, nc_path)
|
||||
print(f"[S3-OEO] Download completed in {time.time() - t0:.1f}s")
|
||||
|
||||
print("[S3-OEO] Splitting into per-date GeoTIFFs...")
|
||||
|
|
@ -804,6 +839,11 @@ def main(argv: list[str] | None = None) -> int:
|
|||
default=None,
|
||||
help="Single sitename to process (default: all step-2 PASS sites)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-downloaded",
|
||||
action="store_true",
|
||||
help="Skip sites whose directory already exists under data/sentinel_data/{year}/",
|
||||
)
|
||||
args = parser.parse_args(argv)
|
||||
year = args.evaluation_year
|
||||
|
||||
|
|
@ -821,16 +861,17 @@ def main(argv: list[str] | None = None) -> int:
|
|||
print(f"[Sentinel-3] Processing {len(pass_sites)} site(s)")
|
||||
for i, site in enumerate(pass_sites, 1):
|
||||
sitename = site["sitename"]
|
||||
site_dir = DATA_DIR / "sentinel_data" / str(year) / sitename
|
||||
if args.skip_downloaded and site_dir.exists():
|
||||
print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename} — skipping (directory exists)")
|
||||
continue
|
||||
print(f"[Sentinel-3] ({i}/{len(pass_sites)}) {sitename}")
|
||||
try:
|
||||
summary = process_site(sitename, site["lat"], site["lon"], year)
|
||||
print(
|
||||
f"[Sentinel-3] {sitename} done — "
|
||||
f"{summary['s2_refl_count']} REFL, "
|
||||
f"{summary['s3_composite_count']} composites"
|
||||
)
|
||||
except Exception as exc:
|
||||
print(f"[Sentinel-3] {sitename} FAILED: {exc}")
|
||||
|
||||
return 0
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue