Scale it up.
This commit is contained in:
parent
ba36dfe914
commit
c033f5f527
1 changed file with 67 additions and 34 deletions
|
|
@ -29,6 +29,7 @@ import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
@ -95,6 +96,17 @@ _BAND_ASSETS: dict[str, str] = {
|
||||||
_SCL_ASSET = "scl"
|
_SCL_ASSET = "scl"
|
||||||
_MIN_BBOX_HALF_DEG = 0.008
|
_MIN_BBOX_HALF_DEG = 0.008
|
||||||
|
|
||||||
|
_GDAL_COG_ENV = {
|
||||||
|
"GDAL_HTTP_VERSION": "2",
|
||||||
|
"GDAL_HTTP_MERGE_CONSECUTIVE_RANGES": "YES",
|
||||||
|
"GDAL_HTTP_MULTIPLEX": "YES",
|
||||||
|
"GDAL_HTTP_TCP_KEEPALIVE": "YES",
|
||||||
|
"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
|
||||||
|
"CPL_VSIL_CURL_CACHE_SIZE": "200000000",
|
||||||
|
"GDAL_MAX_CONNECTIONS": "100",
|
||||||
|
"AWS_NO_SIGN_REQUEST": "YES",
|
||||||
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Internal S3 constants
|
# Internal S3 constants
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -314,39 +326,29 @@ def stac_search_s2(
|
||||||
return list({item.id: item for item in search.items()}.values())
|
return list({item.id: item for item in search.items()}.values())
|
||||||
|
|
||||||
|
|
||||||
def download_s2_window(
|
def _process_item(
|
||||||
items: list[Any],
|
item: Any,
|
||||||
bbox: list[float],
|
bbox: list[float],
|
||||||
output_dir: Path,
|
|
||||||
bands: list[str],
|
bands: list[str],
|
||||||
ratio: int = RESOLUTION_RATIO,
|
output_dir: Path,
|
||||||
) -> None:
|
ratio: int,
|
||||||
"""Range-read S2 L2A COG windows and write masked REFL GeoTIFFs.
|
) -> str | None:
|
||||||
|
"""Range-read one S2 item and write a masked REFL GeoTIFF.
|
||||||
|
|
||||||
Writes ``{item.id}_REFL.tif`` directly — no intermediate raw download.
|
Returns a skip-message string when the item cannot be processed, else None.
|
||||||
Cloud/shadow pixels (SCL 0, 3, >7) are zeroed. BOA offset is inferred from
|
|
||||||
``processing:baseline``. Output is zero-padded to multiples of ``ratio``.
|
|
||||||
"""
|
"""
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
for item in tqdm(items, unit="granule", desc="S2 COG window read"):
|
|
||||||
out_path = output_dir / f"{item.id}_REFL.tif"
|
out_path = output_dir / f"{item.id}_REFL.tif"
|
||||||
if out_path.is_file():
|
if out_path.is_file():
|
||||||
continue
|
return None
|
||||||
|
|
||||||
bands_result = _read_bands(item, bbox, bands)
|
bands_result = _read_bands(item, bbox, bands)
|
||||||
if bands_result is None:
|
if bands_result is None:
|
||||||
tqdm.write(f"[S2] Skipping {item.id}: missing asset or no bbox overlap")
|
return f"[S2] Skipping {item.id}: missing asset or no bbox overlap"
|
||||||
continue
|
|
||||||
band_arrays, ref_profile = bands_result
|
band_arrays, ref_profile = bands_result
|
||||||
target_shape = (ref_profile["height"], ref_profile["width"])
|
mask = _cloud_mask(item, bbox, (ref_profile["height"], ref_profile["width"]))
|
||||||
mask = _cloud_mask(item, bbox, target_shape)
|
|
||||||
|
|
||||||
stacked = (np.stack(band_arrays) + _boa_offset(item)) / 10_000.0
|
stacked = (np.stack(band_arrays) + _boa_offset(item)) / 10_000.0
|
||||||
np.clip(stacked, 0, None, out=stacked)
|
np.clip(stacked, 0, None, out=stacked)
|
||||||
stacked[:, mask] = 0.0
|
stacked[:, mask] = 0.0
|
||||||
stacked = _pad_to_multiple(stacked, ratio)
|
stacked = _pad_to_multiple(stacked, ratio)
|
||||||
|
|
||||||
out_profile = {
|
out_profile = {
|
||||||
"driver": "GTiff",
|
"driver": "GTiff",
|
||||||
"count": len(bands),
|
"count": len(bands),
|
||||||
|
|
@ -362,6 +364,37 @@ def download_s2_window(
|
||||||
dst.write(stacked)
|
dst.write(stacked)
|
||||||
for i, band_name in enumerate(bands, 1):
|
for i, band_name in enumerate(bands, 1):
|
||||||
dst.set_band_description(i, band_name)
|
dst.set_band_description(i, band_name)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def download_s2_window(
    items: list[Any],
    bbox: list[float],
    output_dir: Path,
    bands: list[str],
    ratio: int = RESOLUTION_RATIO,
    max_workers: int = 32,
) -> None:
    """Read S2 L2A COG windows concurrently and write masked REFL GeoTIFFs.

    One ``_process_item`` call is scheduled per entry of ``items`` on a thread
    pool; each call is responsible for writing ``{item.id}_REFL.tif`` into
    ``output_dir`` directly, with no intermediate raw download. Cloud/shadow
    pixels (SCL 0, 3, >7) are zeroed, the BOA offset is inferred from
    ``processing:baseline`` and the output is zero-padded to multiples of
    ``ratio``.

    Args:
        items: Items to process; each must expose an ``id`` attribute.
        bbox: Bounding box forwarded unchanged to ``_process_item``.
        output_dir: Destination directory; created (with parents) if missing.
        bands: Band names forwarded unchanged to ``_process_item``.
        ratio: Padding multiple forwarded to ``_process_item``.
        max_workers: Size of the thread pool used for the parallel reads.

    Raises:
        Any exception raised inside a worker is re-raised here when its
        future's result is collected.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the GDAL options are entered on the calling thread only;
    # confirm against the rasterio documentation that they are also in effect
    # inside the pool's worker threads.
    with rasterio.Env(**_GDAL_COG_ENV), ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Future -> granule id. Only the keys are consumed below; the ids are
        # kept alongside for reference.
        pending = {}
        for granule in items:
            job = executor.submit(_process_item, granule, bbox, bands, output_dir, ratio)
            pending[job] = granule.id

        progress = tqdm(total=len(pending), unit="granule", desc="S2 COG window read")
        with progress:
            for finished in as_completed(pending):
                # A non-empty return value is a skip message from the worker.
                skip_note = finished.result()
                if skip_note:
                    # tqdm.write keeps the message from clobbering the bar.
                    tqdm.write(skip_note)
                progress.update(1)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue