Allow arguments for download.
This commit is contained in:
parent
484fa0bb4c
commit
22d493bc2d
5 changed files with 242 additions and 142 deletions
8
.pre-commit-config.yaml
Normal file
8
.pre-commit-config.yaml
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.8.4
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args: [--fix]
|
||||||
|
- id: ruff-format
|
||||||
|
|
||||||
11
download.py
Normal file
11
download.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from download_s2 import download_s2
|
||||||
|
from download_s3 import download_s3
|
||||||
|
|
||||||
|
year = 2024
|
||||||
|
site_position = (47.116171, 11.320308)
|
||||||
|
site_name = "innsbruck"
|
||||||
|
|
||||||
|
print(f"Downloading data for {site_name}, {year}")
|
||||||
|
download_s2(year, site_position, site_name)
|
||||||
|
download_s3(year, site_position, site_name)
|
||||||
|
print("All downloads completed")
|
||||||
174
download_s2.py
174
download_s2.py
|
|
@ -4,79 +4,115 @@ from rasterio.warp import transform_geom
|
||||||
from rasterio.windows import from_bounds, transform as window_transform
|
from rasterio.windows import from_bounds, transform as window_transform
|
||||||
from pystac_client import Client
|
from pystac_client import Client
|
||||||
|
|
||||||
datetime_range = "2024-01-01/2024-01-03"
|
|
||||||
lon, lat = 11.320308, 47.116171
|
|
||||||
bbox_size = 0.009
|
|
||||||
bbox = [lon - bbox_size/2, lat - bbox_size/2, lon + bbox_size/2, lat + bbox_size/2]
|
|
||||||
bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
|
|
||||||
output_dir = "data/innsbruck/2024/s2/"
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
client = Client.open("https://earth-search.aws.element84.com/v1")
|
def download_s2(year, site_position, site_name):
|
||||||
search = client.search(
|
lat, lon = site_position
|
||||||
collections=["sentinel-2-l2a"],
|
datetime_range = f"{year}-01-01/{year}-12-31"
|
||||||
intersects={"type": "Point", "coordinates": [lon, lat]},
|
output_dir = f"data/{site_name}/{year}/s2/"
|
||||||
datetime=datetime_range,
|
|
||||||
max_items=1000,
|
|
||||||
)
|
|
||||||
|
|
||||||
items_by_key = {}
|
print(f"[S2] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {year}")
|
||||||
for item in search.items():
|
|
||||||
date = item.datetime.strftime("%Y%m%d")
|
|
||||||
parts = item.id.split("_")
|
|
||||||
increment = parts[3] if len(parts) > 3 else "0"
|
|
||||||
key = (date, increment)
|
|
||||||
if key not in items_by_key:
|
|
||||||
items_by_key[key] = item
|
|
||||||
|
|
||||||
for (date, increment), item in items_by_key.items():
|
bbox_size = 0.011
|
||||||
filepath = os.path.join(output_dir, f"{date}_{increment}.geotiff")
|
bbox = [
|
||||||
if os.path.exists(filepath):
|
lon - bbox_size / 2,
|
||||||
continue
|
lat - bbox_size / 2,
|
||||||
|
lon + bbox_size / 2,
|
||||||
|
lat + bbox_size / 2,
|
||||||
|
]
|
||||||
|
bands = {"B02": "blue", "B03": "green", "B04": "red", "B8A": "nir"}
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
band_data = {}
|
print("[S2] Connecting to STAC catalog...")
|
||||||
profile = None
|
client = Client.open("https://earth-search.aws.element84.com/v1")
|
||||||
|
search = client.search(
|
||||||
|
collections=["sentinel-2-l2a"],
|
||||||
|
intersects={"type": "Point", "coordinates": [lon, lat]},
|
||||||
|
datetime=datetime_range,
|
||||||
|
max_items=1000,
|
||||||
|
)
|
||||||
|
|
||||||
for band_name, asset_name in bands.items():
|
print("[S2] Searching items...")
|
||||||
if asset_name in item.assets:
|
items_by_key = {}
|
||||||
asset = item.assets[asset_name]
|
for item in search.items():
|
||||||
with rasterio.open(asset.href) as src:
|
date = item.datetime.strftime("%Y%m%d")
|
||||||
bbox_geom = {
|
parts = item.id.split("_")
|
||||||
"type": "Polygon",
|
increment = parts[3] if len(parts) > 3 else "0"
|
||||||
"coordinates": [[
|
key = (date, increment)
|
||||||
[bbox[0], bbox[1]], [bbox[2], bbox[1]],
|
if key not in items_by_key:
|
||||||
[bbox[2], bbox[3]], [bbox[0], bbox[3]], [bbox[0], bbox[1]]
|
items_by_key[key] = item
|
||||||
]]
|
|
||||||
}
|
|
||||||
bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom)
|
|
||||||
coords = bbox_transformed["coordinates"][0]
|
|
||||||
x_coords = [c[0] for c in coords[:4]]
|
|
||||||
y_coords = [c[1] for c in coords[:4]]
|
|
||||||
bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)]
|
|
||||||
src_bounds = src.bounds
|
|
||||||
intersect_bbox = [
|
|
||||||
max(bbox_crs[0], src_bounds.left), max(bbox_crs[1], src_bounds.bottom),
|
|
||||||
min(bbox_crs[2], src_bounds.right), min(bbox_crs[3], src_bounds.top),
|
|
||||||
]
|
|
||||||
window = from_bounds(*intersect_bbox, src.transform)
|
|
||||||
if window.height > 0 and window.width > 0:
|
|
||||||
data = src.read(window=window)
|
|
||||||
new_transform = window_transform(window, src.transform)
|
|
||||||
if profile is None:
|
|
||||||
profile = {
|
|
||||||
"driver": "GTiff", "height": window.height, "width": window.width,
|
|
||||||
"count": len(bands), "dtype": data.dtype, "crs": src.crs,
|
|
||||||
"transform": new_transform, "compress": "lzw"
|
|
||||||
}
|
|
||||||
band_idx = list(bands.keys()).index(band_name)
|
|
||||||
band_data[band_idx] = data[0]
|
|
||||||
|
|
||||||
if profile and len(band_data) == len(bands):
|
print(f"[S2] Found {len(items_by_key)} unique items")
|
||||||
stacked = [band_data[i] for i in sorted(band_data.keys())]
|
|
||||||
band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
|
|
||||||
with rasterio.open(filepath, "w", **profile) as dst:
|
|
||||||
for i, data in enumerate(stacked, 1):
|
|
||||||
dst.write(data, i)
|
|
||||||
dst.set_band_description(i, band_names[i-1])
|
|
||||||
print(f"Saved: {filepath}")
|
|
||||||
|
|
||||||
|
for (date, increment), item in items_by_key.items():
|
||||||
|
filepath = os.path.join(output_dir, f"{date}_{increment}.geotiff")
|
||||||
|
if os.path.exists(filepath):
|
||||||
|
print(f"[S2] Skipping {date}_{increment}.geotiff (exists)")
|
||||||
|
continue
|
||||||
|
|
||||||
|
print(f"[S2] Processing {date}_{increment}...")
|
||||||
|
band_data = {}
|
||||||
|
profile = None
|
||||||
|
|
||||||
|
for band_name, asset_name in bands.items():
|
||||||
|
if asset_name in item.assets:
|
||||||
|
asset = item.assets[asset_name]
|
||||||
|
with rasterio.open(asset.href) as src:
|
||||||
|
bbox_geom = {
|
||||||
|
"type": "Polygon",
|
||||||
|
"coordinates": [
|
||||||
|
[
|
||||||
|
[bbox[0], bbox[1]],
|
||||||
|
[bbox[2], bbox[1]],
|
||||||
|
[bbox[2], bbox[3]],
|
||||||
|
[bbox[0], bbox[3]],
|
||||||
|
[bbox[0], bbox[1]],
|
||||||
|
]
|
||||||
|
],
|
||||||
|
}
|
||||||
|
bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom)
|
||||||
|
coords = bbox_transformed["coordinates"][0]
|
||||||
|
x_coords = [c[0] for c in coords[:4]]
|
||||||
|
y_coords = [c[1] for c in coords[:4]]
|
||||||
|
bbox_crs = [
|
||||||
|
min(x_coords),
|
||||||
|
min(y_coords),
|
||||||
|
max(x_coords),
|
||||||
|
max(y_coords),
|
||||||
|
]
|
||||||
|
src_bounds = src.bounds
|
||||||
|
intersect_bbox = [
|
||||||
|
max(bbox_crs[0], src_bounds.left),
|
||||||
|
max(bbox_crs[1], src_bounds.bottom),
|
||||||
|
min(bbox_crs[2], src_bounds.right),
|
||||||
|
min(bbox_crs[3], src_bounds.top),
|
||||||
|
]
|
||||||
|
window = from_bounds(*intersect_bbox, src.transform)
|
||||||
|
if window.height > 0 and window.width > 0:
|
||||||
|
data = src.read(window=window)
|
||||||
|
new_transform = window_transform(window, src.transform)
|
||||||
|
if profile is None:
|
||||||
|
profile = {
|
||||||
|
"driver": "GTiff",
|
||||||
|
"height": window.height,
|
||||||
|
"width": window.width,
|
||||||
|
"count": len(bands),
|
||||||
|
"dtype": data.dtype,
|
||||||
|
"crs": src.crs,
|
||||||
|
"transform": new_transform,
|
||||||
|
"compress": "lzw",
|
||||||
|
}
|
||||||
|
band_idx = list(bands.keys()).index(band_name)
|
||||||
|
band_data[band_idx] = data[0]
|
||||||
|
|
||||||
|
if profile and len(band_data) == len(bands):
|
||||||
|
stacked = [band_data[i] for i in sorted(band_data.keys())]
|
||||||
|
band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())]
|
||||||
|
with rasterio.open(filepath, "w", **profile) as dst:
|
||||||
|
for i, data in enumerate(stacked, 1):
|
||||||
|
dst.write(data, i)
|
||||||
|
dst.set_band_description(i, band_names[i - 1])
|
||||||
|
print(f"[S2] Saved: {filepath}")
|
||||||
|
else:
|
||||||
|
print(f"[S2] Skipping {date}_{increment} (missing bands)")
|
||||||
|
|
||||||
|
print("[S2] Completed")
|
||||||
|
|
|
||||||
178
download_s3.py
178
download_s3.py
|
|
@ -11,83 +11,127 @@ from rasterio.transform import from_bounds
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
datetime_range = "2024-01-01/2024-01-03"
|
|
||||||
lon, lat = 11.320308, 47.116171
|
|
||||||
bbox_size = 0.009
|
|
||||||
bbox = [lon - bbox_size/2, lat - bbox_size/2, lon + bbox_size/2, lat + bbox_size/2]
|
|
||||||
bands = {"SDR_Oa04": "blue", "SDR_Oa06": "green", "SDR_Oa08": "red", "SDR_Oa17": "nir"}
|
|
||||||
output_dir = Path("data/innsbruck/2024/s3/")
|
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
band_map = {"SDR_Oa04": "B04", "SDR_Oa06": "B06", "SDR_Oa08": "B08", "SDR_Oa17": "B17"}
|
def download_s3(year, site_position, site_name):
|
||||||
openeo_bands = [band_map.get(b, b) for b in bands.keys()]
|
lat, lon = site_position
|
||||||
|
datetime_range = f"{year}-01-01/{year}-12-31"
|
||||||
|
output_dir = Path(f"data/{site_name}/{year}/s3/")
|
||||||
|
|
||||||
start_date, end_date = datetime_range.split("/")
|
print(f"[S3] Starting download: {site_name} ({lat:.6f}, {lon:.6f}), {year}")
|
||||||
spatial_extent = {
|
|
||||||
"west": bbox[0], "east": bbox[2],
|
|
||||||
"south": bbox[1], "north": bbox[3]
|
|
||||||
}
|
|
||||||
|
|
||||||
token_response = requests.post(
|
bbox_size = 0.011
|
||||||
"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
|
bbox = [
|
||||||
data={
|
lon - bbox_size / 2,
|
||||||
"grant_type": "password",
|
lat - bbox_size / 2,
|
||||||
"username": os.getenv("CDSE_USER"),
|
lon + bbox_size / 2,
|
||||||
"password": os.getenv("CDSE_PASSWORD"),
|
lat + bbox_size / 2,
|
||||||
"client_id": "cdse-public"
|
]
|
||||||
|
bands = {
|
||||||
|
"SDR_Oa04": "blue",
|
||||||
|
"SDR_Oa06": "green",
|
||||||
|
"SDR_Oa08": "red",
|
||||||
|
"SDR_Oa17": "nir",
|
||||||
}
|
}
|
||||||
)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
token_response.raise_for_status()
|
|
||||||
tokens = token_response.json()
|
|
||||||
access_token = tokens["access_token"]
|
|
||||||
|
|
||||||
conn = openeo.connect("openeo.dataspace.copernicus.eu")
|
band_map = {
|
||||||
conn.authenticate_oidc_access_token(access_token)
|
"SDR_Oa04": "B04",
|
||||||
|
"SDR_Oa06": "B06",
|
||||||
|
"SDR_Oa08": "B08",
|
||||||
|
"SDR_Oa17": "B17",
|
||||||
|
}
|
||||||
|
openeo_bands = [band_map.get(b, b) for b in bands.keys()]
|
||||||
|
|
||||||
datacube = conn.load_collection(
|
start_date, end_date = datetime_range.split("/")
|
||||||
"SENTINEL3_OLCI_L1B",
|
spatial_extent = {
|
||||||
spatial_extent=spatial_extent,
|
"west": bbox[0],
|
||||||
temporal_extent=[start_date, end_date],
|
"east": bbox[2],
|
||||||
bands=openeo_bands,
|
"south": bbox[1],
|
||||||
).resample_spatial(projection=32632)
|
"north": bbox[3],
|
||||||
|
}
|
||||||
|
|
||||||
output_file = output_dir / "s3_data.nc"
|
print("[S3] Authenticating...")
|
||||||
datacube.download(str(output_file), format="NetCDF")
|
token_response = requests.post(
|
||||||
|
"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
|
||||||
|
data={
|
||||||
|
"grant_type": "password",
|
||||||
|
"username": os.getenv("CDSE_USER"),
|
||||||
|
"password": os.getenv("CDSE_PASSWORD"),
|
||||||
|
"client_id": "cdse-public",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
token_response.raise_for_status()
|
||||||
|
tokens = token_response.json()
|
||||||
|
access_token = tokens["access_token"]
|
||||||
|
|
||||||
nc = netCDF4.Dataset(str(output_file), 'r')
|
print("[S3] Connecting to OpenEO...")
|
||||||
times = netCDF4.num2date(nc.variables['t'][:], nc.variables['t'].units)
|
conn = openeo.connect("openeo.dataspace.copernicus.eu")
|
||||||
x_coords = nc.variables['x'][:]
|
conn.authenticate_oidc_access_token(access_token)
|
||||||
y_coords = nc.variables['y'][:]
|
|
||||||
band_vars = sorted([v for v in nc.variables.keys() if v.startswith('B') and v[1:].isdigit()])
|
|
||||||
band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
|
|
||||||
|
|
||||||
transform = from_bounds(
|
print("[S3] Loading collection...")
|
||||||
float(x_coords.min()), float(y_coords.min()),
|
datacube = conn.load_collection(
|
||||||
float(x_coords.max()), float(y_coords.max()),
|
"SENTINEL3_OLCI_L1B",
|
||||||
len(x_coords), len(y_coords)
|
spatial_extent=spatial_extent,
|
||||||
)
|
temporal_extent=[start_date, end_date],
|
||||||
|
bands=openeo_bands,
|
||||||
|
).resample_spatial(projection=32632)
|
||||||
|
|
||||||
date_counts = {}
|
output_file = output_dir / "s3_data.nc"
|
||||||
for t_idx, time_val in enumerate(times):
|
print("[S3] Downloading NetCDF...")
|
||||||
dt = time_val if isinstance(time_val, datetime) else netCDF4.num2date(nc.variables['t'][t_idx], nc.variables['t'].units)
|
datacube.download(str(output_file), format="NetCDF")
|
||||||
date_str = dt.strftime("%Y%m%d")
|
|
||||||
increment = date_counts.get(date_str, 0)
|
|
||||||
date_counts[date_str] = increment + 1
|
|
||||||
|
|
||||||
band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
|
print("[S3] Processing NetCDF...")
|
||||||
stacked = np.stack(band_data, axis=0)
|
nc = netCDF4.Dataset(str(output_file), "r")
|
||||||
|
times = netCDF4.num2date(nc.variables["t"][:], nc.variables["t"].units)
|
||||||
|
x_coords = nc.variables["x"][:]
|
||||||
|
y_coords = nc.variables["y"][:]
|
||||||
|
band_vars = sorted(
|
||||||
|
[v for v in nc.variables.keys() if v.startswith("B") and v[1:].isdigit()]
|
||||||
|
)
|
||||||
|
band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars]
|
||||||
|
|
||||||
output_path = output_dir / f"{date_str}_{increment}.geotiff"
|
transform = from_bounds(
|
||||||
with rasterio.open(
|
float(x_coords.min()),
|
||||||
output_path, 'w',
|
float(y_coords.min()),
|
||||||
driver='GTiff', height=len(y_coords), width=len(x_coords),
|
float(x_coords.max()),
|
||||||
count=len(band_data), dtype=stacked.dtype, crs='EPSG:32632',
|
float(y_coords.max()),
|
||||||
transform=transform, compress='lzw'
|
len(x_coords),
|
||||||
) as dst:
|
len(y_coords),
|
||||||
dst.write(stacked)
|
)
|
||||||
for i, band_name in enumerate(band_names, 1):
|
|
||||||
dst.set_band_description(i, band_name)
|
|
||||||
print(f"Saved: {output_path}")
|
|
||||||
|
|
||||||
nc.close()
|
print(f"[S3] Found {len(times)} time steps")
|
||||||
os.remove(output_file)
|
date_counts = {}
|
||||||
|
for t_idx, time_val in enumerate(times):
|
||||||
|
dt = (
|
||||||
|
time_val
|
||||||
|
if isinstance(time_val, datetime)
|
||||||
|
else netCDF4.num2date(nc.variables["t"][t_idx], nc.variables["t"].units)
|
||||||
|
)
|
||||||
|
date_str = dt.strftime("%Y%m%d")
|
||||||
|
increment = date_counts.get(date_str, 0)
|
||||||
|
date_counts[date_str] = increment + 1
|
||||||
|
|
||||||
|
band_data = [nc.variables[b][t_idx, :, :] for b in band_vars]
|
||||||
|
stacked = np.stack(band_data, axis=0)
|
||||||
|
|
||||||
|
output_path = output_dir / f"{date_str}_{increment}.geotiff"
|
||||||
|
with rasterio.open(
|
||||||
|
output_path,
|
||||||
|
"w",
|
||||||
|
driver="GTiff",
|
||||||
|
height=len(y_coords),
|
||||||
|
width=len(x_coords),
|
||||||
|
count=len(band_data),
|
||||||
|
dtype=stacked.dtype,
|
||||||
|
crs="EPSG:32632",
|
||||||
|
transform=transform,
|
||||||
|
compress="lzw",
|
||||||
|
) as dst:
|
||||||
|
dst.write(stacked)
|
||||||
|
for i, band_name in enumerate(band_names, 1):
|
||||||
|
dst.set_band_description(i, band_name)
|
||||||
|
print(f"[S3] Saved: {output_path}")
|
||||||
|
|
||||||
|
nc.close()
|
||||||
|
os.remove(output_file)
|
||||||
|
print("[S3] Completed")
|
||||||
|
|
|
||||||
|
|
@ -5,4 +5,5 @@ python-dotenv
|
||||||
netCDF4
|
netCDF4
|
||||||
numpy
|
numpy
|
||||||
requests
|
requests
|
||||||
|
ruff
|
||||||
|
pre-commit
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue