diff --git a/download_s2.py b/download_s2.py index 41ae5a5..1313133 100644 --- a/download_s2.py +++ b/download_s2.py @@ -20,50 +20,63 @@ search = client.search( max_items=1000, ) +items_by_key = {} for item in search.items(): + date = item.datetime.strftime("%Y%m%d") + parts = item.id.split("_") + increment = parts[3] if len(parts) > 3 else "0" + key = (date, increment) + if key not in items_by_key: + items_by_key[key] = item + +for (date, increment), item in items_by_key.items(): + filepath = os.path.join(output_dir, f"{date}_{increment}.geotiff") + if os.path.exists(filepath): + continue + + band_data = {} + profile = None + for band_name, asset_name in bands.items(): if asset_name in item.assets: asset = item.assets[asset_name] - filepath = os.path.join(output_dir, f"{item.id}_{band_name}.tif") - if not os.path.exists(filepath): - with rasterio.open(asset.href) as src: - bbox_geom = { - "type": "Polygon", - "coordinates": [[ - [bbox[0], bbox[1]], - [bbox[2], bbox[1]], - [bbox[2], bbox[3]], - [bbox[0], bbox[3]], - [bbox[0], bbox[1]] - ]] - } - bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom) - coords = bbox_transformed["coordinates"][0] - x_coords = [c[0] for c in coords[:4]] - y_coords = [c[1] for c in coords[:4]] - bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)] - src_bounds = src.bounds - intersect_bbox = [ - max(bbox_crs[0], src_bounds.left), - max(bbox_crs[1], src_bounds.bottom), - min(bbox_crs[2], src_bounds.right), - min(bbox_crs[3], src_bounds.top), - ] - window = from_bounds(*intersect_bbox, src.transform) - if window.height > 0 and window.width > 0: - data = src.read(window=window) - new_transform = window_transform(window, src.transform) - with rasterio.open( - filepath, "w", - driver="COG", - height=window.height, - width=window.width, - count=src.count, - dtype=data.dtype, - crs=src.crs, - transform=new_transform, - compress="lzw", - ) as dst: - dst.write(data) - print(f"Downloaded: {filepath}") + with rasterio.open(asset.href) as src: + bbox_geom = { + "type": "Polygon", + "coordinates": [[ + [bbox[0], bbox[1]], [bbox[2], bbox[1]], + [bbox[2], bbox[3]], [bbox[0], bbox[3]], [bbox[0], bbox[1]] + ]] + } + bbox_transformed = transform_geom("EPSG:4326", src.crs, bbox_geom) + coords = bbox_transformed["coordinates"][0] + x_coords = [c[0] for c in coords[:4]] + y_coords = [c[1] for c in coords[:4]] + bbox_crs = [min(x_coords), min(y_coords), max(x_coords), max(y_coords)] + src_bounds = src.bounds + intersect_bbox = [ + max(bbox_crs[0], src_bounds.left), max(bbox_crs[1], src_bounds.bottom), + min(bbox_crs[2], src_bounds.right), min(bbox_crs[3], src_bounds.top), + ] + window = from_bounds(*intersect_bbox, src.transform) + if window.height > 0 and window.width > 0: + data = src.read(window=window) + new_transform = window_transform(window, src.transform) + if profile is None: + profile = { + "driver": "GTiff", "height": window.height, "width": window.width, + "count": len(bands), "dtype": data.dtype, "crs": src.crs, + "transform": new_transform, "compress": "lzw" + } + band_idx = list(bands.keys()).index(band_name) + band_data[band_idx] = data[0] + + if profile and len(band_data) == len(bands): + stacked = [band_data[i] for i in sorted(band_data.keys())] + band_names = [list(bands.keys())[i] for i in sorted(band_data.keys())] + with rasterio.open(filepath, "w", **profile) as dst: + for i, data in enumerate(stacked, 1): + dst.write(data, i) + dst.set_band_description(i, band_names[i-1]) + print(f"Saved: {filepath}") diff --git a/download_s3.py b/download_s3.py index e458261..ea3f1c1 100644 --- a/download_s3.py +++ b/download_s3.py @@ -59,6 +59,7 @@ times = netCDF4.num2date(nc.variables['t'][:], nc.variables['t'].units) x_coords = nc.variables['x'][:] y_coords = nc.variables['y'][:] band_vars = sorted([v for v in nc.variables.keys() if v.startswith('B') and v[1:].isdigit()]) +band_names = [list(bands.keys())[openeo_bands.index(b)] for b in band_vars] transform = from_bounds( float(x_coords.min()), float(y_coords.min()), @@ -66,13 +67,17 @@ transform = from_bounds( len(x_coords), len(y_coords) ) +date_counts = {} for t_idx, time_val in enumerate(times): - date_str = time_val.strftime("%Y%m%dT%H%M%S") if isinstance(time_val, datetime) else netCDF4.num2date(nc.variables['t'][t_idx], nc.variables['t'].units).strftime("%Y%m%dT%H%M%S") + dt = time_val if isinstance(time_val, datetime) else netCDF4.num2date(nc.variables['t'][t_idx], nc.variables['t'].units) + date_str = dt.strftime("%Y%m%d") + increment = date_counts.get(date_str, 0) + date_counts[date_str] = increment + 1 band_data = [nc.variables[b][t_idx, :, :] for b in band_vars] stacked = np.stack(band_data, axis=0) - output_path = output_dir / f"S3_OLCI__{date_str}.tif" + output_path = output_dir / f"{date_str}_{increment}.geotiff" with rasterio.open( output_path, 'w', driver='GTiff', height=len(y_coords), width=len(x_coords), @@ -80,6 +85,8 @@ for t_idx, time_val in enumerate(times): transform=transform, compress='lzw' ) as dst: dst.write(stacked) + for i, band_name in enumerate(band_names, 1): + dst.set_band_description(i, band_name) print(f"Saved: {output_path}") nc.close()