foo
This commit is contained in:
parent
7a695cc089
commit
415af89c7d
4 changed files with 80 additions and 143 deletions
|
|
@ -25,6 +25,11 @@ def detect_clouds(season, site_name):
|
||||||
with open(timeseries_file) as f:
|
with open(timeseries_file) as f:
|
||||||
timeseries = json.load(f)
|
timeseries = json.load(f)
|
||||||
|
|
||||||
|
# Flag entries with ndvi: None as outliers (bad/invalid data)
|
||||||
|
for e in timeseries:
|
||||||
|
if e.get("ndvi") is None:
|
||||||
|
clouds[source].append(e["filename"])
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
|
(e, datetime.fromisoformat(e["date"].replace("Z", "+00:00")))
|
||||||
for e in timeseries
|
for e in timeseries
|
||||||
|
|
|
||||||
1
ndvi.py
1
ndvi.py
|
|
@ -107,6 +107,7 @@ def _create_timeseries_for_dir(output_dir, site_position, source_name):
|
||||||
timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
|
timeseries.append({"date": date, "filename": filename, "ndvi": ndvi_value})
|
||||||
|
|
||||||
timeseries.sort(key=lambda x: x["date"])
|
timeseries.sort(key=lambda x: x["date"])
|
||||||
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
timeseries_file = output_dir / "timeseries.json"
|
timeseries_file = output_dir / "timeseries.json"
|
||||||
with open(timeseries_file, "w") as f:
|
with open(timeseries_file, "w") as f:
|
||||||
json.dump(timeseries, f, indent=2)
|
json.dump(timeseries, f, indent=2)
|
||||||
|
|
|
||||||
199
post_process.py
199
post_process.py
|
|
@ -1,59 +1,13 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import rasterio
|
import rasterio
|
||||||
from rasterio import windows
|
from rasterio import windows
|
||||||
|
from rasterio.warp import reproject, Resampling
|
||||||
|
from rasterio.io import MemoryFile
|
||||||
def _crop_to_bounds(src_file, bounds, output_file, row_based_height=None):
|
|
||||||
"""Crop a raster file to given bounds and save."""
|
|
||||||
crop_left, crop_bottom, crop_right, crop_top, crop_crs = bounds
|
|
||||||
|
|
||||||
with rasterio.open(src_file) as src:
|
|
||||||
# Calculate window from bounds
|
|
||||||
window = windows.from_bounds(crop_left, crop_bottom, crop_right, crop_top, src.transform)
|
|
||||||
|
|
||||||
# Use row-based height if provided (for fusion), otherwise calculate from bounds
|
|
||||||
if row_based_height is not None:
|
|
||||||
col_off = int(round(window.col_off))
|
|
||||||
window = windows.Window(col_off, 0, src.width, row_based_height)
|
|
||||||
# Calculate bottom Y from row index
|
|
||||||
bottom_y = src.transform[5] + row_based_height * src.transform[4]
|
|
||||||
else:
|
|
||||||
pixel_size = abs(src.transform[0])
|
|
||||||
width = int(round((crop_right - crop_left) / pixel_size))
|
|
||||||
height = int(round((crop_top - crop_bottom) / pixel_size))
|
|
||||||
window = windows.Window(
|
|
||||||
int(round(window.col_off)), int(round(window.row_off)), width, height
|
|
||||||
)
|
|
||||||
bottom_y = crop_bottom
|
|
||||||
|
|
||||||
# Clip window to source bounds
|
|
||||||
src_window = windows.Window(0, 0, src.width, src.height)
|
|
||||||
window = window.intersection(src_window)
|
|
||||||
if not window or window.height <= 0 or window.width <= 0:
|
|
||||||
return False
|
|
||||||
|
|
||||||
data = src.read(window=window)
|
|
||||||
transform = rasterio.transform.from_bounds(
|
|
||||||
crop_left, bottom_y, crop_right, crop_top, window.width, window.height
|
|
||||||
)
|
|
||||||
|
|
||||||
profile = src.profile.copy()
|
|
||||||
profile.update({
|
|
||||||
"height": window.height,
|
|
||||||
"width": window.width,
|
|
||||||
"transform": transform,
|
|
||||||
"crs": crop_crs,
|
|
||||||
})
|
|
||||||
|
|
||||||
with rasterio.open(output_file, "w", **profile) as dst:
|
|
||||||
dst.write(data)
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def process_cropped(season, site_position, site_name):
|
def process_cropped(season, site_position, site_name):
|
||||||
"""Crop prepared S2, S3, and fusion files to fusion valid data bounds."""
|
"""Crop fusion to valid data, then crop S2/S3 to match."""
|
||||||
base = Path(f"data/{site_name}/{season}")
|
base = Path(f"data/{site_name}/{season}")
|
||||||
prepared = base / "prepared"
|
prepared = base / "prepared"
|
||||||
processed = base / "processed"
|
processed = base / "processed"
|
||||||
|
|
@ -65,94 +19,71 @@ def process_cropped(season, site_position, site_name):
|
||||||
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
|
for output_dir in [processed / "s2", processed / "s3", processed / "fusion"]:
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
print(f"[PROCESS] Cropping files to fusion valid data bounds: {site_name}, {season}")
|
print(f"[PROCESS] Processing files: {site_name}, {season}")
|
||||||
|
|
||||||
# Collect all available DIST_CLOUD files and their dates
|
|
||||||
dist_cloud_files = {}
|
|
||||||
for dist_cloud_file in s2_prep.glob("S2A_MSIL2A_*_DIST_CLOUD.tif"):
|
|
||||||
date_str = dist_cloud_file.stem.split("_")[2]
|
|
||||||
try:
|
|
||||||
date_obj = datetime.strptime(date_str, "%Y%m%d")
|
|
||||||
dist_cloud_files[date_obj] = dist_cloud_file
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not dist_cloud_files:
|
|
||||||
print("[PROCESS] Warning: No DIST_CLOUD files found. Cannot process fusion files.")
|
|
||||||
return
|
|
||||||
|
|
||||||
dist_cloud_dates = sorted(dist_cloud_files.keys())
|
|
||||||
|
|
||||||
def find_closest_dist_cloud(target_date_str):
|
|
||||||
"""Find the closest DIST_CLOUD file to the target date."""
|
|
||||||
try:
|
|
||||||
target_date = datetime.strptime(target_date_str, "%Y%m%d")
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Find closest date
|
|
||||||
closest_date = min(dist_cloud_dates, key=lambda d: abs((d - target_date).days))
|
|
||||||
return dist_cloud_files[closest_date]
|
|
||||||
|
|
||||||
# Determine valid bounds for each fusion file
|
|
||||||
fusion_bounds = {}
|
|
||||||
fusion_rows = {}
|
|
||||||
|
|
||||||
|
# Crop fusion to valid data and get dimensions
|
||||||
|
fusion_dims = {}
|
||||||
for fusion_file in fusion_prep.glob("REFL_*.tif"):
|
for fusion_file in fusion_prep.glob("REFL_*.tif"):
|
||||||
date_str = fusion_file.stem.split("_")[1]
|
date_str = fusion_file.stem.split("_")[1]
|
||||||
|
with rasterio.open(fusion_file) as src:
|
||||||
# Try exact date first, then find closest
|
data = src.read()
|
||||||
dist_cloud = s2_prep / f"S2A_MSIL2A_{date_str}_DIST_CLOUD.tif"
|
valid = ~np.isnan(data) & (data > 0.001)
|
||||||
if not dist_cloud.exists():
|
rows = np.any(valid, axis=(0, 2))
|
||||||
dist_cloud = find_closest_dist_cloud(date_str)
|
cols = np.any(valid, axis=(0, 1))
|
||||||
if dist_cloud is None:
|
r0, r1 = np.where(rows)[0][[0, -1]]
|
||||||
continue
|
c0, c1 = np.where(cols)[0][[0, -1]]
|
||||||
|
w, h = c1 - c0 + 1, r1 - r0 + 1
|
||||||
with rasterio.open(dist_cloud) as dist_src:
|
window = windows.Window(c0, r0, w, h)
|
||||||
dist_bounds = dist_src.bounds
|
data_crop = src.read(window=window)
|
||||||
dist_crs = dist_src.crs
|
transform = rasterio.windows.transform(window, src.transform)
|
||||||
|
p = src.profile.copy()
|
||||||
# Find first valid row from bottom in fusion file
|
p.update({"width": w, "height": h, "transform": transform})
|
||||||
with rasterio.open(fusion_file) as fusion_src:
|
|
||||||
data = fusion_src.read()
|
|
||||||
height = data.shape[1]
|
|
||||||
|
|
||||||
first_valid_row = height
|
|
||||||
for row_idx in range(height - 1, -1, -1):
|
|
||||||
if np.any(~np.isnan(data[:, row_idx, :]) & (data[:, row_idx, :] > 0.001)):
|
|
||||||
first_valid_row = row_idx
|
|
||||||
break
|
|
||||||
|
|
||||||
valid_bottom_y = (fusion_src.transform * (0, first_valid_row + 1))[1]
|
|
||||||
crop_bottom = max(dist_bounds.bottom, valid_bottom_y)
|
|
||||||
|
|
||||||
fusion_bounds[date_str] = (
|
|
||||||
dist_bounds.left, crop_bottom, dist_bounds.right, dist_bounds.top, dist_crs
|
|
||||||
)
|
|
||||||
fusion_rows[date_str] = first_valid_row
|
|
||||||
|
|
||||||
# Process S2 files
|
|
||||||
for refl_file in s2_prep.glob("*REFL.tif"):
|
|
||||||
date_str = refl_file.stem.split("_")[2]
|
|
||||||
if date_str in fusion_bounds:
|
|
||||||
output_file = processed / "s2" / f"{date_str}_0.geotiff"
|
|
||||||
if _crop_to_bounds(refl_file, fusion_bounds[date_str], output_file):
|
|
||||||
print(f"[PROCESS] Saved: {output_file}")
|
|
||||||
|
|
||||||
# Process S3 files
|
|
||||||
for s3_file in s3_prep.glob("composite_*.tif"):
|
|
||||||
date_str = s3_file.stem.split("_")[1]
|
|
||||||
if date_str in fusion_bounds:
|
|
||||||
output_file = processed / "s3" / f"{date_str}_0.geotiff"
|
|
||||||
if _crop_to_bounds(s3_file, fusion_bounds[date_str], output_file):
|
|
||||||
print(f"[PROCESS] Saved: {output_file}")
|
|
||||||
|
|
||||||
# Process fusion files (use row-based cropping)
|
|
||||||
for date_str, bounds in fusion_bounds.items():
|
|
||||||
fusion_file = fusion_prep / f"REFL_{date_str}.tif"
|
|
||||||
if fusion_file.exists():
|
|
||||||
output_file = processed / "fusion" / f"{date_str}_0.geotiff"
|
output_file = processed / "fusion" / f"{date_str}_0.geotiff"
|
||||||
if _crop_to_bounds(fusion_file, bounds, output_file, row_based_height=fusion_rows[date_str] + 1):
|
with rasterio.open(output_file, "w", **p) as dst:
|
||||||
print(f"[PROCESS] Saved: {output_file}")
|
dst.write(data_crop)
|
||||||
|
fusion_dims[date_str] = (c0, r0, w, h, transform, src.transform, src.crs, src.profile)
|
||||||
|
print(f"[PROCESS] Cropped fusion: {output_file}")
|
||||||
|
|
||||||
|
# Crop S2 and S3 to fusion size
|
||||||
|
for date_str, (c0, r0, w, h, transform, fusion_transform, crs, fusion_profile) in fusion_dims.items():
|
||||||
|
window = windows.Window(c0, r0, w, h)
|
||||||
|
# S2
|
||||||
|
for s2_file in s2_prep.glob("*REFL.tif"):
|
||||||
|
if s2_file.stem.split("_")[2] == date_str:
|
||||||
|
output_file = processed / "s2" / f"{date_str}_0.geotiff"
|
||||||
|
with rasterio.open(s2_file) as src:
|
||||||
|
data = src.read(window=window)
|
||||||
|
p2 = src.profile.copy()
|
||||||
|
p2.update({"width": w, "height": h, "transform": transform, "crs": crs})
|
||||||
|
with rasterio.open(output_file, "w", **p2) as dst:
|
||||||
|
dst.write(data)
|
||||||
|
print(f"[PROCESS] Cropped: {output_file}")
|
||||||
|
# S3: resample to fusion pixel size, then crop
|
||||||
|
s3_file = s3_prep / f"composite_{date_str}.tif"
|
||||||
|
if s3_file.exists():
|
||||||
|
output_file = processed / "s3" / f"{date_str}_0.geotiff"
|
||||||
|
with rasterio.open(s3_file) as src:
|
||||||
|
# Resample to fusion pixel size
|
||||||
|
temp_profile = fusion_profile.copy()
|
||||||
|
temp_profile.update({"dtype": src.profile["dtype"], "count": src.count})
|
||||||
|
with rasterio.MemoryFile() as memfile:
|
||||||
|
with memfile.open(**temp_profile) as resampled:
|
||||||
|
for i in range(1, src.count + 1):
|
||||||
|
reproject(
|
||||||
|
source=rasterio.band(src, i),
|
||||||
|
destination=rasterio.band(resampled, i),
|
||||||
|
src_transform=src.transform,
|
||||||
|
src_crs=src.crs,
|
||||||
|
dst_transform=fusion_transform,
|
||||||
|
dst_crs=crs,
|
||||||
|
resampling=Resampling.nearest
|
||||||
|
)
|
||||||
|
# Crop using same window
|
||||||
|
data = resampled.read(window=window)
|
||||||
|
p2 = resampled.profile.copy()
|
||||||
|
p2.update({"width": w, "height": h, "transform": transform})
|
||||||
|
with rasterio.open(output_file, "w", **p2) as dst:
|
||||||
|
dst.write(data)
|
||||||
|
print(f"[PROCESS] Cropped: {output_file}")
|
||||||
|
|
||||||
print("[PROCESS] Completed")
|
print("[PROCESS] Completed")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue