Zonal Statistics Example¶

[ ]:

import wradlib as wrl
import matplotlib.pyplot as pl
import matplotlib as mpl
import warnings
warnings.filterwarnings('ignore')
try:
    # Inside IPython/Jupyter: render figures inline.
    # ``run_line_magic`` replaces the deprecated ``magic`` method.
    get_ipython().run_line_magic("matplotlib", "inline")
except Exception:
    # No IPython shell (``get_ipython`` undefined) or the inline backend is
    # unavailable: fall back to matplotlib's interactive mode.
    pl.ion()
import numpy as np
import xarray as xr


Setup Examples¶

[ ]:

def testplot(ds, obj, col="mean",
             levels=[0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 40, 50, 100],
             title=""):
    """Quick test plot layout for this example file.

    Draws a two-panel figure: the zonal statistic per target polygon on top
    and the gridded radar field with the polygon outlines below.

    Parameters
    ----------
    ds : object with an xarray-style ``plot`` method
        Radar field for the lower panel; plotted against its ``xc``/``yc``
        coordinates.
    obj : zonal statistics object
        Must expose ``obj.zdata.trg.geo`` whose ``plot`` accepts ``column``.
    col : str
        Attribute column of the target layer to colour the polygons by.
    levels : sequence of float
        Class boundaries for the upper panel. Only read, never modified.
    title : str
        Title of the upper panel.

    Notes
    -----
    Relies on ``from_levels_and_colors`` (matplotlib.colors) being imported
    in the notebook before the first call.
    """
    # One colour per level: with extend="max" the colormap needs
    # len(levels) - 1 interval colours plus one for the overflow.
    colors = pl.cm.viridis(np.linspace(0, 1, len(levels)))
    mycmap, mynorm = from_levels_and_colors(levels, colors, extend="max")

    # Fixed classes for the radar field in the lower panel
    radolevels = [0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 40, 50, 100]
    radocolors = pl.cm.viridis(np.linspace(0, 1, len(radolevels)))
    radocmap, radonorm = from_levels_and_colors(radolevels, radocolors,
                                                extend="max")

    fig = pl.figure(figsize=(10, 16))

    # Average rainfall sum
    ax = fig.add_subplot(211, aspect="equal")
    obj.zdata.trg.geo.plot(column=col, ax=ax,
                           cmap=mycmap, norm=mynorm,
                           edgecolor="white", lw=0.5,
                           legend=True, legend_kwds=dict(shrink=0.5))
    ax.autoscale()
    ax.set_xlabel("UTM Zone 32 Easting")
    ax.set_ylabel("UTM Zone 32 Northing")
    ax.set_title(title)
    pl.draw()

    # Original radar data with the target polygons outlined on top
    ax1 = fig.add_subplot(212, aspect="equal")
    ds.plot(x="xc", y="yc", cmap=radocmap, norm=radonorm, ax=ax1,
            cbar_kwargs=dict(shrink=0.5))
    obj.zdata.trg.geo.plot(ax=ax1, facecolor="None", edgecolor="white")
    ax1.set_xlabel("UTM Zone 32 Easting")
    ax1.set_ylabel("UTM Zone 32 Northing")
    pl.draw()
    pl.tight_layout()


Zonal Stats Rectangular Grid¶

[ ]:

from matplotlib.collections import PatchCollection
from matplotlib.colors import from_levels_and_colors
import matplotlib.patches as patches
import datetime as dt
from osgeo import osr

[ ]:

# check for GEOS enabled GDAL
# The zonal statistics below need polygon intersections; stop early if the
# GDAL build cannot provide them.
if not wrl.util.has_geos():
    print("NO GEOS support within GDAL, aborting...")
    exit(0)

[ ]:

# Read and prepare the actual data (RADOLAN)
# NOTE(review): this cell contained only the comment, yet the following cells
# use ``ds`` (with ``x``/``y`` coordinates, a ``formatversion`` attribute and
# an ``SF`` variable). The file name and the "radolan" engine below are an
# assumption -- confirm against the wradlib-data repository.
filename = wrl.util.get_wradlib_data_file(
    'radolan/misc/raa01-sf_10000-1406100050-dwd---bin.gz')
ds = xr.open_dataset(filename, engine="radolan")
display(ds)

[ ]:

# Grid resolution: first difference between neighbouring x coordinates
gridres = ds.x.diff("x").isel(x=0).values
gridres

[ ]:

# create radolan projection osr object
# NOTE(review): both branches were empty. The mapping below (format version
# >= 5 -> WGS84 ellipsoid, older -> sphere) is reconstructed -- confirm.
if ds.attrs["formatversion"] >= 5:
    proj_stereo = wrl.georef.create_osr("dwd-radolan-wgs84")
else:
    proj_stereo = wrl.georef.create_osr("dwd-radolan-sphere")

# create UTM Zone 32 projection osr object
proj_utm = osr.SpatialReference()
proj_utm.ImportFromEPSG(32632)

# Source projection of the shape data (in GK2)
proj_gk2 = osr.SpatialReference()
proj_gk2.ImportFromEPSG(31466)

[ ]:

# Show the definition of the shapefile's source projection (EPSG:31466)
print(proj_gk2)

[ ]:

# Catchment polygons: stored in GK2, handled in UTM Zone 32 from here on
shpfile = wrl.util.get_wradlib_data_file('shapefiles/agger/agger_merge.shp')
trg = wrl.io.VectorSource(
    shpfile,
    srs=proj_utm,
    name="trg",
    projection_source=proj_gk2,
)
print(f"Found {len(trg)} sub-catchments in shapefile.")

[ ]:

# Show the coordinate reference system attached to the target layer
print(trg.crs)

[ ]:

# Bounding box of all catchments, widened by ``buffer`` on every side.
# ``trg.extent`` is indexed as (left, right, bottom, top).
buffer = 5000.
bbox = trg.extent
bbox = {
    "left": bbox[0] - buffer,
    "right": bbox[1] + buffer,
    "bottom": bbox[2] - buffer,
    "top": bbox[3] + buffer,
}
print(bbox)

[ ]:

# Get RADOLAN grid coordinates
# (ny, nx, 2) array of native x/y pairs, matching the ["y", "x"] dims below
grid_xy_radolan = np.stack(np.meshgrid(ds.x, ds.y), axis=-1)

# Reproject them from the RADOLAN projection to UTM Zone 32
xy = wrl.georef.reproject(grid_xy_radolan,
                          projection_source=proj_stereo,
                          projection_target=proj_utm)

# assign as coordinates
ds = ds.assign_coords({"xc": (["y", "x"], xy[..., 0], dict(long_name="UTM Zone 32 Easting", units="m")),
                       "yc": (["y", "x"], xy[..., 1], dict(long_name="UTM Zone 32 Northing", units="m"))})

# Keep only the grid cells inside the buffered catchment bounding box
ds_clip = ds.where((((ds.yc > bbox['bottom']) & (ds.yc < bbox['top'])) &
                    ((ds.xc > bbox['left']) & (ds.xc < bbox['right']))), drop=True)
display(ds_clip)

[ ]:

###########################################################################
# Approach #1: Assign grid points to each polygon and compute the average.
#
# - Uses matplotlib.path.Path
# - Each point is weighted equally (assumption: polygon >> grid cell)
# - this is quick, but theoretically dirty
###########################################################################

t1 = dt.datetime.now()

# Get RADOLAN center grid points for each grid cell
# (MUST BE DONE IN NATIVE RADOLAN COORDINATES)
grid_x, grid_y = np.meshgrid(ds_clip.x, ds_clip.y)
grdpoints = np.dstack([grid_x, grid_y]).reshape(-1, 2)

src = wrl.io.VectorSource(grdpoints, srs=proj_utm, name="src", projection_source=proj_stereo)
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

# Create instance of type ZonalDataPoint from source grid and
# catchment array
zd = wrl.zonalstats.ZonalDataPoint(src, trg, srs=proj_utm, buf=500.)
# dump to file (for later use - see below)
zd.dump_vector('test_zonal_points_cart')
# Create instance of type ZonalStatsPoint from zonal data object
obj1 = wrl.zonalstats.ZonalStatsPoint(zd)

isecs1 = obj1.zdata.isecs  # for plotting (see below)

t2 = dt.datetime.now()

# Create instance of type ZonalStatsPoint from zonal data file
# (much faster)
obj1 = wrl.zonalstats.ZonalStatsPoint('test_zonal_points_cart')

# Timestamp between loading and computing so both steps are timed
# separately.
t3 = dt.datetime.now()

# Compute stats for target polygons
avg1 = obj1.mean(ds_clip.SF.values.ravel())
var1 = obj1.var(ds_clip.SF.values.ravel())

t4 = dt.datetime.now()

print("Approach #1 computation time:")
print("\tCreate object from scratch: %f "
      "seconds" % (t2 - t1).total_seconds())
print("\tCreate object from dumped file: %f "
      "seconds" % (t3 - t2).total_seconds())
print("\tCompute stats using object: %f "
      "seconds" % (t4 - t3).total_seconds())

# PLOTTING Approach #1

src = wrl.io.VectorSource(grdpoints, srs=proj_utm, name="src", projection_source=proj_stereo)
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)
# Just a test for plotting results with zero buffer
zd2 = wrl.zonalstats.ZonalDataPoint(src, trg, buf=0)
# Create instance of type ZonalStatsPoint from zonal data object
obj2 = wrl.zonalstats.ZonalStatsPoint(zd2)
# copy attributes to target layer
obj2.zdata.trg.set_attribute("mean", avg1)
obj2.zdata.trg.set_attribute("var", var1)
isecs2 = obj2.zdata.isecs

[ ]:

# Illustrate results for an example catchment i
i = 6  # try e.g. 5, 2
fig = pl.figure(figsize=(10,8))
# Every plot call below draws into this axes via ``ax=ax``; it was never
# created. NOTE(review): the equal aspect ratio is an assumption.
ax = fig.add_subplot(111, aspect="equal")

# Target polygon patches
trg_patch = obj2.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="black", linewidth=2)
trg_patch = obj1.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="grey", linewidth=2)

# pips
sources = obj1.zdata.src.geo
sources.plot(ax=ax, label="all points", c="grey", markersize=200)
# obj2 was built with buf=0, obj1 with buf=500.
isecs1 = obj2.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs1.plot(ax=ax, label="buffer=0 m", c="green", markersize=200)
isecs2 = obj1.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs2.plot(ax=ax, label="buffer=500 m", c="red", markersize=50)

# Zoom to the selected catchment plus a 2000 unit margin
cat = trg.get_data_by_idx([i])[0]
bbox = wrl.zonalstats.get_bbox(cat[..., 0], cat[..., 1])
ax.set_xlim(bbox["left"] - 2000, bbox["right"] + 2000)
ax.set_ylim(bbox["bottom"] - 2000, bbox["top"] + 2000)
ax.legend()
ax.set_title("Catchment #%d: Points considered for stats" % i)

[ ]:

# Catchment means (point approach) above the clipped radar field
testplot(
    ds_clip.SF,
    obj2,
    col="mean",
    title="Catchment rainfall mean (ZonalStatsPoint)",
)

[ ]:

# Catchment variances (point approach); classes in steps of 1 up to the
# largest variance
testplot(
    ds_clip.SF,
    obj2,
    col="var",
    levels=np.arange(0, np.max(var1), 1.),
    title="Catchment rainfall variance (ZonalStatsPoint)",
)

[ ]:

###########################################################################
# Approach #2: Compute weighted mean based on fraction of source polygons
# in target polygons
#
# - This is more accurate (no assumptions), but probably slower...
###########################################################################

# Create vertices for each grid cell
# (MUST BE DONE IN NATIVE RADOLAN COORDINATES)
grid_x, grid_y = np.meshgrid(ds_clip.x, ds_clip.y)
grdverts = wrl.zonalstats.grid_centers_to_vertices(grid_x,
                                                   grid_y,
                                                   gridres, gridres)
# And reproject to Cartesian reference system (here: UTM Zone 32)
src = wrl.io.VectorSource(grdverts, srs=proj_utm, name="src", projection_source=proj_stereo)
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

t1 = dt.datetime.now()

# Create instance of type ZonalDataPoly from source grid and
# catchment array
zd = wrl.zonalstats.ZonalDataPoly(src, trg, srs=proj_utm)
# dump to file
zd.dump_vector('test_zonal_poly_cart')
# Create instance of type ZonalStatsPoly from zonal data object
obj3 = wrl.zonalstats.ZonalStatsPoly(zd)

t2 = dt.datetime.now()

# Create instance of type ZonalStatsPoly from zonal data file
obj3 = wrl.zonalstats.ZonalStatsPoly('test_zonal_poly_cart')

# Timestamp between loading and computing so both steps are timed
# separately.
t3 = dt.datetime.now()

# Compute stats for target polygons
avg3 = obj3.mean(ds_clip.SF.values.ravel())
var3 = obj3.var(ds_clip.SF.values.ravel())

t4 = dt.datetime.now()

print("Approach #2 computation time:")
print("\tCreate object from scratch: %f "
      "seconds" % (t2 - t1).total_seconds())
print("\tCreate object from dumped file: %f "
      "seconds" % (t3 - t2).total_seconds())
print("\tCompute stats using object: %f "
      "seconds" % (t4 - t3).total_seconds())

[ ]:

# PLOTTING Approach #2

# Plot average rainfall and original data
# Use the clipped field the statistics were computed from, as the
# Approach #1 plots do, instead of the whole unclipped grid.
testplot(ds_clip.SF, obj3, col="mean",
         title="Catchment rainfall mean (ZonalStatsPoly)")

[ ]:

# Variance per catchment (polygon approach). Use the clipped field the
# statistics were computed from instead of the whole unclipped grid.
testplot(ds_clip.SF, obj3, col="var",
         levels=np.arange(0, np.max(var3), 1.),
         title="Catchment rainfall variance (ZonalStatsPoly)")

[ ]:

# Display the clipped dataset
ds_clip

[ ]:

# Illustrate results for an example catchment i
i = 6  # try e.g. 5, 2
fig = pl.figure(figsize=(10,8))
# Every plot call below draws into this axes via ``ax=ax``; it was never
# created. NOTE(review): the equal aspect ratio is an assumption.
ax = fig.add_subplot(111, aspect="equal")

# Grid cell patches
src_index = obj3.zdata.get_source_index(i)
trg_patch = obj3.zdata.src.get_data_by_idx(src_index, mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="black")

# Target polygon patches
trg_patch = obj3.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="red", linewidth=2)

# intersections
isecs1 = obj3.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs1.plot(column="src_index", ax=ax, cmap=pl.cm.plasma, alpha=0.5)

# scatter center points (explicit axes instead of relying on the current one)
ds_clip.plot.scatter(x="xc", y="yc", s=10, ax=ax)

# Zoom to the selected catchment plus a 2000 unit margin
cat = trg.get_data_by_idx([i])[0]
bbox = wrl.zonalstats.get_bbox(cat[..., 0], cat[..., 1])
ax.set_xlim(bbox["left"] - 2000, bbox["right"] + 2000)
ax.set_ylim(bbox["bottom"] - 2000, bbox["top"] + 2000)
ax.legend()
ax.set_title("Catchment #%d: Polygons considered for stats" % i)
#pl.gca().set_xlim(402000, 404000)
#pl.gca().set_ylim(5642000, 5644000)

[ ]:

# Compare estimates: point average (x) against area-weighted average (y)
maxlim = np.concatenate((avg1, avg3)).max()
fig = pl.figure(figsize=(10, 8))
pl.scatter(avg1, avg3, edgecolor="None", alpha=0.5)
pl.xlabel("Average of points in or close to polygon (mm)")
pl.ylabel("Area-weighted average (mm)")
# Same range on both axes
pl.xlim(0, maxlim)
pl.ylim(0, maxlim)
# 1:1 reference line, drawn slightly beyond the axis limits
pl.plot([-1, maxlim + 1], [-1, maxlim + 1], color="black")
pl.show()


Zonal Stats Polar Grid¶

[ ]:

def create_center_coords(ds, proj=None):
    """Attach radar bin centroid coordinates to a polar dataset.

    The centroids come from ``wrl.georef.spherical_to_centroids`` using the
    dataset's ``data.r`` ranges, its azimuth and elevation values and its
    site location (longitude, latitude, altitude).

    Parameters
    ----------
    ds : xarray.Dataset
        Polar dataset with ``azimuth``/``range`` dimensions.
    proj : optional
        Passed through unchanged as ``proj`` to ``spherical_to_centroids``.

    Returns
    -------
    xarray.Dataset
        ``ds`` with additional ``xc``, ``yc`` and ``zc`` coordinates on the
        (``azimuth``, ``range``) dimensions.
    """
    site = (ds.longitude.values, ds.latitude.values, ds.altitude.values)
    centroids = wrl.georef.spherical_to_centroids(ds.data.r,
                                                  ds.azimuth.values,
                                                  ds.elevation.values,
                                                  site,
                                                  proj=proj)
    # The last axis of the centroid array holds the x, y, z components
    return ds.assign_coords({
        "xc": (["azimuth", "range"], centroids[..., 0]),
        "yc": (["azimuth", "range"], centroids[..., 1]),
        "zc": (["azimuth", "range"], centroids[..., 2]),
    })

[ ]:

# Open the polar rain sum file and give the anonymous HDF5 dimensions names
filename = wrl.util.get_wradlib_data_file('hdf5/rainsum_boxpol_20140609.h5')
ds = xr.open_dataset(filename).rename_dims(
    {"phony_dim_0": "azimuth", "phony_dim_1": "range"}
)
# Site location and sweep geometry, taken from the attributes of ``data``
# plus fixed values for altitude, elevation and sweep mode
ds = ds.assign_coords(
    latitude=ds.data.Latitude,
    longitude=ds.data.Longitude,
    altitude=99.5,
    azimuth=ds.data.az,
    # bin centers: shift ``r`` back by half the median bin spacing
    range=ds.data.r - np.median(np.diff(ds.data.r)) / 2.,
    sweep_mode="azimuth_surveillance",
    elevation=0.5,
)

[ ]:

# Georeference the sweep in UTM Zone 32, then add the bin centroid
# coordinates (xc, yc, zc)
ds = wrl.georef.georeference_dataset(ds, proj=proj_utm)
ds = create_center_coords(ds, proj=proj_utm)
display(ds)

[ ]:

# Reload the catchments (target layer) in UTM Zone 32
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

# create catchment bounding box, widened by ``buffer`` on every side
buffer = 5000.
bbox = trg.extent
bbox = {
    "left": bbox[0] - buffer,
    "right": bbox[1] + buffer,
    "bottom": bbox[2] - buffer,
    "top": bbox[3] + buffer,
}

[ ]:

# Keep only the radar bins whose centroid lies strictly inside the bbox
ds_clip = ds.where(
    (ds.xc > bbox['left']) & (ds.xc < bbox['right'])
    & (ds.yc > bbox['bottom']) & (ds.yc < bbox['top']),
    drop=True,
)
display(ds_clip)

[ ]:

# Flatten the centroid coordinates into an (n_bins, 2) array of x/y pairs
radar_utmc = np.column_stack([np.asarray(ds_clip.xc).ravel(),
                              np.asarray(ds_clip.yc).ravel()])

[ ]:

# Source layer: the clipped radar bin centroids, which are already in UTM
# Zone 32. Without this the following cell would silently reuse the RADOLAN
# ``src`` left over from the rectangular-grid section.
src = wrl.io.VectorSource(radar_utmc, srs=proj_utm, name="src")
# Target layer: the catchment polygons
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

[ ]:

###########################################################################
# Approach #1: Assign grid points to each polygon and compute the average.
#
# - Uses matplotlib.path.Path
# - Each point is weighted equally (assumption: polygon >> grid cell)
# - this is quick, but theoretically dirty
# - for polar grids a range-area dependency has to be taken into account
###########################################################################

t1 = dt.datetime.now()

# Create instance of type ZonalDataPoint from source grid and
# catchment array
zd = wrl.zonalstats.ZonalDataPoint(src, trg, srs=proj_utm,
                                   buf=500.)
# dump to file
zd.dump_vector('test_zonal_points')
# Create instance of type ZonalStatsPoint from zonal data object
obj1 = wrl.zonalstats.ZonalStatsPoint(zd)

isecs1 = obj1.zdata.isecs
t2 = dt.datetime.now()

# Create instance of type ZonalStatsPoint from zonal data file
obj1 = wrl.zonalstats.ZonalStatsPoint('test_zonal_points')

# Timestamp between loading and computing so both steps are timed
# separately.
t3 = dt.datetime.now()

# Compute stats for target polygons
avg1 = obj1.mean(ds_clip.data.values.ravel())
var1 = obj1.var(ds_clip.data.values.ravel())

t4 = dt.datetime.now()

print("Approach #1 computation time:")
print(
    "\tCreate object from scratch: %f seconds" % (t2 - t1).total_seconds())
print(
    "\tCreate object from dumped file: %f seconds" % (t3 - t2).total_seconds())
print(
    "\tCompute stats using object: %f seconds" % (t4 - t3).total_seconds())

[ ]:

# PLOTTING Approach #1
# Fresh source/target layers for the zero-buffer comparison. ``src1`` was
# used below without being defined; it is built from the clipped bin
# centroids, which are already in UTM Zone 32.
src1 = wrl.io.VectorSource(radar_utmc, srs=proj_utm, name="src")
trg1 = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

# Just a test for plotting results with zero buffer
zd = wrl.zonalstats.ZonalDataPoint(src1, trg1, buf=0)
# Create instance of type ZonalStatsPoint from zonal data object
obj2 = wrl.zonalstats.ZonalStatsPoint(zd)
# copy the statistics computed with obj1 to this target layer
obj2.zdata.trg.set_attribute("mean", avg1)
obj2.zdata.trg.set_attribute("var", var1)

isecs2 = obj2.zdata.isecs

[ ]:

# Illustrate results for an example catchment i
i = 0  # try e.g. 5, 2
fig = pl.figure(figsize=(10,8))
# Every plot call below draws into this axes via ``ax=ax``; it was never
# created. NOTE(review): the equal aspect ratio is an assumption.
ax = fig.add_subplot(111, aspect="equal")

# Target polygon patches
trg_patch = obj2.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="black", linewidth=2)
trg_patch = obj1.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="grey", linewidth=2)

# pips
sources = obj1.zdata.src.geo
sources.plot(ax=ax, label="all points", c="grey", markersize=200)
# obj2 was built with buf=0, obj1 with buf=500.
isecs1 = obj2.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs1.plot(ax=ax, label="buffer=0 m", c="green", markersize=200)
isecs2 = obj1.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs2.plot(ax=ax, label="buffer=500 m", c="red", markersize=50)

# Zoom to the selected catchment plus a 2000 unit margin
cat = trg.get_data_by_idx([i])[0]
bbox = wrl.zonalstats.get_bbox(cat[..., 0], cat[..., 1])
ax.set_xlim(bbox["left"] - 2000, bbox["right"] + 2000)
ax.set_ylim(bbox["bottom"] - 2000, bbox["top"] + 2000)
ax.legend()
ax.set_title("Catchment #%d: Points considered for stats" % i)

[ ]:

# Catchment means (point approach) above the clipped polar radar field
testplot(
    ds_clip.data,
    obj2,
    col="mean",
    title="Catchment rainfall mean (ZonalStatsPoint)",
)

[ ]:

# Catchment variances (point approach), classes 0..19 in steps of 1
testplot(
    ds_clip.data,
    obj2,
    col="var",
    levels=np.arange(0, 20, 1.0),
    title="Catchment rainfall variance (ZonalStatsPoint)",
)

[ ]:

# Polygon vertices of every radar bin in UTM Zone 32. The ``range``
# coordinate holds bin centers, so half a bin width is added back before
# the call. The elevation is read from the dataset instead of repeating
# the literal.
radar_utm = wrl.georef.spherical_to_polyvert(ds.range.values + np.median(np.diff(ds.range.values)) / 2.,
                                             ds.azimuth.values,
                                             ds.elevation.values,
                                             (ds.longitude.values, ds.latitude.values, ds.altitude.values),
                                             proj=proj_utm)
# Restore the (azimuth, range) layout: 5 vertices per bin, 3 components
# per vertex. Sizes come from the dataset rather than a hard-coded
# 360 x 1000 sweep.
radar_utm = radar_utm.reshape(ds.sizes["azimuth"], ds.sizes["range"], 5, 3)
ds = ds.assign_coords({"xp": (["azimuth", "range", "verts"], radar_utm[..., 0]),
                       "yp": (["azimuth", "range", "verts"], radar_utm[..., 1]),
                       "zp": (["azimuth", "range", "verts"], radar_utm[..., 2])})

trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)
bbox = trg.extent

# create catchment bounding box
buffer = 5000.
bbox = dict(left=bbox[0] - buffer, right=bbox[1] + buffer,
            bottom=bbox[2] - buffer, top=bbox[3] + buffer)
# Clip again so that the new polygon coordinates are part of ds_clip
ds_clip = ds.where((((ds.yc > bbox['bottom']) & (ds.yc < bbox['top'])) &
                    ((ds.xc > bbox['left']) & (ds.xc < bbox['right']))), drop=True)
display(ds_clip)

[ ]:

# (n_bins, 5, 2) array: x/y of the 5 polygon vertices of every clipped bin
radar_utm = np.stack([ds_clip.xp, ds_clip.yp], axis=-1).reshape(-1, 5, 2)
# Source layer: the bin polygons, already in UTM Zone 32. Without this the
# following cell would reuse whatever ``src`` an earlier cell left behind.
src = wrl.io.VectorSource(radar_utm, srs=proj_utm, name="src")
# Target layer: the catchment polygons
trg = wrl.io.VectorSource(shpfile, srs=proj_utm, name="trg", projection_source=proj_gk2)

[ ]:

###########################################################################
# Approach #2: Compute weighted mean based on fraction of source polygons
# in target polygons
#
# - This is more accurate (no assumptions), but probably slower...
###########################################################################

t1 = dt.datetime.now()

# Create instance of type ZonalDataPoly from source grid and
# catchment array
zd = wrl.zonalstats.ZonalDataPoly(src, trg, srs=proj_utm)
# dump to file (once; the second dump of the same data to the same file
# via obj3.zdata was removed)
zd.dump_vector('test_zonal_poly')
# Create instance of type ZonalStatsPoly from zonal data object
obj3 = wrl.zonalstats.ZonalStatsPoly(zd)

t2 = dt.datetime.now()

# Create instance of type ZonalStatsPoly from zonal data file
obj4 = wrl.zonalstats.ZonalStatsPoly('test_zonal_poly')

# Timestamp between loading and computing so both steps are timed
# separately.
t3 = dt.datetime.now()

# Compute stats for target polygons
avg3 = obj4.mean(ds_clip.data.values.ravel())
var3 = obj4.var(ds_clip.data.values.ravel())

t4 = dt.datetime.now()

print("Approach #2 computation time:")
print(
    "\tCreate object from scratch: %f seconds" % (t2 - t1).total_seconds())
print(
    "\tCreate object from dumped file: %f seconds" % (t3 - t2).total_seconds())
print(
    "\tCompute stats using object: %f seconds" % (t4 - t3).total_seconds())

# Export the target layer as raster and as vector files
obj4.zdata.trg.dump_raster('test_zonal_hdr.nc', 'netCDF', 'mean',
                           pixel_size=100.)

obj4.zdata.trg.dump_vector('test_zonal_shp')
obj4.zdata.trg.dump_vector('test_zonal_json.geojson', 'GeoJSON')

# Target polygon patches
# ``closed`` is keyword-only in current Matplotlib releases
trg_patches = [patches.Polygon(item, closed=True) for item in obj3.zdata.trg.data]

[ ]:

# Quick look at the clipped rain sums against the dataset's x/y coordinates
# (presumably added by georeference_dataset -- TODO confirm)
ds_clip.data.plot(x="x", y="y")

[ ]:

# Catchment means (polygon approach) above the clipped polar radar field
testplot(
    ds_clip.data,
    obj4,
    col="mean",
    title="Catchment rainfall mean (PolarZonalStatsPoly)",
)

[ ]:

# Catchment variances (polygon approach), classes 0..19 in steps of 1
testplot(
    ds_clip.data,
    obj4,
    col="var",
    levels=np.arange(0, 20, 1.0),
    title="Catchment rainfall variance (PolarZonalStatsPoly)",
)

[ ]:

# Illustrate results for an example catchment i
i = 0  # try e.g. 5, 2
fig = pl.figure(figsize=(10,8))
# Every plot call below draws into this axes via ``ax=ax``; it was never
# created. NOTE(review): the equal aspect ratio is an assumption.
ax = fig.add_subplot(111, aspect="equal")

# Grid cell patches
src_index = obj3.zdata.get_source_index(i)
trg_patch = obj3.zdata.src.get_data_by_idx(src_index, mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="black")

# Target polygon patches
trg_patch = obj3.zdata.trg.get_data_by_idx([i], mode="geo")
trg_patch.plot(ax=ax, facecolor="None", edgecolor="red", linewidth=2)

# intersections
isecs1 = obj3.zdata.dst.get_data_by_att(attr="trg_index", value=[i], mode="geo")
isecs1.plot(column="src_index", ax=ax, cmap=pl.cm.plasma, alpha=0.5)

# scatter center points (explicit axes instead of relying on the current one)
ds_clip.plot.scatter(x="xc", y="yc", s=10, ax=ax)

cat = trg.get_data_by_idx([i])[0]
bbox = wrl.zonalstats.get_bbox(cat[..., 0], cat[..., 1])
ax.set_xlim(bbox["left"] - 2000, bbox["right"] + 2000)
ax.set_ylim(bbox["bottom"] - 2000, bbox["top"] + 2000)
ax.legend()
ax.set_title("Catchment #%d: Polygons considered for stats" % i)
# Final zoom: these limits override the catchment-wide limits set above
ax.set_xlim(402000, 404000)
ax.set_ylim(5654000, 5656000)

[ ]:

# Compare estimates
maxlim = np.max(np.concatenate((avg1, avg3)))
fig = pl.figure(figsize=(10, 8))