National Water ModelΒΆ

Compute and visualize the mean annual river discharge from the National Water Model v2 from 2.7 million rivers in minutes using Pangeo

[1]:
import xarray as xr
import fsspec
import numpy as np
[2]:
import hvplot.pandas
import hvplot.xarray
import geoviews as gv
from holoviews.operation.datashader import rasterize
import cartopy.crs as ccrs
[3]:
from dask.distributed import Client, progress

from dask_gateway import Gateway
gateway = Gateway()
[4]:
cluster = gateway.new_cluster()
[5]:
cluster.adapt(minimum=4, maximum=20);
[6]:
client = Client(cluster)
client
[6]:

Client

Cluster

  • Workers: 0
  • Cores: 0
  • Memory: 0 B
[7]:
ds = xr.open_zarr(fsspec.get_mapper('s3://pangeo-data-uswest2/esip/NWM2/2017', anon=False, requester_pays=True))
[8]:
ds
[8]:
<xarray.Dataset>
Dimensions:         (feature_id: 2729077, time: 8760)
Coordinates:
  * feature_id      (feature_id) int32 101 179 181 ... 1180001803 1180001804
    latitude        (feature_id) float32 dask.array<chunksize=(209929,), meta=np.ndarray>
    longitude       (feature_id) float32 dask.array<chunksize=(209929,), meta=np.ndarray>
  * time            (time) datetime64[ns] 2017-01-01 ... 2017-12-31T23:00:00
Data variables:
    crs             (time) |S1 dask.array<chunksize=(72,), meta=np.ndarray>
    elevation       (time, feature_id) float32 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    order           (time, feature_id) int32 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    qBtmVertRunoff  (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    qBucket         (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    qSfcLatRunoff   (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    q_lateral       (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    streamflow      (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
    velocity        (time, feature_id) float64 dask.array<chunksize=(72, 209929), meta=np.ndarray>
Attributes:
    Conventions:                CF-1.6
    cdm_datatype:               Station
    code_version:               v5.1.0-alpha11
    dev:                        dev_ prefix indicates development/internal me...
    dev_NOAH_TIMESTEP:          3600
    dev_OVRTSWCRT:              1
    dev_channelBucket_only:     0
    dev_channel_only:           0
    featureType:                timeSeries
    model_configuration:        retrospective
    model_initialization_time:  2016-10-01_00:00:00
    model_output_type:          channel_rt
    model_output_valid_time:    2017-01-01_00:00:00
    model_total_valid_times:    2208
    proj4:                      +proj=lcc +units=m +a=6370000.0 +b=6370000.0 ...
    station_dimension:          feature_id
    stream_order_output:        1
[9]:
with fsspec.open('s3://pangeo-data-uswest2/esip/NWM2/nwm-v1.2-channel_spatial_index.nc', anon=False, requester_pays=True) as f:
    ds_lonlat = xr.open_dataset(f)
    lat = ds_lonlat['latitude'].values
    lon = ds_lonlat['longitude'].values
[10]:
print(lat.max(), lon.max())
52.86352 -66.99203

Let’s find the site with the largest streamflow on June 1

[11]:
imax = ds.streamflow.sel(time='2017-06-01 00:00:00').argmax().values

Let’s plot the whole year-long time series at that location

[12]:
%%time
ds.streamflow[:,imax].hvplot()
CPU times: user 84 ms, sys: 12.6 ms, total: 96.6 ms
Wall time: 8.95 s
[12]:
[13]:
var='streamflow'
[14]:
ds[var].nbytes/1e9
[14]:
191.25371616
[15]:
var_mean = ds[var].mean(dim='time').persist()
progress(var_mean)
[16]:
df = var_mean.to_pandas().to_frame()
[17]:
df = df.assign(latitude=lat)
df = df.assign(longitude=lon)
df.rename(columns={0: "transport"}, inplace=True)
[18]:
p = df.hvplot.points('longitude', 'latitude', crs=ccrs.PlateCarree(),
                     c='transport', colorbar=True, size=14)
[19]:
g = rasterize(p, aggregator='mean', x_sampling=0.02, y_sampling=0.02, width=500).opts(tools=['hover'],
                aspect='equal', logz=True, clim=(1e-2, np.nan))
[20]:
g * gv.tile_sources.OSM
[20]: