import pathlib as pl
import tempfile
import urllib
from typing import Union
from warnings import warn
import numpy as np
import pandas as pd
import xarray as xr
from ..base.data_model import DatasetDict
from ..base.parameters import Parameters
from ..constants import nan, nat
from ..utils.optional_import import import_optional_dependency
gpd = import_optional_dependency("geopandas", errors="ignore")
# Names of the STARFIT parameters. These are copied here rather than taken
# from the Starfit class because both
#     from ..hydrology.starfit import Starfit
#     from pywatershed import Starfit
# are circular imports from this module.
# _get_istarf_conus() uses this tuple to decide which variables read from the
# ISTARF-CONUS table are kept.
starfit_param_names = (
    "grand_id",
    "GRanD_NAME",
    "initial_storage",
    "start_time",
    "end_time",
    "inflow_mean",
    "NORhi_min",
    "NORhi_max",
    "NORhi_alpha",
    "NORhi_beta",
    "NORhi_mu",
    "NORlo_min",
    "NORlo_max",
    "NORlo_alpha",
    "NORlo_beta",
    "NORlo_mu",
    "Release_min",
    "Release_max",
    "Release_alpha1",
    "Release_alpha2",
    "Release_beta1",
    "Release_beta2",
    "Release_p1",
    "Release_p2",
    "Release_c",
    "GRanD_CAP_MCM",
    "Obs_MEANFLOW_CUMECS",
    "GRanD_MEANFLOW_CUMECS",
)
[docs]
class StarfitParameters(Parameters):
    """Starfit parameter class.

    This parameter class provides STARFIT parameters for modeling. This
    class does NOT calculate the parameters from inputs (e.g. as ISTARF-CONUS
    did using ResOpsUS), it simply provides the format for the model to get
    the parameter data.

    The data supplied can come from whatever means. The method
    `from_istarf_conus_grand` uses existing ISTARF-CONUS and GRanD data to
    create a parameter object for the user.

    References:

    **ISTARF-CONUS (Inferred Storage Targets and Release Functions - Continental
    US)**: Sean W.D. Turner, Jennie Clarice Steyaert, Laura Condon,
    Nathalie Voisin, Water storage and release policies for all large
    reservoirs of conterminous United States, Journal of Hydrology,
    Volume 603, Part A, 2021, 126843, ISSN 0022-1694,
    https://doi.org/10.1016/j.jhydrol.2021.126843.
    https://zenodo.org/records/4602277

    **GRanD (Global Reservoir and Dam) database**: Lehner, Bernhard, Catherine
    Reidy Liermann, Carmen Revenga, Charles
    Vörösmarty, Balazs Fekete, Philippe Crouzet, Petra Döll et al. "High‐
    resolution mapping of the world's reservoirs and dams for sustainable
    river‐flow management." Frontiers in Ecology and the Environment 9, no. 9
    (2011): 494-502.
    https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013

    **ResOpsUS**: Steyaert, Jennie C., Laura E. Condon, Sean WD Turner, and
    Nathalie Voisin. "ResOpsUS, a dataset of historical reservoir operations
    in the contiguous United States." Scientific Data 9, no. 1 (2022): 34.
    https://zenodo.org/records/6612040

    Parameters
    ----------
    dims : dict
        Forwarded unchanged to ``Parameters.__init__`` as ``dims``.
    coords : dict
        Forwarded unchanged to ``Parameters.__init__`` as ``coords``.
    data_vars : dict
        Forwarded unchanged to ``Parameters.__init__`` as ``data_vars``.
        After initialization ``self.parameters["grand_id"]`` is read, so a
        ``grand_id`` entry must be available among the parameters.
    metadata : dict
        Forwarded unchanged to ``Parameters.__init__`` as ``metadata``.
    encoding : dict, optional
        Forwarded to ``Parameters.__init__`` as ``encoding``. When omitted
        (or None) a new empty dict is used for each instance.
    """  # noqa: E501

    def __init__(
        self,
        dims: dict,
        coords: dict,
        data_vars: dict,
        metadata: dict,
        encoding: dict = None,
    ) -> None:
        """Initialize the parameters; see the class docstring for arguments."""
        # Use a fresh dict per call: a ``{}`` default in the signature would
        # be one object shared (and possibly mutated) across all instances.
        if encoding is None:
            encoding = {}

        super().__init__(
            dims=dims,
            coords=coords,
            data_vars=data_vars,
            metadata=metadata,
            encoding=encoding,
        )
        # TODO: remove this throughout, no prms specific parameter methods
        # should be used in netcdf utils
        self.nhm_coordinates = {"grand_id": self.parameters["grand_id"]}

    @staticmethod
    def from_netcdf(
        resops_domain: Union[str, pl.Path],
        istarf_conus: Union[str, pl.Path],
        grand_dams: Union[str, pl.Path],
        grand_ids: Union[str, pl.Path] = None,
        param_names: list = None,
    ) -> "StarfitParameters":
        """Build a parameter object by merging three netcdf files.

        The ISTARF-CONUS and GRanD data are each subset to the ``grand_id``
        values present in the ResOpsUS domain file, then all three are merged
        and the ``grand_id`` dimension is renamed to ``nreservoirs``.

        TODO: what are the netcdf parameter files? describe their format

        Parameters
        ----------
        resops_domain : Union[str, pl.Path]
            Netcdf file read with ``DatasetDict.from_netcdf``; its
            ``grand_id`` coordinate defines which reservoirs are kept.
        istarf_conus : Union[str, pl.Path]
            Netcdf file of ISTARF-CONUS data whose ``GRanD_ID`` dimension and
            variable are renamed to ``grand_id``.
        grand_dams : Union[str, pl.Path]
            Netcdf file of GRanD data whose ``GRAND_ID`` dimension and
            variable are renamed to ``grand_id``.
        grand_ids : optional
            Currently unused: it is accepted but has no effect on the result.
        param_names : list, optional
            If given and non-empty, the merged data are subset to these names
            via ``DatasetDict.subset``.

        Returns
        -------
        The object returned by ``StarfitParameters.from_dict`` for the merged
        data.
        """
        resops_dd = DatasetDict.from_netcdf(resops_domain)
        istarf_conus_dd = DatasetDict.from_netcdf(istarf_conus)
        grand_dams_dd = DatasetDict.from_netcdf(grand_dams)

        # The two sources get identical treatment and differ only in how they
        # spell the id name. Return values of the DatasetDict methods are
        # ignored here, as the objects are relied upon to be edited in place.
        for source_dd, id_name in (
            (istarf_conus_dd, "GRanD_ID"),
            (grand_dams_dd, "GRAND_ID"),
        ):
            id_rename = {id_name: "grand_id"}
            source_dd.rename_dim(id_rename)
            source_dd.rename_var(id_rename)
            # keep only the reservoirs that are also in the resops domain
            wh_subset = np.where(
                np.isin(
                    source_dd.coords["grand_id"],
                    resops_dd.coords["grand_id"],
                )
            )
            source_dd.subset_on_coord("grand_id", wh_subset)
            # presumably "subset_inds" is added by subset_on_coord
            # -- TODO confirm
            source_dd.drop_var("subset_inds")

        params_dd = DatasetDict.merge(
            resops_dd, istarf_conus_dd, grand_dams_dd
        )

        # probably should have named the spatial dim nreservoirs when
        # I created the netcdf file
        _ = params_dd.rename_dim({"grand_id": "nreservoirs"})

        if param_names:
            params_dd = params_dd.subset(param_names)

        return StarfitParameters.from_dict(params_dd.data)

    @staticmethod
    def from_istarf_conus_grand(
        grand_file: Union[pl.Path, str],
        istarf_file: Union[pl.Path, str] = None,
        files_directory: Union[pl.Path, str] = pl.Path("."),
        grand_ids: list = None,
    ):
        """Build parameter object from istarf-conus and the GRanD v1.3 sources.

        This returns the parameters for the STARFIT method. The parameters
        are in the original units of the method.

        Note that this method returns nan for the fields of start_time,
        end_time, and initial_storage. The user can edit the parameter set if
        she would like to change these with the following basic steps
        (outlined in an example notebook): export the parameters to an xarray
        data set via params.to_xr_ds(), then edit params using xarray, finally
        instantiate a parameter object from the xarray dataset using
        params = StarfitParameters.from_ds(param_ds). The units of
        initial_storage supplied should match the units of flow input to
        Starfit.

        Parameters
        ----------
        grand_file : Union[pl.Path, str]
            A path to an existing dbf or shp file. If the file does not
            exist, an error will be thrown and you must download it manually
            at https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013
        istarf_file : Union[pl.Path, str], optional
            A path to an existing file. If file does not exist or is
            None then the file will be downloaded to files_directory. You can
            download the file yourself here
            https://zenodo.org/records/4602277
        files_directory : Union[pl.Path, str], optional
            A local directory where to download the file. Default is current
            directory. If the directory does not exist, a warning is issued
            and a temporary directory is used for the download.
        grand_ids : list, optional
            A subset of grand_ids to keep.

        Returns
        -------
        The object returned by ``StarfitParameters.from_dict`` for the
        combined GRanD and ISTARF-CONUS data.

        Raises
        ------
        ValueError
            If any requested ``grand_ids`` are not present in both the GRanD
            and the ISTARF-CONUS data.

        Examples
        --------
        Read the full ISTARF-CONUS dataset, identify the "big sandy" reservoir
        by name to get its grand_id, then subset the parameters to this
        grand_id. This requires downloading the GRanD and ISTARF-CONUS datasets
        in advance and specifying the paths to those files.

        >>> import pywatershed as pws
        >>> grand_file = (
        ...     your_data_dir / "GRanD_Version_1_3/GRanD_reservoirs_v1_3.dbf"
        ... )
        >>> istarf_file = your_data_dir / "ISTARF-CONUS.csv"
        >>> sf_params = (
        ...     pws.parameters.StarfitParameters.from_istarf_conus_grand(
        ...         grand_file=grand_file, istarf_file=istarf_file
        ...     )
        ... )
        >>> grand_names = sf_params.parameters["GRanD_NAME"].tolist()
        >>> # where is Big Sandy?
        >>> big_sandy_index = [
        ...     ii
        ...     for ii, nn in enumerate(grand_names)
        ...     if "big sandy" in str(nn).lower()
        ... ][0]
        >>> big_sandy_grand_id = sf_params.parameters["grand_id"][
        ...     big_sandy_index
        ... ]
        >>> # get a parameter set with just the big sandy dike
        >>> sf_params = (
        ...     pws.parameters.StarfitParameters.from_istarf_conus_grand(
        ...         grand_file=grand_file,
        ...         istarf_file=istarf_file,
        ...         grand_ids=[big_sandy_grand_id],
        ...     )
        ... )
        """  # noqa: E501
        grand_ds = _get_grand(grand_file)
        istarf_ds = _get_istarf_conus(istarf_file, files_directory)

        # only reservoirs present in both sources can be parameterized
        common_grand_ids = set(grand_ds.grand_id.values).intersection(
            set(istarf_ds.grand_id.values)
        )

        if grand_ids is not None:
            # make sure all requested grand_ids are in common_grand_ids
            requested_grand_ids = set(grand_ids)
            unavail_grand_ids = requested_grand_ids - common_grand_ids
            if unavail_grand_ids:
                msg = (
                    "The following requested grand_ids were not available "
                    f"in the data sources: {unavail_grand_ids}"
                )
                raise ValueError(msg)
            common_grand_ids = requested_grand_ids

        # go through a list: np.array(<set>) would give a 0-d object array
        common_grand_ids = np.array(list(common_grand_ids))

        grand_ds = grand_ds.where(
            grand_ds.grand_id.isin(common_grand_ids), drop=True
        )
        istarf_ds = istarf_ds.where(
            istarf_ds.grand_id.isin(common_grand_ids), drop=True
        )
        ds = xr.combine_by_coords([grand_ds, istarf_ds])

        # These fields are not provided by either source: fill them with
        # missing values for the user to edit afterwards.
        nreservoirs = len(ds.nreservoirs)
        for vv in ["start_time", "end_time"]:
            ds[vv] = xr.Variable(
                "nreservoirs", np.array([nat] * nreservoirs, "<M8[ns]")
            )
        ds["initial_storage"] = xr.Variable(
            "nreservoirs", np.array([nan] * nreservoirs)
        )

        params_dd = DatasetDict.from_ds(ds)
        return StarfitParameters.from_dict(params_dd.data)
def _get_grand(grand_file):
    """Read the GRanD file into a dataset of reservoir ids and locations.

    Only the ``GRAND_ID``, ``LONG_DD`` and ``LAT_DD`` columns are kept.
    ``GRAND_ID`` is renamed to ``grand_id`` and made a coordinate, and the
    row dimension is renamed from ``index`` to ``nreservoirs``.

    Parameters
    ----------
    grand_file : Union[pl.Path, str]
        Path to an existing GRanD file readable by ``geopandas.read_file``.
        The file type is not checked here.

    Returns
    -------
    The dataset obtained from ``to_xarray()`` on the selected columns, after
    the renames described above.

    Raises
    ------
    IOError
        If ``grand_file`` is None; the file has to be acquired manually.
    ValueError
        If ``grand_file`` does not exist.
    ImportError
        If geopandas is not available.
    """
    if grand_file is None:
        msg = (
            "You must acquire the GRanD file manually at\n"
            "https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013"  # noqa: E501
        )
        raise IOError(msg)

    if not pl.Path(grand_file).exists():
        msg = f"the GRanD file {grand_file} does not exist."
        raise ValueError(msg)

    # geopandas is imported as an optional dependency with errors="ignore";
    # gpd is expected to be None when it is missing. Fail with a clear
    # message instead of an AttributeError on None.
    if gpd is None:
        msg = (
            "geopandas is required to read the GRanD file but it could not "
            "be imported."
        )
        raise ImportError(msg)

    # check that it's a dbf or a shp file?
    cols_keep = ["GRAND_ID", "LONG_DD", "LAT_DD"]
    grand_df = gpd.read_file(grand_file)[cols_keep]
    # Dropping the "index" variable leaves "index" as a plain dimension,
    # which the rename below turns into "nreservoirs".
    grand_ds = grand_df.to_xarray().drop_vars("index")
    grand_ds = grand_ds.rename(
        {"GRAND_ID": "grand_id", "index": "nreservoirs"}
    ).set_coords("grand_id")
    return grand_ds
def _get_istarf_conus(istarf_file, files_directory):
    """Read ISTARF-CONUS into a dataset, downloading the csv if needed.

    If ``istarf_file`` is None or does not exist, ``ISTARF-CONUS.csv`` is
    downloaded from zenodo into ``files_directory``, or into a new temporary
    directory when ``files_directory`` is None or does not exist. The csv is
    then read, ``GRanD_ID`` is renamed to ``grand_id``, ``index`` to
    ``nreservoirs`` and ``GRanD_MEANFLOW_CUMECS`` to ``inflow_mean``, and
    every variable whose name is not in ``starfit_param_names`` is dropped.

    Parameters
    ----------
    istarf_file : Union[pl.Path, str] or None
        Path to an existing ISTARF-CONUS csv file. A warning is issued if a
        path is given that does not exist.
    files_directory : Union[pl.Path, str] or None
        Directory for the download. It is only used when a download is
        needed; a warning is issued if it is given but does not exist.

    Returns
    -------
    The dataset obtained from ``pandas.DataFrame.to_xarray`` after the
    renames and drops described above.
    """
    # `import urllib` alone does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    istarf_file_exists = (
        istarf_file is not None and pl.Path(istarf_file).exists()
    )

    if not istarf_file_exists:
        if istarf_file is not None:
            warn(f"The specified istarf_file does not exist: {istarf_file}")

        files_directory_exists = (
            files_directory is not None and pl.Path(files_directory).exists()
        )
        if files_directory_exists:
            # files_directory may be a str: make it a Path so "/" works
            download_dir = pl.Path(files_directory)
        else:
            if files_directory is not None:
                warn(
                    "The specified files_directory does not exist: "
                    f" {files_directory}"
                )
            # Only create the temporary directory when a download is
            # actually going to happen.
            download_dir = pl.Path(tempfile.mkdtemp())

        # download source to download_dir
        istarf_url = (
            "https://zenodo.org/records/4602277/files/"
            "ISTARF-CONUS.csv?download=1"
        )
        istarf_file = download_dir / "ISTARF-CONUS.csv"
        print(f"Downloading and saving ISTARF-CONUS.csv to {istarf_file}")
        urllib.request.urlretrieve(istarf_url, istarf_file)

    istarf_ds = pd.read_csv(istarf_file).to_xarray()
    rename_map = {
        "GRanD_ID": "grand_id",
        "index": "nreservoirs",
        "GRanD_MEANFLOW_CUMECS": "inflow_mean",
    }
    istarf_ds = istarf_ds.rename(rename_map)
    istarf_ds = istarf_ds.set_coords(["nreservoirs", "grand_id"])

    # Drop variables not in the starfit parameters. This includes the
    # nreservoirs index coordinate (the dimension itself remains) and any
    # csv columns that are not starfit parameters.
    not_starfit_params = [
        vv for vv in istarf_ds.variables if vv not in starfit_param_names
    ]
    return istarf_ds.drop_vars(not_starfit_params)