Source code for pywatershed.parameters.starfit_parameters

import pathlib as pl
import tempfile
import urllib
from typing import Union
from warnings import warn

import numpy as np
import pandas as pd
import xarray as xr

from ..base.data_model import DatasetDict
from ..base.parameters import Parameters
from ..constants import nan, nat
from ..utils.optional_import import import_optional_dependency

gpd = import_optional_dependency("geopandas", errors="ignore")

# Names of the STARFIT parameters kept by this module: `_get_istarf_conus`
# deletes every variable read from the ISTARF-CONUS csv whose name is not
# listed in this tuple.
#
# The names are copied here instead of being obtained through
#     from ..hydrology.starfit import Starfit
# because importing Starfit from this module (e.g.
# `from pywatershed import Starfit`) is circular.
starfit_param_names = (
    "grand_id",
    "GRanD_NAME",
    "initial_storage",
    "start_time",
    "end_time",
    "inflow_mean",
    "NORhi_min",
    "NORhi_max",
    "NORhi_alpha",
    "NORhi_beta",
    "NORhi_mu",
    "NORlo_min",
    "NORlo_max",
    "NORlo_alpha",
    "NORlo_beta",
    "NORlo_mu",
    "Release_min",
    "Release_max",
    "Release_alpha1",
    "Release_alpha2",
    "Release_beta1",
    "Release_beta2",
    "Release_p1",
    "Release_p2",
    "Release_c",
    "GRanD_CAP_MCM",
    "Obs_MEANFLOW_CUMECS",
    "GRanD_MEANFLOW_CUMECS",
)


class StarfitParameters(Parameters):
    """Starfit parameter class.

    This parameter class provides STARFIT parameters for modeling. This
    class does NOT calculate the parameters from inputs (e.g. as ISTARF-CONUS
    did using ResOpsUS), it simply provides the format for the model to get
    the parameter data. The data supplied can come from whatever means.

    The method `from_istarf_conus_grand` uses existing ISTARF-CONUS and GRanD
    data to create a parameter object for the user.

    References:

    **ISTARF-CONUS (Inferred Storage Targets and Release Functions - Continental US)**:
    Sean W.D. Turner, Jennie Clarice Steyaert, Laura Condon, Nathalie Voisin,
    Water storage and release policies for all large reservoirs of
    conterminous United States, Journal of Hydrology, Volume 603, Part A,
    2021, 126843, ISSN 0022-1694,
    https://doi.org/10.1016/j.jhydrol.2021.126843.
    https://zenodo.org/records/4602277

    **GRanD (Global Reservoir and Dam) database**:
    Lehner, Bernhard, Catherine Reidy Liermann, Carmen Revenga, Charles
    Vörösmarty, Balazs Fekete, Philippe Crouzet, Petra Döll et al.
    "High-resolution mapping of the world's reservoirs and dams for
    sustainable river-flow management." Frontiers in Ecology and the
    Environment 9, no. 9 (2011): 494-502.
    https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013

    **ResOpsUS**:
    Steyaert, Jennie C., Laura E. Condon, Sean WD Turner, and Nathalie
    Voisin. "ResOpsUS, a dataset of historical reservoir operations in the
    contiguous United States." Scientific Data 9, no. 1 (2022): 34.
    https://zenodo.org/records/6612040

    Parameters
    ----------
    dims : dict
        Forwarded unchanged to ``Parameters.__init__``.
    coords : dict
        Forwarded unchanged to ``Parameters.__init__``.
    data_vars : dict
        Forwarded unchanged to ``Parameters.__init__``.
    metadata : dict
        Forwarded unchanged to ``Parameters.__init__``.
    encoding : dict, optional
        Forwarded to ``Parameters.__init__``. When omitted (``None``), a new
        empty dict is created for each instance.

    Notes
    -----
    After construction ``self.parameters`` must provide a ``"grand_id"``
    entry; it is stored in ``self.nhm_coordinates``.
    """  # noqa: E501

    def __init__(
        self,
        dims: dict,
        coords: dict,
        data_vars: dict,
        metadata: dict,
        encoding: dict = None,
    ) -> None:
        """Initialize the base ``Parameters`` and record the grand_id coord.

        See the class docstring for the description of the arguments.
        """
        # A literal ``{}`` default would be a single dict shared by every
        # call, so a fresh dict is built per instance instead.
        if encoding is None:
            encoding = {}

        super().__init__(
            dims=dims,
            coords=coords,
            data_vars=data_vars,
            metadata=metadata,
            encoding=encoding,
        )
        # remove this throughout, no prms specific parameter methods should
        # be used in netcdf utils
        self.nhm_coordinates = {"grand_id": self.parameters["grand_id"]}

    @staticmethod
    def from_netcdf(
        resops_domain: Union[str, pl.Path],
        istarf_conus: Union[str, pl.Path],
        grand_dams: Union[str, pl.Path],
        grand_ids: Union[str, pl.Path] = None,
        param_names: list = None,
    ) -> "StarfitParameters":
        """Build a parameter object from three netCDF files.

        Each file is read with ``DatasetDict.from_netcdf``. The ISTARF-CONUS
        and GRanD dams data are renamed to use ``grand_id`` and subset to the
        ``grand_id`` values found in the ``resops_domain`` file. The three
        are then merged, and the ``grand_id`` dimension is renamed to
        ``nreservoirs``.

        TODO: what are the netcdf parameter files? describe their format

        Parameters
        ----------
        resops_domain : Union[str, pl.Path]
            NetCDF file whose ``grand_id`` coordinate selects the reservoirs
            to keep.
        istarf_conus : Union[str, pl.Path]
            NetCDF file with the ISTARF-CONUS data, identified by
            ``GRanD_ID``.
        grand_dams : Union[str, pl.Path]
            NetCDF file with the GRanD dams data, identified by ``GRAND_ID``.
        grand_ids : optional
            Currently unused by this method; kept for interface
            compatibility.
        param_names : list, optional
            If given and non-empty, the merged data are subset to these
            names via ``DatasetDict.subset``.

        Returns
        -------
        StarfitParameters
            Built with ``StarfitParameters.from_dict`` from the merged data.
        """
        resops_dd = DatasetDict.from_netcdf(resops_domain)

        istarf_conus_dd = DatasetDict.from_netcdf(istarf_conus)
        istarf_rename = {"GRanD_ID": "grand_id"}
        istarf_conus_dd.rename_dim(istarf_rename)
        istarf_conus_dd.rename_var(istarf_rename)
        # keep only the reservoirs that are also in the resops domain
        wh_subset = np.where(
            np.isin(
                istarf_conus_dd.coords["grand_id"],
                resops_dd.coords["grand_id"],
            )
        )
        istarf_conus_dd.subset_on_coord("grand_id", wh_subset)

        grand_dams_dd = DatasetDict.from_netcdf(grand_dams)
        grand_rename = {"GRAND_ID": "grand_id"}
        grand_dams_dd.rename_dim(grand_rename)
        grand_dams_dd.rename_var(grand_rename)
        wh_subset = np.where(
            np.isin(
                grand_dams_dd.coords["grand_id"],
                resops_dd.coords["grand_id"],
            )
        )
        grand_dams_dd.subset_on_coord("grand_id", wh_subset)

        # NOTE(review): "subset_inds" is presumably added by subset_on_coord;
        # confirm against DatasetDict.
        istarf_conus_dd.drop_var("subset_inds")
        grand_dams_dd.drop_var("subset_inds")

        params_dd = DatasetDict.merge(
            resops_dd, istarf_conus_dd, grand_dams_dd
        )

        # probably should have named the spatial dim nreservoirs when
        # I created the netcdf file
        _ = params_dd.rename_dim({"grand_id": "nreservoirs"})

        if param_names:
            params_dd = params_dd.subset(param_names)

        return StarfitParameters.from_dict(params_dd.data)

    @staticmethod
    def from_istarf_conus_grand(
        grand_file: Union[pl.Path, str],
        istarf_file: Union[pl.Path, str] = None,
        files_directory: Union[pl.Path, str] = pl.Path("."),
        grand_ids: list = None,
    ) -> "StarfitParameters":
        """Build parameter object from istarf-conus and the GRanD v1.3 sources.

        This returns the parameters for the STARFIT method. The parameters
        are in the original units of the method.

        Note that this method returns NaT for the fields start_time and
        end_time and nan for initial_storage. The user can edit the
        parameter set if she would like to change these with the following
        basic steps (outlined in an example notebook): export the parameters
        to an xarray data set via params.to_xr_ds(), then edit params using
        xarray, finally instantiate a parameter object from the xarray
        dataset using params = StarfitParameters.from_ds(param_ds). The
        units of initial_storage supplied should match the units of flow
        input to Starfit.

        Only reservoirs whose grand_id is present in both the GRanD and the
        ISTARF-CONUS data are kept.

        Parameters
        ----------
        grand_file : Union[pl.Path, str]
            A path to an existing dbf or shp file. If the file does not
            exist, an error will be thrown and you must download it manually
            at
            https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013
        istarf_file : Union[pl.Path, str], optional
            A path to an existing file. If file does not exist or is None
            then the file will be downloaded to files_directory. You can
            download the file yourself here
            https://zenodo.org/records/4602277
        files_directory : Union[pl.Path, str], optional
            A local directory where to download the file. Default is current
            directory.
        grand_ids : list, optional
            A subset of grand_ids to keep.

        Returns
        -------
        StarfitParameters
            Built with ``StarfitParameters.from_dict`` from the combined
            data.

        Raises
        ------
        IOError
            If grand_file is None.
        ValueError
            If grand_file does not exist, or if any of the requested
            grand_ids is not available in both data sources.

        Examples
        --------

        Read the full ISTARF-CONUS dataset, identify the "big sandy"
        reservoir by name to get its grand_id, then subset the parameters to
        this grand_id. This requires downloading the GRanD and ISTARF-CONUS
        datasets in advance and specifying the paths to those files.

        >>> import pywatershed as pws
        >>> grand_file = (
        ...     your_data_dir / "GRanD_Version_1_3/GRanD_reservoirs_v1_3.dbf"
        ... )
        >>> istarf_file = your_data_dir / "ISTARF-CONUS.csv"
        >>> sf_params = (
        ...     pws.parameters.StarfitParameters.from_istarf_conus_grand(
        ...         grand_file=grand_file, istarf_file=istarf_file
        ...     )
        ... )
        >>> grand_names = sf_params.parameters["GRanD_NAME"].tolist()
        >>> # where is Big Sandy?
        >>> big_sandy_index = [
        ...     ii
        ...     for ii, nn in enumerate(grand_names)
        ...     if "big sandy" in str(nn).lower()
        ... ][0]
        >>> big_sandy_grand_id = sf_params.parameters["grand_id"][
        ...     big_sandy_index
        ... ]
        >>> # get a parameter set with just the big sandy dike
        >>> sf_params = (
        ...     pws.parameters.StarfitParameters.from_istarf_conus_grand(
        ...         grand_file=grand_file,
        ...         istarf_file=istarf_file,
        ...         grand_ids=[big_sandy_grand_id],
        ...     )
        ... )

        """  # noqa: E501
        grand_ds = _get_grand(grand_file)
        istarf_ds = _get_istarf_conus(istarf_file, files_directory)

        # only reservoirs described by both sources can be parameterized
        common_grand_ids = set(grand_ds.grand_id.values).intersection(
            set(istarf_ds.grand_id.values)
        )

        if grand_ids is not None:
            # make sure all requested grand_ids are in common_grand_ids
            requested_grand_ids = set(grand_ids)
            unavail_grand_ids = requested_grand_ids - common_grand_ids
            if unavail_grand_ids:
                msg = (
                    "The following requested grand_ids wer not available "
                    f"in the data sources: {unavail_grand_ids}"
                )
                raise ValueError(msg)
            common_grand_ids = requested_grand_ids.intersection(
                common_grand_ids
            )

        # np.array on a set would give a 0-d object array, hence the list
        common_grand_ids = np.array(list(common_grand_ids))
        grand_ds = grand_ds.where(
            grand_ds.grand_id.isin(common_grand_ids), drop=True
        )
        istarf_ds = istarf_ds.where(
            istarf_ds.grand_id.isin(common_grand_ids), drop=True
        )

        ds = xr.combine_by_coords([grand_ds, istarf_ds])
        nreservoirs = len(ds.nreservoirs)

        # these fields are not provided by the sources: fill with missing
        # values that the user can edit afterwards
        for vv in ["start_time", "end_time"]:
            ds[vv] = xr.Variable(
                "nreservoirs", np.array([nat] * nreservoirs, "<M8[ns]")
            )
        ds["initial_storage"] = xr.Variable(
            "nreservoirs", np.array([nan] * nreservoirs)
        )

        params_dd = DatasetDict.from_ds(ds)
        return StarfitParameters.from_dict(params_dd.data)
def _get_grand(grand_file: Union[pl.Path, str]) -> "xr.Dataset":
    """Read the GRanD file and return id, longitude and latitude.

    Parameters
    ----------
    grand_file : Union[pl.Path, str]
        Path to an existing GRanD file readable by ``geopandas.read_file``
        (a dbf or shp file is expected; the type is not checked).

    Returns
    -------
    xr.Dataset
        The columns ``GRAND_ID`` (renamed ``grand_id`` and set as a
        coordinate), ``LONG_DD`` and ``LAT_DD`` on the dimension
        ``nreservoirs``.

    Raises
    ------
    IOError
        If ``grand_file`` is None.
    ValueError
        If ``grand_file`` does not exist.
    ImportError
        If geopandas could not be imported.
    """
    if grand_file is None:
        msg = (
            "You must acquire the GRanD file manually at\n"
            "https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013"  # noqa: E501
        )
        raise IOError(msg)

    if not pl.Path(grand_file).exists():
        msg = f"the GRanD file {grand_file} does not exist."
        raise ValueError(msg)

    # check that it's a dbf or a shp file?

    # geopandas is imported as an optional dependency with errors="ignore";
    # presumably that leaves `gpd` as None when it is missing (confirm
    # against import_optional_dependency). Fail with a clear message instead
    # of an AttributeError on None.
    if gpd is None:
        raise ImportError(
            "geopandas is required to read the GRanD file but could not "
            "be imported."
        )

    cols_keep = ["GRAND_ID", "LONG_DD", "LAT_DD"]
    grand_ds = (
        gpd.read_file(grand_file)[cols_keep].to_xarray().drop_vars("index")
    )
    # the "index" variable was dropped above but the dimension of that name
    # remains and is renamed here
    grand_ds = grand_ds.rename(
        {"GRAND_ID": "grand_id", "index": "nreservoirs"}
    ).set_coords("grand_id")

    return grand_ds


def _get_istarf_conus(
    istarf_file: Union[pl.Path, str, None],
    files_directory: Union[pl.Path, str, None],
) -> "xr.Dataset":
    """Read the ISTARF-CONUS csv, downloading it first when necessary.

    Parameters
    ----------
    istarf_file : Union[pl.Path, str, None]
        Path to an existing ISTARF-CONUS csv file. If None or if the path
        does not exist (a warning is issued in the latter case), the file is
        downloaded from Zenodo.
    files_directory : Union[pl.Path, str, None]
        Directory in which a downloaded file is saved as
        ``ISTARF-CONUS.csv``. If None or if it does not exist (a warning is
        issued in the latter case), a new temporary directory is used. Only
        consulted when a download is needed.

    Returns
    -------
    xr.Dataset
        The csv contents with ``GRanD_ID`` renamed to ``grand_id``,
        ``GRanD_MEANFLOW_CUMECS`` renamed to ``inflow_mean``, the dimension
        named ``nreservoirs``, and every variable not listed in
        ``starfit_param_names`` removed.
    """
    # Local import: a bare ``import urllib`` does not guarantee that the
    # ``urllib.request`` submodule has been loaded.
    from urllib.request import urlretrieve

    istarf_file_in = istarf_file
    files_directory_in = files_directory

    istarf_file_in_exists = (
        istarf_file_in is not None and pl.Path(istarf_file_in).exists()
    )

    if not istarf_file_in_exists:
        # download source to files_directory
        istarf_url = (
            "https://zenodo.org/records/4602277/files/"
            "ISTARF-CONUS.csv?download=1"
        )
        if istarf_file_in is not None:
            warn(f"The specified istarf_file does not exist: {istarf_file_in}")

        files_directory_in_exists = (
            files_directory_in is not None
            and pl.Path(files_directory_in).exists()
        )
        if files_directory_in_exists:
            # files_directory may be a plain str: `str / str` is a TypeError
            download_dir = pl.Path(files_directory_in)
        else:
            if files_directory_in is not None:
                warn(
                    "The specified files_directory does not exist: "
                    f" {files_directory_in}"
                )
            # the temporary directory is only created when actually needed
            download_dir = pl.Path(tempfile.mkdtemp())

        istarf_file = download_dir / "ISTARF-CONUS.csv"
        print(f"Downloading and saving ISTARF-CONUS.csv to {istarf_file}")
        urlretrieve(istarf_url, istarf_file)

    istarf_ds = pd.read_csv(istarf_file).to_xarray()
    rename_map = {
        "GRanD_ID": "grand_id",
        "index": "nreservoirs",
        "GRanD_MEANFLOW_CUMECS": "inflow_mean",
    }
    istarf_ds = istarf_ds.rename(rename_map)
    istarf_ds = istarf_ds.set_coords(["nreservoirs", "grand_id"])

    # drop variables not in the starfit parameters; iterate over a copy of
    # the names because the dataset is modified inside the loop
    for vv in list(istarf_ds.variables):
        if vv not in starfit_param_names:
            del istarf_ds[vv]

    return istarf_ds