Source code for cordex.tutorial
"""
Useful for:
* users learning py-cordex
"""
# code stolen from xarray, I am sorry!
import os
import pathlib
from xarray import open_dataset as _open_dataset
from .preprocessing import cordex_dataset_id
# Directory name handed to ``pooch.os_cache`` when the caller does not supply
# an explicit ``cache_dir``.
_default_cache_dir_name = "py-cordex_tutorial_data"
# Repository and ref from which ``open_dataset`` builds download URLs of the
# form ``{base_url}/raw/{version}/{file name}``.
base_url = "https://github.com/euro-cordex/py-cordex-data"
version = "main"
# Per-dataset URL overrides: a name found here is downloaded from the mapped
# URL instead of one built from ``base_url``.
external_urls = {} # type: dict
# Dataset name -> format code read by ``_check_netcdf_engine_installed``:
# 3 requires scipy or netCDF4, 4 requires h5netcdf or netCDF4 (presumably the
# netCDF format version of the file).
# NOTE(review): the first key carries a ".nc" suffix while the others do not.
# ``open_dataset`` only runs the engine check for names *without* a suffix, so
# that entry looks unreachable from there -- confirm whether the suffix is
# intended.
file_formats = {
"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_GERICS-REMO2015_v1_mon_197902-198012.nc": 3,
"rasm": 3,
"ROMS_example": 4,
"tiny": 3,
"eraint_uvz": 3,
}
def _check_netcdf_engine_installed(name):
    """Ensure a package able to read tutorial dataset ``name`` is importable.

    The format code for ``name`` is looked up in ``file_formats``:

    * ``3`` -- either ``scipy`` or ``netCDF4`` must be importable
    * ``4`` -- either ``h5netcdf`` or ``netCDF4`` must be importable

    Names that are not listed in ``file_formats`` (or carry any other code)
    are not checked at all.

    Parameters
    ----------
    name : str
        Dataset name used as key into ``file_formats``.

    Raises
    ------
    ImportError
        If neither of the two acceptable packages can be imported.
    """
    # Imported locally so the helper does not depend on a module-level import.
    import importlib

    # Named ``netcdf_format`` to avoid shadowing the module-level ``version``
    # that is used for building download URLs.
    netcdf_format = file_formats.get(name)
    if netcdf_format == 3:
        preferred = "scipy"
    elif netcdf_format == 4:
        preferred = "h5netcdf"
    else:
        return

    # Try the preferred reader first; netCDF4 is the fallback for both formats.
    for module_name in (preferred, "netCDF4"):
        try:
            importlib.import_module(module_name)
        except ImportError:
            continue
        return

    # Raised outside of any ``except`` block so the user sees a single, clean
    # error instead of a chain of "during handling of ..." tracebacks.
    raise ImportError(
        f"opening tutorial dataset {name} requires either {preferred} "
        "or netCDF4 to be installed."
    )
def _construct_cache_dir(path):
    """Resolve the directory in which tutorial files are cached.

    Parameters
    ----------
    path : path-like, str or None
        Directory requested by the caller. ``None`` selects the default
        location, ``pooch.os_cache(_default_cache_dir_name)``.

    Returns
    -------
    The result of ``os.fspath(path)`` for ``os.PathLike`` input, whatever
    ``pooch.os_cache`` returns when ``path`` is None, and ``path`` unchanged
    otherwise (e.g. a plain string).
    """
    if path is None:
        # pooch is only needed to locate the default cache directory, so it
        # is imported lazily: an explicit ``path`` works without pooch.
        import pooch

        return pooch.os_cache(_default_cache_dir_name)
    if isinstance(path, os.PathLike):
        return os.fspath(path)
    return path
# idea borrowed from Seaborn
[docs]
def open_dataset(
    name,
    cache=True,
    cache_dir=None,
    *,
    engine=None,
    **kws,
):
    """
    Download a tutorial dataset (requires internet) and open it with xarray.

    The file is fetched with ``pooch.retrieve`` into ``cache_dir`` and then
    opened with ``xarray.open_dataset``.

    Available datasets:

    * ``"remo_EUR-11_TEMP2_1hr"``: Remo hourly output
    * ``"remo_EUR-11_TEMP2_mon"``: Remo monthly output
    * ``"remo_EUR-44.nc"``: Remo 3D output on model levels
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_GERICS-REMO2015_v1_mon_197902-198012"``: Remo output (rotated pole)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_DHMZ-RegCM4-2_v1_mon_198901-199012"``: RegCM4 output (lambert conformal)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_CNRM-ALADIN53_v1_mon_197901-198012"``: Aladin Output (lambert conformal)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_KNMI-RACMO22E_v1_mon_197901-198012"``: Racmo Output (rotated pole)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_RMIB-UGent-ALARO-0_v1_mon_198001-198012"``: Alaro output (lambert conformal)

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_GERICS-REMO2015_v1_mon_197902-198012'
        A name without a file suffix gets ``.nc`` appended. A name listed in
        ``external_urls`` is downloaded from the URL registered there.
    cache : bool, optional
        If True (default), the downloaded file is kept in ``cache_dir``.
        If False, the dataset is loaded into memory and the downloaded file
        is deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    engine : str, optional
        Passed to xarray.open_dataset. When left as None, ``"cfgrib"`` is
        used for names ending in ``.grib``; for names without a suffix the
        availability of a suitable netCDF reader is checked first.
    **kws : dict, optional
        Passed to xarray.open_dataset

    Returns
    -------
    The object returned by ``xarray.open_dataset`` (loaded into memory when
    ``cache`` is False).

    Raises
    ------
    ImportError
        If pooch is not installed.

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Keep pooch from reporting every download at INFO level.
    pooch.get_logger().setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)

    if name not in external_urls:
        remote = pathlib.Path(name)
        suffix = remote.suffix
        if suffix == "":
            # Bare dataset name: assume a netCDF file.
            if engine is None:
                _check_netcdf_engine_installed(name)
            remote = remote.with_suffix(".nc")
        elif suffix == ".grib" and engine is None:
            engine = "cfgrib"
        url = f"{base_url}/raw/{version}/{remote.name}"
    else:
        url = external_urls[name]

    # Fetch the file into the cache directory and open it.
    local_file = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(local_file, engine=engine, **kws)
    if cache:
        return ds

    # Caching disabled: pull the data into memory, then drop the download.
    ds = ds.load()
    pathlib.Path(local_file).unlink()
    return ds
[docs]
def ensemble():
    """Open a small CORDEX test ensemble.

    Each member is opened with ``open_dataset`` using its default arguments.

    Available datasets:

    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_GERICS-REMO2015_v1_mon_197902-198012"``: Remo output (rotated pole)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_DHMZ-RegCM4-2_v1_mon_198901-199012"``: RegCM4 output (lambert conformal)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_CNRM-ALADIN53_v1_mon_197901-198012"``: Aladin Output (lambert conformal)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_KNMI-RACMO22E_v1_mon_197901-198012"``: Racmo Output (rotated pole)
    * ``"tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_RMIB-UGent-ALARO-0_v1_mon_198001-198012"``: Alaro output (lambert conformal)

    Returns
    -------
    dict
        Maps ``cordex_dataset_id(ds)`` to the opened dataset ``ds`` for each
        member listed above.
    """
    member_names = [
        "tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_GERICS-REMO2015_v1_mon_197902-198012",
        "tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_DHMZ-RegCM4-2_v1_mon_198901-199012",
        "tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_CNRM-ALADIN53_v1_mon_197901-198012",
        "tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_KNMI-RACMO22E_v1_mon_197901-198012",
        "tas_EUR-11_ECMWF-ERAINT_evaluation_r1i1p1_RMIB-UGent-ALARO-0_v1_mon_198001-198012",
    ]

    # Open every member first, then index them by their dataset id.
    members = []
    for member_name in member_names:
        members.append(open_dataset(member_name))

    by_id = {}
    for member in members:
        by_id[cordex_dataset_id(member)] = member
    return by_id