"""Open netcdf dataset
"""
import numbers
import os
import numpy as np
import xarray as xr
import xesmf as xe
from xarray.backends import BackendEntrypoint
from lenapy.utils.geo import longitude_increase, rename_data, split_duplicate_coords
from lenapy.utils.time import to_datetime
[docs]
class lenapyNetcdf(BackendEntrypoint):
"""Open netcdf file
Open a dataset base on xr.open_dataset method, while normalizing coordinates names and choosing NaN values
Parameters
----------
file : path
pathname of the file to open
rename : dict, optional
dictionnary {old_name:new_name,...}
nan : optional
value to be replaced by NaN
time_type : str, optional
type used for time coordinate, used to convert to datetime64
possible values : 'frac_year' or '360_day' or 'cftime' or 'custom'
if 'custom', "decode_times=False" and "format" must be specified
format : str, optional
strftime to parse time (if time_type=='custom')
duplicate_coords : boolean, optional
rename duplicate coordinates, by adding a "_" at the end of duplicate ones
decode_times : boolean, optional
try to decode time coordinate to a datetime64 type
set_time : None or func, optional
if set to a function name, apply the result of the function passed to the file basename to set the time coordinate
open_dataset_kwargs : dict, optional
Additional keyword arguments passed on to native open_dataset function
Returns
-------
data : Dataset
Dataset loaded from file
Example
-------
.. code-block:: python
data = xr.open_dataset('/home/user/lenapy/data/gohc_2020.nc', engine="lenapyNetcdf")
"""
[docs]
def open_dataset(
self,
filename,
rename={},
nan=None,
time_type=None,
format=None,
duplicate_coords=False,
drop_variables=None,
decode_times=True,
set_time=None,
open_dataset_kwargs={},
):
open_kw = open_dataset_kwargs.copy()
# If available and not specified, use 'h5netcdf' as engine, which is more dask-friendly than netcdf4
if not ("engine" in open_dataset_kwargs.keys()) and (
"h5netcdf" in xr.backends.list_engines()
):
open_kw["engine"] = "h5netcdf"
res = rename_data(
xr.open_dataset(
filename,
drop_variables=drop_variables,
decode_times=decode_times,
**open_kw,
),
**rename,
)
if type(set_time) != type(None):
res = res.assign_coords(
time=set_time(
os.path.basename(os.path.splitext(res.encoding["source"])[0])
)
).expand_dims(dim="time")
if time_type != None:
res = to_datetime(res, time_type=time_type, format=format)
if duplicate_coords:
res = split_duplicate_coords(res)
res = longitude_increase(res)
if type(nan) != type(None):
return res.where(res != nan)
else:
return res
[docs]
class lenapyMask(BackendEntrypoint):
"""Open mask file
Open a mask in a dataset file, choose a given field, and regrid according to a given geometry.
The returned mask contains an extra dimension named 'zone', corresponding to the different values
of the mask contained in the opened dataset.
Required format for the dataset to be opened :
- contains several dataarrays, each one being a mask (identified as 'field')
- each mask is defined by values, whose signification is given in the attributes of the dataarray : {'value1' : 'label1',...}
If there is no valid attribute, the mask is returned with False where NaN, False or 0 are found, True for any other value.
Parameters
----------
filename : path
path and filename of the mask file to be opened
field : string or array of strings
name of the data to be used as a mask in the dataset
grid : dataset, optional
dataset to be regridded on. If None, no regridding is performed
Returns
-------
mask : DataArray
DataArray with regridded mask
Example
-------
.. code-block:: python
mask = xr.open_dataset('/home/user/lenapy/data/mask/GEO_mask_1deg_20210406.nc',engine="lenapyMask",field='mask_continents').mask
mask.zone
<xarray.DataArray (latitude: 360, longitude: 720, zone: 9)>
array([[[False, True, False, ..., False, False, False],
...
[False, False, False, ..., False, False, False]]])
Coordinates:
* longitude (longitude) float64 -179.8 -179.2 -178.8 ... 178.8 179.2 179.8
* latitude (latitude) float64 -89.75 -89.25 -88.75 ... 88.75 89.25 89.75
* zone (zone) <U18 'Greenland' 'Antarctica' ... 'Maritime continent'
"""
[docs]
def open_dataset(self, filename, field=None, grid=None, drop_variables=None):
res = []
# Opening
file = xr.open_dataset(filename)
if field == None:
field = file.data_vars
for f in np.ravel(field):
mask = file[f]
if not (
isinstance(mask.values.ravel()[0], numbers.Number)
and "latitude" in mask.coords
):
continue
# Labels reading
c = []
d = []
for k in mask.attrs.keys():
if k.isnumeric():
d.append(int(k))
c.append(mask.attrs[k])
if isinstance(mask.attrs[k], numbers.Number):
d.append(mask.attrs[k])
c.append(k)
if len(d) == 0:
d = [False]
c = [f]
mask = mask.where(mask.notnull(), 0).astype("int") == 0
labels = xr.DataArray(data=d, dims="zone", coords=dict(zone=c))
# Resampling
if type(grid) != type(None):
reg = xe.Regridder(mask, grid, method="nearest_s2d")
mask = reg(mask)
# Returning
res.append(xr.Dataset({"mask": xr.where(mask == labels, True, False)}))
return xr.concat(res, dim="zone")