# -*- coding: utf-8 -*-
"""Load temperature and salinity data from ocean products
"""
import os.path
import sys
from glob import glob
import gsw
import numpy as np
import xarray as xr
from xarray.backends import BackendEntrypoint
[docs]
def filtered_list(files, year, ymin, ymax, pattern):
"""
Returns a filtered list of files fitting year range and pattern
Parameters
----------
files : Array of strings
Array with filenames to be filtered
year : function
Function to be applied on each filename, returning the data year
ymin : integer
Lowest bound of the time range
ymax : integer
Highest bound of the time range
pattern : string
Pattern that must fit the filenames
Return
------
filtered : Array
Extract of th input array fitting year and pattern conditions
Example
-------
.. code-block:: python
def year(f):
return f.split('_')[1]
fics=filtre_liste(glob(os.path.join(rep,'**','*.nc')),year,2005,2006,'ARGO')
"""
r = []
for u in files:
d = int(year(os.path.basename(os.path.splitext(u)[0])))
if d >= ymin and d <= ymax and pattern in u:
r.append(u)
return r
[docs]
class lenapyOceanProducts(BackendEntrypoint):
"""Open netcdf ocean product
This module allows to load temperature and salinity data from different products and format the data with unified definition for variables and coordinates, compatible with the use of lenapy_ocean :
- standardized coordinates names : latitude, longitude, depth, time
- standardized variables names : temp or PT or CT for temperature, psal, SA, SR for salinity
When loading a product, all the files present in the product directory are parsed. To gain computing time, a first filter on the years to load can be applied, as well as a text filter.
A second date filter can be apply afterwards with the .sel(time=slice('begin_date','end_date') method.
All keyword arguments associated with xr.open_dataset can be passed.
Dask is implicitely used when using these interface methods.
It is imperative to chunk the dataset along time dimension (chunks=dict(depth=10)), and recommanded to chunk the dataset along depth dimension (chunks=dict(depth=10))
Parameters
----------
directory : string
path of the product's directory
product : string
identifier of the product to be opened. Can be one of :
"ISAS","NCEI","SIO","IPRC","ISHII","IAP","EN4","JAMSTEC","ECCO","NOC_OI","SODA"
corr : string, optional
additional parameter for EN4 prodcut ensemble. Can be one of:
'g10','l09','c13','c14'
ymin : int, optional
lowest bound of the time intervalle to be loaded (year)
ymax : int, optional
highest bound of the time intervalle to be loaded (year)
filter : string, optionnal
string pattern to filter datafiles names
chunks : dict, optionnal
dictionnary {dim:chunks}
open_dataset_kwargs : dict, optional
The keyword arguments form of open_mfdataset
Returns
-------
product : Dataset
New dataset containing temperature and salinity data from the product
Examples
--------
.. code-block:: python
data=xr.open_dataset('/home/usr/lenapy/data/ISAS20', engine='lenapyOceanProducts', product='ISAS', ymin=2005, ymax=2007, filter='ARGO', chunks={'time':1,'depth':10})
"""
# pylint: disable=too-many-statements
[docs]
def open_dataset(
self,
directory,
product=None,
corr=None,
ymin=0,
ymax=9999,
filter="",
drop_variables=None,
open_dataset_kwargs={},
):
open_kw = open_dataset_kwargs.copy()
# If available and not specified, use 'h5netcdf' as engine, which is more dask-friendly than netcdf4
if not ("engine" in open_dataset_kwargs.keys()) and (
"h5netcdf" in xr.backends.list_engines()
):
open_kw["engine"] = "h5netcdf"
# --------------------- ISAS ---------------------
if product == "ISAS":
"""
Load data from ISAS product
Product's directory must have the following format:
-base directory (ex: ISAS20_ARGO)
|-year (ex: 2020)
|-release_type_timestamp_xxx_data.nc (ex: ISAS20_ARGO_20200915_fld_TEMP.nc)
"""
def year(f):
return f.split("_")[2][0:4]
fics = filtered_list(
glob(os.path.join(directory, "**", "*.nc")), year, ymin, ymax, filter
)
data = xr.open_mfdataset(fics, **open_kw)
return xr.Dataset({"temp": data.TEMP, "psal": data.PSAL})
# --------------------- NCEI ---------------------
elif product == "NCEI":
"""
Load data from NCEI product
Temperature and salinity are reconstructed from anomaly and trimestrial climatology
Product's directory must have the following format:
-base directory (ex: NCEI)
|-salinity
|-sanom_timestamp.nc (ex: sanom_C1C107-09.nc)
|-temperature
|-tanom_timestamp.nc (ex: tanom_C1C107-09.nc)
"""
def year(f):
a = f.split("_")[1][0]
b = f.split("_")[1][1]
return "%04d" % (1900 + 10 * int("0x%s" % a, 16) + int(b))
# Salinite
fics_sal = filtered_list(
glob(os.path.join(directory, "**", "sanom*.nc")),
year,
ymin,
ymax,
filter,
)
sal = xr.open_mfdataset(
fics_sal,
engine="lenapyNetcdf",
decode_times=False,
time_type="360_day",
open_dataset_kwargs=open_kw,
).s_an
fics_climsal = glob(os.path.join(directory, "climato", "*s1[3-6]_01.nc"))
climsal = xr.open_mfdataset(
fics_climsal,
engine="lenapyNetcdf",
decode_times=False,
time_type="360_day",
open_dataset_kwargs=open_kw,
).s_an
# Temperature
fics_temp = filtered_list(
glob(os.path.join(directory, "**", "tanom*.nc")),
year,
ymin,
ymax,
filter,
)
temp = xr.open_mfdataset(
fics_temp,
engine="lenapyNetcdf",
decode_times=False,
time_type="360_day",
open_dataset_kwargs=open_kw,
).t_an
fics_climtemp = glob(os.path.join(directory, "climato", "*t1[3-6]_01.nc"))
climtemp = xr.open_mfdataset(
fics_climtemp,
engine="lenapyNetcdf",
decode_times=False,
time_type="360_day",
open_dataset_kwargs=open_kw,
).t_an
return xr.Dataset(
{
"temp": (
temp.groupby("time.month")
+ climtemp.groupby("time.month").mean("time")
).drop("month"),
"psal": (
sal.groupby("time.month")
+ climsal.groupby("time.month").mean("time")
).drop("month"),
}
)
# --------------------- SIO ---------------------
elif product == "SIO":
"""
Load data from SIO product.
Temperature and salinity are reconstructed from anomaly and annual climatology, and depth coordinate is derived from pressure.
Product's directory must have the following format:
-base directory (ex: SIO)
|-climato
|-RG_ArgoClim_climato_release.nc (ex: RG_ArgoClim_climato_2004_2018.nc)
|-monthly
|-RG_ArgoClim_timestamp_release.nc (ex: RG_ArgoClim_202210_2019.nc)
"""
def year(f):
return f.split("_")[-2][0:4]
fics_anom = filtered_list(
glob(os.path.join(directory, "**", "RG_ArgoClim_20*.nc")),
year,
ymin,
ymax,
filter,
)
fics_clim = glob(os.path.join(directory, "**", "RG_ArgoClim_cl*.nc"))
anom = xr.open_mfdataset(
fics_anom,
engine="lenapyNetcdf",
decode_times=False,
time_type="360_day",
open_dataset_kwargs=open_kw,
)
clim = xr.open_mfdataset(
fics_clim, engine="lenapyNetcdf", open_dataset_kwargs=open_kw
)
temp = anom.ARGO_TEMPERATURE_ANOMALY + clim.ARGO_TEMPERATURE_MEAN
psal = anom.ARGO_SALINITY_ANOMALY + clim.ARGO_SALINITY_MEAN
# Corresponding depth for pressure at mean latitude of 30 degrees
depth = -gsw.z_from_p(anom.PRESSURE, 30)
return xr.Dataset(
{
"temp": temp.assign_coords(PRESSURE=depth).rename(PRESSURE="depth"),
"psal": psal.assign_coords(PRESSURE=depth).rename(PRESSURE="depth"),
}
)
# --------------------- IPRC ---------------------
elif product == "IPRC":
"""
Load data from IPRC product.
Product's directory must have the following format:
-base directory (ex: IPRC)
|-monthly
|-ArgoData_year_month.nc (ex: ArgoData_2020_01.nc)
"""
def set_time(ds):
file_basename = os.path.basename(
os.path.splitext(ds.encoding["source"])[0]
)
date = file_basename.split("_")
ts = np.datetime64("%s-%s-15" % (date[1], date[2]), "ns")
return ds.assign_coords(time=ts).expand_dims(dim="time")
def year(f):
return f.split("_")[1]
fics = filtered_list(
glob(os.path.join(directory, "**", "ArgoData*.nc")),
year,
ymin,
ymax,
filter,
)
data = xr.open_mfdataset(
fics,
engine="lenapyNetcdf",
preprocess=set_time,
open_dataset_kwargs=open_kw,
)
return xr.Dataset({"temp": data.TEMP, "psal": data.SALT})
# --------------------- ISHII ---------------------
elif product == "ISHII":
"""
Load data from ISHII product.
Product's directory must have the following format:
-base directory (ex: ISHII)
|-monthly
|-xxx.year_month.nc (ex: sal.2022_08.nc)
"""
def year(f):
return f.split("_")[0].split(".")[1]
fics = filtered_list(
glob(os.path.join(directory, "**", "*.*.nc")), year, ymin, ymax, filter
)
data = xr.open_mfdataset(
fics,
engine="lenapyNetcdf",
drop_variables=[
"VAR_10_4_201_P0_L160_GLL0",
"VAR_10_4_202_P0_L160_GLL0",
],
open_dataset_kwargs=open_kw,
)
return xr.Dataset({"temp": data.temperature, "psal": data.salinity})
# --------------------- IAP ---------------------
elif product == "IAP":
"""
Load data from IAP product.
Product's directory must have the following format:
-base directory (ex: IAP)
|-sal
|-CZ16_depth_range_data_year_yyyy_month_mm.year_month.nc (ex: CZ16_1_2000m_salinity_year_2020_month_01.nc)
|-temp
|-CZ16_depth_range_data_year_yyyy_month_mm.year_month.nc (ex: CZ16_1_2000m_temperature_year_2020_month_01.nc)
"""
def set_time(ds):
file_basename = os.path.basename(
os.path.splitext(ds.encoding["source"])[0]
)
date = file_basename.split("_")
ts = np.datetime64("%s-%s-15" % (date[-3], date[-1]), "ns")
return ds.assign_coords(time=ts).expand_dims(dim="time")
def year(f):
return f.split("_")[-3]
fics = filtered_list(
glob(os.path.join(directory, "**", "CZ16*.nc")),
year,
ymin,
ymax,
filter,
)
data = xr.open_mfdataset(
fics,
engine="lenapyNetcdf",
preprocess=set_time,
open_dataset_kwargs=open_kw,
)
return xr.Dataset({"temp": data.temp, "SA": data.salinity})
# --------------------- EN_422 --------------------
elif product == "EN4":
"""
Load data from EN product.
Product's directory must have the following format:
-base directory (ex: EN)
|-4.2.2.corr (ex : 4.2.2.g10)
|-EN.4.2.2.xxx.corr.timestamp.nc (ex: EN.4.2.2.f.analysis.g10.202108.nc)
"""
def year(f):
return f.split(".")[-1][0:4]
if not (corr in ["g10", "l09", "c13", "c14"]):
sys.exit(-1)
fics = filtered_list(
glob(os.path.join(directory, "4.2.2.%s" % corr, "EN.4.2.2*.nc")),
year,
ymin,
ymax,
filter,
)
data = xr.open_mfdataset(
fics, engine="lenapyNetcdf", open_dataset_kwargs=open_kw
)
return xr.Dataset({"PT": data.temperature - 273.15, "psal": data.salinity})
# --------------------- JAMSTEC --------------------
elif product == "JAMSTEC":
"""
Load data from JAMSTEC product.
Depth coordinate is derived from pressure.
Product's directory must have the following format:
-base directory (ex: JAMSTEC)
|-monthly
|-TS_timestamp_xxx.nc (ex: TS_202105_GLB.nc)
"""
def set_time(ds):
file_basename = os.path.basename(
os.path.splitext(ds.encoding["source"])[0]
)
date = file_basename.split("_")[1]
ts = np.datetime64("%s-%s-15" % (date[0:4], date[4:6]), "ns")
return ds.assign_coords(time=ts).expand_dims(dim="time")
def year(f):
return f.split("_")[1][0:4]
fics = filtered_list(
glob(os.path.join(directory, "**", "*GLB.nc")), year, ymin, ymax, filter
)
data = xr.open_mfdataset(
fics,
engine="lenapyNetcdf",
preprocess=set_time,
open_dataset_kwargs=open_kw,
)
# Corresponding depth for pressure at mean latitude of 30 degrees
depth = -gsw.z_from_p(data.PRES, 30)
return xr.Dataset(
{
"temp": data.TOI.assign_coords(PRES=depth).rename(PRES="depth"),
"psal": data.SOI.assign_coords(PRES=depth).rename(PRES="depth"),
}
)
# --------------------- ECCO --------------------
elif product == "ECCO":
"""
Load data from ECCO product.
Product's directory must have the following format:
-base directory (ex: ECCO)
|-SALT
|-SALT_year_month.nc (ex: SALT_2016_12.nc)
|-THETA
|-THETA_year_month.nc (ex: THETA_2016_12.nc)
"""
def preproc(ds):
ds = (
ds.set_index(i="longitude", j="latitude", k="Z")
.rename(i="longitude", j="latitude", k="depth")
.drop("timestep")
)
return ds.assign_coords(depth=-ds.depth)
def year(f):
return f.split("_")[1]
fics = filtered_list(
glob(os.path.join(directory, "**", "*.nc")), year, ymin, ymax, filter
)
data = xr.open_mfdataset(fics, preprocess=preproc, **open_kw)
data = data.where(data != 0.0)
return xr.Dataset({"PT": data.THETA, "psal": data.SALT})
# --------------------- NOC OI --------------------
elif product == "NOC_OI":
"""
Load data from NOC ARGO OI product.
Depth coordinate is derived from pressure.
"""
def preproc(ds):
ds["time"] = ds.indexes["time"].to_datetimeindex()
return ds
fics = glob(os.path.join(directory, "*.nc"))
data = xr.open_mfdataset(
fics,
engine="lenapyNetcdf",
preprocess=preproc,
open_dataset_kwargs=open_kw,
).sel(time=slice(str(ymin), str(ymax)))
# Corresponding depth for pressure at mean latitude of 30 degrees
depth = -gsw.z_from_p(data.pressure, 30)
return xr.Dataset(
{
"temp": data.temperature.assign_coords(pressure=depth).rename(
pressure="depth"
),
"psal": data.practical_salinity.assign_coords(
pressure=depth
).rename(pressure="depth"),
}
)
# --------------------- SODA --------------------
elif product == "SODA":
"""
Load data from SODA product.
"""
def preproc(ds):
return ds.rename(
dict(xt_ocean="longitude", yt_ocean="latitude", st_ocean="depth")
)
def year(f):
return f.split("_")[-1]
fics = filtered_list(
glob(os.path.join(directory, "*.nc")), year, ymin, ymax, filter
)
print(fics)
data = xr.open_mfdataset(fics, preprocess=preproc, **open_kw)
return xr.Dataset({"PT": data.temp, "psal": data.salt})