Source code for lenapy.readers.ocean

# -*- coding: utf-8 -*-
"""Load temperature and salinity data from ocean products

"""
import os.path
import sys
from glob import glob

import gsw
import numpy as np
import xarray as xr
from xarray.backends import BackendEntrypoint


[docs] def filtered_list(files, year, ymin, ymax, pattern): """ Returns a filtered list of files fitting year range and pattern Parameters ---------- files : Array of strings Array with filenames to be filtered year : function Function to be applied on each filename, returning the data year ymin : integer Lowest bound of the time range ymax : integer Highest bound of the time range pattern : string Pattern that must fit the filenames Return ------ filtered : Array Extract of th input array fitting year and pattern conditions Example ------- .. code-block:: python def year(f): return f.split('_')[1] fics=filtre_liste(glob(os.path.join(rep,'**','*.nc')),year,2005,2006,'ARGO') """ r = [] for u in files: d = int(year(os.path.basename(os.path.splitext(u)[0]))) if d >= ymin and d <= ymax and pattern in u: r.append(u) return r
[docs] class lenapyOceanProducts(BackendEntrypoint): """Open netcdf ocean product This module allows to load temperature and salinity data from different products and format the data with unified definition for variables and coordinates, compatible with the use of lenapy_ocean : - standardized coordinates names : latitude, longitude, depth, time - standardized variables names : temp or PT or CT for temperature, psal, SA, SR for salinity When loading a product, all the files present in the product directory are parsed. To gain computing time, a first filter on the years to load can be applied, as well as a text filter. A second date filter can be apply afterwards with the .sel(time=slice('begin_date','end_date') method. All keyword arguments associated with xr.open_dataset can be passed. Dask is implicitely used when using these interface methods. It is imperative to chunk the dataset along time dimension (chunks=dict(depth=10)), and recommanded to chunk the dataset along depth dimension (chunks=dict(depth=10)) Parameters ---------- directory : string path of the product's directory product : string identifier of the product to be opened. Can be one of : "ISAS","NCEI","SIO","IPRC","ISHII","IAP","EN4","JAMSTEC","ECCO","NOC_OI","SODA" corr : string, optional additional parameter for EN4 prodcut ensemble. Can be one of: 'g10','l09','c13','c14' ymin : int, optional lowest bound of the time intervalle to be loaded (year) ymax : int, optional highest bound of the time intervalle to be loaded (year) filter : string, optionnal string pattern to filter datafiles names chunks : dict, optionnal dictionnary {dim:chunks} open_dataset_kwargs : dict, optional The keyword arguments form of open_mfdataset Returns ------- product : Dataset New dataset containing temperature and salinity data from the product Examples -------- .. code-block:: python data=xr.open_dataset('/home/usr/lenapy/data/ISAS20', engine='lenapyOceanProducts', product='ISAS', ymin=2005, ymax=2007, filter='ARGO', chunks={'time':1,'depth':10}) """ # pylint: disable=too-many-statements
[docs] def open_dataset( self, directory, product=None, corr=None, ymin=0, ymax=9999, filter="", drop_variables=None, open_dataset_kwargs={}, ): open_kw = open_dataset_kwargs.copy() # If available and not specified, use 'h5netcdf' as engine, which is more dask-friendly than netcdf4 if not ("engine" in open_dataset_kwargs.keys()) and ( "h5netcdf" in xr.backends.list_engines() ): open_kw["engine"] = "h5netcdf" # --------------------- ISAS --------------------- if product == "ISAS": """ Load data from ISAS product Product's directory must have the following format: -base directory (ex: ISAS20_ARGO) |-year (ex: 2020) |-release_type_timestamp_xxx_data.nc (ex: ISAS20_ARGO_20200915_fld_TEMP.nc) """ def year(f): return f.split("_")[2][0:4] fics = filtered_list( glob(os.path.join(directory, "**", "*.nc")), year, ymin, ymax, filter ) data = xr.open_mfdataset(fics, **open_kw) return xr.Dataset({"temp": data.TEMP, "psal": data.PSAL}) # --------------------- NCEI --------------------- elif product == "NCEI": """ Load data from NCEI product Temperature and salinity are reconstructed from anomaly and trimestrial climatology Product's directory must have the following format: -base directory (ex: NCEI) |-salinity |-sanom_timestamp.nc (ex: sanom_C1C107-09.nc) |-temperature |-tanom_timestamp.nc (ex: tanom_C1C107-09.nc) """ def year(f): a = f.split("_")[1][0] b = f.split("_")[1][1] return "%04d" % (1900 + 10 * int("0x%s" % a, 16) + int(b)) # Salinite fics_sal = filtered_list( glob(os.path.join(directory, "**", "sanom*.nc")), year, ymin, ymax, filter, ) sal = xr.open_mfdataset( fics_sal, engine="lenapyNetcdf", decode_times=False, time_type="360_day", open_dataset_kwargs=open_kw, ).s_an fics_climsal = glob(os.path.join(directory, "climato", "*s1[3-6]_01.nc")) climsal = xr.open_mfdataset( fics_climsal, engine="lenapyNetcdf", decode_times=False, time_type="360_day", open_dataset_kwargs=open_kw, ).s_an # Temperature fics_temp = filtered_list( glob(os.path.join(directory, "**", "tanom*.nc")), year, ymin, ymax, filter, ) temp = xr.open_mfdataset( fics_temp, engine="lenapyNetcdf", decode_times=False, time_type="360_day", open_dataset_kwargs=open_kw, ).t_an fics_climtemp = glob(os.path.join(directory, "climato", "*t1[3-6]_01.nc")) climtemp = xr.open_mfdataset( fics_climtemp, engine="lenapyNetcdf", decode_times=False, time_type="360_day", open_dataset_kwargs=open_kw, ).t_an return xr.Dataset( { "temp": ( temp.groupby("time.month") + climtemp.groupby("time.month").mean("time") ).drop("month"), "psal": ( sal.groupby("time.month") + climsal.groupby("time.month").mean("time") ).drop("month"), } ) # --------------------- SIO --------------------- elif product == "SIO": """ Load data from SIO product. Temperature and salinity are reconstructed from anomaly and annual climatology, and depth coordinate is derived from pressure. Product's directory must have the following format: -base directory (ex: SIO) |-climato |-RG_ArgoClim_climato_release.nc (ex: RG_ArgoClim_climato_2004_2018.nc) |-monthly |-RG_ArgoClim_timestamp_release.nc (ex: RG_ArgoClim_202210_2019.nc) """ def year(f): return f.split("_")[-2][0:4] fics_anom = filtered_list( glob(os.path.join(directory, "**", "RG_ArgoClim_20*.nc")), year, ymin, ymax, filter, ) fics_clim = glob(os.path.join(directory, "**", "RG_ArgoClim_cl*.nc")) anom = xr.open_mfdataset( fics_anom, engine="lenapyNetcdf", decode_times=False, time_type="360_day", open_dataset_kwargs=open_kw, ) clim = xr.open_mfdataset( fics_clim, engine="lenapyNetcdf", open_dataset_kwargs=open_kw ) temp = anom.ARGO_TEMPERATURE_ANOMALY + clim.ARGO_TEMPERATURE_MEAN psal = anom.ARGO_SALINITY_ANOMALY + clim.ARGO_SALINITY_MEAN # Corresponding depth for pressure at mean latitude of 30 degrees depth = -gsw.z_from_p(anom.PRESSURE, 30) return xr.Dataset( { "temp": temp.assign_coords(PRESSURE=depth).rename(PRESSURE="depth"), "psal": psal.assign_coords(PRESSURE=depth).rename(PRESSURE="depth"), } ) # --------------------- IPRC --------------------- elif product == "IPRC": """ Load data from IPRC product. Product's directory must have the following format: -base directory (ex: IPRC) |-monthly |-ArgoData_year_month.nc (ex: ArgoData_2020_01.nc) """ def set_time(ds): file_basename = os.path.basename( os.path.splitext(ds.encoding["source"])[0] ) date = file_basename.split("_") ts = np.datetime64("%s-%s-15" % (date[1], date[2]), "ns") return ds.assign_coords(time=ts).expand_dims(dim="time") def year(f): return f.split("_")[1] fics = filtered_list( glob(os.path.join(directory, "**", "ArgoData*.nc")), year, ymin, ymax, filter, ) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", preprocess=set_time, open_dataset_kwargs=open_kw, ) return xr.Dataset({"temp": data.TEMP, "psal": data.SALT}) # --------------------- ISHII --------------------- elif product == "ISHII": """ Load data from ISHII product. Product's directory must have the following format: -base directory (ex: ISHII) |-monthly |-xxx.year_month.nc (ex: sal.2022_08.nc) """ def year(f): return f.split("_")[0].split(".")[1] fics = filtered_list( glob(os.path.join(directory, "**", "*.*.nc")), year, ymin, ymax, filter ) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", drop_variables=[ "VAR_10_4_201_P0_L160_GLL0", "VAR_10_4_202_P0_L160_GLL0", ], open_dataset_kwargs=open_kw, ) return xr.Dataset({"temp": data.temperature, "psal": data.salinity}) # --------------------- IAP --------------------- elif product == "IAP": """ Load data from IAP product. Product's directory must have the following format: -base directory (ex: IAP) |-sal |-CZ16_depth_range_data_year_yyyy_month_mm.year_month.nc (ex: CZ16_1_2000m_salinity_year_2020_month_01.nc) |-temp |-CZ16_depth_range_data_year_yyyy_month_mm.year_month.nc (ex: CZ16_1_2000m_temperature_year_2020_month_01.nc) """ def set_time(ds): file_basename = os.path.basename( os.path.splitext(ds.encoding["source"])[0] ) date = file_basename.split("_") ts = np.datetime64("%s-%s-15" % (date[-3], date[-1]), "ns") return ds.assign_coords(time=ts).expand_dims(dim="time") def year(f): return f.split("_")[-3] fics = filtered_list( glob(os.path.join(directory, "**", "CZ16*.nc")), year, ymin, ymax, filter, ) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", preprocess=set_time, open_dataset_kwargs=open_kw, ) return xr.Dataset({"temp": data.temp, "SA": data.salinity}) # --------------------- EN_422 -------------------- elif product == "EN4": """ Load data from EN product. Product's directory must have the following format: -base directory (ex: EN) |-4.2.2.corr (ex : 4.2.2.g10) |-EN.4.2.2.xxx.corr.timestamp.nc (ex: EN.4.2.2.f.analysis.g10.202108.nc) """ def year(f): return f.split(".")[-1][0:4] if not (corr in ["g10", "l09", "c13", "c14"]): sys.exit(-1) fics = filtered_list( glob(os.path.join(directory, "4.2.2.%s" % corr, "EN.4.2.2*.nc")), year, ymin, ymax, filter, ) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", open_dataset_kwargs=open_kw ) return xr.Dataset({"PT": data.temperature - 273.15, "psal": data.salinity}) # --------------------- JAMSTEC -------------------- elif product == "JAMSTEC": """ Load data from JAMSTEC product. Depth coordinate is derived from pressure. Product's directory must have the following format: -base directory (ex: JAMSTEC) |-monthly |-TS_timestamp_xxx.nc (ex: TS_202105_GLB.nc) """ def set_time(ds): file_basename = os.path.basename( os.path.splitext(ds.encoding["source"])[0] ) date = file_basename.split("_")[1] ts = np.datetime64("%s-%s-15" % (date[0:4], date[4:6]), "ns") return ds.assign_coords(time=ts).expand_dims(dim="time") def year(f): return f.split("_")[1][0:4] fics = filtered_list( glob(os.path.join(directory, "**", "*GLB.nc")), year, ymin, ymax, filter ) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", preprocess=set_time, open_dataset_kwargs=open_kw, ) # Corresponding depth for pressure at mean latitude of 30 degrees depth = -gsw.z_from_p(data.PRES, 30) return xr.Dataset( { "temp": data.TOI.assign_coords(PRES=depth).rename(PRES="depth"), "psal": data.SOI.assign_coords(PRES=depth).rename(PRES="depth"), } ) # --------------------- ECCO -------------------- elif product == "ECCO": """ Load data from ECCO product. Product's directory must have the following format: -base directory (ex: ECCO) |-SALT |-SALT_year_month.nc (ex: SALT_2016_12.nc) |-THETA |-THETA_year_month.nc (ex: THETA_2016_12.nc) """ def preproc(ds): ds = ( ds.set_index(i="longitude", j="latitude", k="Z") .rename(i="longitude", j="latitude", k="depth") .drop("timestep") ) return ds.assign_coords(depth=-ds.depth) def year(f): return f.split("_")[1] fics = filtered_list( glob(os.path.join(directory, "**", "*.nc")), year, ymin, ymax, filter ) data = xr.open_mfdataset(fics, preprocess=preproc, **open_kw) data = data.where(data != 0.0) return xr.Dataset({"PT": data.THETA, "psal": data.SALT}) # --------------------- NOC OI -------------------- elif product == "NOC_OI": """ Load data from NOC ARGO OI product. Depth coordinate is derived from pressure. """ def preproc(ds): ds["time"] = ds.indexes["time"].to_datetimeindex() return ds fics = glob(os.path.join(directory, "*.nc")) data = xr.open_mfdataset( fics, engine="lenapyNetcdf", preprocess=preproc, open_dataset_kwargs=open_kw, ).sel(time=slice(str(ymin), str(ymax))) # Corresponding depth for pressure at mean latitude of 30 degrees depth = -gsw.z_from_p(data.pressure, 30) return xr.Dataset( { "temp": data.temperature.assign_coords(pressure=depth).rename( pressure="depth" ), "psal": data.practical_salinity.assign_coords( pressure=depth ).rename(pressure="depth"), } ) # --------------------- SODA -------------------- elif product == "SODA": """ Load data from SODA product. """ def preproc(ds): return ds.rename( dict(xt_ocean="longitude", yt_ocean="latitude", st_ocean="depth") ) def year(f): return f.split("_")[-1] fics = filtered_list( glob(os.path.join(directory, "*.nc")), year, ymin, ymax, filter ) print(fics) data = xr.open_mfdataset(fics, preprocess=preproc, **open_kw) return xr.Dataset({"PT": data.temp, "psal": data.salt})