Source code for lenapy.lenapy_time

"""
The **lenapy_time** module implements some common functions to be applied to time series.

"""

# -*- coding: utf-8 -*-

import xarray as xr

from lenapy.plots.plotting import *
from lenapy.utils.climato import *
from lenapy.utils.covariance import *
from lenapy.utils.eof import *
from lenapy.utils.time import *


@xr.register_dataset_accessor("lntime")
class TimeSet:
    """Dataset accessor (``ds.lntime``) bundling time-series helpers.

    This class extends any dataset with useful methods often needed when
    handling time series in earth-science data.  Most methods apply the
    matching DataArray operation to every data variable that carries a
    ``time`` coordinate and pass the other variables through unchanged.

    Raises
    ------
    AssertionError
        On construction, if ``time`` is found neither among the dataset keys
        nor among its coordinates.
    """

    def __init__(self, xarray_obj):
        self._obj = xarray_obj
        known_names = list(xarray_obj.keys()) + list(xarray_obj.coords)
        if "time" not in known_names:
            raise AssertionError("The time coordinates does not exist")

    def _apply_to_time_variables(self, func):
        """Build a dataset by applying *func* to each time-dependent variable.

        Variables without a ``time`` coordinate are copied over untouched.
        """
        converted = {}
        for name in self._obj.data_vars:
            variable = self._obj[name]
            converted[name] = func(variable) if "time" in variable.coords else variable
        return xr.Dataset(converted)

    def Coeffs_climato(self, **kwargs):
        """Forward the dataset and ``**kwargs`` to ``Coeffs_climato``."""
        return Coeffs_climato(self._obj, **kwargs)

    def climato(self, **kwargs):
        """Perform climato analysis on all the variables in a dataset.

        Input data are decomposed into:

        * annual cycle
        * semi-annual cycle
        * trend
        * mean
        * residual signal

        The returned data are a combination of these elements depending on
        the passed arguments (signal, mean, trend, cycle).  Variables without
        a ``time`` coordinate are returned unchanged.

        Parameters
        ----------
        signal : Bool (default=True)
            returns residual signal
        mean : Bool (default=True)
            returns mean signal
        trend : Bool (default=True)
            returns trend (unit=day**-1)
        cycle : Bool (default=False)
            return annual and semi-annual cycles (cos and sin)
        return_coeffs : Bool (default=False)
            returns cycle coefficient, mean and trend
        time_period : slice (default=slice(None,None), ie the whole time period of the data)
            Reference time period when climatology has to be computed
        fillna : Bool (default=False)
            if fillna=True and signal=True, NaN in signal is replaced by the
            other selected components.  Only for 1D signal, for higher
            dimensions any NaN in the signal will produce a NaN in the output

        Returns
        -------
        climato : dataset
            a dataset with the same structure as the input, with modified
            data according to the chosen options
        coeffs : dataset
            only if return_coeffs=True: an extra dataset holding, for each
            time-dependent variable, the coefficients of the decomposition

        Example
        -------
        .. code-block:: python

            data = lntime.open_geodata('/home/user/lenapy/data/gohc_2020.nc')
            output,coeffs = data.lntime.climato(mean=True, trend=True, signal=True,return_coeffs=True)
        """
        want_coeffs = bool(kwargs.get("return_coeffs", False))
        signals = {}
        coeffs = {}
        for name in self._obj.data_vars:
            variable = self._obj[name]
            if "time" not in variable.coords:
                signals[name] = variable
                continue
            result = climato(variable, **kwargs)
            # With return_coeffs=True the per-variable helper is documented to
            # hand back (signal, coeffs).  Feeding that pair straight into
            # xr.Dataset would be read as a (dims, data) variable spec, so the
            # two halves are collected into separate datasets instead.
            if want_coeffs and isinstance(result, tuple) and len(result) == 2:
                signals[name], coeffs[name] = result
            else:
                signals[name] = result
        output = xr.Dataset(signals)
        if want_coeffs:
            return output, xr.Dataset(coeffs)
        return output

    def generate_climato(self, coeffs, **kwargs):
        """Return a signal based on a given climatology (mean, trend, cycles).

        The signal is generated on the ``time`` coordinate of this dataset.

        Parameters
        ----------
        coeffs : xr.DataArray
            returned by the climato method with return_coeffs=True
        mean : Bool (default=True)
            returns mean signal
        trend : Bool (default=True)
            returns trend
        cycle : Bool (default=False)
            return annual and semi-annual cycles
        """
        return generate_climato(self._obj.time, coeffs, **kwargs)

    def filter(self, filter_name="lanczos", q=3, **kwargs):
        """Apply a specified filter on all the time-dependent data in the dataset.

        Boundaries are handled by operating a mirror operation on the
        residual data after removing a q-order polyfit from the data.
        Available filters are in the .utils python file.

        Parameters
        ----------
        filter_name : function or str
            if string, filter function name, from the .filters file
            if function, external function defined by user, returning a kernel
        q : int
            order of the polyfit to handle boundary effects
        **kwargs :
            Keyword arguments for the chosen filter

        Returns
        -------
        filtered : xr.Dataset
            Filtered dataset

        Example
        -------
        .. code-block:: python

            data = lntime.open_geodata('/home/user/lenapy/data/isas.nc')
            data.lntime.filter('lanczos', q=3, coupure=12, order=2)
        """
        return self._apply_to_time_variables(
            lambda variable: variable.lntime.filter(
                filter_name=filter_name, q=q, **kwargs
            )
        )

    def interp_time(self, other, **kwargs):
        """Interpolate every time-dependent variable at the same dates as *other*.

        Parameters
        ----------
        other : xr.DataArray
            must have a time dimension

        Return
        ------
        interpolated : xr.Dataset
            new dataset whose time-dependent variables are interpolated
        """
        return self._apply_to_time_variables(
            lambda variable: variable.lntime.interp_time(other, **kwargs)
        )

    def to_datetime(self, time_type):
        """Convert dataset time format to standard pandas time format.

        Parameters
        ----------
        time_type : string
            Can be 'frac_year' or '360_day'

        Return
        ------
        converted : dataset
            new dataset with the time dimension in a standard pandas format
        """
        return to_datetime(self._obj, time_type)

    def fill_time(self):
        """Fill missing points in a time series, respecting the time sampling.

        Missing values are not NaN but real absent points in the timeseries.
        A linear interpolation is performed at the missing points.
        """
        return fill_time(self._obj)
@xr.register_dataarray_accessor("lntime")
class TimeArray:
    """DataArray accessor (``da.lntime``) bundling time-series helpers.

    This class extends any DataArray with useful methods often needed when
    handling time series in earth-science data.  Every method forwards the
    wrapped DataArray to the matching helper function.

    Raises
    ------
    AssertionError
        On construction, if the DataArray has no ``time`` coordinate.
    """

    def __init__(self, xarray_obj):
        self._obj = xarray_obj
        has_time = "time" in xarray_obj.coords
        if not has_time:
            raise AssertionError("The time coordinates does not exist")

    def Coeffs_climato(self, **kwargs):
        """Forward the DataArray and ``**kwargs`` to ``Coeffs_climato``."""
        data = self._obj
        return Coeffs_climato(data, **kwargs)

    def climato(self, **kwargs):
        """Perform climato analysis on a DataArray.

        Input data are decomposed into:

        * annual cycle
        * semi-annual cycle
        * trend
        * mean
        * residual signal

        The returned data are a combination of these elements depending on
        the passed arguments (signal, mean, trend, cycle).  If
        return_coeffs=True, the coefficients of the decomposition are
        returned as well.

        Parameters
        ----------
        signal : Bool (default=True)
            returns residual signal
        mean : Bool (default=True)
            returns mean signal
        trend : Bool (default=True)
            returns trend (unit=day**-1)
        cycle : Bool (default=False)
            return annual and semi-annual cycles (cos and sin)
        return_coeffs : Bool (default=False)
            returns cycle coefficient, mean and trend
        t_min, t_max : datetime format or string (default=None,None, ie the whole time period of the data)
            Reference time period when climatology has to be computed.
            NOTE(review): the Dataset accessor documents this option as
            ``time_period`` (a slice); confirm which spelling the helper
            accepts.
        fillna : Bool (default=False)
            if fillna=True and signal=True, NaN in signal is replaced by the
            other selected components.  Only for 1D signal, for higher
            dimensions any NaN in the signal will produce a NaN in the output

        Returns
        -------
        climato :
            data with the same structure as the input, modified according to
            the chosen options; if return_coeffs=True, an extra object is
            provided with the coefficients of the decomposition

        Example
        -------
        .. code-block:: python

            data = lntime.open_geodata('/home/user/lenapy/data/gohc_2020.nc').ohc
            output,coeffs = data.lntime.climato(mean=True, trend=True, signal=True,return_coeffs=True)
        """
        data = self._obj
        return climato(data, **kwargs)

    def generate_climato(self, coeffs, **kwargs):
        """Return a signal based on a given climatology (mean, trend, cycles).

        The signal is generated on the ``time`` coordinate of this DataArray.

        Parameters
        ----------
        coeffs : DataArray
            returned by the climato method with return_coeffs=True
        mean : Bool (default=True)
            returns mean signal
        trend : Bool (default=True)
            returns trend
        cycle : Bool (default=False)
            return annual and semi-annual cycles
        """
        time_axis = self._obj.time
        return generate_climato(time_axis, coeffs, **kwargs)

    def filter(self, filter_name="lanczos", q=3, **kwargs):
        """Apply a specified filter on the time-dependent DataArray.

        Boundaries are handled by operating a mirror operation on the
        residual data after removing a q-order polyfit from the data.
        Available filters are in the .utils python file.

        Parameters
        ----------
        filter_name : function or string
            if string, filter function name, from the .filters file
            if function, external function defined by user, returning a kernel
        q : int
            order of the polyfit to handle boundary effects
        **kwargs :
            keyword arguments for the chosen filter

        Returns
        -------
        filtered :
            filtered data

        Example
        -------
        .. code-block:: python

            data = lntime.open_geodata('/home/user/lenapy/data/isas.nc').temp
            data.lntime.filter('lanczos', q=3, coupure=12, order=2)
        """
        # ``filter`` here is the module-level helper brought in by the star
        # imports, not this method and not the builtin.
        data = self._obj
        return filter(data, filter_name=filter_name, q=q, **kwargs)

    def interp_time(self, other, **kwargs):
        """Interpolate the DataArray at the same dates as *other*.

        Parameters
        ----------
        other : xr.DataArray
            must have a time dimension

        Return
        ------
        interpolated : xr.DataArray
            new DataArray interpolated
        """
        data = self._obj
        return interp_time(data, other, **kwargs)

    def plot(self, **kwargs):
        """Plot the time series of the data, including an uncertainty.

        Computes the uncertainty on all dimensions that are not time.  The
        call is forwarded to ``plot_timeseries_uncertainty``; this method
        itself returns nothing.

        Parameters
        ----------
        thick_line : String (default='median')
            How to aggregate the data to plot the main thick line. Can be:

            * `median`: computes the median
            * `mean`: computes the mean
            * None: does not plot a main thick line
        shaded_area : String (default='auto')
            How to aggregate the data to plot the uncertainty around the
            thick line. Can be:

            * `auto`: plots 1.645 standard deviation if thick_line is `mean`
              and quantiles 5-95 if thick_line is `median`.
            * `auto-multiple`: plots 1, 2 and 3 standard deviations if
              thick_line is `mean` and quantiles 5-95, 17-83 and 25-75 if
              thick_line is `median`.
            * `std`: plots a multiple of the standard deviation based on
              kwarg `standard_deviation_multiple`
            * `quantiles`: plots quantiles based on the kwargs
              `quantile_min` and `quantile_max`
            * None: does not plot uncertainty
        hue : String (default=None)
            Similar to hue in xarray.DataArray.plot(hue=...), group data by
            the dimension before aggregating and computing uncertainties.
            Has to be a dimension other than time in the dataarray.
        standard_deviation_multiple : Float > 0 (default=1.65)
            The multiple of standard deviations to use for the uncertainty
            with `shaded_area=std`
        quantile_min : Float between 0 and 1 (default=0.05)
            lower quantile to compute uncertainty with `shaded_area=quantiles`
        quantile_max : Float between 0 and 1 (default=0.95)
            upper quantile to compute uncertainty with `shaded_area=quantiles`
        color : String or List (default=None)
            color of the main thick line and the shaded area. Must be a string
        thick_line_color : String or List (default=None)
            color of the main thick line. Must be a string.
            If hue and one color are provided, the single color is used for
            all line plots.  If hue and a list of colors are provided, the
            colors are cycled.
        shaded_area_color : String or List (default=None)
            color of the shaded area. Must be a string.  If not provided,
            defaults to the thick_line_color value.
            If hue and one color are provided, the single color is used for
            all area plots.  If hue and a list of colors are provided, the
            colors are cycled.
        shaded_area_alpha : Float between 0 and 1 (default=0.2)
            Transparency of the uncertainty plots
        ax : matplotlib.pyplot.Axes instance (default=None)
            If not provided, plots on the current axes.
        label : String (default=None)
            If provided, label that is provided to ax.plot.  Does not work
            if hue is provided.
        line_kwargs : kwargs
            Additional arguments provided to the plot function for the main
            thick line
        area_kwargs : kwargs
            Additional arguments provided to the plot function for the
            uncertainty
        add_legend : Bool (default=True)
            if True, adds matplotlib legend to the current ax after plotting
            the data.
        """
        data = self._obj
        plot_timeseries_uncertainty(data, **kwargs)

    def to_datetime(self, time_type):
        """Convert DataArray time format to standard pandas time format.

        Parameters
        ----------
        time_type : string
            Can be 'frac_year' or '360_day'

        Return
        ------
        converted : xr.DataArray
            new DataArray with the time dimension in a standard pandas format
        """
        data = self._obj
        return to_datetime(data, time_type)

    def diff_3pts(self, dim, **kw):
        """Three-point derivative along the selected dimension.

        Returns on each point the linear regression on the three points
        defined by the selected point and its two neighbours.
        """
        data = self._obj
        return diff_3pts(data, dim, **kw)

    def diff_2pts(self, dim, **kw):
        """Two-point derivative along the selected dimension.

        Returns for each pair of points the slope, set at the middle
        coordinates of these two points.
        """
        data = self._obj
        return diff_2pts(data, dim, **kw)

    def trend(self, time_unit="1s"):
        """Perform a linear regression on the data and return the slope coefficient."""
        data = self._obj
        return trend(data, time_unit=time_unit)

    def detrend(self):
        """Remove the trend from the DataArray."""
        data = self._obj
        return detrend(data)

    def fill_time(self):
        """Fill missing points in a time series, respecting the time sampling.

        Missing values are not NaN but real absent points in the timeseries.
        A linear interpolation is performed at the missing points.
        """
        data = self._obj
        return fill_time(data)

    def covariance_analysis(self):
        """Return an instance of the *covariance* class.

        The instance is built from the ``time`` coordinate of the DataArray.
        """
        time_axis = self._obj.time
        return covariance(time_axis)

    def OLS(self, degree, tref=None, sigma=None, datetime_unit="s"):
        """Return the OLS estimator performed with a degree "degree" regression."""
        options = dict(tref=tref, sigma=sigma, datetime_unit=datetime_unit)
        fit = estimator(self._obj, degree, **options)
        fit.OLS()
        return fit

    def GLS(self, degree, tref=None, sigma=None, datetime_unit="s"):
        """Return the GLS estimator.

        The regression is performed with a degree "degree" and a covariance
        matrix "sigma".
        """
        options = dict(tref=tref, sigma=sigma, datetime_unit=datetime_unit)
        fit = estimator(self._obj, degree, **options)
        fit.GLS()
        return fit

    def corr(self, other, remove_trend=False, **kwargs):
        """Return the Pearson correlation coefficient with another time series.

        The other series is interpolated at the dates of the calling
        time series.  If remove_trend=True, the two time series are
        detrended before correlation.  Extra ``**kwargs`` are passed to
        ``xr.corr``.
        """
        own = self._obj
        other_on_own_dates = interp_time(other, own)
        if remove_trend:
            other_on_own_dates = detrend(other_on_own_dates)
            own = detrend(own)
        return xr.corr(other_on_own_dates, own, **kwargs)

    def fillna_climato(self):
        """Return a DataArray with all NaN values replaced by climatology and trend.

        NOTE(review): earlier documentation mentioned an optional
        ``time_period`` slice for the climatology; this wrapper forwards no
        option, so the helper's defaults apply.
        """
        data = self._obj
        return fillna_climato(data)

    def EOF(self, dim, k):
        """Return an instance of the *eof* class.

        The instance is based on the data array and the dimension names of
        the eof; ``k`` is forwarded unchanged to the helper.
        """
        # ``EOF`` resolves to the module-level name from the star imports.
        data = self._obj
        return EOF(data, dim, k)

    def SavitzkyGolay(self, dim="time", window=5, order=1, step=1, sigma=None):
        """Apply a Savitzky-Golay filter and return filtered derivatives up to maximal order.

        Parameters
        ----------
        dim : string
            name of the dimension along which to apply the filter
        window : int
            length of the filtering window (must be odd)
        order : int
            order of the polynomial to fit the function across the window
        step : float or time type
            distance between two consecutive points of abscissa
        sigma : same type as step (optional)
            standard deviation of the weights function to be applied on the
            window

        Return
        ------
        filtered : xr.DataArray
            new DataArray filtered with an extra dimension 'order', giving
            the successive filtered derivatives of the signal
        """
        data = self._obj
        return SavitzkyGolay(data, dim, window, order, step, sigma)