"""
The **lenapy_time** module implements some usuals functions to be applied on timeseries.
"""
# -*- coding: utf-8 -*-
import xarray as xr
from lenapy.plots.plotting import *
from lenapy.utils.climato import *
from lenapy.utils.covariance import *
from lenapy.utils.eof import *
from lenapy.utils.time import *
[docs]
@xr.register_dataset_accessor("lntime")
class TimeSet:
"""This class implements an extension of any dataset to add some usefull methods often used on timeseries in earth science data handling"""
def __init__(self, xarray_obj):
self._obj = xarray_obj
if "time" not in list(xarray_obj.keys()) + list(xarray_obj.coords):
raise AssertionError("The time coordinates does not exist")
[docs]
def Coeffs_climato(self, **kwargs):
return Coeffs_climato(self._obj, **kwargs)
[docs]
def climato(self, **kwargs):
"""Perform climato analysis on all the variables in a dataset
Input data are decomposed into :
* annual cycle
* semi-annual cycle
* trend
* mean
* residual signal
The returned data are a combination of these elements depending on passed arguments (signal, mean, trend, cycle)
If return_coeffs=True, the coefficients of the decompositions are returned
Parameters
----------
signal : Bool (default=True)
returns residual signal
mean : Bool (default=True)
returns mean signal
trend : Bool (default=True)
returns trend (unit=day**-1)
cycle : Bool (default=False)
return annual and semi-annual cycles (cos and sin)
return_coeffs : Bool (default=False)
returns cycle coefficient, mean and trend
time_period : slice (default=slice(None,None), ie the whole time period of the data)
Reference time period when climatology has to be computed
fillna : Bool (default=False)
if fillna=True and signal=True, Nan in signal is replaced by the other selected components
Only for 1D signal, for higher dimensions any NaN in the signal will produce a NaN in the output
Returns
-------
climato : dataset
a dataset with the same structure as the input, with modified data according to the chosen options
if return_coeffs=True, an extra dataset is provided with the coefficients of the decomposition
Example
-------
.. code-block:: python
data = lntime.open_geodata('/home/user/lenapy/data/gohc_2020.nc')
output,coeffs = data.lntime.climato(mean=True, trend=True, signal=True,return_coeffs=True)
"""
# Pour toutes les données dépendant du temps, retourne l'analyse de la climato
res = {}
for var in self._obj.data_vars:
if "time" in self._obj[var].coords:
res[var] = climato(self._obj[var], **kwargs)
else:
res[var] = self._obj[var]
return xr.Dataset(res)
[docs]
def generate_climato(self, coeffs, **kwargs):
"""
Returns a signal based on a given climatology (mean, trend, cycles)
Parameters
----------
coeffs : xr.DataArray
returned by the climato method with return_climato=True
mean: Bool (default=True)
returns mean signal
trend: Bool (default=True)
returns trend
cycle: Bool (default=False)
return annual and semi-annual cycles
"""
return generate_climato(self._obj.time, coeffs, **kwargs)
[docs]
def filter(self, filter_name="lanczos", q=3, **kwargs):
"""
Apply a specified filter on all the time-dependent data in the dataset.
Boundaries are handled by operating a mirror operation on the residual data after removing a q-order polyfit
from the data. Available filters are in the .utils python file
Parameters
----------
filter_name : function or str
if string, filter function name, from the .filters file
if function, external function defined by user, returning a kernel
q : int
order of the polyfit to handle boundary effects
**kwargs :
Keyword arguments for the chosen filter
Returns
-------
filtered : xr.Dataset
Filtered dataset
Example
-------
>>>data = lntime.open_geodata('/home/user/lenapy/data/isas.nc')
>>>data.lntime.filter(lanczos,q=3,coupure=12,order=2)
"""
res = {}
for var in self._obj.data_vars:
if "time" in self._obj[var].coords:
res[var] = self._obj[var].lntime.filter(
filter_name=filter_name, q=q, **kwargs
)
else:
res[var] = self._obj[var]
return xr.Dataset(res)
[docs]
def interp_time(self, other, **kwargs):
"""
Interpolate DataArray at the same dates than other
Parameters
----------
other : xr.DataArray
must have a time dimension
Return
------
interpolated : xr.DataArray
new DataArray interpolated
"""
res = {}
for var in self._obj.data_vars:
if "time" in self._obj[var].coords:
res[var] = self._obj[var].lntime.interp_time(other, **kwargs)
else:
res[var] = self._obj[var]
return xr.Dataset(res)
[docs]
def to_datetime(self, time_type):
"""
Convert dataset time format to standard pandas time format
Parameters
----------
time_type : string
Can be 'frac_year' or '360_day'
Return
------
converted : dataset
new dataset with the time dimension in a standard pandas format
"""
return to_datetime(self._obj, time_type)
[docs]
def fill_time(self):
"""
Fill missing values in a timeseries in adding some new points, by respecting the time sampling. Missing values are not NaN
but real absent points in the timeseries. A linear interpolation is performed at the missing points.
"""
return fill_time(self._obj)
[docs]
@xr.register_dataarray_accessor("lntime")
class TimeArray:
"""
This class implements an extension of any dataArray to add some usefull methods often used on timeseries in
earth science data handling.
"""
def __init__(self, xarray_obj):
self._obj = xarray_obj
if "time" not in xarray_obj.coords:
raise AssertionError("The time coordinates does not exist")
[docs]
def Coeffs_climato(self, **kwargs):
return Coeffs_climato(self._obj, **kwargs)
[docs]
def climato(self, **kwargs):
"""
Perform climato analysis on a dataarray
Input data are decomposed into :
* annual cycle
* semi-annual cycle
* trend
* mean
* residual signal
The returned data are a combination of these elements depending on passed arguments (signal, mean, trend, cycle)
If return_coeffs=True, the coefficients of the decompositions are returned
Parameters
----------
signal: Bool (default=True)
returns residual signal
mean: Bool (default=True)
returns mean signal
trend: Bool (default=True)
returns trend (unit=day**-1)
cycle: Bool (default=False)
return annual and semi-annual cycles (cos and sin)
return_coeffs: Bool (default=False)
returns cycle coefficient, mean and trend
t_min, t_max: datetime format or string (default=None,None), ie the whole time period of the data)
Reference time period when climatology has to be computed
fillna: Bool (default=False)
if fillna=True and signal=True, Nan in signal is replaced by the other selected components
Only for 1D signal, for higher dimensions any NaN in the signal will produce a NaN in the output
Returns
-------
climato : dataset
a dataset with the same structure as the input, with modified data according to the chosen options
if return_coeffs=True, an extra dataset is provided with the coefficients of the decomposition
Example
-------
.. code-block:: python
data = lntime.open_geodata('/home/user/lenapy/data/gohc_2020.nc').ohc
output,coeffs = data.lntime.climato(mean=True, trend=True, signal=True,return_coeffs=True)
"""
return climato(self._obj, **kwargs)
[docs]
def generate_climato(self, coeffs, **kwargs):
"""
Returns a signal based on a given climatology (mean, trend, cycles)
Parameters
----------
coeffs: DataArray
returned by the climato method with return_climato=True
mean: Bool (default=True)
returns mean signal
trend: Bool (default=True)
returns trend
cycle: Bool (default=False)
return annual and semi-annual cycles
"""
return generate_climato(self._obj.time, coeffs, **kwargs)
[docs]
def filter(self, filter_name="lanczos", q=3, **kwargs):
"""
Apply a specified filter on all the time-dependent datarray
Boundaries are handled by operating a mirror operation on the residual data after removing a q-order polyfit
from the data. Available filters are in the .utils python file
Parameters
----------
filter_name : function or string
if string, filter function name, from the .filters file
if function, external function defined by user, returning a kernel
q : int
order of the polyfit to handle boundary effects
**kwargs :
keyword arguments for the chosen filter
Returns
-------
filtered : filtered dataset
Example
-------
.. code-block:: python
data = lntime.open_geodata('/home/user/lenapy/data/isas.nc').temp
data.lntime.filter(lanczos,q=3,coupure=12,order=2)
"""
return filter(self._obj, filter_name=filter_name, q=q, **kwargs)
[docs]
def interp_time(self, other, **kwargs):
"""
Interpolate DataArray at the same dates than other
Parameters
----------
other : xr.DataArray
must have a time dimension
Return
------
interpolated : xr.DataArray
new DataArray interpolated
"""
return interp_time(self._obj, other, **kwargs)
[docs]
def plot(self, **kwargs):
"""
Plots the timeseries of the data in the TimeArray, including an uncertainty.
Computes the uncertainty on all dimensions that are not time.
Parameters
----------
thick_line: String (default='median')
How to aggregate the data to plot the main thick line. Can be:
* `median`: computes the median
* `mean`: computes the mean
* None: does not plot a main thick line
shaded_area: String (default='auto')
How to aggregate the data to plot the uncertainty around the thick line. Can be:
* `auto`: plots 1.645 standard deviation if thick_line is `mean` and quantiles 5-95 if thick_line is `median`.
* `auto-multiple`: plots 1,2 and 3 standard deviations if thick_line is `mean` and quantiles 5-95, 17-83 and 25-75 if thick_line is `median`.
* `std`: plots a multiple of the standard deviation based on kwarg `standard_deviation_multiple`
* `quantiles`: plots quantiles based on the kwargs `quantile_min` and `quantile_max`
* None: does not plot uncertainty
hue: String (default=None)
Similar to hue in xarray.DataArray.plot(hue=...), group data by the dimension before aggregating and computing uncertainties.
Has to be a dimension other than time in the dataarray.
standard_deviation_multiple: Float > 0 (default=1.65)
The multiple of standard deviations to use for the uncertainty with `shaded_area=std`
quantile_min: Float between 0 and 1 (default=0.05)
lower quantile to compute uncertainty with `shaded_area=quantiles`
quantile_max: Float between 0 and 1 (default=0.95)
upper quantile to compute uncertainty with `shaded_area=quantiles`
color: String or List (default=None)
color of the main thick line and the shaded area. Must be a string
thick_line_color: String or List (default=None)
color of the main thick line. Must be a string
If hue and one color are provided, the single color is used for all line plots.
If hue and a list of colors are provided, the colors are cycled.
shaded_area_color: String or List (default=None)
color of the shaded area. Must be a string.
If not provided, defaults to the thick_line_color value.
If hue and one color are provided, the single color is used for all area plots.
If hue and a list of colors are provided, the colors are cycled.
shaded_area_alpha: Float between 0 and 1 (default=0.2)
Transparency of the uncertainty plots
ax: matplotlib.pyplot.Axes instance (default=None)
If not provided, plots on the current axes.
label: String (default=None)
If provided, label that is provided to ax.plot.
Does not work if hue is provided.
line_kwargs: kwargs
Additional arguments provided to the plot function for the main thick line
area_kwargs: kwargs
Additional arguments provided to the plot function for the uncertainty
add_legend: Bool (default=True)
if True, adds matplotlib legend to the current ax after plotting the data.
"""
plot_timeseries_uncertainty(self._obj, **kwargs)
[docs]
def to_datetime(self, time_type):
"""
Convert DataArray time format to standard pandas time format
Parameters
----------
time_type : string
Can be 'frac_year' or '360_day'
Return
------
converted : xr.DataArray
new DataArray with the time dimension in a standard pandas format
"""
return to_datetime(self._obj, time_type)
[docs]
def diff_3pts(self, dim, **kw):
"""
Derivative formula along the selected dimension, returning on each point the linear regression on the three points
defined by the selected point and its two neighbours
"""
return diff_3pts(self._obj, dim, **kw)
[docs]
def diff_2pts(self, dim, **kw):
"""
Derivative formula along the selected dimension, returning for each pair of points the slope, set at the middle coordinates of
these two points
"""
return diff_2pts(self._obj, dim, **kw)
[docs]
def trend(self, time_unit="1s"):
"""
Perform a linear regression on the data, and returns the slope coefficient
"""
return trend(self._obj, time_unit=time_unit)
[docs]
def detrend(self):
"""
remove the trend from a dataarray
"""
return detrend(self._obj)
[docs]
def fill_time(self):
"""
Fill missing values in a timeseries in adding some new points, by respecting the time sampling. Missing values are not NaN
but real absent points in the timeseries. A linear interpolation is performed at the missing points.
"""
return fill_time(self._obj)
[docs]
def covariance_analysis(self):
"""
Returns an instance of the *covariance* class based on the dataArray
"""
return covariance(self._obj.time)
[docs]
def OLS(self, degree, tref=None, sigma=None, datetime_unit="s"):
"""
Returns the OLS estimator performed with a degree "degree" regression
"""
est = estimator(
self._obj, degree, tref=tref, sigma=sigma, datetime_unit=datetime_unit
)
est.OLS()
return est
[docs]
def GLS(self, degree, tref=None, sigma=None, datetime_unit="s"):
"""
Returns the GLS estimator performed with a degree "degree" regression and a covariance matrix "sigma"
"""
est = estimator(
self._obj, degree, tref=tref, sigma=sigma, datetime_unit=datetime_unit
)
est.GLS()
return est
[docs]
def corr(self, other, remove_trend=False, **kwargs):
"""
Returns the Pearson correlation coefficient between the timeseries and another one. The other one is
interpolated at the dates of the calling timeseries.
If remove_trend=True, the two timeseries are detrended before correlation.
"""
if remove_trend:
r1 = detrend(interp_time(other, self._obj))
r2 = detrend(self._obj)
else:
r1 = interp_time(other, self._obj)
r2 = self._obj
return xr.corr(r1, r2, **kwargs)
[docs]
def fillna_climato(self):
"""
Returns a DataArray with all NaN values replaced by climatology and trend
Climatology is computed over the optional time_period slice
"""
return fillna_climato(self._obj)
[docs]
def EOF(self, dim, k):
"""
Return an instance of the *eof* class based on the data array and the dimension names of the eof
"""
return EOF(self._obj, dim, k)
[docs]
def SavitzkyGolay(self, dim="time", window=5, order=1, step=1, sigma=None):
"""
Perform a Savitzky-Golay filter on a dataArray and return filtered derivatives up to maximal order
Parameters
----------
dim : string
name of the dimension along which to apply the filter
window : int
length of the filtering window (must be odd)
order : int
order of the polynome to fit the function across the window
step : float or time type
distance between to consecutive points of abscissa
sigma : same type as step (optionnal)
standard deviation of the weights function to be applied on the window
Return
------
filtered : xr.DataArray
new DataArray filtered with an extra dimension 'order', giving the successive filtered derivatives of the signal
"""
return SavitzkyGolay(self._obj, dim, window, order, step, sigma)