# Source code for metacsv.io.to_xarray

'''
Utilities for converting metacsv Containers to xarray containers
'''

import pandas as pd
import numpy as np
from collections import OrderedDict
from .._compat import string_types
from .yaml_tools import ordered_dump

# Placeholder for the lazily imported xarray module; it stays None until
# _import_xarray() rebinds it.
xr = None


def _import_xarray():
    '''
    Bind the module-level ``xr`` name to the xarray package on first use.

    The import is performed here rather than at module import time; once
    ``xr`` has been bound, further calls return without doing anything.
    '''
    global xr
    if xr is not None:
        return
    import xarray as xr


def _check_series_unique(series):
    '''
    Verify that every distinct index entry of ``series`` maps to one value.

    The data is grouped by its full index (by all levels when the index has
    more than one name, otherwise by the index itself) and each group must
    collapse to exactly one row once duplicates are dropped.

    Raises
    ------
    ValueError
        If any group holds more than one distinct value.
    '''

    def _assert_single_value(grp):
        # Build a readable label for the error message. Group names that are
        # not strings are joined with commas; if they cannot be joined
        # (TypeError), the raw group name is used as-is.
        label = grp.name
        try:
            if not isinstance(label, string_types):
                label = ','.join(label)
        except TypeError:
            label = grp.name

        if len(grp.drop_duplicates()) == 1:
            return

        raise ValueError(
            "Data not uniquely indexed for base coords: ({})".format(label))

    if len(series.index.names) > 1:
        grouped = series.groupby(level=series.index.names)
    else:
        grouped = series.groupby(by=series.index)

    # apply() is used only for its side effect of raising on a bad group.
    grouped.apply(_assert_single_value)


def _append_coords_to_dataset(ds, container, base_only, attrs=None):
    '''
    Fill ``ds.coords`` from the coordinates declared on ``container``.

    Base coordinates are set to the unique values of the matching index
    level of ``container``. Every other coordinate is read from
    ``base_only[coord]``, stripped of the index levels it does not depend
    on, and converted with ``metacsv_series_to_dataarray``.

    Parameters
    ----------
    ds
        Dataset under construction; its ``coords`` are modified in place.
    container
        Object providing ``coords``, ``base_coords``, ``variables`` and
        ``index``.
    base_only
        Object from which each non-base coordinate is read by key.
    attrs
        Accepted for signature compatibility; not read by this function.
    '''
    global xr
    if xr is None:
        _import_xarray()

    # NOTE(review): the equality test (rather than ``is None``) is preserved
    # on purpose; ``coords`` may be a wrapper object with its own equality
    # semantics -- confirm before changing.
    if container.coords == None:
        container.add_coords()

    for base in container.base_coords:
        key = str(base)
        ds.coords[key] = container.index.get_level_values(base).unique()
        ds.coords[key].attrs = container.variables.get(base, {})

    for coord in container.coords:
        if coord not in container.base_coords:
            dependencies = container.coords._base_dependencies[coord]
            data = base_only[coord]

            # Drop index levels this coordinate does not depend on.
            if len(data.index.names) > len(dependencies):
                surplus = [
                    level for level in data.index.names
                    if level not in dependencies]
                data.reset_index(surplus, inplace=True, drop=True)

            ds.coords[str(coord)] = metacsv_series_to_dataarray(
                data, attrs=container.variables.get(coord, {}))


def metacsv_series_to_dataarray(series, attrs=None):
    '''
    Convert a metacsv Series into an ``xarray.DataArray``.

    Index levels that are not base coordinates are dropped, the remaining
    index is checked for uniqueness, duplicate index entries are reduced to
    their first occurrence, and index names are coerced to ``str`` before
    the data is handed to ``xr.DataArray.from_series``.

    Parameters
    ----------
    series
        Series to convert; must expose ``attrs`` and ``base_coords``.
    attrs
        Attributes for the resulting DataArray. Defaults to ``series.attrs``.

    Returns
    -------
    The resulting DataArray, with ``attrs`` attached.

    Raises
    ------
    ValueError
        Propagated from ``_check_series_unique`` when an index entry maps
        to more than one distinct value.
    '''
    global xr
    if xr is None:
        _import_xarray()

    if attrs is None:
        attrs = series.attrs

    # NOTE(review): ``!= None`` is kept as an equality test; base_coords may
    # define its own comparison -- confirm before switching to ``is not``.
    if series.base_coords != None:
        extra_levels = [
            level for level in series.index.names
            if level not in series.base_coords]
        if extra_levels:
            series = series.reset_index(extra_levels, drop=True)

    # NOTE(review): this block was reconstructed from a flattened listing;
    # the uniqueness check and de-duplication are assumed to run regardless
    # of the base_coords guard above -- confirm against the upstream file.
    _check_series_unique(series)
    first_positions = np.unique(series.index.values, return_index=True)[1]
    series = series.iloc[first_positions]
    series.index.names = list(map(str, series.index.names))

    da = xr.DataArray.from_series(series)
    da.attrs = attrs
    return da
def metacsv_series_to_dataset(series, name='data', attrs=None):
    '''
    Convert a metacsv Series into an ``xarray.Dataset`` with one variable.

    Coordinates that are not base coordinates are moved out of the index,
    the base-indexed data is checked for uniqueness, the coordinates are
    written to the dataset, and the series values are stored under ``name``.

    Parameters
    ----------
    series
        Series to convert; must expose ``coords``, ``base_coords``,
        ``variables`` and ``attrs``.
    name
        Name of the data variable in the resulting dataset.
    attrs
        Dataset-level attributes. Defaults to ``series.attrs``.

    Returns
    -------
    The populated Dataset.

    Raises
    ------
    ValueError
        Propagated from ``_check_series_unique`` when the base-indexed data
        is not uniquely indexed.
    '''
    global xr
    if xr is None:
        _import_xarray()

    ds = xr.Dataset()

    if attrs is None:
        attrs = series.attrs

    reset = [c for c in series.coords if c not in series.base_coords]

    # Keep the non-base coordinates as columns for the coordinate pass.
    if reset:
        base_only = series.reset_index(reset, drop=False)
    else:
        base_only = series

    _check_series_unique(base_only)
    _append_coords_to_dataset(ds, series, base_only, attrs)

    # The data variable itself is indexed by base coordinates only.
    if reset:
        data = series.reset_index(reset, drop=True)
    else:
        data = series

    ds[name] = xr.DataArray.from_series(data)
    ds[name].attrs = series.variables.get(name, {})

    # Honour caller-supplied attrs; previously the argument was resolved
    # above but ``series.attrs`` was always written here.
    ds.attrs = attrs
    return ds
def metacsv_dataframe_to_dataset(dataframe, name='data', attrs=None):
    '''
    Convert a metacsv DataFrame into an ``xarray.Dataset``.

    Coordinates that are not base coordinates are moved out of the index,
    the base-indexed data is checked for uniqueness, the coordinates are
    written to the dataset, and each column becomes a data variable.

    Parameters
    ----------
    dataframe
        DataFrame to convert; must expose ``coords``, ``base_coords``,
        ``variables`` and ``attrs``.
    name
        Accepted for signature parity with the Series converter; not read
        by this function (variables are named after the columns).
    attrs
        Dataset-level attributes. Defaults to ``dataframe.attrs``.

    Returns
    -------
    The populated Dataset.

    Raises
    ------
    ValueError
        Propagated from ``_check_series_unique`` when the base-indexed data
        is not uniquely indexed.
    '''
    global xr
    if xr is None:
        _import_xarray()

    ds = xr.Dataset()

    if attrs is None:
        attrs = dataframe.attrs

    reset = [c for c in dataframe.coords if c not in dataframe.base_coords]

    # Keep the non-base coordinates as columns for the coordinate pass.
    if reset:
        base_only = dataframe.reset_index(reset, drop=False, inplace=False)
    else:
        base_only = dataframe

    _check_series_unique(base_only)
    _append_coords_to_dataset(ds, dataframe, base_only, attrs)

    # The data variables themselves are indexed by base coordinates only.
    if reset:
        data = dataframe.reset_index(reset, drop=True)
    else:
        data = dataframe

    for col in dataframe.columns:
        ds[col] = xr.DataArray.from_series(data[col])
        ds[col].attrs = dataframe.variables.get(col, {})

    # Honour caller-supplied attrs; previously the argument was resolved
    # above but ``dataframe.attrs`` was always written here.
    ds.attrs = attrs
    return ds
def metacsv_dataframe_to_dataarray(dataframe, names=None, attrs=None):
    '''
    Convert a metacsv DataFrame into a single ``xarray.DataArray``.

    A copy of the frame is stacked over all of its column levels so that
    the column levels become additional index levels, and the resulting
    Series is converted with ``metacsv_series_to_dataarray``. Unnamed index
    levels are named ``ind_<i>`` and unnamed column levels ``coldim_<i>``.

    Parameters
    ----------
    dataframe
        DataFrame to convert; must expose ``coords`` and ``attrs``. The
        input itself is not modified; work is done on a copy.
    names
        Not read by this function.
    attrs
        Attributes for the resulting DataArray. Defaults to the frame's
        ``attrs``.

    Returns
    -------
    The DataArray produced by ``metacsv_series_to_dataarray``.
    '''
    global xr
    if xr is None:
        _import_xarray()

    dataframe = dataframe.copy()

    if attrs is None:
        attrs = dataframe.attrs

    coords = dataframe.coords.copy()

    index_names = []
    for position, level_name in enumerate(dataframe.index.names):
        if pd.isnull(level_name):
            index_names.append('ind_{}'.format(position))
        else:
            index_names.append(str(level_name))
    dataframe.index.names = index_names

    # NOTE(review): equality (not identity) test kept as written; ``coords``
    # is an object with a ``copy`` method, so it presumably defines its own
    # comparison with None -- confirm before changing.
    if dataframe.coords == None:
        coords.update(dict.fromkeys(dataframe.index.names))

    column_names = []
    for position, level_name in enumerate(dataframe.columns.names):
        if pd.isnull(level_name):
            column_names.append('coldim_{}'.format(position))
        else:
            column_names.append(str(level_name))
    dataframe.columns.names = column_names

    colnames = dataframe.columns.names
    stacked = dataframe.stack(colnames)
    series = dataframe._constructor_sliced(stacked)

    # Each former column level becomes an independent coordinate.
    coords.update(dict.fromkeys(colnames))
    series.coords.update(coords)

    return metacsv_series_to_dataarray(series, attrs=attrs)