#!/usr/bin/env python
"""Contains in situ data Base and Core classes.
This module provides abstract base classes for handling in situ data in solar wind
physics applications.
"""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import Any, Tuple
import numpy as np
import pandas as pd
from pandas import MultiIndex as MI
from . import units_constants as uc
[docs]
class Core(ABC):
"""Base class for all :mod:`solarwindpy` objects.
The class sets up logging, unit definitions, and physical constants. It
provides a common interface that all other core objects inherit from.
Attributes
----------
logger : :class:`logging.Logger`
Logger instance associated with the object.
units : :class:`~solarwindpy.core.units_constants.Units`
Conversion factors used throughout the package.
constants : :class:`~solarwindpy.core.units_constants.Constants`
Collection of physical constants.
data : :class:`pandas.DataFrame`
Container for the underlying data.
"""
[docs]
def __init__(self) -> None:
self._init_logger()
self._init_units()
self._init_constants()
def __str__(self) -> str:
"""Return string representation of the object.
Returns
-------
str
Class name or class name(species) if the class has a species.
"""
try:
return f"{self.__class__.__name__}({self.species})"
except AttributeError:
return self.__class__.__name__
def __eq__(self, other: Any) -> bool:
"""Check equality between Base objects.
Parameters
----------
other : Any
Object to compare with.
Returns
-------
bool
True if objects are equal, False otherwise.
"""
if id(self) == id(other):
return True
if not isinstance(other, type(self)):
return False
try:
eq_data = self.data.equals(other.data)
return eq_data
except ValueError as e:
if "Can only compare identically-labeled DataFrame objects" in str(e):
return False
raise
@property
def logger(self) -> logging.Logger:
"""Logger instance for this object.
Returns
-------
logging.Logger
Logger instance.
"""
return self._logger
@property
def units(self) -> uc.Units:
"""Units conversion factors.
Returns
-------
uc.Units
Units conversion instance.
"""
return self._units
@property
def constants(self) -> uc.Constants:
"""Physical constants.
Returns
-------
uc.Constants
Physical constants instance.
"""
return self._constants
@property
def data(self) -> pd.DataFrame:
"""Underlying DataFrame containing the data.
Returns
-------
pd.DataFrame
Data with MultiIndex columns.
"""
return self._data
def _init_logger(self) -> None:
self._logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
def _init_units(self) -> None:
self._units = uc.Units()
def _init_constants(self) -> None:
self._constants = uc.Constants()
@staticmethod
def _conform_species(*species: str) -> Tuple[str, ...]:
"""Conform the species inputs to a standard form.
Parameters
----------
*species : str
Species to be conformed.
Returns
-------
Tuple[str, ...]
Conformed species.
Raises
------
TypeError
If any species is not a string.
ValueError
If species contain invalid characters or combinations.
"""
if not all(isinstance(s, str) for s in species):
raise TypeError(f"Invalid species: {species}")
if any("," in s for s in species):
raise ValueError(f"Invalid species: {species}")
if any("+" in s for s in species) and len(species) > 1:
raise ValueError(
f"Invalid species: {species}\n\nA multi-species list for which "
"one species includes '+' may not be uniformly "
"implementable across methods."
)
slist = species[0].split("+") if len(species) == 1 else species
return tuple(sorted(slist))
@abstractmethod
def _clean_species_for_setting(self, *species: str) -> Tuple[str, ...]:
if not species:
raise ValueError(
f"You must specify a species to instantiate a {self.__class__.__name__}."
)
return species
def _verify_datetimeindex(self, data: pd.DataFrame) -> None:
if not isinstance(data.index, pd.DatetimeIndex):
self.logger.warning(
"A non-DatetimeIndex will prevent some DatetimeIndex-dependent functionality from working."
)
if not data.index.is_monotonic_increasing:
self.logger.warning(
"An Index that is not monotonically increasing typically indicates the presence of bad data. This will impact performance, especially if it is a DatetimeIndex."
)
[docs]
class Base(Core):
"""Base class for objects backed by a :class:`pandas.DataFrame`.
Parameters
----------
data : :class:`pandas.DataFrame`
Data used to initialise the object.
Notes
-----
Subclasses override :meth:`set_data` to validate the underlying
:class:`DataFrame` structure.
"""
[docs]
def __init__(self, data: pd.DataFrame) -> None:
super().__init__()
self.set_data(data)
[docs]
@staticmethod
def mi_tuples(x: Tuple[Tuple[str, ...], ...]) -> MI:
"""Create a MultiIndex from tuples with appropriate names.
Parameters
----------
x : Tuple[Tuple[str, ...], ...]
Tuples to create MultiIndex from.
Returns
-------
MI
MultiIndex created from tuples.
"""
names = ["M", "C", "S"]
return MI.from_tuples(x, names=names)
[docs]
@abstractmethod
def set_data(self, new: pd.DataFrame) -> None:
"""Set new data for the class.
Parameters
----------
new : pd.DataFrame
New data to set.
Raises
------
ValueError
If the new data is empty.
"""
if new.empty:
raise ValueError("You can't set an object with empty data.")
self._verify_datetimeindex(new)
def _clean_species_for_setting(self, *species):
species = super(Base, self)._clean_species_for_setting(*species)
assert np.all(
["+" not in s for s in species]
), "%s.species can't contain '+'." % (self.__class__.__name__)
species = tuple(sorted(species))
return species
[docs]
def head(self):
"""Return the first few rows of the data.
Returns
-------
pd.DataFrame
First few rows of the data.
"""
return self.data.head()
[docs]
def tail(self):
"""Return the last few rows of the data.
Returns
-------
pd.DataFrame
Last few rows of the data.
"""
return self.data.tail()