# Source code for solarwindpy.tools

#!/usr/bin/env python
"""Utility functions for manipulating solar wind data.

This module contains helper functions that are not yet organized into
their own submodules. The functions are primarily used for handling
proton data and for converting log-normal parameters to their normal
form.

Functions
---------
swap_protons
    Swap beam and core proton labels when the beam density exceeds the
    core density.
normal_parameters
    Convert log-normal distribution parameters to normal parameters.

Examples
--------
>>> import pandas as pd  # doctest: +SKIP
>>> import numpy as np  # doctest: +SKIP
>>> columns = pd.MultiIndex.from_tuples([  # doctest: +SKIP
...     ('n', '', 'p1'), ('n', '', 'p2')
... ], names=['M', 'C', 'S'])
>>> df = pd.DataFrame([[1, 0.1], [2, 0.2]], columns=columns)  # doctest: +SKIP
>>> new_df, mask = swap_protons(df)  # doctest: +SKIP
>>> 'swapped_protons' in new_df.columns.get_level_values('M')  # doctest: +SKIP
True
"""

import logging
import numpy as np
import pandas as pd


def swap_protons(data, logger=None):
    """Swap beam and core proton labels when the beam density dominates.

    Parameters
    ----------
    data : pandas.DataFrame
        Data containing proton information. Columns are a three-level
        ``MultiIndex`` named ``("M", "C", "S")``; proton species are stored
        in the ``S`` level as ``"p1"`` (core) and ``"p2"`` (beam), and the
        number density is the ``("n", "")`` entry of the ``M``/``C`` levels.
    logger : logging.Logger, optional
        Logger used to report swap statistics. If ``None``, a logger named
        ``"main.<module name>"`` is fetched and given a single ``INFO``-level
        stream handler the first time it is needed.

    Returns
    -------
    new_data : pandas.DataFrame
        Copy of ``data`` with the ``p1`` and ``p2`` columns exchanged on
        every row where the ``p2`` density exceeds the ``p1`` density, plus
        a boolean ``("swapped_protons", "", "")`` column flagging those rows.
    swap : pandas.Series
        Boolean mask indicating where swaps occurred.

    Raises
    ------
    AssertionError
        If ``logger`` is not a ``logging.Logger`` or if, after swapping,
        some row still has ``n(p2) / n(p1) > 1``.

    Examples
    --------
    >>> import pandas as pd  # doctest: +SKIP
    >>> columns = pd.MultiIndex.from_tuples([  # doctest: +SKIP
    ...     ('n', '', 'p1'), ('n', '', 'p2')
    ... ], names=['M', 'C', 'S'])
    >>> df = pd.DataFrame([[2.0, 1.0], [1.0, 2.0]], columns=columns)  # doctest: +SKIP
    >>> new_df, mask = swap_protons(df)  # doctest: +SKIP
    >>> mask.tolist()  # only the second row has n(p2) > n(p1)  # doctest: +SKIP
    [False, True]
    """
    p1 = data.xs("p1", axis=1, level="S")
    p2 = data.xs("p2", axis=1, level="S")

    # Rows where the beam (p2) is denser than the core (p1). A NaN ratio
    # compares False, so rows with missing densities are left untouched.
    swap = p2["n"].divide(p1["n"]) > 1.0

    # Give the indicator column the same level names as the data so that
    # concatenation does not discard the ("M", "C", "S") names.
    swapped = swap.to_frame()
    swapped.columns = pd.MultiIndex.from_tuples(
        [("swapped_protons", "", "")], names=["M", "C", "S"]
    )

    # Exchange whole rows between the two species wherever `swap` is True.
    new_p1 = p1.mask(swap, p2, axis=0)
    new_p2 = p2.mask(swap, p1, axis=0)

    new_protons = (
        pd.concat([new_p1, new_p2], axis=1, keys=["p1", "p2"], names=["S"])
        .reorder_levels(["M", "C", "S"], axis=1)
        .sort_index(axis=1)
    )

    new_data = pd.concat(
        [data.drop(["p1", "p2"], axis=1, level="S"), new_protons, swapped],
        axis=1,
    ).sort_index(axis=1)

    # Sanity check: after swapping, the beam must never be denser than the
    # core. The ratio is p2 over p1 (dividing p2 by itself would always pass).
    chk = new_data.loc[:, ("n", "", "p2")].divide(
        new_data.loc[:, ("n", "", "p1")], axis=0
    )
    assert (chk.dropna() <= 1.0).all(), "n2/n1 > 1 remains after swapping protons"

    if logger is None:
        logger = logging.getLogger("main.{}".format(__name__))
        # `getLogger` returns the same object on every call, so attach the
        # stream handler only once to avoid duplicated log lines.
        if not logger.handlers:
            hdlr = logging.StreamHandler()
            hdlr.setLevel(logging.INFO)
            logger.addHandler(hdlr)
        # Only the fallback logger is configured here; a caller-supplied
        # logger keeps its own level.
        logger.setLevel(logging.DEBUG)
    assert isinstance(logger, logging.Logger)

    stats = pd.Series(
        {"mean": swap.mean(), "count": swap.sum()}, name="stats", dtype=object
    )  # `dtype=object` lets the count print as an int.
    logger.info("Swap proton labels when n2/n1 > 1\nstats\n%s", stats.to_string())

    return new_data, swap
def normal_parameters(m, s):
    r"""Convert log-normal parameters to normal distribution parameters.

    Parameters
    ----------
    m : pandas.Series, numpy.ndarray or scalar
        Log-normal parameter ``m`` entering the formulas below.
    s : pandas.Series, numpy.ndarray or scalar
        Log-normal parameter ``s`` entering the formulas below.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        When ``pandas.concat`` accepts the computed values (pandas inputs),
        a data frame with columns ``mu`` and ``sigma``. When it raises
        ``TypeError`` (e.g. scalar inputs), a series indexed by ``mu`` and
        ``sigma`` instead.

    Notes
    -----
    The conversion uses

    .. math::

        \mu = \exp[m + s^2/2]

    .. math::

        \sigma = \sqrt{\exp[s^2 + 2m]\,(\exp[s^2] - 1)}

    The implementation only uses ``numpy.exp``, i.e. the natural
    exponential. NOTE(review): earlier documentation stated the expressions
    apply to base-10 logarithms as well -- confirm whether base-10
    parameters need rescaling before being passed in.

    Examples
    --------
    >>> import numpy as np
    >>> m, s = 1.0, 0.5  # log-normal parameters
    >>> mu, sigma = normal_parameters(m, s)
    >>> mu > 1.0  # Normal mean should be > 1
    True
    """
    s_squared = s**2.0

    mean = np.exp(m + (s_squared / 2.0))
    std = np.sqrt(np.exp(s_squared + 2.0 * m) * (np.exp(s_squared) - 1.0))

    params = {"mu": mean, "sigma": std}
    try:
        # Works for pandas objects, giving one column per parameter.
        return pd.concat(params, axis=1)
    except TypeError:
        # `pd.concat` rejects non-pandas values; fall back to a Series.
        return pd.Series(params)