Source code for solarwindpy.plotting.hist2d

#!/usr/bin/env python
r"""Two-dimensional histogram and heatmap plotting utilities."""

import pdb  # noqa: F401

import numpy as np
import pandas as pd
import matplotlib as mpl

from matplotlib import pyplot as plt
from collections import namedtuple
from scipy.signal import savgol_filter


from . import base
from . import labels as labels_module

# from .agg_plot import AggPlot
# from .hist1d import Hist1D

from . import agg_plot
from . import hist1d

AggPlot = agg_plot.AggPlot
Hist1D = hist1d.Hist1D

# import os
# import psutil


# def log_mem_usage():
#    usage = psutil.Process(os.getpid()).memory_info()
#    usage = "\n".join(
#        ["{} {:.3f} GB".format(k, v * 1e-9) for k, v in usage._asdict().items()]
#    )
#    logging.getLogger("main").warning("Memory usage\n%s", usage)


# class Hist2D(base.Plot2D, AggPlot):
[docs] class Hist2D(base.PlotWithZdata, base.CbarMaker, AggPlot): r"""Create a 2D histogram with an optional z-value using an equal number. of bins along the x and y axis. Parameters ---------- x, y: pd.Series x and y data to aggregate z: None, pd.Series If not None, the z-value to aggregate. axnorm: str Normalize the histogram. key normalization --- ------------- c column r row t total d density logx, logy: bool If True, log10 scale the axis. Attributes ---------- data: bins: cut: axnorm: log<x,y>: <x,y,z>label: path: None, Path Methods ------- calc_bins: calculate the x, y bins. make_cut: Utilize the calculated bins to convert (x, y) into pd.Categoral or pd.Interval values used in aggregation. set_[x,y,z]label: Set the x, y, or z label. agg: Aggregate the data in the bins. If z-value is None, count the number of points in each bin. If z-value is not None, calculate the mean for each bin. make_plot: Make a 2D plot of the data with an optional color bar. """
[docs] def __init__( self, x, y, z=None, axnorm=None, logx=False, logy=False, clip_data=False, nbins=101, bin_precision=None, ): super().__init__() self.set_log(x=logx, y=logy) self.set_data(x, y, z, clip_data) self.set_labels( x="x", y="y", z=labels_module.Count(norm=axnorm) if z is None else "z" ) self.set_axnorm(axnorm) self.calc_bins_intervals(nbins=nbins, precision=bin_precision) self.make_cut() self.set_clim(None, None) self.set_alim(None, None)
@property def _gb_axes(self): return ("x", "y") def _maybe_convert_to_log_scale(self, x, y): if self.log.x: x = 10.0**x if self.log.y: y = 10.0**y return x, y # def set_path(self, new, add_scale=True): # # Bug: path doesn't auto-set log information. # path, x, y, z, scale_info = super().set_path(new, add_scale) # if new == "auto": # path = path / x / y / z # else: # assert x is None # assert y is None # assert z is None # if add_scale: # assert scale_info is not None # scale_info = "-".join(scale_info) # if bool(len(path.parts)) and path.parts[-1].endswith("norm"): # # Insert <norm> at end of path so scale order is (x, y, z). # path = path.parts # path = path[:-1] + (scale_info + "-" + path[-1],) # path = Path(*path) # else: # path = path / scale_info # self._path = path # set_path.__doc__ = base.Base.set_path.__doc__
[docs] def set_labels(self, **kwargs): z = kwargs.pop("z", self.labels.z) if isinstance(z, labels_module.Count): try: z.set_axnorm(self.axnorm) except AttributeError: pass z.build_label() super().set_labels(z=z, **kwargs)
# def set_data(self, x, y, z, clip): # data = pd.DataFrame( # { # "x": np.log10(np.abs(x)) if self.log.x else x, # "y": np.log10(np.abs(y)) if self.log.y else y, # } # ) # # # if z is None: # z = pd.Series(1, index=x.index) # # data.loc[:, "z"] = z # data = data.dropna() # if not data.shape[0]: # raise ValueError( # "You can't build a %s with data that is exclusively NaNs" # % self.__class__.__name__ # ) # # self._data = data # self._clip = clip
[docs] def set_data(self, x, y, z, clip): super().set_data(x, y, z, clip) data = self.data if self.log.x: data.loc[:, "x"] = np.log10(np.abs(data.loc[:, "x"])) if self.log.y: data.loc[:, "y"] = np.log10(np.abs(data.loc[:, "y"])) self._data = data
[docs] def set_axnorm(self, new): r"""The method by which the gridded data is normalized. ===== ============================================================= key description ===== ============================================================= c Column normalize d Density normalize r Row normalize t Total normalize cd PDFs in each column rd PDFs in each row ===== =============================================================""" if new is not None: new = new.lower() assert new in ( "c", "r", "t", "d", "cd", "rd", ), f"Unrecgonized axnorm `{new}`" zlbl = self.labels.z if isinstance(zlbl, labels_module.Count): zlbl.set_axnorm(new) zlbl.build_label() self._axnorm = new
def _axis_normalizer(self, agg): r"""Takes care of row, column, total, and density normaliation. Written basically as `staticmethod` so that can be called in `OrbitHist2D`, but as actual method with `self` passed so we have access to `self.log` for density normalization. """ axnorm = self.axnorm if axnorm is None: pass elif axnorm == "c": agg = agg.divide(agg.groupby(level="x").max(), level="x") elif axnorm == "r": agg = agg.divide(agg.groupby(level="y").max(), level="y") elif axnorm == "t": agg = agg.divide(agg.max()) elif axnorm == "d": N = agg.sum().sum() x = pd.IntervalIndex(agg.index.get_level_values("x").unique()) y = pd.IntervalIndex(agg.index.get_level_values("y").unique()) dx = pd.Series( x.length, index=x ) # dx = pd.Series(x.right - x.left, index=x) dy = pd.Series( y.length, index=y ) # dy = pd.Series(y.right - y.left, index=y) if self.log.x: dx = 10.0**dx if self.log.y: dy = 10.0**dy agg = agg.divide(dx, level="x").divide(dy, level="y").divide(N) elif axnorm == "cd": # raise NotImplementedError("Need to verify data alignment, especially `dx` values and index") N = agg.groupby(level="x").sum() dy = pd.IntervalIndex( agg.index.get_level_values("y").unique() ).sort_values() dy = pd.Series(dy.length, index=dy).sort_index() # Divide by total in each column and each row's width agg = agg.divide(N, level="x").divide(dy, level="y") elif axnorm == "rd": # raise NotImplementedError("Need to verify data alignment, especially `dx` values and index") N = agg.groupby(level="y").sum() dx = pd.IntervalIndex( agg.index.get_level_values("x").unique() ).sort_values() dx = pd.Series(dx.length, index=dx).sort_index() # Divide by total in each column and each row's width agg = agg.divide(N, level="y").divide(dx, level="x") elif hasattr(axnorm, "__iter__"): # TODO: This is an undocumented feature. I do not know if it is # tested nor how it interacts with colorbar labels, etc. # We need to investigate this issue (20250804). kind, fcn = axnorm if kind == "c": agg = agg.divide(agg.groupby(level="x").agg(fcn), level="x") elif kind == "r": agg = agg.divide(agg.groupby(level="y").agg(fcn), level="y") else: raise ValueError(f"Unrecognized axnorm with function ({kind}, {fcn})") else: raise ValueError(f"Unrecognized axnorm ({axnorm})") return agg
[docs] def agg(self, **kwargs): agg = super().agg(**kwargs) agg = self._axis_normalizer(agg) agg = self._agg_reindexer(agg) a0, a1 = self.alim if a0 is not None or a1 is not None: tk = pd.Series(True, index=agg.index) # tk = pd.DataFrame(True, # index=agg.index, # columns=agg.columns # ) if a0 is not None: tk = tk & (agg >= a0) if a1 is not None: tk = tk & (agg <= a1) agg = agg.where(tk) return agg
def _make_cbar(self, mappable, **kwargs): ticks = kwargs.pop( "ticks", mpl.ticker.MultipleLocator(0.1) if self.axnorm in ("c", "r") else None, ) return super()._make_cbar(mappable, ticks=ticks, **kwargs) def _limit_color_norm(self, norm): if self.axnorm in ("c", "r"): # Don't limit us to (1%, 99%) interval. return None pct = self.data.loc[:, "z"].quantile([0.01, 0.99]) v0 = pct.loc[0.01] v1 = pct.loc[0.99] if norm.vmin is None: norm.vmin = v0 if norm.vmax is None: norm.vmax = v1 norm.clip = True
[docs] def make_plot( self, ax=None, cbar=True, limit_color_norm=False, cbar_kwargs=None, fcn=None, alpha_fcn=None, **kwargs, ): r"""Make a 2D plot on `ax` using `ax.pcolormesh`. Parameters ---------- ax: mpl.axes.Axes, None If None, create an `Axes` instance from `plt.subplots`. cbar: bool If True, create color bar with `labels.z`. limit_color_norm: bool If True, limit the color range to 0.001 and 0.999 percentile range of the z-value, count or otherwise. cbar_kwargs: dict, None If not None, kwargs passed to `self._make_cbar`. fcn: FunctionType, None Aggregation function. If None, automatically select in :py:meth:`agg`. alpha_fcn: None, str If not None, the function used to aggregate the data for setting alpha value. kwargs: Passed to `ax.pcolormesh`. If row or column normalized data, `norm` defaults to `mpl.colors.Normalize(0, 1)`. Returns ------- ax: mpl.axes.Axes Axes upon which plot was made. cbar_or_mappable: colorbar.Colorbar, mpl.collections.QuadMesh If `cbar` is True, return the colorbar. Otherwise, return the `Quadmesh` used to create the colorbar. """ agg = self.agg(fcn=fcn).unstack("x") x = self.edges["x"] y = self.edges["y"] # assert x.size == agg.shape[1] + 1 # assert y.size == agg.shape[0] + 1 # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381) if x.size != agg.shape[1] + 1: # agg = agg.reindex(columns=self.intervals["x"]) agg = agg.reindex(columns=self.categoricals["x"]) if y.size != agg.shape[0] + 1: # agg = agg.reindex(index=self.intervals["y"]) agg = agg.reindex(index=self.categoricals["y"]) if ax is None: fig, ax = plt.subplots() # if self.log.x: # x = 10.0 ** x # if self.log.y: # y = 10.0 ** y x, y = self._maybe_convert_to_log_scale(x, y) axnorm = self.axnorm default_norm = None if axnorm in ("c", "r"): default_norm = mpl.colors.BoundaryNorm( np.linspace(0, 1, 11), 256, clip=True ) elif axnorm in ("d", "cd", "rd"): default_norm = mpl.colors.LogNorm(clip=True) norm = kwargs.pop("norm", default_norm) if limit_color_norm: self._limit_color_norm(norm) C = np.ma.masked_invalid(agg.values) XX, YY = np.meshgrid(x, y) pc = ax.pcolormesh(XX, YY, C, norm=norm, **kwargs) cbar_or_mappable = pc if cbar: if cbar_kwargs is None: cbar_kwargs = dict() if "cax" not in cbar_kwargs.keys() and "ax" not in cbar_kwargs.keys(): cbar_kwargs["ax"] = ax # Pass `norm` to `self._make_cbar` so that we can choose the ticks to use. cbar = self._make_cbar(pc, **cbar_kwargs) cbar_or_mappable = cbar self._format_axis(ax) color_plot = self.data.loc[:, self.agg_axes].dropna().unique().size > 1 if (alpha_fcn is not None) and color_plot: self.logger.warning( "Make sure you verify alpha actually set. I don't yet trust this." ) alpha_agg = self.agg(fcn=alpha_fcn) alpha_agg = alpha_agg.unstack("x") alpha_agg = np.ma.masked_invalid(alpha_agg.values.ravel()) # Feature scale then invert so smallest STD # is most opaque. alpha = 1 - mpl.colors.Normalize()(alpha_agg) self.logger.warning("Scaling alpha filter as alpha**0.25") alpha = alpha**0.25 # Set masked values to zero. Otherwise, masked # values are rendered as black. alpha = alpha.filled(0) # Must draw to initialize `facecolor`s plt.draw() # Remove `pc` from axis so we can redraw with std # pc.remove() colors = pc.get_facecolors() colors[:, 3] = alpha pc.set_facecolor(colors) # ax.add_collection(pc) elif alpha_fcn is not None: self.logger.warning("Ignoring `alpha_fcn` because plotting counts") return ax, cbar_or_mappable
[docs] def get_border(self): r"""Get the top and bottom edges of the plot. Returns ------- border: namedtuple Contains "top" and "bottom" fields, each with a :py:class:`pd.Series`. """ Border = namedtuple("Border", "top,bottom") top = {} bottom = {} for x, v in self.agg().unstack("x").items(): yt = v.last_valid_index() if yt is not None: z = v.loc[yt] top[(yt, x)] = z yb = v.first_valid_index() if yb is not None: z = v.loc[yb] bottom[(yb, x)] = z top = pd.Series(top) bottom = pd.Series(bottom) for edge in (top, bottom): edge.index.names = ["y", "x"] border = Border(top, bottom) return border
def _plot_one_edge( self, ax, edge, smooth=False, sg_kwargs=None, xlim=(None, None), ylim=(None, None), **kwargs, ): x = edge.index.get_level_values("x").mid y = edge.index.get_level_values("y").mid if sg_kwargs is None: sg_kwargs = dict() if smooth: wlength = sg_kwargs.pop("window_length", int(np.floor(y.shape[0] / 10))) polyorder = sg_kwargs.pop("polyorder", 3) if not wlength % 2: wlength -= 1 y = savgol_filter(y, wlength, polyorder, **sg_kwargs) if self.log.x: x = 10.0**x if self.log.y: y = 10.0**y x0, x1 = xlim y0, y1 = ylim tk = np.full_like(x, True, dtype=bool) if x0 is not None: tk = tk & (x0 <= x) if x1 is not None: tk = tk & (x <= x1) if y0 is not None: tk = tk & (y0 <= y) if y1 is not None: tk = tk & (y <= y1) # if (~tk).any(): x = x[tk] y = y[tk] return ax.plot(x, y, **kwargs)
[docs] def plot_edges(self, ax, smooth=True, sg_kwargs=None, **kwargs): """Overplot the edges. Parameters ---------- ax: Axis on which to plot. smooth: bool If True, apply a Savitzky-Golay filter (:py:func:`scipy.signal.savgol_filter`) to the y-values before plotting to smooth the curve. sg_kwargs: dict, None If not None, dict of kwargs passed to Savitzky-Golay filter. Also allows for setting of `window_length` and `polyorder` as kwargs. They default to 10% of the number of observations (`window_length`) and 3 (`polyorder`). Note that because `window_length` must be odd, if the 10% value is even, we take 1-window_length. kwargs: Passed to `ax.plot` """ top, bottom = self.get_border() color = kwargs.pop("color", "cyan") label = kwargs.pop("label", None) etop = self._plot_one_edge( ax, top, smooth, sg_kwargs, color=color, label=label, **kwargs ) ebottom = self._plot_one_edge( ax, bottom, smooth, sg_kwargs, color=color, **kwargs ) return etop, ebottom
def _get_contour_levels(self, levels): if (levels is not None) or (self.axnorm is None): pass elif (levels is None) and (self.axnorm == "t"): levels = [0.01, 0.1, 0.3, 0.7, 0.99] elif (levels is None) and (self.axnorm == "d"): levels = [3e-5, 1e-4, 3e-4, 1e-3, 1.7e-3, 2.3e-3] elif (levels is None) and (self.axnorm in ["r", "c"]): levels = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] elif (levels is None) and (self.axnorm in ["cd", "rd"]): levels = None else: raise ValueError( f"Unrecognized axis normalization {self.axnorm} for default levels." ) return levels def _verify_contour_passthrough_kwargs( self, ax, clabel_kwargs, edges_kwargs, cbar_kwargs ): if clabel_kwargs is None: clabel_kwargs = dict() if edges_kwargs is None: edges_kwargs = dict() if cbar_kwargs is None: cbar_kwargs = dict() if "cax" not in cbar_kwargs.keys() and "ax" not in cbar_kwargs.keys(): cbar_kwargs["ax"] = ax return clabel_kwargs, edges_kwargs, cbar_kwargs
[docs] def plot_contours( self, ax=None, label_levels=True, cbar=True, limit_color_norm=False, cbar_kwargs=None, fcn=None, plot_edges=False, edges_kwargs=None, clabel_kwargs=None, skip_max_clbl=True, use_contourf=False, gaussian_filter_std=0, gaussian_filter_kwargs=None, **kwargs, ): """Make a contour plot on `ax` using `ax.contour`. Parameters ---------- ax: mpl.axes.Axes, None If None, create an `Axes` instance from `plt.subplots`. label_levels: bool If True, add labels to contours with `ax.clabel`. cbar: bool If True, create color bar with `labels.z`. limit_color_norm: bool If True, limit the color range to 0.001 and 0.999 percentile range of the z-value, count or otherwise. cbar_kwargs: dict, None If not None, kwargs passed to `self._make_cbar`. fcn: FunctionType, None Aggregation function. If None, automatically select in :py:meth:`agg`. plot_edges: bool If True, plot the smoothed, extreme edges of the 2D histogram. edges_kwargs: None, dict Passed to {self.plot_edges!s}. clabel_kwargs: None, dict If not None, dictionary of kwargs passed to `ax.clabel`. skip_max_clbl: bool If True, don't label the maximum contour. Primarily used when the maximum contour is, effectively, a point. maximum_color: The color for the maximum of the PDF. use_contourf: bool If True, use `ax.contourf`. Else use `ax.contour`. gaussian_filter_std: int If > 0, apply `scipy.ndimage.gaussian_filter` to the z-values using the standard deviation specified by `gaussian_filter_std`. gaussian_filter_kwargs: None, dict If not None and gaussian_filter_std > 0, passed to :py:meth:`scipy.ndimage.gaussian_filter` kwargs: Passed to :py:meth:`ax.pcolormesh`. If row or column normalized data, `norm` defaults to `mpl.colors.Normalize(0, 1)`. """ levels = kwargs.pop("levels", None) cmap = kwargs.pop("cmap", None) norm = kwargs.pop( "norm", ( mpl.colors.BoundaryNorm(np.linspace(0, 1, 11), 256, clip=True) if self.axnorm in ("c", "r") else None ), ) linestyles = kwargs.pop( "linestyles", [ "-", ":", "--", (0, (7, 3, 1, 3, 1, 3, 1, 3, 1, 3)), "--", ":", "-", (0, (7, 3, 1, 3, 1, 3)), ], ) if ax is None: fig, ax = plt.subplots() ( clabel_kwargs, edges_kwargs, cbar_kwargs, ) = self._verify_contour_passthrough_kwargs( ax, clabel_kwargs, edges_kwargs, cbar_kwargs ) inline = clabel_kwargs.pop("inline", True) inline_spacing = clabel_kwargs.pop("inline_spacing", -3) fmt = clabel_kwargs.pop("fmt", "%s") agg = self.agg(fcn=fcn).unstack("x") x = self.intervals["x"].mid y = self.intervals["y"].mid # assert x.size == agg.shape[1] # assert y.size == agg.shape[0] # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381) if x.size != agg.shape[1]: # agg = agg.reindex(columns=self.intervals["x"]) agg = agg.reindex(columns=self.categoricals["x"]) if y.size != agg.shape[0]: # agg = agg.reindex(index=self.intervals["y"]) agg = agg.reindex(index=self.categoricals["y"]) x, y = self._maybe_convert_to_log_scale(x, y) XX, YY = np.meshgrid(x, y) C = agg.values if gaussian_filter_std: from scipy.ndimage import gaussian_filter if gaussian_filter_kwargs is None: gaussian_filter_kwargs = dict() C = gaussian_filter(C, gaussian_filter_std, **gaussian_filter_kwargs) C = np.ma.masked_invalid(C) assert XX.shape == C.shape assert YY.shape == C.shape class nf(float): # Source: https://matplotlib.org/3.1.0/gallery/images_contours_and_fields/contour_label_demo.html # Define a class that forces representation of float to look a certain way # This remove trailing zero so '1.0' becomes '1' def __repr__(self): return str(self).rstrip("0") levels = self._get_contour_levels(levels) if (norm is None) and (levels is not None): norm = mpl.colors.BoundaryNorm(levels, 256, clip=True) contour_fcn = ax.contour if use_contourf: contour_fcn = ax.contourf if levels is None: args = [XX, YY, C] else: args = [XX, YY, C, levels] qset = contour_fcn(*args, linestyles=linestyles, cmap=cmap, norm=norm, **kwargs) try: args = (qset, levels[:-1] if skip_max_clbl else levels) except TypeError: # None can't be subscripted. args = (qset,) lbls = None if label_levels: qset.levels = [nf(level) for level in qset.levels] lbls = ax.clabel( *args, inline=inline, inline_spacing=inline_spacing, fmt=fmt, **clabel_kwargs, ) if plot_edges: etop, ebottom = self.plot_edges(ax, **edges_kwargs) cbar_or_mappable = qset if cbar: # Pass `norm` to `self._make_cbar` so that we can choose the ticks to use. cbar = self._make_cbar(qset, norm=norm, **cbar_kwargs) cbar_or_mappable = cbar self._format_axis(ax) return ax, lbls, cbar_or_mappable, qset
[docs] def project_1d(self, axis, only_plotted=True, project_counts=False, **kwargs): """Make a `Hist1D` from the data stored in this `His2D`. Parameters ---------- axis: str "x" or "y", specifying the axis to project into 1D. only_plotted: bool If True, only pass data that appears in the {self.__class__.__name__} plot to the :py:class:`Hist1D`. project_counts: bool If True, only send the variable plotted along `axis` to :py:class:`Hist1D`. Otherwise, send both axes (but not z-values). kwargs: Passed to `Hist1D`. Primarily to allow specifying `bin_precision`. Returns ------- h1: :py:class:`Hist1D` """ axis = axis.lower() assert axis in ("x", "y") data = self.data if data.loc[:, "z"].unique().size >= 2: # Either all 1 or 1 and NaN. other = "z" else: possible_axes = {"x", "y"} possible_axes.remove(axis) other = possible_axes.pop() logx = self.log._asdict()[axis] x = self.data.loc[:, axis] if logx: # Need to convert back to regular from log-space for data setting. x = 10.0**x y = self.data.loc[:, other] if not project_counts else None logy = False # Defined b/c project_counts option. if y is not None and (other == "y"): # Only select y-values plotted. logy = self.log._asdict()[other] yedges = self.edges[other].values y = y.where((yedges[0] <= y) & (y <= yedges[-1])) if logy: y = 10.0**y if only_plotted: tk = self.get_plotted_data_boolean_series() x = x.loc[tk] if y is not None: y = y.loc[tk] h1 = Hist1D( x, y=y, logx=logx, clip_data=False, # Any clipping will be addressed by bins. nbins=self.edges[axis].values, **kwargs, ) h1.set_log(y=logy) # Need to propagate logy. h1.set_labels(x=self.labels._asdict()[axis]) if not project_counts: h1.set_labels(y=self.labels._asdict()[other]) return h1
[docs] def make_joint_h2_h1_plot( self, project_counts=True, kwargs_1d=None, fig_axes=None, **kwargs ): figsize = kwargs.pop("figsize", (5, 6)) height_ratios = kwargs.pop("height_ratios", [0.25, 1, 0.2, 0.1]) width_ratios = kwargs.pop("width_ratios", [1, 0.25]) hspace = kwargs.pop("hspace", 0) wspace = kwargs.pop("wspace", 0) # if fig_axes is not None: # fig, axes = fig_axes # hax, xax, yax, cax = axes # else: fig = plt.figure(figsize=figsize) gs = mpl.gridspec.GridSpec( 4, 2, height_ratios=height_ratios, width_ratios=width_ratios, hspace=hspace, wspace=wspace, ) hax = fig.add_subplot(gs[1, 0]) xax = fig.add_subplot(gs[0, 0], sharex=hax) yax = fig.add_subplot(gs[1, 1], sharey=hax) cax = fig.add_subplot(gs[3, 0]) cbar_kwargs = kwargs.pop("cbar_kwargs", dict()) cax = cbar_kwargs.pop("cax", cax) orientation = cbar_kwargs.pop("orientation", "horizontal") _, cbar = self.make_plot( ax=hax, cbar_kwargs=dict(cax=cax, orientation=orientation, **cbar_kwargs), **kwargs, ) if kwargs_1d is None: kwargs_1d = dict() self.project_1d("x", project_counts=project_counts).make_plot( ax=xax, **kwargs_1d ) self.project_1d("y", project_counts=project_counts).make_plot( ax=yax, **kwargs_1d, transpose_axes=True ) xax.label_outer() # Mimic `ax.label_outer` for `yax`. for label in yax.get_yticklabels(which="both"): label.set_visible(False) yax.get_yaxis().get_offset_text().set_visible(False) yax.set_ylabel("") log = self.log if not log.x: hax.xaxis.set_major_locator( mpl.ticker.MaxNLocator( nbins=hax.xaxis.get_ticklocs().size - 1, prune="upper" ) ) if not log.y: hax.yaxis.set_major_locator( mpl.ticker.MaxNLocator( nbins=hax.yaxis.get_ticklocs().size - 1, prune="upper" ) ) return hax, xax, yax, cbar
[docs] def id_data_above_contour(self, level): r"""Gets data above the `level`. Parameters ---------- level: scalar The z-value above which to select data. Data is aggregated according to `ax_norm`. Returns ------- above_contour: pd.Series For data in a bin above `level`, indicates the x-`pd.Interval` within which the observation falls. `NaN` are observations that are below `level`. This object is purposely the same length as the data stored by Hist2D and can be used in groupby operations. """ x = self.data.x y = self.data.y above_contour = pd.Series(np.nan, self.data.index) for k, v in self.agg().unstack("x").items(): tk = v >= level left, right = k.left, k.right bottom, top = v[tk].index.min().left, v[tk].index.max().right above_contour_at_x = (left < x) & (x <= right) & (bottom < y) & (y <= top) above_contour[above_contour_at_x] = k above_contour = pd.Series( pd.Categorical(above_contour), index=above_contour.index ) return above_contour
[docs] def take_data_in_yrange_across_x( self, ranges_by_x, get_x_bounds, get_y_bounds, ): r"""Take data within y-ranges across x-values. Parameters ---------- ranges_by_x: iterable An iterable with keys used to get the left and right bounds for the data and values used to get the top and bottom bounds for the data. get_x_bounds: function First argument is one key of `ranges_by_x` and returns `left, right`. Second argument is a kwarg (`expected_logx`) boolean to transform the returned values according to whether or not the keys are :math:`log(x)` or :math:`x` in a manner that matches data stored in Hist2D. get_y_bounds: functions Takes on value of `ranges_by_x` and returns `top, bottom`. Second argument Second argument is a kwarg (`expected_logx`) boolean to transform the returned values according to whether or not the keys are :math:`log(y)` or :math:`y` in a manner that matches data stored in Hist2D. Returns ------- taken: np.ndarray 1D Array of indices for selecting data in interval. """ available_x = self.agg().unstack("x").columns if ranges_by_x.index.symmetric_difference(available_x).size: drop = ranges_by_x.index.symmetric_difference(available_x) if not drop.isin(available_x).all(): raise ValueError( "Need a way to drop values in selector that aren't available." ) else: self.logger.warning( f"Dropping {drop.size} intervals from available for selecting." ) data = self.data logx = self.log.x logy = self.log.y taken = [] for x, at_x in ranges_by_x.iterrows(): l, r = get_x_bounds(x, expected_logx=logx) b, t = get_y_bounds(at_x, expected_logy=logy) assert l < r assert b < t tkx = (l < data.x) & (data.x <= r) tky = (b < data.y) & (data.y <= t) tk = tkx & tky tk = tk.loc[tk].index taken.append(tk) taken = np.sort(np.concatenate(taken)) return taken