Source code for solarwindpy.plotting.hist2d

#!/usr/bin/env python
r"""Two-dimensional histogram and heatmap plotting utilities."""


import numpy as np
import pandas as pd
import matplotlib as mpl

from matplotlib import pyplot as plt
from collections import namedtuple
from scipy.signal import savgol_filter


from . import base
from . import labels as labels_module
from .tools import nan_gaussian_filter

from . import agg_plot
from . import hist1d

AggPlot = agg_plot.AggPlot
Hist1D = hist1d.Hist1D



[docs]
class Hist2D(base.PlotWithZdata, base.CbarMaker, AggPlot):
    r"""Create a 2D histogram with an optional z-value using an equal number.

    of bins along the x and y axis.

    Parameters
    ----------
    x, y: pd.Series
        x and y data to aggregate
    z: None, pd.Series
        If not None, the z-value to aggregate.
    axnorm: str
        Normalize the histogram.
            key  normalization
            ---  -------------
            c    column
            r    row
            t    total
            d    density
    logx, logy: bool
        If True, log10 scale the axis.

    Attributes
    ----------
    data:
    bins:
    cut:
    axnorm:
    log<x,y>:
    <x,y,z>label:
    path: None, Path

    Methods
    -------
    calc_bins:
        calculate the x, y bins.
    make_cut:
        Utilize the calculated bins to convert (x, y) into pd.Categoral
        or pd.Interval values used in aggregation.
    set_[x,y,z]label:
        Set the x, y, or z label.
    agg:
        Aggregate the data in the bins.
        If z-value is None, count the number of points in each bin.
        If z-value is not None, calculate the mean for each bin.
    make_plot:
        Make a 2D plot of the data with an optional color bar.
    """


[docs]
    def __init__(
        self,
        x,
        y,
        z=None,
        axnorm=None,
        logx=False,
        logy=False,
        clip_data=False,
        nbins=101,
        bin_precision=None,
    ):
        r"""Store the data, bin it, and reset the color and aggregate limits.

        Parameters
        ----------
        x, y: pd.Series
            Data to bin along each axis.
        z: None, pd.Series
            Optional value to aggregate in each bin. When None, the z-label
            is a `labels_module.Count` built with `axnorm`; otherwise it is
            the string "z".
        axnorm: None, str
            Normalization key, forwarded to :py:meth:`set_axnorm`.
        logx, logy: bool
            Forwarded to `set_log`.
        clip_data: bool
            Forwarded to :py:meth:`set_data`.
        nbins:
            Forwarded to `calc_bins_intervals`.
        bin_precision:
            Forwarded to `calc_bins_intervals` as `precision`.
        """
        super().__init__()

        # The log flags must exist before the data is stored because
        # `set_data` reads `self.log`.
        self.set_log(x=logx, y=logy)
        self.set_data(x, y, z, clip_data)

        zlabel = "z" if z is not None else labels_module.Count(norm=axnorm)
        self.set_labels(x="x", y="y", z=zlabel)

        self.set_axnorm(axnorm)
        self.calc_bins_intervals(nbins=nbins, precision=bin_precision)
        self.make_cut()

        # Reset both limit pairs to (None, None).
        self.set_clim(None, None)
        self.set_alim(None, None)


    @property
    def _gb_axes(self):
        r"""The axis names ``("x", "y")`` as a tuple."""
        axes = "x", "y"
        return axes

    def _maybe_convert_to_log_scale(self, x, y):
        r"""Raise 10 to the power of each coordinate whose log flag is set.

        Parameters
        ----------
        x, y:
            Coordinates along each axis.

        Returns
        -------
        x, y:
            `10.0**x` when `self.log.x` is truthy (likewise for `y`);
            otherwise the input is returned unchanged.
        """
        log = self.log
        x = 10.0**x if log.x else x
        y = 10.0**y if log.y else y
        return x, y


[docs]
    def set_labels(self, **kwargs):
        r"""Set the axis labels, refreshing a `Count` z-label before storing it.

        Parameters
        ----------
        kwargs:
            Labels forwarded to the parent class's `set_labels`. When `z` is
            not given, the current `self.labels.z` is reused.
        """
        zlabel = kwargs.pop("z", self.labels.z)

        if isinstance(zlabel, labels_module.Count):
            try:
                zlabel.set_axnorm(self.axnorm)
            except AttributeError:
                # Best effort: presumably covers the case where `axnorm` has
                # not been set on the instance yet -- TODO confirm. The label
                # is still rebuilt below.
                pass

            zlabel.build_label()

        super().set_labels(z=zlabel, **kwargs)



[docs]
    def set_data(self, x, y, z, clip):
        r"""Store the data, replacing log-flagged axes with ``log10(|values|)``.

        Parameters
        ----------
        x, y, z, clip:
            Forwarded unchanged to the parent class's `set_data`.
        """
        super().set_data(x, y, z, clip)

        frame = self.data
        for axis in ("x", "y"):
            if getattr(self.log, axis):
                # The absolute value is taken first, so the sign is discarded.
                frame.loc[:, axis] = np.log10(np.abs(frame.loc[:, axis]))

        self._data = frame



[docs]
    def set_axnorm(self, new):
        r"""Set the method by which the gridded data is normalized.

        ===== =============================================================
         key                           description
        ===== =============================================================
         c     Column normalize
         d     Density normalize
         r     Row normalize
         t     Total normalize
         cd    PDFs in each column
         rd    PDFs in each row
        ===== =============================================================

        Parameters
        ----------
        new: None, str
            One of the keys above (case-insensitive), or None to disable
            normalization. Any other string fails the assertion.
        """
        valid_keys = ("c", "r", "t", "d", "cd", "rd")

        if new is not None:
            new = new.lower()
            assert new in valid_keys, f"Unrecgonized axnorm `{new}`"

        # Keep a `Count` z-label in step with the new normalization.
        label = self.labels.z
        if isinstance(label, labels_module.Count):
            label.set_axnorm(new)
            label.build_label()

        self._axnorm = new


    def _axis_normalizer(self, agg):
        r"""Apply the row, column, total, or density normalization to `agg`.

        Written basically as a `staticmethod` so that it can be called in
        `OrbitHist2D`, but kept as a regular method so `self.log` is available
        for density normalization.

        Parameters
        ----------
        agg:
            Aggregated values whose index has levels named "x" and "y".

        Returns
        -------
        agg:
            The input itself when `self.axnorm` is None, otherwise the
            normalized values.

        Raises
        ------
        ValueError
            If `self.axnorm` is not a recognized key or ``(kind, fcn)`` pair.
        """
        mode = self.axnorm

        if mode is None:
            return agg

        if mode == "c":
            # Scale each x-column by its own maximum.
            column_max = agg.groupby(level="x").max()
            return agg.divide(column_max, level="x")

        if mode == "r":
            # Scale each y-row by its own maximum.
            row_max = agg.groupby(level="y").max()
            return agg.divide(row_max, level="y")

        if mode == "t":
            return agg.divide(agg.max())

        if mode == "d":
            total = agg.sum().sum()
            xbins = pd.IntervalIndex(agg.index.get_level_values("x").unique())
            ybins = pd.IntervalIndex(agg.index.get_level_values("y").unique())
            dx = pd.Series(xbins.length, index=xbins)
            dy = pd.Series(ybins.length, index=ybins)

            # NOTE(review): on log axes the widths become 10**(right - left),
            # i.e. the ratio of the bin edges rather than their difference in
            # linear space -- confirm this is the intended measure.
            if self.log.x:
                dx = 10.0**dx
            if self.log.y:
                dy = 10.0**dy

            return agg.divide(dx, level="x").divide(dy, level="y").divide(total)

        if mode == "cd":
            # Divide by the total in each column, then by each y-bin's width.
            column_total = agg.groupby(level="x").sum()
            ybins = pd.IntervalIndex(
                agg.index.get_level_values("y").unique()
            ).sort_values()
            dy = pd.Series(ybins.length, index=ybins).sort_index()
            return agg.divide(column_total, level="x").divide(dy, level="y")

        if mode == "rd":
            # Divide by the total in each row, then by each x-bin's width.
            row_total = agg.groupby(level="y").sum()
            xbins = pd.IntervalIndex(
                agg.index.get_level_values("x").unique()
            ).sort_values()
            dx = pd.Series(xbins.length, index=xbins).sort_index()
            return agg.divide(row_total, level="y").divide(dx, level="x")

        if not hasattr(mode, "__iter__"):
            raise ValueError(f"Unrecognized axnorm ({mode})")

        # TODO: This is an undocumented feature. I do not know if it is
        #       tested nor how it interacts with colorbar labels, etc.
        #       We need to investigate this issue (20250804).
        kind, fcn = mode
        if kind == "c":
            level = "x"
        elif kind == "r":
            level = "y"
        else:
            raise ValueError(f"Unrecognized axnorm with function ({kind}, {fcn})")

        return agg.divide(agg.groupby(level=level).agg(fcn), level=level)


[docs]
    def agg(self, **kwargs):
        r"""Aggregate, normalize, and reindex, then blank values outside `alim`.

        Parameters
        ----------
        kwargs:
            Forwarded to the parent class's `agg`.

        Returns
        -------
        agg:
            The normalized, reindexed aggregate. Entries below `alim[0]` or
            above `alim[1]` (whichever bounds are not None) are replaced via
            `where`, i.e. with NaN.
        """
        result = super().agg(**kwargs)
        result = self._agg_reindexer(self._axis_normalizer(result))

        lower, upper = self.alim
        if lower is None and upper is None:
            return result

        keep = pd.Series(True, index=result.index)
        if lower is not None:
            keep = keep & (result >= lower)
        if upper is not None:
            keep = keep & (result <= upper)

        return result.where(keep)


    def _make_cbar(self, mappable, **kwargs):
        r"""Build the colorbar, defaulting to 0.1-spaced ticks for "c"/"r" norms.

        Parameters
        ----------
        mappable:
            Passed through to the parent class's `_make_cbar`.
        kwargs:
            Passed through as well. A caller-supplied `ticks` overrides the
            default chosen here.
        """
        if self.axnorm in ("c", "r"):
            default_ticks = mpl.ticker.MultipleLocator(0.1)
        else:
            default_ticks = None

        ticks = kwargs.pop("ticks", default_ticks)
        return super()._make_cbar(mappable, ticks=ticks, **kwargs)

    def _limit_color_norm(self, norm):
        r"""Fill unset limits of `norm` from the 1%/99% quantiles of the z-data.

        Does nothing for column ("c") or row ("r") normalized data. Otherwise
        `norm` is modified in place: `vmin` and `vmax` are assigned only where
        they are currently None, and `clip` is always set to True.

        Parameters
        ----------
        norm:
            Object exposing `vmin`, `vmax`, and `clip` attributes.
        """
        if self.axnorm in ("c", "r"):
            # Don't limit us to (1%, 99%) interval.
            return None

        low_q, high_q = 0.01, 0.99
        bounds = self.data.loc[:, "z"].quantile([low_q, high_q])
        lower = bounds.loc[low_q]
        upper = bounds.loc[high_q]

        if norm.vmin is None:
            norm.vmin = lower
        if norm.vmax is None:
            norm.vmax = upper
        norm.clip = True

    def _prep_agg_for_plot(self, fcn=None, use_edges=True, mask_invalid=True):
        """Build the 2D value grid and matching coordinates for plotting.

        Parameters
        ----------
        fcn : FunctionType, None
            Aggregation function. If None, automatically select in :py:meth:`agg`.
        use_edges : bool
            If True, return bin edges (for pcolormesh).
            If False, return bin centers (for contour).
        mask_invalid : bool
            If True, return a masked array with NaN/inf masked.
            If False, return the raw values (use when applying gaussian_filter).

        Returns
        -------
        C : np.ma.MaskedArray or np.ndarray
            2D array of aggregated values (masked if mask_invalid=True).
        x : np.ndarray
            X coordinates (edges or centers based on use_edges).
        y : np.ndarray
            Y coordinates (edges or centers based on use_edges).
        """
        grid = self.agg(fcn=fcn).unstack("x")

        if use_edges:
            # Edges have n+1 points for n bins.
            coords = {axis: self.edges[axis] for axis in ("x", "y")}
            extra = 1
        else:
            # Centers have n points for n bins.
            coords = {axis: self.intervals[axis].mid for axis in ("x", "y")}
            extra = 0
        x, y = coords["x"], coords["y"]

        # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381)
        if x.size != grid.shape[1] + extra:
            grid = grid.reindex(columns=self.categoricals["x"])
        if y.size != grid.shape[0] + extra:
            grid = grid.reindex(index=self.categoricals["y"])

        x, y = self._maybe_convert_to_log_scale(x, y)

        values = grid.values
        C = np.ma.masked_invalid(values) if mask_invalid else values
        return C, x, y

    def _nan_gaussian_filter(self, array, sigma, **kwargs):
        """Delegate to the shared `nan_gaussian_filter` from `.tools`.

        All arguments are passed through unchanged; see
        `tools.nan_gaussian_filter` for their meaning.
        """
        filtered = nan_gaussian_filter(array, sigma, **kwargs)
        return filtered


[docs]
    def make_plot(
        self,
        ax=None,
        cbar=True,
        limit_color_norm=False,
        cbar_kwargs=None,
        fcn=None,
        alpha_fcn=None,
        **kwargs,
    ):
        r"""Make a 2D plot on `ax` using `ax.pcolormesh`.

        Parameters
        ----------
        ax: mpl.axes.Axes, None
            If None, create an `Axes` instance from `plt.subplots`.
        cbar: bool
            If True, create color bar with `labels.z`.
        limit_color_norm: bool
            If True, limit the color range to the 1st-99th percentile range
            of the z-value (see :py:meth:`_limit_color_norm`). Has no effect
            for row or column normalized data.
        cbar_kwargs: dict, None
            If not None, kwargs passed to `self._make_cbar`. The dict is
            copied, so the caller's object is never modified.
        fcn: FunctionType, None
            Aggregation function. If None, automatically select in :py:meth:`agg`.
        alpha_fcn: None, str
            If not None, the function used to aggregate the data for setting alpha
            value.
        kwargs:
            Passed to `ax.pcolormesh`.
            For row or column normalized data, `norm` defaults to a
            `mpl.colors.BoundaryNorm` with ten bins on [0, 1]. For density
            normalized data ("d", "cd", "rd"), it defaults to
            `mpl.colors.LogNorm(clip=True)`.

        Returns
        -------
        ax: mpl.axes.Axes
            Axes upon which plot was made.
        cbar_or_mappable: colorbar.Colorbar, mpl.collections.QuadMesh
            If `cbar` is True, return the colorbar. Otherwise, return the `Quadmesh` used
            to create the colorbar.
        """
        # Aggregate before creating any figure so that a failed aggregation
        # does not leave an empty figure behind. The helper also applies the
        # pandas reindexing workaround and the log-scale conversion.
        C, x, y = self._prep_agg_for_plot(fcn=fcn, use_edges=True, mask_invalid=True)

        if ax is None:
            _, ax = plt.subplots()

        axnorm = self.axnorm
        default_norm = None
        if axnorm in ("c", "r"):
            default_norm = mpl.colors.BoundaryNorm(
                np.linspace(0, 1, 11), 256, clip=True
            )
        elif axnorm in ("d", "cd", "rd"):
            default_norm = mpl.colors.LogNorm(clip=True)
        norm = kwargs.pop("norm", default_norm)

        if limit_color_norm:
            if norm is None:
                # `_limit_color_norm` mutates the norm it is given, so it needs
                # a real object. Any caller-supplied vmin/vmax moves into the
                # norm because matplotlib rejects them alongside `norm`.
                norm = mpl.colors.Normalize(
                    vmin=kwargs.pop("vmin", None), vmax=kwargs.pop("vmax", None)
                )
            self._limit_color_norm(norm)

        XX, YY = np.meshgrid(x, y)
        pc = ax.pcolormesh(XX, YY, C, norm=norm, **kwargs)

        cbar_or_mappable = pc
        if cbar:
            # Copy so that inserting `ax` below cannot leak into a dict the
            # caller reuses for another axes.
            cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)
            if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
                cbar_kwargs["ax"] = ax

            cbar_or_mappable = self._make_cbar(pc, **cbar_kwargs)

        self._format_axis(ax)

        if alpha_fcn is not None:
            color_plot = self.data.loc[:, self.agg_axes].dropna().unique().size > 1
            if color_plot:
                self.logger.warning(
                    "Make sure you verify alpha actually set. I don't yet trust this."
                )
                # Same preparation as the plotted grid, so the alpha values
                # line up one-to-one with the mesh faces.
                alpha_grid, _, _ = self._prep_agg_for_plot(
                    fcn=alpha_fcn, use_edges=True, mask_invalid=True
                )
                alpha_agg = alpha_grid.ravel()
                # Feature scale then invert so smallest STD
                # is most opaque.
                alpha = 1 - mpl.colors.Normalize()(alpha_agg)
                self.logger.warning("Scaling alpha filter as alpha**0.25")
                alpha = alpha**0.25

                # Set masked values to zero. Otherwise, masked
                # values are rendered as black.
                alpha = alpha.filled(0)
                # Must draw to initialize `facecolor`s. Draw the figure that
                # owns `ax`, synchronously, rather than whichever figure
                # pyplot considers current.
                ax.figure.canvas.draw()
                colors = pc.get_facecolors()
                colors[:, 3] = alpha
                pc.set_facecolor(colors)
            else:
                self.logger.warning("Ignoring `alpha_fcn` because plotting counts")

        return ax, cbar_or_mappable



[docs]
    def plot_hist_with_contours(
        self,
        ax=None,
        cbar=True,
        limit_color_norm=False,
        cbar_kwargs=None,
        fcn=None,
        # Contour-specific parameters
        levels=None,
        label_levels=False,
        use_contourf=True,
        contour_kwargs=None,
        clabel_kwargs=None,
        skip_max_clbl=True,
        gaussian_filter_std=0,
        gaussian_filter_kwargs=None,
        nan_aware_filter=False,
        **kwargs,
    ):
        """Make a 2D pcolormesh plot with contour overlay.

        Combines `make_plot` (pcolormesh background) with `plot_contours`
        (contour/contourf overlay) in a single call.

        Parameters
        ----------
        ax : mpl.axes.Axes, None
            If None, create an `Axes` instance from `plt.subplots`.
        cbar : bool
            If True, create color bar with `labels.z`.
        limit_color_norm : bool
            If True, limit the color range to the 1st-99th percentile range of
            the z-value (see :py:meth:`_limit_color_norm`).
        cbar_kwargs : dict, None
            If not None, kwargs passed to `self._make_cbar`.
        fcn : FunctionType, None
            Aggregation function. If None, automatically select.
        levels : array-like, int, None
            Contour levels. If None, automatically determined.
        label_levels : bool
            If True, add labels to contours with `ax.clabel`.
        use_contourf : bool
            If True, use filled contours. Else use line contours.
        contour_kwargs : dict, None
            Additional kwargs passed to contour/contourf (e.g., linestyles, colors).
            When `colors` is given, `cmap` is applied to the pcolormesh only,
            because matplotlib does not accept both for one contour set.
        clabel_kwargs : dict, None
            Kwargs passed to `ax.clabel`.
        skip_max_clbl : bool
            If True, don't label the maximum contour level.
        gaussian_filter_std : int
            If > 0, apply Gaussian filter to contour data.
        gaussian_filter_kwargs : dict, None
            Kwargs passed to `scipy.ndimage.gaussian_filter`.
        nan_aware_filter : bool
            If True and gaussian_filter_std > 0, use NaN-aware filtering via
            normalized convolution. Otherwise use standard scipy.ndimage.gaussian_filter.
        kwargs :
            Passed to `ax.pcolormesh`.

        Returns
        -------
        ax : mpl.axes.Axes
        cbar_or_mappable : colorbar.Colorbar or QuadMesh
        qset : QuadContourSet
            The contour set from the overlay.
        lbls : list or None
            Contour labels if label_levels is True.

        Notes
        -----
        The dict arguments (`cbar_kwargs`, `contour_kwargs`, `clabel_kwargs`)
        are copied before use, so the caller's objects are never modified.
        """
        if ax is None:
            _, ax = plt.subplots()

        # Work on copies: entries are popped from / inserted into these below.
        contour_kwargs = {} if contour_kwargs is None else dict(contour_kwargs)
        clabel_kwargs = {} if clabel_kwargs is None else dict(clabel_kwargs)
        cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)

        # Determine normalization
        axnorm = self.axnorm
        default_norm = None
        if axnorm in ("c", "r"):
            default_norm = mpl.colors.BoundaryNorm(
                np.linspace(0, 1, 11), 256, clip=True
            )
        elif axnorm in ("d", "cd", "rd"):
            default_norm = mpl.colors.LogNorm(clip=True)
        norm = kwargs.pop("norm", default_norm)

        if limit_color_norm:
            if norm is None:
                # `_limit_color_norm` mutates the norm it is given, so it needs
                # a real object. Any caller-supplied vmin/vmax moves into the
                # norm because matplotlib rejects them alongside `norm`.
                norm = mpl.colors.Normalize(
                    vmin=kwargs.pop("vmin", None), vmax=kwargs.pop("vmax", None)
                )
            self._limit_color_norm(norm)

        # Get cmap from kwargs (shared between pcolormesh and contour)
        cmap = kwargs.pop("cmap", None)

        # --- 1. Plot pcolormesh background ---
        C_edges, x_edges, y_edges = self._prep_agg_for_plot(fcn=fcn, use_edges=True)
        XX_edges, YY_edges = np.meshgrid(x_edges, y_edges)
        pc = ax.pcolormesh(XX_edges, YY_edges, C_edges, norm=norm, cmap=cmap, **kwargs)

        # --- 2. Plot contour overlay ---
        # Delay masking if gaussian filter will be applied
        needs_filter = gaussian_filter_std > 0
        C_centers, x_centers, y_centers = self._prep_agg_for_plot(
            fcn=fcn, use_edges=False, mask_invalid=not needs_filter
        )

        # Apply Gaussian filter if requested
        if needs_filter:
            if gaussian_filter_kwargs is None:
                gaussian_filter_kwargs = {}

            if nan_aware_filter:
                C_centers = self._nan_gaussian_filter(
                    C_centers, gaussian_filter_std, **gaussian_filter_kwargs
                )
            else:
                from scipy.ndimage import gaussian_filter

                C_centers = gaussian_filter(
                    C_centers, gaussian_filter_std, **gaussian_filter_kwargs
                )

            C_centers = np.ma.masked_invalid(C_centers)

        XX_centers, YY_centers = np.meshgrid(x_centers, y_centers)

        # Get contour levels
        levels = self._get_contour_levels(levels)
        # An integer `levels` is a level count, not a list that can be sliced
        # or handed to `clabel`.
        levels_is_sequence = levels is not None and np.ndim(levels) > 0

        # Contour function
        contour_fcn = ax.contourf if use_contourf else ax.contour

        # Default linestyles for contour
        linestyles = contour_kwargs.pop(
            "linestyles",
            [
                "-",
                ":",
                "--",
                (0, (7, 3, 1, 3, 1, 3, 1, 3, 1, 3)),
                "--",
                ":",
                "-",
                (0, (7, 3, 1, 3)),
            ],
        )

        # matplotlib raises if both `colors` and `cmap` are set, so explicit
        # contour colors take precedence over the shared colormap.
        contour_cmap = None if contour_kwargs.get("colors") is not None else cmap

        if levels is None:
            args = [XX_centers, YY_centers, C_centers]
        else:
            args = [XX_centers, YY_centers, C_centers, levels]

        qset = contour_fcn(
            *args,
            linestyles=linestyles,
            cmap=contour_cmap,
            norm=norm,
            **contour_kwargs,
        )

        # --- 3. Contour labels ---
        lbls = None
        if label_levels:
            inline = clabel_kwargs.pop("inline", True)
            inline_spacing = clabel_kwargs.pop("inline_spacing", -3)
            fmt = clabel_kwargs.pop("fmt", "%s")

            class nf(float):
                # Drop trailing zeros from the label text ('1.0' -> '1.').
                def __repr__(self):
                    return float.__repr__(self).rstrip("0")

            if levels_is_sequence:
                clabel_args = (qset, levels[:-1] if skip_max_clbl else levels)
            else:
                clabel_args = (qset,)

            qset.levels = [nf(level) for level in qset.levels]
            lbls = ax.clabel(
                *clabel_args,
                inline=inline,
                inline_spacing=inline_spacing,
                fmt=fmt,
                **clabel_kwargs,
            )

        # --- 4. Colorbar ---
        cbar_or_mappable = pc
        if cbar:
            if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
                cbar_kwargs["ax"] = ax
            cbar_or_mappable = self._make_cbar(pc, **cbar_kwargs)

        # --- 5. Format axis ---
        self._format_axis(ax)

        return ax, cbar_or_mappable, qset, lbls



[docs]
    def get_border(self):
        r"""Get the top and bottom edges of the plot.

        For every x-column of the aggregated grid, the last valid y-entry
        goes into `top` and the first valid y-entry goes into `bottom`.
        Columns without any valid entry contribute to neither.

        Returns
        -------
        border: namedtuple
            Contains "top" and "bottom" fields, each with a :py:class:`pd.Series`
            indexed by ("y", "x").
        """
        Border = namedtuple("Border", ["top", "bottom"])

        grid = self.agg().unstack("x")
        top, bottom = {}, {}
        for xbin, column in grid.items():
            lookups = (
                (column.last_valid_index, top),
                (column.first_valid_index, bottom),
            )
            for find_index, store in lookups:
                ybin = find_index()
                if ybin is None:
                    continue
                store[(ybin, xbin)] = column.loc[ybin]

        edges = [pd.Series(top), pd.Series(bottom)]
        for edge in edges:
            edge.index.names = ["y", "x"]

        return Border(*edges)


    def _plot_one_edge(
        self,
        ax,
        edge,
        smooth=False,
        sg_kwargs=None,
        xlim=(None, None),
        ylim=(None, None),
        **kwargs,
    ):
        r"""Plot one border curve through the bin centers of `edge`.

        Parameters
        ----------
        ax:
            Axis on which to plot.
        edge: pd.Series
            Series whose index has interval-valued levels named "x" and "y";
            the interval midpoints are the plotted coordinates.
        smooth: bool
            If True, pass the y-values through
            :py:func:`scipy.signal.savgol_filter` before plotting.
        sg_kwargs: dict, None
            Kwargs for the Savitzky-Golay filter. `window_length` defaults to
            10% of the number of points and `polyorder` to 3. An even
            `window_length` is reduced by one. The dict is copied, so the
            caller's object is left untouched and can be reused.
        xlim, ylim: tuple
            Inclusive ``(low, high)`` bounds; points outside are dropped.
            A None bound is not applied.
        kwargs:
            Passed to `ax.plot`.

        Returns
        -------
        The return value of `ax.plot`.
        """
        x = edge.index.get_level_values("x").mid
        y = edge.index.get_level_values("y").mid

        # Copy before popping: `plot_edges` hands the same dict to two calls,
        # and popping from the caller's object would strip the settings from
        # the second one.
        sg_kwargs = {} if sg_kwargs is None else dict(sg_kwargs)

        if smooth:
            wlength = sg_kwargs.pop("window_length", int(np.floor(y.shape[0] / 10)))
            polyorder = sg_kwargs.pop("polyorder", 3)

            # Even window lengths are reduced to the next lower odd value.
            if not wlength % 2:
                wlength -= 1

            y = savgol_filter(y, wlength, polyorder, **sg_kwargs)

        # Coordinates are stored in log10 space on log axes; convert back.
        if self.log.x:
            x = 10.0**x
        if self.log.y:
            y = 10.0**y

        x0, x1 = xlim
        y0, y1 = ylim

        tk = np.ones(x.shape, dtype=bool)
        if x0 is not None:
            tk = tk & (x0 <= x)
        if x1 is not None:
            tk = tk & (x <= x1)
        if y0 is not None:
            tk = tk & (y0 <= y)
        if y1 is not None:
            tk = tk & (y <= y1)

        x = x[tk]
        y = y[tk]

        return ax.plot(x, y, **kwargs)


[docs]
    def plot_edges(self, ax, smooth=True, sg_kwargs=None, **kwargs):
        """Overplot the edges.

        Parameters
        ----------
        ax:
            Axis on which to plot.
        smooth: bool
            If True, apply a Savitzky-Golay filter (:py:func:`scipy.signal.savgol_filter`)
            to the y-values before plotting to smooth the curve.
        sg_kwargs: dict, None
            If not None, dict of kwargs passed to Savitzky-Golay filter. Also allows
            for setting of `window_length` and `polyorder` as kwargs. They default to
            10% of the number of observations (`window_length`) and 3 (`polyorder`).
            Note that because `window_length` must be odd, if the 10% value is even, we
            take `window_length - 1`. The same settings apply to both edges.
        kwargs:
            Passed to `ax.plot`. `color` defaults to "cyan". `label` is applied
            to the top edge only.

        Returns
        -------
        etop, ebottom:
            The return values of the two `ax.plot` calls.
        """

        top, bottom = self.get_border()

        color = kwargs.pop("color", "cyan")
        label = kwargs.pop("label", None)

        def fresh_sg_kwargs():
            # Each edge gets its own copy so that entries consumed while
            # drawing the top edge are still present for the bottom edge.
            return None if sg_kwargs is None else dict(sg_kwargs)

        etop = self._plot_one_edge(
            ax, top, smooth, fresh_sg_kwargs(), color=color, label=label, **kwargs
        )
        # The bottom edge is drawn without a label.
        ebottom = self._plot_one_edge(
            ax, bottom, smooth, fresh_sg_kwargs(), color=color, **kwargs
        )

        return etop, ebottom


    def _get_contour_levels(self, levels):
        r"""Return the contour levels, choosing defaults based on `self.axnorm`.

        Parameters
        ----------
        levels:
            Caller-specified levels. Returned unchanged when not None.

        Returns
        -------
        levels:
            `levels` itself when given or when `self.axnorm` is None; a fixed
            default list for "t", "d", "r", and "c"; None for "cd" and "rd".

        Raises
        ------
        ValueError
            If no levels are given and `self.axnorm` is not one of the above.
        """
        axnorm = self.axnorm

        # Explicit levels always win, and unnormalized data gets no defaults.
        if levels is not None or axnorm is None:
            return levels

        if axnorm == "t":
            return [0.01, 0.1, 0.3, 0.7, 0.99]

        if axnorm == "d":
            return [3e-5, 1e-4, 3e-4, 1e-3, 1.7e-3, 2.3e-3]

        if axnorm in ["r", "c"]:
            return [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

        if axnorm in ["cd", "rd"]:
            return None

        raise ValueError(
            f"Unrecognized axis normalization {self.axnorm} for default levels."
        )

    def _verify_contour_passthrough_kwargs(
        self, ax, clabel_kwargs, edges_kwargs, cbar_kwargs
    ):
        r"""Normalize the optional kwarg dicts used by the contour plotters.

        Parameters
        ----------
        ax:
            Axes stored under `cbar_kwargs["ax"]` when the caller supplied
            neither "ax" nor "cax".
        clabel_kwargs, edges_kwargs, cbar_kwargs: dict, None
            Optional kwargs; None is treated as an empty dict.

        Returns
        -------
        clabel_kwargs, edges_kwargs, cbar_kwargs: dict
            New dicts. The inputs are shallow-copied so that inserting `ax`
            here, or popping entries later, never modifies objects the caller
            may reuse for another plot.
        """
        clabel_kwargs = {} if clabel_kwargs is None else dict(clabel_kwargs)
        edges_kwargs = {} if edges_kwargs is None else dict(edges_kwargs)
        cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)

        if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
            cbar_kwargs["ax"] = ax

        return clabel_kwargs, edges_kwargs, cbar_kwargs


[docs]
    def plot_contours(
        self,
        ax=None,
        label_levels=True,
        cbar=True,
        limit_color_norm=False,
        cbar_kwargs=None,
        fcn=None,
        plot_edges=False,
        edges_kwargs=None,
        clabel_kwargs=None,
        skip_max_clbl=True,
        use_contourf=False,
        gaussian_filter_std=0,
        gaussian_filter_kwargs=None,
        nan_aware_filter=False,
        **kwargs,
    ):
        """Make a contour plot on `ax` using `ax.contour` or `ax.contourf`.

        Parameters
        ----------
        ax: mpl.axes.Axes, None
            If None, create an `Axes` instance from `plt.subplots`.
        label_levels: bool
            If True, add labels to contours with `ax.clabel`.
        cbar: bool
            If True, create color bar with `labels.z`.
        limit_color_norm: bool
            If True, limit the color range to the 1st-99th percentile range
            of the z-value (see :py:meth:`_limit_color_norm`). Has no effect
            for row or column normalized data.
        cbar_kwargs: dict, None
            If not None, kwargs passed to `self._make_cbar`.
        fcn: FunctionType, None
            Aggregation function. If None, automatically select in :py:meth:`agg`.
        plot_edges: bool
            If True, plot the smoothed, extreme edges of the 2D histogram.
        edges_kwargs: None, dict
            Passed to :py:meth:`plot_edges`.
        clabel_kwargs: None, dict
            If not None, dictionary of kwargs passed to `ax.clabel`.
        skip_max_clbl: bool
            If True, don't label the maximum contour. Primarily used when the maximum
            contour is, effectively, a point.
        use_contourf: bool
            If True, use `ax.contourf`. Else use `ax.contour`.
        gaussian_filter_std: int
            If > 0, apply `scipy.ndimage.gaussian_filter` to the z-values using the
            standard deviation specified by `gaussian_filter_std`.
        gaussian_filter_kwargs: None, dict
            If not None and gaussian_filter_std > 0, passed to :py:meth:`scipy.ndimage.gaussian_filter`
        nan_aware_filter: bool
            If True and gaussian_filter_std > 0, use NaN-aware filtering via
            normalized convolution. Otherwise use standard scipy.ndimage.gaussian_filter.
        kwargs:
            Passed to the contour function, after `levels`, `cmap`, `norm`,
            and `linestyles` are extracted. `levels` may be a sequence, an
            integer number of levels, or None for per-`axnorm` defaults.
            For row or column normalized data, `norm` defaults to a
            `mpl.colors.BoundaryNorm` with ten bins on [0, 1]; otherwise it is
            built from `levels` when at least two explicit levels are known.

        Returns
        -------
        ax: mpl.axes.Axes
            Axes upon which plot was made.
        lbls: list, None
            Contour labels if `label_levels` is True, else None.
        cbar_or_mappable: colorbar.Colorbar, QuadContourSet
            The colorbar if `cbar` is True, else the contour set.
        qset: QuadContourSet
            The contour set.

        Notes
        -----
        The dict arguments (`cbar_kwargs`, `edges_kwargs`, `clabel_kwargs`)
        are copied before use, so the caller's objects are never modified.
        """
        levels = kwargs.pop("levels", None)
        cmap = kwargs.pop("cmap", None)
        norm = kwargs.pop(
            "norm",
            (
                mpl.colors.BoundaryNorm(np.linspace(0, 1, 11), 256, clip=True)
                if self.axnorm in ("c", "r")
                else None
            ),
        )
        linestyles = kwargs.pop(
            "linestyles",
            [
                "-",
                ":",
                "--",
                (0, (7, 3, 1, 3, 1, 3, 1, 3, 1, 3)),
                "--",
                ":",
                "-",
                (0, (7, 3, 1, 3, 1, 3)),
            ],
        )

        if ax is None:
            _, ax = plt.subplots()

        # Hand copies to the helper: entries are popped from / inserted into
        # these dicts below, which must not leak into the caller's objects.
        (
            clabel_kwargs,
            edges_kwargs,
            cbar_kwargs,
        ) = self._verify_contour_passthrough_kwargs(
            ax,
            None if clabel_kwargs is None else dict(clabel_kwargs),
            None if edges_kwargs is None else dict(edges_kwargs),
            None if cbar_kwargs is None else dict(cbar_kwargs),
        )

        inline = clabel_kwargs.pop("inline", True)
        inline_spacing = clabel_kwargs.pop("inline_spacing", -3)
        fmt = clabel_kwargs.pop("fmt", "%s")

        agg = self.agg(fcn=fcn).unstack("x")
        x = self.intervals["x"].mid
        y = self.intervals["y"].mid

        # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381)
        if x.size != agg.shape[1]:
            agg = agg.reindex(columns=self.categoricals["x"])
        if y.size != agg.shape[0]:
            agg = agg.reindex(index=self.categoricals["y"])

        x, y = self._maybe_convert_to_log_scale(x, y)

        XX, YY = np.meshgrid(x, y)

        C = agg.values
        if gaussian_filter_std:
            if gaussian_filter_kwargs is None:
                gaussian_filter_kwargs = dict()

            if nan_aware_filter:
                C = self._nan_gaussian_filter(
                    C, gaussian_filter_std, **gaussian_filter_kwargs
                )
            else:
                from scipy.ndimage import gaussian_filter

                C = gaussian_filter(C, gaussian_filter_std, **gaussian_filter_kwargs)

        C = np.ma.masked_invalid(C)

        assert XX.shape == C.shape
        assert YY.shape == C.shape

        class nf(float):
            # Source: https://matplotlib.org/3.1.0/gallery/images_contours_and_fields/contour_label_demo.html
            # Define a class that forces representation of float to look a certain way
            # This remove trailing zero so '1.0' becomes '1'
            def __repr__(self):
                return float.__repr__(self).rstrip("0")

        levels = self._get_contour_levels(levels)
        # An integer `levels` is a level count: it has no length, cannot be
        # sliced, and must not be handed to `clabel` as a list of levels.
        levels_is_sequence = levels is not None and np.ndim(levels) > 0

        if (norm is None) and levels_is_sequence and (len(levels) >= 2):
            norm = mpl.colors.BoundaryNorm(levels, 256, clip=True)

        if limit_color_norm:
            if norm is None:
                # `_limit_color_norm` mutates the norm it is given, so it needs
                # a real object. Any caller-supplied vmin/vmax moves into the
                # norm because matplotlib rejects them alongside `norm`.
                norm = mpl.colors.Normalize(
                    vmin=kwargs.pop("vmin", None), vmax=kwargs.pop("vmax", None)
                )
            self._limit_color_norm(norm)

        contour_fcn = ax.contourf if use_contourf else ax.contour

        if levels is None:
            args = [XX, YY, C]
        else:
            args = [XX, YY, C, levels]

        qset = contour_fcn(*args, linestyles=linestyles, cmap=cmap, norm=norm, **kwargs)

        if levels_is_sequence:
            clabel_args = (qset, levels[:-1] if skip_max_clbl else levels)
        else:
            clabel_args = (qset,)

        lbls = None
        if label_levels:
            qset.levels = [nf(level) for level in qset.levels]
            lbls = ax.clabel(
                *clabel_args,
                inline=inline,
                inline_spacing=inline_spacing,
                fmt=fmt,
                **clabel_kwargs,
            )

        if plot_edges:
            self.plot_edges(ax, **edges_kwargs)

        cbar_or_mappable = qset
        if cbar:
            # Pass `norm` to `self._make_cbar` so that we can choose the ticks to use.
            cbar_or_mappable = self._make_cbar(qset, norm=norm, **cbar_kwargs)

        self._format_axis(ax)

        return ax, lbls, cbar_or_mappable, qset



[docs]
    def project_1d(self, axis, only_plotted=True, project_counts=False, **kwargs):
        """Make a `Hist1D` from the data stored in this `Hist2D`.

        Parameters
        ----------
        axis: str
            "x" or "y" (case-insensitive), specifying the axis to project into 1D.
        only_plotted: bool
            If True, only pass the observations selected by
            `get_plotted_data_boolean_series` to the :py:class:`Hist1D`.
        project_counts: bool
            If True, only send the variable stored along `axis` to
            :py:class:`Hist1D`. Otherwise, also send a second variable for it to
            aggregate: the "z" column when it holds at least two distinct
            values, else the remaining axis ("y" for `axis="x"` and vice versa).
        kwargs:
            Passed to `Hist1D`. Primarily to allow specifying `bin_precision`.

        Returns
        -------
        h1: :py:class:`Hist1D`
        """
        axis = axis.lower()
        assert axis in ("x", "y")

        data = self.data
        log = self.log._asdict()

        # NOTE(review): `Series.unique` reports NaN as a value, so a z column
        # holding only 1 and NaN also selects "z" here -- confirm that is intended.
        if data.loc[:, "z"].unique().size >= 2:
            other = "z"
        else:
            other = "y" if axis == "x" else "x"

        logx = log[axis]
        x = data.loc[:, axis]
        if logx:
            # Need to convert back to regular from log-space for data setting.
            x = 10.0**x

        y = None
        logy = False  # Stays False when only counts are projected or `other` is z.
        if not project_counts:
            y = data.loc[:, other]
            if other != "z":
                # `other` is the remaining plot axis ("x" or "y"). Keep only the
                # values inside its bin edges and undo any log10 scaling.
                # Previously this was only done for other == "y", so projecting
                # along "y" handed un-clipped, still-log-scaled x-values to
                # `Hist1D`.
                logy = log[other]
                other_edges = self.edges[other].values
                y = y.where((other_edges[0] <= y) & (y <= other_edges[-1]))
                if logy:
                    y = 10.0**y

        if only_plotted:
            tk = self.get_plotted_data_boolean_series()
            x = x.loc[tk]
            if y is not None:
                y = y.loc[tk]

        h1 = Hist1D(
            x,
            y=y,
            logx=logx,
            clip_data=False,  # Any clipping will be addressed by bins.
            nbins=self.edges[axis].values,
            **kwargs,
        )

        h1.set_log(y=logy)  # Need to propagate logy.

        labels = self.labels._asdict()
        h1.set_labels(x=labels[axis])
        if not project_counts:
            h1.set_labels(y=labels[other])

        return h1



[docs]
    def make_joint_h2_h1_plot(
        self, project_counts=True, kwargs_1d=None, fig_axes=None, **kwargs
    ):
        r"""Plot this 2D histogram with its 1D projections along the top and right.

        A new figure is created on a 4-row by 2-column grid. The 2D plot goes in
        the main panel, the x-projection above it, the y-projection to its right
        (drawn with `transpose_axes=True`), and a color bar axis below it.

        Parameters
        ----------
        project_counts: bool
            Passed to `project_1d` for both projections.
        kwargs_1d: None, dict
            Keyword arguments passed to `make_plot` of both 1D projections.
        fig_axes: None
            Currently unused by this method.
        kwargs:
            `figsize`, `height_ratios`, `width_ratios`, `hspace`, and `wspace`
            are removed and used to build the figure and grid. `cbar_kwargs`
            (a dict) is removed and forwarded to `make_plot`, with `cax`
            defaulting to the axis below the main panel and `orientation`
            defaulting to "horizontal". Everything else is passed to
            `make_plot`.

        Returns
        -------
        hax, xax, yax:
            Axes holding the 2D plot, the x-projection, and the y-projection.
        cbar:
            The second value returned by `make_plot`.
        """
        figsize = kwargs.pop("figsize", (5, 6))
        height_ratios = kwargs.pop("height_ratios", [0.25, 1, 0.2, 0.1])
        width_ratios = kwargs.pop("width_ratios", [1, 0.25])
        hspace = kwargs.pop("hspace", 0)
        wspace = kwargs.pop("wspace", 0)

        fig = plt.figure(figsize=figsize)
        gs = mpl.gridspec.GridSpec(
            4,
            2,
            height_ratios=height_ratios,
            width_ratios=width_ratios,
            hspace=hspace,
            wspace=wspace,
        )

        hax = fig.add_subplot(gs[1, 0])
        xax = fig.add_subplot(gs[0, 0], sharex=hax)
        yax = fig.add_subplot(gs[1, 1], sharey=hax)
        cax = fig.add_subplot(gs[3, 0])

        # Work on a copy so the `pop` calls below do not strip keys from the
        # dict owned by the caller. `None` is treated as "no options".
        cbar_kwargs = dict(kwargs.pop("cbar_kwargs", None) or {})
        cax = cbar_kwargs.pop("cax", cax)
        orientation = cbar_kwargs.pop("orientation", "horizontal")
        _, cbar = self.make_plot(
            ax=hax,
            cbar_kwargs=dict(cax=cax, orientation=orientation, **cbar_kwargs),
            **kwargs,
        )

        if kwargs_1d is None:
            kwargs_1d = dict()

        self.project_1d("x", project_counts=project_counts).make_plot(
            ax=xax, **kwargs_1d
        )
        self.project_1d("y", project_counts=project_counts).make_plot(
            ax=yax, **kwargs_1d, transpose_axes=True
        )

        xax.label_outer()
        # Mimic `ax.label_outer` for `yax`.
        for label in yax.get_yticklabels(which="both"):
            label.set_visible(False)
        yax.get_yaxis().get_offset_text().set_visible(False)
        yax.set_ylabel("")

        # On linear axes, drop the uppermost major tick of the main panel.
        log = self.log
        if not log.x:
            hax.xaxis.set_major_locator(
                mpl.ticker.MaxNLocator(
                    nbins=hax.xaxis.get_ticklocs().size - 1, prune="upper"
                )
            )
        if not log.y:
            hax.yaxis.set_major_locator(
                mpl.ticker.MaxNLocator(
                    nbins=hax.yaxis.get_ticklocs().size - 1, prune="upper"
                )
            )

        return hax, xax, yax, cbar



[docs]
    def id_data_above_contour(self, level):
        r"""Identify the observations lying in bins at or above `level`.

        For every x-bin, the y-extent is taken from the bottom edge of the lowest
        y-bin to the top edge of the highest y-bin whose aggregated value is
        `>= level`. Observations inside that x-bin and y-extent are tagged with
        the x-bin. x-bins without any bin reaching `level` tag nothing.

        Parameters
        ----------
        level: scalar
             The z-value at or above which to select data. Compared against the
             values returned by `agg`.

        Returns
        -------
        above_contour: pd.Series
            Categorical series. For data in a selected bin, indicates the
            x-`pd.Interval` within which the observation falls. `NaN` are
            observations that are not selected. This object is purposely the same
            length as the data stored by Hist2D and can be used in groupby
            operations.
        """
        x = self.data.x
        y = self.data.y

        # Object dtype so that `pd.Interval` labels can be stored next to NaN
        # without relying on an implicit float -> object upcast.
        above_contour = pd.Series(np.nan, index=self.data.index, dtype=object)

        for x_bin, column in self.agg().unstack("x").items():
            reached = column[column >= level]
            if reached.empty:
                # Nothing in this x-bin reaches `level`; min()/max() of the
                # empty index would be NaN and have no `.left`/`.right`.
                continue

            bottom = reached.index.min().left
            top = reached.index.max().right
            in_x = (x_bin.left < x) & (x <= x_bin.right)
            in_y = (bottom < y) & (y <= top)
            above_contour[in_x & in_y] = x_bin

        above_contour = pd.Series(
            pd.Categorical(above_contour), index=above_contour.index
        )

        return above_contour



[docs]
    def take_data_in_yrange_across_x(
        self,
        ranges_by_x,
        get_x_bounds,
        get_y_bounds,
    ):
        r"""Take data within y-ranges across x-values.

        Parameters
        ----------
        ranges_by_x: pd.DataFrame
            One row per x-selection. Each index entry is passed to `get_x_bounds`
            to get the left and right bounds for the data and each row is passed
            to `get_y_bounds` to get the bottom and top bounds for the data. The
            index is compared against the x-bins of the aggregated data.

        get_x_bounds: function
            First argument is one index entry of `ranges_by_x` and returns
            `left, right`. Second argument is a kwarg (`expected_logx`) boolean to
            transform the returned values according to whether or not the keys are
            :math:`log(x)` or :math:`x` in a manner that matches data stored in
            Hist2D.

        get_y_bounds: function
            First argument is one row of `ranges_by_x` and returns `bottom, top`.
            Second argument is a kwarg (`expected_logy`) boolean to transform the
            returned values according to whether or not the keys are
            :math:`log(y)` or :math:`y` in a manner that matches data stored in
            Hist2D.

        Returns
        -------
        taken: np.ndarray 1D
            Sorted array of index labels for selecting data in the intervals.
            Empty when `ranges_by_x` has no rows.

        Raises
        ------
        ValueError
            If `ranges_by_x` is indexed by x-values that are not among the
            aggregated x-bins.
        """
        available_x = self.agg().unstack("x").columns
        drop = ranges_by_x.index.symmetric_difference(available_x)
        if drop.size:
            if not drop.isin(available_x).all():
                # Some selector keys do not correspond to any aggregated x-bin.
                raise ValueError(
                    "Need a way to drop values in selector that aren't available."
                )
            self.logger.warning(
                f"Dropping {drop.size} intervals from available for selecting."
            )

        data = self.data
        logx = self.log.x
        logy = self.log.y

        taken = []
        for x, at_x in ranges_by_x.iterrows():
            left, right = get_x_bounds(x, expected_logx=logx)
            bottom, top = get_y_bounds(at_x, expected_logy=logy)

            assert left < right
            assert bottom < top

            in_x = (left < data.x) & (data.x <= right)
            in_y = (bottom < data.y) & (data.y <= top)
            selected = in_x & in_y
            taken.append(selected.loc[selected].index)

        if not taken:
            # `np.concatenate` raises on an empty sequence.
            return np.array([], dtype=data.index.dtype)

        taken = np.sort(np.concatenate(taken))
        return taken