#!/usr/bin/env python
r"""Two-dimensional histogram and heatmap plotting utilities."""
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from collections import namedtuple
from scipy.signal import savgol_filter
from . import base
from . import labels as labels_module
from .tools import nan_gaussian_filter
from . import agg_plot
from . import hist1d
# Bind the two plot classes to short module-level names; `Hist2D` below
# inherits from `AggPlot` and `Hist2D.project_1d` instantiates `Hist1D`.
AggPlot = agg_plot.AggPlot
Hist1D = hist1d.Hist1D
[docs]
class Hist2D(base.PlotWithZdata, base.CbarMaker, AggPlot):
r"""Create a 2D histogram with an optional z-value using an equal number
of bins along the x and y axes.
Parameters
----------
x, y: pd.Series
x and y data to aggregate
z: None, pd.Series
If not None, the z-value to aggregate.
axnorm: str
Normalize the histogram.
key normalization
--- -------------
c column
r row
t total
d density
logx, logy: bool
If True, log10 scale the axis.
Attributes
----------
data:
bins:
cut:
axnorm:
log<x,y>:
<x,y,z>label:
path: None, Path
Methods
-------
calc_bins:
calculate the x, y bins.
make_cut:
Utilize the calculated bins to convert (x, y) into pd.Categorical
or pd.Interval values used in aggregation.
set_[x,y,z]label:
Set the x, y, or z label.
agg:
Aggregate the data in the bins.
If z-value is None, count the number of points in each bin.
If z-value is not None, calculate the mean for each bin.
make_plot:
Make a 2D plot of the data with an optional color bar.
"""
[docs]
def __init__(
    self,
    x,
    y,
    z=None,
    axnorm=None,
    logx=False,
    logy=False,
    clip_data=False,
    nbins=101,
    bin_precision=None,
):
    r"""Store the data, bin it, and initialise labels and plot limits.

    Parameters
    ----------
    x, y: pd.Series
        Data to bin along each axis.
    z: None, pd.Series
        Optional value to aggregate in each bin. When None, the z label
        is a `labels_module.Count` built with `norm=axnorm`; otherwise
        the z label is the string "z".
    axnorm: None, str
        Normalization key handed to `set_axnorm`.
    logx, logy: bool
        Forwarded to `set_log`.
    clip_data: bool
        Forwarded to `set_data`.
    nbins:
        Forwarded to `calc_bins_intervals`.
    bin_precision:
        Forwarded to `calc_bins_intervals` as `precision`.
    """
    super().__init__()

    # The log flags have to exist before the data is stored because
    # `set_data` reads `self.log`.
    self.set_log(x=logx, y=logy)
    self.set_data(x, y, z, clip_data)

    if z is None:
        zlabel = labels_module.Count(norm=axnorm)
    else:
        zlabel = "z"
    self.set_labels(x="x", y="y", z=zlabel)

    self.set_axnorm(axnorm)
    self.calc_bins_intervals(nbins=nbins, precision=bin_precision)
    self.make_cut()

    # Start with no color limits and no aggregate-value limits.
    self.set_clim(None, None)
    self.set_alim(None, None)
@property
def _gb_axes(self):
    """Return the pair of axis names ``("x", "y")``."""
    axes = ("x", "y")
    return axes
def _maybe_convert_to_log_scale(self, x, y):
    """Raise 10 to the power of each coordinate whose log flag is set.

    Parameters
    ----------
    x, y:
        Coordinates to convert. Each is returned unchanged unless
        `self.log.x` / `self.log.y` is truthy, in which case ``10.0**value``
        is returned in its place.

    Returns
    -------
    x, y:
        The (possibly converted) coordinates, in the same order.
    """
    log = self.log
    x = 10.0**x if log.x else x
    y = 10.0**y if log.y else y
    return x, y
[docs]
def set_labels(self, **kwargs):
    """Set axis labels, refreshing a `Count` z label before storing it.

    Parameters
    ----------
    kwargs:
        Label keyword arguments forwarded to the parent `set_labels`.
        If `z` is omitted, the current `self.labels.z` is re-used.
    """
    current_z = self.labels.z
    zlabel = kwargs.pop("z", current_z)

    if isinstance(zlabel, labels_module.Count):
        # `self.axnorm` may not be defined yet (`__init__` calls this method
        # before `set_axnorm`), so a missing attribute is tolerated.
        try:
            zlabel.set_axnorm(self.axnorm)
        except AttributeError:
            pass
        zlabel.build_label()

    super().set_labels(z=zlabel, **kwargs)
[docs]
def set_data(self, x, y, z, clip):
    """Store the data, replacing log-flagged axes by ``log10(abs(value))``.

    Parameters
    ----------
    x, y, z, clip:
        Forwarded unchanged to the parent `set_data`.
    """
    super().set_data(x, y, z, clip)
    frame = self.data

    for axis in ("x", "y"):
        if getattr(self.log, axis):
            # The absolute value is taken before the logarithm.
            frame.loc[:, axis] = np.log10(np.abs(frame.loc[:, axis]))

    self._data = frame
[docs]
def set_axnorm(self, new):
    r"""Set the method by which the gridded data is normalized.

    ===== =============================================================
    key   description
    ===== =============================================================
    c     Column normalize
    d     Density normalize
    r     Row normalize
    t     Total normalize
    cd    PDFs in each column
    rd    PDFs in each row
    ===== =============================================================

    Parameters
    ----------
    new: None, str
        One of the keys above (case-insensitive) or None for no
        normalization. The value is lower-cased before it is stored.

    Raises
    ------
    AssertionError
        If `new` is not None and is not one of the recognized keys. The
        exception type is kept for backward compatibility, but it is
        raised explicitly so the check is not stripped under ``python -O``.
    """
    if new is not None:
        new = new.lower()
        if new not in ("c", "r", "t", "d", "cd", "rd"):
            raise AssertionError(f"Unrecognized axnorm `{new}`")

    # Keep a count-type z label in sync with the normalization.
    zlbl = self.labels.z
    if isinstance(zlbl, labels_module.Count):
        zlbl.set_axnorm(new)
        zlbl.build_label()

    self._axnorm = new
def _axis_normalizer(self, agg):
    r"""Apply row, column, total, or density normalization to `agg`.

    Written basically as `staticmethod` so that can be called in `OrbitHist2D`, but
    as actual method with `self` passed so we have access to `self.log` for density
    normalization.

    Parameters
    ----------
    agg:
        Aggregated values whose index has levels named "x" and "y"
        (both levels are looked up by name below).

    Returns
    -------
    agg:
        The input divided according to `self.axnorm`:

        ====== ======================================================
        axnorm operation
        ====== ======================================================
        None   returned unchanged
        c      divided by the maximum within each "x" group
        r      divided by the maximum within each "y" group
        t      divided by the overall maximum
        d      divided by the x width, the y width, and the total sum
        cd     divided by the sum within each "x" group and the y width
        rd     divided by the sum within each "y" group and the x width
        ====== ======================================================

        Any other iterable is unpacked as ``(kind, fcn)`` with `kind` in
        ("c", "r"); the divisor is then ``groupby(...).agg(fcn)``.

    Raises
    ------
    ValueError
        If `axnorm` is not recognized.
    """
    axnorm = self.axnorm
    if axnorm is None:
        pass
    elif axnorm == "c":
        # Scale each column so its largest bin equals 1.
        agg = agg.divide(agg.groupby(level="x").max(), level="x")
    elif axnorm == "r":
        # Scale each row so its largest bin equals 1.
        agg = agg.divide(agg.groupby(level="y").max(), level="y")
    elif axnorm == "t":
        agg = agg.divide(agg.max())
    elif axnorm == "d":
        N = agg.sum().sum()
        x = pd.IntervalIndex(agg.index.get_level_values("x").unique())
        y = pd.IntervalIndex(agg.index.get_level_values("y").unique())
        dx = pd.Series(
            x.length, index=x
        )  # dx = pd.Series(x.right - x.left, index=x)
        dy = pd.Series(
            y.length, index=y
        )  # dy = pd.Series(y.right - y.left, index=y)
        # NOTE(review): for log-scaled axes this computes 10**(right - left),
        # i.e. the ratio of the bin edges, not 10**right - 10**left. Confirm
        # this is the intended width for density normalization.
        if self.log.x:
            dx = 10.0**dx
        if self.log.y:
            dy = 10.0**dy
        agg = agg.divide(dx, level="x").divide(dy, level="y").divide(N)
    elif axnorm == "cd":
        # raise NotImplementedError("Need to verify data alignment, especially `dx` values and index")
        N = agg.groupby(level="x").sum()
        dy = pd.IntervalIndex(
            agg.index.get_level_values("y").unique()
        ).sort_values()
        dy = pd.Series(dy.length, index=dy).sort_index()
        # Divide by total in each column and each row's width
        agg = agg.divide(N, level="x").divide(dy, level="y")
    elif axnorm == "rd":
        # raise NotImplementedError("Need to verify data alignment, especially `dx` values and index")
        N = agg.groupby(level="y").sum()
        dx = pd.IntervalIndex(
            agg.index.get_level_values("x").unique()
        ).sort_values()
        dx = pd.Series(dx.length, index=dx).sort_index()
        # Divide by total in each row and each column's width
        agg = agg.divide(N, level="y").divide(dx, level="x")
    elif hasattr(axnorm, "__iter__"):
        # TODO: This is an undocumented feature. I do not know if it is
        # tested nor how it interacts with colorbar labels, etc.
        # We need to investigate this issue (20250804).
        # NOTE(review): `set_axnorm` calls `.lower()` on its argument, so a
        # tuple presumably cannot reach this branch through that setter.
        kind, fcn = axnorm
        if kind == "c":
            agg = agg.divide(agg.groupby(level="x").agg(fcn), level="x")
        elif kind == "r":
            agg = agg.divide(agg.groupby(level="y").agg(fcn), level="y")
        else:
            raise ValueError(f"Unrecognized axnorm with function ({kind}, {fcn})")
    else:
        raise ValueError(f"Unrecognized axnorm ({axnorm})")
    return agg
[docs]
def agg(self, **kwargs):
    """Aggregate, normalize, reindex, and apply the `alim` limits.

    Parameters
    ----------
    kwargs:
        Forwarded to the parent `agg`.

    Returns
    -------
    The aggregated values after `_axis_normalizer` and `_agg_reindexer`.
    When either bound of `self.alim` is set, entries outside the closed
    range are replaced via `where` (i.e. become missing).
    """
    out = super().agg(**kwargs)
    out = self._axis_normalizer(out)
    out = self._agg_reindexer(out)

    lower, upper = self.alim
    if lower is None and upper is None:
        return out

    keep = pd.Series(True, index=out.index)
    if lower is not None:
        keep = keep & (out >= lower)
    if upper is not None:
        keep = keep & (out <= upper)
    return out.where(keep)
def _make_cbar(self, mappable, **kwargs):
    """Create the colorbar, defaulting to ticks every 0.1 for "c"/"r" norms.

    Parameters
    ----------
    mappable:
        Forwarded to the parent `_make_cbar`.
    kwargs:
        Forwarded to the parent `_make_cbar`. A caller-supplied `ticks`
        entry takes precedence over the default.
    """
    if self.axnorm in ("c", "r"):
        default_ticks = mpl.ticker.MultipleLocator(0.1)
    else:
        default_ticks = None
    ticks = kwargs.pop("ticks", default_ticks)
    return super()._make_cbar(mappable, ticks=ticks, **kwargs)
def _limit_color_norm(self, norm):
    """Fill unset `norm` limits from the 1% and 99% quantiles of the z data.

    Nothing is done for column ("c") or row ("r") normalization. Otherwise
    `norm.vmin` / `norm.vmax` are set only when they are None, and
    `norm.clip` is switched on. `norm` is modified in place; the return
    value is always None.
    """
    if self.axnorm in ("c", "r"):
        # Don't limit us to (1%, 99%) interval.
        return None

    bounds = self.data.loc[:, "z"].quantile([0.01, 0.99])
    low = bounds.loc[0.01]
    high = bounds.loc[0.99]

    norm.vmin = low if norm.vmin is None else norm.vmin
    norm.vmax = high if norm.vmax is None else norm.vmax
    norm.clip = True
def _prep_agg_for_plot(self, fcn=None, use_edges=True, mask_invalid=True):
    """Build the 2D value grid and matching coordinates for a plot call.

    Parameters
    ----------
    fcn : FunctionType, None
        Aggregation function passed to :py:meth:`agg`.
    use_edges : bool
        True returns the bin edges (``self.edges``); False returns the
        bin mid-points (``self.intervals[...].mid``).
    mask_invalid : bool
        True wraps the grid with `np.ma.masked_invalid`; False returns
        the raw values.

    Returns
    -------
    C : np.ma.MaskedArray or np.ndarray
        Aggregated values with "x" unstacked into the columns.
    x : np.ndarray
        X coordinates after `_maybe_convert_to_log_scale`.
    y : np.ndarray
        Y coordinates after `_maybe_convert_to_log_scale`.
    """
    grid = self.agg(fcn=fcn).unstack("x")

    if use_edges:
        # n bins are bounded by n + 1 edges.
        x, y = self.edges["x"], self.edges["y"]
        offset = 1
    else:
        # One center per bin.
        x, y = self.intervals["x"].mid, self.intervals["y"].mid
        offset = 0

    # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381)
    n_rows, n_cols = grid.shape
    if x.size != n_cols + offset:
        grid = grid.reindex(columns=self.categoricals["x"])
    if y.size != n_rows + offset:
        grid = grid.reindex(index=self.categoricals["y"])

    x, y = self._maybe_convert_to_log_scale(x, y)

    values = grid.values
    if mask_invalid:
        values = np.ma.masked_invalid(values)
    return values, x, y
def _nan_gaussian_filter(self, array, sigma, **kwargs):
    """Delegate to the module-level `nan_gaussian_filter` from `.tools`.

    All arguments are forwarded unchanged and its result is returned.
    """
    filtered = nan_gaussian_filter(array, sigma, **kwargs)
    return filtered
[docs]
def make_plot(
    self,
    ax=None,
    cbar=True,
    limit_color_norm=False,
    cbar_kwargs=None,
    fcn=None,
    alpha_fcn=None,
    **kwargs,
):
    r"""Make a 2D plot on `ax` using `ax.pcolormesh`.

    Parameters
    ----------
    ax: mpl.axes.Axes, None
        If None, create an `Axes` instance from `plt.subplots`.
    cbar: bool
        If True, create color bar with `labels.z`.
    limit_color_norm: bool
        If True, limit the color range to the 1% and 99% quantiles of the
        z-value via `_limit_color_norm`. When no `norm` is otherwise
        available, a plain `mpl.colors.Normalize` is created to carry the
        limits. Has no effect for row or column normalized data.
    cbar_kwargs: dict, None
        If not None, kwargs passed to `self._make_cbar`. The dictionary is
        copied, so the caller's object is not modified.
    fcn: FunctionType, None
        Aggregation function. If None, automatically select in :py:meth:`agg`.
    alpha_fcn: None, str
        If not None, the function used to aggregate the data for setting alpha
        value.
    kwargs:
        Passed to `ax.pcolormesh`.
        If `norm` is not given it defaults to a ten-step
        `mpl.colors.BoundaryNorm` on [0, 1] for row or column normalized
        data, to `mpl.colors.LogNorm` for "d", "cd" and "rd", and to None
        otherwise.

    Returns
    -------
    ax: mpl.axes.Axes
        Axes upon which plot was made.
    cbar_or_mappable: colorbar.Colorbar, mpl.collections.QuadMesh
        If `cbar` is True, return the colorbar. Otherwise, return the `Quadmesh` used
        to create the colorbar.
    """
    # Shared helper: aggregates, applies the GH32381 reindex workaround,
    # converts log-scaled edges back, and masks invalid values.
    C, x, y = self._prep_agg_for_plot(fcn=fcn, use_edges=True, mask_invalid=True)

    if ax is None:
        _, ax = plt.subplots()

    axnorm = self.axnorm
    default_norm = None
    if axnorm in ("c", "r"):
        default_norm = mpl.colors.BoundaryNorm(
            np.linspace(0, 1, 11), 256, clip=True
        )
    elif axnorm in ("d", "cd", "rd"):
        default_norm = mpl.colors.LogNorm(clip=True)
    norm = kwargs.pop("norm", default_norm)

    if limit_color_norm:
        # `_limit_color_norm` writes `vmin`/`vmax`/`clip` onto the norm, so
        # it needs a real object. It returns early for "c"/"r", where a
        # None norm is therefore left alone.
        if norm is None and axnorm not in ("c", "r"):
            norm = mpl.colors.Normalize()
        self._limit_color_norm(norm)

    XX, YY = np.meshgrid(x, y)
    pc = ax.pcolormesh(XX, YY, C, norm=norm, **kwargs)

    cbar_or_mappable = pc
    if cbar:
        # Copy so that adding the default `ax` entry never leaks into the
        # caller's dictionary.
        cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)
        if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
            cbar_kwargs["ax"] = ax
        cbar_or_mappable = self._make_cbar(pc, **cbar_kwargs)

    self._format_axis(ax)

    # More than one distinct aggregated value means we are plotting an
    # actual z quantity; otherwise `alpha_fcn` is ignored below.
    color_plot = self.data.loc[:, self.agg_axes].dropna().unique().size > 1
    if (alpha_fcn is not None) and color_plot:
        self.logger.warning(
            "Make sure you verify alpha actually set. I don't yet trust this."
        )
        alpha_agg = self.agg(fcn=alpha_fcn)
        alpha_agg = alpha_agg.unstack("x")
        alpha_agg = np.ma.masked_invalid(alpha_agg.values.ravel())
        # Feature scale then invert so smallest STD
        # is most opaque.
        alpha = 1 - mpl.colors.Normalize()(alpha_agg)
        self.logger.warning("Scaling alpha filter as alpha**0.25")
        alpha = alpha**0.25
        # Set masked values to zero. Otherwise, masked
        # values are rendered as black.
        alpha = alpha.filled(0)
        # Must draw to initialize `facecolor`s
        plt.draw()
        colors = pc.get_facecolors()
        colors[:, 3] = alpha
        pc.set_facecolor(colors)
    elif alpha_fcn is not None:
        self.logger.warning("Ignoring `alpha_fcn` because plotting counts")

    return ax, cbar_or_mappable
[docs]
def plot_hist_with_contours(
    self,
    ax=None,
    cbar=True,
    limit_color_norm=False,
    cbar_kwargs=None,
    fcn=None,
    # Contour-specific parameters
    levels=None,
    label_levels=False,
    use_contourf=True,
    contour_kwargs=None,
    clabel_kwargs=None,
    skip_max_clbl=True,
    gaussian_filter_std=0,
    gaussian_filter_kwargs=None,
    nan_aware_filter=False,
    **kwargs,
):
    """Make a 2D pcolormesh plot with contour overlay.

    Draws the pcolormesh background and a contour/contourf overlay on the
    same axes in a single call. None of the dictionaries passed in
    (`cbar_kwargs`, `contour_kwargs`, `clabel_kwargs`) is modified.

    Parameters
    ----------
    ax : mpl.axes.Axes, None
        If None, create an `Axes` instance from `plt.subplots`.
    cbar : bool
        If True, create color bar with `labels.z`.
    limit_color_norm : bool
        If True, limit the color range to the 1% and 99% quantiles of the
        z-value via `_limit_color_norm`. When no `norm` is otherwise
        available, a plain `mpl.colors.Normalize` is created to carry the
        limits. Has no effect for row or column normalized data.
    cbar_kwargs : dict, None
        If not None, kwargs passed to `self._make_cbar`.
    fcn : FunctionType, None
        Aggregation function. If None, automatically select.
    levels : array-like, int, None
        Contour levels. If None, defaults come from `_get_contour_levels`.
    label_levels : bool
        If True, add labels to contours with `ax.clabel`.
    use_contourf : bool
        If True, use filled contours. Else use line contours.
    contour_kwargs : dict, None
        Additional kwargs passed to contour/contourf (e.g., linestyles, colors).
    clabel_kwargs : dict, None
        Kwargs passed to `ax.clabel`. `inline`, `inline_spacing` and `fmt`
        default to True, -3 and "%s".
    skip_max_clbl : bool
        If True, don't label the maximum contour level.
    gaussian_filter_std : int
        If > 0, apply Gaussian filter to contour data.
    gaussian_filter_kwargs : dict, None
        Kwargs passed to the Gaussian filter.
    nan_aware_filter : bool
        If True and gaussian_filter_std > 0, use `self._nan_gaussian_filter`.
        Otherwise use standard scipy.ndimage.gaussian_filter.
    kwargs :
        Passed to `ax.pcolormesh`. `norm` and `cmap` are extracted and
        shared between the pcolormesh and the contour call.

    Returns
    -------
    ax : mpl.axes.Axes
    cbar_or_mappable : colorbar.Colorbar or QuadMesh
    qset : QuadContourSet
        The contour set from the overlay.
    lbls : list or None
        Contour labels if label_levels is True.
    """
    if ax is None:
        _, ax = plt.subplots()

    # Copy: `linestyles` is popped below and must not disappear from the
    # caller's dictionary.
    contour_kwargs = {} if contour_kwargs is None else dict(contour_kwargs)

    # Determine normalization
    axnorm = self.axnorm
    default_norm = None
    if axnorm in ("c", "r"):
        default_norm = mpl.colors.BoundaryNorm(
            np.linspace(0, 1, 11), 256, clip=True
        )
    elif axnorm in ("d", "cd", "rd"):
        default_norm = mpl.colors.LogNorm(clip=True)
    norm = kwargs.pop("norm", default_norm)

    if limit_color_norm:
        # `_limit_color_norm` writes attributes onto the norm, so it needs a
        # real object. It returns early for "c"/"r", where None is left alone.
        if norm is None and axnorm not in ("c", "r"):
            norm = mpl.colors.Normalize()
        self._limit_color_norm(norm)

    # Get cmap from kwargs (shared between pcolormesh and contour)
    cmap = kwargs.pop("cmap", None)

    # --- 1. Plot pcolormesh background ---
    C_edges, x_edges, y_edges = self._prep_agg_for_plot(fcn=fcn, use_edges=True)
    XX_edges, YY_edges = np.meshgrid(x_edges, y_edges)
    pc = ax.pcolormesh(XX_edges, YY_edges, C_edges, norm=norm, cmap=cmap, **kwargs)

    # --- 2. Plot contour overlay ---
    # Delay masking if gaussian filter will be applied
    needs_filter = gaussian_filter_std > 0
    C_centers, x_centers, y_centers = self._prep_agg_for_plot(
        fcn=fcn, use_edges=False, mask_invalid=not needs_filter
    )

    # Apply Gaussian filter if requested
    if needs_filter:
        if gaussian_filter_kwargs is None:
            gaussian_filter_kwargs = {}
        if nan_aware_filter:
            C_centers = self._nan_gaussian_filter(
                C_centers, gaussian_filter_std, **gaussian_filter_kwargs
            )
        else:
            from scipy.ndimage import gaussian_filter

            C_centers = gaussian_filter(
                C_centers, gaussian_filter_std, **gaussian_filter_kwargs
            )
        C_centers = np.ma.masked_invalid(C_centers)

    XX_centers, YY_centers = np.meshgrid(x_centers, y_centers)

    # Get contour levels
    levels = self._get_contour_levels(levels)

    # Contour function
    contour_fcn = ax.contourf if use_contourf else ax.contour

    # Default linestyles for contour
    linestyles = contour_kwargs.pop(
        "linestyles",
        [
            "-",
            ":",
            "--",
            (0, (7, 3, 1, 3, 1, 3, 1, 3, 1, 3)),
            "--",
            ":",
            "-",
            (0, (7, 3, 1, 3)),
        ],
    )

    if levels is None:
        args = [XX_centers, YY_centers, C_centers]
    else:
        args = [XX_centers, YY_centers, C_centers, levels]

    qset = contour_fcn(
        *args, linestyles=linestyles, cmap=cmap, norm=norm, **contour_kwargs
    )

    # --- 3. Contour labels ---
    lbls = None
    if label_levels:
        # Copy before popping the defaults out of it.
        clabel_kwargs = {} if clabel_kwargs is None else dict(clabel_kwargs)
        inline = clabel_kwargs.pop("inline", True)
        inline_spacing = clabel_kwargs.pop("inline_spacing", -3)
        fmt = clabel_kwargs.pop("fmt", "%s")

        class nf(float):
            # Strip trailing zeros from the float representation.
            def __repr__(self):
                return float.__repr__(self).rstrip("0")

        try:
            clabel_args = (qset, levels[:-1] if skip_max_clbl else levels)
        except TypeError:
            # `levels` is not subscriptable (e.g. None); label every level.
            clabel_args = (qset,)

        qset.levels = [nf(level) for level in qset.levels]
        lbls = ax.clabel(
            *clabel_args,
            inline=inline,
            inline_spacing=inline_spacing,
            fmt=fmt,
            **clabel_kwargs,
        )

    # --- 4. Colorbar ---
    cbar_or_mappable = pc
    if cbar:
        # Copy so that adding the default `ax` entry never leaks out.
        cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)
        if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
            cbar_kwargs["ax"] = ax
        cbar_or_mappable = self._make_cbar(pc, **cbar_kwargs)

    # --- 5. Format axis ---
    self._format_axis(ax)

    return ax, cbar_or_mappable, qset, lbls
[docs]
def get_border(self):
    r"""Get the top and bottom edges of the plot.

    For every x column of the aggregated data, the last valid y entry is
    recorded as the top edge and the first valid y entry as the bottom edge.
    Columns without any valid entry contribute nothing.

    Returns
    -------
    border: namedtuple
        Contains "top" and "bottom" fields, each with a :py:class:`pd.Series`
        whose index levels are named "y" and "x".
    """
    Border = namedtuple("Border", "top,bottom")

    upper = {}
    lower = {}
    columns = self.agg().unstack("x")
    for xkey, column in columns.items():
        y_hi = column.last_valid_index()
        if y_hi is not None:
            upper[(y_hi, xkey)] = column.loc[y_hi]

        y_lo = column.first_valid_index()
        if y_lo is not None:
            lower[(y_lo, xkey)] = column.loc[y_lo]

    upper = pd.Series(upper)
    lower = pd.Series(lower)
    upper.index.names = ["y", "x"]
    lower.index.names = ["y", "x"]

    return Border(upper, lower)
def _plot_one_edge(
    self,
    ax,
    edge,
    smooth=False,
    sg_kwargs=None,
    xlim=(None, None),
    ylim=(None, None),
    **kwargs,
):
    """Plot one border curve, optionally smoothed and clipped.

    Parameters
    ----------
    ax:
        Object whose `plot` method is called with the curve.
    edge: pd.Series
        Series whose index has interval-valued levels named "x" and "y";
        the interval mid-points are the plotted coordinates.
    smooth: bool
        If True, smooth the y values with
        :py:func:`scipy.signal.savgol_filter`.
    sg_kwargs: dict, None
        Kwargs for the filter. `window_length` defaults to a tenth of the
        number of points and `polyorder` to 3; an even `window_length` is
        reduced by one. The dictionary is copied, so the same object can be
        reused across calls (as `plot_edges` does for top and bottom).
    xlim, ylim: tuple
        ``(low, high)`` bounds, inclusive, applied after smoothing and log
        conversion. None disables a bound.
    kwargs:
        Passed to `ax.plot`.

    Returns
    -------
    Whatever `ax.plot` returns.
    """
    x = edge.index.get_level_values("x").mid
    y = edge.index.get_level_values("y").mid

    if smooth:
        # Copy before popping: the caller's dictionary must keep its
        # `window_length` / `polyorder` entries for subsequent calls.
        sg_kwargs = {} if sg_kwargs is None else dict(sg_kwargs)
        wlength = sg_kwargs.pop("window_length", int(np.floor(y.shape[0] / 10)))
        polyorder = sg_kwargs.pop("polyorder", 3)
        if not wlength % 2:
            # The window length is forced to be odd.
            wlength -= 1
        y = savgol_filter(y, wlength, polyorder, **sg_kwargs)

    if self.log.x:
        x = 10.0**x
    if self.log.y:
        y = 10.0**y

    x0, x1 = xlim
    y0, y1 = ylim

    tk = np.ones(x.shape, dtype=bool)
    if x0 is not None:
        tk = tk & (x0 <= x)
    if x1 is not None:
        tk = tk & (x <= x1)
    if y0 is not None:
        tk = tk & (y0 <= y)
    if y1 is not None:
        tk = tk & (y <= y1)

    x = x[tk]
    y = y[tk]

    return ax.plot(x, y, **kwargs)
[docs]
def plot_edges(self, ax, smooth=True, sg_kwargs=None, **kwargs):
    """Overplot the top and bottom edges returned by `get_border`.

    Parameters
    ----------
    ax:
        Axis on which to plot.
    smooth: bool
        If True, apply a Savitzky-Golay filter (:py:func:`scipy.signal.savgol_filter`)
        to the y-values before plotting to smooth the curve.
    sg_kwargs: dict, None
        If not None, dict of kwargs passed to Savitzky-Golay filter. Also allows
        for setting of `window_length` and `polyorder` as kwargs. They default to
        10% of the number of observations (`window_length`) and 3 (`polyorder`).
        Because `window_length` must be odd, an even value is reduced by one.
    kwargs:
        Passed to `ax.plot`. `color` defaults to "cyan". `label` is applied
        to the top edge only.

    Returns
    -------
    etop, ebottom:
        The return values of the two `_plot_one_edge` calls.
    """
    border = self.get_border()
    upper, lower = border

    line_color = kwargs.pop("color", "cyan")
    top_label = kwargs.pop("label", None)

    # Only the top edge carries the label.
    etop = self._plot_one_edge(
        ax, upper, smooth, sg_kwargs, color=line_color, label=top_label, **kwargs
    )
    ebottom = self._plot_one_edge(
        ax, lower, smooth, sg_kwargs, color=line_color, **kwargs
    )
    return etop, ebottom
def _get_contour_levels(self, levels):
    """Return `levels`, or the default contour levels for `self.axnorm`.

    Explicit `levels` are returned untouched, as is None when no
    normalization is set. For "cd" and "rd" no default exists and None is
    returned.

    Raises
    ------
    ValueError
        If `levels` is None and `self.axnorm` is not a recognized key.
    """
    if levels is not None or self.axnorm is None:
        return levels

    axnorm = self.axnorm
    if axnorm == "t":
        return [0.01, 0.1, 0.3, 0.7, 0.99]
    if axnorm == "d":
        return [3e-5, 1e-4, 3e-4, 1e-3, 1.7e-3, 2.3e-3]
    if axnorm in ["r", "c"]:
        return [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    if axnorm in ["cd", "rd"]:
        return None

    raise ValueError(
        f"Unrecognized axis normalization {self.axnorm} for default levels."
    )
def _verify_contour_passthrough_kwargs(
    self, ax, clabel_kwargs, edges_kwargs, cbar_kwargs
):
    """Normalize the optional kwarg dictionaries used by `plot_contours`.

    Each argument that is None becomes an empty dict. Dictionaries that
    are supplied are shallow-copied, so the `pop` calls and the default
    `ax` entry applied afterwards never modify the caller's objects.

    Parameters
    ----------
    ax:
        Stored under the "ax" key of the colorbar kwargs when neither
        "cax" nor "ax" is already present.
    clabel_kwargs, edges_kwargs, cbar_kwargs: dict, None
        The dictionaries to normalize.

    Returns
    -------
    clabel_kwargs, edges_kwargs, cbar_kwargs: dict
        Fresh dictionaries, in that order.
    """
    clabel_kwargs = {} if clabel_kwargs is None else dict(clabel_kwargs)
    edges_kwargs = {} if edges_kwargs is None else dict(edges_kwargs)
    cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs)

    if "cax" not in cbar_kwargs and "ax" not in cbar_kwargs:
        cbar_kwargs["ax"] = ax

    return clabel_kwargs, edges_kwargs, cbar_kwargs
[docs]
def plot_contours(
    self,
    ax=None,
    label_levels=True,
    cbar=True,
    limit_color_norm=False,
    cbar_kwargs=None,
    fcn=None,
    plot_edges=False,
    edges_kwargs=None,
    clabel_kwargs=None,
    skip_max_clbl=True,
    use_contourf=False,
    gaussian_filter_std=0,
    gaussian_filter_kwargs=None,
    nan_aware_filter=False,
    **kwargs,
):
    """Make a contour plot on `ax` using `ax.contour` or `ax.contourf`.

    Parameters
    ----------
    ax: mpl.axes.Axes, None
        If None, create an `Axes` instance from `plt.subplots`.
    label_levels: bool
        If True, add labels to contours with `ax.clabel`.
    cbar: bool
        If True, create color bar with `labels.z`.
    limit_color_norm: bool
        NOTE(review): accepted for signature parity with `make_plot`, but
        never referenced in this method's body -- confirm whether the color
        range was meant to be limited here as well.
    cbar_kwargs: dict, None
        If not None, kwargs passed to `self._make_cbar`.
    fcn: FunctionType, None
        Aggregation function. If None, automatically select in :py:meth:`agg`.
    plot_edges: bool
        If True, plot the smoothed, extreme edges of the 2D histogram.
    edges_kwargs: None, dict
        Passed to `self.plot_edges`.
    clabel_kwargs: None, dict
        If not None, dictionary of kwargs passed to `ax.clabel`. `inline`,
        `inline_spacing` and `fmt` default to True, -3 and "%s".
    skip_max_clbl: bool
        If True, don't label the maximum contour. Primarily used when the maximum
        contour is, effectively, a point.
    use_contourf: bool
        If True, use `ax.contourf`. Else use `ax.contour`.
    gaussian_filter_std: int
        If > 0, apply `scipy.ndimage.gaussian_filter` to the z-values using the
        standard deviation specified by `gaussian_filter_std`.
    gaussian_filter_kwargs: None, dict
        If not None and gaussian_filter_std > 0, passed to :py:meth:`scipy.ndimage.gaussian_filter`
    nan_aware_filter: bool
        If True and gaussian_filter_std > 0, use `self._nan_gaussian_filter`.
        Otherwise use standard scipy.ndimage.gaussian_filter.
    kwargs:
        Passed to the contour function. `levels`, `cmap`, `norm` and
        `linestyles` are extracted first.
        If row or column normalized data, `norm` defaults to a ten-step
        `mpl.colors.BoundaryNorm` on [0, 1]. If `norm` is still None and at
        least two levels are available, a `BoundaryNorm` over the levels is
        used.

    Returns
    -------
    ax: mpl.axes.Axes
        Axes upon which plot was made.
    lbls:
        Return value of `ax.clabel`, or None if `label_levels` is False.
    cbar_or_mappable:
        The colorbar if `cbar` is True, otherwise the contour set.
    qset:
        The contour set returned by the contour function.
    """
    levels = kwargs.pop("levels", None)
    cmap = kwargs.pop("cmap", None)
    norm = kwargs.pop(
        "norm",
        (
            mpl.colors.BoundaryNorm(np.linspace(0, 1, 11), 256, clip=True)
            if self.axnorm in ("c", "r")
            else None
        ),
    )
    linestyles = kwargs.pop(
        "linestyles",
        [
            "-",
            ":",
            "--",
            (0, (7, 3, 1, 3, 1, 3, 1, 3, 1, 3)),
            "--",
            ":",
            "-",
            (0, (7, 3, 1, 3, 1, 3)),
        ],
    )
    if ax is None:
        fig, ax = plt.subplots()
    # Replace None with empty dicts and default the colorbar's `ax` entry.
    (
        clabel_kwargs,
        edges_kwargs,
        cbar_kwargs,
    ) = self._verify_contour_passthrough_kwargs(
        ax, clabel_kwargs, edges_kwargs, cbar_kwargs
    )
    inline = clabel_kwargs.pop("inline", True)
    inline_spacing = clabel_kwargs.pop("inline_spacing", -3)
    fmt = clabel_kwargs.pop("fmt", "%s")
    agg = self.agg(fcn=fcn).unstack("x")
    # Contours are drawn at the bin centers rather than the bin edges.
    x = self.intervals["x"].mid
    y = self.intervals["y"].mid
    # HACK: Works around `gb.agg(observed=False)` pandas bug. (GH32381)
    if x.size != agg.shape[1]:
        agg = agg.reindex(columns=self.categoricals["x"])
    if y.size != agg.shape[0]:
        agg = agg.reindex(index=self.categoricals["y"])
    x, y = self._maybe_convert_to_log_scale(x, y)
    XX, YY = np.meshgrid(x, y)
    C = agg.values
    if gaussian_filter_std:
        if gaussian_filter_kwargs is None:
            gaussian_filter_kwargs = dict()
        if nan_aware_filter:
            C = self._nan_gaussian_filter(
                C, gaussian_filter_std, **gaussian_filter_kwargs
            )
        else:
            from scipy.ndimage import gaussian_filter

            C = gaussian_filter(C, gaussian_filter_std, **gaussian_filter_kwargs)
    # Mask only after any filtering has been applied.
    C = np.ma.masked_invalid(C)
    assert XX.shape == C.shape
    assert YY.shape == C.shape

    class nf(float):
        # Source: https://matplotlib.org/3.1.0/gallery/images_contours_and_fields/contour_label_demo.html
        # Define a class that forces representation of float to look a certain way
        # This removes trailing zeros so '1.0' becomes '1.'
        def __repr__(self):
            return float.__repr__(self).rstrip("0")

    levels = self._get_contour_levels(levels)
    if (norm is None) and (levels is not None) and (len(levels) >= 2):
        norm = mpl.colors.BoundaryNorm(levels, 256, clip=True)
    contour_fcn = ax.contour
    if use_contourf:
        contour_fcn = ax.contourf
    if levels is None:
        args = [XX, YY, C]
    else:
        args = [XX, YY, C, levels]
    qset = contour_fcn(*args, linestyles=linestyles, cmap=cmap, norm=norm, **kwargs)
    # `args` is re-used from here on for the `ax.clabel` call.
    try:
        args = (qset, levels[:-1] if skip_max_clbl else levels)
    except TypeError:
        # None can't be subscripted.
        args = (qset,)
    lbls = None
    if label_levels:
        qset.levels = [nf(level) for level in qset.levels]
        lbls = ax.clabel(
            *args,
            inline=inline,
            inline_spacing=inline_spacing,
            fmt=fmt,
            **clabel_kwargs,
        )
    if plot_edges:
        etop, ebottom = self.plot_edges(ax, **edges_kwargs)
    cbar_or_mappable = qset
    if cbar:
        # Pass `norm` to `self._make_cbar` so that we can choose the ticks to use.
        cbar = self._make_cbar(qset, norm=norm, **cbar_kwargs)
        cbar_or_mappable = cbar
    self._format_axis(ax)
    return ax, lbls, cbar_or_mappable, qset
[docs]
def project_1d(self, axis, only_plotted=True, project_counts=False, **kwargs):
    """Make a `Hist1D` from the data stored in this `Hist2D`.

    Parameters
    ----------
    axis: str
        "x" or "y", specifying the axis to project into 1D.
    only_plotted: bool
        If True, only pass data selected by
        `self.get_plotted_data_boolean_series()` to the :py:class:`Hist1D`.
    project_counts: bool
        If True, only send the variable plotted along `axis` to :py:class:`Hist1D`.
        Otherwise, also send a second variable as its y data: the z column
        when it holds at least two distinct values, else the other axis.
    kwargs:
        Passed to `Hist1D`. Primarily to allow specifying `bin_precision`.

    Returns
    -------
    h1: :py:class:`Hist1D`
        Built with this histogram's bin edges along `axis` as its `nbins`.
    """
    axis = axis.lower()
    assert axis in ("x", "y")
    data = self.data
    if data.loc[:, "z"].unique().size >= 2:
        # z carries at least two distinct values, so it is used as the
        # second variable.
        other = "z"
    else:
        # Otherwise (original note: "either all 1 or 1 and NaN") fall back
        # to the remaining axis.
        possible_axes = {"x", "y"}
        possible_axes.remove(axis)
        other = possible_axes.pop()
    logx = self.log._asdict()[axis]
    x = self.data.loc[:, axis]
    if logx:
        # Need to convert back to regular from log-space for data setting.
        x = 10.0**x
    y = self.data.loc[:, other] if not project_counts else None
    logy = False  # Defined b/c project_counts option.
    # NOTE(review): this branch only runs when `other` is "y" (i.e. when
    # projecting onto x). When projecting onto y with `other == "x"`, the
    # x data is passed on without range selection or log back-conversion --
    # confirm that asymmetry is intended.
    if y is not None and (other == "y"):
        # Only select y-values plotted.
        logy = self.log._asdict()[other]
        yedges = self.edges[other].values
        y = y.where((yedges[0] <= y) & (y <= yedges[-1]))
        if logy:
            y = 10.0**y
    if only_plotted:
        tk = self.get_plotted_data_boolean_series()
        x = x.loc[tk]
        if y is not None:
            y = y.loc[tk]
    h1 = Hist1D(
        x,
        y=y,
        logx=logx,
        clip_data=False,  # Any clipping will be addressed by bins.
        nbins=self.edges[axis].values,
        **kwargs,
    )
    h1.set_log(y=logy)  # Need to propagate logy.
    h1.set_labels(x=self.labels._asdict()[axis])
    if not project_counts:
        h1.set_labels(y=self.labels._asdict()[other])
    return h1
[docs]
def make_joint_h2_h1_plot(
    self, project_counts=True, kwargs_1d=None, fig_axes=None, **kwargs
):
    """Plot the 2D histogram with its x and y 1D projections alongside.

    A new figure is created with a 4 x 2 `GridSpec`: the 2D histogram sits
    at row 1 / column 0, the x projection above it (sharing x), the y
    projection to its right (sharing y), and the colorbar axes in the last
    row.

    Parameters
    ----------
    project_counts: bool
        Forwarded to `project_1d` for both projections.
    kwargs_1d: dict, None
        Kwargs passed to the `make_plot` call of both projections.
    fig_axes:
        Currently unused; kept so existing callers keep working.
    kwargs:
        `figsize`, `height_ratios`, `width_ratios`, `hspace`, `wspace` and
        `cbar_kwargs` are extracted; everything else is passed to
        `self.make_plot`. `cbar_kwargs` is copied (and may be None), so the
        caller's dictionary is not modified. Within it, `cax` defaults to
        the colorbar axes created here and `orientation` to "horizontal".

    Returns
    -------
    hax, xax, yax:
        Axes of the 2D histogram, the x projection, and the y projection.
    cbar:
        Second return value of `self.make_plot`.
    """
    figsize = kwargs.pop("figsize", (5, 6))
    height_ratios = kwargs.pop("height_ratios", [0.25, 1, 0.2, 0.1])
    width_ratios = kwargs.pop("width_ratios", [1, 0.25])
    hspace = kwargs.pop("hspace", 0)
    wspace = kwargs.pop("wspace", 0)

    fig = plt.figure(figsize=figsize)
    gs = mpl.gridspec.GridSpec(
        4,
        2,
        height_ratios=height_ratios,
        width_ratios=width_ratios,
        hspace=hspace,
        wspace=wspace,
    )

    hax = fig.add_subplot(gs[1, 0])
    xax = fig.add_subplot(gs[0, 0], sharex=hax)
    yax = fig.add_subplot(gs[1, 1], sharey=hax)
    cax = fig.add_subplot(gs[3, 0])

    # Copy before popping so the caller's dictionary keeps its entries;
    # `or {}` also tolerates an explicit `cbar_kwargs=None`.
    cbar_kwargs = dict(kwargs.pop("cbar_kwargs", None) or {})
    cax = cbar_kwargs.pop("cax", cax)
    orientation = cbar_kwargs.pop("orientation", "horizontal")

    _, cbar = self.make_plot(
        ax=hax,
        cbar_kwargs=dict(cax=cax, orientation=orientation, **cbar_kwargs),
        **kwargs,
    )

    if kwargs_1d is None:
        kwargs_1d = dict()

    self.project_1d("x", project_counts=project_counts).make_plot(
        ax=xax, **kwargs_1d
    )
    self.project_1d("y", project_counts=project_counts).make_plot(
        ax=yax, **kwargs_1d, transpose_axes=True
    )

    xax.label_outer()
    # Mimic `ax.label_outer` for `yax`.
    for label in yax.get_yticklabels(which="both"):
        label.set_visible(False)
    yax.get_yaxis().get_offset_text().set_visible(False)
    yax.set_ylabel("")

    # On linear axes, prune the uppermost major tick of the 2D panel.
    log = self.log
    if not log.x:
        hax.xaxis.set_major_locator(
            mpl.ticker.MaxNLocator(
                nbins=hax.xaxis.get_ticklocs().size - 1, prune="upper"
            )
        )
    if not log.y:
        hax.yaxis.set_major_locator(
            mpl.ticker.MaxNLocator(
                nbins=hax.yaxis.get_ticklocs().size - 1, prune="upper"
            )
        )

    return hax, xax, yax, cbar
[docs]
def id_data_above_contour(self, level):
    r"""Gets data above the `level`.

    For every x column of the aggregated data, the bins with a value of at
    least `level` are located and every observation falling between the
    lowest and the highest such bin (inclusive) is tagged with that
    column's x interval. Columns without any bin at or above `level` are
    skipped.

    Parameters
    ----------
    level: scalar
        The z-value above which to select data. Data is aggregated according
        to `ax_norm`.

    Returns
    -------
    above_contour: pd.Series
        For data in a bin above `level`, indicates the x-`pd.Interval` within
        which the observation falls. `NaN` are observations that are below
        `level`. This object is purposely the same length as the data stored by
        Hist2D and can be used in groupby operations.
    """
    x = self.data.x
    y = self.data.y

    # Object dtype from the start: the entries are `pd.Interval` objects,
    # not floats.
    above_contour = pd.Series(np.nan, index=self.data.index, dtype=object)

    for k, v in self.agg().unstack("x").items():
        selected = v[v >= level]
        if selected.empty:
            # Nothing in this column reaches `level`; min()/max() of an
            # empty index would not be an interval.
            continue

        left, right = k.left, k.right
        bottom = selected.index.min().left
        top = selected.index.max().right

        above_contour_at_x = (left < x) & (x <= right) & (bottom < y) & (y <= top)
        above_contour[above_contour_at_x] = k

    above_contour = pd.Series(
        pd.Categorical(above_contour), index=above_contour.index
    )
    return above_contour
[docs]
def take_data_in_yrange_across_x(
    self,
    ranges_by_x,
    get_x_bounds,
    get_y_bounds,
):
    r"""Take data within y-ranges across x-values.

    Parameters
    ----------
    ranges_by_x: pd.DataFrame
        Iterated with `iterrows`: each index entry is used to get the left
        and right bounds for the data and each row is used to get the
        bottom and top bounds for the data.
    get_x_bounds: function
        First argument is one index entry of `ranges_by_x`; returns
        `left, right`. Also receives the boolean kwarg `expected_logx` so
        it can return values that match how x is stored in Hist2D
        (:math:`log(x)` or :math:`x`).
    get_y_bounds: function
        First argument is one row of `ranges_by_x`; returns
        `bottom, top`. Also receives the boolean kwarg `expected_logy` so
        it can return values that match how y is stored in Hist2D
        (:math:`log(y)` or :math:`y`).

    Returns
    -------
    taken: np.ndarray 1D
        Sorted array of index labels for the data inside any of the
        intervals. Empty when `ranges_by_x` has no rows.

    Raises
    ------
    ValueError
        If `ranges_by_x` contains x entries that are not available in the
        aggregated data.
    AssertionError
        If a returned bound pair is not strictly increasing.
    """
    available_x = self.agg().unstack("x").columns
    drop = ranges_by_x.index.symmetric_difference(available_x)
    if drop.size:
        if not drop.isin(available_x).all():
            raise ValueError(
                "Need a way to drop values in selector that aren't available."
            )
        else:
            self.logger.warning(
                f"Dropping {drop.size} intervals from available for selecting."
            )

    data = self.data
    logx = self.log.x
    logy = self.log.y

    taken = []
    for x, at_x in ranges_by_x.iterrows():
        left, right = get_x_bounds(x, expected_logx=logx)
        bottom, top = get_y_bounds(at_x, expected_logy=logy)
        # Raised explicitly (same exception type as before) so the checks
        # survive `python -O`.
        if not left < right:
            raise AssertionError(f"x bounds must satisfy left < right, got ({left}, {right})")
        if not bottom < top:
            raise AssertionError(f"y bounds must satisfy bottom < top, got ({bottom}, {top})")

        # Intervals are open on the lower side and closed on the upper side.
        tkx = (left < data.x) & (data.x <= right)
        tky = (bottom < data.y) & (data.y <= top)
        tk = tkx & tky
        taken.append(tk.loc[tk].index)

    if not taken:
        # `np.concatenate` rejects an empty list; an empty selector simply
        # selects nothing.
        return np.array([], dtype=data.index.dtype)

    taken = np.sort(np.concatenate(taken))
    return taken