Source code for pyhdust.triangle

# -*- coding:utf-8 -*-

"""PyHdust auxiliary module: third-part MCMC plotting tools.

:co-author:Dan Foreman-Mackey
:license: GNU GPL v3.0 https://github.com/danmoser/pyhdust/blob/master/LICENSE
"""
from __future__ import print_function, absolute_import, unicode_literals
import numpy as _np
import warnings as _warn
import pyhdust.phc as _phc
import pyhdust.stats as _stt
from six import string_types as _strtypes

try:
    import matplotlib as _mpl
    import matplotlib.pyplot as _plt
    import matplotlib.cm as _cm
    import matplotlib.gridspec as _gridspec
    from matplotlib.ticker import MaxNLocator as _MaxNLoc
    from scipy.ndimage import gaussian_filter as _gf
    from scipy import stats as _stats
except ImportError:
    gaussian_filter = None
    _warn.warn("matplotlib or scipy not installed!!!")

__author__ = "Daniel Moser"
__email__ = "dmfaes@gmail.com"


[docs]def kerncorner(xs, cmapn="gray_r", ncl=3, bestvals="perc", labels=[]):
    """xs = (niteractions, ndim)
    ncl = number os contour lines
    bestvals = None, 'peak' or 'perc'
    """
    ndim = _np.shape(xs)[1]
    if len(labels) < ndim:
        labels += [""] * (ndim - len(labels))

    fig = _plt.figure(figsize=(9, 9))
    gs = _gridspec.GridSpec(ndim, ndim)
    gs.update(hspace=0.01)
    fs = dict(_mpl.rcParams.viewitems())["font.size"] - 4

    axs = []
    xslim = []
    for i in range(ndim):
        ax = []
        xslim += [[_np.min(xs[:, i]), _np.max(xs[:, i])]]
        for j in range(i + 1):
            ax += [_plt.subplot(gs[i, j])]
        axs += [ax]

    for i in range(ndim):
        for j in range(i + 1):
            axs[i][j].locator_params(nbins=5)
            xmin, xmax = xslim[j]
            if i == j:
                kernel = _stats.gaussian_kde(xs[:, i])
                x = _np.linspace(xmin, xmax, 101)
                y = kernel(x)
                axs[i][j].plot(x, y / _np.max(y))
                axs[i][j].set_xlim([xmin, xmax])
                ymin, ymax = axs[i][j].get_ylim()
                if isinstance(bestvals, _strtypes):
                    if bestvals.startswith("perc"):
                        pc = _np.percentile(xs[:, i], [15.9, 50.0, 84.1])
                    elif bestvals.startswith("peak"):
                        mode = x[_phc.find_nearest(y, _np.max(y), idx=True)]
                        mad = _stt.mad(xs[:, i])
                        pc = mode + _np.array([-mad, 0, mad])
                    else:
                        _warn.warn("Invalid `bestvals` in kerncorner", stacklevel=2)
                    if "pc" in locals():
                        dx = xmax - xmin
                        for p in pc:
                            if p < xmin + 0.015 * dx:
                                p = xmin + 0.015 * dx
                            if p > xmax - 0.015 * dx:
                                p = xmax - 0.015 * dx
                            axs[i][j].plot([p, p], [ymin, ymax], "k--")
            else:
                y = xs[:, i]
                x = xs[:, j]
                ymin, ymax = xslim[i]
                #
                values = _np.vstack([x, y])
                kernel = _stats.gaussian_kde(values)
                X, Y = _np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
                positions = _np.vstack([X.ravel(), Y.ravel()])
                Z = _np.reshape(kernel(positions), X.shape)
                #
                # axs[i][j].plot(x, y, 'k.')
                axs[i][j].imshow(
                    Z.T,
                    cmap=_plt.get_cmap(cmapn),
                    extent=[xmin, xmax, ymin, ymax],
                    origin="lower",
                )
                if ncl > 0:
                    CS = axs[i][j].contour(
                        X, Y, Z, ncl, colors=_phc.gradColor(range(ncl), cmapn="copper")
                    )
                # axs[i][j].clabel(CS, inline=1, fontsize=fs)
            if j == 0:
                axs[i][j].set_ylabel(labels[i])
            if i == ndim - 1:
                axs[i][j].set_xlabel(labels[j])
            if (i == 0 and j == 0) or (j != 0):
                axs[i][j].set_yticklabels([])
            axs[i][j].set_aspect(abs(xmax - xmin) / abs(ymax - ymin))
            if i != ndim - 1:
                # axs[i][j].get_xaxis().set_visible(False)
                # axs[i][j].xaxis.set_visible(False)
                axs[i][j].set_xticklabels([])
            else:
                _plt.setp(axs[i][j].xaxis.get_majorticklabels(), rotation=50)

    _phc.savefig(fig)  # figname='outname')
    return


[docs]def corner(
    xs,
    bins=20,
    range=None,
    weights=None,
    color="k",
    smooth=None,
    smooth1d=None,
    labels=None,
    label_kwargs=None,
    show_titles=False,
    title_fmt=".2f",
    title_kwargs=None,
    truths=None,
    truth_color="#4682b4",
    scale_hist=False,
    quantiles=None,
    verbose=False,
    fig=None,
    max_n_ticks=5,
    top_ticks=False,
    hist_kwargs=None,
    qtdiff=None,
    **hist2d_kwargs
):
    """
    Make a *sick* corner plot showing the projections of a data set in a
    multi-dimensional space. kwargs are passed to hist2d() or used for
    `matplotlib` styling.

    Parameters
    ----------
    xs : array_like (nsamples, ndim)
        The samples. This should be a 1- or 2-dimensional array. For a 1-D
        array this results in a simple histogram. For a 2-D array, the zeroth
        axis is the list of samples and the next axis are the dimensions of
        the space.

    weights : array_like (nsamples,)
        The weight of each sample. If `None` (default), samples are given
        equal weight.

    labels : iterable (ndim,) (optional)
        A list of names for the dimensions. If a ``xs`` is a
        ``pandas.DataFrame``, labels will default to column names.

    show_titles : bool (optional)
        Displays a title above each 1-D histogram showing the 0.5 quantile
        with the upper and lower errors supplied by the quantiles argument.

    title_fmt : string (optional)
        The format string for the quantiles given in titles.
        (default: `.2f`)

    title_args : dict (optional)
        Any extra keyword arguments to send to the `add_title` command.

    extents : iterable (ndim,) (optional)
        A list where each element is either a length 2 tuple containing
        lower and upper bounds (extents) or a float in range (0., 1.)
        giving the fraction of samples to include in bounds, e.g.,
        [(0.,10.), (1.,5), 0.999, etc.].
        If a fraction, the bounds are chosen to be equal-tailed.

    truths : iterable (ndim,) (optional)
        A list of reference values to indicate on the plots.  Individual
        values can be omitted by using ``None``.

    truth_color : str (optional)
        A ``matplotlib`` style color for the ``truths`` makers.

    scale_hist : bool (optional)
        Should the 1-D histograms be scaled in such a way that the zero line
        is visible?

    quantiles : iterable (optional)
        A list of fractional quantiles to show on the 1-D histograms as
        vertical dashed lines.

    verbose : bool (optional)
        If true, print the values of the computed quantiles.

    plot_contours : bool (optional)
        Draw contours for dense regions of the plot.

    plot_datapoints : bool (optional)
        Draw the individual data points.

    max_n_ticks: int (optional)
        maximum number of ticks to try to use

    fig : matplotlib.Figure (optional)
        Overplot onto the provided figure object.

    """
    if quantiles is None:
        quantiles = []
    if title_kwargs is None:
        title_kwargs = dict()
    if label_kwargs is None:
        label_kwargs = dict()
    if qtdiff is None:
        qtdiff = _np.zeros(xs.shape[1])

    # Try filling in labels from pandas.DataFrame columns.
    if labels is None:
        try:
            labels = xs.columns
        except AttributeError:
            pass

    # Deal with 1D sample lists.
    xs = _np.atleast_1d(xs)
    if len(xs.shape) == 1:
        xs = _np.atleast_2d(xs)
    else:
        assert len(xs.shape) == 2, "The input sample array must be 1- or 2-D."
        xs = xs.T
    assert xs.shape[0] <= xs.shape[1], (
        "I don't believe that you want more " "dimensions than samples!"
    )

    # Parse the weight array.
    if weights is not None:
        weights = _np.asarray(weights)
        if weights.ndim != 1:
            raise ValueError("Weights must be 1-D")
        if xs.shape[1] != weights.shape[0]:
            raise ValueError("Lengths of weights must match number of samples")

    # Parse the parameter ranges.
    if range is None:
        if "extents" in hist2d_kwargs:
            _warn.warn("Deprecated keyword argument 'extents'. " "Use 'range' instead.")
            range = hist2d_kwargs.pop("extents")
        else:
            range = [[x.min(), x.max()] for x in xs]
            # Check for parameters that never change.
            m = _np.array([e[0] == e[1] for e in range], dtype=bool)
            if _np.any(m):
                raise ValueError(
                    (
                        "It looks like the parameter(s) in "
                        "column(s) {0} have no dynamic range. "
                        "Please provide a `range` argument."
                    ).format(", ".join(map("{0}".format, _np.arange(len(m))[m])))
                )

    else:
        # If any of the extents are percentiles, convert them to ranges.
        for i, _ in enumerate(range):
            try:
                emin, emax = range[i]
            except TypeError:
                q = [0.5 - 0.5 * range[i], 0.5 + 0.5 * range[i]]
                range[i] = quantile(xs[i], q, weights=weights)

    if len(range) != xs.shape[0]:
        raise ValueError("Dimension mismatch between samples and range")

    # Parse the bin specifications.
    try:
        bins = [float(bins) for _ in range]
    except TypeError:
        if len(bins) != len(range):
            raise ValueError("Dimension mismatch between bins and range")

    # Some magic numbers for pretty axis layout.
    K = len(xs)
    factor = 2.0  # size of one side of one panel
    lbdim = 0.5 * factor  # size of left/bottom margin
    trdim = 0.2 * factor  # size of top/right margin
    whspace = 0.05  # w/hspace size
    plotdim = factor * K + factor * (K - 1.0) * whspace
    dim = lbdim + plotdim + trdim

    # Create a new figure if one wasn't provided.
    if fig is None:
        fig, axes = _plt.subplots(K, K, figsize=(dim, dim), tight_layout=False)
    else:
        try:
            axes = _np.array(fig.axes).reshape((K, K))
        except:
            raise ValueError(
                "Provided figure has {0} axes, but data has "
                "dimensions K={1}".format(len(fig.axes), K)
            )
        fig.set_tight_layout(False)

    # Format the figure.
    lb = lbdim / dim
    tr = (lbdim + plotdim) / dim
    fig.subplots_adjust(
        left=lb, bottom=lb, right=tr, top=tr, wspace=whspace, hspace=whspace
    )

    # Set up the default histogram keywords.
    if hist_kwargs is None:
        hist_kwargs = dict()
    hist_kwargs["color"] = hist_kwargs.get("color", color)
    if smooth1d is None:
        hist_kwargs["histtype"] = hist_kwargs.get("histtype", "step")

    for i, x in enumerate(xs):
        # Deal with masked arrays.
        if hasattr(x, "compressed"):
            x = x.compressed()

        if _np.shape(xs)[0] == 1:
            ax = axes
        else:
            ax = axes[i, i]
        # Plot the histograms.
        if smooth1d is None:
            n, _, _ = ax.hist(
                x,
                bins=int(round(bins[i])),
                weights=weights,
                range=range[i],
                **hist_kwargs
            )
        else:
            if gaussian_filter is None:
                raise ImportError("Please install scipy for smoothing")
            n, b = _np.histogram(
                x, bins=int(round(bins[i])), weights=weights, range=range[i]
            )
            n = _gf(n, smooth1d)
            x0 = _np.array(zip(b[:-1], b[1:])).flatten()
            y0 = _np.array(zip(n, n)).flatten()
            ax.plot(x0, y0, **hist_kwargs)

        if truths is not None and truths[i] is not None:
            ax.axvline(truths[i], color=truth_color)

        # Plot quantiles if wanted.
        if len(quantiles) > 0:
            qts = _np.array(quantiles) + qtdiff[i]
            qts = _np.where(qts <= 0, 0.01, qts)
            qts = _np.where(qts >= 1, 0.99, qts)
            qts = list(qts)
            qvalues = quantile(x, qts, weights=weights)
            for q in qvalues:
                ax.axvline(q, ls="dashed", color=color)

            if verbose:
                print("Quantiles:")
                print([item for item in zip(qts, qvalues)])

        if show_titles:
            # Compute the quantiles for the title. This might redo
            # unneeded computation but who cares.
            qts = _np.array(quantiles) + qtdiff[i]
            qts = _np.where(qts <= 0, 0.01, qts)
            qts = _np.where(qts >= 1, 0.99, qts)
            qts = list(qts)
            q_16, q_50, q_84 = quantile(x, qts, weights=weights)
            q_m, q_p = q_50 - q_16, q_84 - q_50

            # Format the quantile display.
            if q_50 < 1e6:
                fmt = "{{0:{0}}}".format(title_fmt).format
                title = r"${{{0}}}_{{-{1}}}^{{+{2}}}$"
                title = title.format(fmt(q_50), fmt(q_m), fmt(q_p))
            else:
                expn = int(_np.log10(q_50))
                fmt = "{{0:{0}}}".format(title_fmt).format
                title = r"${{{0}}}_{{-{1}}}^{{+{2}}}e{3}$"
                title = title.format(
                    fmt(q_50 * 10**-expn),
                    fmt(q_m * 10**-expn),
                    fmt(q_p * 10**-expn),
                    expn,
                )

            # Add in the column name if it's given.
            if labels is not None:
                if len(labels) <= i:
                    labels = list(labels) + [""]
                j = labels[i].find(" ")
                if j == -1:
                    j = len(labels[i])
                title = "{0} = {1}".format(labels[i][:j], title)

            # Add the title to the axis.
            ax.set_title(title, **title_kwargs)

        # Set up the axes.
        ax.set_xlim(range[i])
        if scale_hist:
            maxn = _np.max(n)
            ax.set_ylim(-0.1 * maxn, 1.1 * maxn)
        else:
            ax.set_ylim(0, 1.1 * _np.max(n))
        ax.set_yticklabels([])
        ax.xaxis.set_major_locator(_MaxNLoc(max_n_ticks, prune="lower"))

        if i < K - 1:
            if top_ticks:
                ax.xaxis.set_ticks_position("top")
                [l.set_rotation(45) for l in ax.get_xticklabels()]
            else:
                ax.set_xticklabels([])
        else:
            [l.set_rotation(45) for l in ax.get_xticklabels()]
            if labels is not None:
                ax.set_xlabel(labels[i], **label_kwargs)
                ax.xaxis.set_label_coords(0.5, -0.3)

        for j, y in enumerate(xs):
            if _np.shape(xs)[0] == 1:
                ax = axes
            else:
                ax = axes[i, j]
            if j > i:
                ax.set_visible(False)
                ax.set_frame_on(False)
                continue
            elif j == i:
                continue

            # Deal with masked arrays.
            if hasattr(y, "compressed"):
                y = y.compressed()
            hist2d(
                y,
                x,
                ax=ax,
                range=[range[j], range[i]],
                weights=weights,
                color=color,
                smooth=smooth,
                bins=[bins[j], bins[i]],
                **hist2d_kwargs
            )

            if truths is not None:
                if truths[i] is not None and truths[j] is not None:
                    ax.plot(truths[j], truths[i], "s", color=truth_color)
                if truths[j] is not None:
                    ax.axvline(truths[j], color=truth_color)
                if truths[i] is not None:
                    ax.axhline(truths[i], color=truth_color)

            ax.xaxis.set_major_locator(_MaxNLoc(max_n_ticks, prune="lower"))
            ax.yaxis.set_major_locator(_MaxNLoc(max_n_ticks, prune="lower"))

            if i < K - 1:
                ax.set_xticklabels([])
            else:
                [l.set_rotation(45) for l in ax.get_xticklabels()]
                if labels is not None:
                    ax.set_xlabel(labels[j], **label_kwargs)
                    ax.xaxis.set_label_coords(0.5, -0.3)

            if j > 0:
                ax.set_yticklabels([])
            else:
                [l.set_rotation(45) for l in ax.get_yticklabels()]
                if labels is not None:
                    ax.set_ylabel(labels[i], **label_kwargs)
                    ax.yaxis.set_label_coords(-0.3, 0.5)

    return fig


[docs]def quantile(x, q, weights=None):
    """
    Like numpy.percentile, but:

    * Values of q are quantiles [0., 1.] rather than percentiles [0., 100.]
    * scalar q not supported (q must be iterable)
    * optional weights on x

    """
    if weights is None:
        return _np.percentile(x, [100.0 * qi for qi in q])
    else:
        idx = _np.argsort(x)
        xsorted = x[idx]
        cdf = _np.add.accumulate(weights[idx])
        cdf /= cdf[-1]
        return _np.interp(q, cdf, xsorted).tolist()


[docs]def hist2d(
    x,
    y,
    bins=20,
    range=None,
    weights=None,
    levels=None,
    smooth=None,
    ax=None,
    color=None,
    plot_datapoints=True,
    plot_density=True,
    plot_contours=True,
    fill_contours=False,
    contour_kwargs=None,
    contourf_kwargs=None,
    data_kwargs=None,
    **kwargs
):
    """
    Plot a 2-D histogram of samples.

    """
    if ax is None:
        ax = _plt.gca()

    # Set the default range based on the data range if not provided.
    if range is None:
        if "extent" in kwargs:
            _warn.warn("Deprecated keyword argument 'extent'. " "Use 'range' instead.")
            range = kwargs["extent"]
        else:
            range = [[x.min(), x.max()], [y.min(), y.max()]]

    # Set up the default plotting arguments.
    # if color is None:
    # color = "k"
    #
    # Choose the default "sigma" contour levels.
    # if levels is None:
    # levels = 1.0 - _np.exp(-0.5 * _np.arange(0.5, 2.1, 0.5) ** 2)
    #
    # This is the color map for the density plot, over-plotted to indicate the
    # density of the points near the center.
    # density_cmap = LinearSegmentedColormap.from_list(
    # "density_cmap", [color, (1, 1, 1, 0)])
    #
    # This color map is used to hide the points at the high density areas.
    # white_cmap = LinearSegmentedColormap.from_list(
    # "white_cmap", [(1, 1, 1), (1, 1, 1)], N=2)
    #
    # This "color map" is the list of colors for the contour levels if the
    # contours are filled.
    # rgba_color = colorConverter.to_rgba(color)
    # contour_cmap = [rgba_color] + [list(rgba_color) for l in levels]
    # for i, l in enumerate(levels):
    # contour_cmap[i+1][-1] *= float(len(levels) - i) / (len(levels)+1)
    #
    # We'll make the 2D histogram to directly estimate the density.
    try:
        H, X, Y = _np.histogram2d(
            x.flatten(), y.flatten(), bins=bins, range=range, weights=weights
        )
    except ValueError:
        raise ValueError(
            "It looks like at least one of your sample columns "
            "have no dynamic range. You could try using the "
            "'range' argument."
        )
    #
    # if smooth is not None:
    # if gaussian_filter is None:
    # raise ImportError("Please install scipy for smoothing")
    # H = _gf(H, smooth)
    #
    # Compute the density levels.
    # Hflat = H.flatten()
    # inds = _np.argsort(Hflat)[::-1]
    # Hflat = Hflat[inds]
    # sm = _np.cumsum(Hflat)
    # sm /= sm[-1]
    # V = _np.empty(len(levels))
    # for i, v0 in enumerate(levels):
    # try:
    # V[i] = Hflat[sm <= v0][-1]
    # except:
    # V[i] = Hflat[0]
    #
    # Compute the bin centers.
    # X1, Y1 = 0.5 * (X[1:] + X[:-1]), 0.5 * (Y[1:] + Y[:-1])
    #
    # Extend the array for the sake of the contours at the plot edges.
    # H2 = H.min() + _np.zeros((H.shape[0] + 4, H.shape[1] + 4))
    # H2[2:-2, 2:-2] = H
    # H2[2:-2, 1] = H[:, 0]
    # H2[2:-2, -2] = H[:, -1]
    # H2[1, 2:-2] = H[0]
    # H2[-2, 2:-2] = H[-1]
    # H2[1, 1] = H[0, 0]
    # H2[1, -2] = H[0, -1]
    # H2[-2, 1] = H[-1, 0]
    # H2[-2, -2] = H[-1, -1]
    # X2 = _np.concatenate([
    # X1[0] + _np.array([-2, -1]) * _np.diff(X1[:2]),
    # X1,
    # X1[-1] + _np.array([1, 2]) * _np.diff(X1[-2:]),
    # ])
    # Y2 = _np.concatenate([
    # Y1[0] + _np.array([-2, -1]) * _np.diff(Y1[:2]),
    # Y1,
    # Y1[-1] + _np.array([1, 2]) * _np.diff(Y1[-2:]),
    # ])
    #
    # if plot_datapoints:
    # if data_kwargs is None:
    # data_kwargs = dict()
    # data_kwargs["color"] = data_kwargs.get("color", color)
    # data_kwargs["ms"] = data_kwargs.get("ms", 2.0)
    # data_kwargs["mec"] = data_kwargs.get("mec", "none")
    # data_kwargs["alpha"] = data_kwargs.get("alpha", 0.1)
    # ax.plot(x, y, "o", zorder=-1, rasterized=True, **data_kwargs)
    #
    # Plot the base fill to hide the densest data points.
    # if plot_contours or plot_density:
    # ax.contourf(X2, Y2, H2.T, [V[-1], H.max()],
    # cmap=white_cmap, antialiased=False)
    #
    # if plot_contours and fill_contours:
    # if contourf_kwargs is None:
    # contourf_kwargs = dict()
    # contourf_kwargs["colors"] = contourf_kwargs.get("colors", contour_
    # cmap)
    # contourf_kwargs["antialiased"] = contourf_kwargs.get("antialiased",
    # False)
    # ax.contourf(X2, Y2, H2.T, _np.concatenate([[H.max()], V, [0]]),
    # **contourf_kwargs)
    #
    # Plot the density map. This can't be plotted at the same time as the
    # contour fills.
    # elif plot_density:
    # ax.pcolor(X, Y, H.max() - H.T, cmap=density_cmap)
    if plot_density:
        ax.pcolor(X, Y, H.max() - H.T, cmap=_cm.get_cmap("gist_heat"))
    #
    # Plot the contour edge colors.
    # if plot_contours:
    # if contour_kwargs is None:
    # contour_kwargs = dict()
    # contour_kwargs["colors"] = contour_kwargs.get("colors", color)
    # ax.contour(X2, Y2, H2.T, V, **contour_kwargs)

    ax.set_xlim(range[0])
    ax.set_ylim(range[1])
    return


# Main
if __name__ == "__main__":
    pass