% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/diagnostics.R
\name{pareto-k-diagnostic}
\alias{pareto-k-diagnostic}
\alias{pareto_k_table}
\alias{pareto_k_ids}
\alias{pareto_k_values}
\alias{pareto_k_influence_values}
\alias{psis_n_eff_values}
\alias{mcse_loo}
\alias{plot.psis_loo}
\alias{plot.loo}
\alias{plot.psis}
\title{Diagnostics for Pareto smoothed importance sampling (PSIS)}
\usage{
pareto_k_table(x)

pareto_k_ids(x, threshold = NULL)

pareto_k_values(x)

pareto_k_influence_values(x)

psis_n_eff_values(x)

mcse_loo(x, threshold = NULL)

\method{plot}{psis_loo}(
  x,
  diagnostic = c("k", "ESS", "n_eff"),
  ...,
  label_points = FALSE,
  main = "PSIS diagnostic plot"
)

\method{plot}{psis}(
  x,
  diagnostic = c("k", "ESS", "n_eff"),
  ...,
  label_points = FALSE,
  main = "PSIS diagnostic plot"
)
}
\arguments{
\item{x}{An object created by \code{\link[=loo]{loo()}} or \code{\link[=psis]{psis()}}.}

\item{threshold}{For \code{pareto_k_ids()}, \code{threshold} is the minimum \eqn{k}
value to flag (default is a sample size \code{S} dependent threshold
\code{1 - 1 / log10(S)}). For \code{mcse_loo()}, if any \eqn{k} estimates are
greater than \code{threshold} the MCSE estimate is returned as \code{NA}.
See \strong{Details} for the motivation behind these defaults.}

\item{diagnostic}{For the \code{plot} method, which diagnostic should be
plotted? The options are \code{"k"} for Pareto \eqn{k} estimates (the
default), or \code{"ESS"} or \code{"n_eff"} for PSIS effective sample size estimates.}

\item{label_points, ...}{For the \code{plot()} method, if \code{label_points} is
\code{TRUE} the observation numbers corresponding to any values of \eqn{k}
greater than the diagnostic threshold will be displayed in the plot.
Any arguments specified in \code{...} will be passed to \code{\link[graphics:text]{graphics::text()}}
and can be used to control the appearance of the labels.}

\item{main}{For the \code{plot()} method, a title for the plot.}
}
\value{
\code{pareto_k_table()} returns an object of class
\code{"pareto_k_table"}, which is a matrix with columns \code{"Count"},
\code{"Proportion"}, and \code{"Min. n_eff"}, and has its own print method.

\code{pareto_k_ids()} returns an integer vector indicating which
observations have Pareto \eqn{k} estimates above \code{threshold}.

\code{pareto_k_values()} returns a vector of the estimated Pareto
\eqn{k} parameters. These represent the reliability of sampling.

\code{pareto_k_influence_values()} returns a vector of the estimated Pareto
\eqn{k} parameters. These represent the influence of the observations on the
model posterior distribution.

\code{psis_n_eff_values()} returns a vector of the estimated PSIS
effective sample sizes.

\code{mcse_loo()} returns the Monte Carlo standard error (MCSE)
estimate for PSIS-LOO. MCSE will be \code{NA} if any Pareto \eqn{k} values are
above \code{threshold}.

The \code{plot()} method is called for its side effect and does not
return anything. If \code{x} is the result of a call to \code{\link[=loo]{loo()}}
or \code{\link[=psis]{psis()}} then \code{plot(x, diagnostic)} produces a plot of
the estimates of the Pareto shape parameters (\code{diagnostic = "k"}) or
estimates of the PSIS effective sample sizes (\code{diagnostic = "ESS"} or
\code{diagnostic = "n_eff"}).
}
\description{
Print a diagnostic table summarizing the estimated Pareto shape parameters
and PSIS effective sample sizes, find the indexes of observations for which
the estimated Pareto shape parameter \eqn{k} is larger than some
\code{threshold} value, or plot observation indexes vs. diagnostic estimates.
The \strong{Details} section below provides a brief overview of the
diagnostics, but we recommend consulting Vehtari, Gelman, and Gabry (2017)
and Vehtari, Simpson, Gelman, Yao, and Gabry (2024) for full details.
}
\details{
The reliability and approximate convergence rate of the PSIS-based
estimates can be assessed using the estimates for the shape
parameter \eqn{k} of the generalized Pareto distribution. The
diagnostic threshold for Pareto \eqn{k} depends on sample size
\eqn{S} (the sample size dependent threshold was introduced by Vehtari
et al. (2024), and before that fixed thresholds of 0.5 and 0.7 were
recommended). For simplicity, the \strong{loo} package uses the nominal sample
size \eqn{S} when computing the sample size specific
threshold. This provides an optimistic threshold if the effective
sample size is less than 2200, but if MCMC-ESS > S/2 the difference
is usually negligible. Thinning of MCMC draws can be used to
improve the ratio ESS/S.
\itemize{
\item If \eqn{k < min(1 - 1 / log10(S), 0.7)}, where \eqn{S} is the
sample size, the PSIS estimate and the corresponding Monte Carlo
standard error estimate are reliable.
\item If \eqn{1 - 1 / log10(S) \leq k < 0.7}{1 - 1 / log10(S) <= k < 0.7}, the
PSIS estimate and the corresponding Monte Carlo standard error estimate are
not reliable, but increasing the (effective) sample size \eqn{S} above
2200 may help (this will increase the sample size specific
threshold above 0.7, since \eqn{1 - 1 / log10(2200) > 0.7}, and then the
bias specific threshold 0.7 dominates).
\item If \eqn{0.7 \leq k < 1}{0.7 <= k < 1}, the PSIS estimate and the
corresponding Monte Carlo standard error have large bias and are not
reliable. Increasing the sample size may reduce the variability in the
\eqn{k} estimate, which may also result in a lower \eqn{k} estimate.
\item If \eqn{k \geq 1}{k >= 1}, the target distribution is estimated to
have a non-finite mean. The PSIS estimate and the corresponding Monte
Carlo standard error are not well defined. Increasing the sample size
may reduce the variability in the \eqn{k} estimate, which
may also result in a lower \eqn{k} estimate.
}

\subsection{What if the estimated tail shape parameter \eqn{k}
exceeds the diagnostic threshold?}{ Importance sampling is likely to
work less well if the marginal posterior \eqn{p(\theta^s | y)} and
LOO posterior \eqn{p(\theta^s | y_{-i})} are very different, which
is more likely to happen with a non-robust model and highly
influential observations.  If the estimated tail shape parameter
\eqn{k} exceeds the diagnostic threshold, the user should be
warned. (Note: If \eqn{k} is greater than the diagnostic threshold
then WAIC is also likely to fail, but WAIC lacks an equally accurate
diagnostic.)  When using PSIS in the context of approximate LOO-CV,
we recommend one of the following actions:
\itemize{
\item With some additional computations, it is possible to transform
the MCMC draws from the posterior distribution to obtain more
reliable importance sampling estimates. This results in a smaller
shape parameter \eqn{k}.  See \code{\link[=loo_moment_match]{loo_moment_match()}} and the
vignette \emph{Avoiding model refits in leave-one-out cross-validation
with moment matching} for an example of this.
\item Sampling from a leave-one-out mixture distribution (see the
vignette \emph{Mixture IS leave-one-out cross-validation for
high-dimensional Bayesian models}), directly from \eqn{p(\theta^s
  | y_{-i})} for the problematic observations \eqn{i}, or using
\eqn{K}-fold cross-validation (see the vignette \emph{Holdout
validation and K-fold cross-validation of Stan programs with the
loo package}) will generally be more stable.
\item Using a model that is more robust to anomalous observations will
generally make approximate LOO-CV more stable.
}

}

\subsection{Observation influence statistics}{ The estimated shape parameter
\eqn{k} for each observation can be used as a measure of the observation's
influence on the posterior distribution of the model. These can be obtained with
\code{pareto_k_influence_values()}.
}

\subsection{Effective sample size and error estimates}{ In the case that we
obtain the samples from the proposal distribution via MCMC, the \strong{loo}
package also computes estimates for the Monte Carlo error and the effective
sample size for importance sampling, which are more accurate for PSIS than
for IS and TIS (see Vehtari et al. (2024) for details). However, the PSIS
effective sample size estimate will be
\strong{over-optimistic when the estimate of \eqn{k} is greater than}
\eqn{min(1-1/log10(S), 0.7)}, where \eqn{S} is the sample size.
}
}
\references{
Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model
evaluation using leave-one-out cross-validation and WAIC.
\emph{Statistics and Computing}. 27(5), 1413--1432. doi:10.1007/s11222-016-9696-4
(\href{https://link.springer.com/article/10.1007/s11222-016-9696-4}{journal version},
\href{https://arxiv.org/abs/1507.04544}{preprint arXiv:1507.04544}).

Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. (2024).
Pareto smoothed importance sampling. \emph{Journal of Machine Learning Research},
25(72):1--58.
\href{https://jmlr.org/papers/v25/19-556.html}{PDF}
}
\seealso{
\itemize{
\item \code{\link[=psis]{psis()}} for the implementation of the PSIS algorithm.
\item The \href{https://mc-stan.org/loo/articles/online-only/faq.html}{FAQ page} on
the \strong{loo} website for answers to frequently asked questions.
}
}
