Parametric bootstrap method for fitted models inheriting class.

Simulate datasets from a fitted model, refit the model, and generate a sampling distribution for a user-specified fit-statistic.

Arguments

object: a fitted model inheriting class "unmarkedFit"
statistic: a function returning a vector of fit-statistics. First argument must be the fitted model. Default is sum of squared residuals.
nsim: number of bootstrap replicates
report: print fit statistic every 'report' iterations during resampling
seed: set seed for reproducible bootstrap
parallel: logical (default = TRUE) indicating whether to compute bootstrap on multiple cores, if present. If TRUE, suppresses reporting of bootstrapped statistics. Defaults to serial calculation when nsim < 100. Parallel computation is likely to be slower for simple models when nsim < ~500, but should speed up the bootstrap of more complicated models.
ncores: integer (default = one less than number of available cores) number of cores to use when bootstrapping in parallel.
...: Additional arguments to be passed to statistic

Details

This function simulates datasets based upon a fitted model, refits the model, and evaluates a user-specified fit-statistic for each simulation. Comparing this sampling distribution to the observed statistic provides a means of evaluating goodness-of-fit or assessing uncertainty in a quantity of interest.

Value

An object of class parboot with three slots:

call: parboot call
t0: Numeric vector of statistics for original fitted model.
t.star: nsim by length(t0) matrix of statistics for each simulation fit.

Author

Richard Chandler rbchan@uga.edu and Adam Smith

Examples


data(linetran)
(dbreaksLine <- c(0, 5, 10, 15, 20))
#> [1]  0  5 10 15 20
lengths <- linetran$Length

ltUMF <- with(linetran, {
  unmarkedFrameDS(y = cbind(dc1, dc2, dc3, dc4),
  siteCovs = data.frame(Length, area, habitat), dist.breaks = dbreaksLine,
  tlength = lengths*1000, survey = "line", unitsIn = "m")
    })

# Fit a model
(fm <- distsamp(~area ~habitat, ltUMF))
#> 
#> Call:
#> distsamp(formula = ~area ~ habitat, data = ltUMF)
#> 
#> Density:
#>             Estimate    SE     z P(>|z|)
#> (Intercept)   -0.287 0.167 -1.72  0.0852
#> habitatB       0.253 0.198  1.28  0.2000
#> 
#> Detection:
#>             Estimate    SE     z  P(>|z|)
#> (Intercept)     3.06 0.548  5.57 2.53e-08
#> area           -0.13 0.096 -1.35 1.76e-01
#> 
#> AIC: 165.5482 

# Function returning three fit-statistics.
fitstats <- function(fm, na.rm=TRUE) {
    observed <- getY(fm@data)
    expected <- fitted(fm)
    resids <- residuals(fm)
    sse <- sum(resids^2, na.rm=na.rm)
    chisq <- sum((observed - expected)^2 / expected, na.rm=na.rm)
    freeTuke <- sum((sqrt(observed) - sqrt(expected))^2, na.rm=na.rm)
    out <- c(SSE=sse, Chisq=chisq, freemanTukey=freeTuke)
    return(out)
}

(pb <- parboot(fm, fitstats, nsim=25, report=1))
#> Warning: report argument is non-functional and will be deprecated in the next version
#> 
  |                                                  | 0 % ~calculating  
  |++                                                | 4 % ~01s          
  |++++                                              | 8 % ~01s          
  |++++++                                            | 12% ~00s          
  |++++++++                                          | 16% ~00s          
  |++++++++++                                        | 20% ~00s          
  |++++++++++++                                      | 24% ~00s          
  |++++++++++++++                                    | 28% ~00s          
  |++++++++++++++++                                  | 32% ~00s          
  |++++++++++++++++++                                | 36% ~00s          
  |++++++++++++++++++++                              | 40% ~00s          
  |++++++++++++++++++++++                            | 44% ~00s          
  |++++++++++++++++++++++++                          | 48% ~00s          
  |++++++++++++++++++++++++++                        | 52% ~00s          
  |++++++++++++++++++++++++++++                      | 56% ~00s          
  |++++++++++++++++++++++++++++++                    | 60% ~00s          
  |++++++++++++++++++++++++++++++++                  | 64% ~00s          
  |++++++++++++++++++++++++++++++++++                | 68% ~00s          
  |++++++++++++++++++++++++++++++++++++              | 72% ~00s          
  |++++++++++++++++++++++++++++++++++++++            | 76% ~00s          
  |++++++++++++++++++++++++++++++++++++++++          | 80% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++        | 84% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++      | 88% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++    | 92% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++++  | 96% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s  
#> 
#> Call: parboot(object = fm, statistic = fitstats, nsim = 25, report = 1)
#> 
#> Parametric Bootstrap Statistics:
#>                 t0 mean(t0 - t_B) StdDev(t0 - t_B) Pr(t_B > t0)
#> SSE          113.7          19.24            25.96        0.231
#> Chisq         53.6           8.97             9.58        0.115
#> freemanTukey  23.2           6.02             4.07        0.115
#> 
#> t_B quantiles:
#>              0% 2.5% 25% 50% 75% 97.5% 100%
#> SSE          43   48  81  97 113   141  154
#> Chisq        29   29  38  47  52    60   60
#> freemanTukey 11   11  14  17  19    24   25
#> 
#> t0 = Original statistic computed from data
#> t_B = Vector of bootstrap samples
#> 
plot(pb, main="")





# Finite-sample inference for a derived parameter.
# Population size in sampled area

Nhat <- function(fm) {
    sum(bup(ranef(fm, K=50)))
    }

set.seed(345)
(pb.N <- parboot(fm, Nhat, nsim=25, report=5))
#> Warning: report argument is non-functional and will be deprecated in the next version
#> 
  |                                                  | 0 % ~calculating  
  |++                                                | 4 % ~00s          
  |++++                                              | 8 % ~00s          
  |++++++                                            | 12% ~00s          
  |++++++++                                          | 16% ~00s          
  |++++++++++                                        | 20% ~00s          
  |++++++++++++                                      | 24% ~00s          
  |++++++++++++++                                    | 28% ~00s          
  |++++++++++++++++                                  | 32% ~00s          
  |++++++++++++++++++                                | 36% ~00s          
  |++++++++++++++++++++                              | 40% ~00s          
  |++++++++++++++++++++++                            | 44% ~00s          
  |++++++++++++++++++++++++                          | 48% ~00s          
  |++++++++++++++++++++++++++                        | 52% ~00s          
  |++++++++++++++++++++++++++++                      | 56% ~00s          
  |++++++++++++++++++++++++++++++                    | 60% ~00s          
  |++++++++++++++++++++++++++++++++                  | 64% ~00s          
  |++++++++++++++++++++++++++++++++++                | 68% ~00s          
  |++++++++++++++++++++++++++++++++++++              | 72% ~00s          
  |++++++++++++++++++++++++++++++++++++++            | 76% ~00s          
  |++++++++++++++++++++++++++++++++++++++++          | 80% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++        | 84% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++      | 88% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++    | 92% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++++  | 96% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s  
#> 
#> Call: parboot(object = fm, statistic = Nhat, nsim = 25, report = 5)
#> 
#> Parametric Bootstrap Statistics:
#>    t0 mean(t0 - t_B) StdDev(t0 - t_B) Pr(t_B > t0)
#> 1 162          0.701             15.9        0.462
#> 
#> t_B quantiles:
#>       0% 2.5% 25% 50% 75% 97.5% 100%
#> [1,] 129  132 151 162 174   184  184
#> 
#> t0 = Original statistic computed from data
#> t_B = Vector of bootstrap samples
#> 

# Compare to empirical Bayes confidence intervals
colSums(confint(ranef(fm, K=50)))
#>  2.5% 97.5% 
#>   117   218