Interactive Demos

RStudio-first interactive demos for density, regression, and constrained estimation with np and crs.
Keywords

interactive demos, manipulate, RStudio, np, crs

These examples are primarily pedagogical. Most use the manipulate package and are intended to be run inside RStudio so that you can move sliders, change kernels, alter smoothing parameters, and immediately see the effect on the estimate.

A quick way to run one

Download or open one of the scripts below in RStudio, then source it. Once the plot appears, click the small tool icon in the plot pane if the control panel is not already visible.

Best first demos

If you want to see… Start here Notes
How bandwidth, kernel, and sample size change a density estimate manipulate_density.R good first teaching demo
How off-axis quantiles change multivariate partial plots manipulate_wage1.R uses the wage1 data and precomputed bandwidth object
Shape restrictions in action manipulate_constrained_spline.R requires quadprog

Density and distribution demos

Regression and shape-restriction demos

Other demos

Notes

  • Some scripts require additional packages such as quadprog, mnormt, or rgl.
  • The manipulate package is tied to RStudio, so these are best treated as RStudio-first examples rather than generic batch scripts.

Inline source for two flagship demos

Source file: manipulate_density.R

## $Id: manipulate_density.R,v 1.25 2014/03/31 02:41:29 jracine Exp jracine $

## This file uses the `manipulate' package to create a dynamic plot
## with `sliders' and `pickers' where the user can set the bandwidth
## for univariate Rosenblatt-Parzen, adaptive, generalized, and knn
## kernel density estimates to gauge the impact on bias and
## variance. You can also modify the kernel function, order of the
## kernel function, and whether to use fixed, generalized_nn, or
## adaptive_nn bandwidths. As well you can change the degrees of
## freedom of the underlying chi-square distribution along with the
## number of observations. We also plot the true density for
## comparison purposes.

rm(list=ls())
require(manipulate)
require(np)
options(np.tree=TRUE)

## Write a function to do plotting and accept arguments... doing so
## allows multiple plot calls (e.g. overlay histogram etc.) which
## cannot otherwise be done with the manipulate function.

manipulate.plot <- function(n,df,sf,nn,bwtype,ckertype,ckerorder,hist,rug,cv,cv.type) {

  if(nn >= n) {
    warning(paste("Knn bandwidth must be less than",n))
    nn <- n-1
  }

  set.seed(42)
  x <- rchisq(n,df=df)
  x.eval <- seq(min(x)-0.5*sd(x),max(x)+0.5*sd(x),length=1000)
  f.dgp <- dchisq(ifelse(x.eval<0,0,x.eval),df=df)

  hist.max <- 0
  if(hist) hist.max <- max(hist(x,breaks=25,plot=FALSE)$density)

  if(cv) {
    bw <- npudensbw(~x,
                    bwtype=bwtype,
                    bwmethod=cv.type,
                    ckertype=ckertype,
                    ckerorder=ckerorder)
  } else {
    bw <- npudensbw(~x,
                    bwtype=bwtype,
                    ckertype=ckertype,
                    ckerorder=ckerorder,
                    bandwidth.compute=FALSE,
                    bwscaling=TRUE,
                    bws=ifelse(bwtype=="fixed",sf,nn))
  }

  f.kernel <- fitted(npudens(tdat=x,edat=x.eval,bws=bw))

  ylim <- c(0,max(hist.max,f.kernel,f.dgp))

  plot(x.eval,f.kernel,
       ylim=ylim,
       type="l",
       ylab="Density",
       xlab="Data",
       sub=paste(ifelse(bwtype=="fixed",paste("Bandwidth = ",formatC(bw$bw[1]*sd(x)*n^{-1/(2*ckerorder+1)},format="f",digits=2),sep=""),paste("Kth nearest neighbor = ",bw$bw[1],sep="")),", n = ",n,sep=""),
       lty=1,
       col=1)

  lines(x.eval,f.dgp,lty=2,col=2)

  legend("topright",
         c("Kernel Estimate",paste("Chi-square (df = ",df,")",sep="")),
         lty=c(1,2),
         col=c(1,2),
         bty="n",
         cex=0.75)

  if(hist) hist(x,breaks=25,freq=FALSE,col=NA,add=TRUE,lty=3)

  if(rug) rug(x)
}

## Call the manipulate function on the above function

manipulate(manipulate.plot(n,df,sf,nn,bwtype,ckertype,ckerorder,hist,rug,cv,cv.type),
           n=slider(100,1000,500,step=100,label="Number of observations"),
           df=slider(10,120,10,step=10,label="Chi-square degrees of freedom"),
           ckertype=picker("gaussian","epanechnikov","uniform",label="Kernel function"),
           ckerorder=picker(2,4,6,8,label="Kernel order"),
           bwtype=picker("fixed","adaptive_nn","generalized_nn",label="Bandwidth type"),
           sf=slider(0.1,2.5,1,label="Scale factor (bwtype=\"fixed\")",step=0.1,ticks=TRUE),
           nn=slider(2,1000,50,label="Kth nn (bwtype=*_nn)",step=1,ticks=TRUE),
           hist=checkbox(FALSE, "Show histogram"),
           rug=checkbox(FALSE, "Show rug"),
           cv=checkbox(FALSE, "Cross-validate smoothing parameters"),
           cv.type=picker("cv.ml","cv.ls",label="CV method"))

Source file: manipulate_wage1.R

## $Id: manipulate_wage1.R,v 1.2 2013/03/03 12:07:24 jracine Exp jracine $

## This file uses the `manipulate' package to create a dynamic plot
## with `sliders' where the user can set the off-axis variable
## quantiles for the local linear regression estimator using the
## `wage1' data.

rm(list=ls())
require(manipulate)
require(np)

## Load the wage1 data which contains the precomputed bandwidth object
## `bw.all'

data(wage1)

## Call the manipulate function on a plot object

manipulate(plot(bw.all,
                common.scale=common.scale,
                plot.errors.method="asymptotic",
                plot.errors.style="band",
                xq=c(0.5,0.5,educ,exper,tenure)),
           common.scale=picker(TRUE,FALSE,label="Common Scale for Y Axes"),
           educ=slider(0,1,0.5,label="Education Quantile",step=0.1,ticks=TRUE),
           exper=slider(0,1,0.5,label="Experience Quantile",step=0.1,ticks=TRUE),
           tenure=slider(0,1,0.5,label="Tenure Quantile",step=0.1,ticks=TRUE))
Back to top