# Joshua French # http://gallery.rcpp.org/articles/bayesian-time-series-changepoint/ # Function for Gibbs sampler. Takes the number of simulations desired, # the vector of observed data, the values of the hyperparameters, the # possible values of k, a starting value for phi, and a starting # value for k. # Function takes: # nsim: number of cycles to run # y: vector of observed values # a, b: parameters for prior distribution of lambda # c, d: parameters for prior distribution of phi # kposs: possible values of k # phi, k: starting values of chain for phi and k gibbsloop <- function(nsim, y, a, b, c, d, kposs, phi, k) { # matrix to store simulated values from each cycle out <- matrix(NA, nrow = nsim, ncol = 3) # number of observations n <- length(y) for(i in 1:nsim) { # Generate value from full conditional of phi based on # current values of other parameters lambda <- rgamma(1, a + sum(y[1:k]), b + k) # Generate value from full conditional of phi based on # current values of other parameters phi <- rgamma(1, c + sum(y[min((k+1),n):n]), d + n - k) # generate value of k # determine probability masses for full conditional of k # based on current parameters values pmf <- kprobloop(kposs, y, lambda, phi) k <- sample(x = kposs, size = 1, prob = pmf) out[i, ] <- c(lambda, phi, k) } out } # Given a vector of values x, the logsumexp function calculates # log(sum(exp(x))), but in a "smart" way that helps avoid # numerical underflow or overflow problems. logsumexp <- function(x) { log(sum(exp(x - max(x)))) + max(x) } # Determine pmf for full conditional of k based on current values of other # variables. Takes possible values of k, observed data y, current values of # lambda, and phi. It does this naively using a loop. kprobloop <- function(kposs, y, lambda, phi) { # create vector to store argument of exponential function of # unnormalized pmf, then calculate them x <- numeric(length(kposs)) for(i in kposs) { x[i] <- i*(phi - lambda) + sum(y[1:i]) * log(lambda/phi) } #return full conditional pmf of k return(exp(x - logsumexp(x))) } yr <- 1851:1962 counts <- c(4,5,4,1,0,4,3,4,0,6,3,3,4,0,2,6,3,3,5,4,5,3,1,4,4,1,5,5,3,4,2, 5,2,2,3,4,2,1,3,2,2,1,1,1,1,3,0,0,1,0,1,1,0,0,3,1,0,3,2,2,0,1, 1,1,0,1,0,1,0,0,0,2,1,0,0,0,1,1,0,2,3,3,1,1,2,1,1,1,1,2,4,2,0, 0,0,1,4,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1) nsim = 10000 chain <- gibbsloop(nsim = nsim, y = counts, a = 4, b = 1, c = 1, d = 2, kposs = 1:112, phi = 1, k = 40) hist(chain[1000:nsim,3]+1850,xlab="Year of changepoint",freq=FALSE, main=paste("Changepoint estimate: ",median(1850+chain[1000:nsim,3]))) hist(chain[1000:nsim,1],xlab="Rate of accidents per year before changepoint",freq=FALSE, main=paste("Rate per year estimate: ",median(chain[1000:nsim,1]))) hist(chain[1000:nsim,2],xlab="Rate of accidents per year after changepoint",freq=FALSE, main=paste("Rate per year estimate: ",median(chain[1000:nsim,2])))