# Joshua French
# http://gallery.rcpp.org/articles/bayesian-time-series-changepoint/
# Function for Gibbs sampler.  Takes the number of simulations desired,
# the vector of observed data, the values of the hyperparameters, the 
# possible values of k, a starting value for phi, and a starting
# value for k.

# Function takes:
# nsim: number of cycles to run
# y: vector of observed values
# a, b: parameters for prior distribution of lambda
# c, d: parameters for prior distribution of phi
# kposs: possible values of k
# phi, k: starting values of chain for phi and k

gibbsloop <- function(nsim, y, a, b, c, d, kposs, phi, k)
{
  # matrix to store simulated values from each cycle
  out <- matrix(NA, nrow = nsim, ncol = 3)
  
  # number of observations
  n <- length(y)
  
  for(i in 1:nsim)
  {
    # Generate value from full conditional of phi based on
    # current values of other parameters
    lambda <- rgamma(1, a + sum(y[1:k]), b + k)
    
    # Generate value from full conditional of phi based on
    # current values of other parameters
    phi <- rgamma(1, c + sum(y[min((k+1),n):n]), d + n - k)
    
    # generate value of k
    # determine probability masses for full conditional of k
    # based on current parameters values
    pmf <- kprobloop(kposs, y, lambda, phi)
    k <- sample(x = kposs, size = 1, prob = pmf)
    
    out[i, ] <- c(lambda, phi, k)
  }
  out
}

# Given a vector of values x, the logsumexp function calculates
# log(sum(exp(x))), but in a "smart" way that helps avoid
# numerical underflow or overflow problems.

logsumexp <- function(x)
{
  log(sum(exp(x - max(x)))) + max(x)
}

# Determine pmf for full conditional of k based on current values of other
# variables. Takes possible values of k, observed data y, current values of 
# lambda, and phi. It does this naively using a loop.

kprobloop <- function(kposs, y, lambda, phi)
{  
  # create vector to store argument of exponential function of 
  # unnormalized pmf, then calculate them
  x <- numeric(length(kposs))
  for(i in kposs)
  {
    x[i] <- i*(phi - lambda) + sum(y[1:i]) * log(lambda/phi)
  }
  #return full conditional pmf of k
  return(exp(x - logsumexp(x)))
}

yr <- 1851:1962
counts <- c(4,5,4,1,0,4,3,4,0,6,3,3,4,0,2,6,3,3,5,4,5,3,1,4,4,1,5,5,3,4,2,
            5,2,2,3,4,2,1,3,2,2,1,1,1,1,3,0,0,1,0,1,1,0,0,3,1,0,3,2,2,0,1,
            1,1,0,1,0,1,0,0,0,2,1,0,0,0,1,1,0,2,3,3,1,1,2,1,1,1,1,2,4,2,0,
            0,0,1,4,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1)

nsim = 10000
chain <- gibbsloop(nsim = nsim, y = counts, a = 4, b = 1, c = 1, d = 2, kposs = 1:112, phi = 1, k = 40)
hist(chain[1000:nsim,3]+1850,xlab="Year of changepoint",freq=FALSE,
     main=paste("Changepoint estimate: ",median(1850+chain[1000:nsim,3])))

hist(chain[1000:nsim,1],xlab="Rate of accidents per year before changepoint",freq=FALSE,
     main=paste("Rate per year estimate: ",median(chain[1000:nsim,1])))

hist(chain[1000:nsim,2],xlab="Rate of accidents per year after changepoint",freq=FALSE,
     main=paste("Rate per year estimate: ",median(chain[1000:nsim,2])))