---
title: "Modified five-dimensional example"
author: "Daniel Williamson and Victoria Volodina"
date: "18/01/2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_knit$set(root.dir = '~/Documents/ExeterUQ', echo = TRUE, warning = FALSE)
twd <- getwd()
print(twd)
```

We aim to redo the **Five-dimensional** example from the paper draft with an out of the box 4-extended Latin Hypercube (Williamson, 2015) on [-1, 1] and compare the performance of **stationary GP**, **our proposed nonstationary GP**, **TGP** and **CGP**.

```{r}
twd <- getwd()
source('BuildEmulator/BuildEmulator.R')
source('BuildEmulator/BuildEmulatorNSt.R')
source('Demonstrations/5DToyExampleFun.R')
```

We need to specify the regions and sub-regions for a 5D Toy function

```{r}
names.row <- c("group1", "group12", "group2", "group23","group3", "group34", "group4", "group45", "group5")
Group.specification <- data.frame(cbind(c(-1.00, -0.75, -0.72, -0.44, -0.40, -0.03, -0.01, 0.27, 0.37), 
                                  c(-0.75, -0.72, -0.44, -0.40, -0.03, -0.01, 0.27, 0.37, 1.00)), 
                                  row.names = names.row)
colnames(Group.specification) <- c("x5", "x5")

# For graphical purpose: we need to plot Toy function against 5D
x.plot <- seq(-1, 1, length.out = 500)
plot.group <- list()
for(i in 1:dim(Group.specification)[1]) {
  plot.group[[i]] <- which(x.plot >= Group.specification[i, 1] & x.plot < Group.specification[i, 2])
}
plot.output <- c()
for(n in 1:length(plot.group)) {
  s <- sapply(1:length(plot.group[[n]]), function(i) 
    GeneralFunWave(x.plot[plot.group[[n]][i]], a=a.vec[n], b=b.vec[n], 
                   beta1=beta1.vec[n], beta2=beta2.vec[n]))
  plot.output <- c(plot.output, s)
}
plot.z <- cbind(unlist(plot.group), na.omit(unlist(plot.output)))
plot.z.mod <- plot.z[order(plot.z[, 1]), ]
plot.output.final <- plot.z.mod[, 2]
plot.output.final <- plot.output.final - mean(plot.output.final) # centre around zero.

par(mfrow = c(1, 1), mar = c(4, 4, 2, 1))
plot(x.plot[1:499], plot.output.final, xlim = c(-1.1, 1.1), type = 'n', 
     xlab = 'x5', ylab = 'Y')
abline(v = c(-1.00, -0.75, -0.72, -0.44, -0.40, -0.03, -0.01, 0.27, 0.37, 1.00), lty = 'dashed')
group.mixture <- c(2, 4, 6, 8)
for(i in 1:length(group.mixture)) {
  polygon(c(seq(Group.specification[group.mixture[i], 1], Group.specification[group.mixture[i], 2], length.out = 100),
            rev(seq(Group.specification[group.mixture[i], 1], Group.specification[group.mixture[i], 2], length.out = 100))),
          c(rep(-15, 100), rep(6, 100)),col="skyblue")
}
lines(x.plot[1:499], plot.output.final)
```

Let's split our input matrix **newExtendedNorm** and simulator evaluations at input matrix **output.final**:
and constuct stationary and nonstationary GP emulators with active prior specification for **x5**:

```{r}
# Load the data frame that we are dealing with
load("Demonstrations/5DExampleMod.RData")
stGPFiveDToy <- nstGPFiveDToy <- MixtureFiveDToy <- list()

for(i in 1:length(LHC.data[[1]])) {
  tData5D <- data.frame(cbind(LHC.data[[1]][[i]], LHC.data[[2]][[i]]))
  names(tData5D) <- c('x1', 'x2', 'x3', 'x4', 'x5', 'y')
  cands <- names(tData5D)[1:5]
  X5V <- as.data.frame(LHC.data[[3]][[i]])
  y5V <- LHC.data[[4]][[i]]
  names(X5V) <- c('x1', 'x2', 'x3', 'x4', 'x5')
  # stationary GP emulator
  toy5D.lm <- EMULATE.lm(Response=names(tData5D)[6], tData=tData5D,
                         tcands=cands,tcanfacs=NULL,TryFouriers=TRUE,maxOrder=2,maxdf = 5)
  toy5D.lm$linModel <- lm(y ~ ., data = tData5D)
  toy5D.gp <- EMULATE.gpstan(meanResponse=toy5D.lm, tData=tData5D, additionalVariables=names(tData5D)[1:5], 
                             FastVersion = TRUE, activePrior = TRUE, activeVariables = c('x5'))
  LOOs.st <- stanLOOplot(StanEmulator = toy5D.gp, ParamNames=colnames(toy5D.gp$Design))
  NewSamplesToy5D <- EMULATOR.gpstan(X5V, Emulator=toy5D.gp, FastVersion = FALSE)
  stGPFiveDToy[[i]] <- cbind(NewSamplesToy5D$Expectation,
                             NewSamplesToy5D$Expectation-2*sqrt(NewSamplesToy5D$Variance),
                             NewSamplesToy5D$Expectation+2*sqrt(NewSamplesToy5D$Variance))
  
  # nonstationary GP emulator
  std.err <- CalStError(LOOs.st, tData5D$y)
  myem.mixture <- MIXTURE.design(y.std = std.err, X = tData5D[, 1:5], L=2)
  myem.gp.nst <- EMULATE.gpstanNSt(toy5D.lm, tData = tData5D, additionalVariables=names(tData5D)[1:5],
                                   FastVersion = TRUE, mixtureComp = myem.mixture, activePrior = TRUE, 
                                   activeVariables = c('x5'))
  MixtureFiveDToy[[i]] <- data.frame(cbind(std.err, myem.mixture$MixtureMat))
  mixture.predict <- MIXTURE.predict(x = X5V, mixtureComp = myem.mixture)
  predict.nst <- EMULATOR.gpstanNSt(X5V, Emulator = myem.gp.nst, mixturePredict = mixture.predict, 
                                    FastVersion = FALSE)
  nstGPFiveDToy[[i]] <- cbind(predict.nst$Expectation, predict.nst$Expectation-2*sqrt(predict.nst$Variance),
                              predict.nst$Expectation+2*sqrt(predict.nst$Variance))
}

MixtureFiveDToy.mod <- list()
for(i in 1:length(MixtureFiveDToy)) {
  MixtureFiveDToy.mod[[i]] <- data.frame(LHC.data[[1]][[i]], MixtureFiveDToy[[i]])
  names(MixtureFiveDToy.mod[[i]]) <- c('x1', 'x2', 'x3', 'x4', 'x5','std.err', 'red', 'blue')
}
```

Let's perform similar diagnostics for TGP

```{r}
TGPFiveDToy <- list()
CGPFiveDToy <- list()
for(i in 1:length(LHC.data[[1]])) {
  tData5D <- data.frame(cbind(LHC.data[[1]][[i]], LHC.data[[2]][[i]]))
  names(tData5D) <- c('x1', 'x2', 'x3', 'x4', 'x5', 'y')
  cands <- names(tData5D)[1:5]
  X5V <- as.data.frame(LHC.data[[3]][[i]])
  y5V <- LHC.data[[4]][[i]]
  names(X5V) <- c('x1', 'x2', 'x3', 'x4', 'x5')

  TGP.mod <- btgp(tData5D[, 1:5], tData5D[, 6], X5V, corr='expsep', R=2, 
                  trace=FALSE, nug.p=0, gd=c(0.01, 0.01), d = 5)
  TGPFiveDToy[[i]] <- cbind(TGP.mod$ZZ.mean, TGP.mod$ZZ.mean - 2*sqrt(TGP.mod$ZZ.s2), 
                            TGP.mod$ZZ.mean + 2*sqrt(TGP.mod$ZZ.s2))
}
```

and for CGP

```{r}
for(i in 1:length(LHC.data[[1]])) {
  tData5D <- data.frame(cbind(LHC.data[[1]][[i]], LHC.data[[2]][[i]]))
  names(tData5D) <- c('x1', 'x2', 'x3', 'x4', 'x5', 'y')
  cands <- names(tData5D)[1:5]
  X5V <- as.data.frame(LHC.data[[3]][[i]])
  y5V <- LHC.data[[4]][[i]]
  names(X5V) <- c('x1', 'x2', 'x3', 'x4', 'x5')
  
  CGP.mod <- CGP(tData5D[, 1:5], tData5D[, 6])
  CGP.mod.predict <- predict(CGP.mod, X5V, PI = TRUE)
  std.dev <- (CGP.mod.predict$Y_up - CGP.mod.predict$Yp)/1.96
  CGPFiveDToy[[i]] <- cbind(CGP.mod.predict$Yp, CGP.mod.predict$Yp - 2*std.dev, 
                            CGP.mod.predict$Yp + 2*std.dev)
}
```

Graphical comparison of the methods. 


```{r}
par(mfrow = c(4, 4), mar = c(2, 2, 1, 1))
for(i in 1:4) {
  ValidPlot(stGPFiveDToy[[i]], LHC.data[[3]][[i]], LHC.data[[4]][[i]],
            axis = 5, heading = paste(" st-GP: Ens", toString(i), sep = " "), xrange = c(-1, 1), 
            interval = range(stGPFiveDToy[[i]], nstGPFiveDToy[[i]], TGPFiveDToy[[i]], CGPFiveDToy[[i]]),
            ParamNames = c("x5"))
  ValidPlot(nstGPFiveDToy[[i]],LHC.data[[3]][[i]], LHC.data[[4]][[i]], axis = 5, 
            heading = paste(" nst-GP: Ens", toString(i), sep = " "), xrange = c(-1, 1), 
            interval = range(stGPFiveDToy[[i]], nstGPFiveDToy[[i]], TGPFiveDToy[[i]], CGPFiveDToy[[i]] ),
            ParamNames = c("x5"))
  ValidPlot(TGPFiveDToy[[i]], LHC.data[[3]][[i]], LHC.data[[4]][[i]], axis = 5, 
            heading = paste(" TGP: Ens", toString(i), sep = " "), xrange = c(-1, 1), 
            interval = range(stGPFiveDToy[[i]], nstGPFiveDToy[[i]], TGPFiveDToy[[i]], CGPFiveDToy[[i]]),
            ParamNames = c("x5"))
  ValidPlot(CGPFiveDToy[[i]], LHC.data[[3]][[i]], LHC.data[[4]][[i]], axis = 5, 
            heading = paste(" CGP: Ens", toString(i), sep = " "), xrange = c(-1, 1), 
            interval = range(stGPFiveDToy[[i]], nstGPFiveDToy[[i]], TGPFiveDToy[[i]], CGPFiveDToy[[i]]),
            ParamNames = c("x5"))
}
```

Consider the performance of mixture model:

```{r}
p1 <- ggplot(MixtureFiveDToy.mod[[1]], aes(x = x5, y = std.err, col = rgb(red = red, blue = blue, green = 0), ymin = -3.5, ymax=3.5)) +
  geom_point(size = 2) + scale_color_identity()
p2 <- ggplot(MixtureFiveDToy.mod[[2]], aes(x = x5, y = std.err, col = rgb(red = red, blue = blue, green = 0), ymin = -3.5, ymax=3.5)) +
  geom_point(size = 2) + scale_color_identity()
p3 <- ggplot(MixtureFiveDToy.mod[[3]], aes(x = x5, y = std.err, col = rgb(red = red, blue = blue, green = 0), ymin = -3.5, ymax=3.5)) +
  geom_point(size = 2) + scale_color_identity()
p4 <- ggplot(MixtureFiveDToy.mod[[4]], aes(x = x5, y = std.err, col = rgb(red = red, blue = blue, green = 0), ymin = -3.5, ymax=3.5)) +
  geom_point(size = 2) + scale_color_identity()
multiplot(p1, p2, p3, p4, cols=4)
```