```{r}
suppressPackageStartupMessages(library(tidyverse))
knitr::opts_chunk$set(fig.path='figure/dice-kriging/', fig.height=4, 
                      cache.path='_knitr_cache/dice-kriging/')
```

# DiceKriging

> DiceKriging performs estimation, simulation, prediction and validation for various Kriging models.

We will reproduce some results from **Example 4.1** of the [DiceKriging JSS article](https://www.jstatsoft.org/article/view/v051i01/v51i01.pdf)
.

The following specifications define a Gaussian process over the univariate input `x` with known quadratic mean function `11 * x + 2 * x ^ 2` and Matern covariance function with `nu=5/2`, `sigma=5`, `theta=0.4`. The key function is `km` (stands for Kriging Model):


```{r}
library(DiceKriging)

input = data_frame(x = c(-1, -0.5, 0, 0.5, 1))
output = c(-9, -5, -1, 9, 11)
model = km(design = input, response = output, 
           formula = ~ x + I(x^2), coef.trend = c(0, 11, 2), 
           covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)
```

All parameters of the Kriging model are fully specified, no estimation is necessary.
The function `predict.km` is used to emulate output at new input points.
The argument `type = 'SK'` denotes simple kriging, as opposed to `'UK'` for universal kriging.

```{r plot-gp}
x_new = data_frame(x = seq(-2, 2, len=200))
y_new = predict(model, newdata=x_new, type='SK')

xy_new = x_new %>% mutate(
  mean = y_new$mean,
  sd   = y_new$sd,
  lwr  = y_new$lower95,
  upr  = y_new$upper95)

ggplot(data=xy_new, aes(x=x)) + 
  geom_ribbon(aes(ymin=lwr, ymax=upr), colour='black', fill=NA, linetype='dashed') + 
  geom_line(aes(y=mean)) + 
  geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
```

**The influence of the length scale parameter `theta`**

```{r plot-vary-theta, fig.width=10}
df_theta = c(0.05, 0.4, 1) %>% 
  map_df(.f = function(theta) {
    model = km(design = input, response = output, 
               formula = ~ x + I(x^2), coef.trend = c(0, 11, 2), 
               covtype = 'matern5_2', coef.cov = theta, coef.var = 5^2)
    y_new = predict(model, newdata=x_new, type='SK')
    return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95, theta=theta))
  })
ggplot(data=df_theta, aes(x=x)) + facet_grid(~theta) + 
  geom_line(aes(y=mean)) + 
  geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + 
  geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
```


**Incluence of the trend function**:

```{r plot-vary-trend, fig.width=10}
df_trend = 
  list(list(name = 'constant', formula = ~1,             coefs = 0),
       list(name = 'linear',   formula = ~x,             coefs = c(0, 10)),
       list(name = 'sine',     formula = ~sin(pi/4 * x), coefs = c(1, 15))) %>%
  map_df(.f = function(trend) {
    model = km(design = input, response = output, 
               formula = trend$formula, coef.trend = trend$coefs, 
               covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)
    y_new = predict(model, newdata=x_new, type='SK')
    return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, 
                            upr=y_new$upper95, name=trend$name))
  })

ggplot(data=df_trend, aes(x=x)) + facet_grid(~name) + 
  geom_line(aes(y=mean)) + 
  geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + 
  geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
```

**Simulating from the GP model**:

Unconditional and conditional simulations can be generated with the `simulate` function:

```{r plot-simulations, fig.width=10}
df_sim = list(list(cond = FALSE,  type = 1),
              list(cond = TRUE,   type = 2)) %>%
  map_df(.f = function(sim_cond) {
    simulate(object=model, nsim=5, newdata=x_new, cond=sim_cond$cond) %>% 
      t %>% as_data_frame %>% bind_cols(x_new) %>% 
      gather('simulation', 'y', -x) %>% mutate(type=sim_cond$type)
  }) %>% mutate(type = factor(type, labels=c('unconditional', 'conditional')))

ggplot(data=df_sim) + facet_wrap(~type) + 
  geom_line(aes(x=x, y=y, colour=simulation), show.legend=FALSE) +
  geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
```


**Parameter estimation**:

If trend and covariance parameters are left unspecified, they are estimated by `km`:

```{r}
set.seed(1234)
model_est = km(~., design=input, response=output, covtype='gauss')
model_est
```

```{r plot-estimated}
y_new = predict(model_est, newdata=x_new, type='UK')
df_est = x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95)

ggplot(data=df_est, aes(x=x)) + geom_line(aes(y=mean)) + 
  geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + 
  geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
```