# Document setup: load the tidyverse quietly (suppress startup chatter) and
# point knitr's figure and cache output at dice-kriging-specific directories.
suppressPackageStartupMessages(library(tidyverse))
knitr::opts_chunk$set(fig.path='figure/dice-kriging/', fig.height=4, 
                      cache.path='_knitr_cache/dice-kriging/')

DiceKriging

DiceKriging performs estimation, simulation, prediction and validation for various Kriging models.

We will reproduce some results from Example 4.1 of the DiceKriging JSS article.

The following specifications define a Gaussian process over the univariate input x with known quadratic mean function 11 * x + 2 * x ^ 2 and Matern covariance function with nu=5/2, sigma=5, theta=0.4. The key function is km (stands for Kriging Model):

library(DiceKriging)

# Five design points and their observed responses. tibble() replaces the
# deprecated tibble::data_frame().
input  <- tibble(x = c(-1, -0.5, 0, 0.5, 1))
output <- c(-9, -5, -1, 9, 11)

# Fully specified Kriging model, so km() does no estimation:
# quadratic trend 11*x + 2*x^2 (coef.trend = intercept, x, x^2 coefficients),
# Matern 5/2 covariance with length scale theta = 0.4 and variance 5^2.
model <- km(design = input, response = output,
            formula = ~ x + I(x^2), coef.trend = c(0, 11, 2),
            covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)

All parameters of the Kriging model are fully specified, no estimation is necessary. The function predict.km is used to emulate output at new input points. The argument type = 'SK' denotes simple kriging, as opposed to 'UK' for universal kriging.

# Dense grid of new inputs for prediction; tibble() replaces the deprecated
# data_frame().
x_new <- tibble(x = seq(-2, 2, len = 200))
# Simple-kriging ('SK') prediction: trend coefficients are treated as known.
y_new <- predict(model, newdata = x_new, type = 'SK')

# Collect the posterior mean, sd and 95% bounds next to the grid points.
xy_new <- x_new %>% mutate(
  mean = y_new$mean,
  sd   = y_new$sd,
  lwr  = y_new$lower95,
  upr  = y_new$upper95)

# Posterior mean with a dashed 95% band; red points mark the design data.
ggplot(xy_new, aes(x = x)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr),
              fill = NA, colour = "black", linetype = "dashed") +
  geom_line(aes(y = mean)) +
  geom_point(data = bind_cols(input, y = output),
             aes(x = x, y = y), colour = "red")

The influence of the length scale parameter theta

# Refit the same model for several length scales theta and stack the
# simple-kriging predictions into one long data frame (one set of rows per
# theta, for faceting below).
df_theta <- c(0.05, 0.4, 1) %>% 
  map_df(.f = function(theta) {
    # Local name avoids shadowing the global `model` fitted above.
    model_theta <- km(design = input, response = output,
                      formula = ~ x + I(x^2), coef.trend = c(0, 11, 2),
                      covtype = 'matern5_2', coef.cov = theta, coef.var = 5^2)
    pred <- predict(model_theta, newdata = x_new, type = 'SK')
    # Last expression is the value; no explicit return() (tidyverse style).
    x_new %>% mutate(mean = pred$mean, lwr = pred$lower95,
                     upr = pred$upper95, theta = theta)
  })
# One facet per length scale: small theta gives a wigglier mean and wider
# uncertainty bands between the design points.
ggplot(df_theta, aes(x = x)) +
  facet_grid(~theta) +
  geom_line(aes(y = mean)) +
  geom_line(aes(y = lwr), linetype = "dashed") +
  geom_line(aes(y = upr), linetype = "dashed") +
  geom_point(data = bind_cols(input, y = output),
             aes(x = x, y = y), colour = "red")

Influence of the trend function:

# Refit with three trend specifications (constant, linear, sine), holding the
# covariance parameters fixed, and stack the predictions for faceting.
df_trend <-
  list(list(name = 'constant', formula = ~1,             coefs = 0),
       list(name = 'linear',   formula = ~x,             coefs = c(0, 10)),
       list(name = 'sine',     formula = ~sin(pi/4 * x), coefs = c(1, 15))) %>%
  map_df(.f = function(spec) {
    fit <- km(design = input, response = output,
              formula = spec$formula, coef.trend = spec$coefs,
              covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)
    pred <- predict(fit, newdata = x_new, type = 'SK')
    x_new %>% mutate(mean = pred$mean, lwr = pred$lower95,
                     upr = pred$upper95, name = spec$name)
  })

# One facet per trend specification; the chosen trend drives extrapolation
# behaviour away from the red design points.
ggplot(df_trend, aes(x = x)) +
  facet_grid(~name) +
  geom_line(aes(y = mean)) +
  geom_line(aes(y = lwr), linetype = "dashed") +
  geom_line(aes(y = upr), linetype = "dashed") +
  geom_point(data = bind_cols(input, y = output),
             aes(x = x, y = y), colour = "red")

Simulating from the GP model:

Unconditional and conditional simulations can be generated with the simulate function:

# Draw 5 GP sample paths twice: unconditional (cond = FALSE) and conditioned
# on the observed data (cond = TRUE); `type` tags the two cases for faceting.
df_sim = list(list(cond = FALSE,  type = 1),
              list(cond = TRUE,   type = 2)) %>%
  map_df(.f = function(sim_cond) {
    # simulate() returns one row per sample path; transpose so rows follow
    # the x grid, then one column per simulation.
    # NOTE(review): as_data_frame() and gather() are deprecated/superseded
    # (as_tibble(), pivot_longer()), but the replacements change the
    # auto-generated column names / row order — left as-is deliberately.
    simulate(object=model, nsim=5, newdata=x_new, cond=sim_cond$cond) %>% 
      t %>% as_data_frame %>% bind_cols(x_new) %>% 
      gather('simulation', 'y', -x) %>% mutate(type=sim_cond$type)
  }) %>% mutate(type = factor(type, labels=c('unconditional', 'conditional')))

# Sample paths by type; conditional draws interpolate the red design points.
ggplot(df_sim) +
  facet_wrap(~type) +
  geom_line(aes(x = x, y = y, colour = simulation), show.legend = FALSE) +
  geom_point(data = bind_cols(input, y = output),
             aes(x = x, y = y), colour = "red")

Parameter estimation:

If trend and covariance parameters are left unspecified, they are estimated by km:

# With trend and covariance parameters left unspecified, km() estimates them
# by maximum likelihood (BFGS, per the captured output below). The seed makes
# the optimiser's random initial values reproducible.
set.seed(1234)
model_est = km(~., design=input, response=output, covtype='gauss')
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~x
## * covariance model : 
##   - type :  gauss 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 
##   - parameters upper bounds :  4 
##   - best initial criterion value(s) :  -9.365339 
## 
## N = 1, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       9.3653  |proj g|=   2.8397e-34
## Derivative >= 0, backtracking line search impossible.final  value 9.365339 
## stopped after 0 iterations
# Printing the fitted object shows the estimated trend coefficients,
# length scale theta, and process variance.
model_est
## 
## Call:
## km(formula = ~., design = input, response = output, covtype = "gauss")
## 
## Trend  coeff.:
##                Estimate
##  (Intercept)     1.0000
##            x    10.8000
## 
## Covar. type  : gauss 
## Covar. coeff.:
##                Estimate
##     theta(x)     0.0380
## 
## Variance estimate: 2.48
# Universal kriging ('UK') accounts for the extra uncertainty from the
# estimated trend coefficients, unlike 'SK'.
y_new = predict(model_est, newdata=x_new, type='UK')
df_est = x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95)

# Estimated model: posterior mean with dashed 95% limits and design points.
ggplot(df_est, aes(x = x)) +
  geom_line(aes(y = mean)) +
  geom_line(aes(y = lwr), linetype = "dashed") +
  geom_line(aes(y = upr), linetype = "dashed") +
  geom_point(data = bind_cols(input, y = output),
             aes(x = x, y = y), colour = "red")