suppressPackageStartupMessages(library(tidyverse))
knitr::opts_chunk$set(fig.path='figure/dice-kriging/', fig.height=4,
cache.path='_knitr_cache/dice-kriging/')
DiceKriging performs estimation, simulation, prediction and validation for various Kriging models.
We will reproduce some results from Example 4.1 of the DiceKriging JSS article .
The following specifications define a Gaussian process over the univariate input x with known quadratic mean function 11 * x + 2 * x ^ 2 and Matern covariance function with nu=5/2, sigma=5, theta=0.4. The key function is km (stands for Kriging Model):
library(DiceKriging)
input = data_frame(x = c(-1, -0.5, 0, 0.5, 1))
output = c(-9, -5, -1, 9, 11)
model = km(design = input, response = output,
formula = ~ x + I(x^2), coef.trend = c(0, 11, 2),
covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)
All parameters of the Kriging model are fully specified, no estimation is necessary. The function predict.km is used to emulate output at new input points. The argument type = 'SK' denotes simple kriging, as opposed to 'UK' for universal kriging.
x_new = data_frame(x = seq(-2, 2, len=200))
y_new = predict(model, newdata=x_new, type='SK')
xy_new = x_new %>% mutate(
mean = y_new$mean,
sd = y_new$sd,
lwr = y_new$lower95,
upr = y_new$upper95)
ggplot(data=xy_new, aes(x=x)) +
geom_ribbon(aes(ymin=lwr, ymax=upr), colour='black', fill=NA, linetype='dashed') +
geom_line(aes(y=mean)) +
geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
The influence of the length scale parameter theta
df_theta = c(0.05, 0.4, 1) %>%
map_df(.f = function(theta) {
model = km(design = input, response = output,
formula = ~ x + I(x^2), coef.trend = c(0, 11, 2),
covtype = 'matern5_2', coef.cov = theta, coef.var = 5^2)
y_new = predict(model, newdata=x_new, type='SK')
return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95, theta=theta))
})
ggplot(data=df_theta, aes(x=x)) + facet_grid(~theta) +
geom_line(aes(y=mean)) +
geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') +
geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
Incluence of the trend function:
df_trend =
list(list(name = 'constant', formula = ~1, coefs = 0),
list(name = 'linear', formula = ~x, coefs = c(0, 10)),
list(name = 'sine', formula = ~sin(pi/4 * x), coefs = c(1, 15))) %>%
map_df(.f = function(trend) {
model = km(design = input, response = output,
formula = trend$formula, coef.trend = trend$coefs,
covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2)
y_new = predict(model, newdata=x_new, type='SK')
return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95,
upr=y_new$upper95, name=trend$name))
})
ggplot(data=df_trend, aes(x=x)) + facet_grid(~name) +
geom_line(aes(y=mean)) +
geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') +
geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
Simulating from the GP model:
Unconditional and conditional simulations can be generated with the simulate function:
df_sim = list(list(cond = FALSE, type = 1),
list(cond = TRUE, type = 2)) %>%
map_df(.f = function(sim_cond) {
simulate(object=model, nsim=5, newdata=x_new, cond=sim_cond$cond) %>%
t %>% as_data_frame %>% bind_cols(x_new) %>%
gather('simulation', 'y', -x) %>% mutate(type=sim_cond$type)
}) %>% mutate(type = factor(type, labels=c('unconditional', 'conditional')))
ggplot(data=df_sim) + facet_wrap(~type) +
geom_line(aes(x=x, y=y, colour=simulation), show.legend=FALSE) +
geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')
Parameter estimation:
If trend and covariance parameters are left unspecified, they are estimated by km:
set.seed(1234)
model_est = km(~., design=input, response=output, covtype='gauss')
##
## optimisation start
## ------------------
## * estimation method : MLE
## * optimisation method : BFGS
## * analytical gradient : used
## * trend model : ~x
## * covariance model :
## - type : gauss
## - nugget : NO
## - parameters lower bounds : 1e-10
## - parameters upper bounds : 4
## - best initial criterion value(s) : -9.365339
##
## N = 1, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate 0 f= 9.3653 |proj g|= 2.8397e-34
## Derivative >= 0, backtracking line search impossible.final value 9.365339
## stopped after 0 iterations
model_est
##
## Call:
## km(formula = ~., design = input, response = output, covtype = "gauss")
##
## Trend coeff.:
## Estimate
## (Intercept) 1.0000
## x 10.8000
##
## Covar. type : gauss
## Covar. coeff.:
## Estimate
## theta(x) 0.0380
##
## Variance estimate: 2.48
y_new = predict(model_est, newdata=x_new, type='UK')
df_est = x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95)
ggplot(data=df_est, aes(x=x)) + geom_line(aes(y=mean)) +
geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') +
geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red')