```{r} suppressPackageStartupMessages(library(tidyverse)) knitr::opts_chunk$set(fig.path='figure/dice-kriging/', fig.height=4, cache.path='_knitr_cache/dice-kriging/') ``` # DiceKriging > DiceKriging performs estimation, simulation, prediction and validation for various Kriging models. We will reproduce some results from **Example 4.1** of the [DiceKriging JSS article](https://www.jstatsoft.org/article/view/v051i01/v51i01.pdf) . The following specifications define a Gaussian process over the univariate input `x` with known quadratic mean function `11 * x + 2 * x ^ 2` and Matern covariance function with `nu=5/2`, `sigma=5`, `theta=0.4`. The key function is `km` (stands for Kriging Model): ```{r} library(DiceKriging) input = data_frame(x = c(-1, -0.5, 0, 0.5, 1)) output = c(-9, -5, -1, 9, 11) model = km(design = input, response = output, formula = ~ x + I(x^2), coef.trend = c(0, 11, 2), covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2) ``` All parameters of the Kriging model are fully specified, no estimation is necessary. The function `predict.km` is used to emulate output at new input points. The argument `type = 'SK'` denotes simple kriging, as opposed to `'UK'` for universal kriging. ```{r plot-gp} x_new = data_frame(x = seq(-2, 2, len=200)) y_new = predict(model, newdata=x_new, type='SK') xy_new = x_new %>% mutate( mean = y_new$mean, sd = y_new$sd, lwr = y_new$lower95, upr = y_new$upper95) ggplot(data=xy_new, aes(x=x)) + geom_ribbon(aes(ymin=lwr, ymax=upr), colour='black', fill=NA, linetype='dashed') + geom_line(aes(y=mean)) + geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red') ``` **The influence of the length scale parameter `theta`** ```{r plot-vary-theta, fig.width=10} df_theta = c(0.05, 0.4, 1) %>% map_df(.f = function(theta) { model = km(design = input, response = output, formula = ~ x + I(x^2), coef.trend = c(0, 11, 2), covtype = 'matern5_2', coef.cov = theta, coef.var = 5^2) y_new = predict(model, newdata=x_new, type='SK') return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95, theta=theta)) }) ggplot(data=df_theta, aes(x=x)) + facet_grid(~theta) + geom_line(aes(y=mean)) + geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red') ``` **Incluence of the trend function**: ```{r plot-vary-trend, fig.width=10} df_trend = list(list(name = 'constant', formula = ~1, coefs = 0), list(name = 'linear', formula = ~x, coefs = c(0, 10)), list(name = 'sine', formula = ~sin(pi/4 * x), coefs = c(1, 15))) %>% map_df(.f = function(trend) { model = km(design = input, response = output, formula = trend$formula, coef.trend = trend$coefs, covtype = 'matern5_2', coef.cov = 0.4, coef.var = 5^2) y_new = predict(model, newdata=x_new, type='SK') return(x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95, name=trend$name)) }) ggplot(data=df_trend, aes(x=x)) + facet_grid(~name) + geom_line(aes(y=mean)) + geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red') ``` **Simulating from the GP model**: Unconditional and conditional simulations can be generated with the `simulate` function: ```{r plot-simulations, fig.width=10} df_sim = list(list(cond = FALSE, type = 1), list(cond = TRUE, type = 2)) %>% map_df(.f = function(sim_cond) { simulate(object=model, nsim=5, newdata=x_new, cond=sim_cond$cond) %>% t %>% as_data_frame %>% bind_cols(x_new) %>% gather('simulation', 'y', -x) %>% mutate(type=sim_cond$type) }) %>% mutate(type = factor(type, labels=c('unconditional', 'conditional'))) ggplot(data=df_sim) + facet_wrap(~type) + geom_line(aes(x=x, y=y, colour=simulation), show.legend=FALSE) + geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red') ``` **Parameter estimation**: If trend and covariance parameters are left unspecified, they are estimated by `km`: ```{r} set.seed(1234) model_est = km(~., design=input, response=output, covtype='gauss') model_est ``` ```{r plot-estimated} y_new = predict(model_est, newdata=x_new, type='UK') df_est = x_new %>% mutate(mean=y_new$mean, lwr=y_new$lower95, upr=y_new$upper95) ggplot(data=df_est, aes(x=x)) + geom_line(aes(y=mean)) + geom_line(aes(y=lwr), lty='dashed') + geom_line(aes(y=upr), lty='dashed') + geom_point(data=bind_cols(input, y=output), aes(x=x, y=y), col='red') ```