twd <- getwd() packages <- c('GenSA', 'far', 'fields', 'lhs', 'maps', 'mco', 'mvtnorm', 'ncdf4', 'parallel', 'rstan', 'shape', 'tensor', 'withr', 'loo', 'bayesplot') sapply(packages, require, character.only = TRUE, quietly = TRUE) source("BuildEmulator/AutoLMcode.R") source("BuildEmulator/CustomPredict.R") source("BuildEmulator/JamesNewDevelopment.R") source("BuildEmulator/DannyDevelopment.R") # Stan files for GP Emulator in Stan tfile_loc = paste(twd, "/BuildEmulator/FitGP.stan", sep = "") tprednewfile_loc = paste(twd, "/BuildEmulator/PredictGen.stan", sep = "") ccode_fit <- stanc(file = tfile_loc) model_fit <-stan_model(stanc_ret = ccode_fit) ccode_predict <- stanc(file = tprednewfile_loc) model_predict <- stan_model(stanc_ret = ccode_predict) gpstan.default.params <- list(SigSq = 0.15, SigSqV = 0.25, AlphaAct = 4, BetaAct = 4, AlphaInact = 42, BetaInact = 9, AlphaNugget = 3, BetaNugget = 0.1, AlphaRegress = 0, BetaRegress = 10, nugget = 0.0001, SwitchDelta = 1, SwitchNugget = 1, SwitchSigma = 1) EMULATE.lm <- function(Response, tData, dString="tData",maxdf=NULL,tcands=cands,tcanfacs=canfacs,TryFouriers=FALSE,maxOrder=NULL){ #' Generate a linear model for Gaussian Process (GP) Emulator #' #' @param Response a string corresponding to the response variable #' @param tData a data frame containing the inputs and outputs #' @param dString a string corresponding to the name of a data frame #' @param maxdf a maximim number of degrees of freedom that we are expecting to #' lose by adding terms to the linear model #' @param tcands a vector of parameter names #' @param tcanfacs a vector of parameter names that are factors #' @param TryFouriers a logical argument with default FALSE. TRUE allows #' the Fourier transformation of the parameters #' @param maxOrder maximum order of Fourier terms (Fourier series) #' #' @return A linear model to EMULATE.gpstan function #' if(is.null(maxdf)){ maxdf <- ceiling(length(tData[,1])/10) } startLM <- eval(parse(text=paste("lm(",Response,"~1,data=",dString,")",sep=""))) if(maxdf < 2) return(list(linModel=startLM,Names=NULL,mainEffects=NULL,Interactions=NULL,Factors=NULL,FactorInteractions=NULL,ThreeWayInters=NULL,DataString=dString,ResponseString=Response,tData=tData,BestFourier=TRUE)) if(TryFouriers) msl <- list(linModel=startLM,Names=NULL,mainEffects=NULL,Interactions=NULL,Factors=NULL,FactorInteractions=NULL,ThreeWayInters=NULL,DataString=dString,ResponseString=Response,tData=tData,BestFourier=TRUE,maxOrder=maxOrder) else msl <- list(linModel=startLM,Names=NULL,mainEffects=NULL,Interactions=NULL,Factors=NULL,FactorInteractions=NULL,ThreeWayInters=NULL,DataString=dString,ResponseString=Response,tData=tData,BestFourier=FALSE) added <- AddBest(tcands,tcanfacs,msl) for(i in 1:30){ added <- AddBest(tcands,tcanfacs,added) if(!is.null(added$Break)) break } print(summary(added$linModel)) if(length(tData[,1])-1-added$linModel$df > maxdf){ trm <- removeNterms(N=500,linModel=added$linModel,dataString=added$DataString,responseString=added$ResponseString,Tolerance=sum(sort(anova(added$linModel)$"Sum Sq"))*(1e-4),Names=added$Names,mainEffects=added$mainEffects,Interactions=added$Interactions,Factors=added$Factors,FactorInteractions=added$FactorInteractions,ThreeWayInters=added$ThreeWayInters,tData=added$tData,Fouriers = added$Fouriers) print(summary(trm$linModel)) trm2 <- removeNterms(N=max(c(0,length(tData[,1])-maxdf-1-trm$linModel$df)),linModel=trm$linModel,dataString=added$DataString,responseString=added$ResponseString,Tolerance=sum(sort(anova(added$linModel)$"Sum Sq"))*(5e-1),Names=trm$Names,mainEffects=trm$mainEffects,Interactions=trm$Interactions,Factors=trm$Factors,FactorInteractions=trm$FactorInteractions,ThreeWayInters=trm$ThreeWayInters,tData=added$tData,Fouriers=trm$Fouriers) print(summary(trm2$linModel)) trm2$pre.Lists <- get.predict.mats(trm2$linModel) trm2$DataString <- dString trm2$ResponseString <- Response return(trm2) } else{ return(added) } } CovMatrix <- function(Design, cls){ #' Constructs a squared exponential correlation matrix #' #' @param Design a matrix of inputs for design set #' @param cls a vector of correlation length parameters #' #' @return A matrix of correlation calculated for design points #' n = dim(Design)[1] # number of training points if(is.null(n)) n <- length(Design) p = dim(Design)[2] # number of parameters if(is.null(p)) p <- 1 stopifnot(p == length(cls)) if(!is.numeric(as.matrix(Design))){ for (i in 1:p){ Design[,i] = as.numeric(as.character(Design[,i])) } } D = as.matrix(dist(scale(Design,center=FALSE,scale=cls),method="euclidean",diag=TRUE,upper=TRUE)) return(exp(-(D^2))) } NewCov <- function(x, Design, cls){ #' A matrix of correlations (squared exponential form) between x and Design #' matrices #' #' @param x a matrix of inputs for validation set #' @param Design a matrix of inputs for design set #' @param cls a vector of correlation length parameters #' #' @return A matrix of correlations between x and Design #' n = dim(Design)[1] # number of training points if(is.null(n)) n <- length(Design) m = dim(x)[1] p = dim(Design)[2] # number of parameters if(is.null(p)) p <- 1 stopifnot(p == length(cls)) if(!(is.null(names(Design)))) x = x[,names(Design)] if(!is.numeric(as.matrix(Design))){ for (i in 1:p){ Design[,i] = as.numeric(as.character(Design[,i])) } } if(!is.numeric(as.matrix(x))){ for (i in 1:p){ x[,i] = as.numeric(as.character(x[,i])) } } D = as.matrix(rdist(scale(Design,center=FALSE,scale=cls),scale(x,center=FALSE,scale=cls))) return(exp(-(D^2))) } #Build a GP stan emulator #meanResponse is the output of EMULATE.lm containing a linear model emulator #... Refer to covariance parameters EMULATE.gpstan <- function(meanResponse, CompiledModelFit = model_fit, sigmaPrior = FALSE, nuggetPrior = FALSE, activePrior = FALSE, activeVariables = NULL, tData, additionalVariables = NULL, FastVersion = FALSE, prior.params = gpstan.default.params, ...){ #' Function implement Gaussian process (GP) emulator #' #' @param meanResponse the output of EMULATE.lm containing a linaer model emulator #' @param CompiledModelFit an instance of S4 class stanmodel used for fitting a GP model #' @param sigmaPrior a logical argument with FALSE (default) specifying sigma prior parameters #' at the values found from the linear model fit (NEED MORE CLARIFICATION) #' @param nuggetPrior a logical argument with FALSE (default) specifying nugget parameter at #' fixed value. TRUE specifying a prior distribution for the nugget parameter #' @param activePrior a logical argument with FALSE (default) specifying the same prior #' for correlation length parameters. TRUE specifying different priors for correlation #' length parameters. #' @param activeVariables a vector of parametere names considered to be active, with default NULL #' @param tData a data frame containing the inputs and outputs and must be the same as that to create meanResponse #' @param additionalVariables is a vector of parameter names that we want to fit a GP to that didn't make it into the model #' the default model, with additionalVariables=NULL, is to only treat those terms that make it into the linear model as active #' @param FastVersion TRUE value results at saving parameter values at the posterior mean #' FALSE (default) saves the posterior samples for parameters. #' @param prior.params a list of parameters to the prior specification. The default is #' gpstan.default.params. #' @return A GP emulator object #Design <- as.matrix(tData[,which((names(tData)%in%meanResponse$Names) | (names(tData)%in%meanResponse$Factors) | (names(tData)%in%additionalVariables) | (names(tData) %in% names(meanResponse$Fouriers)))]) Design <- as.matrix(tData[,which((names(tData)%in%meanResponse$Names) | (names(tData)%in%additionalVariables) | (names(tData) %in% names(meanResponse$Fouriers)))]) #Perhaps can take factors out of Design? H still includes them, only effects corr lengths in Stan #Slightly careful handling in EMULATOR.gpstan would be required tF <- tData[,which(names(tData)==meanResponse$ResponseString)] H <- model.matrix(meanResponse$linModel) N1 <- dim(Design)[1] Np <- dim(H)[2] if(sigmaPrior) prior.params$SwitchSigma = 2 if(nuggetPrior) { prior.params$SwitchNugget = 2 #prior.params$UpperLimitNugget = prior.params$BetaNugget/(prior.params$AlphaNugget-1) + 10*sqrt(prior.params$BetaNugget^2/((prior.params$AlphaNugget-1)^2*(prior.params$AlphaNugget-2))) } # else { # prior.params$UpperLimitNugget = 2*prior.params$nugget # } if(activePrior) prior.params$SwitchDelta = 2 if(!sigmaPrior) { consEm <- EMULATE.lm(Response=meanResponse$ResponseString, tData=tData, tcands="Noise",tcanfacs=NULL,TryFouriers=TRUE,maxOrder=2,maxdf = 0) sd2 <- summary(consEm$linModel)$sig - summary(meanResponse$linModel)$sig sigsq <- summary(meanResponse$linModel)$sigma sigsqvar <- sd2 # consider the constant Gaussian Process mean if(Np == 1) sigsqvar <- summary(meanResponse$linModel)$sig prior.params$SigSq <- sigsq prior.params$SigSqV <- sigsqvar } if(activePrior) { #if(is.null(activeVariables)) active.inputs <- names(tData)[which((names(tData)%in%meanResponse$Names) | (names(tData)%in%additionalVariables) |(names(tData)%in%names(meanResponse$Fouries)))] #else active.inputs <- activeVariables #inactive.inputs <- names(tData)[-which((names(tData)%in%active.inputs)|(names(tData)%in%meanResponse$ResponseString))] active.inputs <- colnames(Design)[colnames(Design)%in%activeVariables] inactive.inputs <- colnames(Design)[-which(colnames(Design)%in%active.inputs)] Design.active <- as.matrix(tData[, active.inputs]) Design.inactive <- as.matrix(tData[, inactive.inputs]) Design <- cbind(Design.active, Design.inactive) colnames(Design) <- c(active.inputs, inactive.inputs) p.active <- length(active.inputs) p.inactive <- length(inactive.inputs) p <- dim(Design)[2] init.list <- list(list(beta=array(meanResponse$linModel$coefficients, dim = Np), sigma=prior.params$SigSq, nugget = prior.params$nugget, delta_par=array(c(rep(0.05, p.active), rep(0.7, p.inactive)), dim=p)), list(beta=array(meanResponse$linModel$coefficients, dim = Np), sigma=prior.params$SigSq, nugget = prior.params$nugget, delta_par=array(c(rep(0.1, p.active), rep(1, p.inactive)), dim=p))) StanEmulator <- sampling(CompiledModelFit, data = list(N1 = N1, pact = p.active, pinact = p.inactive, p = p, Np = Np, SwitchDelta = prior.params$SwitchDelta, SwitchNugget = prior.params$SwitchNugget, SwitchSigma = prior.params$SwitchSigma, SigSq = prior.params$SigSq, SigSqV = prior.params$SigSqV, AlphaAct = prior.params$AlphaAct, BetaAct = prior.params$BetaAct, AlphaInact = prior.params$AlphaInact, BetaInact = prior.params$BetaInact, AlphaNugget = prior.params$AlphaNugget, BetaNugget = prior.params$BetaNugget, AlphaRegress = prior.params$AlphaRegress, BetaRegress = prior.params$BetaRegress, nuggetfix = prior.params$nugget, X1 = Design, y1 = tF, H1 = H), #UpperLimitNugget = prior.params$UpperLimitNugget), iter = 2000, warmup = 1000, chains = 2, cores = 2, init = init.list, pars = c('nugget', 'sigma', 'delta_par', 'beta', 'log_lik'), ...) } else { # consider the same prior specification for delta_par (correlation length parameter) Design <- as.matrix(tData[,which((names(tData)%in%meanResponse$Names) | (names(tData)%in%additionalVariables) | (names(tData) %in% names(meanResponse$Fouriers)))]) p <- dim(Design)[2] p.active = p.inactive = 1 init.list <- list(list(beta=array(meanResponse$linModel$coefficients, dim = Np), sigma=prior.params$SigSq, nugget = prior.params$nugget, delta_par=array(rep(0.7, p), dim=p)), list(beta=array(meanResponse$linModel$coefficients, dim = Np), sigma=prior.params$SigSq, nugget = prior.params$nugget, delta_par=array(rep(1, p), dim=p))) StanEmulator <- sampling(CompiledModelFit, data = list(N1 = N1, pact = p.active, pinact = p.inactive, p = p, Np = Np, SwitchDelta = prior.params$SwitchDelta, SwitchNugget = prior.params$SwitchNugget, SwitchSigma = prior.params$SwitchSigma, SigSq = prior.params$SigSq, SigSqV = prior.params$SigSqV, AlphaAct = prior.params$AlphaAct, BetaAct = prior.params$BetaAct, AlphaInact = prior.params$AlphaInact, BetaInact = prior.params$BetaInact, AlphaNugget = prior.params$AlphaNugget, BetaNugget = prior.params$BetaNugget, AlphaRegress = prior.params$AlphaRegress, BetaRegress = prior.params$BetaRegress, nuggetfix = prior.params$nugget, #UpperLimitNugget = prior.params$UpperLimitNugget, X1 = Design, y1 = tF, H1 = H), iter = 2000, warmup = 1000, chains = 2, cores = 2, init = init.list, pars = c('nugget', 'sigma', 'delta_par', 'beta', 'log_lik'), ...) } ParameterSamples <- rstan::extract(StanEmulator, pars = c('sigma', 'delta_par', 'beta', 'nugget')) if(FastVersion){ lps <- extract_log_lik(StanEmulator) tMAP <- which.max(rowSums(lps)) A <- ParameterSamples$sigma[tMAP]^2*CovMatrix(Design, ParameterSamples$delta_par[tMAP, ]) #+ diag(ParameterSamples$nugget[tMAP], nrow = dim(Design)[1], ncol = dim(Design)[1]) QA <- chol(A) diff <- tF - H%*%ParameterSamples$beta[tMAP, ] Ldiff <- backsolve(QA, diff, transpose=TRUE) #part of the mean update that can be done offline FastParts <- list(tMAP=tMAP, A=A, QA=QA, Ldiff=Ldiff) } else{ FastParts <- NULL } gp.list <- list(Design=Design, tF=tF, H=H, ParameterSamples=ParameterSamples, FastParts=FastParts, StanModel = StanEmulator, prior.params = prior.params, init.list = init.list) return(c(meanResponse,gp.list)) } EMULATOR.gpstan <- function(x, Emulator, GP=TRUE, FastVersion=FALSE, CompiledModelPredict = model_predict){ #' Predict method for Gaussian Process model #' #' @param x a matrix of inputs #' @param Emulator a GP emulator from EMULATE.gpstan function #' @param GP not sure about this argument (?) #' @param FastVersion TRUE value results at saving parameter values #' at the posterior mean; FALSE (default) saves the posterior samples for parameters #' @param CompiledModelPredict an instance of S4 class stanmodel that is used to produce predictions #' #' @return A list contatining 'Expectation' and 'Variance' #' if(!GP){# || (Emulator$nugget==1)){ lin <- Emulator$linModel newDat <- as.data.frame(x) prediction <- try(custom.predict(lin,newdata=newDat,Emulator$pre.Lists),silent=TRUE) if(inherits(prediction,"try-error")){ stop("Ensure the data frame has the correct named columns") } return(list(Expectation=as.vector(prediction[,1]), Variance = as.vector(prediction[,2])^2)) } else{ Xpred <- as.data.frame(x) tt = terms(Emulator$linModel) Terms = delete.response(tt) mm = model.frame(Terms, Xpred, xlev = Emulator$linModel$xlevels) Hpred = model.matrix(Terms, mm, contrasts.arg = Emulator$linModel$contrasts) #Ensure Xpred enters the emulator with the columns in the exact same order as when #emulated to ensure correlation lengths mean the same thing. if(dim(Emulator$Design)[2] > 1) { tinds = sapply(1:length(colnames(Emulator$Design)), function(k) which(colnames(Emulator$Design)[k] == colnames(Xpred)) ) Xpred <- Xpred[, tinds] }else { Xpred <- matrix(Xpred[, 1], ncol = 1) } if(FastVersion){ if(is.null(Emulator$FastParts)) stop("Build a FastVersion with EMULATE.gpstan first") Covtx <- Emulator$ParameterSamples$sigma[Emulator$FastParts$tMAP]^2*NewCov(Xpred, Design=Emulator$Design, cls = Emulator$ParameterSamples$delta[Emulator$FastParts$tMAP,]) txtA <- backsolve(Emulator$FastParts$QA,Covtx,transpose=TRUE) if(dim(Emulator$H)[2]> 1) tExpectation <- as.vector(Hpred%*%Emulator$ParameterSamples$beta[Emulator$FastParts$tMAP,] + crossprod(txtA, Emulator$FastParts$Ldiff)) else tExpectation <- as.vector(Hpred%*%Emulator$ParameterSamples$beta[Emulator$FastParts$tMAP] + crossprod(txtA, Emulator$FastParts$Ldiff)) tVariance <- as.vector(diag(diag(Emulator$ParameterSamples$sigma[Emulator$FastParts$tMAP]^2,nrow=dim(Xpred)[1],ncol=dim(Xpred)[1]) - crossprod(txtA))) StandardDev <- sqrt(tVariance) } else{ fit.y2 <- sampling(CompiledModelPredict, data = list(X1 = Emulator$Design, X2 = Xpred, y1= Emulator$tF, H1 = Emulator$H, H2 = Hpred, N1 = dim(Emulator$Design)[1], N2 = dim(Xpred)[1], Np = dim(Emulator$H)[2], p = dim(Emulator$Design)[2], M = dim(Emulator$ParameterSamples$beta)[1], sigma = Emulator$ParameterSamples$sigma, beta = Emulator$ParameterSamples$beta, delta = Emulator$ParameterSamples$delta_par, nugget = Emulator$ParameterSamples$nugget), iter = 1, warmup = 0, chains = 1, cores = 1, pars = c("tmeans","tsds"), include=TRUE, algorithm = c('Fixed_param')) predict.y2 <- extract(fit.y2, pars = c('tmeans','tsds')) tExpectation <- predict.y2$tmeans[1, ] StandardDev <- predict.y2$tsds[1, ] } return(list(Expectation = tExpectation, Variance=StandardDev^2)) } } EMULATOR.gpstan.multicore <- function(x, Emulator, GP=TRUE, FastVersion=FALSE, batches=500){ #' Predict method for Gaussian process model for many runs #' #' @param x a matrix of #' @param Emulator a GP emulator from EMULATE.gpstan function #' @param GP not sure about this argument (?) #' @param FastVersion TRUE value results at using parameter values #' at the posterior mean; FALSE (default) uses the posterior samples for parameters #' #' @return A list contatining 'Expectation' and 'Variance' #' Nruns <- dim(x)[1] Nbatch <- ceiling(Nruns/batches) batchSeq <- lapply(1:(Nbatch-1), function(k) seq(from=1+(k - 1)*batches,by=1,length.out = batches)) batchSeq[[Nbatch]] <- batchSeq[[Nbatch-1]][batches]:Nruns tSamples <- mclapply(batchSeq, function(e) EMULATOR.gpstan(x=x[e,], Emulator=Emulator, GP=GP, FastVersion = FastVersion),mc.cores = 6) tExpectation <- rep(NA,Nruns) tVariance <- rep(NA,Nruns) for(i in 1:Nbatch){ tExpectation[batchSeq[[i]]] <- tSamples[[i]]$Expectation tVariance[batchSeq[[i]]] <- tSamples[[i]]$Variance } return(list(Expectation=tExpectation,Variance=tVariance)) } ExtractCentredScaledDataAndBasis <- function(OriginalField, scaling=1){ #MeanField is now the uncentered, unscaled field we want to emulate. The last step is to centre, scale and extract the basis EnsMean <- apply(OriginalField, MARGIN=1, mean) CentredField <- OriginalField for(i in 1:dim(OriginalField)[2]){ CentredField[,i] <- CentredField[,i] - EnsMean } CentredField <- CentredField/scaling Basis <- svd(t(CentredField))$v return(list(tBasis=Basis, CentredField = CentredField, EnsembleMean = EnsMean, scaling=scaling)) } BuildStanEmulator <- function(Response, tData, cands, additionalVariables=NULL, canfacs = NULL, TryFouriers = TRUE, maxOrder = 2, maxdf = NULL, CompiledModelFit = model_fit, sigmaPrior = FALSE, nuggetPrior=FALSE, activePrior = FALSE, activeVariables = NULL, prior.params = gpstan.default.params, FastVersion = TRUE){ #' Function to construct Gaussian Proceess emulator in one go #' #' @param Response a character corresponding to the response of interest #' @param tData a data frame of inputs and a vector of output responses #' @param cands a vector of input parameters #' @param additionalVariables a vector of parameter names that we want to fit a GP to #' that didn't make it into the model #' @param canfacs a vector of input parameters that are factors #' @param TryFouriers a logical argument with TRUE (default) allowing the Fourier #' transformation of input parameters #' @param maxOrder maximum order of Fourier terms (Fourier series) #' @param maxdf maximum degrees of freedom #' @param CompiledModelFit an instance of S4 class stanmodel used for fitting a GP model #' @param sigmaPrior a logical argument with FALSE (default) specifying sigma prior parameters #' at the values found from the linear model fit #' @param nuggetPrior a logical argument with FALSE (default) specifying nugget parameter #' at fixed value and TRUE specifying a prior distribution for the nugget parameter. #' @param activePrior a logical argument with FALSE (default) specifying the same prior #' for correlation length parameters. TRUE specifying different priors for correlation length #' parameters. #' @param activeVariables a vector of parameter names considered to be active, with default NULL #' @param prior.params default parameters to the prior specification. The default is #' gpstan.default.params. #' @param FastVersion TRUE value results at saving parameter values at posterior mean, #' FALSE (default) saves the posterior samples for parameters. #' #' @return A GP Emulator object #' myem.lm <- EMULATE.lm(Response=Response, tData=tData, tcands=cands, tcanfacs=canfacs,TryFouriers=TryFouriers,maxOrder=maxOrder, maxdf = maxdf) myem.gp = EMULATE.gpstan(meanResponse=myem.lm, sigmaPrior = sigmaPrior, nuggetPrior = nuggetPrior, activePrior = activePrior, activeVariables = activeVariables, tData = tData, additionalVariables=additionalVariables, FastVersion=FastVersion, prior.params = prior.params) myem.gp } #This function must have tData with order: Parameters, Noise, Coefficients InitialBasisEmulators <- function(tData, HowManyEmulators, additionalVariables=NULL, sigmaPrior = FALSE, nuggetPrior = FALSE, activePrior = FALSE, activeVariables = NULL, prior.params = gpstan.default.params, ...){ lastCand <- which(names(tData)=="Noise") tfirst <- lastCand + 1 if(is.null(HowManyEmulators)) HowManyEmulators <- length(names(tData)) - lastCand lapply(1:HowManyEmulators, function(k) try(BuildStanEmulator(Response=names(tData)[lastCand+k], tData=tData, cands=names(tData)[1:lastCand], additionalVariables=additionalVariables[[k]], maxdf=ceiling(length(tData[,1])/10)+1, sigmaPrior = sigmaPrior, nuggetPrior = nuggetPrior, activePrior = activePrior, activeVariables = activeVariables, prior.params = prior.params, ...), silent = TRUE)) } ValidPlot <- function(fit, X, y, interval = c(), axis = 1, heading = " ", xrange = c(-1, 1), ParamNames) { #' Plots the emulator predictions together with error bars and true values. #' #' @param fit a data frame of emulator predictions. First column corresponds #' to the posterior mean, second and third columns correspond to the lower #' and upper quantiles respectively. #' @param X a data frame of inputs #' @param y a vector of simulator evaluations at X #' @param interval a vector to define the y axis limit of the plot, the #' default option is the empty vector. #' @param axis the input against which the emulator predictions are plotted #' @param heading a title of the plot #' @param xrange a vector to define the x limit of the plot #' @param ParamNames a vector of names of the parameters in the data frame #' #' @return A plot of emulator predictions together with the error bars #' (plus and minus 2*standard deviations) and true values. outside <- c() inside <- c() maxval <- max(fit) minval <- min(fit) for(i in 1:length(y)) { if(fit[i, 2] <= y[i] & y[i] <= fit[i, 3]) { inside <- c(inside,i) } } outside <- c(1:length(y))[-inside] chosen_column <- which(colnames(X) == ParamNames[axis]) if(is.null(interval)) { plot(X[ , chosen_column], fit[, 1], pch=20, ylab='Y', xlab=ParamNames[axis], ylim=c(minval, maxval), xlim=xrange, main=heading,cex.main=0.8) } else { plot(X[ , chosen_column], fit[, 1], pch=20, ylab='Y', xlab=ParamNames[axis], ylim=interval, xlim=xrange, main=heading,cex.main=0.8) } arrows(X[, chosen_column], fit[, 2], X[, chosen_column], fit[, 3], length=0.05, angle=90, code=3, col='black') points(X[inside, chosen_column], y[inside], pch=20, col='green') points(X[outside, chosen_column], y[outside], pch=20, col='red') } virtual.LOO <- function(Design, y, cls, sigma, H, beta, nugget) { #' Function to compute the virtual Leave-One-Out formulas. #' #' @param Design a matrix of inputs for design set. #' @param y a vector of simulator responses at Design. #' @param cls a vector of correlation length parameters. #' @param sigma a real value of scale parameter. #' @param H a matrix of regression functions evaluted at the design points. #' @param beta a vector of regression coefficients. #' @param nugget a real value of a nugget parameter. #' #' @return a random generation for the normal distribution with mean and standard deviation #' found using the virtual Leave-One-Out formulas. expectation <- c() variance <- c() predict_y <- c() Gamma2 <- sigma^2*CovMatrix(Design, cls) + diag(nugget, nrow = dim(Design)[1]) Gamma2Inv <- solve(Gamma2) MeanRes <- c(H%*%beta) Gamma2InvY <- Gamma2Inv %*%(y - MeanRes) DiagGamma2Inv <- diag(Gamma2Inv) for(i in 1:length(y)) { expectation[i] <- y[i] - Gamma2InvY[i]/DiagGamma2Inv[i] variance[i] <- 1/DiagGamma2Inv[i] predict_y[i] <- rnorm(1, mean = expectation[i], sd = sqrt(variance[i])) } return(predict_y) } LOO.plot <- function(StanEmulator, ParamNames) { #' Function to generate Leave-One-Out predictions. #' #' @param StanEmulator a GP emulator from EMULATE.gpstan function. #' @param ParamNames a vector of names for parameters. #' #' @return a data frame with three columns, with first column corresponding to posterior mean, #' and second and third columns corresponding to the minus and plus two standard deviations. #' The function also generates LOO validation plots. predict.y <- sapply(1:dim(StanEmulator$ParameterSamples$delta_par)[1], function(k) virtual.LOO(Design = StanEmulator$Design, y = StanEmulator$tF, H = StanEmulator$H, beta = StanEmulator$ParameterSamples$beta[k, ], cls = StanEmulator$ParameterSamples$delta_par[k, ], sigma = StanEmulator$ParameterSamples$sigma[k], nugget = StanEmulator$ParameterSamples$nugget[k])) mean.predict.y <- rowMeans(predict.y) sd.predict.y <- apply(predict.y, 1, sd) fit.stan <- data.frame(cbind(mean.predict.y, mean.predict.y-2*sd.predict.y, mean.predict.y + 2*sd.predict.y)) names(fit.stan) <- c('posterior mean', 'lower quantile', 'upper quantile') p <- length(ParamNames) if(p<2){ par(mfrow = c(1, 1), mar=c(4, 4, 1, 1)) } else if(p<3){ par(mfrow = c(1, 2), mar=c(4, 4, 1, 1)) } else if(p<4){ par(mfrow = c(1, 3), mar=c(4, 4, 1, 1)) } else if(p <5){ par(mfrow = c(2, 2), mar=c(4, 4, 1, 1)) } else if(p<7){ par(mfrow = c(2, 3), mar=c(4, 4, 1, 1)) } else if(p<10){ par(mfrow = c(3, 3), mar=c(4, 4, 1, 1)) } else if(p<13){ par(mfrow = c(4, 3), mar=c(4, 4, 1, 1)) } else if(p<=16){ par(mfrow = c(4, 4), mar=c(4, 4, 1, 1)) } #par(mfrow = c(1, 3), mar=c(4, 4, 1, 1)) for(i in 1:p) { ValidPlot(fit.stan, StanEmulator$Design, StanEmulator$tF, interval = range(fit.stan), axis = i, heading = "", xrange = c(-1, 1), ParamNames = ParamNames) } return(fit.stan) } ValidationStan <- function(NewData, Emulator, main = ""){ #' Predict method for Gaussian Process model #' #' @param NewData a data frame containing the inputs and outputs #' @param Emulator a GP emulator from EMULATE.gpstan function #' @param main a heading for the validation plots #' #' @return A list contating 'Expectation' and 'Variance' together with the #' validation plots. #' p <- dim(Emulator$Design)[2] if(p >= 2) Xpred <- NewData[,which((names(NewData)%in%colnames(Emulator$Design))|names(NewData)%in%Emulator$Factors)] else Xpred = NewData[, 1] emData <- EMULATOR.gpstan(Xpred, Emulator=Emulator) fit.stan <- cbind(emData$Expectation, emData$Expectation-2*sqrt(emData$Variance), emData$Expectation+2*sqrt(emData$Variance)) if(p<2){ par(mfrow = c(1, 1), mar=c(4, 4, 1, 1)) } else if(p<3){ par(mfrow = c(1, 2), mar=c(4, 4, 1, 1)) } else if(p<4){ par(mfrow = c(1, 3), mar=c(4, 4, 1, 1)) } else if(p <5){ par(mfrow = c(2, 2), mar=c(4, 4, 1, 1)) } else if(p<7){ par(mfrow = c(2, 3), mar=c(4, 4, 1, 1)) } else if(p<10){ par(mfrow = c(3, 3), mar=c(4, 4, 1, 1)) } else if(p<13){ par(mfrow = c(4, 3), mar=c(4, 4, 1, 1)) } else if(p<=16){ par(mfrow = c(4, 4), mar=c(4, 4, 1, 1)) } for(i in 1:p) { ValidPlot(fit.stan, X = Xpred, y=NewData[,which(names(NewData)==Emulator$ResponseString)], interval = range(fit.stan), axis = i, heading = main, xrange = c(-1, 1), ParamNames = colnames(Emulator$Design)) } fit.stan }