#===============================================================================
#
# Computing emulators and History Matching using dgpsi and tinydancer
# Potential new central procedure of the HighTune software
# Original HTune software developed during coding sprints of 2017
#    Authors : HighTune team, 2017
# Adaptations for novel emulation and history matching software January 2024
#    Author: Daniel Williamson
#
# Main changes :
#  * Changed emulation to dgpsi - gp as default. Capacity for dgp and as_gp's 
#    within package
#  * Changed History matching approach to use tinydancer for small space sampling
#   Author: Daniel Williamson - Jan 2024
#
#===============================================================================

# PREAMBLE, NEEDS TO HAPPEN ONCE ON INSTALL AND NOT HERE, AS WITH LOADING MOGP
# UPDATE dgpsi AND tinydancer
#devtools::install_github("mingdeyu/dgpsi-R")
#devtools::install_github("BayesExeter/tinydancer")
#Set up the conda environment for the first time. This should move to install. 
#Later in this script we load dgpsi and call init_py as if the software is 
#already installed.
#MUST RUN ONCE THE FIRST TIME, AGAIN CAN HAPPEN ON INSTALL NOT HERE
#library(dgpsi)
#init_py(reinstall=TRUE)

#############
# Najda notes
# la fonction init_py de la lib dgpsi-R fait deux choses :
# - installe, via conda, la lib python dgpsi (dont dpgsi-R est l'interface) et ses dépendances
# - initialise l'environnement, via reticulate
# dans htexplo,
# l'installation a été faite via le script setup.sh, avec pip install
# l'initialisation de l'environnement est fait par les lignes ci dessous :
library(reticulate)
pkg_env <- get("pkg.env", envir = asNamespace("dgpsi"))
assign('dgpsi', reticulate::import("dgpsi"), pkg_env)
assign('py_buildin', reticulate::import_builtins(), pkg_env)
assign('np', reticulate::import("numpy"), pkg_env)
assign('copy', reticulate::import("copy"), pkg_env)
assign('py_gc', reticulate::import("gc"), pkg_env)
assign('dill', reticulate::import("dill"), pkg_env)
assign('base64', reticulate::import("base64"), pkg_env)
pkg_env$thread_num <- pkg_env$dgpsi$get_thread()

#================================================================================

# Begin script-specific code
twd <- getwd()
source('htune_convert.R')

#===============================================================================
# default values for optional arguments
#===============================================================================
WAVEN=1                     # Wave number of history matching (HM)
tau=0                       # number of metrics than can fail in the HM 
cutoff=3                    # cut-off on the Implausibility
sample_size=1000           # size of emulated sample (Xp) (this no longer needs to be large
                            #It can in fact be as small as sample_size_next_design)
sample_size_next_design=80  # sample size for true simulations for the next wave
max_sample_size=3000000     # Maximum LH size (above this limit, Xp is splitted)
sub_sample_size=300000      # size for splitting Xp for Implausibility computation
maxlLHS=50                  # Maximum number of lLHS
npixels=40                  # number of pixel in x and y for implausibility matrix 15 -> 40
print_level=0               # "debug" pour activer des prints diagnostics
#meanFun="fitted"            # mean function type for gaussian process [fitted|constant] DEPRECIATED

# if print = "debug", recommend sample_size=20, sample_size_next_design=5

#===============================================================================
# Diagnostics functions : size and time
#===============================================================================
htsize <- function(object,name) {
  print(paste(name,", type:",typeof(object),", length:",length(object),", memory:",format(object.size(object), units = "Mb"))) }
prev_time <- Sys.time()
httime <- function(where) {
  new_time <- Sys.time()
  print(paste("Time elapses in ",where," : ",new_time-prev_time)) }

#===============================================================================
# Reading line args
#===============================================================================
args = commandArgs(trailingOnly=TRUE)
if (length(args)%%2!=0) { print(paste("Bad number of arguments",length(args),"in htune_EmulatingMultiMetric.R")); q("no",1) ; }
if (length(args)>0) {
  for (iarg in seq(1,length(args),by=2)) {
    if (args[iarg]=="-wave") { WAVEN=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-tau") {tau=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-cutoff") {cutoff=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-sample_size") {sample_size=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-sample_size_next_design") {sample_size_next_design=as.numeric(args[iarg+1]) }
#    else if (args[iarg]=="-meanFun") {meanFun=args[iarg+1] }
    else if (args[iarg]=="-seed") { set.seed(as.numeric(args[iarg+1])) }
    else { print(paste("Bad argument",args[iarg],"in htune_Emulating_Multi_Metric_Multi_LHS.R")) ; q("no",1) ; } # quit with error code 1 
  }
}

#if ((meanFun != "fitted") & (meanFun != "constant")) {
#  print("Bad choice for meanFun [constant|fitted]")
#  q("no",1)
#}
#ABOVE IS DEPRECATED. EMULATORS NO LONGER HAVE A meanFun.

#===============================================================================
# Reading results of a series of SCM simulations
#===============================================================================

print(paste("Arguments : -wave",WAVEN,"-cutoff",cutoff,"-tau",tau,sep=" "))
source("ModelParam.R")
load(file=paste("WAVE",WAVEN,"/Wave",WAVEN,"_SCM.Rdata",sep=""))
load(file=paste("WAVE",WAVEN,"/Wave",WAVEN,"_REF.Rdata",sep=""))

if ( nmetrique == 1) { tau=0 }

cands <- names(tData)[1:(nparam+1)] # Selects columns with the parameters
metric <- names(tData)[(nparam+2):(nparam+1+nmetrique)] # select columns with the metrics
print("History Matching for parameters: ")
print(cands[1:(nparam+1)])
print(c("nmetrique,tau=",nmetrique,tau))

#FIRST ISSUE TO CONSIDER, HOW TO HANDLE TOO MANY PARAMETERS. ARD/DESIGN/AS ALL OPTIONS

#===============================================================================
print("  ========   Scatter plot metrics=f(param)   ======== ")
#===============================================================================
# Plotting for each parameter, the metric as a function of parameter values:
# directly from the SCM outputs
# Organizing sub-windows as a function of the number of plots
# For plotting with real parameters on axes
#===============================================================================

file = paste("WAVE",WAVEN,"/Plots_Metrics.pdf",sep="")
pdf(file=file)

param_SCM <- DesignConvert(tData[,1:nparam])
logs=array(1:nparam) ; for ( iparam in 1:nparam ) { logs[iparam]<-"" }
logs[which.logs] <- "x"
if(nparam*nmetrique<2){ par(mfrow = c(1, 1), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique <=4){ par(mfrow = c(2, 2), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique<=9){ par(mfrow = c(3, 3), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique<=16){ par(mfrow = c(4, 4), mar=c(4, 4, 1, 1))
} else if(nparam*nmetrique<=25){ par(mfrow = c(5,5), mar=c(4, 4, 1, 1)) }
for (j in 1:nmetrique) {
  print(paste("Metrique [",j,"]",metric[j],"  obs : ",tObs[j],"   err : ",tObsErr[j]))
}
for ( iparam in 1:nparam ) {
  for (j in 1:nmetrique) {
    ymin=tObs[j]-sqrt(tObsErr[j])
    ymax=tObs[j]+sqrt(tObsErr[j])
    plot(param_SCM[,iparam],tData[,nparam+j+1],col=2,xlab=cands[iparam],ylab=metric[j],log=logs[iparam],ylim=range(tData[,nparam+j+1],ymin,ymax))
    abline(v=param.defaults[iparam],col='blue',lty=2)
    abline(h=c(tObs[j],ymin,ymax ), col = 'blue', lty =2, lwd=c(1,3,3))
  }
}
dev.off()

print(paste("OK0"))
print(paste("Created figure file: ", file, sep=""))

#===============================================================================
print("   ======   Reading emulators of previous waves  ========   ")
#===============================================================================

#mogp_dir <- "./mogp_emulator"
#source("BuildEmulator/BuildEmulator.R")
library(dgpsi)
#init_py()
library(tinydancer)
source("tinydancer_plots.R")

EMULATOR.LIST <- list()
tau_vec = c()
cutoff_vec = c()
taufile="tau-cutoff.Rdata"

#Change type of objects loaded and how
# We now have an emulator per metric stored in a dgpsi bundle object
for (i in 1:WAVEN) {
  prefix=paste("WAVE",i,"/EMULATOR_MULT_METRIC_wave",i,sep="")
  if(file.exists(paste(prefix,".pkl",sep=""))){
    tmp <- dgpsi::read(prefix)
    EMULATOR.LIST[[i]] = ifelse(names(tmp)[1]=="id", tmp, unpack(tmp))
    rm(tmp)
    print(paste("An emulator has been loaded from ",prefix,sep=""))
  } 
}
if (file.exists(taufile)) { load(taufile) }

if (length(EMULATOR.LIST) == WAVEN ) {
  # We shall overwrite the cutoff or tau for the last wave
  tau_vec[WAVEN]=tau
  cutoff_vec[WAVEN]=cutoff
} else{
  
  #===============================================================================
  print(paste("   ======   Computing emulator for wave ",WAVEN," ======== "))
  #===============================================================================
  #DW dgpsi implementation. Trial only with gp for now and all params
  #################################################################
  # Building emulator for the new WAVE (if not already done)
  #################################################################
  # variables 1:nparam correspond to input parameters
    #========================================================
  # First fitting metrics (columns nparam+2:nparam+1+nmetrique of tData from file SCM.Rdata)
  #========================================================
 
  #===============================================================================
  print("  ==============      Building emulators    ============== ")
  #===============================================================================
  
  tData <- as.matrix(tData)
  XX <- tData[,1:nparam,drop=F]
  YY <- tData[,-c(1:(nparam+1)),drop=F]
  vecchia_setting <- ifelse(nrow(XX)>400, TRUE, FALSE)
  gplist <- lapply(1:nmetrique, function(ii) {
          gp_ii <- gp(XX, YY[,ii,drop=F], nugget_est=T, vecchia=vecchia_setting, verb=F)
          if((nparam>12) || (nparam>floor(nrow(XX)/10))){#temporary ARD limit on emulator dims
            max_num <- min(c(12, floor(nrow(XX)/10)))
            least_active <- which(order(gp_ii$specs$lengthscales) > max_num)
            gp_ii <- gp(XX[,-least_active], YY[,ii,drop=F], nugget_est=T, vecchia=vecchia_setting, verb=F)
          }
          return(gp_ii)
  })
  gplist <- lapply(gplist, function(e) validate(e))
  
  file = paste("WAVE",WAVEN,"/Plots_LOO.pdf",sep="")
  pdf(file=file)
  #DIAGNOSTICS : verifying that the original SCMdata are well reproduced when eliminating the point from the emulator

  for (i in 1:nmetrique) {
    print(paste("   ====== Plotting LOO for metrique ", i, "   ======     "))
    
    if (!is.null(gplist[[i]])) {
      print(plot(gplist[[i]]))
      dev.flush()
    } else {
      print(paste("Warning: gplist[[", i, "]] is NULL, skipping plot."))
    }
    
    print(paste("RMSE: ", gplist[[i]]$loo$rmse, "\t mean (emulator std-deviation for each prediction)", mean(gplist[[i]]$loo$std), sep=""))
   print(paste("NRMSE: ", gplist[[i]]$loo$nrmse * 100, "%", sep=""))
  }
  
  dev.off()
  
  #===============================================================================
  print("  ==============      Saving emulators    ============== ")
  #===============================================================================
  
  EMULATOR.LIST[[WAVEN]] = gplist
  tau_vec[WAVEN] = tau
  #SAVE EMULATOR FOR HISTORY MATCHING
  cutoff_vec[WAVEN] = cutoff
  #for (i in 1:WAVEN) {
  #  prefix=paste("WAVE",i,"/EMULATOR_MULT_METRIC_wave",i,sep="")
  #  save_ExUQmogp(EMULATOR.LIST[[i]], filename=prefix)
  #  print(paste("An emulator has been saved under: ",prefix,sep=""))
  #}
  #DW: only need to save WAVEN
  prefix = paste("WAVE",i,"/EMULATOR_MULT_METRIC_wave",i,sep="")
  to_save <- ifelse(length(gplist)==1, gplist[[1]], dgpsi::pack(gplist))
  dgpsi::write(to_save, prefix)
  save(tau_vec, cutoff_vec, file = taufile)
}

#===============================================================================
#===============================================================================
print("  ==============     Beginning history matching     ============== ")
#===============================================================================
#===============================================================================

print(paste("Generating ",sample_size,"samples varying", nparam, "input parameters"))

Disc = rep(0, nmetrique)

# An implausibility function. Code can be moved to a code file
single_wave_implausibility <- function(NewData, Emulators, Obs, Disc, ObsErr,valmax){
  if(!is.null(names(Emulators)))
    stop("Emulator should be a list of emulators, even if only containing 1 emulator")
  n_em <- length(Emulators)#number of metrics
  timps <- sapply(1:n_em, function(kk){
    subData <- NewData[which(colnames(tData) %in% colnames(Emulators[[kk]]$data$X))]
    tEm <- predict(Emulators[[kk]],subData)$results
    abs(Obs[kk] - tEm$mean)/sqrt(tEm$var + Disc[kk] + ObsErr[kk])
  })
  return(sort(timps)[n_em-(valmax-1)])
}

#Below is a multi-points version for rejection sampling, not used.
single_wave_implausibility_manypoints <- function(NewData, Emulators, Obs, Disc, ObsErr,valmax){
  if(!is.null(names(Emulators)))
    stop("Emulator should be a list of emulators, even if only containing 1 emulator")
  n_em <- length(Emulators)#number of metrics
  timps <- sapply(1:n_em, function(kk){
    subData <- NewData[,which(colnames(tData) %in% colnames(Emulators[[kk]]$data$X)),drop=F]
    tEm <- predict(Emulators[[kk]],subData)$results
    abs(Obs[kk] - tEm$mean)/sqrt(tEm$var + Disc[kk] + ObsErr[kk])
  })
  return(sort(timps)[n_em-(valmax-1)])
}

#A multiwave implausibility function
multi_wave_imp <- function(x, targetLevel, waves){
  levels <- length(targetLevel)
  ans <- rep(Inf, levels)
  waveFail <- FALSE
  this.level <- 1
  wave.num <- 1
  Timp <- NA
  while((this.level <= levels) & !waveFail){
    Timp <- single_wave_implausibility(NewData=x, Emulators = EMULATOR.LIST[[wave.num]], Obs = tObs,
                                       Disc=Disc, ObsErr = tObsErr,valmax=tau_vec[wave.num]+1)
    wave.num <- wave.num + 1
    if ((Timp > targetLevel[this.level]) & (wave.num <= waves)){
      waveFail <- TRUE
    }
    if((!waveFail) & (wave.num > waves)){
      ans[this.level:levels] <- Timp
      this.level <- levels + 1
    }
    else{
      ans[this.level] <- Timp
      this.level <- this.level + 1
    }
  }
  return(ans)
}

# Control parameters for tinydancer to sample small spaces. I will code this up for the
# reliable and slower sampler, but the subset_sim verison is as easy to implement (see example on git)
control_list <- list(
  volume_ratio = 0.1,
  num_mutations = 10,
  num_iterations = 100,
  box_limits = cbind(rep(0, nparam), rep(1, nparam)), #We should change parameter conversion codes to [0,1]
  estimate_volume=TRUE,
  print_every = 500,
  one_per_level = TRUE
)
new_ladder <- construct_temperature_ladder(
  implausibility = function(x, target_level){multi_wave_imp(x, target_level, WAVEN)},
  dims = nparam,
  target_levels = cutoff_vec,
  control_list = control_list
)
wave_samples <- sample_slice_ptmcmc(
  new_ladder,
  n_iter = sample_size,
  implausibility = function(x, target_level){multi_wave_imp(x, target_level, WAVEN)},
  control_list = control_list
)

#With one_per_level = TRUE, there is a sample for each wave in one of the chains
#Find the NROY chains for each wave
NROYchains <- sapply(1:WAVEN, function(kk) 
  dplyr::first(which(lapply(new_ladder$imp_levels, function(j) j[kk]) <= cutoff_vec[kk]))+1)

#Draw WAVEN implausibility matrix. Some notes:
#1. Simply substitute WAVEN below with any k <= WAVEN to draw a wave k plot from the samples,
#2. The meaning of the density plots has changed. The samples are Unif(NROY), so the plot is
#   an estimate (at each pixel) of the (marginal for nparam>2) density of NROY in the d dimensions of X.
ImpData <- wave_samples$uniform_sample_list[[NROYchains[WAVEN]]][,c(1:nparam, nparam+WAVEN)]
colnames(ImpData) <- c(paste("X",1:nparam,sep=""),"I1")#Replace with actual parameter names
ImpList01 <- impList_tinydancer(which_vars = 1:nparam,
                                var_names = paste("X", 1:nparam, sep=""),
                                ImpData=ImpData, resolution = c(npixels, npixels)
                                )
#Not sure about this
param.defaults.norm=unlist(DesignantiConvert1D(param.defaults))
imp.layout01(ImpList01, VarNames = paste("X", 1:nparam, sep=""), VariableDensity=F, newPDF=TRUE,
             the.title = paste0("InputSpace_wave", WAVEN, ".pdf"),
             Points=matrix(param.defaults.norm, ncol=nparam))
mtext(paste("Remaining space after wave",WAVEN,": ", new_ladder$volume_estimate, sep=""))
dev.off()
print(paste("Created figure file: ", "InputSpace_wave",WAVEN,".pdf", sep=""))

#===============================================================================
# Creation of Wave WAVEN+1 sample
#===============================================================================

param_SCM <- DesignConvert(ImpData[,1:nparam])
UFILE=paste("param_after_wave",WAVEN,sep="")
head(param_SCM)
logs=array(1:nparam) ; for ( iparam in 1:nparam ) { logs[iparam]<-"linear" }
logs[which.logs] <- "log"
for (i in 1:nparam) {
  line<-paste(names(tData)[i], min(param_SCM[,i]), max(param_SCM[,i]),param.defaults[i],logs[i],sep=" ")
  print(line)
  base::write(line, file=UFILE,append=TRUE)
}
print('OK')
print(which.logs)

#design.waveNext <- sample(nrow(XpNext), samplesz, rep = F)
#Replace the above as we already have uniform samples it is pointless resampling now. 
#As the slice sampler is an MCMC (it is already thinned), we can thin some more if we have generated
# sample_size >> sample_size_next_design points.
WaveNext <- param_SCM[which((1:sample_size)%%ceiling(sample_size/sample_size_next_design)==1),]
# THE OLD LINE BELOW LOOKS WRONG. IT SETS THE DESIGN IN THE TRANSFORMED SPACE AND NOT IN THE MODEL SPACE
# NEED DISCUSSION
#WaveNext <- XpNext[design.waveNext, ]
save(WaveNext,file=paste("Wave",strtoi(WAVEN, base = 0L)+1,".RData",sep=""))
print(paste("Next wave design has been saved under: ","Wave",strtoi(WAVEN, base = 0L)+1,".RData",sep=""))