#===============================================================================
#
# Computing emulators and History Matching using Exeter software
# Central procedure of the HighTune soft
# Developed during coding sprints of the HighTune project
#    Authors : HighTune team, 2017
#
# Main changes :
#  * Change from old rstan version of the Exeter R codes to Mogp version based
#        R, python and C and ported on GPU
#        Daniel Williamson, ~ 2019
#  * Spliting the Xp vector of parameters to allow running with
#        larger sample size
#        Maelle Coulon, Najda Villefranque, Frédéric Hourdin, 10/2022
#
#===============================================================================

twd <- getwd()
source('htune_convert.R')
library(reticulate)

#===============================================================================
# default values for optional arguments
#===============================================================================
WAVEN=1                     # Wave number of history matching (HM)
tau=0                       # number of metrics than can fail in the HM 
cutoff=3                    # cut-off on the Implausibility
sample_size=20000           # size of emulated sample (Xp)
sample_size_next_design=80  # sample size for true simulations for the next wave
max_sample_size=3000000     # Maximum LH size (above this limit, Xp is splitted)
sub_sample_size=300000      # size for splitting Xp for Implausibility computation
maxlLHS=50                  # Maximum number of lLHS
npixels=15                  # number of pixel in x and y for implausibility matrix
print_level=0               # "debug" pour activer des prints diagnostics
meanFun="fitted"            # mean function type for gaussian process [fitted|constant]

#===============================================================================
# Diagnostics functions : size and time
#===============================================================================
htsize <- function(object,name) {
   print(paste(name,", type:",typeof(object),", length:",length(object),", memory:",format(object.size(object), units = "Mb"))) }
prev_time <- Sys.time()
httime <- function(where) {
   new_time <- Sys.time()
   print(paste("Time elapses in ",where," : ",new_time-prev_time)) }


#===============================================================================
# Reading line args
#===============================================================================
args = commandArgs(trailingOnly=TRUE)
if (length(args)%%2!=0) { print(paste("Bad number of arguments",length(args),"in htune_EmulatingMultiMetric.R")); q("no",1) ; }
if (length(args)>0) {
  for (iarg in seq(1,length(args),by=2)) {
    if (args[iarg]=="-wave") { WAVEN=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-tau") {tau=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-cutoff") {cutoff=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-sample_size") {sample_size=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-sample_size_next_design") {sample_size_next_design=as.numeric(args[iarg+1]) }
    else if (args[iarg]=="-meanFun") {meanFun=args[iarg+1] }
    else if (args[iarg]=="-seed") { set.seed(as.numeric(args[iarg+1])) }
    else { print(paste("Bad argument",args[iarg],"in htune_Emulating_Multi_Metric_Multi_LHS.R")) ; q("no",1) ; } # quit with error code 1 
  }
}

#===============================================================================
# Reading results of a series of SCM simulations
#===============================================================================

print(paste("Arguments : -wave",WAVEN,"-cutoff",cutoff,"-tau",tau,sep=" "))
source("ModelParam.R")
load(file=paste("WAVE",WAVEN,"/Wave",WAVEN,"_SCM.Rdata",sep=""))
load(file=paste("WAVE",WAVEN,"/Wave",WAVEN,"_REF.Rdata",sep=""))

if ( nmetrique == 1) { tau=0 }

cands <- names(tData)[1:(nparam+1)] # Selects columns with the parameters
metric <- names(tData)[(nparam+2):(nparam+1+nmetrique)] # select columns with the metrics
print("History Matching for parameters: ")
print(cands[1:(nparam+1)])
print(c("nmetrique,tau=",nmetrique,tau))

#===============================================================================
print("  ========   Scatter plot metrcis=f(param)   ======== ")
#===============================================================================
# Ploting for each parameter, the metric as a fuction of parameter values:
# directly from the SCM outputs
# Organizing sub-windows as a fuction of the number of plots
# For ploting with real prameters as axis
#===============================================================================

file = paste("WAVE",WAVEN,"/Plots_Metrics.pdf",sep="")
pdf(file=file)

param_SCM <- DesignConvert(tData[,1:nparam])
logs=array(1:nparam) ; for ( iparam in 1:nparam ) { logs[iparam]<-"" }
logs[which.logs] <- "x"
if(nparam*nmetrique<2){ par(mfrow = c(1, 1), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique <=4){ par(mfrow = c(2, 2), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique<=9){ par(mfrow = c(3, 3), mar=c(4, 4, 1, 1)) 
} else if(nparam*nmetrique<=16){ par(mfrow = c(4, 4), mar=c(4, 4, 1, 1))
} else if(nparam*nmetrique<=25){ par(mfrow = c(5,5), mar=c(4, 4, 1, 1)) }
for (j in 1:nmetrique) {
        print(paste("Metrique [",j,"]",metric[j],"  obs : ",tObs[j],"   err : ",tObsErr[j]))
}
for ( iparam in 1:nparam ) {
  for (j in 1:nmetrique) {
    ymin=tObs[j]-sqrt(tObsErr[j])
    ymax=tObs[j]+sqrt(tObsErr[j])
    plot(param_SCM[,iparam],tData[,nparam+j+1],col=2,xlab=cands[iparam],ylab=metric[j],log=logs[iparam],ylim=range(tData[,nparam+j+1],ymin,ymax))
    abline(v=param.defaults[iparam],col='blue',lty=2)
    abline(h=c(tObs[j],ymin,ymax ), col = 'blue', lty =2, lwd=c(1,3,3))
  }
}
dev.off()

print(paste("OK0"))
print(paste("Created figure file: ", file, sep=""))

#===============================================================================
print("   ======   Reading emulators of previous waves  ========   ")
#===============================================================================

mogp_dir <- "./mogp_emulator"
source("BuildEmulator/BuildEmulator.R")

EMULATOR.LIST <- list()
tau_vec = c()
cutoff_vec = c()
taufile="tau-cutoff.Rdata"

for (i in 1:WAVEN) {
  prefix=paste("WAVE",i,"/EMULATOR_MULT_METRIC_wave",i,sep="")
  if(file.exists(paste(prefix,".RData",sep="")) && file.exists(paste(prefix,"_mogp",sep=""))) {
      EMULATOR.LIST[[i]] = load_ExUQmogp(prefix)
      print(paste("An emulator has been loaded from ",prefix,sep=""))
  } 
}
if (file.exists(taufile)) { load(taufile) }

if ( length(EMULATOR.LIST) == WAVEN ) {
  # We shall overwrite the cutoff or tau for the last wave
  tau_vec[WAVEN]=tau
  cutoff_vec[WAVEN]=cutoff
} else {

#===============================================================================
print(paste("   ======   Computing emulator for wave ",WAVEN," ======== "))
#===============================================================================
  #DW Back to new codes and explanations
  #################################################################
  # Building emulator for the new WAVE (if not already done)
  #################################################################
  # variables 1:nparam correspond to input parameters
  # additionalVariables can still be specified, see MOGP_documentation.
  #     It can be decided or not to add part or all the other variables
  #     They are then specified in the additionalVariables
  #     if adding x3 to x1 and x2, puting names(tData)[3] or names(tData)[1:3] is equivalent.
  #========================================================
  # First fitting metrics (columns nparam+2:nparam+1+nmetrique of tData from file SCM.Rdata) with a linear model "lm" and then fitting mogp to whole object:
  #========================================================
  # maxOrder is the max number of fourier modes (see new documentation to change this)
  # maxdf : max number of degree of freedom = number of fuctions retained in the linear model (see new documentation to change this.)
  # This should be adapted as a function of the number of simulations
  # Choices a list of choices per emulator. See new documentation for how to change and what the defaults do (some terms e.d. maxdf are as before.) 
  #meanFun defaults to "fitted" which calls our usual lm fits. Can be set to "linear" to give e.g. x1+x2+x3+..x_nparam
  #kernel vector of kernels for each emulator or a single kernel. "Gaussian is the default. We have Matern52 implemented with more coming.
  #################################################################
  
  listnew <-lapply(1:nmetrique,function(k) names(tData[1:nparam])) # Est ce que ça fait vraiment ce qu'on veut ?
  #myem.lm <-InitialBasisEmulators(tData=tData,HowManyEmulators=nmetrique,additionalVariables = listnew)
  # DANNY CHANGE NUMBER 1. THIS IS A DANGEROUS DEFAULT CALL THAT SHOULD BE REPLACE:
  #additionalVariables=listnew means that all parameters are fitted in the Gaussian process, which is generally bad.
  #The default in ExeterUQ (when additionalVariables=NULL) is to include all parameters that
  #were highlighted as active by the stepwise regression, but maybe not included due to limits on 
  #available degrees of freedom. additionalVariables allows variables we think should be important 
  #to be forced into the emulator even if they didn't show up in the regression phase.
  #DW Note different function name with same calling protocol. (Enjoy the speed :) )
  # myem.lm <- BuildNewEmulators(tData=tData,HowManyEmulators=nmetrique,additionalVariables = NULL) #can still change additionalVariables to listnew if wanted.

#===============================================================================
print("  ==============      Building emulators    ============== ")
#===============================================================================

  # Modif DW: "I am soon pushing a different default here, and it is better for high tune to have the old default."
  myem.lm <- BuildNewEmulators(tData=tData,HowManyEmulators=nmetrique,additionalVariables = NULL, meanFun="fitted")
  
  file = paste("WAVE",WAVEN,"/Plots_LOO.pdf",sep="")
  pdf(file=file)
  #DIAGNOSTICS : verifying that the original SCMdata are well reproduced when eliminating the point from the emulator
  tvars <- rep(NA, nmetrique)
  terrs <- rep(NA, nmetrique)
  for (i in 1:nmetrique) {

#===============================================================================
    print(paste("   ====== Computing LOO for metrique ",i,"   ======     "))
#===============================================================================
    tLOOs1 <- LOO.plot(Emulators = myem.lm, which.emulator=i, ParamNames=names(tData)[1:nparam], OriginalRanges = TRUE, 
                       RangeFile = "ModelParam.R",
                       Obs=tObs[i], ObsErr = sqrt(tObsErr[i]), ObsRange=TRUE)
    terrs[i] <- (tLOOs1[,1]-tData[,nparam+i+1])^2
    tvars[i] <- ((tLOOs1[,3]-tLOOs1[,1])/2)^2
    print(paste("mean(emulator error/training data set): ",sqrt(mean(terrs[i])),"\t mean (emulator std-deviation for each prediction)",sqrt(mean(tvars[i])),sep=""))
  }
  dev.off()
  
#===============================================================================
  print("  ==============      Saving emulators    ============== ")
#===============================================================================

  EMULATOR.LIST[[WAVEN]] = myem.lm
  tau_vec[WAVEN] = tau
  #SAVE EMULATOR FOR HISTORY MATCHING
  cutoff_vec[WAVEN] = cutoff
  for (i in 1:WAVEN) {
    prefix=paste("WAVE",i,"/EMULATOR_MULT_METRIC_wave",i,sep="")
    save_ExUQmogp(EMULATOR.LIST[[i]], filename=prefix)
    print(paste("An emulator has been saved under: ",prefix,sep=""))
  }
  save(tau_vec, cutoff_vec, file = taufile)
}

#===============================================================================
#===============================================================================
   print("  ==============     Begining history matching     ============== ")
#===============================================================================
#===============================================================================

source('HistoryMatching/HistoryMatching.R')
print(paste("Generating ",sample_size,"samples varying nparam input parameters"))


#===============================================================================
# sample_size is the number of samples required
#    If sample_size > max_sample_size
#    nbLHS samples Xp of size max_sample_size are created successively
#    each Xp has size sample_size_new
#===============================================================================

max_sample_size <- min(c(sample_size,max_sample_size))
nbLHS=( sample_size - 1 ) %/% max_sample_size + 1
sample_size_new=sample_size%/%nbLHS
print(paste("Dividing sample_zize=",sample_size," in ",nbLHS," pieces"))
dimXP = list()   #stock la taille de Xp = vecteur total dimXP[lLHS]
lLHS=1
dimNROY = matrix(0, nrow=0, ncol=WAVEN)
isImpmatrix=0 #=1 once the implausibility matrix is evaluated
repeat {

     Xp <- as.data.frame(2*randomLHS(sample_size_new, nparam)-1)
     dimXP[lLHS]=dim(Xp)[1]
     names(Xp) <- names(tData)[1:nparam]
     Disc = rep(0, nmetrique)
     totonew<-DesignantiConvert1D(param.defaults)
     param.defaults.norm=rep(0,nparam)
     print(paste("lLHS=",lLHS,"Generating ",sample_size_new,"samples varying nparam input parameters"))
     htsize(Xp,"Xp")
     dimNROY_OneRow = matrix(0, nrow=1, ncol=WAVEN)
     dimNROY = rbind(dimNROY,dimNROY_OneRow)
     htsize(dimNROY,"dimNROY")

     for (i in 1:nparam) {param.defaults.norm[i]=totonew[i,]}
     
     if(WAVEN == 1) {
     
#===============================================================================
         print(paste("==== Computing implausibility for wave ",WAVEN," ====="))
#===============================================================================
   
         Timps <- ImplausibilityMOGP(NewData=Xp, Emulator=EMULATOR.LIST[[1]], Discrepancy=Disc, Obs=tObs, ObsErr=tObsErr)
         ImpData_wave1 = cbind(Xp, Timps)
         VarNames <- names(Xp)
         valmax = tau_vec[1] + 1
         if(lLHS==1) { #we create implausibility matrix only at the first step on nbLHS -> Maelle
           ImpListM1 = CreateImpList(whichVars = 1:nparam, VarNames=VarNames, ImpData=ImpData_wave1, nEms=EMULATOR.LIST[[1]]$mogp$n_emulators, whichMax=valmax)
           imp.layoutm11(ImpListM1,VarNames,VariableDensity=FALSE,newPDF=TRUE,the.title=paste("InputSpace_wave",WAVEN,".pdf",sep=""),newPNG=FALSE,newJPEG=FALSE,newEPS=FALSE,Points=matrix(param.defaults.norm,ncol=nparam))
         }
         NROY1 <- which(rowSums(Timps <= cutoff_vec[1]) >=EMULATOR.LIST[[1]]$mogp$n_emulators - tau_vec[1])
         dimNROY[lLHS,1]=length(NROY1)
         TMimpls_wave1 <- apply(Timps, 1,MaxImp,whichMax=valmax)
         if(lLHS==1){
           XpNext <- Xp[NROY1, ]
         } else {
           XpNext <- rbind(XpNext, Xp[NROY1, ])
         }
         print(paste("lLHS=",lLHS," : dim(Xp[NROY1, ])[1] ", dim(Xp[NROY1, ])[1], sep=""))
         print(paste("dim(Xp[NROY1, ]) ", dim(Xp[NROY1, ]), sep=""))
         print(paste("dim(Xp) ", dim(Xp), sep=""))
         print(paste("fin vague 1 : dim(XpNext)=", dim(XpNext),"dim(XpNext)[1]=",dim(XpNext)[1], sep=""))
   
     } else { # WAVEN > 1

         XpNextNew = Xp
         NROY.list = list()
         Impl.list = list()
         for(i in 1:(length(EMULATOR.LIST))) {

#===============================================================================
             print(paste("= Computing implausibility, wave ",i,"/",WAVEN," =="))
#===============================================================================

             # Computing implausibility for sub samples of Xp of
             # size sub_sample_size

             nsub=( nrow(XpNextNew) - 1 ) %/% sub_sample_size + 1 #eventuellement externaliser le 300 000
             print(paste("Taille de XpNextNew : ", dim(XpNextNew), sep=""))
             #print(paste("XpNextNew AAA wave:",i,dim(row(XpNextNew))))
             Xp_tmp <- split(XpNextNew,factor(sort(rank(row.names(XpNextNew))%%nsub)))
             htsize(XpNextNew,paste("XpNextNew, splitted in",nsub,"pieces"))

             for(k in 1:nsub) {
                 Xp_tmp2 <- as.data.frame(Xp_tmp[k])
                 htsize(Xp_tmp2,paste("Xp_tmp2, k=",k))
                 Timps_2 = ImplausibilityMOGP(NewData=Xp_tmp2, Emulator=EMULATOR.LIST[[i]],Discrepancy=Disc, Obs=tObs, ObsErr=tObsErr)
                 if(k == 1){
                    Timps = Timps_2
                 } else {
                    Timps = rbind(Timps,Timps_2)
                 }
                 htsize(Timps,paste("Timps, k=",k))
             }

             valmax = tau_vec[i] + 1
	     if (length(Timps) == nmetrique){
		     #print("probleme, Timps ne contient plus qu'un vecteur")
		     #when Timps contain just on vector we transform it into a matrix
		     Timps = matrix(Timps, nrow = 1)
             }
	    
             print(c("cutoff",i,cutoff_vec[i]))
             Impl.list[[i]] = matrix(apply(Timps, 1,MaxImp,whichMax=valmax), ncol = 1)
             NROY.list[[i]] = which(rowSums(Timps <= cutoff_vec[i]) >= EMULATOR.LIST[[i]]$mogp$n_emulators - tau_vec[i])
             dimNROY[lLHS,i] = length(NROY.list[[i]]) #Stock pour chaque l et chaque i(=vague) le nombre de vecteur non exclu ce qui permet le calcul du Remaining space pour chaque vague a la fin
             XpNextNew = XpNextNew[NROY.list[[i]], ]
             print("apres3")
             #remaining_sample_size=length(row(XpNextNew))
	     remaining_sample_size=dim(XpNextNew)[1]
             print(paste("===> Nb of pts in NROY (wave:",i,",lLHS:",lLHS,") =",remaining_sample_size))
	     if( remaining_sample_size == 0){
                print("remaining_sample_size is empty, we leave the loop on emulators")
	        break
	     }
             #print(paste("On ne conserve que les bons vecteurs de paramètres : XpNext a maintenant la taille ", dim(XpNext), sep=""))
             #print(paste("Remaining space after wave",i,": ",length(NROY.list[[i]])/dim(Xp)[1],sep=""))
             #cat(length(NROY.list[[i]])/dim(Xp)[1],file=paste("Remaining_space_after_wave_",i,".txt", sep=""),sep="")


         } # End Loop on Waves

         httime(paste("Implausibility computation, lHLS=",lLHS))
         print("fin de la boucle sur les emulateurs = sur les vagues precedentes et actuelle")

         if(lLHS == 1){
            XpNext = XpNextNew
         } else {
            XpNext = rbind(XpNext,XpNextNew) #concatenation des XpNextnew sur les l dans XpNext
         }
         print(paste("XpNext final : ", dim(XpNext)[1]) )
	 htsize(XpNext, "XpNext final")

         Impl.list[[length(EMULATOR.LIST)]] = Timps
	 
         if(isImpmatrix == 0){
	   if( dim(XpNextNew)[1] > 0){
             ImpData <- ImpDataWaveM(Xp, NROY.list, Impl.list)
             VarNames <- names(Xp)
#===============================================================================
             print(paste("==Drawing implausibility matrix, wave ",WAVEN," =="))
#===============================================================================

             print("Drawing Implausibility matrix if its not done already and XpNextNew is not empty")
             ImpList <- CreateImpListWaveM(whichVars = 1:nparam, VarNames=VarNames, ImpData = ImpData,
                                         nEms=EMULATOR.LIST[[length(EMULATOR.LIST)]]$mogp$n_emulators, Resolution=c(15,15), whichMax=valmax)
             httime("Implausibility matrix : creating ImpList ")
             imp.layoutm11(ImpList,VarNames,VariableDensity=FALSE,newPDF=TRUE,the.title=paste("InputSpace_wave",WAVEN,".pdf",sep=""),newPNG=FALSE,newJPEG=FALSE,newEPS=FALSE,Points=matrix(param.defaults.norm,ncol=nparam))
             httime("Implausibility matrix : creating Graph ")
	     isImpmatrix=1
           } else {
	      print("XpNextNew est vide, on ne calcul ni ImpData ni la matrice d'implausibilite")
	   }
        }

     } # End If WAVEN == 1

#===============================================================================
# Ending loop on lLHS if reaching nbHLS, except if
# of remaining vectors < sample_size_next_design
#===============================================================================

     remaining_sample_size=nrow(XpNext)
     print(paste("Number of remaining samples ",remaining_sample_size))
     if ( lLHS >= nbLHS ) {
          if ( remaining_sample_size >= sample_size_next_design ) {
             break
          } else if (lLHS >= maxlLHS) {
	     print("!!!! lLHS > maxlLHS !!!!")
	     rspace=colSums(dimNROY)[i]/Reduce('+', dimXP)
	     print(paste("Remaining space after wave",i,": ",rspace,sep=""))
	     print(paste("With ",colSums(dimNROY)[i], "good vectors and ", Reduce('+', dimXP), "samples"))
	     print("!!!! We stop the experiment !!!!")
	     stop()
          } else {
             print(paste("Not enough samples, ",remaining_sample_size,"<",sample_size_next_design," Running a new ",sample_size_new,"sample"))
          }
     }
     lLHS=lLHS+1

#===============================================================================
} # Ending loop on lLHS
#===============================================================================

#===============================================================================
# Storing Normalized Remaining space
#===============================================================================

for(i in 1:(length(EMULATOR.LIST))) {
      rspace=colSums(dimNROY)[i]/Reduce('+', dimXP)
      print(paste("Remaining space after wave",i,": ",rspace,sep=""))
      cat(rspace,file=paste("Remaining_space_after_wave_",i,".txt", sep=""),sep="")
}

if(isImpmatrix==1) { #if implausibility matrix has been evaluated, we create the pdf file
  mtext(paste("Remaining space:", rspace, sep=""), side=1)
  print(paste("Created figure file: ", "InputSpace_wave",WAVEN,".pdf", sep=""))
}
#fin des modifs Maelle

#===============================================================================
# Creation of Wave WAVEN+1 sample
#===============================================================================

print("on regarde les parametres avec lesquels on va lancer la prochaine vague")
print(paste("nombre de vecteurs restant dans XpNext :", nrow(XpNext), sep=""))
if (nrow(XpNext)>=sample_size_next_design) { samplesz=sample_size_next_design } else { samplesz=nrow(XpNext)
print(paste("Final Sample size reduced to ",samplesz))
}

design.waveNext <- sample(nrow(XpNext), samplesz, rep = F)
WaveNext <- XpNext[design.waveNext, ]
save(WaveNext,file=paste("Wave",strtoi(WAVEN, base = 0L)+1,".RData",sep=""))
print(paste("Next wave design has been saved under: ","Wave",strtoi(WAVEN, base = 0L)+1,".RData",sep=""))
#} else { print("The NROY space is not large enough to allow resampling for next wave") }