## File: TVECM_XHSTest.R
## New version of the Hansen-Seo test, which includes the extension to
## three regimes and an improved grid search algorithm.
##
## Reference: Berner Larsen (2012). A threshold cointegration analysis
##            of Norwegian interest rates, Master Thesis in Statistics,
##            University of Tromso. Norway. Available at:
##            https://munin.uit.no/handle/10037/4370
#####################################################################

library(foreach) # Provides Foreach Looping Construct 
library(tsDyn)   # Nonlinear Time Series Models with Regime Switching

TVECM.XHStest <- function(data, lag=1, ngridTh=300, trim=0.05, nboot=100,
fixed.beta=NULL, intercept=TRUE, boot.type=c("FixedReg", "ResBoot"),
hpc=c("none", "foreach"), common=c("All", "ECT"),
type=c("2Reg", "SymReg", "3Reg"), tolerance=1e-12,trace=TRUE,dir=FALSE) {

## Check args:
boot.type <- match.arg(boot.type)
hpc       <- match.arg(hpc)
common    <- match.arg(common)
type      <- match.arg(type)

lag=1; ngridTh=300; trim=0.05; nboot=100;fixed.beta=NULL; intercept=TRUE
tolerance=1e-12; trace=TRUE; dir=FALSE

# dir=FALSE # internal value, was used to try different implementation of lmtest

### Organize Data
data <- as.matrix(data)

if(ncol(data)>2) {warning("Please no more than two equations")}
  if(is.null(colnames(data))){colnames(data)<-paste("Var", c(1:2), sep="")}

T      <- nrow(data)
p      <- lag
y      <- diff(data)[(p+1):(T-1),]
DeltaX <- embed(diff(data),p+1)[,-(1:2)]

if(intercept) DeltaX <- cbind(1, DeltaX)
x <- DeltaX
t <- nrow(y)

### Compute beta with VECM() and extract ect
if(is.null(fixed.beta)){
  ve <- VECM(data, lag=lag, include="const", estim="ML")
  }else{
  ve <- VECM(data, lag=lag, include="const", beta=fixed.beta, estim="2OLS")
}
  
ect    <- ve$model[,grep("ECT", colnames(ve$model))]
w0     <- matrix(ect[!is.na(ect)], ncol=1)
w0.ord <- if(type=="2Reg" | type=="3Reg") sort(w0) else sort(abs(w0))
Ttrim  <- ceiling(trim*t)

### Set up of the grid
GridSetup <- function(allgammas){
  Min1 <- allgammas[Ttrim] # At least element [1:Min1] to lower regime such that
                                # at least 100trim % goes to lower regime
                                
  Max1 <- allgammas[t-Ttrim+1]  # At least element [(t-Trim+1):t] to upper
                                # regime such that at least 100trim % goes to upper regime
  if (Min1>Max1) stop("The parameter trim is too large\n")

  gamma2 <- allgammas # allgammas: all possible threshold values
  i <- 2
  while (i <= length(gamma2)) {
    if (abs(gamma2[i-1]-gamma2[i])< tolerance) {
      gamma2 <- gamma2[-i]
    } else {
      i <- i+1
    }
  } # gamma2: all possible threshold values, but now all repetitions are removed,
    # i.e. the difference between two consequtive elements are always larger
    # than tolerance

  iMin <- max(which(gamma2<=Min1,arr.ind=TRUE)) # The index of the element of
  # gamma2 which is the smallest possible value of the lower threshold.
  iMax <- max(which(gamma2<=Max1,arr.ind=TRUE))-1 # The index of the element of
            # gamma2 which is the largest possible value of the upper threshold.
  if (iMin>iMax) {
    stop("iMin>iMax probably because the parameter tolerance is too large\n")
  }
  gamma2     <- gamma2[iMin:iMax] # The threshold value(s) have to be in this set
  newNgridTh <- length(gamma2)
  if(ngridTh>newNgridTh) {
    warning("ngridTh (", ngridTh,
    ") >= the number of potential threshold values, set to ",
       newNgridTh, "\n")
       ngridTh <- newNgridTh
  }else {
      warning("ngridTh (", ngridTh,
      ") < the number of potential threshold values, ",newNgridTh,
      "so only a subset of the potential threshold values is selected.\n")
  
   if (trace) {
     cat("The set of possible threshold values:\n")
     print(gamma2)
    }

   gamma2 <- gamma2[round(seq(from=1, to=newNgridTh,length.out=ngridTh))]
   if (trace) {
     cat("The set of the selected threshold values:\n")
     print(gamma2)
   }
  }
  
  gamma2 <- gamma2+tolerance # necessary to ensure that the condition w_t<=gamma2[j]
     # is TRUE also in the case that w_t is slightly larger than gamma2[j] due to
     # inaccuracies in floating point numbers.
return(gamma2)
}

###########
### LM Test when type="2Reg" or "SymReg". This function returns the whole set of
### LM-values such that the threshold values corresponding to the supLM value may be
### found later. When bootstrapping we use the function lmtest2RegSymReg_boot which
### returns only the supLM value. In addition, SSR is computed for each threshold
### value in lmtest2RegSymReg, but not in lmtest2RegSymReg_boot. These are the only
### differences between these two functions.
###########

lmtest2RegSymReg <- function(y,x,w0,gammas,dir=dir){
  # y: y var, x: intercept and lags matrix, w0: ECT term, gammas: potential thresholds
  ngridTh <- length(gammas)
  X <- cbind(w0,x) # X: ECT and intercept and lags
  if(dir){
    q <- qr(X)
    res_restr <- qr.resid(q,y)
  } else{
    z0zz      <- X%*%solve(t(X)%*%X)
    res_restr <- lm.fit(X,y)$residuals # residuals from the linear VECM given b0
  }

  res_restr1 <- res_restr[,1]
  res_restr2 <- res_restr[,2]
  store      <- rep(0, ngridTh)
  SSR        <- rep(10000, ngridTh)

  for(i in 1:ngridTh){
    d1 <- if(type=="2Reg") ifelse(w0<=gammas[i],1,0) else
      ifelse(w0<= - gammas[i] | w0> gammas[i],1,0) # d1: dummy variable
    z1 <- if(common=="All") c(d1)*X else c(d1)*w0
    res_unrestr <- if(dir) qr.resid(q, z1) else z1-z0zz%*%(t(X)%*%z1)
    # z11: residuals from unrestricted model (with threhsold)
   
    zea <- res_restr1*res_unrestr
    zeb <- res_restr2*res_unrestr
    ze  <- cbind(zea,zeb)
    # [(z11.*(e1*ones(1,length(z11(1,:))))),(z11.*(e2*ones(1,length(z11(1,:)))))];
    v    <- crossprod(ze)
    z11y <- crossprod(res_unrestr,y)
    s    <- matrix(c(z11y), ncol=1) # vectorization of the parameter matrix z11y
    Vinv <- try(solve(v), silent=TRUE)
    if(inherits(Vinv, "try-error")) Vinv<-ginv(v)
    store[i] <- t(s)%*%Vinv%*%s
    d2       <- 1-d1
    z2       <- c(d2)*X
    Z        <- cbind(z1,z2)
    SSR[i]   <- crossprod(c(qr.resid(qr(Z),y)))
} # end of the whole loop

ret       <- list()
ret$store <- store
ret$SSR   <- SSR
return(ret)
} # end of the function lmtest2RegSymReg

### LM Test for bootstrapping when type="2Reg" or "SymReg". This function returns only
### the supLM value. When not bootstrapping we use the function lmtest2RegSymReg which
### returns the whole set of the LM values, and the SSR values. This is the only
### difference between these two functions.
###########

lmtest2RegSymReg_boot<-function(y,x,w0,gammas,dir=dir){
  # y: y var, x: intercept and lags matrix, w0: ECT term, gammas: potential thresholds

  ngridTh <- length(gammas)
  X       <- cbind(w0,x) #X: ECT and intercept and lags
  if(dir){
    q         <- qr(X)
    res_restr <- qr.resid(q,y)
  } else{
    z0zz      <- X%*%solve(t(X)%*%X)
    res_restr <- lm.fit(X,y)$residuals # residuals from the linear VECM given b0
  }
  
  res_restr1 <- res_restr[,1]
  res_restr2 <- res_restr[,2]
  store      <- rep(0, ngridTh)

for(i in 1:ngridTh){
  d1 <- if(type=="2Reg") ifelse(w0<=gammas[i],1,0) else
    ifelse(w0<= - gammas[i] | w0> gammas[i],1,0) # d1: dummy variable
    z1          <- if(common=="All") c(d1)*X else c(d1)*w0
    res_unrestr <- if(dir) qr.resid(q, z1) else z1-z0zz%*%(t(X)%*%z1)
    
    # z11: residuals from unrestricted model (with threshold)
    zea <- res_restr1*res_unrestr
    zeb <- res_restr2*res_unrestr
    ze  <- cbind(zea,zeb)
    # [(z11.*(e1*ones(1,length(z11(1,:))))),(z11.*(e2*ones(1,length(z11(1,:)))))];
    
    v    <- crossprod(ze)
    z11y <- crossprod(res_unrestr,y)
    s    <- matrix(c(z11y), ncol=1) # vectorization of the parameter matrix z11y
    Vinv <- try(solve(v), silent=TRUE)
    if(inherits(Vinv, "try-error")) Vinv <- ginv(v)
      store[i] <- t(s)%*%Vinv%*%s
    } # end of the whole loop
    lm01 <- max(store, na.rm=TRUE)
return(lm01)
} # end of the function lmtest2RegSymReg_boot

###########
### LM Test when type="3Reg". This function returns the whole set of
### LM-values such that the threshold values corresponding to the supLM value may be
### found later. When bootstrapping we use the function lmtest3Reg_boot which returns
### only the supLM value. In addition, SSR is computed for each pair of threshold
### values in lmtest3Reg, but not in lmtest3Reg_boot. These are the only
### differences between these two functions.
###########

lmtest3Reg <- function(y,x,w0,gammas,dir=dir){
  # y: y var, x: intercept and lags matrix, w0: ECT term, gammas: potential thresholds
  ngridTh <- length(gammas)
  w0.ord  <- sort(w0)+tolerance # The comparisons between w0.ord and gammas decide
  # the limits of the loops below. As we have already added tolerance to gammas, we
  # also have to add tolerance to w0.ord, to get correct comparisons.

  X <- cbind(w0,x) #X: ECT and intercept and lags
  if(dir){
    q         <- qr(X)
    res_restr <- qr.resid(q,y)
  } else{
    z0zz      <- X%*%solve(t(X)%*%X)
    res_restr <- lm.fit(X,y)$residuals # residuals from the linear VECM given b0
  }

  res_restr1 <- res_restr[,1]
  res_restr2 <- res_restr[,2]
  store      <- matrix(0, ncol=ngridTh,nrow=ngridTh)
  SSR        <- matrix(100000, ncol=ngridTh,nrow=ngridTh)
  Max2       <- w0.ord[which(abs(w0.ord-w0.ord[t-Ttrim])<tolerance,arr.ind=TRUE)[1]-Ttrim]

  # which(abs(w0.ord-w0.ord[t-Ttrim])<tolerance,arr.ind=TRUE)[1] computes the
  # index in w0.ord of the smallest element in w0.ord equal to w0.ord[t-Ttrim] except
  # for floating point error, so Max2 is the element in w0.ord with index Trim
  # smaller than the computed index by which(....)[1].

  iMax <- max(which(gammas<=Max2,arr.ind=TRUE)) # The index of the largest element
                                                # in gammas which is <=Max2
  for(i in 1:iMax){
    d1   <- ifelse(w0<=gammas[i],1,0) # d1: dummy variable
    z1   <- if(common=="All") c(d1)*X else c(d1)*w0
    help <- which(abs(w0.ord-gammas[i])<tolerance,arr.ind=TRUE) # The index of the
    # elements in w0.ord which are =gammas[i] except for floating point error

    Min2 <- w0.ord[help[length(help)]+Ttrim]-tolerance # Min2 is the element in
    # w0.ord which index is Trim larger than the index in w0.ord of the
    # largest element in w0.ord which is equal to gammas[i]. The tolerance
    # is subtracted due to the test against gammas in the next two statements.
    # help <- length(which(abs(gammas-Min2)<tolerance,arr.ind=TRUE))
  
    if (gammas[ngridTh] > Min2) { # Min2 in gammas, i.e. the middle
    # regime contains at least
    # Ttrim elements; if help=0, gammas[i] is so large that the middle
    # regime contains less than Trim elements, which means that no LM
    # and SSR values should be computed for this value of i

    jMin <- min(which(gammas >= Min2,arr.ind=TRUE))
    # The index of the least element >= Min2 in gammas

    for (j in jMin:ngridTh) {
      d3 <- ifelse(w0>gammas[j],1,0) # d1: dummy variable
      z3 <- if(common=="All") c(d3)*X else c(d3)*w0
      z  <- cbind(z1,z3)

      res_unrestr <- if(dir) qr.resid(q, z) else z-z0zz%*%(t(X)%*%z)
      # z11: residuals from unrestricted model (with threhsold)
      zea <- res_restr1*res_unrestr
      zeb <- res_restr2*res_unrestr
      ze  <- cbind(zea,zeb)
      # [(z11.*(e1*ones(1,length(z11(1,:))))),(z11.*(e2*ones(1,length(z11(1,:)))))];
      v    <- crossprod(ze)
      z11y <- crossprod(res_unrestr,y)
      s    <- matrix(c(z11y), ncol=1)
      
      # vectorization of the parameter matrix z11y
      Vinv <- try(solve(v), silent=TRUE)

      if(inherits(Vinv, "try-error")) Vinv<-ginv(v)
        store[i,j] <- t(s)%*%Vinv%*%s
        d2         <- 1-d1-d3
        z2         <- c(d2)*X
        Z          <- cbind(z1,z2,z3)
        SSR[i,j]   <- crossprod(c(qr.resid(qr(Z),y)))
    } # end for (j in jMin:ngridTh) {
   }  # end if (help>0) {
}     # end of the whole loop
ret <- list()
ret$store <- store
ret$SSR   <- SSR
return(ret)
} # end of the function lmtest3Reg

### LM Test for bootstrapping when type="3Reg". This function returns only the supLM
### value. When not bootstrapping, we use the function lmtest3Reg which
### returns the whole set of LM values, and the SSR values. This is the only
### difference between these two functions.
###########

lmtest3Reg_boot <- function(y,x,w0,gammas,dir=dir){
  # y: y var, x: intercept and lags matrix, w0: ECT term, gammas: potential thresholds
  ngridTh <- length(gammas)
  w0.ord  <- sort(w0)+tolerance # The comparisons between w0.ord and gammas decide
  # the limits of the loops below. As we have already added tolerance to gammas,
  # we also have to add tolerance to w0.ord, to get correct comparisons.

  X <- cbind(w0,x) # X: ECT and intercept and lags
  if(dir){
    q         <- qr(X)
    res_restr <- qr.resid(q,y)
  } else{
    z0zz      <- X%*%solve(t(X)%*%X)
    res_restr <- lm.fit(X,y)$residuals # residuals from the linear VECM given b0
  }

  res_restr1 <- res_restr[,1]
  res_restr2 <- res_restr[,2]
  store      <- matrix(0, ncol=ngridTh,nrow=ngridTh)
  Max2       <- w0.ord[which(abs(w0.ord-w0.ord[t-Ttrim])<tolerance,arr.ind=TRUE)[1]-Ttrim]

  # which(abs(w0.ord-w0.ord[t-Ttrim])<tolerance,arr.ind=TRUE)[1] computes the
  # index in w0.ord of the smallest element in w0.ord equal to w0.ord[t-Ttrim] except
  # for floating point error, so Max2 is the element in w0.ord with index Trim
  # smaller than the computed index by which(....)[1].

  iMax <- max(which(gammas<=Max2,arr.ind=TRUE)) # The index of the largest element
                                                # in gammas which is <=Max2
  for(i in 1:iMax){
    d1   <- ifelse(w0<=gammas[i],1,0) # d1: dummy variable
    z1   <- if(common=="All") c(d1)*X else c(d1)*w0
    help <- which(abs(w0.ord-gammas[i])<tolerance,arr.ind=TRUE) # The index of the
    # elements in w0.ord which are =gammas[i] except for floating point error
  
    Min2 <- w0.ord[help[length(help)]+Ttrim]-tolerance # Min2 is the element in
    # w0.ord which index is Trim larger than the index in w0.ord of the
    # largest element in w0.ord which is equal to gammas[i]. The tolerance
    # is subtracted due to the test against gammas in the next two statements.
    # help<-length(which(abs(gammas-Min2)<tolerance,arr.ind=TRUE))

    if (gammas[ngridTh] > Min2) { # Min2 in gammas, i.e. the middle
                                  # regime contains at least
      # Ttrim elements; if help=0, gammas[i] is so large that the middle
      # regime contains less than Trim elements, which means that no LM
      # and SSR values should be computed for this value of i

      jMin <- min(which(gammas >= Min2,arr.ind=TRUE))
      # The index of the least element >= Min2 in gammas

  for (j in jMin:ngridTh) {
    d3 <- ifelse(w0>gammas[j],1,0) # d1: dummy variable
    z3 <- if(common=="All") c(d3)*X else c(d3)*w0
    z  <- cbind(z1,z3)
    res_unrestr <- if(dir) qr.resid(q, z) else z-z0zz%*%(t(X)%*%z)
    # z11: residuals from unrestricted model (with threhsold)
  
    zea <- res_restr1*res_unrestr
    zeb <- res_restr2*res_unrestr
    ze  <- cbind(zea,zeb)
    # [(z11.*(e1*ones(1,length(z11(1,:))))),(z11.*(e2*ones(1,length(z11(1,:)))))];
    v    <- crossprod(ze)
    z11y <- crossprod(res_unrestr,y)
    s    <- matrix(c(z11y), ncol=1)
  
    # vectorization of the parameter matrix z11y
    Vinv <- try(solve(v), silent=TRUE)
    if(inherits(Vinv, "try-error")) Vinv <- ginv(v)
      store[i,j] <- t(s)%*%Vinv%*%s
    
  } # end for (j in jMin:ngridTh) {
 }  # end if (help>0) {
}   # end of the whole loop

lm01 <- max(store, na.rm=TRUE)
return(lm01)
} # end of the function lmtest3Reg_boot

gamma2 <- GridSetup(w0.ord)
lm01   <- if (type=="3Reg") lmtest3Reg(y,x,w0,gamma2, dir=dir) else
  lmtest2RegSymReg(y,x,w0,gamma2, dir=dir)
teststat <- max(lm01$store, na.rm=TRUE)

##################################
### Bootstraps
##################################
if(nboot==0){
  CriticalValBoot <- NULL
  PvalBoot        <- NULL
  boots.reps      <- NULL
  if(hpc=="foreach") warning("hpc='foreach' used only when nboot>0\n")
    }else if (nboot>0){
    ##################################
    ### Fixed Regressor Bootstrap %
    ##################################

    if(boot.type=="FixedReg"){
      X <- cbind(w0,x) # X: ECT and intercept and lags
      Ttrim <- trim*t
      if(dir){
        q <- qr(X)
      } else{
        z0zz <- X%*%solve(t(X)%*%X)
      }

    lmtest_withBoot <- function(e){
      yr <- rnorm(n=t,0,1)*e
      return(if (type=="3Reg") lmtest3Reg_boot(yr,x,w0,gamma2,dir=dir) else
      lmtest2RegSymReg_boot(yr,x,w0,gamma2,dir=dir))
    }

  boots.reps <- if(hpc=="none") replicate(nboot, lmtest_withBoot(e=residuals(ve)))
    else foreach(i=1:nboot, .export="lmtest_withBoot", .combine="c")%dopar% lmtest_withBoot(e=residuals(ve))

  ##################################
  ### Residual Bootstrap
  ##################################
  } else{
  lmtest_with_resBoot <- function(ve){
  # bootstrap it
  data.boot <- TVECM.sim(TVECMobject=ve, type="boot")

  # estimate VECM  
  if(is.null(fixed.beta)){
    ve.boot <- VECM(data.boot, lag=lag, include="const", estim="ML")
  }else{
  ve.boot <- VECM(data.boot, lag=lag, include="const",
  beta=fixed.beta, estim="2OLS")
  }
  
  # extract w0, y and x
  ect.boot  <- ve.boot$model[,"ECT"]
  which.nas <- 1:(p+1)
  w0.boot   <- matrix(ect.boot[-which.nas], ncol=1)
  x.boot    <- ve.boot$model[-which.nas,-c(1:3)]
  y.boot    <- ve.boot$model[,c(1:2)]
  y.boot    <- diff(y.boot)[(p+1):(T-1),]
  
  # set-up grid
  w0.ord.boot <- sort(w0.boot)
  gamma2.boot <- GridSetup(w0.ord.boot)
  return(if (type=="3Reg") lmtest3Reg_boot(y.boot,x.boot,w0.boot,
  gamma2.boot, dir=dir) else
  lmtest2RegSymReg_boot(y.boot,x.boot,w0.boot,gamma2.boot,dir=dir))
}

boots.reps <- if(hpc=="none") replicate(nboot, lmtest_with_resBoot(ve)) else
foreach(i=1:nboot,.export="lmtest_with_resBoot", .combine="c")%dopar% lmtest_with_resBoot
} # end if boot= ResBoot

## result: compute p values and critical values:
PvalBoot        <- mean(ifelse(boots.reps>teststat,1,0))
CriticalValBoot <- quantile(boots.reps, probs= c(0.9, 0.95,0.99))
} # end if boot>0

####### Return args
args           <- list()
args$nboot     <- nboot
args$boot.type <- boot.type
ret            <- list()
ret$args       <- args
ret$stat       <- teststat
ret$SSR        <- min(lm01$SSR,na.rm=TRUE)
ret$LMvalues   <- lm01
ret$ths        <- gamma2

if (type=="3Reg") {
  a           <- which(abs(lm01$store-ret$stat)<1e-10,arr.ind=TRUE)
  ret$maxThLM <- c(gamma2[a[1,1]],gamma2[a[1,2]])
  d1          <- ifelse(w0<=ret$maxThLM[1],1,0)
  d3          <- ifelse(w0>ret$maxThLM[2],1,0)
  d2          <- 1-d1-d3
  
  ret$PercentsLM <- c(round(mean(d1)*100,digits=1),round(mean(d2)*100,digits=1),
  round(mean(d3)*100,digits=1))
  a <- which(abs(lm01$SSR-ret$SSR)<1e-10,arr.ind=TRUE)
  ret$minThSSR <- c(gamma2[a[1,1]],gamma2[a[1,2]])
  d1 <- ifelse(w0<=ret$minThSSR[1],1,0)
  d3 <- ifelse(w0>ret$minThSSR[2],1,0)
  d2 <- 1-d1-d3
  
  ret$PercentsSSR <- c(round(mean(d1)*100,digits=1),round(mean(d2)*100,digits=1),
  round(mean(d3)*100,digits=1))
} else {
  ret$maxThLM <- gamma2[which(abs(lm01$store-ret$stat)<1e-10)]
  d1          <- ifelse(w0<=ret$maxThLM,1,0)
  d2          <- 1-d1
  ret$PercentsLM  <- c(round(mean(d1)*100,digits=1),round(mean(d2)*100,digits=1))
  ret$minThSSR    <- gamma2[which(abs(lm01$SSR-ret$SSR)<1e-10)]
  d1              <- ifelse(w0<=ret$minThSSR,1,0)
  d2              <- 1-d1
  ret$PercentsSSR <- c(round(mean(d1)*100,digits=1),round(mean(d2)*100,digits=1))
}

  ret$PvalBoot        <- PvalBoot
  ret$CriticalValBoot <- CriticalValBoot
  ret$allBoots        <- boots.reps
  class(ret)          <- "TVECMXHanSeo02Test"
return(ret)
} # End of the whole function

### Print method
print.TVECMXHanSeo02Test<-function(x,...){
cat("## Test of linear versus threshold cointegration",
" of Hansen and Seo (2002) ##\n\n", sep="")
cat("Test Statistic:\t", x$stat)
cat("\t(Maximized for threshold value:", x$maxThLM, ")\n")
cat("Percentage of observations in each regime", x$PercentsLM, "%\n\n")

cat("Minimum SSR:\t", x$SSR)
cat("\t(Minimized for threshold value:", x$minThSSR, ")\n")
cat("Percentage of observations in each regime", x$PercentsSSR,"%\n")
  if(x$args$nboot>0){
    boot.name <- switch(x$args$boot.type,
    "FixedReg"="Fixed regressor bootstrap", "ResBoot"="Residual Bootstrap")
    cat("P-Value:\t", x$PvalBoot, "\t\t(",boot.name, ")\n")
  }
}