# Deterministic Functions

## deterministic.g.function

Time ordering of data is W, C1, L1, A1, Y1, C2, L2, A2, Y2
True value of E[Y_(1,1,1,1)] (expected value of Y setting C1, A1, C2, A2 all to 1) is approximately 0.413.
A1 is known to always be 1 if L1 < -2, and is 1 with probability 0.1 if L1 > 2
A2 is known to always be 1 if A1 is 1
We can incorporate this knowledge using deterministic.g.function.

Generate data:

set.seed(123)
rexpit <- function(x) rbinom(n=length(x), size=1, prob=plogis(x))
n <- 1000
ua <- rep(TRUE, n)   #ua = uncensored and alive
L1 <- A1 <- Y1 <- C2.binary <- L2 <- A2 <- Y2 <- rep(NA_real_, n)
W <- rnorm(n)

C1 <- BinaryToCensoring(is.uncensored=rexpit(2 + W))
ua <- ua & C1 == "uncensored"
L1[ua] <- rnorm(n)[ua] + W[ua]
A1[ua] <- rexpit(L1[ua])
A1[ua & L1 < -2] <- 1
A1[ua & L1 >  2] <- rbinom(n, size=1, prob=0.1)[ua & L1 >  2]
Y1[ua] <- rexpit((W + L1 - A1)[ua])
ua <- ua & !Y1

C2.binary[ua] <- rexpit((1 + 0.7 * L1 - A1)[ua])
C2 <- BinaryToCensoring(is.uncensored=C2.binary)
ua <- ua & C2 == "uncensored"
L2[ua] <- (0.5 * L1 - 0.9 * A1 + rnorm(n))[ua]
A2[ua] <- rexpit((0.5 * L1 + 0.8 * L2)[ua]) | A1[ua]
Y2[ua] <- rexpit((0.7 * L1 + L2 - 0.8 * A1 - A2)[ua])
Y2[Y1 == 1] <- 1  # if a patient dies at time 1, record death at time 2 as well
data <- data.frame(W, C1, L1, A1, Y1, C2, L2, A2, Y2)

Without considering deterministic knowledge of A1 and A2:

result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"),
Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1),
survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for L1:
#> Q.kplus1 ~ W
#> formula for Y1:
#> Q.kplus1 ~ W + L1 + A1
#> formula for L2:
#> Q.kplus1 ~ W + L1 + A1
#> formula for Y2:
#> Q.kplus1 ~ W + L1 + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for C1:
#> C1 ~ W
#> formula for A1:
#> A1 ~ W + L1
#> formula for C2:
#> C2 ~ W + L1 + A1
#> formula for A2:
#> A2 ~ W + L1 + A1 + L2
#>
#> Estimate of time to completion: < 1 minute
summary(result)
#> Estimator:  tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"),
#>     Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE,
#>     abar = c(1, 1))
#>
#>    Parameter Estimate:  0.42358
#>     Estimated Std Err:  0.022199
#>               p-value:  <2e-16
#>     95% Conf Interval: (0.38007, 0.46709)

Now we use deterministic.g.function to include our deterministic knowledge of A1 and A2:

deterministic.g.function <- function(data, current.node, nodes) {
if (names(data)[current.node] == "A1") {
det <- (data$L1 < -2 | data$L1 > 2) & !is.na(data$L1) prob1 <- ((data$L1 < -2) * 1 + (data$L1 > 2) * 0.1)[det] } else if (names(data)[current.node] == "A2") { det <- data$A1 == 1 & !is.na(data$A1) prob1 <- 1 } else if (names(data[current.node]) %in% c("C1", "C2")){ return(NULL) #this returns the default of no deterministic links #note that it is not necessary to specify that prior censoring indicates future censoring } else { stop("unexpected current.node") } return(list(is.deterministic=det, prob1=prob1)) } result <- ltmle(data, Anodes=c("A1","A2"), Cnodes=c("C1", "C2"), Lnodes=c("L1", "L2"), Ynodes=c("Y1", "Y2"), abar=c(1, 1), survivalOutcome=TRUE, deterministic.g.function = deterministic.g.function) #> Qform not specified, using defaults: #> formula for L1: #> Q.kplus1 ~ W #> formula for Y1: #> Q.kplus1 ~ W + L1 + A1 #> formula for L2: #> Q.kplus1 ~ W + L1 + A1 #> formula for Y2: #> Q.kplus1 ~ W + L1 + A1 + L2 + A2 #> #> gform not specified, using defaults: #> formula for C1: #> C1 ~ W #> formula for A1: #> A1 ~ W + L1 #> formula for C2: #> C2 ~ W + L1 + A1 #> formula for A2: #> A2 ~ W + L1 + A1 + L2 #> #> Estimate of time to completion: < 1 minute summary(result) #> Estimator: tmle #> Call: #> ltmle(data = data, Anodes = c("A1", "A2"), Cnodes = c("C1", "C2"), #> Lnodes = c("L1", "L2"), Ynodes = c("Y1", "Y2"), survivalOutcome = TRUE, #> abar = c(1, 1), deterministic.g.function = deterministic.g.function) #> #> Parameter Estimate: 0.42013 #> Estimated Std Err: 0.026763 #> p-value: <2e-16 #> 95% Conf Interval: (0.36768, 0.47259) ## deterministic.Q.function In this example, when L2 is positive, the patient leaves the study and we consider her final outcome to be 0. W -> A1 -> Y1 -> L2 -> A2 -> Y2 n <- 1000 L2 <- A2 <- Y2 <- as.numeric(rep(NA, n)) W <- rnorm(n) A1 <- rexpit(W) Y1 <- rexpit(W - A1) alive <- Y1 == 0 L2[alive] <- (0.5 * W - 0.9 * A1 + rnorm(n))[alive] completed.study <- alive & L2 > 0 • Scenario 1: patients don’t change treatment after leaving study; leave their A2 as NA A2[alive & !completed.study] <- rexpit((0.5 * W + 0.8 * L2)[alive & !completed.study]) Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study]) Y2[alive & completed.study] <- 0 Y2[!alive] <- 1 # if a patient dies at time 1, record death at time 2 as well data <- data.frame(W, A1, Y1, L2, A2, Y2) Specify that Q is deterministically 0 when L2 is in the history of the current Q regression and L2 > 0 It is not necessary to specify that Q is deterministically 1 if Y1 is 1; this is automatic. Also note that det.Q.fun doesn’t condition on called.from.estimate.g so g will also be set deterministically after L2 > 0. det.Q.fun.1 <- function(data, current.node, nodes, called.from.estimate.g) { L2.index <- which(names(data) == "L2") stopifnot(length(L2.index) == 1) L2.in.history <- L2.index < current.node if (! L2.in.history) return(NULL) is.deterministic <- data$L2 > 0 & !is.na(data$L2) return(list(is.deterministic=is.deterministic, Q.value=0)) } result.scenario1 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1), SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.1, survivalOutcome=TRUE) #> Qform not specified, using defaults: #> formula for Y1: #> Q.kplus1 ~ W + A1 #> formula for Y2: #> Q.kplus1 ~ W + A1 + L2 + A2 #> #> gform not specified, using defaults: #> formula for A1: #> A1 ~ W #> formula for A2: #> A2 ~ W + A1 + L2 #> #> Warning in CheckForVarianceWarning(inputs, g.ratio): Variance estimate is based #> on influence curve only, which may be significantly anticonservative because #> your data appears to contain positivity violations. Robust variance estimate is #> not currently available with deterministic.Q.function but this will be addressed #> in a future release. summary(result.scenario1) #> Estimator: tmle #> Call: #> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1", #> "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL, #> estimate.time = FALSE, deterministic.Q.function = det.Q.fun.1) #> #> Parameter Estimate: 0.34674 #> Estimated Std Err: 0.028181 #> p-value: <2e-16 #> 95% Conf Interval: (0.29151, 0.40198) • Scenario 2: treatment can still change after a patient leaves the study A2[alive] <- rexpit((0.5 * W + 0.8 * L2)[alive]) #patients can change treatment after leaving study Y2[alive & !completed.study] <- rexpit((L2 - 0.8 * A1 - A2)[alive & !completed.study]) Y2[alive & completed.study] <- 0 Y2[!alive] <- 1 # if a patient dies at time 1, record death at time 2 as well data <- data.frame(W, A1, Y1, L2, A2, Y2) det.Q.fun.2 <- function(data, current.node, nodes, called.from.estimate.g) { #there is no deterministic information when calculating g - treatment may still change if (called.from.estimate.g) return(NULL) L2.index <- which(names(data) == "L2") stopifnot(length(L2.index) == 1) L2.in.history <- L2.index < current.node if (! L2.in.history) return(NULL) is.deterministic <- data$L2 > 0 & !is.na(data\$L2)
return(list(is.deterministic=is.deterministic, Q.value=0))
}

result.scenario2 <- ltmle(data, Anodes=c("A1","A2"), Lnodes="L2", Ynodes=c("Y1", "Y2"), abar=c(1, 1),
SL.library=NULL, estimate.time=FALSE, deterministic.Q.function=det.Q.fun.2, survivalOutcome=TRUE)
#> Qform not specified, using defaults:
#> formula for Y1:
#> Q.kplus1 ~ W + A1
#> formula for Y2:
#> Q.kplus1 ~ W + A1 + L2 + A2
#>
#> gform not specified, using defaults:
#> formula for A1:
#> A1 ~ W
#> formula for A2:
#> A2 ~ W + A1 + L2
#>
#> Warning in CheckForVarianceWarning(inputs, g.ratio): Variance estimate is based
#> on influence curve only, which may be significantly anticonservative because
#> your data appears to contain positivity violations. Robust variance estimate is
#> not currently available with deterministic.Q.function but this will be addressed
#> in a future release.
summary(result.scenario2)
#> Estimator:  tmle
#> Call:
#> ltmle(data = data, Anodes = c("A1", "A2"), Lnodes = "L2", Ynodes = c("Y1",
#>     "Y2"), survivalOutcome = TRUE, abar = c(1, 1), SL.library = NULL,
#>     estimate.time = FALSE, deterministic.Q.function = det.Q.fun.2)
#>
#>    Parameter Estimate:  0.33135
#>     Estimated Std Err:  0.022441
#>               p-value:  <2e-16
#>     95% Conf Interval: (0.28737, 0.37534)