What is NRSP Residual?

Let \(T_i\) be a possibly right-censored observation, \(d_i\) be the indicator for being uncensored, and \(S_i(\cdot)\) be the survival function of original uncensored \(T^*_i\) given covariate \(x_i\) and parameters. The randomized survival probability (RSP) for \(T_{i}\) is defined as: \[\begin{equation} S_i^{R}(T_i, d_{i}, U_{i}) = \left\{ \begin{array}{rl} S_{i}(T_{i}), & \text{if $T_i$ is uncensored, i.e., $d_{i}=1$,}\\ U_{i}\,S_{i}(T_{i}), & \text{if $T_i$ is censored, i.e., $d_i=0$.} \end{array} \right. \label{rsp} \end{equation}\] where \(U_i\) is a uniform random number on \((0, 1)\). In Wu, Feng, and Li (2019+), we show that RSPs are uniformly distributed on \((0,1)\) under the true model for \(T^*_i\). Therefore, they can be transformed into residuals with any desired distribution. NRSP residuals are transformed RSPs with the standard normal quantile: \[\begin{equation} r_{i}^{\mbox{nrsp}}(T_i, d_{i}, U_i)=\Phi^{-1} (S_{i}^R(T_i, d_{i}, U_i)) \end{equation}\]

NRSP residuals are approximately standard normally distributed under the true model with estimated parameters. The approximate normality holds for any censoring distribution. NRSP residuals can be used to check the goodness-of-fit of \(S_i(\cdot)\) quantitatively by applying SW normality test to NRSP residuals, and graphically by visualizing their QQplot. Scatterplots of NRSP residuals may reveal the model departure nature.

R Functions for Computing NRSP for survreg and coxph Objects

#input: survreg_fit is a survreg object
resid_survreg<-function(survreg_fit)
{
  distr<-survreg_fit$dist
  y<- survreg_fit$y
  m <- nrow (y)
  parms<- as.numeric(survreg_fit[["parms"]])
  alpha_hat<-1/survreg_fit$scale
  
  if (distr %in% c("weibull","exponential","logistic","lognormal",
                   "loglogistic","gaussian", "loggaussian","rayleigh"))
  {
    SP<-1-(psurvreg(as.data.frame(as.matrix(y))[,-2], 
                    survreg_fit[["linear.predictors"]], 
                    scale=1/alpha_hat, distribution=distr))
    haz <- dsurvreg(as.data.frame(as.matrix(y))[,-2],
                    survreg_fit[["linear.predictors"]],
                    scale=1/alpha_hat, distribution=distr)/SP
  }else if  (distr %in% "t") 
  {
    SP<-1-(psurvreg(as.data.frame(as.matrix(y))[,-2], 
                    survreg_fit[["linear.predictors"]], 
                    scale=1/alpha_hat, distribution=distr,
                    parms = parms))
    haz <- dsurvreg(as.data.frame(as.matrix(y))[,-2], 
                    survreg_fit[["linear.predictors"]],
                    scale=1/alpha_hat, distribution=distr,
                    parms = parms)/SP
  }else stop ("The distribution is not supported")
  censored <- which(as.data.frame(as.matrix(y))[,-1]==0)
  n.censored <- length(censored)
  # NRSP
  RSP <- SP
  RSP[censored] <- RSP[censored]*runif(n.censored)
  nrsp <- qnorm(RSP)
  
  # Unmodified CS residual
  ucs<- -log(SP)
  # Modified CS residual
  MSP<- SP
  MSP[censored] <- SP[censored]/exp(1)
  mcs <- -log(MSP)
  nmsp <- qnorm (MSP)
  #Martingale Residual
  martg<- as.data.frame(as.matrix(survreg_fit$y))[,-1]- ucs
  #Deviance Residual
  dev<- sign(martg)* sqrt((-2)*(martg+as.data.frame(as.matrix(survreg_fit$y))[,-1]*
                            log(as.data.frame(as.matrix(survreg_fit$y))[,-1]-martg)))
  nrsp.sw.pvalue<-shapiro.test(nrsp)$p.value
  #hazard function
  haz_fn<- haz
  
  list(RSP=RSP,nrsp=nrsp,nrsp.sw.pvalue=nrsp.sw.pvalue,
       ucs=ucs, mcs=mcs,martg=martg, dev=dev, nmsp=nmsp,haz_fn=haz_fn)
}
# outputs:
## RSP --- Randomized Survival Probabilities
## nrsp --- Normalized RSP 
## nrsp.sw.pvalue --- GOF test p-values by applying SW test to NRSP
## ucs --- unmodified CS residuals
## mcs --- modified CS residuals
## nmsp --- normalized modified SP
## martg --- Martingale residuals
## dev --- Deviance residuals
## haz_fn --- hazard function of cs residuals


#input: coxfit_fit is a coxph object
resid_coxph<-function(coxfit_fit)
{
  y<- coxfit_fit$y
  m <- nrow (y)
  mre <- resid(coxfit_fit, type="martingale")
  dre <- resid(coxfit_fit, type="deviance")
  #Unmodified Cox-Snell residual
  ucs <- as.data.frame(as.matrix(y))[,-1] - mre
  #Survival Function
  SP<- exp(-ucs)
  censored <- which(as.data.frame(as.matrix(y))[,-1]==0)
  n.censored <- length(censored)
  #NMSP residual (normal-transformed modified survival prob)
  MSP<- SP
  MSP[censored] <- SP[censored]/exp(1)
  mcs <- -log(MSP)
  nmsp<- qnorm(MSP)
  #NRSP residual
  RSP <- SP
  RSP[censored] <- RSP[censored]*runif(n.censored)
  nrsp <- qnorm(RSP)
  nrsp.sw.pvalue<-shapiro.test(nrsp)$p.value
  list(nrsp=nrsp,RSP=RSP,nrsp.sw.pvalue=nrsp.sw.pvalue,
       ucs=ucs,mcs=mcs,nmsp=nmsp)
}
# outputs:
## RSP --- Randomized Survival Probabilities
## nrsp --- Normalized RSP 
## nrsp.sw.pvalue --- GOF test p-values by applying SW test to NRSP
## ucs --- unmodified CS residuals
## mcs --- modified CS residuals
## nmsp --- normalized modified SP
## martg --- Martingale residuals
## dev --- Deviance residuals
## haz_fn --- hazard function of cs residuals

Installation

Copy the above R functions into your R enviroment.

References

To be inserted.

Examples for Illustration and Demonstration

Illustration of RSP

Data Generation Function

#simulated data:t from weibull distrbution,c from exponentail distrbutions
set.seed(1) 
rexp2 <- function(n, rate){ if (rate==0) rep(Inf,n) else rexp(n=n, rate = rate)}
simulated_data<- function(n,beta0 , beta1 , alpha, mean.censor)
{   
  x <- rbinom(n, size = 1, p = 0.5)
  t0<- rexp2(n, rate= 1/mean.censor)
  survreg_sim_data <- rsurvreg( n, mean = beta0 + beta1 * x,
                                scale=1/alpha, distribution='weibull')
  t <- pmin(survreg_sim_data, t0)
  d <- as.numeric(t0>= survreg_sim_data )
  data_form<- data.frame(survreg_sim_data,t0,t,d,x) 
  out_r<-list(data_form=data_form, alpha=alpha, beta0=beta0, beta1=beta1)
  return (out_r) 
}

Animated Randomized Survival Probabilities of 50 Simulated Datasets

library("foreach")
library("survival")

n<-2000
beta0<-2
beta1<-1
alpha<-1.7
mean.censor<-14

#nrep is preset to a number
foreach (j = 1:nrep) %do%
{
   # simulating a dataset
   out_r<- simulated_data(n=n,beta0=beta0,beta1=beta1,
                          alpha=alpha, mean.censor=mean.censor)
   simulated_data_random<-out_r$data_form
   
   #checking censoring rate
   table(simulated_data_random$d)
   
   #fit AFT model
   survreg_fit_t <- survreg(Surv(out_r[[1]]$t, 
                                 out_r[[1]]$d) ~ out_r[[1]]$x,dist="weibull")
   survreg_fit_w <- survreg(Surv(out_r[[1]]$t, 
                                 out_r[[1]]$d) ~ out_r[[1]]$x,dist="lognormal")

   # compute residuals 
   true_model <- resid_survreg(survreg_fit_t)
   wrong_model<- resid_survreg(survreg_fit_w)

   rsp.t <- true_model$RSP
   nrsp.t<- true_model$nrsp
   rsp.w <- wrong_model$RSP 
   nrsp.w<- wrong_model$nrsp

   t<-seq(0, 50, length=n)
   gp1<- which(out_r[[1]]$x==1&out_r[[1]]$d==1)
   gp2<- which(out_r[[1]]$x==1&out_r[[1]]$d==0)
   gp3<- which(out_r[[1]]$x==0&out_r[[1]]$d==1)
   gp4<- which(out_r[[1]]$x==0&out_r[[1]]$d==0)

   s1.t<-1-(psurvreg(t, mean=sum(survreg_fit_t$coefficients),
                     scale=survreg_fit_t$scale, distribution="weibull"))
   s0.t<-1-(psurvreg(t, mean=survreg_fit_t$coefficients[[1]],
                     scale=survreg_fit_t$scale, distribution="weibull"))

   s1.w<-1-(psurvreg(t, mean=sum(survreg_fit_w$coefficients),
                     scale=survreg_fit_w$scale, distribution="lognormal"))
   s0.w<-1-(psurvreg(t, mean=survreg_fit_w$coefficients[[1]], 
                     scale=survreg_fit_w$scale, distribution="lognormal"))
   
   par(mfrow = c(2,2),mar=c(4,4,2,2))
   
   #The plot of true model
   plot(t, s1.t,type="l", ylab="S(t)",xlab="Time",
        main="RSPs, True Model",ylim=c(0,1.0),xlim=c(0,40))
   lines(t, s0.t)
   ## choose a random sample of observations for display
   sp <- sample(1:n,400)
   s.gp1 <- gp1[which(gp1 %in% sp)]
   s.gp2 <- gp2[which(gp2 %in% sp)]
   s.gp3 <- gp3[which(gp3 %in% sp)]
   s.gp4 <- gp4[which(gp4 %in% sp)]

   points(out_r[[1]]$t[s.gp1], rsp.t[s.gp1],col="green",pch=0)
   points(out_r[[1]]$t[s.gp2],rsp.t[s.gp2],col="red",pch=8)
   points(out_r[[1]]$t[s.gp3],rsp.t[s.gp3],pch=2,col="green")
   points(out_r[[1]]$t[s.gp4],rsp.t[s.gp4],pch=1,col="red")
   legend ("topright", 
           legend = c(expression('Uncensored,x'['i']*"=1"),
                      expression('Censored,x'['i']*'=1'),
                      expression('Uncensored,x'['i']*'=0'),
                      expression('Censored,x'['i']*'=0')),
           pch=c(0,8,2,1), col = c(3,2,3,2))
   hist(rsp.t,xlab="Randomized Survival Probability",main="Histogram of RSPs, True Model")
   
   #The plot of wrong model
   plot(t, s1.w,type="l",ylab="S(t)",xlab="Time",main="RSPs, Wrong Model",
        ylim=c(0,1.0),xlim=c(0,40))
   lines(t, s0.w)
   
   sp <- sample(1:n,400)
   s.gp1 <- gp1[which(gp1 %in% sp)]
   s.gp2 <- gp2[which(gp2 %in% sp)]
   s.gp3 <- gp3[which(gp3 %in% sp)]
   s.gp4 <- gp4[which(gp4 %in% sp)]

   points(out_r[[1]]$t[s.gp1],rsp.w[s.gp1],col="green",pch=0)
   points(out_r[[1]]$t[s.gp2],rsp.w[s.gp2],col="red",pch=8)
   points(out_r[[1]]$t[s.gp3],rsp.w[s.gp3],pch=2,col="green")
   points(out_r[[1]]$t[s.gp4],rsp.w[s.gp4],pch=1,col="red")

   legend ("topright", legend = c(expression('Uncensored,x'['i']*"=1"),
                                  expression('Censored,x'['i']*'=1'),
                                  expression('Uncensored,x'['i']*'=0'),
                                  expression('Censored,x'['i']*'=0')),
           pch=c(0,8,2,1), col = c(3,2,3,2)
          )
   hist(rsp.w,xlab="Randomized Survival Probability",
        main="Histogram of RSPs, Wrong Model")
}