# What is NRSP Residual?

Let $$T_i$$ be a possibly right-censored observation, $$d_i$$ be the indicator for being uncensored, and $$S_i(\cdot)$$ be the survival function of original uncensored $$T^*_i$$ given covariate $$x_i$$ and parameters. The randomized survival probability (RSP) for $$T_{i}$$ is defined as: $$$S_i^{R}(T_i, d_{i}, U_{i}) = \left\{ \begin{array}{rl} S_{i}(T_{i}), & \text{if T_i is uncensored, i.e., d_{i}=1,}\\ U_{i}\,S_{i}(T_{i}), & \text{if T_i is censored, i.e., d_i=0.} \end{array} \right. \label{rsp}$$$ where $$U_i$$ is a uniform random number on $$(0, 1)$$. In Wu, Feng, and Li (2019+), we show that RSPs are uniformly distributed on $$(0,1)$$ under the true model for $$T^*_i$$. Therefore, they can be transformed into residuals with any desired distribution. NRSP residuals are transformed RSPs with the standard normal quantile: $$$r_{i}^{\mbox{nrsp}}(T_i, d_{i}, U_i)=\Phi^{-1} (S_{i}^R(T_i, d_{i}, U_i))$$$

NRSP residuals are approximately standard normally distributed under the true model with estimated parameters. The approximate normality holds for any censoring distribution. NRSP residuals can be used to check the goodness-of-fit of $$S_i(\cdot)$$ quantitatively by applying SW normality test to NRSP residuals, and graphically by visualizing their QQplot. Scatterplots of NRSP residuals may reveal the model departure nature.

# R Functions for Computing NRSP for survreg and coxph Objects

#input: survreg_fit is a survreg object
resid_survreg<-function(survreg_fit)
{
distr<-survreg_fit$dist y<- survreg_fit$y
m <- nrow (y)
parms<- as.numeric(survreg_fit[["parms"]])
alpha_hat<-1/survreg_fit$scale if (distr %in% c("weibull","exponential","logistic","lognormal", "loglogistic","gaussian", "loggaussian","rayleigh")) { SP<-1-(psurvreg(as.data.frame(as.matrix(y))[,-2], survreg_fit[["linear.predictors"]], scale=1/alpha_hat, distribution=distr)) haz <- dsurvreg(as.data.frame(as.matrix(y))[,-2], survreg_fit[["linear.predictors"]], scale=1/alpha_hat, distribution=distr)/SP }else if (distr %in% "t") { SP<-1-(psurvreg(as.data.frame(as.matrix(y))[,-2], survreg_fit[["linear.predictors"]], scale=1/alpha_hat, distribution=distr, parms = parms)) haz <- dsurvreg(as.data.frame(as.matrix(y))[,-2], survreg_fit[["linear.predictors"]], scale=1/alpha_hat, distribution=distr, parms = parms)/SP }else stop ("The distribution is not supported") censored <- which(as.data.frame(as.matrix(y))[,-1]==0) n.censored <- length(censored) # NRSP RSP <- SP RSP[censored] <- RSP[censored]*runif(n.censored) nrsp <- qnorm(RSP) # Unmodified CS residual ucs<- -log(SP) # Modified CS residual MSP<- SP MSP[censored] <- SP[censored]/exp(1) mcs <- -log(MSP) nmsp <- qnorm (MSP) #Martingale Residual martg<- as.data.frame(as.matrix(survreg_fit$y))[,-1]- ucs
#Deviance Residual
dev<- sign(martg)* sqrt((-2)*(martg+as.data.frame(as.matrix(survreg_fit$y))[,-1]* log(as.data.frame(as.matrix(survreg_fit$y))[,-1]-martg)))
nrsp.sw.pvalue<-shapiro.test(nrsp)$p.value #hazard function haz_fn<- haz list(RSP=RSP,nrsp=nrsp,nrsp.sw.pvalue=nrsp.sw.pvalue, ucs=ucs, mcs=mcs,martg=martg, dev=dev, nmsp=nmsp,haz_fn=haz_fn) } # outputs: ## RSP --- Randomized Survival Probabilities ## nrsp --- Normalized RSP ## nrsp.sw.pvalue --- GOF test p-values by applying SW test to NRSP ## ucs --- unmodified CS residuals ## mcs --- modified CS residuals ## nmsp --- normalized modified SP ## martg --- Martingale residuals ## dev --- Deviance residuals ## haz_fn --- hazard function of cs residuals #input: coxfit_fit is a coxph object resid_coxph<-function(coxfit_fit) { y<- coxfit_fit$y
m <- nrow (y)
mre <- resid(coxfit_fit, type="martingale")
dre <- resid(coxfit_fit, type="deviance")
#Unmodified Cox-Snell residual
ucs <- as.data.frame(as.matrix(y))[,-1] - mre
#Survival Function
SP<- exp(-ucs)
censored <- which(as.data.frame(as.matrix(y))[,-1]==0)
n.censored <- length(censored)
#NMSP residual (normal-transformed modified survival prob)
MSP<- SP
MSP[censored] <- SP[censored]/exp(1)
mcs <- -log(MSP)
nmsp<- qnorm(MSP)
#NRSP residual
RSP <- SP
RSP[censored] <- RSP[censored]*runif(n.censored)
nrsp <- qnorm(RSP)
nrsp.sw.pvalue<-shapiro.test(nrsp)$p.value list(nrsp=nrsp,RSP=RSP,nrsp.sw.pvalue=nrsp.sw.pvalue, ucs=ucs,mcs=mcs,nmsp=nmsp) } # outputs: ## RSP --- Randomized Survival Probabilities ## nrsp --- Normalized RSP ## nrsp.sw.pvalue --- GOF test p-values by applying SW test to NRSP ## ucs --- unmodified CS residuals ## mcs --- modified CS residuals ## nmsp --- normalized modified SP ## martg --- Martingale residuals ## dev --- Deviance residuals ## haz_fn --- hazard function of cs residuals # Installation Copy the above R functions into your R enviroment. # References To be inserted. # Examples for Illustration and Demonstration ## Illustration of RSP ### Data Generation Function #simulated data:t from weibull distrbution,c from exponentail distrbutions set.seed(1) rexp2 <- function(n, rate){ if (rate==0) rep(Inf,n) else rexp(n=n, rate = rate)} simulated_data<- function(n,beta0 , beta1 , alpha, mean.censor) { x <- rbinom(n, size = 1, p = 0.5) t0<- rexp2(n, rate= 1/mean.censor) survreg_sim_data <- rsurvreg( n, mean = beta0 + beta1 * x, scale=1/alpha, distribution='weibull') t <- pmin(survreg_sim_data, t0) d <- as.numeric(t0>= survreg_sim_data ) data_form<- data.frame(survreg_sim_data,t0,t,d,x) out_r<-list(data_form=data_form, alpha=alpha, beta0=beta0, beta1=beta1) return (out_r) } ### Animated Randomized Survival Probabilities of 50 Simulated Datasets library("foreach") library("survival") n<-2000 beta0<-2 beta1<-1 alpha<-1.7 mean.censor<-14 #nrep is preset to a number foreach (j = 1:nrep) %do% { # simulating a dataset out_r<- simulated_data(n=n,beta0=beta0,beta1=beta1, alpha=alpha, mean.censor=mean.censor) simulated_data_random<-out_r$data_form

#checking censoring rate
table(simulated_data_random$d) #fit AFT model survreg_fit_t <- survreg(Surv(out_r[[1]]$t,
out_r[[1]]$d) ~ out_r[[1]]$x,dist="weibull")
survreg_fit_w <- survreg(Surv(out_r[[1]]$t, out_r[[1]]$d) ~ out_r[[1]]$x,dist="lognormal") # compute residuals true_model <- resid_survreg(survreg_fit_t) wrong_model<- resid_survreg(survreg_fit_w) rsp.t <- true_model$RSP
nrsp.t<- true_model$nrsp rsp.w <- wrong_model$RSP
nrsp.w<- wrong_model$nrsp t<-seq(0, 50, length=n) gp1<- which(out_r[[1]]$x==1&out_r[[1]]$d==1) gp2<- which(out_r[[1]]$x==1&out_r[[1]]$d==0) gp3<- which(out_r[[1]]$x==0&out_r[[1]]$d==1) gp4<- which(out_r[[1]]$x==0&out_r[[1]]$d==0) s1.t<-1-(psurvreg(t, mean=sum(survreg_fit_t$coefficients),
scale=survreg_fit_t$scale, distribution="weibull")) s0.t<-1-(psurvreg(t, mean=survreg_fit_t$coefficients[[1]],
scale=survreg_fit_t$scale, distribution="weibull")) s1.w<-1-(psurvreg(t, mean=sum(survreg_fit_w$coefficients),
scale=survreg_fit_w$scale, distribution="lognormal")) s0.w<-1-(psurvreg(t, mean=survreg_fit_w$coefficients[[1]],
scale=survreg_fit_w$scale, distribution="lognormal")) par(mfrow = c(2,2),mar=c(4,4,2,2)) #The plot of true model plot(t, s1.t,type="l", ylab="S(t)",xlab="Time", main="RSPs, True Model",ylim=c(0,1.0),xlim=c(0,40)) lines(t, s0.t) ## choose a random sample of observations for display sp <- sample(1:n,400) s.gp1 <- gp1[which(gp1 %in% sp)] s.gp2 <- gp2[which(gp2 %in% sp)] s.gp3 <- gp3[which(gp3 %in% sp)] s.gp4 <- gp4[which(gp4 %in% sp)] points(out_r[[1]]$t[s.gp1], rsp.t[s.gp1],col="green",pch=0)
points(out_r[[1]]$t[s.gp2],rsp.t[s.gp2],col="red",pch=8) points(out_r[[1]]$t[s.gp3],rsp.t[s.gp3],pch=2,col="green")
points(out_r[[1]]$t[s.gp4],rsp.t[s.gp4],pch=1,col="red") legend ("topright", legend = c(expression('Uncensored,x'['i']*"=1"), expression('Censored,x'['i']*'=1'), expression('Uncensored,x'['i']*'=0'), expression('Censored,x'['i']*'=0')), pch=c(0,8,2,1), col = c(3,2,3,2)) hist(rsp.t,xlab="Randomized Survival Probability",main="Histogram of RSPs, True Model") #The plot of wrong model plot(t, s1.w,type="l",ylab="S(t)",xlab="Time",main="RSPs, Wrong Model", ylim=c(0,1.0),xlim=c(0,40)) lines(t, s0.w) sp <- sample(1:n,400) s.gp1 <- gp1[which(gp1 %in% sp)] s.gp2 <- gp2[which(gp2 %in% sp)] s.gp3 <- gp3[which(gp3 %in% sp)] s.gp4 <- gp4[which(gp4 %in% sp)] points(out_r[[1]]$t[s.gp1],rsp.w[s.gp1],col="green",pch=0)
points(out_r[[1]]$t[s.gp2],rsp.w[s.gp2],col="red",pch=8) points(out_r[[1]]$t[s.gp3],rsp.w[s.gp3],pch=2,col="green")
points(out_r[[1]]\$t[s.gp4],rsp.w[s.gp4],pch=1,col="red")

legend ("topright", legend = c(expression('Uncensored,x'['i']*"=1"),
expression('Censored,x'['i']*'=1'),
expression('Uncensored,x'['i']*'=0'),
expression('Censored,x'['i']*'=0')),
pch=c(0,8,2,1), col = c(3,2,3,2)
)
hist(rsp.w,xlab="Randomized Survival Probability",
main="Histogram of RSPs, Wrong Model")
}