需要用到的包:KMsurv survival
#install.packages("KMsurv","survival")

课件上的数据可以通过以下方式获得:

library(KMsurv)
data(drug6mp)
library(survival)

通过帮助查看一下数据文件,搞清楚数据有那些列,分别代表什么含义。

?drug6mp

核对数据:

table(drug6mp$t1) #安慰剂组
## 
##  1  2  3  4  5  8 11 12 15 17 22 23 
##  2  2  1  2  2  4  2  2  1  1  1  1
table(drug6mp$relapse,drug6mp$t2) #治疗组
##    
##     6 7 9 10 11 13 16 17 19 20 22 23 25 32 34 35
##   0 1 0 1  1  1  0  0  1  1  1  0  0  1  2  1  1
##   1 3 1 0  1  0  1  1  0  0  0  1  1  0  0  0  0

1、非参数方法

KM估计

分别对安慰剂组和治疗组的生存时间进行KM估计:

治疗组的Km估计

kmsurvival2 <- survfit(Surv(drug6mp$t2,drug6mp$relapse) ~ 1)
summary(kmsurvival2)
## Call: survfit(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     6     21       3    0.857  0.0764        0.720        1.000
##     7     17       1    0.807  0.0869        0.653        0.996
##    10     15       1    0.753  0.0963        0.586        0.968
##    13     12       1    0.690  0.1068        0.510        0.935
##    16     11       1    0.627  0.1141        0.439        0.896
##    22      7       1    0.538  0.1282        0.337        0.858
##    23      6       1    0.448  0.1346        0.249        0.807
plot(kmsurvival2,main="K-M esimate for treatment group", xlab="Time",ylab="Survival Probability",col=3)

练习1、对数据drug6mp的安慰剂组进行并KM估计:

练习2、 分别画出安慰剂和治疗组的生存曲线,并加以解释

ana<-rbind(cbind(drug6mp$t1,0,1),cbind(drug6mp$t2,1,drug6mp$relapse))

治疗组和安慰剂组的生存函数比较(log-rank test):

survdiff(Surv(ana[,1],ana[,3]==1) ~ ana[,2])
## Call:
## survdiff(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2])
## 
##             N Observed Expected (O-E)^2/E (O-E)^2/V
## ana[, 2]=0 21       21     10.7      9.77      16.8
## ana[, 2]=1 21        9     19.3      5.46      16.8
## 
##  Chisq= 16.8  on 1 degrees of freedom, p= 4.17e-05

2、参数方法

不同参数指数分布的生存曲线:

es1<-function(x) pexp(x,rate=1,lower.tail=F)
es2<-function(x) pexp(x,rate=2,lower.tail=F)
es3<-function(x) pexp(x,rate=3,lower.tail=F)
curve(es1,from=0.01,to=4,ylab="survival function",type = "l",lty = 2,col = 3)
curve(es2,from=0.01,to=4,type="l",lty = 2,pch = 3, col = 4, add=T)
curve(es3,from=0.01,to=4,type = "l", lty = 2, pch = 4, col = 6, add=T)
legend(2.5,0.8,c("rate=1","rate=2","rate=3"),col = c(3, 4, 6),  text.col = "green4", lty = c(2, 2, 2), pch = c(NA, 3, 4), merge = TRUE, bg = "gray90")

1)没有协变量情形

指数分布拟合:

安慰剂组的指数分布拟合:

e1<-survreg(Surv(drug6mp$t1) ~ 1,dist="exponential")
summary(e1)
## 
## Call:
## survreg(formula = Surv(drug6mp$t1) ~ 1, dist = "exponential")
##             Value Std. Error   z        p
## (Intercept)  2.16      0.218 9.9 4.33e-23
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -66.3   Loglik(intercept only)= -66.3
## Number of Newton-Raphson Iterations: 4 
## n= 21
1/exp(e1$coefficients)#指数分布的参数估计
## (Intercept) 
##   0.1153846

治疗组:

e2<-survreg(Surv(drug6mp$t2,drug6mp$relapse) ~ 1,dist="exponential")
summary(e2)
## 
## Call:
## survreg(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1, dist = "exponential")
##             Value Std. Error    z     p
## (Intercept)  3.69      0.333 11.1 2e-28
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -42.2   Loglik(intercept only)= -42.2
## Number of Newton-Raphson Iterations: 4 
## n= 21
exp(e2$coef)
## (Intercept) 
##    39.88889
1/exp(e2$coef)
## (Intercept) 
##  0.02506964

练习3、考虑一个模拟研究:用参数为2的指数分布生成10000个生存数据,然后用用指数分布拟合,估计分布参数。

威布尔分布拟合:

安慰剂组:

we1<-survreg(Surv(drug6mp$t1) ~ 1,dist="weib")
summary(we1)
## 
## Call:
## survreg(formula = Surv(drug6mp$t1) ~ 1, dist = "weib")
##              Value Std. Error     z        p
## (Intercept)  2.249      0.168 13.40 5.72e-41
## Log(scale)  -0.315      0.174 -1.82 6.94e-02
## 
## Scale= 0.73 
## 
## Weibull distribution
## Loglik(model)= -64.9   Loglik(intercept only)= -64.9
## Number of Newton-Raphson Iterations: 6 
## n= 21
exp(we1$coefficients) #weibull's scale parameter
## (Intercept) 
##    9.482141
1/we1$scale # weibull's shape prameter
## [1] 1.3705

治疗组:

we2<-survreg(Surv(drug6mp$t2,drug6mp$relapse) ~ 1,dist="weib")
summary(we2)
## 
## Call:
## survreg(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1, dist = "weib")
##              Value Std. Error     z        p
## (Intercept)  3.519      0.273 12.87 6.28e-38
## Log(scale)  -0.303      0.278 -1.09 2.77e-01
## 
## Scale= 0.739 
## 
## Weibull distribution
## Loglik(model)= -41.7   Loglik(intercept only)= -41.7
## Number of Newton-Raphson Iterations: 5 
## n= 21
exp(we2$coefficients)
## (Intercept) 
##    33.76515
1/we2$scale
## [1] 1.353735

练习4、用参数shape=2,scale=5的威布尔分布生成100000个生存时间,用威布尔拟合,并估计参数 。

2)加速失效模型(带协变量的参数模型)

考虑带一个协变量( 分组0: 安慰剂组,1:治疗组 ):

指数分布的加速失效模型:

e4<-survreg(Surv(ana[,1],ana[,3]==1) ~ ana[,2],dist="exponential")
summary(e4)
## 
## Call:
## survreg(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2], dist = "exponential")
##             Value Std. Error    z        p
## (Intercept)  2.16      0.218 9.90 4.33e-23
## ana[, 2]     1.53      0.398 3.83 1.27e-04
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -108.5   Loglik(intercept only)= -116.8
##  Chisq= 16.49 on 1 degrees of freedom, p= 4.9e-05 
## Number of Newton-Raphson Iterations: 4 
## n= 42

威布尔分布的加速失效模型:

w5<-survreg(Surv(ana[,1],ana[,3]==1) ~ ana[,2],dist="weibull")
summary(w5)
## 
## Call:
## survreg(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2], dist = "weibull")
##              Value Std. Error     z        p
## (Intercept)  2.248      0.166 13.55 8.30e-42
## ana[, 2]     1.267      0.311  4.08 4.51e-05
## Log(scale)  -0.312      0.147 -2.12 3.43e-02
## 
## Scale= 0.732 
## 
## Weibull distribution
## Loglik(model)= -106.6   Loglik(intercept only)= -116.4
##  Chisq= 19.65 on 1 degrees of freedom, p= 9.3e-06 
## Number of Newton-Raphson Iterations: 5 
## n= 42