需要用到的包:KMsurv survival
#install.packages("KMsurv","survival")
课件上的数据可以通过以下方式获得:
library(KMsurv)
data(drug6mp)
library(survival)
通过帮助查看一下数据文件,搞清楚数据有那些列,分别代表什么含义。
?drug6mp
核对数据:
table(drug6mp$t1) #安慰剂组
##
## 1 2 3 4 5 8 11 12 15 17 22 23
## 2 2 1 2 2 4 2 2 1 1 1 1
table(drug6mp$relapse,drug6mp$t2) #治疗组
##
## 6 7 9 10 11 13 16 17 19 20 22 23 25 32 34 35
## 0 1 0 1 1 1 0 0 1 1 1 0 0 1 2 1 1
## 1 3 1 0 1 0 1 1 0 0 0 1 1 0 0 0 0
KM估计:
分别对安慰剂组和治疗组的生存时间进行KM估计:
治疗组的Km估计
kmsurvival2 <- survfit(Surv(drug6mp$t2,drug6mp$relapse) ~ 1)
summary(kmsurvival2)
## Call: survfit(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 21 3 0.857 0.0764 0.720 1.000
## 7 17 1 0.807 0.0869 0.653 0.996
## 10 15 1 0.753 0.0963 0.586 0.968
## 13 12 1 0.690 0.1068 0.510 0.935
## 16 11 1 0.627 0.1141 0.439 0.896
## 22 7 1 0.538 0.1282 0.337 0.858
## 23 6 1 0.448 0.1346 0.249 0.807
plot(kmsurvival2,main="K-M esimate for treatment group", xlab="Time",ylab="Survival Probability",col=3)
ana<-rbind(cbind(drug6mp$t1,0,1),cbind(drug6mp$t2,1,drug6mp$relapse))
治疗组和安慰剂组的生存函数比较(log-rank test):
survdiff(Surv(ana[,1],ana[,3]==1) ~ ana[,2])
## Call:
## survdiff(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2])
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## ana[, 2]=0 21 21 10.7 9.77 16.8
## ana[, 2]=1 21 9 19.3 5.46 16.8
##
## Chisq= 16.8 on 1 degrees of freedom, p= 4.17e-05
不同参数指数分布的生存曲线:
es1<-function(x) pexp(x,rate=1,lower.tail=F)
es2<-function(x) pexp(x,rate=2,lower.tail=F)
es3<-function(x) pexp(x,rate=3,lower.tail=F)
curve(es1,from=0.01,to=4,ylab="survival function",type = "l",lty = 2,col = 3)
curve(es2,from=0.01,to=4,type="l",lty = 2,pch = 3, col = 4, add=T)
curve(es3,from=0.01,to=4,type = "l", lty = 2, pch = 4, col = 6, add=T)
legend(2.5,0.8,c("rate=1","rate=2","rate=3"),col = c(3, 4, 6), text.col = "green4", lty = c(2, 2, 2), pch = c(NA, 3, 4), merge = TRUE, bg = "gray90")
指数分布拟合:
安慰剂组的指数分布拟合:
e1<-survreg(Surv(drug6mp$t1) ~ 1,dist="exponential")
summary(e1)
##
## Call:
## survreg(formula = Surv(drug6mp$t1) ~ 1, dist = "exponential")
## Value Std. Error z p
## (Intercept) 2.16 0.218 9.9 4.33e-23
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -66.3 Loglik(intercept only)= -66.3
## Number of Newton-Raphson Iterations: 4
## n= 21
1/exp(e1$coefficients)#指数分布的参数估计
## (Intercept)
## 0.1153846
治疗组:
e2<-survreg(Surv(drug6mp$t2,drug6mp$relapse) ~ 1,dist="exponential")
summary(e2)
##
## Call:
## survreg(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1, dist = "exponential")
## Value Std. Error z p
## (Intercept) 3.69 0.333 11.1 2e-28
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -42.2 Loglik(intercept only)= -42.2
## Number of Newton-Raphson Iterations: 4
## n= 21
exp(e2$coef)
## (Intercept)
## 39.88889
1/exp(e2$coef)
## (Intercept)
## 0.02506964
威布尔分布拟合:
安慰剂组:
we1<-survreg(Surv(drug6mp$t1) ~ 1,dist="weib")
summary(we1)
##
## Call:
## survreg(formula = Surv(drug6mp$t1) ~ 1, dist = "weib")
## Value Std. Error z p
## (Intercept) 2.249 0.168 13.40 5.72e-41
## Log(scale) -0.315 0.174 -1.82 6.94e-02
##
## Scale= 0.73
##
## Weibull distribution
## Loglik(model)= -64.9 Loglik(intercept only)= -64.9
## Number of Newton-Raphson Iterations: 6
## n= 21
exp(we1$coefficients) #weibull's scale parameter
## (Intercept)
## 9.482141
1/we1$scale # weibull's shape prameter
## [1] 1.3705
治疗组:
we2<-survreg(Surv(drug6mp$t2,drug6mp$relapse) ~ 1,dist="weib")
summary(we2)
##
## Call:
## survreg(formula = Surv(drug6mp$t2, drug6mp$relapse) ~ 1, dist = "weib")
## Value Std. Error z p
## (Intercept) 3.519 0.273 12.87 6.28e-38
## Log(scale) -0.303 0.278 -1.09 2.77e-01
##
## Scale= 0.739
##
## Weibull distribution
## Loglik(model)= -41.7 Loglik(intercept only)= -41.7
## Number of Newton-Raphson Iterations: 5
## n= 21
exp(we2$coefficients)
## (Intercept)
## 33.76515
1/we2$scale
## [1] 1.353735
考虑带一个协变量( 分组0: 安慰剂组,1:治疗组 ):
指数分布的加速失效模型:
e4<-survreg(Surv(ana[,1],ana[,3]==1) ~ ana[,2],dist="exponential")
summary(e4)
##
## Call:
## survreg(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2], dist = "exponential")
## Value Std. Error z p
## (Intercept) 2.16 0.218 9.90 4.33e-23
## ana[, 2] 1.53 0.398 3.83 1.27e-04
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -108.5 Loglik(intercept only)= -116.8
## Chisq= 16.49 on 1 degrees of freedom, p= 4.9e-05
## Number of Newton-Raphson Iterations: 4
## n= 42
威布尔分布的加速失效模型:
w5<-survreg(Surv(ana[,1],ana[,3]==1) ~ ana[,2],dist="weibull")
summary(w5)
##
## Call:
## survreg(formula = Surv(ana[, 1], ana[, 3] == 1) ~ ana[, 2], dist = "weibull")
## Value Std. Error z p
## (Intercept) 2.248 0.166 13.55 8.30e-42
## ana[, 2] 1.267 0.311 4.08 4.51e-05
## Log(scale) -0.312 0.147 -2.12 3.43e-02
##
## Scale= 0.732
##
## Weibull distribution
## Loglik(model)= -106.6 Loglik(intercept only)= -116.4
## Chisq= 19.65 on 1 degrees of freedom, p= 9.3e-06
## Number of Newton-Raphson Iterations: 5
## n= 42