需要安装的包:
# Install ISwR (it supplies the thuesen data used later in this file) only
# when it is not already available, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ISwR", quietly = TRUE)) {
  install.packages("ISwR")
}
来一个R的程序吧:
plot(rnorm(500))
计算器
2 + 2
## [1] 4
exp(-2)
## [1] 0.1353
rnorm(15)
## [1] -0.229650 0.401697 -1.370342 -0.004025 -0.118978 -0.696247 -0.726071
## [8] -2.412498 1.631338 1.495504 -1.263071 1.404315 -0.955993 -1.178038
## [15] 1.021226
赋值语句 Assignments <-
x <- 2
x
## [1] 2
x + x
## [1] 4
向量
计算bmi:
# Six paired measurements: body weight and body height.
# NOTE(review): units are not stated here; presumably kg and metres -- confirm.
weight <- c(60, 72, 57, 90, 95, 72)
weight
## [1] 60 72 57 90 95 72
height <- c(1.75, 1.80, 1.65, 1.90, 1.74, 1.91)
# Vectorised arithmetic: each weight is divided by its own squared height.
bmi <- weight / (height^2)
bmi
## [1] 19.59 22.22 20.94 24.93 31.38 19.74
计算体重的均值和标准差:
sum(weight)
## [1] 446
sum(weight)/length(weight)
## [1] 74.33
xbar <- sum(weight)/length(weight)
weight - xbar
## [1] -14.333 -2.333 -17.333 15.667 20.667 -2.333
(weight - xbar)^2
## [1] 205.444 5.444 300.444 245.444 427.111 5.444
sum((weight - xbar)^2)
## [1] 1189
sqrt(sum((weight - xbar)^2)/(length(weight) - 1))
## [1] 15.42
mean(weight)
## [1] 74.33
sd(weight)
## [1] 15.42
t 检验
t.test(bmi, mu=22.5)
##
## One Sample t-test
##
## data: bmi
## t = 0.34, df = 5, p-value = 0.7
## alternative hypothesis: true mean is not equal to 22.5
## 95 percent confidence interval:
## 18.42 27.85
## sample estimates:
## mean of x
## 23.13
画图
plot(height,weight)
plot(height, weight, pch=2)
getwd()
## [1] "E:/mywork/study_R/R_sim_2016/R_base_2"
setwd("E:/mywork/study_R/R_sim_2016") #设定你的工作目录
setwd("E:/mywork/study_R/R_sim_2016/R_base_2") #设定你的工作目录
getwd()
## [1] "E:/mywork/study_R/R_sim_2016/R_base_2"
workspace(工作空间)
ls()
## [1] "bmi" "height" "weight" "x" "xbar"
rm(height, weight)
ls()
## [1] "bmi" "x" "xbar"
save.image()
attach 和 detach 命令
attach(ISwR::thuesen) ## 请提前安装包“ISwR”
blood.glucose
## [1] 15.3 10.8 8.1 19.5 7.2 5.3 9.3 11.1 7.5 12.2 6.7 5.2 19.0 15.1
## [15] 6.7 8.6 4.2 10.3 12.5 16.1 13.3 4.9 8.8 9.5
search()
## [1] ".GlobalEnv" "ISwR::thuesen" "package:stats"
## [4] "package:graphics" "package:grDevices" "package:utils"
## [7] "package:datasets" "package:methods" "Autoloads"
## [10] "package:base"
with(ISwR::thuesen, plot(blood.glucose, short.velocity))
detach()
search()
## [1] ".GlobalEnv" "package:stats" "package:graphics"
## [4] "package:grDevices" "package:utils" "package:datasets"
## [7] "package:methods" "Autoloads" "package:base"
subset、transform 和 within 的用法
library("ISwR")
thuesen
## blood.glucose short.velocity
## 1 15.3 1.76
## 2 10.8 1.34
## 3 8.1 1.27
## 4 19.5 1.47
## 5 7.2 1.27
## 6 5.3 1.49
## 7 9.3 1.31
## 8 11.1 1.09
## 9 7.5 1.18
## 10 12.2 1.22
## 11 6.7 1.25
## 12 5.2 1.19
## 13 19.0 1.95
## 14 15.1 1.28
## 15 6.7 1.52
## 16 8.6 NA
## 17 4.2 1.12
## 18 10.3 1.37
## 19 12.5 1.19
## 20 16.1 1.05
## 21 13.3 1.32
## 22 4.9 1.03
## 23 8.8 1.12
## 24 9.5 1.70
#knitr::kable(thuesen)
thue2 <- subset(thuesen,blood.glucose<7)
thue2
## blood.glucose short.velocity
## 6 5.3 1.49
## 11 6.7 1.25
## 12 5.2 1.19
## 15 6.7 1.52
## 17 4.2 1.12
## 22 4.9 1.03
thue3 <- transform(thuesen,log.gluc=log(blood.glucose))
thue3
## blood.glucose short.velocity log.gluc
## 1 15.3 1.76 2.728
## 2 10.8 1.34 2.380
## 3 8.1 1.27 2.092
## 4 19.5 1.47 2.970
## 5 7.2 1.27 1.974
## 6 5.3 1.49 1.668
## 7 9.3 1.31 2.230
## 8 11.1 1.09 2.407
## 9 7.5 1.18 2.015
## 10 12.2 1.22 2.501
## 11 6.7 1.25 1.902
## 12 5.2 1.19 1.649
## 13 19.0 1.95 2.944
## 14 15.1 1.28 2.715
## 15 6.7 1.52 1.902
## 16 8.6 NA 2.152
## 17 4.2 1.12 1.435
## 18 10.3 1.37 2.332
## 19 12.5 1.19 2.526
## 20 16.1 1.05 2.779
## 21 13.3 1.32 2.588
## 22 4.9 1.03 1.589
## 23 8.8 1.12 2.175
## 24 9.5 1.70 2.251
thue4 <- within(thuesen,{
log.gluc <- log(blood.glucose)
m <- mean(log.gluc)
centered.log.gluc <- log.gluc - m
rm(m)
})
thue4
## blood.glucose short.velocity centered.log.gluc log.gluc
## 1 15.3 1.76 0.481880 2.728
## 2 10.8 1.34 0.133573 2.380
## 3 8.1 1.27 -0.154109 2.092
## 4 19.5 1.47 0.724441 2.970
## 5 7.2 1.27 -0.271892 1.974
## 6 5.3 1.49 -0.578266 1.668
## 7 9.3 1.31 -0.015959 2.230
## 8 11.1 1.09 0.160972 2.407
## 9 7.5 1.18 -0.231070 2.015
## 10 12.2 1.22 0.255463 2.501
## 11 6.7 1.25 -0.343865 1.902
## 12 5.2 1.19 -0.597314 1.649
## 13 19.0 1.95 0.698466 2.944
## 14 15.1 1.28 0.468722 2.715
## 15 6.7 1.52 -0.343865 1.902
## 16 8.6 NA -0.094211 2.152
## 17 4.2 1.12 -0.810888 1.435
## 18 10.3 1.37 0.086171 2.332
## 19 12.5 1.19 0.279756 2.526
## 20 16.1 1.05 0.532846 2.779
## 21 13.3 1.32 0.341791 2.588
## 22 4.9 1.03 -0.656738 1.589
## 23 8.8 1.12 -0.071221 2.175
## 24 9.5 1.70 0.005319 2.251
#help(mean)
#?"mean"
library(survival)
##
## Attaching package: 'survival'
## The following object is masked from 'package:ISwR':
##
## lung
detach("package:survival")
# Draw 100 standard-normal values with a fixed seed so the figure is
# reproducible, then overlay the theoretical density on their histogram.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change what these calls do.
set.seed(1234)
x <- rnorm(100)
hist(x, freq = FALSE)
curve(dnorm(x), add = TRUE)
# Second version: compute the histogram without drawing it, then choose a
# y-range that covers both the tallest bar and the peak of the density curve
# (dnorm(0)), so the overlaid curve is not clipped at the top.
h <- hist(x, plot = FALSE)
ylim <- range(0, h$density, dnorm(0))
hist(x, freq = FALSE, ylim = ylim)
curve(dnorm(x), add = TRUE)
a <- c(1, 2, 5, 3, 6, -2, 4)
b <- c("one", "two", "three")
c <- c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE)
a <- c(1, 2, 5, 3, 6, -2, 4)
a[3]
## [1] 5
a[c(1, 3, 5)]
## [1] 1 5 6
a[2:6]
## [1] 2 5 3 6 -2
建立字符型的向量:
a <- c("k", "j", "h", "a", "c", "m")
a[3]
## [1] "h"
a[c(1, 3, 5)]
## [1] "k" "h" "c"
a[2:6]
## [1] "j" "h" "a" "c" "m"
y <- matrix(1:20, nrow=5, ncol=4)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
# The same four values and the same row/column labels are used twice; only
# the fill direction differs between the two matrices.
cells <- c(1, 26, 24, 68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
# Fill row by row.
mymatrix <- matrix(
  cells,
  nrow = 2, ncol = 2, byrow = TRUE,
  dimnames = list(rnames, cnames)
)
mymatrix
## C1 C2
## R1 1 26
## R2 24 68
# Fill column by column.
mymatrix <- matrix(
  cells,
  nrow = 2, ncol = 2, byrow = FALSE,
  dimnames = list(rnames, cnames)
)
mymatrix
## C1 C2
## R1 1 24
## R2 26 68
x <- matrix(1:10, nrow=2)
x
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
x[2,]
## [1] 2 4 6 8 10
x[,2]
## [1] 3 4
x[1,4]
## [1] 7
x[1, c(4,5)]
## [1] 7 9
# - Creating an array
# A 2 x 3 x 4 array holding 1..24, with one label vector per dimension.
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("C1", "C2", "C3", "C4")
z <- array(
  data = 1:24,
  dim = c(2, 3, 4),
  dimnames = list(dim1, dim2, dim3)
)
z
## , , C1
##
## B1 B2 B3
## A1 1 3 5
## A2 2 4 6
##
## , , C2
##
## B1 B2 B3
## A1 7 9 11
## A2 8 10 12
##
## , , C3
##
## B1 B2 B3
## A1 13 15 17
## A2 14 16 18
##
## , , C4
##
## B1 B2 B3
## A1 19 21 23
## A2 20 22 24
数据框可通过函数 data.frame() 创建:
# - Creating a dataframe
# Four equal-length vectors become the four columns; each column takes the
# name of the vector it was built from.
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(
  patientID = patientID,
  age = age,
  diabetes = diabetes,
  status = status
)
patientdata
## patientID age diabetes status
## 1 1 25 Type1 Poor
## 2 2 34 Type2 Improved
## 3 3 28 Type1 Excellent
## 4 4 52 Type1 Poor
# - Specifying elements of a dataframe
patientdata[1:2]
## patientID age
## 1 1 25
## 2 2 34
## 3 3 28
## 4 4 52
patientdata[c("diabetes","status")]
## diabetes status
## 1 Type1 Poor
## 2 Type2 Improved
## 3 Type1 Excellent
## 4 Type1 Poor
patientdata$age
## [1] 25 34 28 52
类别(名义型)变量和有序类别(有序型)变量在R中称为因子(factor)。
# - Using factors
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
diabetes <- factor(diabetes)
要表示有序型变量,需要为函数 factor() 指定参数 ordered=TRUE:
# Turn status into an ordered factor. The argument name is written in full
# (`ordered`) rather than relying on partial matching of `order`.
# No `levels` are supplied, so the levels are sorted alphabetically
# (Excellent < Improved < Poor); pass levels = c("Poor", "Improved",
# "Excellent") to factor() if a different ranking is wanted.
status <- factor(status, ordered = TRUE)
# Rebuild the data frame so it picks up the factor versions of the columns,
# then show the resulting column types.
patientdata <- data.frame(patientID, age, diabetes, status)
str(patientdata)
## 'data.frame': 4 obs. of 4 variables:
## $ patientID: num 1 2 3 4
## $ age : num 25 34 28 52
## $ diabetes : Factor w/ 2 levels "Type1","Type2": 1 2 1 1
## $ status : Ord.factor w/ 3 levels "Excellent"<"Improved"<..: 3 2 1 3
summary(patientdata)
## patientID age diabetes status
## Min. :1.00 Min. :25.0 Type1:3 Excellent:1
## 1st Qu.:1.75 1st Qu.:27.2 Type2:1 Improved :1
## Median :2.50 Median :31.0 Poor :2
## Mean :2.50 Mean :34.8
## 3rd Qu.:3.25 3rd Qu.:38.5
## Max. :4.00 Max. :52.0
# - Creating a list
# A list can mix component types: here a string, a numeric vector, a matrix
# and a character vector. Only the first two components are given names.
g <- "My First List"
h <- c(25, 26, 18, 39)
j <- matrix(1:10, nrow = 5)
k <- c("one", "two", "three")
mylist <- list(
  title = g,
  ages = h,
  j,
  k
)
mylist
## $title
## [1] "My First List"
##
## $ages
## [1] 25 26 18 39
##
## [[3]]
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
##
## [[4]]
## [1] "one" "two" "three"
# Entering data interactively from the keyboard
# Start from an empty data frame that only fixes the column names and types.
mydata <- data.frame(
  age = numeric(0),
  gender = character(0),
  weight = numeric(0)
)
# edit() opens a data editor and needs a user at the keyboard, so it is only
# called in an interactive session; in batch runs mydata stays empty.
if (interactive()) {
  mydata <- edit(mydata)
}
mydata
# Entering data inline
# The table is kept as a multi-line string and parsed through the `text`
# argument instead of being read from a file; the first non-blank line
# supplies the column names.
mydatatxt <- "
age gender weight
25 m 166
30 f 115
18 f 120
"
mydata <- read.table(text = mydatatxt, header = TRUE)
使用 read.table() 从带分隔符的文本文件中导入数据
先建立如下的文件studentgrades.csv
StudentID,First,Last,Math,Science,Social Studies
011,Bob,Smith,90,80,67
012,Jane,Weary,75,,80
010,Dan,"Thornton, III",65,75,70
040,Mary,"O'Leary",90,95,92
注意:建立这个文件时,需要在最后一行回车保存!
这个文件可以从这里下载!
#grades <- read.table("http://statstudy.github.io/data/studentgrades.csv", header=TRUE,row.names="StudentID", sep=",")
grades <- read.table("studentgrades.csv",header=TRUE,row.names="StudentID", sep=",")
grades # print data frame
## First Last Math Science Social.Studies
## 11 Bob Smith 90 80 67
## 12 Jane Weary 75 NA 80
## 10 Dan Thornton, III 65 75 70
## 40 Mary O'Leary 90 95 92
str(grades) # view data frame structure
## 'data.frame': 4 obs. of 5 variables:
## $ First : Factor w/ 4 levels "Bob","Dan","Jane",..: 1 3 2 4
## $ Last : Factor w/ 4 levels "O'Leary","Smith",..: 2 4 3 1
## $ Math : int 90 75 65 90
## $ Science : int 80 NA 75 95
## $ Social.Studies: int 67 80 70 92
仔细观察读入数据文件的问题!
下面来纠正:
# Alternatively, import the data while specifying column classes.
# The first three columns (StudentID, First, Last) are read as character and
# the three score columns as numeric; reading StudentID as character keeps
# its leading zeros when it is used for the row names.
grades <- read.table(
  "studentgrades.csv",
  header = TRUE,
  row.names = "StudentID",
  sep = ",",
  colClasses = c(rep("character", 3), rep("numeric", 3))
)
grades # print data frame
## First Last Math Science Social.Studies
## 011 Bob Smith 90 80 67
## 012 Jane Weary 75 NA 80
## 010 Dan Thornton, III 65 75 70
## 040 Mary O'Leary 90 95 92
str(grades) # view data frame structure
## 'data.frame': 4 obs. of 5 variables:
## $ First : chr "Bob" "Jane" "Dan" "Mary"
## $ Last : chr "Smith" "Weary" "Thornton, III" "O'Leary"
## $ Math : num 90 75 65 90
## $ Science : num 80 NA 75 95
## $ Social.Studies: num 67 80 70 92
library(xlsx)
workbook <- "c:/myworkbook.xlsx"
mydataframe <- read.xlsx(workbook, 1)
参阅这里.
使用 RCurl 包和 XML 包.
参看 foreign 包和 Hmisc 包.