从这里下载数据,有三个文件:
要求:编写一个名为 best_hospt() 的函数,能够输出指定州中30天死亡率最低的医院。输入:两个参数,一个是州的简称,另一个是疾病(数据里有三种病:“heart attack”、“heart failure” 和 “pneumonia”)。输出:30天死亡率最低的医院名称。
outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
# Exploratory section: histogram of one outcome column, then a lattice plot of
# death rate against patient count, one panel per hospital ownership type.
# Relies on `outcome` having been read as an all-character data frame.
#head(outcome)
# Column 11 is used throughout this file as the heart attack 30-day death
# rate; entries that are not numeric text become NA during coercion, which is
# what triggers the warning echoed below.
hist(as.numeric(outcome[,11]))
## Warning in hist(as.numeric(outcome[, 11])): NAs introduced by coercion
#table(outcome[,12])
# Second input file, also read with every column as character.
hospital <- read.csv("hospital-data.csv", colClasses="character")
# Join the two tables on the provider identifier.
# NOTE(review): the positional indices used below assume the columns of
# `outcome` keep their positions in `merged` - confirm against names(merged).
merged <- merge(outcome, hospital, by="Provider.Number")
deaths <- as.numeric(merged[,11])
## Warning: NAs introduced by coercion
# Presumably the number of heart attack patients per hospital (it is plotted
# under the "Number of Patients Seen" axis label) - TODO confirm column 15.
patients <- as.numeric(merged[,15])
## Warning: NAs introduced by coercion
# Ownership type as a factor; it conditions the plot into one panel per level.
owners <- factor(merged$Hospital.Ownership)
#str(outcome$State)
#Reference lattice for xyplot.
library(lattice)
#Plot variables
x_label <- "Number of Patients Seen"
y_label <- "30-day Death Rate"
main_title <- "Heart Attack 30-day Death Rate by Ownership"
#XYPlot for relationship between death rate and number of patients seen with linear regression line.
# type = c("p", "r") requests the points plus a fitted regression line in each
# ownership panel.
xyplot(deaths ~ patients | owners,
allow.multiple=TRUE,
xlab=x_label,
ylab=y_label,
main=main_title,
type=c("p", "r"))
# Return the name of the hospital with the lowest 30-day mortality rate for
# one outcome within one state.
#
# Reads "outcome-of-care-measures.csv" from the working directory on each call.
#
# Arguments:
#   state   - state abbreviation, matched against the file's `State` column.
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#
# Returns a single character string: the hospital name (second column of the
# file). Hospitals with the same rate are ranked alphabetically by name, so
# the result does not depend on the row order of the file.
#
# Errors:
#   "invalid outcome" - `outcome` is not one of the three supported values.
#   "invalid state"   - `state` has no hospital with a usable rate for this
#                       outcome (this includes unknown abbreviations).
best_hospt <- function(state, outcome) {
  # Column position of the 30-day mortality rate for each supported outcome.
  rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
  if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
    stop("invalid outcome")
  }
  index <- rate_columns[[outcome]]

  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  # Everything is read as text; entries that are not numeric become NA here
  # and the expected coercion warning is silenced.
  rates <- suppressWarnings(as.numeric(data[[index]]))

  # Only a missing rate for the requested outcome disqualifies a hospital;
  # missing values in unrelated columns do not.
  has_rate <- !is.na(rates)

  if (length(state) != 1 || is.na(state) ||
      !state %in% data$State[has_rate]) {
    stop("invalid state")
  }

  in_state <- has_rate & !is.na(data$State) & data$State == state
  hospitals <- data[[2]][in_state]

  # Primary key: mortality rate (ascending). Secondary key: hospital name,
  # which breaks ties alphabetically.
  ranking <- order(rates[in_state], hospitals)
  hospitals[ranking[1]]
}
# Example lookups; each "## [1]" line is the value printed by the call above it.
best_hospt(state = "TX", outcome = "heart attack")
## [1] "CYPRESS FAIRBANKS MEDICAL CENTER"
best_hospt(state = "TX", outcome = "heart failure")
## [1] "FORT DUNCAN MEDICAL CENTER"
best_hospt(state = "MD", outcome = "heart attack")
## [1] "JOHNS HOPKINS HOSPITAL, THE"
best_hospt(state = "MD", outcome = "pneumonia")
## [1] "GREATER BALTIMORE MEDICAL CENTER"
当有参数输入错误时,会返回错误信息:
# A state abbreviation that is not in the data stops with an error.
best_hospt(state = "BB", outcome = "heart attack")
##invalid state
# A misspelled outcome name stops with an error.
best_hospt(state = "NY", outcome = "heat attack")
##invalid outcome
# Return the name of the hospital holding a given rank, by 30-day mortality
# rate for one outcome, within one state (rank 1 = lowest rate).
#
# Reads "outcome-of-care-measures.csv" from the working directory on each call.
#
# Arguments:
#   state   - state abbreviation, matched against the file's `State` column.
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   num     - "best" (rank 1), "worst" (last rank), or a rank >= 1. Numeric
#             text such as "4" is accepted; fractional ranks are truncated
#             the way R indexing truncates them.
#
# Returns a single character string: the hospital name (second column of the
# file), or NA when `num` exceeds the number of ranked hospitals in the state.
# Hospitals with equal rates are ranked alphabetically by name.
#
# Errors:
#   "invalid outcome" - `outcome` is not one of the three supported values.
#   "invalid state"   - `state` has no hospital with a usable rate for this
#                       outcome (this includes unknown abbreviations).
#   "invalid num"     - `num` is not "best", "worst" or a rank >= 1.
rankhospital <- function(state, outcome, num = "best") {
  # Column position of the 30-day mortality rate for each supported outcome.
  rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
  if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
    stop("invalid outcome")
  }
  index <- rate_columns[[outcome]]

  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  # Everything is read as text; entries that are not numeric become NA here
  # and the expected coercion warning is silenced.
  rates <- suppressWarnings(as.numeric(data[[index]]))

  # Only a missing rate for the requested outcome disqualifies a hospital.
  has_rate <- !is.na(rates)

  if (length(state) != 1 || is.na(state) ||
      !state %in% data$State[has_rate]) {
    stop("invalid state")
  }

  in_state <- has_rate & !is.na(data$State) & data$State == state
  hospitals <- data[[2]][in_state]
  # Sort by rate, then by hospital name to break ties.
  ranked <- hospitals[order(rates[in_state], hospitals)]

  # Resolve `num` to a position. Ranks below 1 are rejected because zero or
  # negative indices would return an empty or multi-element result instead of
  # a single hospital.
  if (length(num) != 1 || is.na(num)) {
    stop("invalid num")
  }
  if (is.character(num) && num == "best") {
    position <- 1
  } else if (is.character(num) && num == "worst") {
    position <- length(ranked)
  } else {
    position <- suppressWarnings(as.numeric(num))
    if (is.na(position) || position < 1) {
      stop("invalid num")
    }
  }

  # Indexing past the end yields NA, which is the documented out-of-range
  # result.
  ranked[position]
}
编写一个名为 rankhospital() 的函数,需要按照不同州对不同的疾病按照30天死亡率由低到高排名。输入三个参数:第一个是州的简称,第二个是疾病名称,第三个是排名。输出的是医院的名称。比如:
rankhospital("TX", "heart failure", 4)
## [1] "DETAR HOSPITAL NAVARRO"
# "worst" selects the last-ranked hospital in the state.
rankhospital(state = "MD", outcome = "heart attack", num = "worst")
## [1] "HARFORD MEMORIAL HOSPITAL"
# A rank beyond the number of ranked hospitals yields NA.
rankhospital(state = "MN", outcome = "heart attack", num = 5000)
## [1] NA
# For every state in the file, return the hospital holding a given rank by
# 30-day mortality rate for one outcome (rank 1 = lowest rate).
#
# Reads "outcome-of-care-measures.csv" from the working directory on each call.
#
# Arguments:
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   num     - "best" (rank 1), "worst" (last rank) or a numeric rank >= 1.
#
# Returns a data frame with one row per state, row names set to the state
# abbreviations, and two columns:
#   hospital - hospital name at the requested rank, or NA when the state has
#              fewer ranked hospitals than `num` (or none at all).
#   state    - state abbreviation.
# Hospitals with equal rates are ranked alphabetically by name.
#
# Errors:
#   "invalid outcome" - `outcome` is not one of the three supported values.
#   "invalid num"     - `num` is not "best", "worst" or a numeric rank >= 1.
rankall <- function(outcome, num = "best") {
  # Name of the mortality-rate column for each supported outcome.
  rate_columns <- c(
    "heart attack" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack",
    "heart failure" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure",
    "pneumonia" = "Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia"
  )
  if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
    stop("invalid outcome")
  }
  column <- rate_columns[[outcome]]

  # Validate `num` once, up front, rather than once per state. Ranks below 1
  # are rejected because they would produce zero-length per-state results.
  wants_keyword <- is.character(num) && length(num) == 1 &&
    num %in% c("best", "worst")
  wants_rank <- is.numeric(num) && length(num) == 1 && !is.na(num) && num >= 1
  if (!wants_keyword && !wants_rank) {
    stop("invalid num")
  }

  full_data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  # Everything is read as text; entries that are not numeric become NA here
  # and the expected coercion warning is silenced.
  full_data[[column]] <- suppressWarnings(as.numeric(full_data[[column]]))

  data_by_state <- split(
    full_data[, c("Hospital.Name", "State", column)],
    full_data$State
  )

  # Pick the hospital at the requested rank within one state's rows.
  pick_hospital <- function(state_data) {
    # `[[` hands order() a plain numeric vector as the sort key rather than a
    # one-column data frame; na.last = NA drops hospitals without a rate.
    ranking <- order(
      state_data[[column]],
      state_data$Hospital.Name,
      na.last = NA
    )
    position <- if (wants_rank) {
      num
    } else if (num == "best") {
      1
    } else {
      length(ranking)
    }
    # A state with no ranked hospital (or too few) must still contribute
    # exactly one value, otherwise the result columns would differ in length.
    if (length(ranking) == 0 || position > length(ranking)) {
      return(NA_character_)
    }
    state_data$Hospital.Name[ranking[position]]
  }

  hospitals <- vapply(data_by_state, pick_hospital, character(1))
  data.frame(
    hospital = hospitals,
    state = names(hospitals),
    row.names = names(hospitals)
  )
}
# First ten states: the hospital ranked 20th for heart attack in each.
head(rankall(outcome = "heart attack", num = 20), n = 10)
# Last ten states: the worst-ranked hospital for pneumonia in each.
tail(rankall(outcome = "pneumonia", num = "worst"), n = 10)
请以“R语言与多元统计5+姓名+学号”为标题 Email至:32025690@qq.com
返回课程主页。