这里下载数据,有三个文件:

要求:

# Load the outcome measures. Every column is read as character, so numeric
# columns have to be converted explicitly before use.
outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
#head(outcome)
# Histogram of column 11 after numeric conversion. Entries that cannot be
# parsed as numbers become NA, which produces the warning echoed below.
hist(as.numeric(outcome[,11]))
## Warning in hist(as.numeric(outcome[, 11])): NAs introduced by coercion

#table(outcome[,12])
# Load the hospital table, again with every column as character.
hospital <- read.csv("hospital-data.csv", colClasses="character")

# Join the two tables on their shared Provider.Number column.
merged <- merge(outcome, hospital, by="Provider.Number")

# NOTE(review): columns 11 and 15 of the merged table are presumably the heart
# attack 30-day death rate and the number of patients (see the plot labels
# below) - confirm against the merged column names.
deaths <- as.numeric(merged[,11])
## Warning: NAs introduced by coercion
patients <- as.numeric(merged[,15])
## Warning: NAs introduced by coercion
# Ownership category, used as the conditioning factor (one panel per level).
owners <- factor(merged$Hospital.Ownership)
#str(outcome$State)
#Reference lattice for xyplot.
library(lattice)

#Plot variables
x_label <- "Number of Patients Seen"
y_label <- "30-day Death Rate"
main_title <- "Heart Attack 30-day Death Rate by Ownership"

#XYPlot for relationship between death rate and number of patients seen with linear regression line.
# type = c("p", "r") draws the points plus a fitted regression line per panel.
xyplot(deaths ~ patients | owners, 
    allow.multiple=TRUE, 
    xlab=x_label, 
    ylab=y_label, 
    main=main_title,
    type=c("p", "r"))

# Return the name of the hospital with the lowest 30-day mortality rate for
# the given outcome within one state.
#
# Arguments:
#   state   - state abbreviation, matched exactly against the `State` column.
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   file    - path of the outcome CSV; defaults to the file name this function
#             has always read, so existing two-argument calls are unchanged.
#
# Returns a character scalar. Ties on the rate are broken alphabetically by
# hospital name.
#
# Stops with "invalid outcome" for an unsupported outcome, and with
# "invalid state" when the state has no hospital with a usable rate.
best_hospt <- function(state, outcome, file = "outcome-of-care-measures.csv") {
    #Column position of the 30-day mortality rate for each supported outcome.
    rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

    #Invalid outcome input type
    if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
        stop("invalid outcome")
    }
    index <- rate_columns[[as.character(outcome)]]

    #Read the data and coerce the rate column; non-numeric entries become NA.
    data <- read.csv(file, colClasses = "character")
    data[, index] <- suppressWarnings(as.numeric(data[, index]))

    #Drop only the rows whose rate is missing. An NA in an unrelated column
    #must not remove a hospital from the ranking.
    data <- data[!is.na(data[, index]), ]

    #Invalid state input or no observations
    if (length(state) != 1 || !state %in% data$State) {
        stop("invalid state")
    }

    #Slice by state, then sort by rate with hospital name as the tie-breaker.
    slice <- data[which(data$State == state), ]
    slice <- slice[order(slice[, index], slice[, 2]), 2]
    slice <- slice[!is.na(slice)]

    #Get hospital name with the lowest 30-day mortality rate.
    slice[1]
}
# Example lookups. Each `##` line shows the value printed for the call above it.
best_hospt(state = "TX", outcome = "heart attack")
## [1] "CYPRESS FAIRBANKS MEDICAL CENTER"
best_hospt(state = "TX", outcome = "heart failure")
## [1] "FORT DUNCAN MEDICAL CENTER"
best_hospt(state = "MD", outcome = "heart attack")
## [1] "JOHNS HOPKINS HOSPITAL, THE"
best_hospt(state = "MD", outcome = "pneumonia")
## [1] "GREATER BALTIMORE MEDICAL CENTER"

当参数输入有误时,函数会调用 stop() 中止执行,并给出相应的错误信息:

# Invalid arguments make best_hospt() stop with an error. The error is caught
# and only its message is shown, so running this file from top to bottom is
# not aborted at this point.
tryCatch(best_hospt("BB", "heart attack"), error = function(e) conditionMessage(e))
## [1] "invalid state"
# "heat attack" is a deliberately misspelled outcome.
tryCatch(best_hospt("NY", "heat attack"), error = function(e) conditionMessage(e))
## [1] "invalid outcome"
# Return the name of the hospital holding a given rank, by 30-day mortality
# rate for one outcome, within one state (rank 1 = lowest rate).
#
# Arguments:
#   state   - state abbreviation, matched exactly against the `State` column.
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   num     - "best", "worst", or a rank number (a numeric string also works).
#   file    - path of the outcome CSV; defaults to the file name this function
#             has always read, so existing calls are unchanged.
#
# Returns the hospital name, or NA when the rank does not exist for the state
# (larger than the number of ranked hospitals, below 1, or not a number).
# Ties on the rate are broken alphabetically by hospital name.
#
# Stops with "invalid outcome" for an unsupported outcome, and with
# "invalid state" when the state has no hospital with a usable rate.
rankhospital <- function(state, outcome, num="best",
                         file = "outcome-of-care-measures.csv") {
    #Column position of the 30-day mortality rate for each supported outcome.
    rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

    #Invalid outcome input type
    if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
        stop("invalid outcome")
    }
    index <- rate_columns[[as.character(outcome)]]

    #Read the data and coerce the rate column; non-numeric entries become NA.
    data <- read.csv(file, colClasses = "character")
    data[, index] <- suppressWarnings(as.numeric(data[, index]))

    #Drop only the rows whose rate is missing. An NA in an unrelated column
    #must not remove a hospital from the ranking.
    data <- data[!is.na(data[, index]), ]

    #Invalid state input or no observations
    if (length(state) != 1 || !state %in% data$State) {
        stop("invalid state")
    }

    #Slice by state, then sort by rate with hospital name as the tie-breaker.
    slice <- data[which(data$State == state), ]
    slice <- slice[order(slice[, index], slice[, 2]), 2]
    slice <- slice[!is.na(slice)]

    #Translate num into a position. "best"/"worst" are filled in after the
    #numeric coercion, which leaves them (and any other non-number) as NA.
    position <- suppressWarnings(as.numeric(num))
    position[num %in% "best"] <- 1
    position[num %in% "worst"] <- length(slice)

    #Ranks below 1 do not exist; indexing with 0 or a negative number would
    #otherwise return an empty vector or many names instead of NA.
    position[!is.na(position) & position < 1] <- NA

    #Get hospital name for the given rank by its 30-day mortality rate.
    slice[position]
}
# Example lookups. Each `##` line shows the value printed for the call above it.
rankhospital(state = "TX", outcome = "heart failure", num = 4)
## [1] "DETAR HOSPITAL NAVARRO"
rankhospital(state = "MD", outcome = "heart attack", num = "worst")
## [1] "HARFORD MEMORIAL HOSPITAL"
# A rank beyond the number of ranked hospitals in the state yields NA.
rankhospital(state = "MN", outcome = "heart attack", num = 5000)
## [1] NA
# For every state in the data, find the hospital holding a given rank by
# 30-day mortality rate for one outcome (rank 1 = lowest rate).
#
# Arguments:
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   num     - "best", "worst", or a rank number.
#   file    - path of the outcome CSV; defaults to the file name this function
#             has always read, so existing calls are unchanged.
#
# Returns a data frame with one row per state (row names are the state codes)
# and the columns `hospital` and `state`. `hospital` is NA for a state that
# has no hospital at the requested rank. Ties on the rate are broken
# alphabetically by hospital name.
#
# Stops with "invalid outcome" or "invalid num" on unsupported arguments.
rankall <- function(outcome, num = "best", file = "outcome-of-care-measures.csv") {
    #Column position of the 30-day mortality rate for each supported outcome.
    rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

    if (length(outcome) != 1 || !outcome %in% names(rate_columns)) {
        stop("invalid outcome")
    }
    if (length(num) != 1 || is.na(num) ||
        !(is.numeric(num) || num %in% c("best", "worst"))) {
        stop("invalid num")
    }
    index <- rate_columns[[as.character(outcome)]]

    full_data <- read.csv(file, colClasses="character")

    #Non-numeric entries become NA; the coercion warning is expected noise.
    ranking_data <- data.frame(
        hospital = full_data$Hospital.Name,
        rate = suppressWarnings(as.numeric(full_data[[index]])),
        stringsAsFactors = FALSE
    )
    data_by_state <- split(ranking_data, full_data$State)

    rank_hospital <- function(state_data) {
        #Both sort keys are plain vectors, because order() works on vectors
        #rather than data frames. na.last = NA drops hospitals without a rate.
        ordered_rows <- order(state_data$rate, state_data$hospital, na.last=NA)

        position <- if (num == "best") {
            1
        } else if (num == "worst") {
            length(ordered_rows)
        } else {
            num
        }

        #Always return exactly one value per state, so that a state with too
        #few ranked hospitals cannot shorten the result.
        if (position < 1 || position > length(ordered_rows)) {
            return(NA_character_)
        }
        state_data$hospital[ordered_rows[position]]
    }

    hospital <- vapply(data_by_state, rank_hospital, character(1))

    data.frame(hospital = hospital, state = names(hospital), row.names = names(hospital))
}

# First ten states: hospital ranked 20th for heart attack mortality.
head(rankall(outcome = "heart attack", num = 20), n = 10)

# Last ten states: hospital with the highest pneumonia mortality.
tail(rankall(outcome = "pneumonia", num = "worst"), n = 10)

请以“R语言与多元统计5+姓名+学号”为标题 Email至:32025690@qq.com

返回课程主页