本手册后来的都能正常操作完成,相关的更多方面是统计原理,现在把整个手册相关命令或表达式整理出来,同时把R的提示略去,以便直接复制进行操作,下一步就是边操作边理解,不明白手册中为什么要那样分析,再回到手册去找找解释,这样把R的操作与统计分析两方面结合起来学习:
#########################################################
###############################################
#
# A Handbook of Statistical Analyses Using R
# Brian S. Everitt and Torsten Hothorn
#
###############################################
###############################################
#
# Preface
#
###############################################
search()
[1] ".GlobalEnv" "package:methods" "package:stats"
[4] "package:graphics" "package:grDevices" "package:utils"
[7] "package:datasets" "Autoloads" "package:base"
library("HSAUR")
载入需要的程辑包:lattice
载入需要的程辑包:MASS
载入需要的程辑包:scatterplot3d
search()
[1] ".GlobalEnv" "package:HSAUR" "package:scatterplot3d"
[4] "package:MASS" "package:lattice" "package:methods"
[7] "package:stats" "package:graphics" "package:grDevices"
[10] "package:utils" "package:datasets" "Autoloads"
[13] "package:base"
# 只要你提前安装了所需要的包,系统会自动把需要的包载入,本人把所有的CRAN包都安装了,
# 生物信息学方面的除外。
# 载入包时,要注意,R对大小写敏感,同一字母的大小写是不同的字符,在双引号内,
# 空格也是一个字符,如果你在双引号内有空,而包没有,那将返回不存在某某程序包。
vignette("Ch_introduction_to_R", package = "HSAUR")
# 这将显示该安装内的帮助文件Ch_introduction_to_R,(本例中是一个PDF格式)
edit(vignette("Ch_introduction_to_R", package = "HSAUR")) # 查看该包的短文信息
# 如果想全面看看该包中的介绍,输入下全命令:
vignette(package = "HSAUR")
x <- sqrt(25) + 2
x
print(x)
#################################################
#
# CHAPTER 1
# An Introduction to R
# 1.3 Help and Documentation
#
################################################
help("mean") # 或者用?mean
library("sandwich")
help(package = "sandwich") # 程序包的介绍
vignette("sandwich") # 打开包的使用说明
#################################################
#
# CHAPTER 1
# An Introduction to R
# 1.4 Data Objects in R
#
#################################################
data("Forbes2000", package = "HSAUR") #该包有与系统不兼容的字符,先进行替换
Forbes2000[32,2]<-"FMISN1"
Forbes2000[54,2]<-"FMISN2"
Forbes2000[883,2]<-"FMISN3"
edit(Forbes2000)
str(Forbes2000)
help("Forbes2000")
class(Forbes2000)
dim(Forbes2000)
nrow(Forbes2000)
ncol(Forbes2000)
names(Forbes2000)
class(Forbes2000[, "rank"])
length(Forbes2000[, "rank"])
class(Forbes2000[, "name"])
length(Forbes2000[, "name"])
Forbes2000[, "name"][1]
class(Forbes2000[, "category"])
nlevels(Forbes2000[, "category"])
levels(Forbes2000[, "category"])
table(Forbes2000[, "category"])
class(Forbes2000[, "sales"])
median(Forbes2000[, "sales"])
mean(Forbes2000[, "sales"])
range(Forbes2000[, "sales"])
summary(Forbes2000[, "sales"])
###################################################
#
# CHAPTER 1
# An Introduction to R
# 1.5 Data Import and Export
#
###################################################
csvForbes2000 <- read.table("Forbes2000.csv", header = TRUE,sep = ",", row.names = 1)
# 注意row.names = 1
class(csvForbes2000[, "name"])
csvForbes2000 <- read.table("Forbes2000.csv", header = TRUE,sep = ",",
row.names = 1, colClasses = c("character","integer", "character",
"factor", "factor","numeric", "numeric", "numeric", "numeric"))
class(csvForbes2000[, "name"])
all.equal(csvForbes2000, Forbes2000)
classes <- c("character", "integer", "character","factor",
"factor", "numeric", "numeric", "numeric","numeric")
length(classes)
class(classes)
write.table(Forbes2000, file = "Forbes2000.csv",sep = ",", col.names = NA)
write.table(Forbes2000, file = "Forbes.csv",sep = ",", col.names = NA)
save(Forbes2000, file = "Forbes2000.rda")
list.files(pattern = ".rda")
load("Forbes2000.rda")
###################################################
#
# CHAPTER 1
# An Introduction to R
# 1.6 Basic Data Manipulation
#
####################################################
companies <- Forbes2000[, "name"]
companies[1]
1:3
[1] 1 2 3
companies[1:3]
companies[-(4:2000)]
length(companies)
Forbes2000[1:3, c("name", "sales", "profits", "assets")]
companies <- Forbes2000$name
companies <- Forbes2000[, "name"]
order_sales <- order(Forbes2000$sales)
companies[order_sales[1:3]]
Forbes2000[order_sales[c(2000, 1999, 1998)], c("name","sales", "profits", "assets")]
Forbes2000[Forbes2000$assets > 1000, c("name", "sales","profits", "assets")]
table(Forbes2000$assets > 1000)
na_profits <- is.na(Forbes2000$profits)
table(na_profits)
Forbes2000[na_profits, c("name", "sales", "profits","assets")]
table(complete.cases(Forbes2000))
UKcomp <- subset(Forbes2000, country == "United Kingdom")
dim(UKcomp)
CHcomp<-subset(Forbes2000, country == "China")
dim(CHcomp)
CHcomp
######################################################
#
# CHAPTER 1
# An Introduction to R
# 1.7 Simple Summary Statistics
#
######################################################
summary(Forbes2000)
lapply(Forbes2000, summary)
mprofits <- tapply(Forbes2000$profits, Forbes2000$category,median, na.rm = TRUE)
median(Forbes2000$profits)
median(Forbes2000$profits,na.rm=TRUE)
rev(sort(mprofits))[1:3]
######################################################
#
# 1.7 Simple Summary Statistics
# 1.7.1 Simple Graphics
#
######################################################
fm <- marketvalue ~ sales
class(fm)
layout(matrix(1:2, nrow = 2))
hist(Forbes2000$marketvalue)
hist(log(Forbes2000$marketvalue))
plot(log(marketvalue) ~ log(sales), data = Forbes2000,pch = ".")
plot(marketvalue~sales, data = Forbes2000,pch = ".")
#########################################################
#
# CHAPTER 1
# An Introduction to R
# 1.8 Organising an Analysis
#
#########################################################
#########################################################
#
# CHAPTER 1
# An Introduction to R
# 1.9 Summary
#
#########################################################
boxplot(log(marketvalue) ~ country,
data = subset(Forbes2000,country %in% c("United Kingdom",
"Germany","India", "Turkey")), ylab = "log(marketvalue)",varwidth = TRUE)
#########################################################
#
# CHAPTER 1
# An Introduction to R
# Exercises
#
#########################################################
########## Ex. 1.1:##########
tapply(Forbes2000$profits, Forbes2000$country,median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "United States",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "United Kingdom",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "France",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "Germany",median, na.rm = TRUE)
######### Ex. 1.2: #########
Gercomp<-subset(Forbes2000, country == "Germany")
table(complete.cases(Gercomp))
Gercomp[Gercomp$profits < 0, c("name","country", "sales","profits", "assets")]
######################################################
Forbes2000[Forbes2000$profits < 0 & Forbes2000$country == "Germany", c("name","country", "sales","profits", "assets")]
######### Ex. 1.3: #########
levels(Forbes2000[, "category"])
myidea<-Forbes2000[Forbes2000$category ==
c("Banking" ,"Business services & supplies" ,"Drugs & biotechnology",
"Food drink & tobacco" ,"Hotels restaurants & leisure" , "Insurance",
"Telecommunications services" ,"Transportation"), c("name","country",
"category", "sales","profits", "assets")]
myidea[myidea$profits>1,c("name","country", "category", "sales","profits", "assets")]
######### Ex. 1.4: #########
ForbesTopp <- subset(Forbes2000, profits != "NA")
length(ForbesTopp$profits)
order_profits <- order(ForbesTopp$profits)
Forbes50<-ForbesTopp[order_profits[c(1995:1945)],
c("name", "country", "sales", "profits", "assets")]
Forbes50
layout(matrix(1:2, nrow = 2))
hist(Forbes50$assets)
hist(Forbes50$sales)
hist(log(Forbes50$assets))
hist(log(Forbes50$sales))
plot(log(sales) ~ log(assets), data = Forbes50,pch = ".")
plot(log(assets) ~ log(sales), data = Forbes50,pch = ".")
######### Ex. 1.5: #########
companies_mean<-tapply(Forbes2000$sales, Forbes2000$country,mean, na.rm = TRUE)
summary(Forbes2000[Forbes2000$profits > 5, c("name","country", "profits")])
rm(list = ls())
######################################################
#
# CHAPTER 2
# Simple Inference:
# Guessing Lengths,Wave Energy, Water Hardness, Piston Rings, and Rearrests of Juveniles
#
######################################################
######################################################
#
# CHAPTER 2
# 2.3.1 Estimating the Width of a Room
#
######################################################
rm(list = ls())
library("HSAUR")
data("roomwidth", package = "HSAUR")
edit(roomwidth)
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
convert
tapply(roomwidth$width * convert, roomwidth$unit, summary)
########################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
myroom<-data.frame(unit, width)
summary(myroom)
tapply(myroom$width, myroom$unit, summary)
######################################################
layout(matrix(c(1, 2, 1, 3), nrow = 2, ncol = 2,byrow = FALSE))
matrix(c(1, 2, 1, 3), nrow = 2, ncol = 2,byrow = FALSE)
boxplot(I(width * convert) ~ unit, data = roomwidth,
ylab = "Estimated width (feet)", varwidth = TRUE,
names = c("Estimates in feet", "Estimates in metres (converted to feet)"))
feet <- roomwidth$unit == "feet"
qqnorm(roomwidth$width[feet], ylab = "Estimated width (feet)")
qqline(roomwidth$width[feet])
qqnorm(roomwidth$width[!feet], ylab = "Estimated width (metres)")
qqline(roomwidth$width[!feet])
#####################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
t.test(width~unit, alternative='two.sided',
conf.level=.95, var.equal=FALSE, data=roomwidth)
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
myroom<-data.frame(unit, width)
t.test(width~unit, alternative='two.sided', conf.level=.95, var.equal=FALSE, data=myroom)
t.test(width~unit, alternative='two.sided', conf.level=.95, var.equal=TRUE, data=myroom)
###################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
t.test(I(width * convert) ~ unit, data = roomwidth, var.equal = TRUE)
t.test(I(width * convert) ~ unit, data = roomwidth, var.equal = FALSE)
wilcox.test(I(width * convert) ~ unit, data = roomwidth, conf.int = TRUE)
######################################################
#
# CHAPTER 2
# 2.3.2 Wave Energy Device Mooring
#
######################################################
data("waves", package = "HSAUR")
edit(waves)
t.test(waves$method1, waves$method2,
alternative='two.sided', conf.level=.95, paired=TRUE)
#####################################################
boxplot(waves$method1, ylab="method1")
mooringdiff <- waves$method1 - waves$method2
layout(matrix(1:2, ncol = 2))
boxplot(mooringdiff, ylab = "Differences (Newton metres)",main = "Boxplot")
abline(h = 0, lty = 2)
qqnorm(mooringdiff, ylab = "Differences (Newton metres)")
qqline(mooringdiff)
######################################################
#
# CHAPTER 2
# 2.3.3 Mortality and Water Hardness
#
######################################################
library("HSAUR")
data("roomwidth", "waves", "water", "pistonrings", package = "HSAUR")
######################################################
Hist(water$hardness, scale="frequency", breaks="Sturges", col="darkgray")
scatterplot(mortality~hardness, reg.line=lm, smooth=TRUE,
labels=FALSE, boxplots='xy', span =0.5, data=water)
boxplot(water$mortality, ylab="mortality")
#####################################################
nf <- layout(matrix(c(2, 0, 1, 3), 2, 2, byrow = TRUE),c(2, 1), c(1, 2), TRUE)
psymb <- as.numeric(water$location)
plot(mortality ~ hardness, data = water, pch = psymb)
abline(lm(mortality ~ hardness, data = water))
legend("topright", legend = levels(water$location),pch = c(1, 2), bty = "n")
hist(water$hardness)
boxplot(water$mortality)
######################################################
#
# CHAPTER 2
# 2.3.4 Piston-ring Failures
#
######################################################
data("pistonrings", package = "HSAUR")
edit(pistonrings)
chisq.test(pistonrings)$residuals
library(vcd)
assoc(pistonrings)
######################################################
#
# CHAPTER 2
# 2.3.5 Rearrests of Juveniles
#
######################################################
data("rearrests", package = "HSAUR")
edit(rearrests)
mcnemar.test(rearrests, correct = FALSE)
binom.test(rearrests[2], n = sum(rearrests[c(2,3)]))
#####################################################
#
# CHAPTER 3
# Conditional Inference:
# Guessing Lengths, Suicides, Gastrointestinal Damage, and Newborn Infants
#
######################################################
#####################################################
#
# CHAPTER 3
# 3.3 Analysis Using R
# 3.3.1 Estimating the Width of a Room Revised
#
######################################################
rm(list = ls())
library("HSAUR")
data("roomwidth", package = "HSAUR")
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
feet <- roomwidth$unit == "feet"
metre <- !feet
y <- roomwidth$width * convert
T <- mean(y[feet]) - mean(y[metre])
T
meandiffs <- double(9999)
for (i in 1:length(meandiffs))
{
sy <- sample(y)
meandiffs[ i ] <- mean(sy[feet]) - mean(sy[metre])
}
# 注意原文[ i ]中i前后没有空格,只因与论坛斜体字符总突后而加了空格
hist(meandiffs)
abline(v = T, lty = 2)
abline(v = -T, lty = 2)
greater <- abs(meandiffs) > abs(T)
mean(greater)
binom.test(sum(greater), length(greater))$conf.int
attr(,"conf.level")
binom.test(sum(greater), length(greater))
library("coin")
independence_test(y ~ unit, data = roomwidth, distribution = "exact")
wilcox_test(y ~ unit, data = roomwidth, distribution = "exact")
#####################################################
#
# CHAPTER 3
# 3.3.3 Gastrointestinal Damages
#
#####################################################
data("Lanza", package = "HSAUR")
edit(Lanza)
xtabs(~treatment + classification + study, data = Lanza)
xtabs()
xtabs(formula = ~., data = parent.frame(), subset, na.action,
exclude = c(NA, NaN), drop.unused.levels = FALSE)
library("coin")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "I")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "II")
p <- cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "II",distribution = approximate(B = 19999))
p
pvalue(p)
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "III")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "IV")
cmh_test(classification ~ treatment | study, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)))
#####################################################
#
# CHAPTER 3
# 3.3.4 Teratogenesis
#
#####################################################
anomalies <- as.table
(matrix(c(235, 23, 3, 0, 41,35, 8, 0, 20, 11, 11, 1, 2, 1, 3, 1),
ncol = 4,dimnames = list(MD = 0:3, RA = 0:3)))
anomalies
mh_test(anomalies)
mh_test(anomalies, scores = list(c(0, 1, 2, 3)))
###############################################
#####################################################
#
# CHAPTER 4
# Analysis of Variance:
# Weight Gain,Foster Feeding in Rats, WaterHardness and Male Egyptian Skulls
#
#####################################################
#####################################################
#
# CHAPTER 4
# 4.3.1 Weight Gain in Rats
#
#####################################################
rm(list = ls())
library("HSAUR")
data("weightgain", "foster", "water", "skulls", package = "HSAUR")
edit(weightgain)
tapply(weightgain$weightgain, list(weightgain$source,weightgain$type), mean)
tapply(weightgain$weightgain, list(weightgain$source,weightgain$type), sd)
wg_aov <- aov(weightgain ~ source * type, data = weightgain)
plot.design(weightgain)
options("contrasts")
coef(aov(weightgain ~ source + type + source:type,data = weightgain, contrasts = list(source = contr.sum)))
summary(wg_aov)
interaction.plot(weightgain$type, weightgain$source,weightgain$weightgain)
summary(aov(weightgain ~ type * source, data = weightgain))
#####################################################
#
# CHAPTER 4
# 4.3.2 Foster Feeding of Rats of Different Genotype
#
#####################################################
edit(foster)
tapply(foster$weight, foster$litgen, mean)
tapply(foster$weight, foster$motgen, mean)
tapply(foster$weight, foster$litgen, sd)
tapply(foster$weight, foster$motgen, sd)
tapply(foster$weight, list(foster$litgen, foster$motgen), mean)
tapply(foster$weight, list(foster$litgen, foster$motgen), sd)
summary(aov(weight ~ litgen * motgen, data = foster))
summary(aov(weight ~ motgen * litgen, data = foster))
plot.design(foster)
foster_aov <- aov(weight ~ litgen * motgen, data = foster)
foster_aov
foster_hsd <- TukeyHSD(foster_aov, "motgen")
foster_hsd
plot(foster_hsd)
#####################################################
#
# CHAPTER 4
# 4.3.3 Water Hardness and Mortality
#
#####################################################
edit(water)
summary(manova(cbind(hardness, mortality) ~ location,data = water), test = "Hotelling-Lawley")
tapply(water$hardness, water$location, mean)
tapply(water$mortality, water$location, mean)
#####################################################
#
# CHAPTER 4
# 4.3.4 Male Egyptian Skulls
#
#####################################################
edit(skulls)
means <- aggregate(skulls[, c("mb", "bh", "bl","nh")], list(epoch = skulls$epoch), mean)
means
pairs(means[, -1], panel = function(x, y) {text(x, y, abbreviate(levels(skulls$epoch)))})
skulls_manova <- manova(cbind(mb, bh, bl, nh) ~epoch, data = skulls)
skulls_manova
summary(skulls_manova, test = "Pillai")
summary(skulls_manova, test = "Wilks")
summary(skulls_manova, test = "Hotelling-Lawley")
summary(skulls_manova, test = "Roy")
summary.aov(manova(cbind(mb, bh, bl, nh) ~ epoch,data = skulls))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch,
data = skulls,subset = epoch %in% c("c4000BC", "c3300BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "c1850BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "c200BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "cAD150")))
#####################################################
#
# CHAPTER 5
# Multiple Linear Regression: Cloud Seeding
#
#####################################################
rm(list = ls())
library("HSAUR")
library("KernSmooth")
library("mclust")
library("boot")
library("flexmix")
data(aspirin, BtheB, Lanza, phosphate, polyps, roomwidth,
skulls, toothpaste, waves, BCG, clouds, foster, mastectomy,
pistonrings, pottery, schizophrenia2, smoking, voting,
weightgain, birthdeathrates, CYGOB1, gardenflowers, meteo,
planets, rearrests, schizophrenia, students, water, womensrole,
bladdercancer, epilepsy, heptathlon, orallesions, plasma,
respiratory, schooldays, suicides, watervoles, package = "HSAUR")
####################################################
edit(clouds)
layout(matrix(1:2, nrow = 2))
bxpseeding <- boxplot(rainfall ~ seeding, data = clouds,
ylab = "Rainfall", xlab = "Seeding")
bxpecho <- boxplot(rainfall ~ echomotion, data = clouds,
ylab = "Rainfall", xlab = "Echo Motion")
rownames(clouds)[clouds$rainfall %in% c(bxpseeding$out,bxpecho$out)]
#####################################################
#
# CHAPTER 5
# 5.3.1 Fitting a Linear Model
#
#####################################################
clouds_formula <- rainfall ~ seeding *
(sne + cloudcover +prewetness + echomotion) + time
Xstar <- model.matrix(clouds_formula, data = clouds)
attr(Xstar, "contrasts")
layout(matrix(1:4, nrow = 2))
plot(rainfall ~ time, data = clouds)
plot(rainfall ~ sne, data = clouds, xlab = "S-NE criterion")
plot(rainfall ~ cloudcover, data = clouds)
plot(rainfall ~ prewetness, data = clouds)
clouds_lm <- lm(clouds_formula, data = clouds)
class(clouds_lm)
summary(clouds_lm)
betastar <- coef(clouds_lm)
betastar
#####################################################
Vbetastar <- vcov(clouds_lm)
Vbetastar
sqrt(diag(Vbetastar))
#####################################################
#
# CHAPTER 5
# 5.3.2 Regression Diagnostics
#
#####################################################
clouds_resid <- residuals(clouds_lm)
clouds_resid
clouds_fitted <- fitted(clouds_lm)
clouds_fitted
psymb <- as.numeric(clouds$seeding)
plot(rainfall ~ cloudcover, data = clouds, pch = psymb)
abline(lm(rainfall ~ cloudcover,
data = clouds,subset = seeding == "no"))
abline(lm(rainfall ~ cloudcover,
data = clouds,subset = seeding == "yes"), lty = 2)
legend("topright", legend = c("No seeding", "Seeding"),
pch = 1:2, lty = 1:2, bty = "n")
#####################################################
plot(clouds_fitted, clouds_resid, xlab = "Fitted values",
ylab = "Residuals", ylim = max(abs(clouds_resid)) *
c(-1, 1), type = "n")
abline(h = 0, lty = 2)
text(clouds_fitted, clouds_resid, labels = rownames(clouds))
#####################################################
qqnorm(clouds_resid, ylab = "Residuals")
qqline(clouds_resid)
####################################################
#########################################################
###############################################
#
# A Handbook of Statistical Analyses Using R
# Brian S. Everitt and Torsten Hothorn
#
###############################################
###############################################
#
# Preface
#
###############################################
search()
[1] ".GlobalEnv" "package:methods" "package:stats"
[4] "package:graphics" "package:grDevices" "package:utils"
[7] "package:datasets" "Autoloads" "package:base"
library("HSAUR")
载入需要的程辑包:lattice
载入需要的程辑包:MASS
载入需要的程辑包:scatterplot3d
search()
[1] ".GlobalEnv" "package:HSAUR" "package:scatterplot3d"
[4] "package:MASS" "package:lattice" "package:methods"
[7] "package:stats" "package:graphics" "package:grDevices"
[10] "package:utils" "package:datasets" "Autoloads"
[13] "package:base"
# 只要你提前安装了所需要的包,系统会自动把需要的包载入,本人把所有的CRAN包都安装了,
# 生物信息学方面的除外。
# 载入包时,要注意,R对大小写敏感,同一字母的大小写是不同的字符,在双引号内,
# 空格也是一个字符,如果你在双引号内有空,而包没有,那将返回不存在某某程序包。
vignette("Ch_introduction_to_R", package = "HSAUR")
# 这将显示该安装内的帮助文件Ch_introduction_to_R,(本例中是一个PDF格式)
edit(vignette("Ch_introduction_to_R", package = "HSAUR")) # 查看该包的短文信息
# 如果想全面看看该包中的介绍,输入下全命令:
vignette(package = "HSAUR")
x <- sqrt(25) + 2
x
print(x)
#################################################
#
# CHAPTER 1
# An Introduction to R
# 1.3 Help and Documentation
#
################################################
help("mean") # 或者用?mean
library("sandwich")
help(package = "sandwich") # 程序包的介绍
vignette("sandwich") # 打开包的使用说明
#################################################
#
# CHAPTER 1
# An Introduction to R
# 1.4 Data Objects in R
#
#################################################
data("Forbes2000", package = "HSAUR") #该包有与系统不兼容的字符,先进行替换
Forbes2000[32,2]<-"FMISN1"
Forbes2000[54,2]<-"FMISN2"
Forbes2000[883,2]<-"FMISN3"
edit(Forbes2000)
str(Forbes2000)
help("Forbes2000")
class(Forbes2000)
dim(Forbes2000)
nrow(Forbes2000)
ncol(Forbes2000)
names(Forbes2000)
class(Forbes2000[, "rank"])
length(Forbes2000[, "rank"])
class(Forbes2000[, "name"])
length(Forbes2000[, "name"])
Forbes2000[, "name"][1]
class(Forbes2000[, "category"])
nlevels(Forbes2000[, "category"])
levels(Forbes2000[, "category"])
table(Forbes2000[, "category"])
class(Forbes2000[, "sales"])
median(Forbes2000[, "sales"])
mean(Forbes2000[, "sales"])
range(Forbes2000[, "sales"])
summary(Forbes2000[, "sales"])
###################################################
#
# CHAPTER 1
# An Introduction to R
# 1.5 Data Import and Export
#
###################################################
csvForbes2000 <- read.table("Forbes2000.csv", header = TRUE,sep = ",", row.names = 1)
# 注意row.names = 1
class(csvForbes2000[, "name"])
csvForbes2000 <- read.table("Forbes2000.csv", header = TRUE,sep = ",",
row.names = 1, colClasses = c("character","integer", "character",
"factor", "factor","numeric", "numeric", "numeric", "numeric"))
class(csvForbes2000[, "name"])
all.equal(csvForbes2000, Forbes2000)
classes <- c("character", "integer", "character","factor",
"factor", "numeric", "numeric", "numeric","numeric")
length(classes)
class(classes)
write.table(Forbes2000, file = "Forbes2000.csv",sep = ",", col.names = NA)
write.table(Forbes2000, file = "Forbes.csv",sep = ",", col.names = NA)
save(Forbes2000, file = "Forbes2000.rda")
list.files(pattern = ".rda")
load("Forbes2000.rda")
###################################################
#
# CHAPTER 1
# An Introduction to R
# 1.6 Basic Data Manipulation
#
####################################################
companies <- Forbes2000[, "name"]
companies[1]
1:3
[1] 1 2 3
companies[1:3]
companies[-(4:2000)]
length(companies)
Forbes2000[1:3, c("name", "sales", "profits", "assets")]
companies <- Forbes2000$name
companies <- Forbes2000[, "name"]
order_sales <- order(Forbes2000$sales)
companies[order_sales[1:3]]
Forbes2000[order_sales[c(2000, 1999, 1998)], c("name","sales", "profits", "assets")]
Forbes2000[Forbes2000$assets > 1000, c("name", "sales","profits", "assets")]
table(Forbes2000$assets > 1000)
na_profits <- is.na(Forbes2000$profits)
table(na_profits)
Forbes2000[na_profits, c("name", "sales", "profits","assets")]
table(complete.cases(Forbes2000))
UKcomp <- subset(Forbes2000, country == "United Kingdom")
dim(UKcomp)
CHcomp<-subset(Forbes2000, country == "China")
dim(CHcomp)
CHcomp
######################################################
#
# CHAPTER 1
# An Introduction to R
# 1.7 Simple Summary Statistics
#
######################################################
summary(Forbes2000)
lapply(Forbes2000, summary)
mprofits <- tapply(Forbes2000$profits, Forbes2000$category,median, na.rm = TRUE)
median(Forbes2000$profits)
median(Forbes2000$profits,na.rm=TRUE)
rev(sort(mprofits))[1:3]
######################################################
#
# 1.7 Simple Summary Statistics
# 1.7.1 Simple Graphics
#
######################################################
fm <- marketvalue ~ sales
class(fm)
layout(matrix(1:2, nrow = 2))
hist(Forbes2000$marketvalue)
hist(log(Forbes2000$marketvalue))
plot(log(marketvalue) ~ log(sales), data = Forbes2000,pch = ".")
plot(marketvalue~sales, data = Forbes2000,pch = ".")
#########################################################
#
# CHAPTER 1
# An Introduction to R
# 1.8 Organising an Analysis
#
#########################################################
#########################################################
#
# CHAPTER 1
# An Introduction to R
# 1.9 Summary
#
#########################################################
boxplot(log(marketvalue) ~ country,
data = subset(Forbes2000,country %in% c("United Kingdom",
"Germany","India", "Turkey")), ylab = "log(marketvalue)",varwidth = TRUE)
#########################################################
#
# CHAPTER 1
# An Introduction to R
# Exercises
#
#########################################################
########## Ex. 1.1:##########
tapply(Forbes2000$profits, Forbes2000$country,median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "United States",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "United Kingdom",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "France",median, na.rm = TRUE)
tapply(Forbes2000$profits, Forbes2000$country== "Germany",median, na.rm = TRUE)
######### Ex. 1.2: #########
Gercomp<-subset(Forbes2000, country == "Germany")
table(complete.cases(Gercomp))
Gercomp[Gercomp$profits < 0, c("name","country", "sales","profits", "assets")]
######################################################
Forbes2000[Forbes2000$profits < 0 & Forbes2000$country == "Germany", c("name","country", "sales","profits", "assets")]
######### Ex. 1.3: #########
levels(Forbes2000[, "category"])
myidea<-Forbes2000[Forbes2000$category ==
c("Banking" ,"Business services & supplies" ,"Drugs & biotechnology",
"Food drink & tobacco" ,"Hotels restaurants & leisure" , "Insurance",
"Telecommunications services" ,"Transportation"), c("name","country",
"category", "sales","profits", "assets")]
myidea[myidea$profits>1,c("name","country", "category", "sales","profits", "assets")]
######### Ex. 1.4: #########
ForbesTopp <- subset(Forbes2000, profits != "NA")
length(ForbesTopp$profits)
order_profits <- order(ForbesTopp$profits)
Forbes50<-ForbesTopp[order_profits[c(1995:1945)],
c("name", "country", "sales", "profits", "assets")]
Forbes50
layout(matrix(1:2, nrow = 2))
hist(Forbes50$assets)
hist(Forbes50$sales)
hist(log(Forbes50$assets))
hist(log(Forbes50$sales))
plot(log(sales) ~ log(assets), data = Forbes50,pch = ".")
plot(log(assets) ~ log(sales), data = Forbes50,pch = ".")
######### Ex. 1.5: #########
companies_mean<-tapply(Forbes2000$sales, Forbes2000$country,mean, na.rm = TRUE)
summary(Forbes2000[Forbes2000$profits > 5, c("name","country", "profits")])
rm(list = ls())
######################################################
#
# CHAPTER 2
# Simple Inference:
# Guessing Lengths,Wave Energy, Water Hardness, Piston Rings, and Rearrests of Juveniles
#
######################################################
######################################################
#
# CHAPTER 2
# 2.3.1 Estimating the Width of a Room
#
######################################################
rm(list = ls())
library("HSAUR")
data("roomwidth", package = "HSAUR")
edit(roomwidth)
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
convert
tapply(roomwidth$width * convert, roomwidth$unit, summary)
########################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
myroom<-data.frame(unit, width)
summary(myroom)
tapply(myroom$width, myroom$unit, summary)
######################################################
layout(matrix(c(1, 2, 1, 3), nrow = 2, ncol = 2,byrow = FALSE))
matrix(c(1, 2, 1, 3), nrow = 2, ncol = 2,byrow = FALSE)
boxplot(I(width * convert) ~ unit, data = roomwidth,
ylab = "Estimated width (feet)", varwidth = TRUE,
names = c("Estimates in feet", "Estimates in metres (converted to feet)"))
feet <- roomwidth$unit == "feet"
qqnorm(roomwidth$width[feet], ylab = "Estimated width (feet)")
qqline(roomwidth$width[feet])
qqnorm(roomwidth$width[!feet], ylab = "Estimated width (metres)")
qqline(roomwidth$width[!feet])
#####################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
t.test(width~unit, alternative='two.sided',
conf.level=.95, var.equal=FALSE, data=roomwidth)
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
unit<-roomwidth$unit
width<-roomwidth$width * convert
myroom<-data.frame(unit, width)
t.test(width~unit, alternative='two.sided', conf.level=.95, var.equal=FALSE, data=myroom)
t.test(width~unit, alternative='two.sided', conf.level=.95, var.equal=TRUE, data=myroom)
###################################################
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
t.test(I(width * convert) ~ unit, data = roomwidth, var.equal = TRUE)
t.test(I(width * convert) ~ unit, data = roomwidth, var.equal = FALSE)
wilcox.test(I(width * convert) ~ unit, data = roomwidth, conf.int = TRUE)
######################################################
#
# CHAPTER 2
# 2.3.2 Wave Energy Device Mooring
#
######################################################
data("waves", package = "HSAUR")
edit(waves)
t.test(waves$method1, waves$method2,
alternative='two.sided', conf.level=.95, paired=TRUE)
#####################################################
boxplot(waves$method1, ylab="method1")
mooringdiff <- waves$method1 - waves$method2
layout(matrix(1:2, ncol = 2))
boxplot(mooringdiff, ylab = "Differences (Newton metres)",main = "Boxplot")
abline(h = 0, lty = 2)
qqnorm(mooringdiff, ylab = "Differences (Newton metres)")
qqline(mooringdiff)
######################################################
#
# CHAPTER 2
# 2.3.3 Mortality and Water Hardness
#
######################################################
library("HSAUR")
data("roomwidth", "waves", "water", "pistonrings", package = "HSAUR")
######################################################
Hist(water$hardness, scale="frequency", breaks="Sturges", col="darkgray")
scatterplot(mortality~hardness, reg.line=lm, smooth=TRUE,
labels=FALSE, boxplots='xy', span =0.5, data=water)
boxplot(water$mortality, ylab="mortality")
#####################################################
nf <- layout(matrix(c(2, 0, 1, 3), 2, 2, byrow = TRUE),c(2, 1), c(1, 2), TRUE)
psymb <- as.numeric(water$location)
plot(mortality ~ hardness, data = water, pch = psymb)
abline(lm(mortality ~ hardness, data = water))
legend("topright", legend = levels(water$location),pch = c(1, 2), bty = "n")
hist(water$hardness)
boxplot(water$mortality)
######################################################
#
# CHAPTER 2
# 2.3.4 Piston-ring Failures
#
######################################################
data("pistonrings", package = "HSAUR")
edit(pistonrings)
chisq.test(pistonrings)$residuals
library(vcd)
assoc(pistonrings)
######################################################
#
# CHAPTER 2
# 2.3.5 Rearrests of Juveniles
#
######################################################
data("rearrests", package = "HSAUR")
edit(rearrests)
mcnemar.test(rearrests, correct = FALSE)
binom.test(rearrests[2], n = sum(rearrests[c(2,3)]))
#####################################################
#
# CHAPTER 3
# Conditional Inference:
# Guessing Lengths, Suicides, Gastrointestinal Damage, and Newborn Infants
#
######################################################
#####################################################
#
# CHAPTER 3
# 3.3 Analysis Using R
# 3.3.1 Estimating the Width of a Room Revised
#
######################################################
rm(list = ls())
library("HSAUR")
data("roomwidth", package = "HSAUR")
convert <- ifelse(roomwidth$unit == "feet", 1, 3.28)
feet <- roomwidth$unit == "feet"
metre <- !feet
y <- roomwidth$width * convert
T <- mean(y[feet]) - mean(y[metre])
T
meandiffs <- double(9999)
for (i in 1:length(meandiffs))
{
sy <- sample(y)
meandiffs[ i ] <- mean(sy[feet]) - mean(sy[metre])
}
# 注意原文[ i ]中i前后没有空格,只因与论坛斜体字符总突后而加了空格
hist(meandiffs)
abline(v = T, lty = 2)
abline(v = -T, lty = 2)
greater <- abs(meandiffs) > abs(T)
mean(greater)
binom.test(sum(greater), length(greater))$conf.int
attr(,"conf.level")
binom.test(sum(greater), length(greater))
library("coin")
independence_test(y ~ unit, data = roomwidth, distribution = "exact")
wilcox_test(y ~ unit, data = roomwidth, distribution = "exact")
#####################################################
#
# CHAPTER 3
# 3.3.3 Gastrointestinal Damages
#
#####################################################
data("Lanza", package = "HSAUR")
edit(Lanza)
xtabs(~treatment + classification + study, data = Lanza)
xtabs()
xtabs(formula = ~., data = parent.frame(), subset, na.action,
exclude = c(NA, NaN), drop.unused.levels = FALSE)
library("coin")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "I")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "II")
p <- cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "II",distribution = approximate(B = 19999))
p
pvalue(p)
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "III")
cmh_test(classification ~ treatment, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)),
subset = Lanza$study == "IV")
cmh_test(classification ~ treatment | study, data = Lanza,
scores = list(classification = c(0, 1, 6, 17,30)))
#####################################################
#
# CHAPTER 3
# 3.3.4 Teratogenesis
#
#####################################################
anomalies <- as.table
(matrix(c(235, 23, 3, 0, 41,35, 8, 0, 20, 11, 11, 1, 2, 1, 3, 1),
ncol = 4,dimnames = list(MD = 0:3, RA = 0:3)))
anomalies
mh_test(anomalies)
mh_test(anomalies, scores = list(c(0, 1, 2, 3)))
###############################################
#####################################################
#
# CHAPTER 4
# Analysis of Variance:
# Weight Gain,Foster Feeding in Rats, WaterHardness and Male Egyptian Skulls
#
#####################################################
#####################################################
#
# CHAPTER 4
# 4.3.1 Weight Gain in Rats
#
#####################################################
rm(list = ls())
library("HSAUR")
data("weightgain", "foster", "water", "skulls", package = "HSAUR")
edit(weightgain)
tapply(weightgain$weightgain, list(weightgain$source,weightgain$type), mean)
tapply(weightgain$weightgain, list(weightgain$source,weightgain$type), sd)
wg_aov <- aov(weightgain ~ source * type, data = weightgain)
plot.design(weightgain)
options("contrasts")
coef(aov(weightgain ~ source + type + source:type,data = weightgain, contrasts = list(source = contr.sum)))
summary(wg_aov)
interaction.plot(weightgain$type, weightgain$source,weightgain$weightgain)
summary(aov(weightgain ~ type * source, data = weightgain))
#####################################################
#
# CHAPTER 4
# 4.3.2 Foster Feeding of Rats of Different Genotype
#
#####################################################
edit(foster)
tapply(foster$weight, foster$litgen, mean)
tapply(foster$weight, foster$motgen, mean)
tapply(foster$weight, foster$litgen, sd)
tapply(foster$weight, foster$motgen, sd)
tapply(foster$weight, list(foster$litgen, foster$motgen), mean)
tapply(foster$weight, list(foster$litgen, foster$motgen), sd)
summary(aov(weight ~ litgen * motgen, data = foster))
summary(aov(weight ~ motgen * litgen, data = foster))
plot.design(foster)
foster_aov <- aov(weight ~ litgen * motgen, data = foster)
foster_aov
foster_hsd <- TukeyHSD(foster_aov, "motgen")
foster_hsd
plot(foster_hsd)
#####################################################
#
# CHAPTER 4
# 4.3.3 Water Hardness and Mortality
#
#####################################################
edit(water)
summary(manova(cbind(hardness, mortality) ~ location,data = water), test = "Hotelling-Lawley")
tapply(water$hardness, water$location, mean)
tapply(water$mortality, water$location, mean)
#####################################################
#
# CHAPTER 4
# 4.3.4 Male Egyptian Skulls
#
#####################################################
edit(skulls)
means <- aggregate(skulls[, c("mb", "bh", "bl","nh")], list(epoch = skulls$epoch), mean)
means
pairs(means[, -1], panel = function(x, y) {text(x, y, abbreviate(levels(skulls$epoch)))})
skulls_manova <- manova(cbind(mb, bh, bl, nh) ~epoch, data = skulls)
skulls_manova
summary(skulls_manova, test = "Pillai")
summary(skulls_manova, test = "Wilks")
summary(skulls_manova, test = "Hotelling-Lawley")
summary(skulls_manova, test = "Roy")
summary.aov(manova(cbind(mb, bh, bl, nh) ~ epoch,data = skulls))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch,
data = skulls,subset = epoch %in% c("c4000BC", "c3300BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "c1850BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "c200BC")))
summary(manova(cbind(mb, bh, bl, nh) ~ epoch, data = skulls,subset = epoch %in% c("c4000BC", "cAD150")))
#####################################################
#
# CHAPTER 5
# Multiple Linear Regression: Cloud Seeding
#
#####################################################
rm(list = ls())
library("HSAUR")
library("KernSmooth")
library("mclust")
library("boot")
library("flexmix")
data(aspirin, BtheB, Lanza, phosphate, polyps, roomwidth,
skulls, toothpaste, waves, BCG, clouds, foster, mastectomy,
pistonrings, pottery, schizophrenia2, smoking, voting,
weightgain, birthdeathrates, CYGOB1, gardenflowers, meteo,
planets, rearrests, schizophrenia, students, water, womensrole,
bladdercancer, epilepsy, heptathlon, orallesions, plasma,
respiratory, schooldays, suicides, watervoles, package = "HSAUR")
####################################################
edit(clouds)
layout(matrix(1:2, nrow = 2))
bxpseeding <- boxplot(rainfall ~ seeding, data = clouds,
ylab = "Rainfall", xlab = "Seeding")
bxpecho <- boxplot(rainfall ~ echomotion, data = clouds,
ylab = "Rainfall", xlab = "Echo Motion")
rownames(clouds)[clouds$rainfall %in% c(bxpseeding$out,bxpecho$out)]
#####################################################
#
# CHAPTER 5
# 5.3.1 Fitting a Linear Model
#
#####################################################
clouds_formula <- rainfall ~ seeding *
(sne + cloudcover +prewetness + echomotion) + time
Xstar <- model.matrix(clouds_formula, data = clouds)
attr(Xstar, "contrasts")
layout(matrix(1:4, nrow = 2))
plot(rainfall ~ time, data = clouds)
plot(rainfall ~ sne, data = clouds, xlab = "S-NE criterion")
plot(rainfall ~ cloudcover, data = clouds)
plot(rainfall ~ prewetness, data = clouds)
clouds_lm <- lm(clouds_formula, data = clouds)
class(clouds_lm)
summary(clouds_lm)
betastar <- coef(clouds_lm)
betastar
#####################################################
Vbetastar <- vcov(clouds_lm)
Vbetastar
sqrt(diag(Vbetastar))
#####################################################
#
# CHAPTER 5
# 5.3.2 Regression Diagnostics
#
#####################################################
clouds_resid <- residuals(clouds_lm)
clouds_resid
clouds_fitted <- fitted(clouds_lm)
clouds_fitted
psymb <- as.numeric(clouds$seeding)
plot(rainfall ~ cloudcover, data = clouds, pch = psymb)
abline(lm(rainfall ~ cloudcover,
data = clouds,subset = seeding == "no"))
abline(lm(rainfall ~ cloudcover,
data = clouds,subset = seeding == "yes"), lty = 2)
legend("topright", legend = c("No seeding", "Seeding"),
pch = 1:2, lty = 1:2, bty = "n")
#####################################################
plot(clouds_fitted, clouds_resid, xlab = "Fitted values",
ylab = "Residuals", ylim = max(abs(clouds_resid)) *
c(-1, 1), type = "n")
abline(h = 0, lty = 2)
text(clouds_fitted, clouds_resid, labels = rownames(clouds))
#####################################################
qqnorm(clouds_resid, ylab = "Residuals")
qqline(clouds_resid)
####################################################