# Generate the data set: 500 points made of three Gaussian groups
# (200 + 200 + 100 observations). x and y are drawn independently with the
# same per-group mean and standard deviation.
# NOTE: no seed has been set at this point, so the sample differs per run.
x <- c(
  rnorm(200, mean = 30, sd = 1),
  rnorm(200, mean = 10, sd = 1.5),
  rnorm(100, mean = 5, sd = 0.5)
)
y <- c(
  rnorm(200, mean = 30, sd = 1),
  rnorm(200, mean = 10, sd = 1.5),
  rnorm(100, mean = 5, sd = 0.5)
)
data <- data.frame(x, y)
# Colour space utilities.
library(colorspace)
# Show the structure of the data set.
str(data)

# Hierarchical clustering
# Helper packages needed for clustering.
library(cluster)
library(rattle)
# The hierarchical clustering function used below lives in package amap.
library(amap, quietly = TRUE)
# Cluster result helpers are provided by package fpc.
library(fpc, quietly = TRUE)
# Plotting requires package cba.
library(cba, quietly = TRUE)

# Hierarchical clustering of the two coordinates using Manhattan distance and
# Ward linkage; rows containing NA are dropped first. nbproc = 2 is passed
# through to amap (presumably the number of parallel workers -- see amap docs).
chcluster <- hclusterpar(
  na.omit(data[, c(1:2)]),
  method = "manhattan",
  link = "ward",
  nbproc = 2
)
chcluster
# Cluster centres for the 3-cluster cut of the tree.
centers.hclust(na.omit(data[, c(1:2)]), chcluster, 3)
# Draw the dendrogram and outline the 3-cluster solution with rectangles.

# Cluster membership for the 3-cluster cut; computed once and reused below.
hc_groups <- cutree(chcluster, 3)

par(bg = "grey")
plot(chcluster, main = "", sub = "", xlab = "", labels = FALSE, hang = 0)
rect.hclust(chcluster, k = 3)
title(
  main = "HCluster_Dendrogram_data",
  sub = paste("R", format(Sys.time(), "%Y-%b-%d %H:%M:%S"), Sys.info()["user"])
)

# Relationship between the clusters (discriminant coordinates plot).

par(bg = "yellow")
plotcluster(na.omit(data[, c(1:2)]), hc_groups)
title(
  main = "Discriminant Coordinates data",
  sub = paste("R", format(Sys.time(), "%Y-%b-%d %H:%M:%S"), Sys.info()["user"])
)

# Scatter plot of the data set coloured by cluster.
# The tree was built on na.omit(data[, 1:2]), so the same rows are plotted
# here to keep the colour vector aligned with the points.
plot(na.omit(data[, c(1:2)]), col = hc_groups)
title(
  main = "",
  sub = paste("R", format(Sys.time(), "%Y-%b-%d %H:%M:%S"), Sys.info()["user"])
)

# Basic statistics for validating the clustering result.
cluster.stats(dist(na.omit(data[, c(1:2)])), hc_groups)
library(rattle)
library(colorspace)
library(fpc, quietly = TRUE)
str(data)

# KMEANS CLUSTER

# Set the seed to get the same clusters each time for a given data set.

set.seed(252964)

# Generate a kmeans cluster of size 3.
# NOTE: the result is stored under the name `kmeans`, the same name as the
# function. The name is kept because the plotting code further down reads
# kmeans$cluster.

kmeans <- kmeans(na.omit(data[, c(1:2)]), 3)

## REPORT ON CLUSTER CHARACTERISTICS
kmeans
# Cluster sizes:

paste(kmeans$size, collapse = " ")

# Cluster centers:

kmeans$centers

# Within cluster sum of squares:

kmeans$withinss
# Generate a data plot: the points coloured by their kmeans cluster.

par(bg = "orange")
plot(na.omit(data[, c(1:2)]), col = kmeans$cluster)
title(
  main = "",
  sub = paste("R", format(Sys.time(), "%Y-%b-%d %H:%M:%S"), Sys.info()["user"])
)

# Generate a discriminant coordinates plot.

par(bg = "grey")
plotcluster(na.omit(data[, c(1:2)]), kmeans$cluster)
title(
  main = "Discriminant Coordinates data",
  sub = paste("R", format(Sys.time(), "%Y-%b-%d %H:%M:%S"), Sys.info()["user"])
)

# Statistics for the kmeans partition, computed on the pairwise distances.
cluster.stats(dist(na.omit(data[, c(1:2)])), kmeans$cluster)


为什么不用png图形呢?



对大多数统计图形来说，PNG 文件不仅比 JPEG 文件小，而且清晰得多。
系统聚类


<br />
# hclust lives in package stats.

library(MASS)

# Numeric matrix holding the two coordinates (the formula drops the intercept).
coords <- model.matrix(~ -1 + x + y, data)

# Hierarchical clustering on Manhattan (city-block) distances.
# "ward.D" is the current name of the method formerly spelled "ward".
HCluster <- hclust(dist(coords, method = "manhattan"), method = "ward.D")
HCluster

par(bg = "orange")
plot(
  HCluster,
  main = "Cluster Dendrogram for Solution HCluster",
  xlab = "Observation Number in Data Set data",
  sub = "Method=ward;Distance=city-block"
)

# Cluster membership for the 3-cluster cut of the tree.
hc_groups <- cutree(HCluster, k = 3)

summary(as.factor(hc_groups)) # Cluster Sizes
# colMeans gives one centroid (mean x, mean y) per cluster; plain mean() on
# the per-cluster data-frame subsets that by() passes in would return NA.
by(coords, as.factor(hc_groups), colMeans) # Cluster Centroids

par(bg = "grey")
biplot(
  princomp(coords),
  xlabs = as.character(hc_groups),
  cex = rep(par("cex"), 2),
  main = "HCluster"
)




23 天 后
15 天 后
4 年 后

请问“# Within cluster sum of squares:”下面的

kmeans$withinss

是做什么用的？应该怎么使用？谢谢！