<br />
ocluster = function(datasam, classnum) {<br />
#有序样本聚类,输入datasam为样本数据阵,每一行为一个样本;<br />
#输入classnum为要分的类数<br />
#返回值result1为分类结果示意图<br />
#各类的起始点存在变量breaks中<br />
#输出三个矩阵 ra_dis:距离矩阵 leastlost:最小损失矩阵 classid:分类标识矩阵<br />
#author:banmudi 2010.11</p>
<p>#样本数<br />
sam_n = dim(datasam)[1]<br />
#子函数,计算i-j个样本组成的类的半径<br />
radi = function(i, j) {<br />
#提取i-j个样本<br />
temp =as.matrix( datasam[i:j, ])<br />
mu = colMeans(matrix(temp,j-i+1))<br />
vec = apply(matrix(temp,j-i+1), 1, function(x) {<br />
x - mu<br />
})<br />
round(sum(apply(matrix(vec,j-i+1), 2, crossprod)),3)<br />
}</p>
<p> #计算距离矩阵<br />
ra_dis = matrix(0, sam_n, sam_n)<br />
rownames(ra_dis) = 1:sam_n<br />
colnames(ra_dis) = 1:sam_n<br />
for (i in 1:(sam_n - 1)) {<br />
for (j in (i + 1):sam_n) {<br />
ra_dis[i, j] = radi(i, j)<br />
ra_dis[j, i] = radi(i, j)<br />
}<br />
}</p>
<p> #最小损失矩阵,行为样本数,列为分类数<br />
#leastlost[i,j]表示把1:i样本分成j类对应的最小损失<br />
leastlost = matrix(, sam_n - 1, sam_n - 1)<br />
rownames(leastlost) = 2:sam_n<br />
colnames(leastlost) = 2:sam_n<br />
diag(leastlost) = 0<br />
#round(leastlost,3);</p>
<p> #记录下对应的分类结点<br />
classid = matrix(, sam_n - 1, sam_n - 1)<br />
rownames(classid) = 2:sam_n<br />
colnames(classid) = 2:sam_n<br />
diag(classid) = 2:sam_n</p>
<p> #分成两类时,填写最小损失阵的第一列<br />
leastlost[as.character(3:sam_n), "2"] = sapply(3:sam_n,<br />
function(xn) {<br />
min(ra_dis[1, 1:(xn - 1)] + ra_dis[2:xn, xn])<br />
})<br />
classid[as.character(3:sam_n), "2"] = sapply(3:sam_n, function(xn) {<br />
which((ra_dis[1, 1:(xn - 1)] + ra_dis[2:xn, xn]) == (min(ra_dis[1,<br />
1:(xn - 1)] + ra_dis[2:xn, xn])))[1] + 1<br />
})<br />
#分成j类时,填写最小损失阵的 第二列到最后一列<br />
for (j in as.character(3:(sam_n - 1))) {<br />
#分成j类<br />
leastlost[as.character((as.integer(j) + 1):sam_n), j] = sapply((as.integer(j) +<br />
1):sam_n, function(xn) {<br />
min(leastlost[as.character(j:xn - 1), as.character(as.integer(j) -<br />
1)] + ra_dis[j:xn, xn])<br />
})</p>
<p> classid[as.character((as.integer(j) + 1):sam_n), j] = sapply((as.integer(j) +<br />
1):sam_n, function(xn) {<br />
a = which((leastlost[as.character(j:xn - 1), as.character(as.integer(j) -<br />
1)] + ra_dis[j:xn, xn]) == min(leastlost[as.character(j:xn -<br />
1), as.character(as.integer(j) - 1)] + ra_dis[j:xn,<br />
xn]))[1] + as.integer(j) - 1<br />
})<br />
}</p>
<p> diag(classid) = 2:sam_n</p>
<p> breaks = rep(0, 1, classnum)<br />
breaks[1] = 1<br />
breaks[classnum] = classid[as.character(sam_n), as.character(classnum)]<br />
flag = classnum - 1<br />
while (flag >= 2) {<br />
breaks[flag] = classid[as.character(breaks[flag + 1] -<br />
1), as.character(flag)]<br />
flag = flag - 1<br />
}</p>
<p>print("distance matrix:");#cat("\n")<br />
print(ra_dis[2:sam_n,1:(sam_n-1)], na.print = ""); #输出距离矩阵<br />
print("leastlost matrix:")<br />
print(leastlost[2:(sam_n-1),1:(sam_n-2)], na.print = ""); #输出最小损失矩阵<br />
print("classid matrix:")<br />
print(classid[2:(sam_n-1),1:(sam_n-2)], na.print = ""); #输出分类标识矩阵<br />
cat("\n")<br />
print("result")<br />
#画一个简单的分类示意图<br />
result1=NULL<br />
for (p in 1:sam_n) {<br />
result1 <- cat(result1,p, " ")<br />
for (w in 1:length(breaks)) {<br />
if (p == breaks[w] - 1) {<br />
result1 <- cat(result1, "||")<br />
}<br />
}<br />
if (p == sam_n)<br />
result1= cat(result1, "\n")<br />
}<br />
}<br />
</p>
FROM 半亩田