在R使用过程中,遇到了样本权重设置问题(SPSS中Data菜单下面专门有了,SAS中也比较好解决);各个建模函数中均设置了weight参数了,而基础统计量反而没有专门的函数,weighted.mean和cor.wt等也有几个。在Hmisc包中也有一些。索性我自己写了一个,便于自己使用,现在提供出来,供大家多多提意见,看看里面有没有什么错误,多多修改一下。至于列联表、频次分析的时候直接使用函数xtabs就可以了。
stats.wt <-<br />
function(x, w, ci = 0.95, na.rm = TRUE, column = 1)<br />
{<br />
if(missing(w)) w<-rep.int(1,length(x))<br />
if(length(w) != length(x))<br />
stop("The vector and weights must be equal")<br />
if(any(is.na(w)))<br />
stop("The weights must not be NAs")<br />
if(any(w < 0))<br />
stop("The weights must be all greater than 0")<br />
<br />
if (class(x) == "matrix") {<br />
x = x[, column]<br />
warning("Column ", column, " of matrix used")<br />
}<br />
if (class(x) == "data.frame") {<br />
x = x[, column]<br />
warning("Column ", column, " of data.frame used")<br />
}<br />
if (class(x) == "timeSeries") {<br />
x = x@Data[, colum]<br />
warning("Column ", column, " of timeSeries used")<br />
}<br />
if (!is.numeric(x))<br />
stop("The selected column or vector is not numeric")<br />
x = as.vector(x)<br />
<br />
i=is.na(x)<br />
na.obs.wt <- sum(w[i])<br />
<br />
if(na.rm) {<br />
i <- !is.na(x + w)<br />
x <- x[i]<br />
w <- w[i]<br />
}<br />
<br />
nobs.wt <- sum(w)<br />
min.wt <- min(x)<br />
max.wt <- max(x)<br />
sum.wt <- sum(w * x)<br />
mean.wt <- sum.wt / nobs.wt<br />
var.wt <- sum(w * (x - mean.wt)^2) / (nobs.wt-1)<br />
sd.wt <- sqrt(var.wt)<br />
se.wt <- sd.wt/sqrt(nobs.wt)<br />
t.val <- qt((1 - ci)/2, nobs.wt - 1)<br />
lcl.wt <- mean.wt + se.wt * t.val<br />
ucl.wt <- mean.wt - se.wt * t.val<br />
skew.wt <- sum(w * (x - mean.wt)^2) / sqrt(var.wt^3) / nobs.wt<br />
kurt.wt <- sum(w * (x - mean.wt)^4) / var.wt^2 / nobs.wt -3<br />
<br />
stats = c(nobs.wt,<br />
na.obs.wt,<br />
min.wt,<br />
max.wt,<br />
mean.wt,<br />
sum.wt,<br />
se.wt,<br />
lcl.wt,<br />
ucl.wt,<br />
var.wt,<br />
sd.wt,<br />
skew.wt,<br />
kurt.wt)<br />
names(stats) <- c("nobs",<br />
"NAs",<br />
"Minimum",<br />
"Maximum",<br />
"Mean",<br />
"Sum",<br />
"SE Mean",<br />
"LCL Mean",<br />
"UCL Mean",<br />
"Variance",<br />
"Stdev",<br />
"Skewness",<br />
"Kurtosis")<br />
stats <- as.data.frame(format(stats, scientific = F))<br />
names(stats) <- "x"<br />
stats<br />
}<br />
<br />
stats.wt.default <- function(x, w=rep.int(1,length(x[,1])), ci = 0.95, na.rm = TRUE)<br />
{<br />
ans.names <- names(x)<br />
fun <- function(col1) stats.wt(col1 , w=w, ci=ci, na.rm=na.rm)<br />
ans <- apply(x, 2, fun)<br />
ans <- as.data.frame(ans)<br />
names(ans) <- ans.names<br />
ans<br />
}<br />
<br />
<br />
# dat <- CO2[,4]<br />
# stats.wt(dat)<br />
<br />
# dat <- CO2[,4:5]<br />
# stats.wt.default(dat)<br />
<br />
<br />
wtd.moment.vec<-<br />
function(x, w, n=2, na.rm=TRUE)<br />
{<br />
if (!is.numeric(x))<br />
{<br />
warning("argument is not numeric vector: returning NA")<br />
return(as.numeric(NA))<br />
}<br />
# n<-n<br />
if(missing(w))<br />
w<-rep.int(1,length(x))<br />
else if (length(x) != length(w))<br />
stop("'x' and 'w' must have the same length!")<br />
if(is.integer(w))<br />
w<-as.numeric(w)<br />
if(na.rm)<br />
{<br />
i <- !is.na(x + w)<br />
w <- w[i]<br />
x <- x[i]<br />
}<br />
xbar<-sum(w * x)/sum(w)<br />
sum(w * ((x - xbar)^n))/(sum(w) - 1)<br />
}<br />
<br />
wtd.moment.data.frame<-<br />
function(x, w, n=2, na.rm=TRUE)<br />
{<br />
if(! is.data.frame(x))<br />
stop("x must be a dataframe")<br />
vec.moment<-function(vec) wtd.moment(vec, w=w, n=n, na.rm=na.rm)<br />
sapply(x, vec.moment)<br />
}<br />
<br />
wtd.moment<-<br />
function(x, w, n=2, na.rm=TRUE)<br />
{<br />
if(is.vector(x)) return(wtd.moment.vec(x=x, w=w, n=n, na.rm=na.rm))<br />
else if(is.data.frame(x)) return(wtd.moment.data.frame(x=x, w=w, n=n, na.rm=na.rm))<br />
}<br />
<br />
# w=1:84<br />
# wtd.moment(CO2[,4], w, n=3)