急需！分布拟合检验的命令，在R实现

jinguogong

请问，在 R 里如何实现分布拟合检验，比如 KS 检验、卡方（chisq）检验？

karlqi

可以调用 ks.test 和 chisq.test （函数ks.test 就是S+中的ks.gof）

具体的用法直接用?ks.test 和 ?chisq.test看 R 的帮助好了。

jinguogong

谢谢！

jinguogong

对不起，再顺便问问

结果出现这个问题

ks.test(data4,x)

Two-sample Kolmogorov-Smirnov test

data: data4 and x

D = 0.0313, p-value = 0.4957

alternative hypothesis: two-sided

Warning message:

cannot compute correct p-values with ties in: ks.test(data4, x)

什么意思？

karlqi

直接看ks.test的代码：

# Kolmogorov-Smirnov test (source of ks.test as printed at the R prompt).
#
# Arguments:
#   x           numeric data vector; NA values are dropped before testing.
#   y           either a numeric vector (two-sample test against x), or a
#               function / a string naming a function that is evaluated at
#               sort(x) (one-sample test) -- presumably a cumulative
#               distribution function, judging by the alternative labels.
#   ...         extra arguments forwarded to y() in the one-sample case.
#   alternative one of "two.sided", "less", "greater".
#   exact       NULL to decide automatically from the sample size(s), or a
#               logical asking for an exact p-value where one is available.
#
# Returns a list of class "htest" with components statistic, p.value,
# alternative, method and data.name.
#
# Emits the warning "cannot compute correct p-values with ties" whenever the
# data contain duplicated values; in that case the exact p-value is skipped
# and the asymptotic approximation at the end of the function is used.
function (x, y, ..., alternative = c("two.sided", "less", "greater"),

exact = NULL)

{

# Helper used below for the exact one-sided, one-sample p-value
# (called as 1 - pkolmogorov1x(STATISTIC, n)). Returns 0 for x <= 0 and
# 1 for x >= 1; otherwise evaluates a finite sum whose terms are built on
# the log scale (lchoose / log) and exponentiated before summing.
pkolmogorov1x <- function(x, n) {

if (x <= 0)

return(0)

if (x >= 1)

return(1)

j <- seq.int(from = 0, to = floor(n * (1 - x)))

1 - x * sum(exp(lchoose(n, j) + (n - j) * log(1 - x -

j/n) + (j - 1) * log(x + j/n)))

}

alternative <- match.arg(alternative)

# Capture the text of the expression passed as x for the "data:" label.
DNAME <- deparse(substitute(x))

x <- x[!is.na(x)]

n <- length(x)

if (n < 1)

stop("not enough 'x' data")

# PVAL stays NULL unless an exact p-value is computed; NULL triggers the
# asymptotic fallback near the end of the function.
PVAL <- NULL

if (is.numeric(y)) {

# ---- Two-sample case: y is a second data vector ----
DNAME <- paste(DNAME, "and", deparse(substitute(y)))

y <- y[!is.na(y)]

# Stored as double -- presumably so n.x * n.y below is not computed in
# integer arithmetic (TODO confirm).
n.x <- as.double(n)

n.y <- length(y)

if (n.y < 1)

stop("not enough 'y' data")

# Default: attempt an exact p-value only when n.x * n.y < 10000.
if (is.null(exact))

exact <- (n.x * n.y < 10000)

METHOD <- "Two-sample Kolmogorov-Smirnov test"

TIES <- FALSE

# n is reused here: it becomes the scaling factor n.x * n.y / (n.x + n.y)
# that the asymptotic p-value formulas below use.
n <- n.x * n.y/(n.x + n.y)

w <- c(x, y)

# Walk through the pooled sample in sorted order, adding 1/n.x for each
# observation that came from x (index <= n.x in w) and subtracting 1/n.y
# for each one from y: the running difference of the two empirical CDFs.
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))

# Ties: the pooled sample has fewer distinct values than observations.
# This is where the "ties" warning is raised in the two-sample case.
if (length(unique(w)) < (n.x + n.y)) {

warning("cannot compute correct p-values with ties")

# Keep z only where the sorted value changes (i.e. at the last position
# of each run of equal values), plus the final position.
z <- z[c(which(diff(sort(w)) != 0), n.x + n.y)]

TIES <- TRUE

}

STATISTIC <- switch(alternative, two.sided = max(abs(z)),

greater = max(z), less = -min(z))

nm_alternative <- switch(alternative, two.sided = "two-sided",

less = "the CDF of x lies below that of y", greater = "the CDF of x lies above that of y")

# Exact p-value only for the two-sided alternative without ties; it is
# obtained from the C routine "psmirnov2x" in package stats, which returns
# its result in the p component. All other cases fall through to the
# asymptotic formula.
if (exact && (alternative == "two.sided") && !TIES)

PVAL <- 1 - .C("psmirnov2x", p = as.double(STATISTIC),

as.integer(n.x), as.integer(n.y), PACKAGE = "stats")$p

}

else {

# ---- One-sample case: y is a function or the name of one ----
if (is.character(y))

y <- get(y, mode = "function")

if (mode(y) != "function")

stop("'y' must be numeric or a string naming a valid function")

# Default: attempt an exact p-value only when n < 100.
if (is.null(exact))

exact <- (n < 100)

METHOD <- "One-sample Kolmogorov-Smirnov test"

TIES <- FALSE

# Ties within x itself raise the same warning and disable the exact path.
if (length(unique(x)) < n) {

warning("cannot compute correct p-values with ties")

TIES <- TRUE

}

# x is reused: it now holds y evaluated at the sorted data minus the
# step heights (i - 1)/n for i = 1..n.
x <- y(sort(x), ...) - (0:(n - 1))/n

STATISTIC <- switch(alternative, two.sided = max(c(x,

1/n - x)), greater = max(1/n - x), less = max(x))

# Exact p-value without ties: two-sided via the C routine "pkolmogorov2x"
# in package stats, one-sided via the local helper pkolmogorov1x.
if (exact && !TIES) {

PVAL <- if (alternative == "two.sided")

1 - .C("pkolmogorov2x", p = as.double(STATISTIC),

as.integer(n), PACKAGE = "stats")$p

else 1 - pkolmogorov1x(STATISTIC, n)

}

nm_alternative <- switch(alternative, two.sided = "two-sided",

less = "the CDF of x lies below the null hypothesis",

greater = "the CDF of x lies above the null hypothesis")

}

# Label the statistic according to the alternative.
names(STATISTIC) <- switch(alternative, two.sided = "D",

greater = "D^+", less = "D^-")

# Vectorised wrapper around the C routine "pkstwo" in package stats, used
# for the asymptotic two-sided p-value below. NA inputs give NA, inputs
# <= 0 give 0, and only the remaining positive values are passed to C
# together with the tolerance tol.
pkstwo <- function(x, tol = 1e-06) {

if (is.numeric(x))

x <- as.vector(x)

else stop("argument 'x' must be numeric")

p <- rep(0, length(x))

p[is.na(x)] <- NA

IND <- which(!is.na(x) & (x > 0))

if (length(IND) > 0) {

p[IND] <- .C("pkstwo", as.integer(length(x[IND])),

p = as.double(x[IND]), as.double(tol), PACKAGE = "stats")$p

}

return(p)

}

# Asymptotic fallback, reached when no exact p-value was computed (exact
# is FALSE, ties are present, or the two-sample alternative is one-sided):
# two-sided uses 1 - pkstwo(sqrt(n) * D); one-sided uses exp(-2 * n * D^2).
if (is.null(PVAL)) {

PVAL <- ifelse(alternative == "two.sided", 1 - pkstwo(sqrt(n) *

STATISTIC), exp(-2 * n * STATISTIC^2))

}

# Assemble the result as an "htest" object so it prints in the standard
# hypothesis-test format.
RVAL <- list(statistic = STATISTIC, p.value = PVAL, alternative = nm_alternative,

method = METHOD, data.name = DNAME)

class(RVAL) <- "htest"

return(RVAL)

}

即可发现原因：当合并后的样本 c(x, y) 中存在重复值（ties，即“结”）时，ks.test 会给出这条警告，并且不再计算精确 p 值，而是改用渐近分布来近似，因此得到的 p 值只是近似值。再具体就要看 data4 和 x 是什么了，以后最好把数据 dump 出来一并贴上，方便解决问题。