看看这篇文章:
http://www.jstatsoft.org/v08/i18/kolmo.pdf
如果有足够的精力,也不妨看看 R 中 KS 检验函数 ks.test 的源程序(在 R 控制台中直接输入函数名即可显示):
> ks.test<br />
# Kolmogorov-Smirnov test (one-sample or two-sample), as printed from the<br />
# 'stats' namespace.<br />
#<br />
# Arguments:<br />
#   x           data vector; NA entries are dropped before testing.<br />
#   y           a numeric vector (selects the two-sample branch), or a function /<br />
#               a string naming a function (selects the one-sample branch); in<br />
#               the one-sample branch it is called as y(sort(x), ...).<br />
#   ...         extra arguments forwarded to 'y' in the one-sample branch.<br />
#   alternative one of "two.sided", "less", "greater" (resolved by match.arg).<br />
#   exact       NULL or logical. When NULL it defaults to n.x * n.y < 10000 in<br />
#               the two-sample branch and to n < 100 in the one-sample branch.<br />
#<br />
# Returns a list of class "htest" with elements statistic, p.value,<br />
# alternative, method and data.name.<br />
#<br />
# Stops when 'x' (or a numeric 'y') has no non-NA values, or when 'y' is<br />
# neither numeric nor resolvable to a function. Warns when ties are present.<br />
#<br />
# NOTE(review): the .C() entry points "psmirnov2x", "pkolmogorov2x" and<br />
# "pkstwo" are looked up in PACKAGE = "stats"; they are not visible here and<br />
# presumably exist only in the R version this listing was printed from --<br />
# confirm before running this code elsewhere.<br />
function (x, y, ..., alternative = c("two.sided", "less", "greater"), <br />
exact = NULL) <br />
{<br />
# Local helper used below for the exact one-sided, one-sample p-value:<br />
# returns 0 for x <= 0, 1 for x >= 1, otherwise 1 - x * (sum over j).<br />
# Each term of the sum is assembled on the log scale (lchoose / log) and<br />
# then exponentiated.<br />
pkolmogorov1x <- function(x, n) {<br />
if (x <= 0) <br />
return(0)<br />
if (x >= 1) <br />
return(1)<br />
j <- seq(from = 0, to = floor(n * (1 - x)))<br />
1 - x * sum(exp(lchoose(n, j) + (n - j) * log(1 - x - <br />
j/n) + (j - 1) * log(x + j/n)))<br />
}<br />
alternative <- match.arg(alternative)<br />
# Capture the caller's expression for 'x' before 'x' is modified.<br />
DNAME <- deparse(substitute(x))<br />
x <- x[!is.na(x)]<br />
n <- length(x)<br />
if (n < 1) <br />
stop("not enough 'x' data")<br />
# PVAL stays NULL unless an exact p-value is computed in one of the branches;<br />
# the asymptotic fallback near the end keys off that.<br />
PVAL <- NULL<br />
if (is.numeric(y)) {<br />
# ---- Two-sample branch: 'y' is a second data vector. ----<br />
DNAME <- paste(DNAME, "and", deparse(substitute(y)))<br />
y <- y[!is.na(y)]<br />
# n.x is stored as double, so n.x * n.y below is evaluated in double<br />
# arithmetic (presumably to avoid integer overflow -- confirm).<br />
n.x <- as.double(n)<br />
n.y <- length(y)<br />
if (n.y < 1) <br />
stop("not enough 'y' data")<br />
if (is.null(exact)) <br />
exact <- (n.x * n.y < 10000)<br />
METHOD <- "Two-sample Kolmogorov-Smirnov test"<br />
TIES <- FALSE<br />
# From here on 'n' is n.x * n.y / (n.x + n.y); it is the value used by the<br />
# asymptotic p-value formulas at the end of the function.<br />
n <- n.x * n.y/(n.x + n.y)<br />
w <- c(x, y)<br />
# Walk the pooled sample in sorted order, adding 1/n.x for each value that<br />
# came from x (position <= n.x in w) and subtracting 1/n.y for each value<br />
# that came from y; z is the running total at every pooled point.<br />
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))<br />
if (length(unique(w)) < (n.x + n.y)) {<br />
warning("cannot compute correct p-values with ties")<br />
# With ties, keep z only at the last position of each run of equal<br />
# values (positions where the next sorted value differs, plus the final<br />
# position).<br />
z <- z[c(which(diff(sort(w)) != 0), n.x + n.y)]<br />
TIES <- TRUE<br />
}<br />
STATISTIC <- switch(alternative, two.sided = max(abs(z)), <br />
greater = max(z), less = -min(z))<br />
nm_alternative <- switch(alternative, two.sided = "two-sided", <br />
less = "the CDF of x lies below that of y", greater = "the CDF of x lies above that of y")<br />
# An exact p-value is computed only for the two-sided alternative without<br />
# ties; in every other case PVAL remains NULL here.<br />
if (exact && (alternative == "two.sided") && !TIES) <br />
PVAL <- 1 - .C("psmirnov2x", p = as.double(STATISTIC), <br />
as.integer(n.x), as.integer(n.y), PACKAGE = "stats")$p<br />
}<br />
else {<br />
# ---- One-sample branch: 'y' is a function or the name of one. ----<br />
if (is.character(y)) <br />
y <- get(y, mode = "function")<br />
if (mode(y) != "function") <br />
stop("'y' must be numeric or a string naming a valid function")<br />
if (is.null(exact)) <br />
exact <- (n < 100)<br />
METHOD <- "One-sample Kolmogorov-Smirnov test"<br />
TIES <- FALSE<br />
if (length(unique(x)) < n) {<br />
warning("cannot compute correct p-values with ties")<br />
TIES <- TRUE<br />
}<br />
# 'x' is overwritten: it now holds y(sorted x) minus (i - 1)/n for the i-th<br />
# sorted point; the statistic below is built from x and 1/n - x.<br />
x <- y(sort(x), ...) - (0:(n - 1))/n<br />
STATISTIC <- switch(alternative, two.sided = max(c(x, <br />
1/n - x)), greater = max(1/n - x), less = max(x))<br />
# Exact p-value without ties: the C routine for the two-sided case, the<br />
# local pkolmogorov1x helper for the one-sided cases.<br />
if (exact && !TIES) {<br />
PVAL <- if (alternative == "two.sided") <br />
1 - .C("pkolmogorov2x", p = as.double(STATISTIC), <br />
as.integer(n), PACKAGE = "stats")$p<br />
else 1 - pkolmogorov1x(STATISTIC, n)<br />
}<br />
nm_alternative <- switch(alternative, two.sided = "two-sided", <br />
less = "the CDF of x lies below the null hypothesis", <br />
greater = "the CDF of x lies above the null hypothesis")<br />
}<br />
# Label the statistic according to the alternative.<br />
names(STATISTIC) <- switch(alternative, two.sided = "D", <br />
greater = "D^+", less = "D^-")<br />
# Local helper for the asymptotic two-sided p-value: rejects non-numeric<br />
# input, yields NA for NA entries and 0 for non-positive entries, and passes<br />
# the remaining entries, together with 'tol', to the C routine "pkstwo".<br />
pkstwo <- function(x, tol = 1e-06) {<br />
if (is.numeric(x)) <br />
x <- as.vector(x)<br />
else stop("argument 'x' must be numeric")<br />
p <- rep(0, length(x))<br />
p[is.na(x)] <- NA<br />
IND <- which(!is.na(x) & (x > 0))<br />
if (length(IND) > 0) {<br />
p[IND] <- .C("pkstwo", as.integer(length(x[IND])), <br />
p = as.double(x[IND]), as.double(tol), PACKAGE = "stats")$p<br />
}<br />
return(p)<br />
}<br />
# Fallback when no exact p-value was computed above: two-sided uses<br />
# 1 - pkstwo(sqrt(n) * D); one-sided uses exp(-2 * n * D^2). In the<br />
# two-sample branch 'n' is the n.x * n.y / (n.x + n.y) value set earlier.<br />
# NOTE(review): 'alternative' is a single string after match.arg(), so this<br />
# ifelse() acts on a scalar condition.<br />
if (is.null(PVAL)) {<br />
PVAL <- ifelse(alternative == "two.sided", 1 - pkstwo(sqrt(n) * <br />
STATISTIC), exp(-2 * n * STATISTIC^2))<br />
}<br />
# Assemble the result as an "htest" object.<br />
RVAL <- list(statistic = STATISTIC, p.value = PVAL, alternative = nm_alternative, <br />
method = METHOD, data.name = DNAME)<br />
class(RVAL) <- "htest"<br />
return(RVAL)<br />
}<br />
<environment: namespace:stats><br />