jiangshq
非参数方法用的越来越多,医学数据很多都不是正态分布,经常会用到秩和检验来比较几组的中位数,牵涉到多组时两两比较是个问题,sas程序不知道如何完成,编制比较麻烦。
jiangshq
最近搞毕业论文,所以会问很多问题,等论文搞完了会好好参与讨论,也会把我在医学统计方面的资料和经验分享给大家。
zt2730
我这是转帖:!!!!!也为他发愁过 后来在另外的论坛上找到了点答案!分享一下!
对于多组间非参数检验的两两比较,SPSS和SAS中均没有现成的模块。 对分析变量进行编秩,然后采用方差分析对秩进行两两比较,感觉这是个不错的注意,但我没有看到相关参考书上的理论依据。
本人现根据《医学统计学》(孙振球主编,供研究生用)提供的Nemenyi法,用SAS进行编程实现多个样本间的两两比较,希望大家试用,并改进。从下面实例的运行结果看,对秩的SNK法比较与Nemenyi法的结果是一致的。
SAS程序如下:(dxy_zhaql)
option formchar=" ---------";
/*本程序根据《医学统计学》(供研究生用)P133提供的Nemenyi法公式进行多个样本两两比较*/
/*本例分组数为6,其它分组情况要做相应修改*/
%let n=82;/*n为总例数*/
%let g=6;/*g为组数*/
data dt1;
input x g@@;/*x为要分析的变量,g为分组变量*/
output;
cards;
1.144414 1
1.177112 4
1.239130 4
1.242507 1
1.242507 1
1.340599 1
1.373297 1
1.402174 5
1.405995 4
1.467391 4
1.504087 5
1.536785 5
1.536785 6
1.569482 1
1.602180 6
1.630435 4
1.634877 6
1.695652 1
1.695652 6
1.728261 1
1.728261 4
1.728261 6
1.760870 4
1.760870 6
1.793478 1
1.863760 4
1.891304 1
1.891304 4
1.891304 5
1.994550 6
2.021739 1
2.059946 4
2.119565 1
2.119565 6
2.250000 6
2.347826 3
2.354223 1
2.354223 2
2.419619 4
2.517711 6
2.550409 5
2.583106 4
2.713896 3
2.771739 2
2.779292 5
2.804348 3
2.836957 5
2.844687 4
2.869565 3
2.934783 1
3.065217 3
3.065217 5
3.138965 3
3.228261 5
3.269755 2
3.293478 6
3.391304 4
3.408719 3
3.456522 3
3.465940 3
3.531335 6
3.586957 3
3.645777 3
3.662125 3
3.694823 2
3.760218 2
3.782609 2
3.945652 2
3.945652 2
4.010870 2
4.043478 3
4.043478 5
4.087193 5
4.108696 5
4.610354 6
5.002725 5
5.152174 2
5.543478 2
5.591281 5
5.771739 6
5.787466 3
6.702997 2
6.768392 2
7.389646 2
;
run;
proc npar1way wilcoxon data=dt1;output out=dt9;var x;class g;run;/*非参数检验*/
proc rank data=dt1 out=dt2;var x;ranks rx;run;/*编秩*/
proc anova data=dt2;/*秩的SNK法比较*/
class g;
model rx=g;
means g/snk;
run;
data dt3;/*保留具有相同秩的样本,以便计算校正变量C*/
set dt2;
keep i j x g rx;
i+1;
if i=rx then delete;
j+1;
run;
proc sort data=dt3;by rx;run;
proc univariate data=dt3 noprint;output out=dt4 n=n;
var j;
by rx;
run;
data dt5;
set dt4;
keep cj c n j;
cj+(n**3-n)/(&n**3-&n);
c=1-cj;
run;
proc univariate data=dt5 noprint;var c;output out=dt6 max=c;
run;
data _null_;
set dt6;
call symput('c',c);/*c为校正变量*/
run;
proc sort data=dt2 out=dt2sbg;by g;
proc means data=dt2sbg noprint;var rx;by g; output out=rxbg mean=rxmean;/*计算各组均秩*/
run;
data dt7;/*给各组均秩、样本数赋值*/
set rxbg;
i+1;
if i=1 then do;
call symput('rx1',rxmean);call symput('n1',_freq_);end;
if i=2 then do;
call symput('rx2',rxmean);call symput('n2',_freq_);end;
if i=3 then do;
call symput('rx3',rxmean);call symput('n3',_freq_);end;
if i=4 then do;
call symput('rx4',rxmean);call symput('n4',_freq_);end;
if i=5 then do;
call symput('rx5',rxmean);call symput('n5',_freq_);end;
if i=6 then do;
call symput('rx6',rxmean);call symput('n6',_freq_);end;
%put _user_;
run;
data dt8;/*计算组间比较卡方值和P值*/
do g=1 to &g*(&g-1)/2;
dif=&g-1;
if g=1 then do;
com='g1-g2';meanscore1=&rx1;meanscore2=&rx2;
chi=(&rx1-&rx2)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=2 then do;
com='g1-g3';meanscore1=&rx1;meanscore2=&rx3;
chi=(&rx1-&rx3)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=3 then do;
com='g1-g4';meanscore1=&rx1;meanscore2=&rx4;
chi=(&rx1-&rx4)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=4 then do;
com='g1-g5';meanscore1=&rx1;meanscore2=&rx5;
chi=(&rx1-&rx5)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=5 then do;
com='g1-g6';meanscore1=&rx1;meanscore2=&rx6;
chi=(&rx1-&rx6)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=6 then do;
com='g2-g3';meanscore1=&rx2;meanscore2=&rx3;
chi=(&rx2-&rx3)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=7 then do;
com='g2-g4';meanscore1=&rx2;meanscore2=&rx4;
chi=(&rx2-&rx4)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=8 then do;
com='g2-g5';meanscore1=&rx2;meanscore2=&rx5;
chi=(&rx2-&rx5)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=9 then do;
com='g2-g5';meanscore1=&rx2;meanscore2=&rx5;
chi=(&rx2-&rx6)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=10 then do;
com='g3-g4';meanscore1=&rx3;meanscore2=&rx4;
chi=(&rx3-&rx4)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=11 then do;
com='g3-g5';meanscore1=&rx3;meanscore2=&rx5;
chi=(&rx3-&rx5)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=12 then do;
com='g3-g6';meanscore1=&rx3;meanscore2=&rx6;
chi=(&rx3-&rx6)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=13 then do;
com='g4-g5';meanscore1=&rx4;meanscore2=&rx5;
chi=(&rx4-&rx5)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=14 then do;
com='g4-g6';meanscore1=&rx4;meanscore2=&rx6;
chi=(&rx4-&rx6)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
if g=15 then do;
com='g5-g6';meanscore1=&rx5;meanscore2=&rx6;
chi=(&rx5-&rx6)**2/(((&n*(&n+1))/12)*(1/&n2+1/&n4)*&c);
p=1-probchi(chi,&g-1);
end;
output;
end;
run;
proc print;
run;
huadli
谢谢
huadli
%macro nparcomparison(data=, /*我们要分析的数据集 */
n=, /*样本数 */
group=, /*感兴趣的组别,数值型 */
groupnum=, /*感兴趣的组的水平 */
var=); /*感兴趣的变量 */
proc rank data=&data out=r_&data;
var &var;
ranks r&var;
run;
data rr_&data;/*保留具有相同秩的样本,以便计算校正变量C*/
set r_&data;
keep i j x &group r&var;
i+1;
if i=r&var then delete;
j+1;
run;
proc sort data=rr_&data;
by r&var;
run;
proc univariate data=rr_&data noprint;
var j;
output out=rrr_&data n=n;
by r&var;
run;
data rr_&data;
set rr_&data;
keep cj c n j;
cj+(n**3-n)/(&n**3-&n);
c=1-cj;
run;
proc univariate data=rr_&data noprint;
var c;
output out=dt6 max=c;
run;
data _null_;
set dt6;
call symput('c',c);/*c为校正变量*/
run;
proc means data=r_&data nway noprint;
class &group;
var r&var;
output out=rxbg mean=r&var.mean;/*计算各组均秩*/
run;
data _null_;
set rxbg;
%do i=1 %to &groupnum;
if &group=&i then
do;
call symput("rx&i",r&var.mean);call symput("n&i",_freq_);
end;
%end;
run;
data out;
%do i=1 %to &groupnum;
%do j=%eval(&i+1) %to &groupnum;
label="group&i-group&j";
meanscore1=&&rx&i;
meanscore2=&&rx&j;
chi=(&&rx&i-&&rx&j)**2/(((&n*(&n+1))/12)*(1/&&n&i+1/&&n&j)*&c);
p=1-probchi(chi,&groupnum-1);
output;
%end;
%end;
run;
title "Contrast";
options missing=' ';
proc print data=out label uniform noobs split='*';
format meanscore1 10.2
meanscore2 10.2
chi 10.2
p pvalue7.4
;
label
label="Label"
meanscore1="Group's mean score"
meanscore2="Group's mean score"
chi="Chi's statistics"
p="P value";
var label meanscore1 meanscore2 chi p;
run;
options missing='.';
proc datasets library=work nolist;
delete r_&data rr_&data rrr_&data dt6 rxbg;
run;
quit;
%mend nparcomparison;
%nparcomparison(data=dt1,group=g,groupnum=6,var=x);
huadli
chi=(&&rx&i-&&rx&j)**2/(((&n*(&n+1))/12)*(1/&&n&i+1/&&n&j)*&c);
组不同时 1/&&n&i+1/&&n&j是不是要变化 ???
jiangshq
多谢,试试先
jiangshq
好像有点问题呀
jiangshq
22: LINE and COLUMN cannot be determined.
NOTE 242-205: NOSPOOL is on. Rerunning with OPTION SPOOL may allow recovery of the LINE and COLUMN
where the error has occurred.
ERROR 22-322: Syntax error, expecting one of the following: a name, a quoted string,
a numeric constant, a datetime constant, a missing value, INPUT, PUT.
22: LINE and COLUMN cannot be determined.
NOTE 242-205: NOSPOOL is on. Rerunning with OPTION SPOOL may allow recovery of the LINE and COLUMN
where the error has occurred.
ERROR 22-322: Syntax error, expecting one of the following: a name, a quoted string,
a numeric constant, a datetime constant, a missing value, INPUT, PUT.
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.RR_DT1 may be incomplete. When this step was stopped there were 0
observations and 4 variables.
WARNING: Data set WORK.RR_DT1 was not replaced because this step was stopped.
NOTE: DATA statement used (Total process time):
real time 0.03 seconds
cpu time 0.01 seconds
ERROR: Variable C not found.
huadli
%nparcomparison(data=dt1,n=82,group=g,groupnum=6,var=x);
这个 忘记改了
huadli
%macro nparcomparison(data=, /*我们要分析的数据集 */
group=, /*感兴趣的组别,数值型 */
groupnum=, /*感兴趣的组的水平 */
var=); /*感兴趣的变量 */
data &data;
set &data end=eof;
t+1;
if eof then
call symput("n",t);
run;
proc rank data=&data out=r_&data;
var &var;
ranks r&var;
run;
data rr_&data;/*保留具有相同秩的样本,以便计算校正变量C*/
set r_&data;
keep i j &var &group r&var;
i+1;
if i=r&var then delete;
j+1;
run;
proc sort data=rr_&data;
by r&var;
run;
proc univariate data=rr_&data noprint;
var j;
output out=rrr_&data n=n;
by r&var;
run;
data rr_&data;
set rr_&data;
keep cj c n j;
cj+(n**3-n)/(&n**3-&n);
c=1-cj;
run;
proc univariate data=rr_&data noprint;
var c;
output out=dt6 max=c;
run;
data _null_;
set dt6;
call symput('c',c);/*c为校正变量*/
run;
proc means data=r_&data nway noprint;
class &group;
var r&var;
output out=rxbg mean=r&var.mean;/*计算各组均秩*/
run;
data _null_;
set rxbg;
%do i=1 %to &groupnum;
if &group=&i then
do;
call symput("rx&i",r&var.mean);call symput("n&i",_freq_);
end;
%end;
run;
data out;
%do i=1 %to &groupnum;
%do j=%eval(&i+1) %to &groupnum;
label="group&i-group&j";
meanscore1=&&rx&i;
meanscore2=&&rx&j;
chi=(&&rx&i-&&rx&j)**2/(((&n*(&n+1))/12)*(1/&&n&i+1/&&n&j)*&c);
p=1-probchi(chi,&groupnum-1);
output;
%end;
%end;
run;
title "Contrast";
options missing=' ';
proc print data=out label uniform noobs split='*';
format meanscore1 10.2
meanscore2 10.2
chi 10.2
p pvalue7.4
;
label
label="Label"
meanscore1="Group's mean score"
meanscore2="Group's mean score"
chi="Chi's statistics"
p="P value";
var label meanscore1 meanscore2 chi p;
run;
options missing='.';
proc datasets library=work nolist;
delete r_&data rr_&data rrr_&data dt6 rxbg;
run;
quit;
%mend nparcomparison;
%nparcomparison(data=dt1,group=g,groupnum=6,var=x);
joan985
是不是就是做宏的程序呢?
另外,个人见解,将BY语句改为CLASS语句,就省略掉SORT排序的程序了,简单迅速。
joan985
我用过这个程序还不错
proc npar1way data=sasuser.a1 wilcoxon;这边是直接用数据集,如果小数据可以使用input语句
var x; x是变量
class g; g是分组
run;
proc freq data=sasuser.a1;
tables g*x/scores=rank cmh2;
run;
proc rank;
var x;ranks rx;
run;
joan985
上面的程序是连续性变量啊,补充一下