new;
/* SUBSET - program for computing Akaike AIC and Schwarz BIC statistics for ordered subsets of means or proportions. For means, AIC and BIC are computed for both homogeneous and heterogeneous cases. For heterogeneous cases, variances are pooled in the same patterns as means.
Data are group-wise in four columns (three for proportions) in an Excel spreadsheet (VERSION 2.1 to 7.0 ONLY) or other spreadsheet or database programs such as Lotus or DB2 (see "import" under Help for information on acceptable formats).
The first row (A1, B1, C1, {D1} ) of the spreadsheet is assumed to contain column labels (read and stored as "varnams" but not otherwise referenced in the program). The required data by column are:
A - group labels (not used in program)
B - group sample size
C - group mean {or proportion}
D - group variance (unbiased estimate using n-1 in denominator); omit for proportions
NOTE: When any observed frequency is 0, all frequencies should be incremented by 0.5 prior to computing proportions.
Reference: C. Dayton (1998), Information Criteria for the Paired-Comparisons Problem, American Statistician, 52, 144-151. */ /* All screen output is also directed to the file subset.out in the current directory */

print "Default file for all printed output is Subset.out in the current directory"; output file = subset.out reset;
print "Program SUBSET for Ordered Subsets of Means or Proportions";
print "Prepared by: C. Mitchell Dayton";
print "Department of Measurement & Statistics";
print "University of Maryland";
print "E-Mail: CD4@UMAIL.UMD.EDU";

/* Determine case: 1 = Means, 2 = Proportions */

print "Enter 1 for means or 2 for proportions ";; case = con(1,1);

/* Determine number of patterns to display on output */

print "Number of AIC/BIC values to display (5 is recommended; for 3 groups, use 4) ";; nsave = con(1,1);

/* Import the Excel spreadsheet */

print "Name of spreadsheet file? ";; fmt = cons;

print "Range in spreadsheet? ";;range=cons;

/* Branch to segment for case 2, proportions */

if case == 1; goto next0; else; goto nextp; endif;

next0:

{ydat,varnams} = import(fmt,range,1);

ydat = ydat[.,2:4]; /* omits group labels in first column */

k = rows(ydat); /* k is number of groups */

ydat = sortc(ydat,2);

t1=hsec; /* Start timing execution */

/* Change variance estimates from unbiased (denominator = n-1) to MLE (denominator = n) */

ydat[.,3] = ((ydat[.,1]-1)./ydat[.,1]).*ydat[.,3];

/* Compute overall variance estimate as weighted average of group variances */

n = sumc(ydat[.,1]);

varml = ydat[.,1]'ydat[.,3]/n;

xx=cumsumc(ones(k,1));

base=2;

aic1=zeros(nsave,k+1); bic1=aic1; aic2=aic1; bic2=aic1;

x=1;

do until x > 2^(k-1); x1=x-1;

/* Generate v with binary equivalent of integer = x-1 */

if x1 == 0; v = zeros(1,k);

elseif x1 > 0;

nbase = maxc(log(x1)./log(base))+1;

zbase=reshape(base,nbase,rows(x1));

vv = rev(recserrc(x1,zbase))';

v = zeros(1,k-rows(vv'))~vv;

endif;

/* Compute u that contains ordered subsets based on changes in v.

For example, if v = {0010}, then u = {1123} */

u = zeros(1,k);

i=2;

u[1]=1;

do until i > k;

if v[i] == v[i-1]; u[i] = u[i-1];

elseif v[i] /= v[i-1]; u[i] = u[i-1]+1; endif;

i = i+1; endo;

/* Create k-mean vector, mms, with equality restrictions imposed based on patterns in u vector */

mns = zeros(1,k);

nns = mns; mms = mns; s3=mns;

i=1;

do until i > maxc(u');

mns[i]= sumc(submat(ydat[.,1].*ydat[.,2],indexcat(u',i),1));

nns[i]= sumc(submat(ydat[.,1],indexcat(u',i),1));

i=i+1; endo;

mms = (subscat(u',xx,mns'))';

nns = (subscat(u',xx,nns'))';

mms = mms./nns;

/* Homogeneous case: Compute log-likelihoods and AIC, BIC values*/

lik = (n*varml+(ydat[.,1]'(mms'-ydat[.,2])^2))';

lik = (-n/2)*(ln(2*pi)+ln(varml))-lik/(2*varml);

aic = -2*lik + 2*(maxc(u')+1);

bic = -2*lik + ln(n)*(maxc(u')+1);

/* print aic~u~v; used as diagnostic only */

/* Save AIC, BIC if smaller than any currently saved case */

if x < nsave+1; aic1[x,.] = aic~u; bic1[x,.] = bic~u; goto next1; endif;

if maxc(aic1[.,1]) > aic; aic1[maxindc(aic1[.,1]),.] = aic~u; endif;

if maxc(bic1[.,1]) > bic; bic1[maxindc(bic1[.,1]),.] = bic~u; endif;

next1:;

/* Heterogeneous case: Compute variance estimates for subsets in same pattern as means */

ss = ydat[.,1].*(((mms'-ydat[.,2])^2)+ydat[.,3]);

i=1;

do until i >maxc(u');

s3[i]= sumc(submat(ss,indexcat(u',i),1));

i=i+1; endo;

s3 = (subscat(u',xx,s3'))';

s3 = s3./nns;

/* Compute log-likelihoods and AIC, BIC values */

lik = ydat[.,1]'*ln(s3');

lik = (-.5*lik-(n/2)*(ln(2*pi)+1))';

aic = -2*lik + 4*(maxc(u'));

bic = -2*lik + ln(n)*(2*maxc(u'));

/* Save AIC, BIC if smaller than any currently saved case */

if x < nsave+1; aic2[x,.] = aic~u; bic2[x,.] = bic~u; goto next2; endif;

if maxc(aic2[.,1]) > aic; aic2[maxindc(aic2[.,1]),.] = aic~u; endif;

if maxc(bic2[.,1]) > bic; bic2[maxindc(bic2[.,1]),.] = bic~u; endif;

next2:;

x = x+1;

endo; /* End of loop for generating mean patterns */

/* Sort saved AIC, BIC values in ascending order */

aic1 = sortc(aic1,1); bic1 = sortc(bic1,1); aic2 = sortc(aic2,1); bic2 = sortc(bic2,1);

/* Print results for case of means */

format /mat /rd 10, 3;

ii=1;

print " "; print "NOTE: Means have been sorted from smallest to largest";

print " "; print "Sorted mean are: "; print ydat[.,2]';

print " "; print "Best models assuming Homogeneity of Variance";

print " "; print "Smallest AIC values and ordered subsets: ";

print aic1[ii:nsave,.];

print " "; print "Smallest BIC values and ordered subsets: ";

print bic1[ii:nsave,.];

print " "; print "Best models assuming Heterogeneity of Variance";

print " "; print "Smallest AIC values and ordered subsetss: ";

print aic2[ii:nsave,.];

print " "; print "Smallest BIC values and ordered subsets: ";

print bic2[ii:nsave,.];

format /mat /rd 15, 4;

t2 = hsec;

print "Execution time in seconds = ";; (t2-t1)/100; goto end1;

nextp:

/* Data = Proportions */

{ydat,varnams} = import(fmt,range,1);

ydat = ydat[.,2:3]; /* omits group labels in first column */

k = rows(ydat); /* k is number of groups */

ydat = sortc(ydat,2);

n = sumc(ydat[.,1]);

t1=hsec;

xx=cumsumc(ones(k,1));

base=2;

aic1=zeros(nsave,k+1); bic1=aic1;

x=1;

do until x > 2^(k-1); x1=x-1;

/* Generate v with binary equivalent of integer = x-1 */

if x1 == 0; v = zeros(1,k);

elseif x1 > 0;

nbase = maxc(log(x1)./log(base))+1;

zbase=reshape(base,nbase,rows(x1));

vv = rev(recserrc(x1,zbase))';

v = zeros(1,k-rows(vv'))~vv;

endif;

/* u contains ordered subsets based on changes in v.

For example, if v = {0010}, then u = {1123} */

u = zeros(1,k);

i=2;

u[1]=1;

do until i > k;

if v[i] == v[i-1]; u[i] = u[i-1];

elseif v[i] /= v[i-1]; u[i] = u[i-1]+1; endif;

i = i+1; endo;

/* Create k-proportion vector, mms, with equality restrictions imposed based on patterns in u vector */

mns = zeros(1,k);

nns = mns; mms = mns; s3=mns;

i=1;

do until i > maxc(u');

mns[i]= sumc(submat(ydat[.,1].*ydat[.,2],indexcat(u',i),1));

nns[i]= sumc(submat(ydat[.,1],indexcat(u',i),1));

i=i+1; endo;

mms = (subscat(u',xx,mns'))';

nns = (subscat(u',xx,nns'))';

mms = mms./nns;

/* Compute log-likelihoods and AIC, BIC values*/

lik = (ydat[.,1]'.*mms)*ln(mms')+(ydat[.,1]'.*(1-mms))*ln(1-mms');

aic = -2*lik + 2*(maxc(u'));

bic = -2*lik + ln(n)*(maxc(u'));

/* print aic~u~v; used as diagnostic only */

/* Save AIC, BIC if smaller than any currently saved case */

if x < nsave+1; aic1[x,.] = aic~u; bic1[x,.] = bic~u; goto next3; endif;

if maxc(aic1[.,1]) > aic; aic1[maxindc(aic1[.,1]),.] = aic~u; endif;

if maxc(bic1[.,1]) > bic; bic1[maxindc(bic1[.,1]),.] = bic~u; endif;

next3:;

x = x+1;

endo; /* End of loop for generating patterns of proportions */

/* Sort AIC, BIC values in ascending order */

aic1 = sortc(aic1,1); bic1 = sortc(bic1,1);

/* Print results for case of proportions */

format /mat /rd 10, 3;

ii=1;

print " "; print "NOTE: Proportions have been sorted from smallest to largest";

print " "; print "Sorted proportions are: "; print ydat[.,2]';

print " "; print "Smallest AIC values and ordered subsets: ";

print aic1[ii:nsave,.];

print " "; print "Smallest BIC values and ordered subsets: ";

print bic1[ii:nsave,.];

t2 = hsec;

print "Execution time in seconds = ";; (t2-t1)/100;

end1: output off;

end;