new;
/* Latent Class Analysis for Dichotomous Data
Note: this program is under development and is presented as a research tool.
Programmed by: C. Mitchell Dayton, Professor Department of Measurement and Statistics,
University of Maryland
e-mail: cd4@umail.umd.edu
Uses EM algorithm to fit unconstrained latent classes to dichotomous data in Excel
spreadsheet.
Data are casewise in K columns in an Excel spreadsheet (version 2.1 - version
7.0).
The first row (A1, B1, etc.) of the spreadsheet is assumed to contain variable
names (read and stored as varnams but not otherwise referenced in the program).
Variables
may be coded 0/1 or 1/2. Complete data is assumed (i.e., missing values have been
edited out or
replaced in xls file).
Input: fmt = name for xls file (e.g., C:\Data\Vars.xls);
range = spreadsheet cells to read including variable names (e.g., A1..F201 assuming
data
for 200 cases and 6 variables);
nlc = number of latent classes (2 or more);
nform = 1 (Yes) if data are in 1/2 format; otherwise 0 (No);
kter = maximum number of iterations for EM algorithm (e.g., 500);
critc = convergence criterion for EM algorithm (e.g., .00001);
keq = 1 (Yes) for equality restrictions on conditional probabilities within latent
classes; otherwise 0 (No);
outp = name for output file containing raw data, posterior probabilities for nlc
latent classes
and predicted latent class based on Bayes (maximum posterior probability) strategy.
Note: Other spreadsheet or database input files may be used by changing the import
command; see Gauss Help.
*/
print "Default file for all printed output is lcaout.out in the current directory";
output file = lcaout.out on;
print "Name of Excel (.xls) file?"; fmt = cons;
print "Range in spreadsheet?"; range=cons;
/* Import the Excel spreadsheet */
{y,varnams} = import(fmt,range,1);
print "Are data in 1/2 format? (1=Yes/0=No)"; nform=con(1,1);
if nform == 1; y = abs(y-2); endif;
k = cols(y); nn = rows(y); pr = 1/nn;
rerun: print "Number of latent classes?"; nlc = con(1,1);
print "Max # of iterations?"; kter = con(1,1);
print "Convergence criterion (.0001 or less to ensure accuracy)?"; critc
= con(1,1);
print "Impose equality restrictions on conditional probabilities (1=Yes/0=No)?"
; keq=con(1,1);
iter = 1; jter=-1;
alp = zeros(nlc,k);
j = 1;
do until j == nlc+1;
alp[j,.] = (.05*j)*ones(1,k);
j = j + 1;
endo;
th = zeros(1,nlc)+1/nlc;
p = zeros(nlc,nn);
/* Procedure llik computes recruitment probabilities in p[ ] and returns log-likelihood
function */
proc llik(param);
local i, j, nr, th1, alp1;
nr = rows(param);
th1=param[1:nlc-1]';
th1=th1~(1-sumc(th1'));
alp1=param[nlc:nr];
alp1=reshape(alp1,nlc,k);
i = 1;
do while i <= nlc;
j=1;
do while j <= nn;
p[i,j] = th1[i]*prodc((alp1[i,.]^y[j,.])')*prodc(((1-alp1[i,.])^(1-y[j,.]))');
j = j + 1;
endo;
i = i + 1;
endo;
retp(sumc(ln(sumc(p))));
endp;
/* Start of Loop */
begin: param = th[1:nlc-1]'|vecr(alp);
lik = llik(param);
/* Adjust estimates */
p = (p'./sumc(p))';
thn = sumc(p')*pr;
alpn = alp*0;
l = 1;
do until l == nlc+1;
j = 1;
do until j == k+1;
alpn[l,j] = (p[l,.]*(pr.*y[.,j]))/thn[l];
j = j + 1;
endo;
l = l + 1;
endo;
/* Equality restrictions imposed */
if keq ==1;
aalp = sumc(alpn')/k;
alpn = aalp.*ones(nlc,k); endif;
/* Test for convergence */
crit = sumc(sumc(abs(th'-thn)))+sumc(sumc(abs(alp-alpn)));
if iter > jter+50;
print "Iteration " iter "Criterion = " crit; jter=jter+50;
endif;
if crit < critc;
print "Convergence attained at " crit;
print "Final theta values " thn;
j=1;
do until j == nlc+1;
print "Final alpha values - LC" j;
print alpn[j,.];
j = j +1;
endo;
print "Log Likelihood Function = " lik;
adj = (k+1)*nlc-1;
if keq == 1; adj = 2*nlc-1; endif;
print "Akaike AIC (assuming identified model) = " -2*lik+2*adj;
print "Schwarz BIC (assuming identified model) = " -2*lik+ln(nn)*adj;
/* Hessian */
param = thn[1:nlc-1]|vecr(alpn);
cov = -1*inv(hessp(&llik,param));
print "Note: current standard error estimates are correct only for unconstrained,
identified model";
print "Asymptotic SE estimates " sqrt(diag(cov))';
vv=0;
print "# of Roots equal to/less than 0 = " counts(eig(cov),vv);
print "If # of non-positive roots > 1, standard errors will be incorrect";
/* Posterior probabilities for latent classes and predicted latent class membership
appended to data file */
gp=maxindc(p); yout=y~p'~gp;
print "Write output file with posterior probabilities and predicted latent
class membership (1=Yes/0=No)?" ; wrout=con(1,1);
if wrout == 1; print "Name of output file: "; outp=cons;
output off; output file = ^outp on;
screen off; print yout; screen on; output off;
output file = lcaout.out on; else; goto end; endif;
endif;
th=thn';
alp=alpn;
iter = iter+1;
if iter == kter+1;
print "Convergence not attained";
goto end;
endif;
goto begin;
end: print "Reanalyze same data (1=Yes/0=No)? "; rean = con(1,1);
if rean == 1; goto rerun; endif;
output off;
end;