/* SAS Macro %contigclust(DistFile=,IdVar=,ContigFile=,ContigVars=,Method=,Beta=,MaxSize=,OutHist=,OutTree=,Options=) Purpose: Hierarchically cluster objects while imposing a contiguity constraint Article: "Contiguity-constrained Hierarchical Agglomerative Clustering using SAS" Author: Anthony Recchia Arguments: DistFile - the name of the data set containing the distance matrix for the clustering algorithm IdVar - the name of the variable in the DistFile data set that identifies the object that corresponds to each row of the distance matrix; if there is no such variable, then the ContigVars variables in the ContigFile data set must correspond to row numbers of the distance matrix ContigFile - the name of the data set containing one observation for each pair of contiguous objects; if this is left blank, then an unconstrained analysis is performed ContigVars - the names of the two variables in the ContigFile data set that give the pairs of contiguous objects; the names should be separated by a space; if the data set only contains two variables, then this can be left blank Method - the name of the clustering method to be used; one of 1) average - Average Linkage Analysis 5) mcquitty - McQuitty's Similarity Analysis 2) centroid - Centroid Analysis 6) median - Median Analysis 3) complete - Complete Linkage Analysis 7) single - Single Linkage Analysis 4) flexible - Flexible-Beta Analysis 8) ward - Ward's Minimum Variance Analysis Beta - if the method is 'flexible', then this is the value of the parameter beta in the Flexible-Beta update formula d(H,M) = (1-beta)*[d(H,K)+d(H,L)]/2 + beta*d(K,L); defaults to -0.25 if left blank MaxSize - the maximum number of objects that are allowed to inhabit a single cluster; defaults to the number of objects if left blank Options - the names of any options; can be one or more of the following 1) noprint - suppresses the display of all output 2) nosquare - prevents distances from being squared with the 'average', 'centroid', 'median', and 'ward' methods; in this case, the data are assumed to be squared Euclidean distances for computing statistics 3) rsq - requests computation of root-mean-square standard deviation, R-squared, semipartial R-squared, pseudo F statistic, and pseudo t-squared statistic with the 'average', 'centroid', and 'ward' methods; since these statistics are not printed and will only appear in output data sets, this option is ignored when both of the arguments OutHist and OutTree are left blank; these statistics are always produced with the 'ward' method OutHist - the name of the output data set that the macro should create to contain the cluster history; leave blank if you do not need this data set after the macro has finished executing OutTree - the name of the output data set that the macro should create for drawing tree diagrams; leave blank if you do not need this data set after the macro has finished executing Notes: The intended means of producing a distance matrix for the DistFile data set is the DISTANCE procedure with an ID or COPY statement for the IdVar variable. The matrix itself can be square or lower triangular as long as the lower triangle does not not contain any missing or negative values. See "DISTANCE procedure" in the SAS documentation for more information. Each value of the IdVar variable must be unique. If no IdVar variable is specified, then the values of the ContigVars variables must be positive integers that correspond to row numbers of the distance matrix. In this case, the variables can be numeric or character. The ContigVars variables in the ContigFile data set and the IdVar variable in the DistFile data set must be of the same type, and the set of values taken by the ContigVars variables must be a subset of the set of values taken by the IdVar variable. See "CLUSTER procedure" then the subheading "clustering methods" in the SAS documentation for descriptions of the clustering methods used here. Since a distance matrix is used rather than the original data, the root-mean-square standard deviations that are calculated with the 'rsq' option will not be correct. This situation is true of PROC CLUSTER, as well. However, relative differences between the values at different iterations are preserved. Generally speaking, if unsquared or squared Euclidean distances are used and the 'nosquare' option is used appropriately, then the values that are calculated here will be too large by a factor of sqrt(v) where v is the number of variables that were used in the computation of the distance matrix. The OutTree data set contains variables that can be used to draw other types of tree diagrams besides the one that the macro produces. It can also be used to produce a data set containing the cluster membership at any desired level. See "TREE procedure" in the SAS documentation for a description of how to do these things. */ %macro contigclust(DistFile=,IdVar=,ContigFile=,ContigVars=,Method=,Beta=,MaxSize=,Options=,OutHist=,OutTree=); /* Check that the values of the arguments that were supplied by the user are valid */ %let print=1; %let rsq=0; %let square=1; %if %quote(&Options)^=%str() %then %do; %let i=1; %let tempopt=%upcase(%qscan(&Options,1,%str( ))); %do %while(%quote(&tempopt)^=%str()); %if %quote(&tempopt)=NOPRINT %then %let print=0; %else %if %quote(&tempopt)=NOSQUARE %then %let square=0; %else %if %quote(&tempopt)=RSQ %then %let rsq=1; %else %do; %put ERROR: The argument Options should only contain the following: NOPRINT, NOSQUARE, RSQ.; %return; %end; %let i=%eval(&i+1); %let tempopt=%upcase(%qscan(&Options,&i,%str( ))); %end; %end; %if %quote(&Method)=%str() %then %do; %put ERROR: No value was provided for the argument Method.; %return; %end; %if %qscan(&Method,2,%str( ))^=%str() %then %do; %put ERROR: The argument Method should contain only one name.; %return; %end; %let Method=%upcase(&Method); %if %quote(&Method)=AVERAGE %then %do; %let distcmnds=%str(nk=oldfreqs[1]; nl=oldfreqs[2]; nknlsum=nk+nl;); %let distform=(nk*dhk+nl*dhl)/nknlsum; %if &square=0 %then %let distlabel=%str(;Aver;Dist); %else %let distlabel=%str(;RMS;Dist); %let heightlabel=Average Distance Between Clusters; %let histtitle=Average Linkage Cluster Analysis; %end; %else %if %quote(&Method)=CENTROID %then %do; %let distcmnds=%str(dkl=mindist; nk=oldfreqs[1]; nl=oldfreqs[2]; nknlsum=nk+nl; nknlcoef=nk*nl/nknlsum**2;); %let distform=(nk*dhk+nl*dhl)/nknlsum-nknlcoef*dkl; %if &square=0 %then %let distlabel=%str(;Squared ;Cent Dist); %else %let distlabel=%str(;Cent;Dist); %let heightlabel=Distance Between Cluster Centroids; %let histtitle=Centroid Hierarchical Cluster Analysis; %end; %else %if %quote(&Method)=COMPLETE %then %do; %let distcmnds=; %let distform=max(dhk,dhl); %let distlabel=%str(;Max;Dist); %let heightlabel=Maximum Distance Between Clusters; %let histtitle=Complete Linkage Cluster Analysis; %let square=0; %end; %else %if %quote(&Method)=FLEXIBLE %then %do; %if %quote(&Beta)=%str() %then %let Beta=-0.25; %else %do; %if %qscan(&Beta,2,%str( ))^=%str() %then %do; %put ERROR: The argument Beta should contain exactly one numeric value when the FLEXIBLE method is used.; %return; %end; %if %sysfunc(getoption(mautosource))=MAUTOSOURCE %then %let betatype=%datatyp(&Beta); %else %do; options mautosource; %let betatype=%datatyp(Beta); options nomautosource; %end; %if &betatype=CHAR %then %do; %put ERROR: The argument Beta should contain exactly one numeric value when the FLEXIBLE method is used.; %return; %end; %end; %let distcmnds=%str(betadkl=&Beta*mindist;); %let distform=%sysevalf((1-&Beta)/2)*(dhk+dhl)+betadkl; %let distlabel=%str(;Flex;Dist); %let heightlabel=Flexible-Beta Distance; %let histtitle=Flexible-Beta Cluster Analysis; %let square=0; %end; %else %if %quote(&Method)=MCQUITTY %then %do; %let distcmnds=; %let distform=(dhk+dhl)/2; %let distlabel=McQ; %let heightlabel=McQuitty%str(%')s Similarity; %let histtitle=McQuitty%str(%')s Similarity Analysis; %let square=0; %end; %else %if %quote(&Method)=MEDIAN %then %do; %let distcmnds=%str(dkldivfour=mindist/4;); %let distform=(dhk+dhl)/2-dkldivfour; %if &square=0 %then %let distlabel=%str(;Squared; Median Dist); %else %let distlabel=%str(;Median;Dist); %let heightlabel=Median Distance; %let histtitle=Median Hierarchical Cluster Analysis; %end; %else %if %quote(&Method)=SINGLE %then %do; %let distcmnds=; %let distform=min(dhk,dhl); %let distlabel=%str(;Min;Dist); %let heightlabel=Minimum Distance Between Clusters; %let histtitle=Single Linkage Cluster Analysis; %let square=0; %end; %else %if %quote(&Method)=WARD %then %do; %let distcmnds=%str(dkl=mindist; nk=oldfreqs[1]; nl=oldfreqs[2]; nknlsum=nk+nl;); %let distform=((nh+nk)*dhk+(nh+nl)*dhl-nh*dkl)/(nh+nknlsum); %let distlabel=BSS; %let heightlabel=Between-Cluster Sum of Squares; %let histtitle=Ward%str(%')s Minimum Variance Cluster Analysis; %let rsq=1; %end; %else %do; %put ERROR: The argument Method should be one of the following: AVERAGE, CENTROID, COMPLETE, FLEXIBLE, MCQUITTY, MEDIAN, SINGLE, WARD.; %return; %end; %let notesandwarns=0; %if &rsq=0 or &Method=AVERAGE or &Method=CENTROID or &Method=WARD %then %let rsqwarn=0; %else %do; %let rsq=0; %let rsqwarn=1; %end; %if &Method=AVERAGE or &Method=CENTROID or &Method=MEDIAN or &Method=WARD %then %let squarenote=1; %else %do; %let square=0; %let squarenote=0; %end; %if %quote(&DistFile)=%str() %then %do; %put ERROR: No value was provided for the argument DistFile.; %return; %end; %if %qscan(&DistFile,2,%str( ))^=%str() %then %do; %put ERROR: The argument DistFile should contain only one name.; %return; %end; %if %index(&DistFile,.)=0 %then %let DistFile=work.&DistFile; %if %sysfunc(exist(&DistFile))=0 %then %do; %put ERROR: File %upcase(&DistFile).DATA does not exist.; %return; %end; %let datanum=%sysfunc(open(&DistFile)); %let numobjs=%sysfunc(attrn(&datanum,nobs)); %if &numobjs<2 %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The DistFile data set should contain at least two observations.; %return; %end; %let numtotvars=%sysfunc(attrn(&datanum,nvars)); %if %quote(&IdVar)=%str() %then %do; %let idnum=0; %let numdistvars=&numtotvars; %let numidvars=0; %let objlen=%eval(%length(&numobjs)+2); %end; %else %do; %if %qscan(&IdVar,2,%str( ))^=%str() %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The argument IdVar should contain only one name.; %return; %end; %let idnum=%sysfunc(varnum(&datanum,&IdVar)); %if &idnum=0 %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The variable &IdVar is not in the file %upcase(&DistFile).DATA.; %return; %end; %let idtype=%sysfunc(vartype(&datanum,&idnum)); %let numdistvars=%eval(&numtotvars-1); %let numidvars=1; %end; %if &numobjs<&numdistvars %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: There are too many variables in the DistFile data set.; %return; %end; %if &numobjs>&numdistvars %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: There are too many observations in the DistFile data set.; %return; %end; %do i=1 %to &numtotvars; %if &i^=&idnum and %sysfunc(vartype(&datanum,&i))=C %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The dissimilarity matrix variables in the DistFile data set are not all numeric.; %return; %end; %end; %let datanum=%sysfunc(close(&datanum)); %if %quote(&ContigFile)=%str() %then %do; %if %quote(&ContigVars)^=%str() %then %do; %put ERROR: A value was provided for the argument ContigVars but not for the argument ContigFile.; %return; %end; %let histtitle=&histtitle without Contiguity Constraint; %end; %else %do; %if %qscan(&ContigFile,2,%str( ))^=%str() %then %do; %put ERROR: The argument ContigFile should contain only one name.; %return; %end; %if %index(&ContigFile,.)=0 %then %let ContigFile=work.&ContigFile; %if %sysfunc(exist(&ContigFile))=0 %then %do; %put ERROR: File %upcase(&ContigFile).DATA does not exist.; %return; %end; %let datanum=%sysfunc(open(&ContigFile)); %if %sysfunc(attrn(&datanum,nobs))=0 %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: There are no observations in the ContigFile data set.; %return; %end; %if %quote(&ContigVars)=%str() %then %do; %let numcontigvars=%sysfunc(attrn(&datanum,nvars)); %if &numcontigvars=2 %then %do; %let contignum1=1; %let contignum2=2; %let contigvar1=%sysfunc(varname(&datanum,1)); %let contigvar2=%sysfunc(varname(&datanum,2)); %end; %else %do; %let datanum=%sysfunc(close(&datanum)); %if &numcontigvars<2 %then %put ERROR: There should be at least two variables in the ContigFile data set.; %else %put ERROR: If the ContigFile data set contains more than two variables, then a value for the argument ContigVars must be provided.; %return; %end; %end; %else %do; %let contigvar1=%qscan(&ContigVars,1,%str( )); %let contignum1=%sysfunc(varnum(&datanum,&contigvar1)); %if &contignum1=0 %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The variable &contigvar1 is not in the file %upcase(&ContigFile).DATA.; %return; %end; %let contigvar2=%qscan(&ContigVars,2,%str( )); %if &contigvar2=%str() %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The argument ContigVars should contain exactly two names.; %return; %end; %let contignum2=%sysfunc(varnum(&datanum,&contigvar2)); %if &contignum2=0 %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The variable &contigvar2 is not in the file %upcase(&ContigFile).DATA.; %return; %end; %let contigvar3=%qscan(&ContigVars,3,%str( )); %if &contigvar3^=%str() %then %do; %let datanum=%sysfunc(close(&datanum)); %put ERROR: The argument ContigVars should contain exactly two names.; %return; %end; %end; %let contigtype1=%sysfunc(vartype(&datanum,&contignum1)); %let contigtype2=%sysfunc(vartype(&datanum,&contignum2)); %let maxlen=%sysfunc(max(%sysfunc(varlen(&datanum,&contignum1)),%sysfunc(varlen(&datanum,&contignum2)))); %let datanum=%sysfunc(close(&datanum)); %if &idnum=0 %then %do; %if &contigtype1^=&contigtype2 %then %do; %put ERROR: The variables &contigvar1 and &contigvar2 in the file %upcase(&ContigFile).DATA are not of the same type.; %return; %end; %end; %else %do; %if &contigtype1^=&idtype or &contigtype2^=&idtype %then %do; %put ERROR: The variables &contigvar1 and &contigvar2 in the file %upcase(&ContigFile).DATA and the variable &IdVar in the file %upcase(&DistFile).DATA are not of the same type.; %return; %end; %end; %let histtitle=&histtitle with Contiguity Constraint; %end; %if %quote(&MaxSize)=%str() %then %let MaxSize=&numobjs; %else %do; %if %sysfunc(getoption(mautosource))=MAUTOSOURCE %then %let sizetype=%datatyp(&MaxSize); %else %do; options mautosource; %let sizetype=%datatyp(&MaxSize); options nomautosource; %end; %if &sizetype=CHAR %then %do; %put ERROR: The value of the argument MaxSize must be an integer no less than 2 and no more than &numobjs..; %return; %end; %if &MaxSize<2 or &MaxSize>&numobjs or &MaxSize^=%sysfunc(floorz(&MaxSize)) %then %do; %put ERROR: The value of the argument MaxSize must be an integer no less than 2 and no more than &numobjs..; %return; %end; %end; %if %quote(&OutHist)=%str() %then %do; %let OutHist=contigclusthist; %let outputhist=0; %let todelete2=, contigclusthist; %end; %else %do; %if %qscan(&OutHist,2,%str( ))^=%str() %then %do; %put ERROR: The argument OutHist should contain only one name.; %return; %end; %let dotloc=%index(&OutHist,.); %if &dotloc>0 %then %do; %let libtemp=%substr(&OutHist,1,&dotloc-1); %if %sysfunc(libref(&libtemp))^=0 %then %do; %put ERROR: Libname %upcase(&libtemp) is not assigned.; %return; %end; %end; %let outputhist=1; %let todelete2=; %end; %if %quote(&OutTree)=%str() %then %do; %let OutTree=contigclusttree; %let outputtree=0; %let todelete2=&todelete2, contigclusttree; %end; %else %do; %if %qscan(&OutTree,2,%str( ))^=%str() %then %do; %put ERROR: The argument OutTree should contain only one name.; %return; %end; %let dotloc=%index(&OutTree,.); %if &dotloc>0 %then %do; %let libtemp=%substr(&OutTree,1,&dotloc-1); %if %sysfunc(libref(&libtemp))^=0 %then %do; %put ERROR: Libname %upcase(&libtemp) is not assigned.; %return; %end; %end; %let outputtree=1; %end; %if &outputhist=0 and &outputtree=0 and &Method^=WARD %then %let rsq=0; /* If there is no contiguity file, then an unconstrained analysis is performed */ %if %quote(&ContigFile)=%str() %then %do; /* Create complete contiguity file */ data contigclustcontiglist; do objectnum1=2 to &numobjs; do objectnum2=1 to objectnum1-1; output; end; end; run; %let todelete1=contigclustcontiglist; %if &numidvars=0 %then %do; /* If there is no identification variable, one is created; an object number is assigned */ data contigclustdistmat; set &DistFile; length contigclustobject $ &objlen; contigclustobject=cat('OB',trim(put(_n_,8.-l))); contigclustobjnum=_n_; run; %let todelete1=&todelete1, contigclustdistmat; %end; %else %do; /* If there is an identification variable, then check that it has no duplicate values */ proc sql noprint; select count(distinct &IdVar) into :numdistinctdistobjs from &DistFile; quit; %if &numdistinctdistobjs=&numobjs %then %do; /* Assign object numbers */ data contigclustdistmat; set &DistFile (rename=(&IdVar=contigclustobject)); contigclustobjnum=_n_; run; %let todelete1=&todelete1, contigclustdistmat; %end; %else %do; %put ERROR: Duplicate names have been detected for %eval(&numobjs-&numdistinctdistobjs) object(s) in the DistFile data set.; %return; %end; %end; %end; /* If there is a contiguity file, then a constrained analysis is performed */ %else %do; %if &numidvars=0 %then %do; /* If there is no identification variable, then check that object names are valid row numbers for the distance matrix; eliminate unnecessary pairs of contiguous objects; reorder the members of the remaining pairs */ data contigclustcontigtemp; set &ContigFile end=lastobs; where &contigvar1^=&contigvar2; length contigclustobject1 $ &objlen contigclustobject2 $ &objlen; retain numbadints 0; %if &contigtype1=N %then %do; if not (1<=&contigvar1<=&numobjs and &contigvar1=floorz(&contigvar1)) then numbadints+1; if not (1<=&contigvar2<=&numobjs and &contigvar2=floorz(&contigvar2)) then numbadints+1; %end; %else %do; contigclustnum1=input(&contigvar1,? 8.); if _error_=1 then do; numbadints+1; _error_=0; end; else if not (1<=contigclustnum1<=&numobjs and contigclustnum1=floorz(contigclustnum1)) then numbadints+1; contigclustnum2=input(&contigvar2,? 8.); if _error_=1 then do; numbadints+1; _error_=0; end; else if not (1<=contigclustnum2<=&numobjs and contigclustnum2=floorz(contigclustnum2)) then numbadints+1; %end; if numbadints=0 then do; if &contigvar1<&contigvar2 then do; contigclustobject1=cat('OB',trim(put(&contigvar2,8.-l))); contigclustobject2=cat('OB',trim(put(&contigvar1,8.-l))); end; else do; contigclustobject1=cat('OB',trim(put(&contigvar1,8.-l))); contigclustobject2=cat('OB',trim(put(&contigvar2,8.-l))); end; end; if lastobs then call symputx('numbadints',numbadints); keep contigclustobject1 contigclustobject2; run; %let todelete1=contigclustcontigtemp; %if &numbadints=0 %then %do; /* Create identification variable and assign object numbers */ data contigclustdistmat; set &DistFile; length contigclustobject $ &objlen; contigclustobject=cat('OB',trim(put(_n_,8.-l))); contigclustobjnum=_n_; run; %let todelete1=&todelete1, contigclustdistmat; %end; %else %do; %put ERROR: Invalid values have been detected for &numbadints object(s) among the pairs in the ContigFile data set.; %goto exit; %end; %end; %else %do; /* If there is an identification variable, check that it does not contain any duplicate values */ proc sql noprint; select count(distinct &IdVar) into :numdistinctdistobjs from &DistFile; quit; %if &numdistinctdistobjs=&numobjs %then %do; /* Check that there are no objects in the contiguity file that are not in the distance matrix */ proc sql noprint; select count(object) into :numdistinctcontigobjs from (select distinct &contigvar1 as object from &ContigFile where &contigvar1^=&contigvar2 union select distinct &contigvar2 as object from &ContigFile where &contigvar1^=&contigvar2); quit; %if &numdistinctcontigobjs<=&numobjs %then %do; /* Eliminate unnecessary pairs of contiguous objects and reorder the members of the remaining pairs */ data contigclustcontigtemp; set &ContigFile; where &contigvar1^=&contigvar2; %if &contigtype1=N %then %do; length contigclustobject1 &maxlen contigclustobject2 &maxlen; %end; %else %do; length contigclustobject1 $ &maxlen contigclustobject2 $ &maxlen; %end; if &contigvar1<&contigvar2 then do; contigclustobject1=&contigvar2; contigclustobject2=&contigvar1; end; else do; contigclustobject1=&contigvar1; contigclustobject2=&contigvar2; end; keep contigclustobject1 contigclustobject2; run; /* Assign object numbers */ data contigclustdistmat; set &DistFile (rename=(&IdVar=contigclustobject)); contigclustobjnum=_n_; run; %let todelete1=contigclustcontigtemp, contigclustdistmat; %end; %else %do; %put ERROR: Object names not found in the DistFile data set have been detected for %eval(&numdistinctcontigobjs-&numobjs) object(s) in the ContigFile data set.; %return; %end; %end; %else %do; %put ERROR: Duplicate names have been detected for %eval(&numobjs-&numdistinctdistobjs) object(s) in the DistFile data set.; %return; %end; %end; /* Create a contiguity list using object numbers instead of names and eliminate duplicate pairs */ proc sql; create table contigclustcontiglist as select a1.contigclustobjnum as objectnum1, a2.contigclustobjnum as objectnum2 from contigclustdistmat as a1, contigclustdistmat as a2, (select distinct contigclustobject1, contigclustobject2 from contigclustcontigtemp) as b where a1.contigclustobject=b.contigclustobject1 and a2.contigclustobject=b.contigclustobject2 order by a1.contigclustobjnum, a2.contigclustobjnum; quit; %let todelete1=&todelete1, contigclustcontiglist; %end; /* Execute clustering algorithm */ proc iml worksize=3000; /* Read distance matrix into IML */ distvars=contents('contigclustdistmat'); distvars=distvars[loc(distvars^='contigclustobject' & distvars^='contigclustobjnum')]; use contigclustdistmat; read all var distvars into dist; read all var {contigclustobject} into objc; close contigclustdistmat; /* Check for invalid distances; if applicable, square distances and compute total sum-of-squares */ numbaddists=0; numobj=nrow(dist); %if &rsq=1 %then %do; t=0; %end; do i=2 to nrow(dist); do j=1 to i-1; if dist[i,j]<0 then numbaddists=numbaddists+1; else do; %if &square=1 %then %do; dist[i,j]=dist[i,j]**2; %end; %if &rsq=1 %then %do; t=t+dist[i,j]; %end; end; end; end; %if &rsq=1 %then %do; t=t/numobj; %end; call symputx('numbaddists',numbaddists); if numbaddists=0 then do; /* Read contiguity list into IML */ use contigclustcontiglist; read all var {objectnum1 objectnum2} into cont; close contigclustcontiglist; if type(objc)='N' then objc=left(char(objc)); /* Initialize variables based on clustering method and on whether or not statistics are to be computed */ %if &Method=WARD %then %do; dist=dist/2; distorig=dist; objloc=1:numobj; t=t/2; tempp=0; w=j(1,numobj,0); %end; %else %if &rsq=1 %then %do; distorig=dist; objloc=1:numobj; tempp=0; w=j(1,numobj,0); %end; %else %do; psf=.; pst=.; rmsstd=.; rsq=.; sprsq=.; %end; freq=j(1,numobj,1); imldone=0; iter=0; numties=0; objn=j(1,numobj,.); /* Reorder members of each pair of contiguous objects so that the larger values are first */ do i=1 to nrow(cont); if cont[i,1]minpair[1] then cont[i,j]=cont[i,j]-2; else if cont[i,j]=minpair[2] | cont[i,j]=minpair[1] then cont[i,j]=newclustnum; else if cont[i,j]>minpair[2] then cont[i,j]=cont[i,j]-1; end; if cont[i,1]minpair[1] then cont[i,j]=cont[i,j]-2; else if cont[i,j]=minpair[2] | cont[i,j]=minpair[1] then do; cont[i,j]=newclustnum; sizecheck=1; end; else if cont[i,j]>minpair[2] then cont[i,j]=cont[i,j]-1; end; if sizecheck=1 & sum(freq[cont[i,]])>&MaxSize then toobig[i]=1; else if cont[i,1]minpair[1] then objloc[i]=objloc[i]-2; else if objloc[i]=minpair[2] | objloc[i]=minpair[1] then objloc[i]=newclustnum; else if objloc[i]>minpair[2] then objloc[i]=objloc[i]-1; end; %end; end; end; /* Reorder object names and numbers so that tree diagrams will turn out correctly */ if any(objn=.) then do; lastdotloc=max(loc(objn=.)); if lastdotloc0 %then %put WARNING: Ties for minimum distance between clusters have been detected at &numties level(s) in the cluster history.; %end; %mend contigclust;