function [Ord, Num, BetaVal] = Cluster(Vec) %Data clustering algorithm (matlab version 5.1,by Naama Barkai and Uri Alon). %This program accepts a matrix of input data Vec, %where each object to be clustered is represented by a column. %It outputs three variables: Ord, Num and BetaVal. %Ord contains the post-clustering order of objects along the binary tree. %Additional information about the binary tree is found in Num and BetaVal. %Num contains the sizes of the clusters at each splitting, so that in the second %row of Num are two nonzero entries corresponding to the sizes of the two clusters %in the first division in the tree, the third row has 4 entries, etc. %The matrix BetaVal contains the beta values of the corresponding cluster splits. %Definitions Converge = .001 ; ConvergeSplit = .001 ; Split = .01 ; DiffSplit = 0 ; OldSplit = 0 ; NumClust = 2 ; Beta0 = .5 ; DeltaBeta = .5 ; NumSplit = 4 ; Center(1:NumClust,:) = ones(NumClust,1)*mean(Vec') ; Num = zeros(NumSplit+1,2^NumSplit) ; BetaVal = zeros(NumSplit+1,2^NumSplit) ; Num(1,1) = size(Vec,2) ; S_CompCenter = zeros(NumSplit,2^NumSplit ); E_CompCenter = zeros(NumSplit,2^NumSplit ); Ind_CompCenter = zeros(NumSplit,2^NumSplit ); S_CompCenter(1,1) = 1 ; E_CompCenter(1,1) = size(Vec,2) ; Ind_CompCenter(1,1) = 1 ; tmpOrdVec = Vec ; Ord = 1:1:size(Vec,2) ; tmpOrd = Ord ; for nSplit=1:1:NumSplit Vec = tmpOrdVec ;nS = 0 ; Ord = tmpOrd ; NewClust = 0 ; for nClust = 1:1:2^(nSplit-1) if(Num(nSplit,nClust)>1) clear Dist Prob C Z Beta = Beta0 ;SplitMeasure = 0 ; CInd = nS+1:Num(nSplit,nClust)+nS ; nS bs = 0 ; while( (SplitMeasureConvergeSplit) ) if( (bs==0) & (SplitMeasure>Split) ) BetaVal(nSplit,nClust) = Beta ; bs=1 ; end Beta = Beta+DeltaBeta ; OldCenter = Center*0 ; %figure(1); hold off; plot(Center') ;hold on ;plot(mean(Vec(:,CInd)'),'y') ; Center = Center.*(1+ .001*(rand(size(Center,1),size(Center,2))-.5)) ; DiffClust =5 ; while(DiffClust>Converge) hold off DiffSplit = (OldSplit-SplitMeasure)/max(.01,SplitMeasure) ; DiffClust = sqrt(sum(sum((Center-OldCenter).^2)))/sqrt(sum(sum((Center).^2))) ; OldCenter = Center ; OldSplit = SplitMeasure ; for k=1:1:NumClust C = Center(k,:)'*ones(1,length(CInd)) ; Dist(k,:) = sum(((Vec(:,CInd)-C).^2)) ; Prob(k,:) = exp(-Beta.*Dist(k,:)) ; end Z = sum(Prob) ; for k=1:1:NumClust Prob(k,:) = Prob(k,:)./Z ; Center(k,:) = (Vec(:,CInd)*Prob(k,:)'/(sum(Prob(k,:))))' ; end pause (.001); SplitMeasure = sqrt(sum((Center(1,:)-Center(2,:)).^2)) ; end end %======================================================= %here we make the assignment of Data Points to Clusters %======================================================= %Calculating the distance from the final centers for k=1:1:NumClust C = Center(k,:)'*ones(1,length(CInd)) ; Dist(k,:) = sum(((Vec(:,CInd)-C).^2)) ; end %First We need to decide on the proper way of splitting the vectors (organize by closeness to the near-by cluster) in = S_CompCenter(nSplit,nClust):E_CompCenter(nSplit,nClust) ; if(length(in)>1) CompCenter = mean(Vec(:,in)') ; end if(length(in)==1) CompCenter = Vec(:,in)' ; end C = CompCenter'*ones(1,2) ; d = sum(((Center-C')'.^2)) ; if(Ind_CompCenter(nSplit,nClust)==1) if(d(1)d(2)) Dist = Dist([2,1],:) ; Center = Center([2,1],:) ; end end %Now we do the actual assignment m = min(Dist)'*ones(1,size(Dist,1)) ; [IndMin,j] = find(m'==Dist) ; for k=1:1:NumClust NewClust = NewClust+1 ; IndClust = (IndMin==k) ; Num(nSplit+1,NewClust) = sum(IndClust) ; if(k==1) S_CompCenter(nSplit+1,NewClust) = nS+sum(IndClust)+1 ; S_CompCenter(nSplit+1,NewClust+1) = nS+1 ; end if(k==2) E_CompCenter(nSplit+1,NewClust-1) = nS+sum(IndClust) ; E_CompCenter(nSplit+1,NewClust) = nS ; end Ind_CompCenter(nSplit+1,NewClust) = k ; tmp = Vec(:,CInd); tmp2 = Ord(CInd) ; tmpOrdVec(:,nS+1:nS+sum(IndClust)) = tmp(:,IndClust) ; tmpOrd(nS+1:nS+sum(IndClust)) = tmp2(IndClust) ; nS = nS + sum(IndClust); figure(2) ; subplot(2,2,k) ; plot(mean(Vec(:,IndClust)')) ; title(['NumGenes=',num2str(sum(IndClust))]) end end if(Num(nSplit,nClust)==1) CInd NewClust = NewClust+1 ; Num(nSplit+1,NewClust) = 1 ; nS = nS+1 ; end end Vec = tmpOrdVec ;nS = 0 ; Ord = tmpOrd ; end