00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 #if !defined _CLUS_SPLITPOINTCOMPUTATION_H_
00039 #define _CLUS_SPLITPOINTCOMPUTATION_H_
00040 
00041 #include "general.h"
00042 #include <math.h>
00043 #include <iostream>
00044 #include "trislv.h"
00045 #include "fmat.h"
00046 #include "cholesky.h"
00047 
00048 #include <gsl/gsl_sf_gamma.h> 
00049 #include <gsl/gsl_errno.h>
00050 
00051 #include <stdlib.h>
00052 
00053 using namespace TNT;
00054 using namespace std;
00055 
00056 namespace CLUS
00057 {
00058 
00059 
00060 
00061 template< class T>
00062 bool IsPointInSet(T value, Vector<T> set)
00063 {
00064     int l=0, r=set.dim()-1;
00065 
00066     assert(r>=0);
00067 
00068     while ( l<=r )
00069     {
00070         int m=(l+r)/2;
00071         T vm=set
00072              [m];
00073 
00074         if ( vm ==value )
00075             return true;
00076 
00077         if ( vm < value )
00078             l=m+1;
00079         else
00080             r=m-1;
00081     }
00082     return false;
00083 }
00084 
00085 
00086 
00087 
00088 
00089 
00090 
00091 double PValueBinomialDistribution(double N, double p, double val)
00092 {
00093 
00094     double mean=N*p;
00095     double std=sqrt(N*p*(1-p)); 
00096 
00097     
00098 
00099     
00100     if (val<mean-10*std)
00101         return 1.0;
00102 
00103     if (val>mean+10*std)
00104         return 0.0;
00105 
00106     gsl_sf_result result;
00107 
00108     int status=gsl_sf_beta_inc_e(val+1,N-val,p,&result);
00109 
00110     switch(status)
00111     {
00112     case 0:
00113         
00114         break;
00115 
00116     default:
00117         
00118         
00119         {
00120             cout << "gsl_sf_beta_inc_e failed for N=" << N << " val=" << val << " p=" << p << " .Approximating with normal" << endl;
00121             return .5*(1.0+erf( (mean-val)/(std*sqrt(2.0)) ));
00122         }
00123         break;
00124     }
00125 
00126     return result.val;
00127 
00128 }
00129 
00130 
00131 double PValueNormalDistribution(double mu, double sigma, double eta)
00132 {
00133     if (sigma==0.0)
00134         if (mu>eta)
00135             return 1.0;
00136         else
00137             if (mu==eta)
00138                 return .5;
00139             else
00140                 return 0.0;
00141     return .5*(1.0+erf( (mu-eta)/sqrt(2*pow2(sigma)) ));
00142 }
00143 
00144 
00145 double PValueNormalDistribution(const Vector<double> mu,
00146                                 const Fortran_Matrix<double> cholSigma,
00147                                 const Vector<double> n,
00148                                 const Vector<double> xc)
00149 {
00150     
00151 
00152 
00153 
00154 
00155 
00156 
00157 
00158 
00159 
00160 
00161 
00162 
00163 
00164 
00165 
00166     int d=n.dim(); 
00167 
00168     
00169     double v=dot_prod(n,mu-xc)/sqrt(dot_prod(n,n));
00170 
00171     
00172     Fortran_Matrix<double> M(d,d); 
00173     
00174 
00175     
00176     int i=0;
00177     double max=fabs(n[i]);
00178     for(int j=1; j<d; j++)
00179     {
00180         double aux=fabs(n[j]);
00181         if (aux>max)
00182         {
00183             max=aux;
00184             i=j;
00185         }
00186     }
00187     i++; 
00188     
00189     for (int j=1; j<=d; j++)
00190     {
00191         M(j,j)=1.0;
00192         M(j,1)=n[j-1];
00193     }
00194     
00195     if (i!=1)
00196     {
00197         M(i,i)=0.0;
00198         M(1,i)=1.0;
00199     }
00200 
00201     
00202     for (int j=1; j<=d; j++)
00203     {
00204         for (int k=1; k<j; k++)
00205         {
00206             
00207             double mult=0.0;
00208             for (int l=1; l<=d; l++)
00209                 mult+=M(l,j)*M(l,k);
00210             
00211             for (int l=1; l<=d; l++)
00212                 M(l,j)-=mult*M(l,k);
00213         }
00214         
00215         double norm=0.0;
00216         for (int k=1; k<=d; k++)
00217             norm+=pow2(M(k,j));
00218         norm=sqrt(norm);
00219         
00220         for (int k=1; k<=d; k++)
00221             M(k,j)/=norm;
00222     }
00223 
00224     
00225     for (int i=1; i<=d; i++)
00226     {
00227         
00228         for (int j=1; j<=d; j++)
00229         {
00230             double tmp=0.0;
00231             for (int k=1; k<j; k++)
00232                 tmp+=cholSigma(j,k)*M(k,i);
00233             M(j,i)=(M(j,i)-tmp)/cholSigma(j,j);
00234         }
00235     }
00236 
00237     
00238     double s=0.0;
00239     for (int i=1; i<=d; i++)
00240         s+=pow2(M(i,1));
00241 
00242     
00243     Vector<double> w(d-1);
00244     for (int i=0; i<d-1; i++)
00245     {
00246         
00247         double tmp=0.0;
00248         for (int j=1; j<=d; j++)
00249             tmp+=M(j,1)*M(j,i+2);
00250         w[i]=tmp;
00251     }
00252 
00253     
00254     Fortran_Matrix<double> S(d-1,d-1);
00255     for (int i=1; i<=d-1; i++)
00256         for (int j=i; j<=d-1; j++)
00257         {
00258             
00259             double tmp=0.0;
00260             for (int k=1; k<=d; k++)
00261                 tmp+=M(k,i+1)*M(k,j+1);
00262             S(i,j)=tmp;
00263         }
00264 
00265     Cholesky_upper_factorization(S,S);
00266     
00267     Vector< double > aux = Lower_triangular_solve(S,w);
00268 
00269     double sigma=s-dot_prod(aux,aux);
00270 
00271     sigma=sqrt(sigma);
00272     double det_cholS=1.0;
00273     for (int i=1; i<=d-1; i++)
00274         det_cholS*=S(i,i);
00275     double det_cholSigma=1.0;
00276     for (int i=1; i<=d; i++)
00277         det_cholSigma*=cholSigma(i,i);
00278 
00279     
00280     return 1/( 2*sigma*det_cholSigma*det_cholS )*( 1+erf(sigma*v/sqrt(2.0)) );
00281 }
00282 
00283 
00284 struct T_array
00285 {
00286     int index;
00287     double crit;
00288 };
00289 
00290 
00291 int compare_array_elements(const void* x, const void* y)
00292 {
00293     const T_array ax = *( (T_array*)x );
00294     const T_array ay = *( (T_array*)y );
00295     if (ax.crit<ay.crit)
00296         return -1;
00297     else
00298         if (ax.crit == ay.crit)
00299             return 0;
00300         else
00301             return 1;
00302 }
00303 
00304 
00305 
00306 
00307 
00308 
00309 
00310 
00311 double BinaryGiniGain(double p11, double p_1, double p1_)
00312 {
00313     return p1_>0.0 ? 2*pow2(p11-p_1*p1_)/( p1_*(1-p1_) ) : 0.0;
00314 }
00315 
00316 
00317 
00318 
00319 
00320 
00321 
00322 
00323 
00324 
00325 double DiscreteGiniGain(Vector<double>& d_s_p1, Vector<double>& d_N, double N, double alpha_1,
00326                         Vector<int>& Split)
00327 {
00328 
00329     int n=d_N.dim();
00330 
00331 #if 0
00332 
00333     Vector<int> B_order(n);
00334     for(int i=0; i<n; i++)
00335         B_order[i]=i;
00336     
00337     bool flag;
00338     do
00339     {
00340         flag=false;
00341         for(int i=0; i<n-1; i++)
00342         {
00343             int i1=B_order[i];
00344             double v1=d_N[i1]>0 ? d_s_p1[i1]/d_N[i1] : -1.0;
00345             int i2=B_order[i+1];
00346             double v2=d_N[i2]>0 ? d_s_p1[i2]/d_N[i2] : -1.0;
00347             if (v1>v2)
00348             {
00349                 B_order[i]=i2;
00350                 B_order[i+1]=i1;
00351                 flag=true;
00352             }
00353         }
00354     }
00355     while (flag);
00356 #endif
00357 
00358     T_array* B_order=new T_array[n];
00359 
00360     for (int i=0; i<n; i++)
00361     {
00362         B_order[i].index=i;
00363         B_order[i].crit=d_N[i]>0 ? d_s_p1[i]/d_N[i] : -1.0;
00364         ;
00365     }
00366 
00367     qsort((void*)B_order, n, sizeof(T_array), compare_array_elements );
00368 
00369     
00370     
00371     
00372     
00373 
00374 
00375     
00376     int pos=-1;  
00377     double p11=0.0;
00378     double p1_=0.0;
00379     double maxgini=0.0;
00380     bool partonefirst=true; 
00381     for (int i=0; i<n-1; i++)
00382     {
00383         p11+=d_s_p1[B_order[i].index]/N;
00384         p1_ +=1.0*d_N[B_order[i].index]/N;
00385         double gini=BinaryGiniGain(p11,alpha_1,p1_);
00386 
00387         
00388 
00389         if (p1_>=0.0 && p1_<=1.0 && gini>=maxgini)
00390         {
00391             maxgini=gini;
00392             pos=i;
00393             if ( p11<(p1_-p11) )
00394                 partonefirst=false;
00395             
00396 
00397         }
00398     }
00399 
00400     double boundary;
00401     if (pos==-1 || B_order[pos].crit==-1.0)
00402         return 0.0;
00403     else
00404         boundary=d_s_p1[B_order[pos].index]/d_N[B_order[pos].index];
00405 
00406     
00407 
00408     int nr=0;
00409     int buff[MAX_CATEGORIES];
00410     for (int i=0; i<n; i++)
00411         if ( (d_N[i]==0.0 && rand()*1.0/RAND_MAX > .5) ||
00412                 ((!partonefirst) && d_s_p1[i]/d_N[i]>boundary)
00413                 && NonEqual(d_s_p1[i]/d_N[i],boundary) ||
00414                 (partonefirst &&
00415                  (d_s_p1[i]/d_N[i]<boundary ||
00416                   !NonEqual(d_s_p1[i]/d_N[i],boundary) )) )
00417         {
00418             
00419             buff[nr]=i;
00420             nr++;
00421         }
00422 
00423     
00424     
00425     
00426     
00427 
00428     
00429     Split.newsize(nr);
00430     for (int i=0; i<nr; i++)
00431         Split[i]=buff[i];
00432 
00433     
00434     
00435     
00436     
00437 
00438     return (nr>0 && nr<=n-1) ? maxgini : 0.0; 
00439 }
00440 
00441 
00442 
00443 
00444 double ProbabilisticDiscreteGiniGain(const Vector<double>& d_s_p1, const Vector<double>& d_N,
00445                                      double N, double alpha_1,
00446                                      Vector<double>& probSet)
00447 {
00448 
00449     int n=d_N.dim();
00450 
00451     T_array* B_order=new T_array[n];
00452 
00453     for (int i=0; i<n; i++)
00454     {
00455         B_order[i].index=i;
00456         B_order[i].crit=d_N[i]>0.0 ? d_s_p1[i]/d_N[i] : -1.0;
00457         ;
00458     }
00459 
00460     qsort((void*)B_order, n, sizeof(T_array), compare_array_elements );
00461 
00462     
00463     int pos=-1;  
00464     double p11=0.0;
00465     double p1_=0.0;
00466     double maxgini=0.0;
00467     bool partonefirst=true; 
00468     for (int i=0; i<n-1; i++)
00469     {
00470         p11+=d_s_p1[B_order[i].index]/N;
00471         p1_ +=1.0*d_N[B_order[i].index]/N;
00472         double gini=BinaryGiniGain(p11,alpha_1,p1_);
00473 
00474         if (p1_>=0.0 && p1_<=1.0 && gini>=maxgini)
00475         {
00476             maxgini=gini;
00477             pos=i;
00478             if ( p11<(p1_-p11) )
00479                 partonefirst=false;
00480         }
00481     }
00482 
00483     
00484     double boundary;
00485     probSet.newsize(n);
00486     if (pos==-1 || B_order[pos].crit==-1.0)
00487     {
00488         for (int i=0; i<n; i++)
00489             probSet[i]=0.5;
00490         return 0.0;
00491     }
00492     else
00493         boundary=(d_s_p1[B_order[pos].index]/d_N[B_order[pos].index]+
00494                   d_s_p1[B_order[pos+1].index]/d_N[B_order[pos+1].index])/2.0;
00495 
00496     delete [] B_order;
00497 
00498     
00499     
00500     for (int i=0; i<n; i++)
00501         if (d_N[i]==0.0)
00502             probSet[i]=.5;
00503         else
00504         {
00505             double p=d_s_p1[i]/d_N[i];
00506             probSet[i]=1.0-PValueBinomialDistribution(d_N[i], p, boundary*d_N[i]);
00507             
00508             
00509 
00510         }
00511 
00512     return maxgini;
00513 }
00514 
00515 
00516 
00517 
00518 
00519 
00520 
00521 
00522 
00523 
00524 
00525 
00526 
00527 
00528 
00529 
00530 
00531 
00532 
00533 double UnidimensionalQDA(double alpha_1, double eta1, double var1,
00534                          double alpha_2, double eta2, double var2, int& whichSol)
00535 {
00536     whichSol=4; 
00537 
00538     if (isnan(alpha_1+eta1+var1))
00539     {
00540         assert( !isnan(alpha_2+eta2+var2) );
00541         return eta2;
00542     }
00543 
00544     if (isnan(alpha_2+eta2+var2))
00545     {
00546         assert( !isnan(alpha_1+eta1+var1) );
00547         return eta1;
00548     }
00549 
00550     if (!NonEqual(eta1,eta2))
00551         return eta1;
00552 
00553     double a=1.0/var1-1.0/var2;
00554     double b=-2.0*(eta1/var1-eta2/var2);
00555     double c=pow2(eta1)/var1-pow2(eta2)/var2-2.0*log(alpha_1/alpha_2)/log(exp(1.0))+
00556              log(var1/var2)/log(exp(1.0));
00557 
00558 
00559     if (var1<=SMALL_POZ_VALUE || var2<=SMALL_POZ_VALUE)
00560     {
00561         whichSol=5;
00562         return alpha_1*eta1+alpha_2*eta2;
00563     }
00564 
00565     
00566     goto default_solution;
00567 
00568     if ( fabs(a) < 1e-8*(1.0/var1+1.0/var2) )
00569     {
00570         
00571 
00572         if (NonZero(b))
00573         {
00574             double eta_l=-c/b;
00575             if ( (eta1-eta_l)*(eta2-eta_l)<0.0 )
00576             {
00577                 whichSol=0;
00578                 return eta_l;
00579             }
00580         } 
00581     }
00582     else
00583     {
00584         
00585         double disc=pow2(b)-4.0*a*c;
00586         if (disc<0.0) 
00587             goto default_solution;
00588         else
00589         {
00590             
00591             double eta_1=(-b+sqrt(disc))/(2.0*a);
00592             double eta_2=(-b-sqrt(disc))/(2.0*a);
00593             
00594             if ( (eta1-eta_1)*(eta2-eta_1) < 0.0)
00595             {
00596                 whichSol=1;
00597                 return eta_1;
00598             }
00599             else
00600                 if ( (eta1-eta_2)*(eta2-eta_2) < 0.0)
00601                 {
00602                     whichSol=2;
00603                     return eta_2;
00604                 } 
00605         }
00606     }
00607 
00608 default_solution:
00609     whichSol=3; 
00610     return (alpha_1*sqrt(var1)*eta1+alpha_2*eta2*sqrt(var2))/
00611            (alpha_1*sqrt(var1)+alpha_2*sqrt(var2));
00612     
00613 }
00614 
00615 
00616 
00617 
00618 
00619 double UnidimensionalQDAVariance(double n1, double m1, double v1,
00620                                  double n2, double m2, double v2, int whichSol)
00621 {
00622 
00623     whichSol=6;
00624 
00625 #ifdef DEBUG_PRINT
00626 
00627     cout << "UnidimentionalQDAVariance " << n1 << " " << n2 << " " << v1 << " " << v2;
00628     cout << " " << m1 << " " << m2 << "\t";
00629 #endif
00630 
00631     double variance;
00632 
00633     if (v1<SMALL_POZ_VALUE  || v2<SMALL_POZ_VALUE )
00634     {
00635         if (n1<3 || n2<3)
00636             variance=pow2(m1-m2);
00637         else
00638             variance=SMALL_POZ_VALUE;
00639         return variance;
00640     }
00641 
00642     if (isnan(m1) || isnan(m2) || isnan(v1) || isnan(v2))
00643     {
00644         variance=LARGE_POZ_VALUE;
00645         return variance;
00646     }
00647 
00648 
00649     if ( IsEqual(v1,v2) && IsEqual(m1,m2) )
00650         variance = SMALL_POZ_VALUE;
00651     else
00652     {
00653         double CLog=log(n1/n2*sqrt(v2/v1));
00654         double SR=sqrt(v1*v2*( pow2(m1-m2)+2*(-v1+v2)*CLog ));
00655         double Q1=v1*(pow2(m1-m2)-v1+v2-2*(v1-2*v2)*CLog);
00656         double Q2=v2*(pow2(m1-m2)+v1-v2-2*(2*v1-v2)*CLog);
00657         double Q3=2*(pow(m2,4.0)-2*pow2(m2)*n2*v2+n2*pow2(v2));
00658         double Q4=2*(pow(m1,4.0)-2*pow2(m1)*n1*v1+n1*pow2(v1));
00659         double Q5=-m2*v1+m1*v2;
00660 
00661         
00662         
00663 
00664         double Sol1=
00665             ( v1*pow2(v2) ) / ( n1*pow2(v1-v2) ) * pow2(-1+(m1-m2)*v1/SR)+
00666             ( v2*pow2(v1) ) / ( n2*pow2(v1-v2) ) * pow2( 1-(m1-m2)*v2/SR)-
00667             Q4/( n1*(n1-1)*pow(v1-v2,4.0) ) * pow2(  Q5-SR+(v1-v2)*( m2+Q2/SR) )-
00668             Q3/( n2*(n2-1)*pow(v1-v2,4.0) ) * pow2( -Q5+SR+(v1-v2)*(-m1+Q1/SR) );
00669 
00670         double Sol2=
00671             ( v1*pow2(v2) ) / ( n1*pow2(v1-v2) ) * pow2( 1+(m1-m2)*v1/SR)+
00672             ( v2*pow2(v1) ) / ( n2*pow2(v1-v2) ) * pow2(-1-(m1-m2)*v2/SR)-
00673             Q4/( n1*(n1-1)*pow(v1-v2,4.0) ) * pow2( Q5+SR-(v1-v2)*(-m2+Q2/SR) )-
00674             Q3/( n2*(n2-1)*pow(v1-v2,4.0) ) * pow2( Q5+SR+(v1-v2)*( m1+Q1/SR) );
00675 
00676         
00677         double QL=v1*log(n1/n2)/pow2(m1-m2);
00678         double Sol0=2*pow2(m1-m2)*pow2(QL)*(1.0/n1+1.0/n2)+v1/(4*n1)*pow2(1-2*QL)+
00679                     +v1/(4*n2)*pow2(1+2*QL);
00680 
00681         double SolDefault=(n1*pow2(v1)+n2*pow2(v2))/pow2(n1*sqrt(v1)+n2*sqrt(v2));
00682         
00683 
00684         switch(whichSol)
00685         {
00686         case 0:
00687             variance=Sol0;
00688             break;
00689 
00690         case 1:
00691             variance=Sol1;
00692             break;
00693 
00694         case 2:
00695             variance=Sol2;
00696             break;
00697 
00698         case 3:
00699             variance=SolDefault;
00700             break;
00701 
00702         case 5:
00703             variance=(n1*v1+n2*v2)/pow2(n1+n2);
00704             break;
00705 
00706         case 6: 
00707             {
00708                 double s1=sqrt(v1);
00709                 double s2=sqrt(v2);
00710                 variance=s1*s2/(s1+s2)*(s1/n1+s2/n2);
00711             }
00712             break;
00713 
00714         default: 
00715             variance=LARGE_POZ_VALUE;
00716         }
00717 #ifdef DEBUG_PRINT
00718         cout << "Sol0=" << Sol0 << "\tSol1=" << Sol1 << "\tSol2=" << Sol2;
00719         cout << "\tSolDefault=" << SolDefault << "\twhichSol=" << whichSol;
00720 #endif
00721 
00722         
00723     }
00724 
00725 #ifdef DEBUG_PRINT
00726     cout << "\tvariance=" << variance << endl;
00727 #endif
00728 
00729     return (finite(variance)==true) ? variance : LARGE_POZ_VALUE;
00730 }
00731 
00732 
00733 
00734 
00735 
00736 
00737 
00738 
00739 
00740 
00741 
00742 
00743 
00744 
00745 double ComputeSeparatingHyperplane_Anova(double mass ,
00746         double alpha_1, Vector<double>& mu1, Fortran_Matrix<double>& S1,
00747         double alpha_2, Vector<double>& mu2, Fortran_Matrix<double>& S2,
00748         Vector<double>& SeparatingHyperplane)
00749 {
00750     double maxt=0.0;
00751     int maxi=-1;
00752     int N=mu1.dim();
00753 
00754     for (int i=0; i<N; i++)
00755     {
00756         double t=pow2(mu1[i]-mu2[i])/(S1(i+1,i+1)+S2(i+1,i+1));
00757 
00758         
00759         
00760         
00761 
00762         if (t>maxt)
00763         {
00764             maxi=i;
00765             maxt=t;
00766         }
00767     }
00768 
00769     if (maxi==-1)
00770         return 0.0; 
00771 
00772     
00773     int whichSol; 
00774     double eta=UnidimensionalQDA(alpha_1, mu1[maxi], S1(maxi+1, maxi+1),
00775                                  alpha_1, mu2[maxi], S2(maxi+1, maxi+1), whichSol);
00776     double variance=UnidimensionalQDAVariance(mass*alpha_1, mu1[maxi], S1(maxi+1, maxi+1),
00777                     mass*alpha_1, mu2[maxi], S2(maxi+1, maxi+1), whichSol);
00778 
00779     assert(variance>0.0);
00780 
00781     
00782     
00783     
00784     
00785     
00786     SeparatingHyperplane.newsize(N+1);
00787     SeparatingHyperplane=0.0;
00788     double sgn= (mu1[maxi]>eta) ? 1.0 : -1.0;
00789     SeparatingHyperplane[0]=-sgn*eta/sqrt(variance);
00790     SeparatingHyperplane[maxi+1]=sgn/sqrt(variance);
00791 
00792     double p11=alpha_1*PValueNormalDistribution(mu1[maxi], sqrt(S1(maxi+1, maxi+1)), eta);
00793     double p1_=p11+alpha_2*PValueNormalDistribution(mu2[maxi], sqrt(S2(maxi+1, maxi+1)), eta);
00794     double gini=BinaryGiniGain(p11,alpha_1,p1_);
00795     return gini;
00796 }
00797 
00798 
00799 
00800 
00801 
00802 
00803 
00804 
00805 
00806 
00807 
00808 
00809 
00810 
00811 
00812 
00813 
00814 double ComputeSeparatingHyperplane_QDA(double mass,
00815                                        double alpha_1, Vector<double>& mu1, Fortran_Matrix<double>& S1,
00816                                        double alpha_2, Vector<double>& mu2, Fortran_Matrix<double>& S2,
00817                                        Vector<double>& SeparatingHyperplane)
00818 {
00819 
00820     
00821     
00822 
00823     int N=mu1.dim();
00824 
00825     
00826 
00827 
00828 
00829 
00830 
00831     for (int i=1; i<=N; i++)
00832     {
00833         if (S1(i,i)==0.0 && S2(i,i)==0.0)
00834         {
00835             S1(i,i)=S2(i,i)=SMALL_POZ_VALUE;
00836             continue;
00837         }
00838         if (S1(i,i)==0.0 || S2(i,i)==0.0)
00839         {
00840             
00841 
00842 
00843 
00844             SeparatingHyperplane.newsize(N+1);
00845             SeparatingHyperplane[0]=-(alpha_1*mu1[i-1]+alpha_2*mu2[i-1]);
00846             for (int j=1; j<=N; j++)
00847                 SeparatingHyperplane[j]= (i==j) ? 1.0 : 0.0;
00848 
00849             return 1-pow2(alpha_1)-pow2(alpha_2);
00850         }
00851     }
00852 
00853     
00854     if ( Cholesky_upper_factorization(S1,S1) || Cholesky_upper_factorization(S2,S2) )
00855     {
00856         
00857         cerr << "Badly conditioned matrices in ComputeSeparatingHyperplane" << endl;
00858 
00859     }
00860 
00861     
00862     double detS1=1.0;
00863     double detS2=1.0;
00864     for (int i=1; i<=N; i++)
00865     {
00866         detS1*=S1(i,i);
00867         detS2*=S2(i,i);
00868     }
00869 
00870     
00871 
00872 
00873 
00874     
00875     double C=2.0*log(detS2/detS1)+log(alpha_1/alpha_2);
00876 
00877     Vector<double> dif=mu2-mu1;
00878     Vector<double> G1inv_dif=
00879         Lower_triangular_solve< Fortran_Matrix<double>,Vector<double> > (S1,dif);
00880     double I1=dot_prod(G1inv_dif, G1inv_dif);
00881 
00882     Vector<double> G2inv_dif=
00883         Lower_triangular_solve< Fortran_Matrix<double>,Vector<double> > (S2,dif);
00884     double I2=dot_prod(G2inv_dif, G2inv_dif);
00885 
00886     
00887     double t, delta;
00888     if (fabs(I1-I2)<SMALL_POZ_VALUE)
00889     {
00890         
00891         t=(I1-C)/(2*I1);
00892     }
00893     else
00894     {
00895         
00896         delta=C*(I1-I2)+I1*I2;
00897         if (delta<0.0)
00898         {
00899             t=-1.0;
00900         }
00901         else
00902         {
00903             t=(I1-sqrt(delta))/(I1-I2);
00904             if (t<0.0 || t>1.0)
00905                 t=(I1+sqrt(delta))/(I1-I2);
00906         }
00907     }
00908 
00909     Vector<double> n; 
00910 
00911     
00912 
00913 
00914     if (t<0.0 || t>1.0)
00915     {
00916         
00917         t=alpha_1/(alpha_1+alpha_2); 
00918         n=dif;
00919     }
00920     else
00921     {
00922         
00923         n=
00924             Transposed_Upper_triangular_solve(S1,G1inv_dif)*(-1.0+t)-
00925             Transposed_Upper_triangular_solve(S2,G2inv_dif)*t;
00926     }
00927     n=n*(1/sqrt(dot_prod(n,n)));  
00928 
00929     
00930     Vector<double> mu=t*mu1+(1-t)*mu2;
00931 
00932     
00933     double eta1=dot_prod(n,mu1);
00934     double eta2=dot_prod(n,mu2);
00935 
00936     
00937     double var1=0.0, var2=0.0;
00938     for (int j=1; j<=N; j++)
00939     {
00940         double aux1=0.0, aux2=0.0;
00941         
00942         for (int i=1; i<=j; i++)
00943         {
00944             aux1+=n(i)*S1(i,j);
00945             aux2+=n(i)*S2(i,j);
00946         }
00947         
00948         for (int i=j+1; i<=N; i++)
00949         {
00950             aux1+=n(i)*S1(j,i);
00951             aux2+=n(i)*S2(j,2);
00952         }
00953         var1+=aux1*n(j);
00954         var2+=aux2*n(j);
00955     }
00956 
00957     double variance=UnidimensionalQDAVariance(mass*alpha_1, eta1, var1,
00958                     mass*alpha_2, eta2, var2, 5);
00959 
00960     assert(variance>0.0);
00961 
00962     
00963     double eta=dot_prod(n,mu);
00964     double sgn= (eta1>eta) ? 1.0 : -1.0;
00965     SeparatingHyperplane.newsize(N+1);
00966     SeparatingHyperplane[0]=-sgn*eta/sqrt(variance);
00967     for(int i=1; i<=N; i++)
00968         SeparatingHyperplane[i]=sgn*n[i-1]/sqrt(variance);
00969 
00970     double p11=alpha_1*PValueNormalDistribution(mu1, S1, n, mu);
00971     double p1_=p11+alpha_2*PValueNormalDistribution(mu2, S2, n, mu);
00972     double gini=BinaryGiniGain(p11,alpha_1,p1_);
00973     return gini;
00974 }
00975 
00976 
00977 
00978 
00979 
00980 
00981 
00982 
00983 
00984 
00985 
00986 
00987 
00988 
00989 
00990 
00991 
00992 
00993 
00994 
00995 
00996 
00997 
00998 
00999 double ComputeSeparatingHyperplane_LDA(double mass ,
01000                                        double alpha_1, Vector<double>& mu1, Fortran_Matrix<double>& S1,
01001                                        double alpha_2, Vector<double>& mu2, Fortran_Matrix<double>& S2,
01002                                        Vector<double>& SeparatingHyperplane)
01003 {
01004 
01005     if (alpha_1<TNNearlyZero || alpha_2<TNNearlyZero)
01006         return 0.0; 
01007 
01008     int N = S1.num_rows();
01009     Vector<double> n(N); 
01010     double eta1, eta2;
01011     double var1, var2;
01012 
01013     Fortran_Matrix<double> Sw(N,N);
01014 
01015     
01016     for (int j=1; j<=N; j++)
01017         for (int i=1; i<=j; i++)
01018             Sw(i,j)=alpha_1*S1(i,j)+alpha_2*S2(i,j);
01019 
01020     
01021     for (int i=1; i<=N; i++)
01022         n(i)=mu1(i)-mu2(i);
01023 
01024     
01025     
01026     if ( Cholesky_upper_factorization(Sw,Sw) )
01027     {
01028         cerr << "Badly conditioned matrices in ComputeSeparatingHyperplane" << endl;
01029         cerr << "We use normal anova analysis to find the Separating Hyperplane" << endl;
01030         return ComputeSeparatingHyperplane_Anova(mass,alpha_1, mu1, S1, alpha_2, mu2, S2, SeparatingHyperplane);
01031     }
01032 
01033     Lower_triangular_solve(Sw,n);
01034     Transposed_Upper_triangular_solve(Sw,n);
01035     
01036     if (IsZero(dot_prod(n,n)))
01037         return 0.0; 
01038 
01039     n=n*(1.0/sqrt(dot_prod(n,n)));
01040 
01041     
01042     eta1=dot_prod(n,mu1);
01043     eta2=dot_prod(n,mu2);
01044 
01045     
01046     var1=0.0;
01047     var2=0.0;
01048     for (int j=1; j<=N; j++)
01049     {
01050         double aux1=0.0, aux2=0.0;
01051         
01052         for (int i=1; i<=j; i++)
01053         {
01054             aux1+=n(i)*S1(i,j);
01055             aux2+=n(i)*S2(i,j);
01056         }
01057         
01058         for (int i=j+1; i<=N; i++)
01059         {
01060             aux1+=n(i)*S1(j,i);
01061             aux2+=n(i)*S2(j,2);
01062         }
01063         var1+=aux1*n(j);
01064         var2+=aux2*n(j);
01065     }
01066 
01067 
01068     int whichSol;
01069     double eta=UnidimensionalQDA(alpha_1, eta1, var1, alpha_2, eta2, var2, whichSol);
01070     double variance=UnidimensionalQDAVariance(mass*alpha_1, eta1, var1,
01071                     mass*alpha_2, eta2, var2, whichSol);
01072 
01073     assert(variance>0.0);
01074 
01075     
01076     
01077     
01078     
01079     
01080     SeparatingHyperplane.newsize(N+1);
01081     double sgn= (eta1>eta) ? 1.0 : -1.0;
01082     SeparatingHyperplane[0]=-sgn*eta/sqrt(variance);
01083     for (int i=1; i<=N; i++)
01084         SeparatingHyperplane[i]=sgn*n[i-1]/sqrt(variance);
01085 
01086     Vector<double> mu(N);
01087     mu=eta*n;
01088 
01089 
01090     
01091     if ( Cholesky_upper_factorization(S1,S1) || Cholesky_upper_factorization(S2,S2) )
01092     {
01093         
01094         cerr << "Badly conditioned matrices in ComputeSeparatingHyperplane" << endl;
01095         return 0.0; 
01096     }
01097 
01098     double p11=alpha_1*PValueNormalDistribution(mu1, S1, n, mu);
01099     double p1_=p11+alpha_2*PValueNormalDistribution(mu2, S2, n, mu);
01100     double gini=BinaryGiniGain(p11,alpha_1,p1_);
01101     return gini;
01102 }
01103 
01104 
01105 }
01106 
01107 #endif //_CLUS_SPLITPOINTCOMPUTATION_H_