Main Page | Namespace List | Class Hierarchy | Data Structures | File List | Namespace Members | Data Fields | Globals | Related Pages

simplenormaldistribution.h

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #if !defined _CLUS_SIMPLENORMALDISTRIBUTION_H_
00035 #define _CLUS_SIMPLENORMALDISTRIBUTION_H_
00036 
00037 #include "distribution.h"
00038 #include "multiclasscontinuousdistribution.h"
00039 #include "statfct.h"
00040 
00041 namespace CLUS
00042 {
00043 
00044 /** Implements a unidimensional normal distribution but the "active" dimension
00045     can be specified.
00046 */
00047 class SimpleNormalDistribution : public Distribution
00048 {
00049 protected:
00050     /// expected value
00051     double mu;
00052     
00053     /// the variance of the distribution = sigma^2
00054     double variance;
00055     
00056     /// the active dimention
00057     int dimension;
00058     
00059     /// number of datapoints
00060     int N;
00061     
00062     /// sum of probabilities
00063     double S;
00064     
00065     /// sum prob*x
00066     double S_x;
00067     
00068     /// sum prob*x^2
00069     double S_x2;
00070     
00071     /// alpha/(sqrt(2PI)*sigma)
00072     double alpha_over_sigma;
00073 
00074 public:
00075     SimpleNormalDistribution(int InDim = 0, int Dimension = 0):Distribution(InDim), mu(0.0), variance(1.0),
00076             dimension(Dimension), N(0), S(0.0), S_x(0.0), S_x2(0.0), alpha_over_sigma(1/sqrt(2*M_PI))
00077     { }
00078 
00079     SimpleNormalDistribution(int InDim, int Dimension, double Alpha, double Mu, double Sigma):Distribution(InDim),
00080             mu(Mu), dimension(Dimension), N(0), S(0.0), S_x(0.0), S_x2(0.0)
00081     {
00082         variance=pow2(Sigma);
00083         alpha_over_sigma=Alpha/(Sigma*sqrt(2*M_PI));
00084     }
00085 
00086     double LearnProbability(const double* DataCache)
00087     {
00088         if (weight==0.0)
00089             return 0.0;
00090         dataCache = DataCache;
00091         double X=dataCache[dimension]-mu;
00092         probabilityLearn = alpha_over_sigma*exp(-pow2(X)/(2*variance));
00093         return probabilityLearn;
00094     }
00095 
00096     /// InferProbability is not defined for this distribution
00097     double NormalizeLearnProbability(double Coef, int nrClus=1)
00098     {
00099         double pLearn=Distribution::NormalizeLearnProbability(Coef,nrClus);
00100         UpdateStatistics(dataCache, pLearn);
00101 
00102         return 0.0;
00103     }
00104 
00105     /// @todo to redefine these two since they are not virtual so the wrong methods are called
00106     double Probability(const double* DataCache)
00107     {
00108         return LearnProbability(DataCache);
00109     }
00110 
00111     double NormalizeProbability(double Coef, int nrClus=1)
00112     {
00113         return Distribution::NormalizeLearnProbability(Coef,nrClus);
00114     }
00115 
00116     void UpdateStatistics(const double* DataCache, double prob)
00117     {
00118         N++;
00119         S+=prob;
00120         S_x+=prob*DataCache[dimension];
00121         S_x2+=prob*pow2(DataCache[dimension]);
00122     }
00123 
00124     double UpdateParameters(void)
00125     {
00126         double oldMu, distP=0.0;
00127 
00128         // if the cluster isdead do nothing
00129         if (weight==0.0)
00130             return 0.0;
00131 
00132         if (S<=1.0)
00133         {
00134             cout << "Cluster got too small. We have to throw it out since is not anymore reliable." << endl;
00135             weight=0.0;
00136             distP=1.0;
00137             goto cleanup;
00138         }
00139 
00140         weight=S/N; // S is the number of points on which this clusterdepends
00141 
00142         oldMu = mu;
00143         mu = S_x/S;
00144         S_x2 = (S_x2-mu*S_x)/S;
00145 
00146         variance = S_x2;
00147 
00148         if (variance==0.0)
00149             weight=0;
00150 
00151         distP=pow2(oldMu-mu);
00152 
00153         alpha_over_sigma = weight / sqrt(2*M_PI*S_x2 );
00154 
00155 cleanup:
00156 
00157         N=0;
00158         S=0.0;
00159         S_x=0.0;
00160         S_x2=0.0;
00161 
00162         return sqrt(distP)/(inDim+1); //how much the center has moved
00163     }
00164 
00165     void UpdateANOVAElements(int& numNonzero, double& SSR, double& n, double& sumS_x_overS, double& sumS_x)
00166     {
00167         if (weight!=0)
00168         {
00169             numNonzero++;
00170             SSR+=variance*S;
00171             n+=S;
00172             sumS_x_overS+=mu*S_x;
00173             sumS_x+=S_x;
00174         }
00175     }
00176 
00177     static string TypeName(void)
00178     {
00179         return string("SimpleNormalDistribution");
00180     }
00181 
00182     // @todo write RandomDistribution with reference to parent if needed
00183     void RandomDistribution(int NrClusters)
00184     {
00185         // reset weithts to 1.
00186         weight = 1.0;
00187         // choose mu random
00188         mu = RANDOM01FLOAT;
00189         variance = 1.0;
00190         alpha_over_sigma = 1 / sqrt(2*M_PI);
00191     }
00192     
00193 
00194 #ifdef CLUS_USE_XML
00195     void PrintToXmlStream(ostream& output)
00196     {
00197         output << "<SimpleNormalDistribution";
00198         PrintAttribute(output, "mu", mu);
00199         PrintAttribute(output, "variance", variance);
00200         PrintAttribute(output, "dimension", dimension);
00201         PrintAttribute(output, "alpha_over_sigma", alpha_over_sigma);
00202         output << ">" << endl;
00203 
00204         Distribution::PrintToXmlStream(output);
00205 
00206         output << "</SimpleNormalDistribution>" << endl;
00207     }
00208 #endif
00209 
00210 };
00211 
00212 double MulticlassContinuousDistribution<SimpleNormalDistribution>::PValueStatisticalTest(void)
00213 {
00214     int numNonzero = 0;
00215     double SSR=0.0;
00216     double n=0.0;
00217     double sumS_x_overS=0.0;
00218     double sumS_x=0.0;
00219     for (int i=0; i<noClasses; i++)
00220     {
00221         distributions[i].UpdateANOVAElements(numNonzero, SSR, n, sumS_x_overS, sumS_x);
00222     }
00223 
00224     if (numNonzero>=2)
00225     {
00226         /*double SSA=sumS_x_overS-pow2(sumS_x)/n;
00227         double W=(SSA/(numNonzero-1))/(SSR/(n-numNonzero));*/
00228 
00229         /// @todo fix this: return F(W, noClasses-1, n-noClasses, 3);
00230         return 0.0;        
00231     }
00232     else
00233     {
00234         return 0.0;
00235     }
00236 }
00237 
00238 }
00239 
00240 #endif // _CLUS_SIMPLENORMALDISTRIBUTION_H_

Generated on Mon Jul 21 16:57:25 2003 for SECRET by doxygen 1.3.2