00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #if !defined _CLUS_SIMPLENORMALDISTRIBUTION_H_
00035 #define _CLUS_SIMPLENORMALDISTRIBUTION_H_
00036
00037 #include "distribution.h"
00038 #include "multiclasscontinuousdistribution.h"
00039 #include "statfct.h"
00040
00041 namespace CLUS
00042 {
00043
00044
00045
00046
00047 class SimpleNormalDistribution : public Distribution
00048 {
00049 protected:
00050
00051 double mu;
00052
00053
00054 double variance;
00055
00056
00057 int dimension;
00058
00059
00060 int N;
00061
00062
00063 double S;
00064
00065
00066 double S_x;
00067
00068
00069 double S_x2;
00070
00071
00072 double alpha_over_sigma;
00073
00074 public:
00075 SimpleNormalDistribution(int InDim = 0, int Dimension = 0):Distribution(InDim), mu(0.0), variance(1.0),
00076 dimension(Dimension), N(0), S(0.0), S_x(0.0), S_x2(0.0), alpha_over_sigma(1/sqrt(2*M_PI))
00077 { }
00078
00079 SimpleNormalDistribution(int InDim, int Dimension, double Alpha, double Mu, double Sigma):Distribution(InDim),
00080 mu(Mu), dimension(Dimension), N(0), S(0.0), S_x(0.0), S_x2(0.0)
00081 {
00082 variance=pow2(Sigma);
00083 alpha_over_sigma=Alpha/(Sigma*sqrt(2*M_PI));
00084 }
00085
00086 double LearnProbability(const double* DataCache)
00087 {
00088 if (weight==0.0)
00089 return 0.0;
00090 dataCache = DataCache;
00091 double X=dataCache[dimension]-mu;
00092 probabilityLearn = alpha_over_sigma*exp(-pow2(X)/(2*variance));
00093 return probabilityLearn;
00094 }
00095
00096
00097 double NormalizeLearnProbability(double Coef, int nrClus=1)
00098 {
00099 double pLearn=Distribution::NormalizeLearnProbability(Coef,nrClus);
00100 UpdateStatistics(dataCache, pLearn);
00101
00102 return 0.0;
00103 }
00104
00105
00106 double Probability(const double* DataCache)
00107 {
00108 return LearnProbability(DataCache);
00109 }
00110
00111 double NormalizeProbability(double Coef, int nrClus=1)
00112 {
00113 return Distribution::NormalizeLearnProbability(Coef,nrClus);
00114 }
00115
00116 void UpdateStatistics(const double* DataCache, double prob)
00117 {
00118 N++;
00119 S+=prob;
00120 S_x+=prob*DataCache[dimension];
00121 S_x2+=prob*pow2(DataCache[dimension]);
00122 }
00123
00124 double UpdateParameters(void)
00125 {
00126 double oldMu, distP=0.0;
00127
00128
00129 if (weight==0.0)
00130 return 0.0;
00131
00132 if (S<=1.0)
00133 {
00134 cout << "Cluster got too small. We have to throw it out since is not anymore reliable." << endl;
00135 weight=0.0;
00136 distP=1.0;
00137 goto cleanup;
00138 }
00139
00140 weight=S/N;
00141
00142 oldMu = mu;
00143 mu = S_x/S;
00144 S_x2 = (S_x2-mu*S_x)/S;
00145
00146 variance = S_x2;
00147
00148 if (variance==0.0)
00149 weight=0;
00150
00151 distP=pow2(oldMu-mu);
00152
00153 alpha_over_sigma = weight / sqrt(2*M_PI*S_x2 );
00154
00155 cleanup:
00156
00157 N=0;
00158 S=0.0;
00159 S_x=0.0;
00160 S_x2=0.0;
00161
00162 return sqrt(distP)/(inDim+1);
00163 }
00164
00165 void UpdateANOVAElements(int& numNonzero, double& SSR, double& n, double& sumS_x_overS, double& sumS_x)
00166 {
00167 if (weight!=0)
00168 {
00169 numNonzero++;
00170 SSR+=variance*S;
00171 n+=S;
00172 sumS_x_overS+=mu*S_x;
00173 sumS_x+=S_x;
00174 }
00175 }
00176
00177 static string TypeName(void)
00178 {
00179 return string("SimpleNormalDistribution");
00180 }
00181
00182
00183 void RandomDistribution(int NrClusters)
00184 {
00185
00186 weight = 1.0;
00187
00188 mu = RANDOM01FLOAT;
00189 variance = 1.0;
00190 alpha_over_sigma = 1 / sqrt(2*M_PI);
00191 }
00192
00193
00194 #ifdef CLUS_USE_XML
00195 void PrintToXmlStream(ostream& output)
00196 {
00197 output << "<SimpleNormalDistribution";
00198 PrintAttribute(output, "mu", mu);
00199 PrintAttribute(output, "variance", variance);
00200 PrintAttribute(output, "dimension", dimension);
00201 PrintAttribute(output, "alpha_over_sigma", alpha_over_sigma);
00202 output << ">" << endl;
00203
00204 Distribution::PrintToXmlStream(output);
00205
00206 output << "</SimpleNormalDistribution>" << endl;
00207 }
00208 #endif
00209
00210 };
00211
00212 double MulticlassContinuousDistribution<SimpleNormalDistribution>::PValueStatisticalTest(void)
00213 {
00214 int numNonzero = 0;
00215 double SSR=0.0;
00216 double n=0.0;
00217 double sumS_x_overS=0.0;
00218 double sumS_x=0.0;
00219 for (int i=0; i<noClasses; i++)
00220 {
00221 distributions[i].UpdateANOVAElements(numNonzero, SSR, n, sumS_x_overS, sumS_x);
00222 }
00223
00224 if (numNonzero>=2)
00225 {
00226
00227
00228
00229
00230 return 0.0;
00231 }
00232 else
00233 {
00234 return 0.0;
00235 }
00236 }
00237
00238 }
00239
00240 #endif // _CLUS_SIMPLENORMALDISTRIBUTION_H_