Main Page | Namespace List | Class Hierarchy | Data Structures | File List | Namespace Members | Data Fields | Globals | Related Pages

multiclassdistribution.h

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #if !defined _CLUS_MULTICLASSDISTRIBUTION_H
00035 #define _CLUS_MULTICLASSDISTRIBUTION_H
00036 
#include <cmath>
#include <ostream>

#ifdef CLUS_USE_XML
#include "xml.h"
#endif
00040 
00041 namespace CLUS
00042 {
00043 
/** Base class for all distributions that can predict a discrete variable.

    The implementation in this class models a uniform distribution over the
    noClasses class labels: the inference methods ignore their data
    arguments and the learning hooks do nothing. The methods are virtual
    because more than one subclass can be used in any given tree, and it is
    easier to let the virtual function mechanism pick the right
    implementation.
 */
class MulticlassDistribution
{
protected:
    /// the number of classes the predicted variable has
    int noClasses;

    /// the value of the test that is usually computed during StopLearning
    double statisticalTest;

public:
    /** Creates a distribution over NoClasses class labels.

        statisticalTest is set to 0.0 so that it never holds an
        indeterminate value if it is read before being assigned.

        @param NoClasses    number of classes of the predicted variable
     */
    MulticlassDistribution(int NoClasses): noClasses(NoClasses), statisticalTest(0.0)
    { }

    virtual ~MulticlassDistribution(void)
    { }

    /** Infer will use data to produce noClasses normalized probabilities
        into result.

        This base version ignores the data and writes 1.0/noClasses into
        each of the first noClasses entries of result.

        @param cdata    values of the continuous variables (unused here)
        @param ddata    values of the discrete variables (unused here)
        @param result   output array; entries 0 .. noClasses-1 are written
     */
    virtual void Infer(const double* cdata, const int* ddata, double* result)
    {
        for (int i=0; i<noClasses; i++)
            result[i]=1.0/noClasses;
    }

    /** MultiplicativeInfer uses data to produce probabilities and
        multiplies these probabilities with the ones in result.

        This base version implements just a uniform distribution regardless
        of the inputs: each of the first noClasses entries of result is
        divided by noClasses.

        @param cdata    values of the continuous variables (unused here)
        @param ddata    values of the discrete variables (unused here)
        @param result   in/out array; entries 0 .. noClasses-1 are scaled
     */
    virtual void MultiplicativeInfer(const double* cdata, const int* ddata, double* result)
    {
        for (int i=0; i<noClasses; i++)
            result[i]/=noClasses;
    }

    /** Initialize the sufficient statistics that are maintained.
        Nothing to initialize in this class.
     */
    virtual void StartLearning(void)
    { }

    /** Update the sufficient statistics according to the current input.
        Should be used if the class label is known for sure.
        Does nothing in this class.

        @param cdata        contains values for the continuous variables
        @param ddata        for the discrete ones
        @param classLabel   known classification label
        @param weightSample used to give different importance
                            to the samples (magnifying glass effect).
     */
    virtual void LearnSample(const double* cdata, const int* ddata, int classLabel, double weightSample=1.0)
    { }

    /** Update the sufficient statistics according to the current input.
        Should be used if the class label cannot be determined with
        certainty. Does nothing in this class.

        NOTE(review): classProbabilities is a single double, although the
        description suggests one probability per class -- confirm whether
        a pointer to an array was intended. Because this overload differs
        from the one above only in int vs. double, a third argument of
        another arithmetic type (e.g. long or float) makes the call
        ambiguous.

        @param cdata        contains values for the continuous variables
        @param ddata        for the discrete ones
        @param classProbabilities     classification probabilities
        @param weightSample used to give different importance
                            to the samples (magnifying glass effect).
     */
    virtual void LearnSample(const double* cdata, const int* ddata, double classProbabilities, double weightSample=1.0)
    { }

    /** Uses the sufficient statistics to compute estimates of the parameters of the
        distribution. Does nothing in this class.
     */
    virtual void StopLearning(void)
    { }

    /** Returns the log of p-value=1-cdf of the appropriate statistical test.
        In other words it returns the probability that randomly (no correlations between
        input and class label) we do as well. The smaller the p-value the more predictive
        the distribution. The criterion depends on the distribution.
        This function should be called only after StopLearning.

        @return log(1.0/noClasses) in this class, i.e. the value for the
                uniform distribution
     */
    virtual double PValueStatisticalTest(void)
    {
        // p-value for uniform distribution
        return std::log(1.0/noClasses);
    }

    /** Saves the distribution to a stream. Writes nothing in this class.

        NOTE(review): unlike the other methods this one is not virtual, so
        a call through a base-class pointer or reference always reaches
        this empty version -- confirm that this is intended.

        @param output   stream to save to (unused here)
     */
    void SaveToStream(std::ostream& output)
    {}

#ifdef CLUS_USE_XML
    /** Prints the distribution in a stream in XML: a single
        MulticlassDistribution element carrying the noClasses attribute.
        PrintAttribute is not declared in this file (presumably it comes
        from xml.h).

        @param output   stream to print to
     */
    virtual void PrintToXmlStream(std::ostream& output)
    {
        output << "<MulticlassDistribution";
        PrintAttribute(output, "noClasses", noClasses);
        output << "/>" << std::endl;
    }
#endif

    /** @param index    class label index to query
        @return true if the classLabel index has no significant appearance;
                always false in this class
     */
    virtual bool IsClassLabelAbsent(int index)
    {
        return false;
    }

};
00150 
00151 }
00152 
00153 #endif // _CLUS_MULTICLASSDISTRIBUTION_H

Generated on Mon Jul 21 16:57:24 2003 for SECRET by doxygen 1.3.2