Main Page | Namespace List | Class Hierarchy | Data Structures | File List | Namespace Members | Data Fields | Globals | Related Pages

binarysplitter.h

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #if !defined _CLUS_BINARYSPLITTER_H
00035 #define _CLUS_BINARY_SPLITTER_H
00036 
00037 #include "vec.h"
00038 #include <iostream>
00039 
00040 using namespace TNT;
00041 
00042 /// @todo This solution is very ugly. Use virtual functions to get the same result
00043 
00044 namespace CLUS
00045 {
00046 
00047 /** Base class for all the splitters. Specifies the interface */
00048 class BinarySplitter
00049 {
00050 protected:
00051     /// the number of discrete split variables (nonregressers)
00052     int dsplitDim;
00053 
00054     /// the number of continuous split variables (nonregressoers)
00055     int csplitDim;
00056     
00057     /// the number of regressors
00058     int regDim;
00059 
00060     /// list of discrete domain sizes
00061     const Vector<int>& dDomainSize;
00062     
00063     /** Indicates on what variable this node splits on.
00064     If -1 we have an oblique split. */
00065     int SplitVariable;
00066     
00067     /** Contains the coeficients of the hyperplane that separates the 2 distributions
00068     in the input space. This is an oblique split */
00069     Vector<double> SeparatingHyperplane;
00070     
00071     /** Contains the probability for a value to be in the left partition. In
00072     traditional classifiers is set to 1.0 or 0.0 */
00073     Vector<double> splitSetProbability;
00074 
00075 public:
00076     BinarySplitter():dDomainSize( *( new Vector<int>() ) ),SeparatingHyperplane(), splitSetProbability()
00077     {}
00078     
00079     BinarySplitter(const Vector<int>& DDomainSize,int CsplitDim, int RegDim):
00080             dsplitDim(DDomainSize.dim()), csplitDim(CsplitDim), regDim(RegDim),
00081             dDomainSize(DDomainSize),SeparatingHyperplane(),
00082             splitSetProbability()
00083     { }
00084     
00085     BinarySplitter(BinarySplitter& aux):
00086             dsplitDim(aux.dsplitDim),   csplitDim(aux.csplitDim),
00087             dDomainSize(aux.dDomainSize),SeparatingHyperplane(),
00088             splitSetProbability()
00089     { }
00090 
00091     int GetRegDim(void)
00092     {
00093         return regDim;
00094     }
00095     int GetCSplitDim(void)
00096     {
00097         return csplitDim;
00098     }
00099     
00100     int GetDSplitDim(void)
00101     {
00102         return dsplitDim;
00103     }
00104     
00105     const Vector<int>& GetDDomainSize(void)
00106     {
00107         return dDomainSize;
00108     }
00109 
00110     /** Decides what branch to choose.  */
00111     int ChooseBranch(const int* Dvars, const double* Cvars)
00112     {
00113         return 0;
00114     }
00115     /** Computes probability to take first branch. Need this to accomodate probabilistic splitters */
00116     double ProbabilityFirstBranch(const int* Dvars, const double* Cvars)
00117     {
00118         if (ChooseBranch(Dvars,Cvars)==0)
00119             return 1.0;
00120         else
00121             return 0.0;
00122     }
00123 
00124     /** Initializes the data structures used in split variable selection */
00125     void InitializeSplitStatistics(void)
00126     { }
00127     
00128     /** Updates the necessary statistics for split variable selection */
00129     void UpdateSplitStatistics(const int* Dvars, const double* Cvars,double p1,double p2)
00130     { }
00131     
00132     /** Decides on a split variable and frees the data structures used in split selection.
00133     @return 0 if a split variable could be computed, -1 otherwise.*/
00134     int ComputeSplitVariable(int type)
00135     {
00136         return 0;
00137     }
00138     
00139     /** Ask if it is worth doing more splits in the future */
00140     bool MoreSplits(int branch, int Min_no_datapoints)
00141     {
00142         return false;
00143     }
00144     
00145     /** Cleans up all the unnecessary statistics after the decision has been made */
00146     void DeleteTemporaryStatistics(void)
00147     { }
00148     
00149     void SaveToStream(ostream& out)
00150     {
00151         out << SplitVariable << " ( ";
00152         if (SplitVariable==-1)
00153         {
00154             // oblique split on continuous variables
00155             for (int i=0; i<SeparatingHyperplane.dim(); i++)
00156                 out << SeparatingHyperplane[i] << " ";
00157         }
00158         else
00159             if (SplitVariable<=-2)
00160             {
00161                 // Simple continuous split
00162                 out << SeparatingHyperplane[0] << " " << SeparatingHyperplane[-SplitVariable-1];
00163             }
00164             else
00165             {
00166                 // split on discrete variable
00167                 for (int i=0; i<splitSetProbability.dim(); i++)
00168                     if (splitSetProbability[i]>.5)
00169                         out << i << " ";
00170             }
00171         out << " )";
00172     }
00173 
00174 
00175     ~BinarySplitter(void)
00176     {
00177         // dealocate all the resources
00178     }
00179 
00180 };
00181 
00182 }
00183 
00184 #endif // _CLUS_BINARYSPLITTER_H

Generated on Mon Jul 21 16:57:24 2003 for SECRET by doxygen 1.3.2