SECRET: traingen.h Source File

00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #ifndef _TRAINGEN_H
00035 #define _TRAINGEN_H
00036 
00037 /* This class is the interface for the training data repositories.
00038    Each such object stores data as a matrix and is able to normalize
00039    the data. Two parameters per dimension can be used for normalization.
00040 */
00041 
00042 #include "cmat.h"
00043 #include "vec.h"
00044 #include "normaliz.h"
00045 #include <iostream>
00046 #include <fstream>
00047 #include "general.h"
00048 #include "extravec.h"
00049 
00050 using namespace TNT;
00051 using namespace std;
00052 
00053 namespace CLUS
00054 {
00055 
00056 class TrainingData
00057 {
00058 
00059 protected:
00060     bool normalized;
00061     Matrix<double> Table;
00062     
00063 public:
00064     TrainingData(Subscript M, Subscript N): Table(M,N)
00065     {
00066         normalized=false;
00067     }
00068     
00069     void Normalize(Vector<Scale>&, Scale::NormType, int);
00070     
00071     virtual void SaveToFile(char* name)
00072     {
00073         ofstream ostr(name);
00074         SaveToStream(ostr);
00075     }
00076     
00077     virtual void SaveToStream(ostream& ostr)
00078     {
00079         // @todo set format for ostream
00080         for(int i=0; i<Table.num_rows(); i++)
00081         {
00082             double *row=Table[i];
00083             for(int j=0; j<Table.num_cols(); j++)
00084                 ostr << row[j] << "\t";
00085             ostr << endl;
00086         }
00087     }
00088     
00089     virtual int NumRows(void)
00090     {
00091         return Table.num_rows();
00092     }
00093     
00094     virtual int NumCols(void)
00095     {
00096         return Table.num_cols();
00097     }
00098     
00099     virtual const Matrix<double>& GetTrainingData(void)
00100     {
00101         return Table;
00102     }
00103 
00104     virtual ~TrainingData()
00105     {
00106         // will call Matrix destructor
00107     }
00108 };
00109 
00110 inline void TrainingData::Normalize(Vector<Scale>& scale, Scale::NormType normtype, int nrInputs)
00111 {
00112     int N=Table.num_cols();
00113     ExitIf(scale.size()<N,"Wrong scaling vector in normalization");
00114     Vector<double> Aux1(N,Table[0]), Aux2(N,Table[0]);
00115     Subscript i=0, j;
00116     double* data;
00117 
00118     switch(normtype)
00119     {
00120     case Scale::Interval:
00121         for (i=1; i<Table.num_rows(); i++)
00122         {
00123             // for each line
00124             Min(Aux1, N, Table[i]);
00125             Max(Aux2, N, Table[i]);
00126         }
00127         
00128         for (j=0;j<nrInputs;j++)
00129             scale[j].SetCoefForInput(Aux1[j],Aux2[j],normtype);
00130             
00131         for (j=nrInputs;j<N;j++)
00132         {
00133             // Aux1[j]*=/*sqrt*/(1.0*nrInputs/(N-nrInputs));
00134             // Aux2[j]*=/*sqrt*/(1.0*nrInputs/(N-nrInputs));
00135             scale[j].SetCoefForOutput(Aux1[j],Aux2[j],normtype);
00136         }
00137 
00138         break;
00139     case Scale::Distribution:
00140         // we have to square the elements of Aux2
00141         for (j=0;j<N;j++)
00142             Aux2[i]=pow2(Aux2[i]);
00143             
00144         for (i=1; i<Table.num_rows(); i++)
00145         {
00146             // for each line
00147             Sum(Aux1, N, Table[i]);
00148             SumPow2(Aux2, N, Table[i]);
00149         }
00150         
00151         for (j=0;j<nrInputs;j++)
00152             scale[j].SetCoefForInput(Aux1[j],Aux2[j],normtype);
00153             
00154         for (j=nrInputs;j<N;j++)
00155             scale[j].SetCoefForOutput(Aux1[j],Aux2[j],normtype);
00156 
00157         break;
00158     }
00159 
00160     // transform the data
00161     for (i=0; i<Table.num_rows(); i++)
00162     {
00163         data=Table[i];
00164         
00165         for (j=0;j<nrInputs;j++)
00166             data[j]=scale[j].Transform(data[j]);
00167             
00168         for (j=nrInputs; j<N; j++)
00169             data[j]=scale[j].InverseTransform(data[j]);
00170     }
00171 }
00172 
00173 }
00174 
00175 
00176 #endif // _TRAINGEN_H