Main Page | Namespace List | Class Hierarchy | Data Structures | File List | Namespace Members | Data Fields | Globals | Related Pages

discretepermutationtransformation.h

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #if !defined _CLUS_DISCRETEPERMUTATIONTRANSFORMATION_H
00035 #define _CLUS_DISCRETEPERMUTATIONTRANSFORMATION_H
00036 
00037 #include "vec.h"
00038 #include "cmat.h"
00039 #include <iostream>
00040 
00041 using namespace TNT;
00042 using namespace std;
00043 
00044 namespace CLUS
00045 {
00046 
00047 /** Permutation[i] is the permuted value of i */
00048 class Permutation
00049 {
00050     Vector<int> permutation;
00051 public:
00052     Permutation(): permutation()
00053     {}
00054 
00055     Permutation(int size): permutation(size)
00056     {
00057         for (int i=0; i<size; i++)
00058             permutation[i]=i;
00059     }
00060 
00061     Permutation(Vector<int>& auxPermutation):
00062             permutation(auxPermutation)
00063     {}
00064 
00065     void RandomizePermutation(int size)
00066     {
00067         permutation.newsize(size);
00068 
00069         int curr;
00070         for (int i =0; i< size; i++)
00071         {
00072             curr  = random()%size;
00073             bool unused = false; // is curr an int that hasn't already been used in the permutation being constructed?
00074 
00075             while (!unused) //find one that hasn't been used
00076             {
00077                 bool used=false;//flag for !unused in loop below
00078                 curr  = random()%size;
00079                 for(int j =0; j<i; j++)
00080                 {
00081                     if (permutation[j] == curr)
00082                         used = true;
00083                 }
00084                 if (!used)
00085                     unused = true;
00086                 used= false;
00087             }
00088             // make curr the  ith entry
00089             permutation[i] = curr;
00090         }
00091 
00092     }
00093 
00094     int ApplyInversePermutation(int d)
00095     {
00096         // cerr << "in ApplyInverse";
00097         for (int i=0; i< permutation.size(); i++)
00098         {
00099             if (permutation[i]==d)
00100                 return i;
00101             // else cerr << "found " << permutation[i] << " want "<< d <<endl;
00102         }
00103         for (int i=0; i<permutation.size(); i++)
00104             //cerr << permutation[i] << " ";
00105             assert(false); //this statement should not be reached.
00106     }
00107 
00108     void ResetSize(int size)
00109     {
00110         permutation.newsize(size);
00111         for (int i=0; i<size; i++)
00112             permutation[i]=i;
00113     }
00114 
00115     int ApplyPermutation(int data)
00116     {
00117         //cerr << "permutation size " <<  permutation.size()<<endl;
00118         return permutation[data];
00119     }
00120 
00121     void saveToStream(ostream& out)
00122     {
00123         for (int i=0; i<permutation.size(); i++)
00124         {
00125             out << i << " -> " << permutation[i] << endl;
00126         }
00127     }
00128 };
00129 
00130 class DiscretePermutationTransformation
00131 {
00132     /// keep track of the attributes with shifts
00133     Vector<bool> mask;
00134 
00135     /// shifts: attributes as rows, datasets as columns
00136     Matrix<Permutation> shifts;
00137 
00138     /// original dataset; keep arround for dataset information only
00139     Matrix<double>& cdataset;
00140 
00141     /// discrete dataset; last column ignored since is classlabel
00142     Matrix<int>& ddataset;
00143     
00144     const Vector<int>& dDomainSize;
00145     
00146     int numDataSets;
00147 
00148 public:
00149     DiscretePermutationTransformation(int NoAttributes, int NoDatasets,
00150                                       Matrix<double>& cDataset, Matrix<int>& dDataset,
00151                                       const Vector<int>& DDomainSize):
00152             mask(NoAttributes), shifts(NoAttributes,NoDatasets), cdataset(cDataset),
00153             ddataset(dDataset),dDomainSize(DDomainSize), numDataSets(NoDatasets)
00154     {
00155         mask=false;
00156 
00157         for (int i=0; i<NoAttributes; i++)
00158             for (int j=0; j<numDataSets; j++)
00159             {
00160                 //   cerr << " (" << i <<", " << j << ") " << DDomainSize[i] << endl;
00161                 (shifts[i][j]).ResetSize(DDomainSize[i]);
00162             }
00163     }
00164 
00165     void SetShiftsAttribute(int attrib, Vector<Permutation>& shiftsAtt)
00166     {
00167         assert(shiftsAtt.dim()==shifts.num_cols());
00168 
00169         for (int i=0; i<shifts.num_cols(); i++)
00170             shifts[attrib][i]=shiftsAtt[i];
00171         mask[attrib]=true;
00172         // cerr << "shifts set";
00173         // apply the shift to the attrib column of the data
00174         for (int i=0; i<ddataset.num_rows(); i++)
00175         {
00176             int datasetIndex=(int)cdataset[i][cdataset.num_cols()-1];
00177             //  cerr << " datasetIndex " << datasetIndex << endl;
00178             //  cerr << " attribute " << attrib << " i " << i << endl;
00179             if (datasetIndex>0)
00180                 ddataset[i][attrib]=shifts[attrib][datasetIndex].ApplyPermutation(ddataset[i][attrib]);
00181         }
00182     }
00183 
00184     Permutation GetShift(int attrib, int datasetIndex)
00185     {
00186         return shifts[attrib][datasetIndex];
00187     }
00188 
00189     void ApplyShiftToTuple(int* dataPoint, int datasetIndex)
00190     {
00191         int noAttribs=mask.dim();
00192         for (int i=0; i<noAttribs; i++)
00193             dataPoint[i]=shifts[i][datasetIndex].ApplyPermutation(dataPoint[i]);
00194     }
00195 
00196     bool HasAttributeShifts(int attrib)
00197     {
00198         return mask[attrib];
00199     }
00200 
00201     void saveToStream(ostream& out, int attribute, int dataset)
00202     {
00203         shifts[attribute][dataset].saveToStream(out);
00204     }
00205 };
00206 
00207 }
00208 
00209 #endif // _CLUS_DISCRETEPERMUTATIONTRANSFORMATION_H

Generated on Mon Jul 21 16:57:24 2003 for SECRET by doxygen 1.3.2