SECRET: continuouslineartransformation.h Source File

00001 /*
00002 
00003 Copyright (c) 2003, Cornell University
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009    - Redistributions of source code must retain the above copyright notice,
00010        this list of conditions and the following disclaimer.
00011    - Redistributions in binary form must reproduce the above copyright
00012        notice, this list of conditions and the following disclaimer in the
00013        documentation and/or other materials provided with the distribution.
00014    - Neither the name of Cornell University nor the names of its
00015        contributors may be used to endorse or promote products derived from
00016        this software without specific prior written permission.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00019 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00022 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00028 THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030 */
00031 
00032 // -*- C++ -*-
00033 
00034 #if !defined _CLUS_CONTINUOUSLINEARTRANSFORMATION_H
00035 #define _CLUS_CONTINUOUSLINEARTRANSFORMATION_H
00036 
00037 #include "vec.h"
00038 #include "cmat.h"
00039 #include <math.h>
00040 
00041 using namespace TNT;
00042 
00043 namespace CLUS
00044 {
00045 
00046 /** Applies linear shifts on continuous data. Keeps track of what
00047     columns transformations are applied on.
00048  
00049     Applying the shift means substracting it from the dataset.
00050 */
00051 class ContinuousLinearTransformation
00052 {
00053     /// keep track the attributes
00054     Vector<bool> mask;
00055 
00056     /// shifts: attributes as rows, datasets as columns
00057     Matrix<double> shifts;
00058 
00059     /// original dataset; transformations are applied to it when they change
00060     Matrix<double>& dataset;
00061     
00062 public:
00063     ContinuousLinearTransformation(int NoAttributes, int NoDatasets, Matrix<double>& TrainingDataset):
00064             mask(NoAttributes), shifts(NoAttributes,NoDatasets), dataset(TrainingDataset)
00065     {
00066         mask=false;
00067         shifts=0;
00068     }
00069 
00070     double getShift(int i, int j)
00071     {
00072         assert( mask[i] );
00073         return shifts[i][j];
00074     }
00075 
00076     void SetShiftsAttribute(int attrib, Vector<double>& shiftsAtt)
00077     {
00078         assert(shiftsAtt.dim()==shifts.num_cols());
00079 
00080         bool isAnyNan=false;
00081 
00082         for (int i=0; i<shifts.num_cols(); i++)
00083         {
00084             if (isnan(shiftsAtt[i]))
00085                 isAnyNan=true;
00086 
00087             shifts[attrib][i]=shiftsAtt[i];
00088         }
00089         mask[attrib]=true;
00090 
00091         assert(!isAnyNan);
00092 
00093         if (isAnyNan)
00094             return;
00095         //cerr <<  " deep in setshiftsattribute" << endl;
00096         // apply the shift to the attrib column of the data
00097         for (int i=0; i<dataset.num_rows(); i++)
00098         {
00099             int datasetIndex=(int)dataset[i][(int)dataset.num_cols()-1];
00100             //cerr<<dataset.num_cols();
00101             //cerr<< " dataset index " << datasetIndex<< endl;
00102             dataset[i][attrib]=dataset[i][attrib]-shiftsAtt[datasetIndex];
00103             //cerr <<" attribute " << attrib << endl;
00104         }
00105     }
00106 
00107     double GetShift(int attrib, int datasetIndex)
00108     {
00109         return shifts[attrib][datasetIndex];
00110     }
00111 
00112     /** Use the same convention with last position encoding the original dataset */
00113     void ApplyShiftToTuple(double* dataPoint, int datasetIndex)
00114     {
00115         int noAttribs=mask.dim();
00116         for (int i=0; i<noAttribs; i++)
00117         {
00118             dataPoint[i]=dataPoint[i]-shifts[i][datasetIndex];
00119         }
00120     }
00121 
00122     bool HasAttributeShifts(int attrib)
00123     {
00124         return mask[attrib];
00125     }
00126 };
00127 
00128 }
00129 
00130 #endif // _CLUS_CONTINUOUSLINEARTRANSFORMATION_H