00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 #include <fstream>
00039 #include <math.h>
00040 #include <stdio.h>
00041 #include "SeqBitmap.h"
00042 #include "Bitmap4.h"
00043 #include "Bitmap8.h"
00044 #include "Bitmap16.h"
00045 #include "Bitmap32.h"
00046 #include "Bitmap64.h"
00047 #include "DatasetInfo.h"
00048 #include "ResizableArray.h"
00049 #include "StringMap.h"
00050 #include <iostream>
00051 
// Error codes understood by PrintFileReadError().
#define FILEERR_64TRANSACTIONS 1    // a customer has more than 64 transactions
#define FILEERR_NOTFOUNDBINARY 2    // binary input file missing or not valid
#define FILEERR_NOTFOUNDASCII  3    // ASCII input file missing or not valid

// Number of bitmap width classes (entries in BITMAP_LENGTH).
const int NUM_BITMAP = 5;

// Width of each bitmap class, indexed by bitmap ID. ReadDataset() assigns a
// customer to the first class whose width is at least the customer's
// transaction count.
const int BITMAP_LENGTH[NUM_BITMAP] = { 4, 8, 16, 32, 64 };

// Constant term of the per-width memory budget handed to
// SeqBitmap::MemAlloc() by ReadDataset().
// NOTE(review): the exact meaning of "used" is not visible in this file.
const int NUM_BITMAPS_USED = 4;

// Size, in bytes, of the buffers used to read one line of a string-labelled
// ASCII input file (including the terminating NUL).
const int MAX_STRING_SIZE = 256;
00076 
00077 
00078 
00079 
00080 
00081 
00082 
00083 void PrintFileReadError(int errorType)
00084 {
00085     cerr << "\nInput file error:\n\n";
00086 
00087     switch (errorType)
00088     {
00089     case FILEERR_64TRANSACTIONS:
00090         cerr << "A customer has more than 64 transactions.\n";
00091         break;
00092     case FILEERR_NOTFOUNDBINARY:
00093         cerr << "The input file either does not exist, or is not\n";
00094         cerr << "a valid binary input file (Perhaps try running\n";
00095         cerr << "SPAM with the -ascii flag to see if it is an\n";
00096         cerr << "ASCII input file)\n";
00097         break;
00098     case FILEERR_NOTFOUNDASCII:
00099         cerr << "The input file either does not exist, or is not\n";
00100         cerr << "a valid ascii input file. If the input file was\n";
00101         cerr << "automatically generated using a program like AssocGen,\n";
00102         cerr << "perhaps try running SPAM without the -ascii flag to\n";
00103         cerr << "see if it is a binary input file.\n";
00104         break;
00105     default:
00106         cerr << "Unknown file read error.\n";
00107     }
00108 
00109     cerr << "\nNotes about the input file:\n";
00110     cerr << "The input file should be an ASCII text file containing\n";
00111     cerr << "three integers, separated by spaces, on each line:\n";
00112     cerr << "<Customer ID> <Transaction ID> <Item ID>\n";
00113     cerr << "Customer IDs and Item IDs should be assigned relative to\n";
00114     cerr << "the overall transactional database. Transaction IDs should\n";
00115     cerr << "be assigned relative to the customer they belong to. Each\n";
00116     cerr << "customer can have no more than 64 transactions. Make sure to\n";
00117     cerr << "use the -ascii flag, since the input is ASCII text.\n";
00118 
00119     exit(0);
00120 }
00121 
00122 
00123 
00124 
00125 
00126 
00127 
00128 
/**
 * Replace a heap array of ints with a new one of a different size.
 *
 * The first min(oldSize, newSize) elements are preserved; any additional
 * elements are set to 0. The old array is released with delete[] and the
 * reference is redirected to the new storage.
 *
 * @param array    In/out: array allocated with new[] (may be 0 when oldSize
 *                 is 0). On return it points to the new array.
 * @param oldSize  Number of valid elements in the current array.
 * @param newSize  Number of elements to allocate.
 */
void IncArraySize(int*& array, int oldSize, int newSize)
{
    // Never copy more than the new buffer can hold: a call that shrinks the
    // array must truncate rather than write past the end of the allocation.
    const int keep = (oldSize < newSize) ? oldSize : newSize;

    // Value-initialisation zero-fills the whole buffer, which covers the
    // tail beyond the copied prefix.
    int* resized = new int[newSize]();
    for (int pos = 0; pos < keep; ++pos)
        resized[pos] = array[pos];

    delete [] array;
    array = resized;
}
00144 
00145 
00146 
00147 
00148 
00149 
00150 
00151 
00152 
00153 
00154 
00155 
00156 
00157 
00158 
00159 
00160 
00161 
00162 
00163 
00164 
00165 
00166 
00167 
00168 
00169 
00170 
00171 
00172 
00173 
00174 
00175 
00176 
00177 
00178 bool CollectBinaryInfo(
00179     char* filename,
00180     int& custCount,
00181     int& itemCount,
00182     int& transCount,
00183     int*& custTransCount,
00184     int*& itemCustCount,
00185     int*& cids,
00186     int*& tids,
00187     int*& iids,
00188     int*& transLens,
00189     int& overallCount,
00190     int& transLensLength)
00191 {
00192 
00193     
00194     
00195     ResizableArray * cidArr = new ResizableArray(64);
00196     ResizableArray * tidArr = new ResizableArray(64);
00197     ResizableArray * iidArr = new ResizableArray(64);
00198     ResizableArray * transLensArr = new ResizableArray(64);
00199     int custID;                   
00200     int transID;                  
00201     int numItem;                  
00202     int *itemlist;                
00203     ifstream inFile;              
00204     int custTransSize = 400;      
00205     int itemCustSize = 400;       
00206     bool useStdin;                
00207 
00208     
00209     
00210     if (filename == 0)
00211         useStdin = true;
00212     else
00213         useStdin = false;
00214 
00215     if (!useStdin)
00216     {
00217         inFile.open(filename, ios::binary);
00218         if (!inFile.is_open())
00219         {
00220             PrintFileReadError(FILEERR_NOTFOUNDBINARY);
00221             return false;
00222         }
00223     }
00224 
00225     
00226     
00227     custCount = -1;               
00228     itemCount = -1;               
00229     transCount = 0;               
00230     custTransCount = new int[custTransSize];
00231     itemCustCount = new int[itemCustSize];
00232 
00233     for (int cti = 0; cti < custTransSize; cti++)
00234         custTransCount[cti] = 0;
00235 
00236     for (int ici = 0; ici < itemCustSize; ici++)
00237         itemCustCount[ici] = 0;
00238 
00239     
00240     
00241     int *itemPrevCustID = new int[itemCustSize];
00242     for (int ipi = 0; ipi < itemCustSize; ipi++)
00243         itemPrevCustID[ipi] = -1;
00244 
00245     
00246     while ((!useStdin && !inFile.eof()) || (useStdin && !cin.eof()))
00247     {
00248 
00249         if (useStdin)
00250         {
00251             
00252             cin.read((char *)&custID, sizeof(int));
00253             cin.read((char *)&transID, sizeof(int));
00254             cin.read((char *)&numItem, sizeof(int));
00255 
00256             itemlist = new int[numItem];
00257 
00258             
00259             cin.read((char *)itemlist, numItem * sizeof(int));
00260         }
00261         else
00262         {
00263             
00264             inFile.read((char *)&custID, sizeof(int));
00265             inFile.read((char *)&transID, sizeof(int));
00266             inFile.read((char *)&numItem, sizeof(int));
00267 
00268             itemlist = new int[numItem];
00269 
00270             
00271             inFile.read((char *)itemlist, numItem * sizeof(int));
00272         }
00273 
00274         
00275         if ((!useStdin && inFile.eof()) || (useStdin && cin.eof()))
00276         {
00277             delete [] itemlist;
00278             break;
00279         }
00280 
00281         transLensArr->Add(numItem);
00282         for (int i = 0; i < numItem; i++)
00283         {
00284             
00285             cidArr->Add(custID);
00286             tidArr->Add(transID);
00287             iidArr->Add(itemlist[i]);
00288         }
00289 
00290 
00291         
00292         if (custID >= custCount)
00293         {
00294             custCount = custID + 1;
00295 
00296             
00297             if (custCount > custTransSize)
00298             {
00299                 int newSize = (custCount > 2 * custTransSize) ?
00300                               custCount : 2 * custTransSize;
00301                 IncArraySize(custTransCount, custTransSize, newSize);
00302                 custTransSize = newSize;
00303             }
00304         }
00305         custTransCount[custID]++;
00306         transCount++;
00307 
00308 
00309         
00310         for (int ici = 0; ici < numItem; ici++)
00311         {
00312             if (itemlist[ici] >= itemCount)
00313                 itemCount = itemlist[ici] + 1;
00314         }
00315 
00316         
00317         if (itemCount >= itemCustSize)
00318         {
00319             int newSize = (itemCount > 2 * itemCustSize) ?
00320                           itemCount : 2 * itemCustSize;
00321             IncArraySize(itemCustCount, itemCustSize, newSize);
00322             IncArraySize(itemPrevCustID, itemCustSize, newSize);
00323             itemCustSize = newSize;
00324         }
00325 
00326         for (int itemIndex = 0; itemIndex < numItem; itemIndex++)
00327         {
00328             
00329             if (itemPrevCustID[itemlist[itemIndex]] != custID)
00330             {
00331                 itemCustCount[itemlist[itemIndex]]++;
00332                 itemPrevCustID[itemlist[itemIndex]] = custID;
00333             }
00334         }
00335 
00336         delete [] itemlist;
00337     }
00338 
00339     delete [] itemPrevCustID;
00340     if (!useStdin)
00341         inFile.close();
00342 
00343     
00344     
00345     cidArr->ToArray(cids, overallCount);
00346     tidArr->ToArray(tids, overallCount);
00347     iidArr->ToArray(iids, overallCount);
00348     transLensArr->ToArray(transLens, transLensLength);
00349     delete cidArr;
00350     delete tidArr;
00351     delete iidArr;
00352     delete transLensArr;
00353 
00354     return true;
00355 }
00356 
00357 
00358 
00359 
00360 
00361 
00362 
00363 
00364 
00365 
00366 
00367 
00368 
00369 
00370 
00371 
00372 
00373 
00374 
00375 
00376 
00377 
00378 
00379 
00380 
00381 
00382 
00383 
00384 
00385 
00386 
00387 
00388 
00389 
00390 
00391 
00392 
00393 
00394 
00395 
00396 
00397 
00398 bool CollectASCIIInfo(
00399     char* filename,
00400     bool isStringFile,
00401     StringMap*& custStrMap,
00402     StringMap*& transStrMap,
00403     StringMap*& itemStrMap,
00404     int& custCount,
00405     int& itemCount,
00406     int& lineCount,
00407     int*& custTransCount,
00408     int*& itemCustCount,
00409     int*& cids,
00410     int*& tids,
00411     int*& iids,
00412     int& overallCount)
00413 {
00414 
00415     
00416     
00417     ResizableArray * cidArr = new ResizableArray(64);
00418     ResizableArray * tidArr = new ResizableArray(64);
00419     ResizableArray * iidArr = new ResizableArray(64);
00420     int custID;                   
00421     int transID;                  
00422     int itemID;                   
00423     int prevTransID = -1;         
00424     ifstream inFile;              
00425     int custTransSize = 400;      
00426     int itemCustSize = 400;       
00427     int i;                        
00428     bool useStdin;                
00429     int custStrMapID = 1;
00430     int transStrMapID = 1;
00431     int itemStrMapID = 1;
00432 
00433     
00434     if (isStringFile)
00435     {
00436         custStrMap = new StringMap();
00437         transStrMap = new StringMap();
00438         itemStrMap = new StringMap();
00439     }
00440 
00441     
00442     
00443     if (filename == 0)
00444         useStdin = true;
00445     else
00446         useStdin = false;
00447 
00448     if (!useStdin)
00449     {
00450         inFile.open(filename);
00451         if (!inFile.is_open())
00452         {
00453             PrintFileReadError(FILEERR_NOTFOUNDASCII);
00454             return false;
00455         }
00456     }
00457 
00458     
00459     custCount = -1;               
00460     itemCount = -1;               
00461     lineCount = 0;                
00462     custTransCount = new int[custTransSize];
00463     itemCustCount = new int[itemCustSize];
00464     for (i = 0; i < custTransSize; i++)
00465         custTransCount[i] = 0;
00466     for (i = 0; i < itemCustSize; i++)
00467         itemCustCount[i] = 0;
00468 
00469 
00470     
00471     
00472     int *itemPrevCustID = new int[itemCustSize];
00473     for (i = 0; i < itemCustSize; i++)
00474         itemPrevCustID[i] = -1;
00475 
00476     
00477     while ((!useStdin && !inFile.eof()) || (useStdin && !cin.eof()))
00478     {
00479         
00480         if (isStringFile)
00481         {
00482             
00483             
00484             
00485             char *custStr = new char[MAX_STRING_SIZE];
00486             char *transStr = new char[MAX_STRING_SIZE];
00487             char *itemStr = new char[MAX_STRING_SIZE];
00488             if (useStdin)
00489             {
00490                 cin.getline(custStr, MAX_STRING_SIZE);
00491                 cin.getline(transStr, MAX_STRING_SIZE);
00492                 cin.getline(itemStr, MAX_STRING_SIZE);
00493             }
00494             else
00495             {
00496                 inFile.getline(custStr, MAX_STRING_SIZE);
00497                 inFile.getline(transStr, MAX_STRING_SIZE);
00498                 inFile.getline(itemStr, MAX_STRING_SIZE);
00499             }
00500 
00501             
00502             
00503             const int * custKeyID = custStrMap->GetKey(custStr);
00504             const int * transKeyID = transStrMap->GetKey(transStr);
00505             const int * itemKeyID = itemStrMap->GetKey(itemStr);
00506             if (custKeyID != 0)
00507                 custID = *custKeyID;
00508             else
00509             {
00510                 custID = custStrMapID;
00511                 custStrMap->Add(custID, custStr);
00512                 custStrMapID++;
00513             }
00514             if (transKeyID != 0)
00515                 transID = *transKeyID;
00516             else
00517             {
00518                 transID = transStrMapID;
00519                 transStrMap->Add(transID, transStr);
00520                 transStrMapID++;
00521             }
00522             if (itemKeyID != 0)
00523                 itemID = *itemKeyID;
00524             else
00525             {
00526                 itemID = itemStrMapID;
00527                 itemStrMap->Add(itemID, itemStr);
00528                 itemStrMapID++;
00529             }
00530         }
00531         else
00532         {
00533             if (useStdin)
00534             {
00535                 cin >> custID;
00536                 cin >> transID;
00537                 cin >> itemID;
00538             }
00539             else
00540             {
00541                 inFile >> custID;
00542                 inFile >> transID;
00543                 inFile >> itemID;
00544             }
00545         }
00546 
00547         
00548         cidArr->Add(custID);
00549         tidArr->Add(transID);
00550         iidArr->Add(itemID);
00551 
00552         
00553         if (custID >= custCount)
00554         {
00555             custCount = custID + 1;
00556 
00557             
00558             if (custCount > custTransSize)
00559             {
00560                 int newSize = (custCount > 2 * custTransSize) ?
00561                               custCount : 2 * custTransSize;
00562                 IncArraySize(custTransCount, custTransSize, newSize);
00563                 custTransSize = newSize;
00564             }
00565         }
00566 
00567         
00568         if (prevTransID != transID)
00569         {
00570             custTransCount[custID]++;
00571             prevTransID = transID;
00572         }
00573         lineCount++;
00574 
00575         
00576         if (itemID >= itemCount)
00577         {
00578             itemCount = itemID + 1;
00579 
00580             
00581             if (itemCount >= itemCustSize)
00582             {
00583                 int newSize = (itemCount > 2 * itemCustSize) ?
00584                               itemCount : 2 * itemCustSize;
00585                 IncArraySize(itemCustCount, itemCustSize, newSize);
00586                 IncArraySize(itemPrevCustID, itemCustSize, newSize);
00587                 itemCustSize = newSize;
00588             }
00589         }
00590 
00591         
00592         if (itemPrevCustID[itemID] != custID)
00593         {
00594             itemCustCount[itemID]++;
00595             itemPrevCustID[itemID] = custID;
00596         }
00597     }
00598 
00599     delete [] itemPrevCustID;
00600     if (!useStdin)
00601         inFile.close();
00602 
00603     
00604     
00605     cidArr->ToArray(cids, overallCount);
00606     tidArr->ToArray(tids, overallCount);
00607     iidArr->ToArray(iids, overallCount);
00608     delete cidArr;
00609     delete tidArr;
00610     delete iidArr;
00611 
00612     return true;
00613 }
00614 
00615 
00616 
00617 
00618 
00619 
00620 
00621 
00622 
00623 
00624 
00625 
00626 
00627 
00628 
00629 
00630 
00631 
00632 
00633 
00634 
00635 
00636 
00637 
00638 
00639 
00640 
00641 
00642 bool ReadBinary(
00643     char* filename,
00644     int* cids,
00645     int* tids,
00646     int* iids,
00647     int numEntries,
00648     int* transLens,
00649     int transLensLength,
00650     int* custBitmapMap,
00651     int** custMap,
00652     int* itemMap,
00653     SeqBitmap** f1Buff)
00654 {
00655 
00656     
00657     
00658     int custID;              
00659     int transID;             
00660     int numItem;             
00661     int *itemlist;           
00662     int prevCustID = -1;
00663     int bitmapID = 0;
00664     int index = 0;
00665     ifstream inFile;         
00666 
00667     
00668     
00669     bool secondScan = false;
00670     int lenIndex = 0;
00671     int scanIndex = 0;
00672 
00673     if (secondScan)
00674     {
00675         if (filename == 0)
00676         {
00677             cout << "Error: cannot read input a second time when -stdin is on";
00678             exit(-1);
00679         }
00680 
00681         
00682         inFile.open(filename, ios::binary);
00683         if (!inFile.is_open())
00684         {
00685             return false;
00686         }
00687     }
00688 
00689     while (     (secondScan && !inFile.eof())
00690                 || (!secondScan && scanIndex < numEntries) )
00691     {
00692         if (secondScan)
00693         {
00694             inFile.read((char *)&custID, sizeof(int));
00695             inFile.read((char *)&transID, sizeof(int));
00696             inFile.read((char *)&numItem, sizeof(int));
00697             itemlist = new int[numItem];
00698 
00699             
00700             inFile.read((char *)itemlist, numItem * sizeof(int));
00701 
00702             
00703             if (inFile.eof())
00704                 break;
00705         }
00706         else
00707         {
00708             numItem = transLens[lenIndex];
00709             itemlist = new int[numItem];
00710             custID = cids[scanIndex];
00711             transID = tids[scanIndex];
00712             for (int i = 0; i < numItem; i++)
00713                 itemlist[i] = iids[scanIndex + i];
00714             scanIndex+=numItem;
00715             lenIndex++;
00716         }
00717 
00718         if (custID != prevCustID)
00719         {
00720             prevCustID = custID;
00721             bitmapID = custBitmapMap[custID];
00722             index = custMap[bitmapID][custID] * BITMAP_LENGTH[bitmapID];
00723         }
00724 
00725         
00726         for (int j = 0; j < numItem; j++)
00727         {
00728             if (itemMap[itemlist[j]] >= 0)
00729                 f1Buff[itemMap[itemlist[j]]]->
00730                 FillEmptyPosition(bitmapID, index);
00731         }
00732 
00733 
00734         index++;
00735         delete [] itemlist;
00736     }
00737 
00738     if (secondScan)
00739         inFile.close();
00740     return true;
00741 }
00742 
00743 
00744 
00745 
00746 
00747 
00748 
00749 
00750 
00751 
00752 
00753 
00754 
00755 
00756 
00757 
00758 
00759 
00760 
00761 
00762 
00763 
00764 
00765 
00766 
00767 
00768 
00769 
00770 
00771 
00772 
00773 
00774 bool ReadASCII(
00775     char* filename,
00776     int* cids,
00777     int* tids,
00778     int* iids,
00779     int numEntries,
00780     int* custBitmapMap,
00781     int** custMap,
00782     int* itemMap,
00783     SeqBitmap** f1Buff)
00784 {
00785 
00786     
00787     
00788     int custID;              
00789     int transID;             
00790     int itemID;              
00791     int prevTransID = -1;    
00792     int prevCustID = -1;     
00793     int bitmapID = 0;        
00794     int index = 0;
00795     ifstream inFile;         
00796 
00797     
00798     bool secondScan = false;
00799     int scanIndex = 0;
00800 
00801     if (secondScan)
00802     {
00803         if (filename == 0)
00804         {
00805             cout << "Error: cannot read input a second time when -stdin is on";
00806             exit(-1);
00807         }
00808         
00809         inFile.open(filename);
00810         if (!inFile.is_open())
00811         {
00812             return false;
00813         }
00814     }
00815 
00816     
00817     while (   (secondScan && !inFile.eof())
00818               || (!secondScan && scanIndex < numEntries))
00819     {
00820 
00821         if (secondScan)
00822         {
00823             
00824             inFile >> custID;
00825 
00826             if (inFile.eof())
00827                 break;
00828 
00829             inFile >> transID;
00830             inFile >> itemID;
00831         }
00832         else
00833         {
00834             custID = cids[scanIndex];
00835             transID = tids[scanIndex];
00836             itemID = iids[scanIndex];
00837             scanIndex++;
00838         }
00839 
00840         if (custID != prevCustID)
00841         {
00842             prevCustID = custID;
00843             bitmapID = custBitmapMap[custID];
00844             index = custMap[bitmapID][custID] * BITMAP_LENGTH[bitmapID] - 1;
00845         }
00846 
00847         if (prevTransID != transID)
00848         {
00849             index++;
00850             prevTransID = transID;
00851         }
00852 
00853         
00854         if (itemMap[itemID] >= 0)
00855             f1Buff[itemMap[itemID]]->FillEmptyPosition(bitmapID, index);
00856     }
00857 
00858     if (secondScan)
00859         inFile.close();
00860 
00861     return true;
00862 }
00863 
00864 
00865 
00866 
00867 
00868 
00869 
00870 
00871 
00872 
00873 
00874 
00875 
00876 
00877 DatasetInfo* ReadDataset(
00878     bool isBinaryFile,
00879     bool isStringFile,
00880     char *filename,
00881     double minSupPercent,
00882     StringMap *&custStrMap,
00883     StringMap *&transStrMap,
00884     StringMap *&itemStrMap)
00885 {
00886 
00887     DatasetInfo* info = new DatasetInfo(); 
00888 
00889     
00890     int tempCustCount = -1;  
00891     int itemCount = -1;      
00892     int transCount = 0;      
00893     int *custTransCount;     
00894     int *itemCustCount;      
00895     
00896 
00897     
00898     int *bitmapSizes;        
00899     
00900     int *custBitmapMap;      
00901     int **custMap;           
00902 
00903     int noTransCount = 0;    
00904     int *itemMap;            
00905 
00906     int numCompression;      
00907 
00908     bool result = false;     
00909     int i, j;                
00910 
00911     
00912     
00913     
00914     
00915     
00916 
00917     
00918     int *cids;
00919     int *tids;
00920     int *iids;
00921     int numEntries;
00922     int *transLens;
00923     int transLensLength;
00924 
00925     if (isBinaryFile)
00926     {
00927         result = CollectBinaryInfo(
00928                      filename,
00929                      tempCustCount,
00930                      itemCount,
00931                      transCount,
00932                      custTransCount,
00933                      itemCustCount,
00934                      cids,
00935                      tids,
00936                      iids,
00937                      transLens,
00938                      numEntries,
00939                      transLensLength);
00940     }
00941     else
00942     {
00943         result = CollectASCIIInfo(
00944                      filename,
00945                      isStringFile,
00946                      custStrMap,
00947                      transStrMap,
00948                      itemStrMap,
00949                      tempCustCount,
00950                      itemCount,
00951                      transCount,
00952                      custTransCount,
00953                      itemCustCount,
00954                      cids,
00955                      tids,
00956                      iids,
00957                      numEntries);
00958     }
00959     if (!result)
00960     {
00961         delete info;
00962         return 0;
00963     }
00964 
00965     
00966     
00967     
00968 
00969     
00970     
00971     
00972     
00973     
00974     
00975     
00976     
00977     
00978     
00979     
00980 
00981 
00982     
00983     info->maxCustTrans = 0;
00984     noTransCount = 0;
00985     custBitmapMap = new int[tempCustCount]; 
00986     bitmapSizes = new int[NUM_BITMAP];      
00987     custMap = new int * [NUM_BITMAP];       
00988     
00989     for (i = 0; i < NUM_BITMAP; i++)
00990     {
00991         custMap[i] = new int[tempCustCount];
00992         bitmapSizes[i] = 0;
00993     }
00994 
00995     
00996     for (i = 0; i < tempCustCount; i++)
00997     {
00998         if (custTransCount[i] > BITMAP_LENGTH[NUM_BITMAP - 1])
00999         {
01000             
01001             
01002             PrintFileReadError(FILEERR_64TRANSACTIONS);
01003         }
01004 
01005         if (custTransCount[i] <= 0)
01006         {
01007             
01008             custBitmapMap[i] = -1;
01009             noTransCount++;
01010         }
01011         else
01012         {
01013             
01014             for (j = 0; j < NUM_BITMAP; j++)
01015                 if (custTransCount[i] <= BITMAP_LENGTH[j])
01016                 {
01017                     custBitmapMap[i] = j;
01018                     break;
01019                 }
01020         }
01021 
01022         
01023         
01024         for (j = 0; j < NUM_BITMAP; j++)
01025             if (custBitmapMap[i] == j)
01026             {
01027                 custMap[j][i] = bitmapSizes[j];
01028                 bitmapSizes[j]++;
01029             }
01030             else
01031                 custMap[j][i] = -1;
01032 
01033         
01034         if (custTransCount[i] > info->maxCustTrans && custTransCount[i] <= 64)
01035             info->maxCustTrans = custTransCount[i];
01036     }
01037 
01038     
01039     info->custCount = tempCustCount - noTransCount;
01040     info->bitmapSizes = new int[NUM_BITMAP];
01041     for (i = 0; i < NUM_BITMAP; i++)
01042         info->bitmapSizes[i] = bitmapSizes[i];
01043 
01044     
01045     
01046     
01047 
01048 #ifdef ___PREFIXSPAN___
01049 
01050     info->minSup = (int) ceil(minSupPercent * info->custCount);
01051 #else
01052 
01053     info->minSup = (int) floor(minSupPercent * info->custCount + 0.5);
01054 #endif
01055 
01056     info->minSup = (info->minSup == 0) ? 1 : info->minSup;
01057 
01058     
01059     
01060     
01061     
01062     
01063 
01064     info->f1Size = 0;
01065     itemMap = new int[itemCount];
01066 
01067     for (i = 0; i < itemCount; i++)
01068     {
01069         if (itemCustCount[i] >= info->minSup )
01070         {
01071             itemMap[i] = info->f1Size;
01072             info->f1Size++;
01073         }
01074         else
01075         {
01076             itemMap[i] = -1;
01077         }
01078     }
01079 
01080     
01081     if (info->f1Size == 0)
01082         return info;
01083 
01084     numCompression = info->maxCustTrans * info->f1Size;
01085 
01086     
01087     
01088     int size64 = bitmapSizes[4];
01089     int size32 = bitmapSizes[3] + size64;
01090     int size16 = bitmapSizes[2] + size32;
01091     int size8 = bitmapSizes[1] + size16;
01092     int size4 = bitmapSizes[0] + size8;
01093 
01094     size4 = Bitmap4::CalcSize(size4);
01095     size8 = Bitmap8::CalcSize(size8);
01096     size16 = Bitmap16::CalcSize(size16);
01097     size32 = Bitmap32::CalcSize(size32);
01098     size64 = Bitmap64::CalcSize(size64);
01099 
01100 
01101     
01102     SeqBitmap::MemAlloc(size4 * (info->maxCustTrans * info->f1Size) *
01103                         (NUM_BITMAPS_USED + info->f1BufSize),
01104                         size8 * (info->maxCustTrans * info->f1Size) *
01105                         (NUM_BITMAPS_USED + info->f1BufSize),
01106                         size16 * (info->maxCustTrans * info->f1Size) *
01107                         (NUM_BITMAPS_USED + info->f1BufSize),
01108                         size32 * (info->maxCustTrans * info->f1Size) *
01109                         (NUM_BITMAPS_USED + info->f1BufSize),
01110                         size64 * (info->maxCustTrans * info->f1Size) *
01111                         (NUM_BITMAPS_USED + info->f1BufSize));
01112 
01113 
01114     
01115     
01116     
01117     info->f1BufSize = info->f1Size * (numCompression + 1) + 10;
01118 
01119     info->f1Buff = new SeqBitmap * [info->f1BufSize];
01120 
01121     for (i = 0; i < info->f1Size; i++)
01122         info->f1Buff[i] = new SeqBitmap(
01123                               bitmapSizes[0],
01124                               bitmapSizes[1],
01125                               bitmapSizes[2],
01126                               bitmapSizes[3],
01127                               bitmapSizes[4]);
01128 
01129     
01130     
01131     
01132     for (; i < info->f1BufSize; i++)
01133         info->f1Buff[i] = 0;
01134 
01135     info->f1NameBuff = new int[info->f1BufSize];
01136     for (i = 0; i < itemCount; i++)
01137         if (itemMap[i] >= 0)
01138             info->f1NameBuff[itemMap[i]] = i;
01139 
01140     
01141     
01142     
01143     
01144     
01145     info->sListBuff = new int[(info->maxCustTrans * info->f1Size)
01146                               * info->f1Size + 1];
01147 
01148     
01149     
01150     
01151     
01152     
01153     info->iListBuff = new int[(info->maxCustTrans * info->f1Size )
01154                               * info->f1Size + 1];
01155 
01156     for (i = 0; i < info->f1Size; i++)
01157         info->sListBuff[i] = i;
01158 
01159     
01160     info->countBuff = new CountInfo[info->f1Size];
01161 
01162 
01163 
01164 
01165     
01166     
01167     
01168     
01169     
01170 
01171     if (isBinaryFile)
01172         result = ReadBinary(
01173                      filename,
01174                      cids,
01175                      tids,
01176                      iids,
01177                      numEntries,
01178                      transLens,
01179                      transLensLength,
01180                      custBitmapMap,
01181                      custMap,
01182                      itemMap,
01183                      info->f1Buff);
01184     else
01185         result = ReadASCII(
01186                      filename,
01187                      cids,
01188                      tids,
01189                      iids,
01190                      numEntries,
01191                      custBitmapMap,
01192                      custMap,
01193                      itemMap,
01194                      info->f1Buff);
01195 
01196     if (!result)
01197     {
01198         delete info;
01199         return 0;
01200     }
01201 
01202     
01203     delete [] cids;
01204     delete [] tids;
01205     delete [] iids;
01206 
01207     
01208     delete [] custTransCount;
01209     delete [] itemCustCount;
01210     delete [] custBitmapMap;
01211     for (i = 0; i < NUM_BITMAP; i++)
01212         delete [] custMap[i];
01213     delete [] custMap;
01214     delete [] bitmapSizes;
01215     delete [] itemMap;
01216 
01217     return info;
01218 }
01219 
01220