00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <fstream>
00039 #include <math.h>
00040 #include <stdio.h>
00041 #include "SeqBitmap.h"
00042 #include "Bitmap4.h"
00043 #include "Bitmap8.h"
00044 #include "Bitmap16.h"
00045 #include "Bitmap32.h"
00046 #include "Bitmap64.h"
00047 #include "DatasetInfo.h"
00048 #include "ResizableArray.h"
00049 #include "StringMap.h"
00050 #include <iostream>
00051
// Error codes understood by PrintFileReadError().
#define FILEERR_64TRANSACTIONS 1
#define FILEERR_NOTFOUNDBINARY 2
#define FILEERR_NOTFOUNDASCII 3

// Number of distinct bitmap widths listed in BITMAP_LENGTH.
const int NUM_BITMAP = 5;

// Bitmap widths, in ascending order.  ReadDataset() assigns every customer
// to the first width that is >= the customer's transaction count, and the
// readers reserve that many consecutive positions per customer.
const int BITMAP_LENGTH[NUM_BITMAP] = { 4, 8, 16, 32, 64 };

// Constant term of the per-item multiplier passed to SeqBitmap::MemAlloc().
const int NUM_BITMAPS_USED = 4;

// Size of each line buffer used when reading string-valued ASCII input.
const int MAX_STRING_SIZE = 256;
00076
00077
00078
00079
00080
00081
00082
00083 void PrintFileReadError(int errorType)
00084 {
00085 cerr << "\nInput file error:\n\n";
00086
00087 switch (errorType)
00088 {
00089 case FILEERR_64TRANSACTIONS:
00090 cerr << "A customer has more than 64 transactions.\n";
00091 break;
00092 case FILEERR_NOTFOUNDBINARY:
00093 cerr << "The input file either does not exist, or is not\n";
00094 cerr << "a valid binary input file (Perhaps try running\n";
00095 cerr << "SPAM with the -ascii flag to see if it is an\n";
00096 cerr << "ASCII input file)\n";
00097 break;
00098 case FILEERR_NOTFOUNDASCII:
00099 cerr << "The input file either does not exist, or is not\n";
00100 cerr << "a valid ascii input file. If the input file was\n";
00101 cerr << "automatically generated using a program like AssocGen,\n";
00102 cerr << "perhaps try running SPAM without the -ascii flag to\n";
00103 cerr << "see if it is a binary input file.\n";
00104 break;
00105 default:
00106 cerr << "Unknown file read error.\n";
00107 }
00108
00109 cerr << "\nNotes about the input file:\n";
00110 cerr << "The input file should be an ASCII text file containing\n";
00111 cerr << "three integers, separated by spaces, on each line:\n";
00112 cerr << "<Customer ID> <Transaction ID> <Item ID>\n";
00113 cerr << "Customer IDs and Item IDs should be assigned relative to\n";
00114 cerr << "the overall transactional database. Transaction IDs should\n";
00115 cerr << "be assigned relative to the customer they belong to. Each\n";
00116 cerr << "customer can have no more than 64 transactions. Make sure to\n";
00117 cerr << "use the -ascii flag, since the input is ASCII text.\n";
00118
00119 exit(0);
00120 }
00121
00122
00123
00124
00125
00126
00127
00128
// Replaces `array` with a freshly allocated array of `newSize` ints.
//
// The first min(oldSize, newSize) elements are copied from the old array;
// every remaining element of the new array is set to 0.  The old array is
// released with delete[] and `array` is updated to point at the new one.
//
// array:   in/out; must be null or have been allocated with new[].
// oldSize: number of valid elements currently in `array`.
// newSize: number of elements to allocate.
void IncArraySize(int*& array, int oldSize, int newSize)
{
    int *grown = new int[newSize];

    // Never copy more than fits: a caller that passes newSize < oldSize
    // must not overrun the new allocation.
    int keep = (oldSize < newSize) ? oldSize : newSize;
    if (array == 0 || keep < 0)
        keep = 0;

    int pos = 0;
    for (; pos < keep; pos++)
        grown[pos] = array[pos];
    for (; pos < newSize; pos++)
        grown[pos] = 0;

    delete [] array;
    array = grown;
}
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 bool CollectBinaryInfo(
00179 char* filename,
00180 int& custCount,
00181 int& itemCount,
00182 int& transCount,
00183 int*& custTransCount,
00184 int*& itemCustCount,
00185 int*& cids,
00186 int*& tids,
00187 int*& iids,
00188 int*& transLens,
00189 int& overallCount,
00190 int& transLensLength)
00191 {
00192
00193
00194
00195 ResizableArray * cidArr = new ResizableArray(64);
00196 ResizableArray * tidArr = new ResizableArray(64);
00197 ResizableArray * iidArr = new ResizableArray(64);
00198 ResizableArray * transLensArr = new ResizableArray(64);
00199 int custID;
00200 int transID;
00201 int numItem;
00202 int *itemlist;
00203 ifstream inFile;
00204 int custTransSize = 400;
00205 int itemCustSize = 400;
00206 bool useStdin;
00207
00208
00209
00210 if (filename == 0)
00211 useStdin = true;
00212 else
00213 useStdin = false;
00214
00215 if (!useStdin)
00216 {
00217 inFile.open(filename, ios::binary);
00218 if (!inFile.is_open())
00219 {
00220 PrintFileReadError(FILEERR_NOTFOUNDBINARY);
00221 return false;
00222 }
00223 }
00224
00225
00226
00227 custCount = -1;
00228 itemCount = -1;
00229 transCount = 0;
00230 custTransCount = new int[custTransSize];
00231 itemCustCount = new int[itemCustSize];
00232
00233 for (int cti = 0; cti < custTransSize; cti++)
00234 custTransCount[cti] = 0;
00235
00236 for (int ici = 0; ici < itemCustSize; ici++)
00237 itemCustCount[ici] = 0;
00238
00239
00240
00241 int *itemPrevCustID = new int[itemCustSize];
00242 for (int ipi = 0; ipi < itemCustSize; ipi++)
00243 itemPrevCustID[ipi] = -1;
00244
00245
00246 while ((!useStdin && !inFile.eof()) || (useStdin && !cin.eof()))
00247 {
00248
00249 if (useStdin)
00250 {
00251
00252 cin.read((char *)&custID, sizeof(int));
00253 cin.read((char *)&transID, sizeof(int));
00254 cin.read((char *)&numItem, sizeof(int));
00255
00256 itemlist = new int[numItem];
00257
00258
00259 cin.read((char *)itemlist, numItem * sizeof(int));
00260 }
00261 else
00262 {
00263
00264 inFile.read((char *)&custID, sizeof(int));
00265 inFile.read((char *)&transID, sizeof(int));
00266 inFile.read((char *)&numItem, sizeof(int));
00267
00268 itemlist = new int[numItem];
00269
00270
00271 inFile.read((char *)itemlist, numItem * sizeof(int));
00272 }
00273
00274
00275 if ((!useStdin && inFile.eof()) || (useStdin && cin.eof()))
00276 {
00277 delete [] itemlist;
00278 break;
00279 }
00280
00281 transLensArr->Add(numItem);
00282 for (int i = 0; i < numItem; i++)
00283 {
00284
00285 cidArr->Add(custID);
00286 tidArr->Add(transID);
00287 iidArr->Add(itemlist[i]);
00288 }
00289
00290
00291
00292 if (custID >= custCount)
00293 {
00294 custCount = custID + 1;
00295
00296
00297 if (custCount > custTransSize)
00298 {
00299 int newSize = (custCount > 2 * custTransSize) ?
00300 custCount : 2 * custTransSize;
00301 IncArraySize(custTransCount, custTransSize, newSize);
00302 custTransSize = newSize;
00303 }
00304 }
00305 custTransCount[custID]++;
00306 transCount++;
00307
00308
00309
00310 for (int ici = 0; ici < numItem; ici++)
00311 {
00312 if (itemlist[ici] >= itemCount)
00313 itemCount = itemlist[ici] + 1;
00314 }
00315
00316
00317 if (itemCount >= itemCustSize)
00318 {
00319 int newSize = (itemCount > 2 * itemCustSize) ?
00320 itemCount : 2 * itemCustSize;
00321 IncArraySize(itemCustCount, itemCustSize, newSize);
00322 IncArraySize(itemPrevCustID, itemCustSize, newSize);
00323 itemCustSize = newSize;
00324 }
00325
00326 for (int itemIndex = 0; itemIndex < numItem; itemIndex++)
00327 {
00328
00329 if (itemPrevCustID[itemlist[itemIndex]] != custID)
00330 {
00331 itemCustCount[itemlist[itemIndex]]++;
00332 itemPrevCustID[itemlist[itemIndex]] = custID;
00333 }
00334 }
00335
00336 delete [] itemlist;
00337 }
00338
00339 delete [] itemPrevCustID;
00340 if (!useStdin)
00341 inFile.close();
00342
00343
00344
00345 cidArr->ToArray(cids, overallCount);
00346 tidArr->ToArray(tids, overallCount);
00347 iidArr->ToArray(iids, overallCount);
00348 transLensArr->ToArray(transLens, transLensLength);
00349 delete cidArr;
00350 delete tidArr;
00351 delete iidArr;
00352 delete transLensArr;
00353
00354 return true;
00355 }
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398 bool CollectASCIIInfo(
00399 char* filename,
00400 bool isStringFile,
00401 StringMap*& custStrMap,
00402 StringMap*& transStrMap,
00403 StringMap*& itemStrMap,
00404 int& custCount,
00405 int& itemCount,
00406 int& lineCount,
00407 int*& custTransCount,
00408 int*& itemCustCount,
00409 int*& cids,
00410 int*& tids,
00411 int*& iids,
00412 int& overallCount)
00413 {
00414
00415
00416
00417 ResizableArray * cidArr = new ResizableArray(64);
00418 ResizableArray * tidArr = new ResizableArray(64);
00419 ResizableArray * iidArr = new ResizableArray(64);
00420 int custID;
00421 int transID;
00422 int itemID;
00423 int prevTransID = -1;
00424 ifstream inFile;
00425 int custTransSize = 400;
00426 int itemCustSize = 400;
00427 int i;
00428 bool useStdin;
00429 int custStrMapID = 1;
00430 int transStrMapID = 1;
00431 int itemStrMapID = 1;
00432
00433
00434 if (isStringFile)
00435 {
00436 custStrMap = new StringMap();
00437 transStrMap = new StringMap();
00438 itemStrMap = new StringMap();
00439 }
00440
00441
00442
00443 if (filename == 0)
00444 useStdin = true;
00445 else
00446 useStdin = false;
00447
00448 if (!useStdin)
00449 {
00450 inFile.open(filename);
00451 if (!inFile.is_open())
00452 {
00453 PrintFileReadError(FILEERR_NOTFOUNDASCII);
00454 return false;
00455 }
00456 }
00457
00458
00459 custCount = -1;
00460 itemCount = -1;
00461 lineCount = 0;
00462 custTransCount = new int[custTransSize];
00463 itemCustCount = new int[itemCustSize];
00464 for (i = 0; i < custTransSize; i++)
00465 custTransCount[i] = 0;
00466 for (i = 0; i < itemCustSize; i++)
00467 itemCustCount[i] = 0;
00468
00469
00470
00471
00472 int *itemPrevCustID = new int[itemCustSize];
00473 for (i = 0; i < itemCustSize; i++)
00474 itemPrevCustID[i] = -1;
00475
00476
00477 while ((!useStdin && !inFile.eof()) || (useStdin && !cin.eof()))
00478 {
00479
00480 if (isStringFile)
00481 {
00482
00483
00484
00485 char *custStr = new char[MAX_STRING_SIZE];
00486 char *transStr = new char[MAX_STRING_SIZE];
00487 char *itemStr = new char[MAX_STRING_SIZE];
00488 if (useStdin)
00489 {
00490 cin.getline(custStr, MAX_STRING_SIZE);
00491 cin.getline(transStr, MAX_STRING_SIZE);
00492 cin.getline(itemStr, MAX_STRING_SIZE);
00493 }
00494 else
00495 {
00496 inFile.getline(custStr, MAX_STRING_SIZE);
00497 inFile.getline(transStr, MAX_STRING_SIZE);
00498 inFile.getline(itemStr, MAX_STRING_SIZE);
00499 }
00500
00501
00502
00503 const int * custKeyID = custStrMap->GetKey(custStr);
00504 const int * transKeyID = transStrMap->GetKey(transStr);
00505 const int * itemKeyID = itemStrMap->GetKey(itemStr);
00506 if (custKeyID != 0)
00507 custID = *custKeyID;
00508 else
00509 {
00510 custID = custStrMapID;
00511 custStrMap->Add(custID, custStr);
00512 custStrMapID++;
00513 }
00514 if (transKeyID != 0)
00515 transID = *transKeyID;
00516 else
00517 {
00518 transID = transStrMapID;
00519 transStrMap->Add(transID, transStr);
00520 transStrMapID++;
00521 }
00522 if (itemKeyID != 0)
00523 itemID = *itemKeyID;
00524 else
00525 {
00526 itemID = itemStrMapID;
00527 itemStrMap->Add(itemID, itemStr);
00528 itemStrMapID++;
00529 }
00530 }
00531 else
00532 {
00533 if (useStdin)
00534 {
00535 cin >> custID;
00536 cin >> transID;
00537 cin >> itemID;
00538 }
00539 else
00540 {
00541 inFile >> custID;
00542 inFile >> transID;
00543 inFile >> itemID;
00544 }
00545 }
00546
00547
00548 cidArr->Add(custID);
00549 tidArr->Add(transID);
00550 iidArr->Add(itemID);
00551
00552
00553 if (custID >= custCount)
00554 {
00555 custCount = custID + 1;
00556
00557
00558 if (custCount > custTransSize)
00559 {
00560 int newSize = (custCount > 2 * custTransSize) ?
00561 custCount : 2 * custTransSize;
00562 IncArraySize(custTransCount, custTransSize, newSize);
00563 custTransSize = newSize;
00564 }
00565 }
00566
00567
00568 if (prevTransID != transID)
00569 {
00570 custTransCount[custID]++;
00571 prevTransID = transID;
00572 }
00573 lineCount++;
00574
00575
00576 if (itemID >= itemCount)
00577 {
00578 itemCount = itemID + 1;
00579
00580
00581 if (itemCount >= itemCustSize)
00582 {
00583 int newSize = (itemCount > 2 * itemCustSize) ?
00584 itemCount : 2 * itemCustSize;
00585 IncArraySize(itemCustCount, itemCustSize, newSize);
00586 IncArraySize(itemPrevCustID, itemCustSize, newSize);
00587 itemCustSize = newSize;
00588 }
00589 }
00590
00591
00592 if (itemPrevCustID[itemID] != custID)
00593 {
00594 itemCustCount[itemID]++;
00595 itemPrevCustID[itemID] = custID;
00596 }
00597 }
00598
00599 delete [] itemPrevCustID;
00600 if (!useStdin)
00601 inFile.close();
00602
00603
00604
00605 cidArr->ToArray(cids, overallCount);
00606 tidArr->ToArray(tids, overallCount);
00607 iidArr->ToArray(iids, overallCount);
00608 delete cidArr;
00609 delete tidArr;
00610 delete iidArr;
00611
00612 return true;
00613 }
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642 bool ReadBinary(
00643 char* filename,
00644 int* cids,
00645 int* tids,
00646 int* iids,
00647 int numEntries,
00648 int* transLens,
00649 int transLensLength,
00650 int* custBitmapMap,
00651 int** custMap,
00652 int* itemMap,
00653 SeqBitmap** f1Buff)
00654 {
00655
00656
00657
00658 int custID;
00659 int transID;
00660 int numItem;
00661 int *itemlist;
00662 int prevCustID = -1;
00663 int bitmapID = 0;
00664 int index = 0;
00665 ifstream inFile;
00666
00667
00668
00669 bool secondScan = false;
00670 int lenIndex = 0;
00671 int scanIndex = 0;
00672
00673 if (secondScan)
00674 {
00675 if (filename == 0)
00676 {
00677 cout << "Error: cannot read input a second time when -stdin is on";
00678 exit(-1);
00679 }
00680
00681
00682 inFile.open(filename, ios::binary);
00683 if (!inFile.is_open())
00684 {
00685 return false;
00686 }
00687 }
00688
00689 while ( (secondScan && !inFile.eof())
00690 || (!secondScan && scanIndex < numEntries) )
00691 {
00692 if (secondScan)
00693 {
00694 inFile.read((char *)&custID, sizeof(int));
00695 inFile.read((char *)&transID, sizeof(int));
00696 inFile.read((char *)&numItem, sizeof(int));
00697 itemlist = new int[numItem];
00698
00699
00700 inFile.read((char *)itemlist, numItem * sizeof(int));
00701
00702
00703 if (inFile.eof())
00704 break;
00705 }
00706 else
00707 {
00708 numItem = transLens[lenIndex];
00709 itemlist = new int[numItem];
00710 custID = cids[scanIndex];
00711 transID = tids[scanIndex];
00712 for (int i = 0; i < numItem; i++)
00713 itemlist[i] = iids[scanIndex + i];
00714 scanIndex+=numItem;
00715 lenIndex++;
00716 }
00717
00718 if (custID != prevCustID)
00719 {
00720 prevCustID = custID;
00721 bitmapID = custBitmapMap[custID];
00722 index = custMap[bitmapID][custID] * BITMAP_LENGTH[bitmapID];
00723 }
00724
00725
00726 for (int j = 0; j < numItem; j++)
00727 {
00728 if (itemMap[itemlist[j]] >= 0)
00729 f1Buff[itemMap[itemlist[j]]]->
00730 FillEmptyPosition(bitmapID, index);
00731 }
00732
00733
00734 index++;
00735 delete [] itemlist;
00736 }
00737
00738 if (secondScan)
00739 inFile.close();
00740 return true;
00741 }
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774 bool ReadASCII(
00775 char* filename,
00776 int* cids,
00777 int* tids,
00778 int* iids,
00779 int numEntries,
00780 int* custBitmapMap,
00781 int** custMap,
00782 int* itemMap,
00783 SeqBitmap** f1Buff)
00784 {
00785
00786
00787
00788 int custID;
00789 int transID;
00790 int itemID;
00791 int prevTransID = -1;
00792 int prevCustID = -1;
00793 int bitmapID = 0;
00794 int index = 0;
00795 ifstream inFile;
00796
00797
00798 bool secondScan = false;
00799 int scanIndex = 0;
00800
00801 if (secondScan)
00802 {
00803 if (filename == 0)
00804 {
00805 cout << "Error: cannot read input a second time when -stdin is on";
00806 exit(-1);
00807 }
00808
00809 inFile.open(filename);
00810 if (!inFile.is_open())
00811 {
00812 return false;
00813 }
00814 }
00815
00816
00817 while ( (secondScan && !inFile.eof())
00818 || (!secondScan && scanIndex < numEntries))
00819 {
00820
00821 if (secondScan)
00822 {
00823
00824 inFile >> custID;
00825
00826 if (inFile.eof())
00827 break;
00828
00829 inFile >> transID;
00830 inFile >> itemID;
00831 }
00832 else
00833 {
00834 custID = cids[scanIndex];
00835 transID = tids[scanIndex];
00836 itemID = iids[scanIndex];
00837 scanIndex++;
00838 }
00839
00840 if (custID != prevCustID)
00841 {
00842 prevCustID = custID;
00843 bitmapID = custBitmapMap[custID];
00844 index = custMap[bitmapID][custID] * BITMAP_LENGTH[bitmapID] - 1;
00845 }
00846
00847 if (prevTransID != transID)
00848 {
00849 index++;
00850 prevTransID = transID;
00851 }
00852
00853
00854 if (itemMap[itemID] >= 0)
00855 f1Buff[itemMap[itemID]]->FillEmptyPosition(bitmapID, index);
00856 }
00857
00858 if (secondScan)
00859 inFile.close();
00860
00861 return true;
00862 }
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877 DatasetInfo* ReadDataset(
00878 bool isBinaryFile,
00879 bool isStringFile,
00880 char *filename,
00881 double minSupPercent,
00882 StringMap *&custStrMap,
00883 StringMap *&transStrMap,
00884 StringMap *&itemStrMap)
00885 {
00886
00887 DatasetInfo* info = new DatasetInfo();
00888
00889
00890 int tempCustCount = -1;
00891 int itemCount = -1;
00892 int transCount = 0;
00893 int *custTransCount;
00894 int *itemCustCount;
00895
00896
00897
00898 int *bitmapSizes;
00899
00900 int *custBitmapMap;
00901 int **custMap;
00902
00903 int noTransCount = 0;
00904 int *itemMap;
00905
00906 int numCompression;
00907
00908 bool result = false;
00909 int i, j;
00910
00911
00912
00913
00914
00915
00916
00917
00918 int *cids;
00919 int *tids;
00920 int *iids;
00921 int numEntries;
00922 int *transLens;
00923 int transLensLength;
00924
00925 if (isBinaryFile)
00926 {
00927 result = CollectBinaryInfo(
00928 filename,
00929 tempCustCount,
00930 itemCount,
00931 transCount,
00932 custTransCount,
00933 itemCustCount,
00934 cids,
00935 tids,
00936 iids,
00937 transLens,
00938 numEntries,
00939 transLensLength);
00940 }
00941 else
00942 {
00943 result = CollectASCIIInfo(
00944 filename,
00945 isStringFile,
00946 custStrMap,
00947 transStrMap,
00948 itemStrMap,
00949 tempCustCount,
00950 itemCount,
00951 transCount,
00952 custTransCount,
00953 itemCustCount,
00954 cids,
00955 tids,
00956 iids,
00957 numEntries);
00958 }
00959 if (!result)
00960 {
00961 delete info;
00962 return 0;
00963 }
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983 info->maxCustTrans = 0;
00984 noTransCount = 0;
00985 custBitmapMap = new int[tempCustCount];
00986 bitmapSizes = new int[NUM_BITMAP];
00987 custMap = new int * [NUM_BITMAP];
00988
00989 for (i = 0; i < NUM_BITMAP; i++)
00990 {
00991 custMap[i] = new int[tempCustCount];
00992 bitmapSizes[i] = 0;
00993 }
00994
00995
00996 for (i = 0; i < tempCustCount; i++)
00997 {
00998 if (custTransCount[i] > BITMAP_LENGTH[NUM_BITMAP - 1])
00999 {
01000
01001
01002 PrintFileReadError(FILEERR_64TRANSACTIONS);
01003 }
01004
01005 if (custTransCount[i] <= 0)
01006 {
01007
01008 custBitmapMap[i] = -1;
01009 noTransCount++;
01010 }
01011 else
01012 {
01013
01014 for (j = 0; j < NUM_BITMAP; j++)
01015 if (custTransCount[i] <= BITMAP_LENGTH[j])
01016 {
01017 custBitmapMap[i] = j;
01018 break;
01019 }
01020 }
01021
01022
01023
01024 for (j = 0; j < NUM_BITMAP; j++)
01025 if (custBitmapMap[i] == j)
01026 {
01027 custMap[j][i] = bitmapSizes[j];
01028 bitmapSizes[j]++;
01029 }
01030 else
01031 custMap[j][i] = -1;
01032
01033
01034 if (custTransCount[i] > info->maxCustTrans && custTransCount[i] <= 64)
01035 info->maxCustTrans = custTransCount[i];
01036 }
01037
01038
01039 info->custCount = tempCustCount - noTransCount;
01040 info->bitmapSizes = new int[NUM_BITMAP];
01041 for (i = 0; i < NUM_BITMAP; i++)
01042 info->bitmapSizes[i] = bitmapSizes[i];
01043
01044
01045
01046
01047
01048 #ifdef ___PREFIXSPAN___
01049
01050 info->minSup = (int) ceil(minSupPercent * info->custCount);
01051 #else
01052
01053 info->minSup = (int) floor(minSupPercent * info->custCount + 0.5);
01054 #endif
01055
01056 info->minSup = (info->minSup == 0) ? 1 : info->minSup;
01057
01058
01059
01060
01061
01062
01063
01064 info->f1Size = 0;
01065 itemMap = new int[itemCount];
01066
01067 for (i = 0; i < itemCount; i++)
01068 {
01069 if (itemCustCount[i] >= info->minSup )
01070 {
01071 itemMap[i] = info->f1Size;
01072 info->f1Size++;
01073 }
01074 else
01075 {
01076 itemMap[i] = -1;
01077 }
01078 }
01079
01080
01081 if (info->f1Size == 0)
01082 return info;
01083
01084 numCompression = info->maxCustTrans * info->f1Size;
01085
01086
01087
01088 int size64 = bitmapSizes[4];
01089 int size32 = bitmapSizes[3] + size64;
01090 int size16 = bitmapSizes[2] + size32;
01091 int size8 = bitmapSizes[1] + size16;
01092 int size4 = bitmapSizes[0] + size8;
01093
01094 size4 = Bitmap4::CalcSize(size4);
01095 size8 = Bitmap8::CalcSize(size8);
01096 size16 = Bitmap16::CalcSize(size16);
01097 size32 = Bitmap32::CalcSize(size32);
01098 size64 = Bitmap64::CalcSize(size64);
01099
01100
01101
01102 SeqBitmap::MemAlloc(size4 * (info->maxCustTrans * info->f1Size) *
01103 (NUM_BITMAPS_USED + info->f1BufSize),
01104 size8 * (info->maxCustTrans * info->f1Size) *
01105 (NUM_BITMAPS_USED + info->f1BufSize),
01106 size16 * (info->maxCustTrans * info->f1Size) *
01107 (NUM_BITMAPS_USED + info->f1BufSize),
01108 size32 * (info->maxCustTrans * info->f1Size) *
01109 (NUM_BITMAPS_USED + info->f1BufSize),
01110 size64 * (info->maxCustTrans * info->f1Size) *
01111 (NUM_BITMAPS_USED + info->f1BufSize));
01112
01113
01114
01115
01116
01117 info->f1BufSize = info->f1Size * (numCompression + 1) + 10;
01118
01119 info->f1Buff = new SeqBitmap * [info->f1BufSize];
01120
01121 for (i = 0; i < info->f1Size; i++)
01122 info->f1Buff[i] = new SeqBitmap(
01123 bitmapSizes[0],
01124 bitmapSizes[1],
01125 bitmapSizes[2],
01126 bitmapSizes[3],
01127 bitmapSizes[4]);
01128
01129
01130
01131
01132 for (; i < info->f1BufSize; i++)
01133 info->f1Buff[i] = 0;
01134
01135 info->f1NameBuff = new int[info->f1BufSize];
01136 for (i = 0; i < itemCount; i++)
01137 if (itemMap[i] >= 0)
01138 info->f1NameBuff[itemMap[i]] = i;
01139
01140
01141
01142
01143
01144
01145 info->sListBuff = new int[(info->maxCustTrans * info->f1Size)
01146 * info->f1Size + 1];
01147
01148
01149
01150
01151
01152
01153 info->iListBuff = new int[(info->maxCustTrans * info->f1Size )
01154 * info->f1Size + 1];
01155
01156 for (i = 0; i < info->f1Size; i++)
01157 info->sListBuff[i] = i;
01158
01159
01160 info->countBuff = new CountInfo[info->f1Size];
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171 if (isBinaryFile)
01172 result = ReadBinary(
01173 filename,
01174 cids,
01175 tids,
01176 iids,
01177 numEntries,
01178 transLens,
01179 transLensLength,
01180 custBitmapMap,
01181 custMap,
01182 itemMap,
01183 info->f1Buff);
01184 else
01185 result = ReadASCII(
01186 filename,
01187 cids,
01188 tids,
01189 iids,
01190 numEntries,
01191 custBitmapMap,
01192 custMap,
01193 itemMap,
01194 info->f1Buff);
01195
01196 if (!result)
01197 {
01198 delete info;
01199 return 0;
01200 }
01201
01202
01203 delete [] cids;
01204 delete [] tids;
01205 delete [] iids;
01206
01207
01208 delete [] custTransCount;
01209 delete [] itemCustCount;
01210 delete [] custBitmapMap;
01211 for (i = 0; i < NUM_BITMAP; i++)
01212 delete [] custMap[i];
01213 delete [] custMap;
01214 delete [] bitmapSizes;
01215 delete [] itemMap;
01216
01217 return info;
01218 }
01219
01220