00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #include <iostream>
00041 #include <fstream>
00042 #include <time.h>
00043 #include <stdio.h>
00044 #include <list>
00045 #include <vector>
00046 #include <algorithm>
00047 #include <string>
00048 #include <assert.h>
00049 #include <math.h>
00050 #include <map>
00051 #include "Bitmap.h"
00052 #include "TreeNode.h"
00053 #include "Transaction.h"
00054 #include "ItemsetOutput.h"
00055
00056 using namespace std;
00057
00058
00059
00060
00061 typedef vector<TreeNode *> NodeList;
00062 typedef vector<TreeNode *> BranchList;
00063 typedef vector<Bitmap *> BitmapList;
00064 typedef vector<BaseBitmap *> BaseBitmapList;
00065 typedef vector<int> ItemSet;
00066 typedef map<long, ItemSet *> HashTable;
00067
00068
00069 class SubtreeEstimate {
00070 public:
00071 int Count;
00072 int Sum;
00073 SubtreeEstimate () {
00074 Count = 0;
00075 Sum = 0;
00076 }
00077 };
00078
00079
00080 class TailElement {
00081 public:
00082 int Count;
00083 int Item;
00084
00085 TailElement () {
00086 Count = 0;
00087 Item = 0;
00088 }
00089
00090 bool operator < (const TailElement& rhs) const {
00091 return this->Count < rhs.Count;
00092 };
00093 };
00094
00095
00096
00097
00098 string method;
00099 char* outFilename;
00100 ItemsetOutput *outFile;
00101 bool outputMFI = false;
00102 bool MethodIsFI = false;
00103 bool MethodIsFCI = false;
00104 int ItemCount;
00105 int TransCount;
00106 double MSF;
00107 int MS;
00108 int VerScale = 1;
00109 int HorScale = 1;
00110 bool GoFHUT = true;
00111 bool HUTMFI = true;
00112 bool PEPrune = true;
00113 bool Reorder = true;
00114
00115
00116
00117
00118
00119
00120 int CountFHUT = 0;
00121 int CountNodes = 0;
00122 int CountCounts = 0;
00123 int CountAnds = 0;
00124 int CountSmallAnds = 0;
00125 int CountPEPrunes = 0;
00126 int CountCheckPosition = 0;
00127 int CountHUTMFI = 0;
00128 int CountHUTMFISuccess = 0;
00129 int CountRebuilds;
00130
00131
00132
00133
00134
00135
00136 int maxtail = 0;
00137 int MFISize = 0;
00138 int MFIDepth = 0;
00139 int F1size = 0;
00140 int FullF1size = 0;
00141 int k = 50;
00142 int MAX_compID = 1;
00143 int projectDepth = -1;
00144 int EstimateSize;
00145 int EstimateDiv = 5;
00146 int maxItemsetSize = 0;
00147
00148
00149
00150
00151
00152
00153 NodeList F1;
00154 BitmapList TransBuffy;
00155 BaseBitmapList NameBuffy;
00156 NodeList NodeBuffy;
00157 TreeNode *Root;
00158 TailElement *gTail;
00159 TailElement *TailBuffy;
00160 Bitmap *NullTrans;
00161 int *ItemMap;
00162 int *ReverseItemMap;
00163 BaseBitmapList MFI;
00164 HashTable HT;
00165 vector <int> SupportCountList;
00166 BaseBitmap* TempName;
00167 SubtreeEstimate* EstimateBuffy;
00168 int *MFIBySizes;
00169 int *ItemsetBuffy;
00170
00171
00172
00173
00174
00175
00176 time_t total_start, total_finish;
00177 double total_time;
00178 time_t read_start, read_finish;
00179 double read_time;
00180 time_t algorithm_start, algorithm_finish;
00181 double algorithm_time;
00182 time_t print_start, print_finish;
00183 double print_time;
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 void AddToF1(TreeNode *newNode) {
00201 if (F1.empty())
00202 F1.push_back(newNode);
00203 else {
00204
00205 NodeList::iterator noli = F1.begin();
00206 while (noli != F1.end()) {
00207 if ((*noli)->Trans->_count >= newNode->Trans->_count) {
00208 F1.insert(noli, newNode);
00209 return ;
00210 }
00211 noli++;
00212 }
00213
00214
00215 F1.push_back(newNode);
00216 }
00217 }
00218
00219
00220
00221
00222
00223
00224 void F1UsingProb(double P) {
00225 int blah = 0;
00226
00227
00228
00229 for (int i = 0; i < ItemCount / HorScale; i++) {
00230
00231 Bitmap *trans = new Bitmap(TransCount);
00232 trans->FillRand(P);
00233
00234
00235 if (trans->Count(blah) >= MS) {
00236 TreeNode *node = new TreeNode(NULL, trans, 1, -1, i, -1, 0);
00237 for (int r = 0; r < HorScale; r++)
00238 AddToF1(node);
00239 } else
00240 delete trans;
00241 }
00242 }
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254 void F1FromFile(char *filename, bool isAsciiFile) {
00255 TransCount = 0;
00256 int MAXitemID = -1;
00257 int *Counters = new int[MAX_NUM_ITEMS];
00258 int *F1IndexList = new int[MAX_NUM_ITEMS];
00259 int *InvF1IndexList = new int[MAX_NUM_ITEMS];
00260 int itemIndex = 0;
00261
00262
00263 for (int ct = 0; ct < MAX_NUM_ITEMS; ++ct) {
00264 Counters[ct] = 0;
00265 F1IndexList[ct] = -1;
00266 InvF1IndexList[ct] = -1;
00267 }
00268
00269 time(&read_start);
00270 BitmapList Trans;
00271
00272 int *itemlist = new int[MAX_NUM_ITEMS];
00273 InputData *inData = new InputData(filename, itemlist, isAsciiFile);
00274 if (!inData->isOpen()) {
00275 cerr << "Input file not found!" << endl;
00276 exit(1);
00277 }
00278
00279 Transaction *newTransaction = inData->getNextTransaction();
00280 while (newTransaction != NULL) {
00281 for (itemIndex = 0; itemIndex < newTransaction->length; itemIndex++) {
00282
00283 if (newTransaction->itemlist[itemIndex] >= MAX_NUM_ITEMS) {
00284 cerr << "Read item_id=" << newTransaction->itemlist[itemIndex]
00285 << " which is more than the max item_id allowed (" << MAX_NUM_ITEMS << ")";
00286 exit(1);
00287 }
00288
00289 if (newTransaction->itemlist[itemIndex] > MAXitemID)
00290 MAXitemID = newTransaction->itemlist[itemIndex];
00291
00292 Counters[newTransaction->itemlist[itemIndex]]++;
00293 }
00294
00295 TransCount++;
00296 delete newTransaction;
00297 newTransaction = inData->getNextTransaction();
00298 }
00299
00300 delete newTransaction;
00301 delete inData;
00302
00303 MS = (int)ceil(MSF * (double)TransCount);
00304
00305
00306 ItemCount = MAXitemID + 1;
00307
00308 int F1items = 0;
00309
00310 for (itemIndex = 0; itemIndex <= MAXitemID; itemIndex++) {
00311 if (Counters[itemIndex] >= MS) {
00312 F1IndexList[F1items++] = itemIndex;
00313 InvF1IndexList[itemIndex] = F1items - 1;
00314 Bitmap *trans = new Bitmap(TransCount);
00315 trans->_count = Counters[itemIndex];
00316 Trans.push_back(trans);
00317 }
00318 }
00319
00320 int transIndex = 0;
00321 InputData *inData2 = new InputData(filename, itemlist, isAsciiFile);
00322 newTransaction = inData2->getNextTransaction();
00323 while (newTransaction != NULL) {
00324 for (int itemIndex = 0; itemIndex < newTransaction->length; itemIndex++) {
00325 if (InvF1IndexList[newTransaction->itemlist[itemIndex]] != -1) {
00326 Trans[InvF1IndexList[newTransaction->itemlist[itemIndex]]]->FillEmptyPosition(transIndex);
00327 }
00328 }
00329
00330 transIndex++;
00331 delete newTransaction;
00332 newTransaction = inData2->getNextTransaction();
00333 }
00334
00335 delete newTransaction;
00336 delete inData2;
00337 delete [] itemlist;
00338
00339 time(&read_finish);
00340 read_time = difftime(read_finish, read_start);
00341 printf("Reading input time: %.2f seconds.\n", read_time);
00342
00343
00344 BitmapList::iterator bli = Trans.begin();
00345 itemIndex = 0;
00346
00347 while (bli != Trans.end()) {
00348 TreeNode *node = new TreeNode(
00349 NULL,
00350 (*bli),
00351 1,
00352 -1,
00353 F1IndexList[itemIndex],
00354 -1,
00355 0);
00356 AddToF1(node);
00357 bli++;
00358 itemIndex++;
00359 }
00360
00361 delete [] Counters;
00362 delete [] F1IndexList;
00363 delete [] InvF1IndexList;
00364 }
00365
00366
00367
00368
00369
00370
00371 void PrintMFI() {
00372
00373 outFile = new ItemsetOutput(outFilename);
00374 if (!outFile->isOpen()) {
00375 cerr << "Output file not open!" << endl;
00376 exit(1);
00377 }
00378
00379 if (FullF1size != 0) {
00380
00381 int* ITEMS = new int[FullF1size];
00382 for (int i = 0; i < MFISize; i++) {
00383 int j = 0;
00384 for (int cc = 0; cc < FullF1size; cc++) {
00385 if (MFI[i]->CheckPosition(cc, CountCheckPosition) > 0) {
00386 ITEMS[j] = ItemMap[cc];
00387 j++;
00388 }
00389 }
00390
00391 outFile->printSet(MFI[i]->_count, ITEMS, SupportCountList[i]);
00392 }
00393 delete [] ITEMS;
00394 } else {
00395 outFile->printSet(0, NULL, TransCount);
00396 }
00397
00398 delete outFile;
00399 }
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419 bool LMFISuperSet(TreeNode* location) {
00420 return (location->rEnd > location->rBegin);
00421 }
00422
00423
00424
00425
00426
00427
00428 void AddToFI(TreeNode *C) {
00429 int itemsetIndex = 0;
00430 for (int cc = 0; cc < FullF1size; cc++) {
00431 if (C->Name->CheckPosition(cc, CountCheckPosition) > 0) {
00432 ItemsetBuffy[itemsetIndex] = ItemMap[cc];
00433 itemsetIndex++;
00434 }
00435 }
00436
00437 MFIBySizes[C->Name->_count]++;
00438 if (C->Name->_count > maxItemsetSize)
00439 maxItemsetSize = C->Name->_count;
00440
00441 if (outputMFI)
00442 outFile->printSet(C->Name->_count, ItemsetBuffy, C->Trans->_count);
00443
00444
00445 MFIDepth += C->Name->_count;
00446 MFISize++;
00447 }
00448
00449
00450
00451
00452
00453
00454 void AddToMFI(TreeNode *C) {
00455
00456 BaseBitmap *name = new BaseBitmap(*C->Name);
00457
00458
00459 MFI.push_back(name);
00460
00461
00462 C->rEnd++;
00463
00464
00465 MFIDepth += C->Name->_count;
00466 MFISize++;
00467
00468 SupportCountList.push_back(C->Trans->_count);
00469 MFIBySizes[name->_count]++;
00470 if (name->_count > maxItemsetSize)
00471 maxItemsetSize = name->_count;
00472 }
00473
00474
00475
00476
00477
00478
00479 void AddToFCI(TreeNode *C) {
00480
00481 HashTable::iterator h = HT.find(C->Trans->_count);
00482
00483
00484 if (h == HT.end()) {
00485
00486 ItemSet* newList = new ItemSet();
00487 newList->reserve(500);
00488 newList->push_back(MFI.size());
00489 HT.insert(HashTable::value_type(C->Trans->_count, newList));
00490
00491
00492 } else {
00493 for (ItemSet::reverse_iterator goli = (*h).second->rbegin();
00494 goli != (*h).second->rend();
00495 goli++)
00496 if (MFI[*goli]->Superset(C->Name))
00497 return ;
00498
00499
00500 (*h).second->push_back(MFI.size());
00501 }
00502
00503
00504 BaseBitmap *name = new BaseBitmap(*C->Name);
00505
00506
00507 MFI.push_back(name);
00508
00509
00510 MFIDepth += C->Name->_count;
00511 MFISize++;
00512
00513 SupportCountList.push_back(C->Trans->_count);
00514 MFIBySizes[name->_count]++;
00515 if (name->_count > maxItemsetSize)
00516 maxItemsetSize = name->_count;
00517 }
00518
00519 int SortLMFI(int rBegin, int rEnd, int sortBy) {
00520 int left = rBegin;
00521 int right = rEnd - 1;
00522 while (left <= right) {
00523 while (left < rEnd && !MFI[left]->CheckPosition(sortBy, CountCheckPosition))
00524 left++;
00525 while (right >= rBegin && MFI[right]->CheckPosition(sortBy, CountCheckPosition))
00526 right--;
00527 if (left < right) {
00528
00529
00530 BaseBitmap* tempBitmap = MFI[left];
00531 MFI[left] = MFI[right];
00532 MFI[right] = tempBitmap;
00533 tempBitmap = NULL;
00534 left++;
00535 right--;
00536 }
00537 }
00538
00539
00540 return left;
00541 }
00542
00543
00544
00545
00546
00547
00548
00549
00550 bool CheckHUTMFI(TreeNode *C, int iTAIL) {
00551
00552
00553
00554 int rBegin = C->rBegin;
00555 int rEnd = C->rEnd;
00556 for (; iTAIL < C->tEnd; iTAIL++) {
00557 rBegin = SortLMFI(rBegin, rEnd, F1[gTail[iTAIL].Item]->Prefix);
00558
00559 if (rEnd <= rBegin)
00560 return false;
00561 }
00562
00563 return true;
00564 }
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577 void ReorderTail(
00578 TreeNode *C,
00579 int &iTAIL,
00580 bool useComp,
00581 bool &NoChild,
00582 bool &AllFreq) {
00583
00584 int tailIndex = 0;
00585 int lol = 0;
00586
00587 for (lol = iTAIL; lol < C->tEnd; lol++) {
00588 Bitmap *trans = TransBuffy[C->Depth];
00589 int theCount = 0;
00590
00591
00592 if (useComp && (F1[gTail[lol].Item]->compID != C->compID)) {
00593 F1[gTail[lol].Item]->Trans->BuildRelComp(
00594 *TransBuffy[projectDepth]);
00595 F1[gTail[lol].Item]->compID = C->compID;
00596 CountRebuilds++;
00597 }
00598
00599
00600 if (useComp) {
00601
00602 trans->AndCompOnly(
00603 *C->Trans,
00604 *F1[gTail[lol].Item]->Trans,
00605 CountSmallAnds);
00606 theCount = trans->SmallCount(CountCounts);
00607
00608
00609 } else {
00610
00611 trans->AndOnly(*C->Trans, *F1[gTail[lol].Item]->Trans, CountAnds);
00612 theCount = trans->Count(CountCounts);
00613 }
00614
00615
00616 if (theCount >= MS) {
00617
00618 if (PEPrune && (trans->_count == C->Trans->_count)) {
00619
00620 C->Name->Or(*C->Name, *F1[gTail[lol].Item]->Name);
00621 CountPEPrunes++;
00622
00623
00624 } else {
00625 NoChild = false;
00626
00627
00628 TailBuffy[tailIndex].Count = theCount;
00629 TailBuffy[tailIndex].Item = gTail[lol].Item;
00630 tailIndex++;
00631 }
00632 } else
00633 AllFreq = false;
00634 }
00635
00636 sort(TailBuffy, TailBuffy + tailIndex);
00637
00638
00639 iTAIL = C->tEnd;
00640 C->tEnd = iTAIL + tailIndex;
00641 C->tBegin = iTAIL;
00642 int r = 0;
00643
00644
00645 for (lol = iTAIL; lol < C->tEnd; lol++) {
00646 gTail[lol] = TailBuffy[r];
00647 r++;
00648 }
00649 }
00650
00651
00652
00653
00654
00655
00656
00657
00658 void NoorderTail(
00659 TreeNode *C,
00660 int &iTAIL) {
00661
00662
00663 iTAIL = C->tEnd;
00664 C->tEnd = C->tEnd - C->tBegin + C->tEnd;
00665 C->tBegin = iTAIL;
00666 int r = 0;
00667
00668
00669 for (int lol = iTAIL; lol < C->tEnd; lol++) {
00670 gTail[lol] = gTail[lol - C->tEnd + C->tBegin];
00671 r++;
00672 }
00673 }
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684 void MAFIA(TreeNode *C, bool HUT, bool &FHUT, bool useComp) {
00685 CountNodes++;
00686 int iTAIL = C->tBegin;
00687 bool NoChild = true;
00688 bool AllFreq = true;
00689 FHUT = false;
00690 int beforeCountNodes = CountNodes;
00691 int frequentTailSize = C->tEnd - iTAIL;
00692
00693 if (iTAIL < C->tEnd) {
00694
00695 if (C != Root) {
00696 if (Reorder) {
00697 ReorderTail(C, iTAIL, useComp, NoChild, AllFreq);
00698 } else {
00699 NoorderTail(C, iTAIL);
00700 }
00701 }
00702
00703 frequentTailSize = C->tEnd - iTAIL;
00704
00705 int estimateTail = (int)(frequentTailSize / (double)EstimateDiv);
00706 if (estimateTail > 0 && C->Trans->_count != TransCount) {
00707 double estimateSubTree = EstimateBuffy[estimateTail].Sum / (double)EstimateBuffy[estimateTail].Count;
00708 double support = C->Trans->_count / (double) TransCount;
00709 double factor = 11.597 - 29.914 * (support - .52392) * (support - .52392);
00710 double cost = abs(factor * frequentTailSize / (1 - 1.2 * support));
00711
00712
00713
00714 if ((!useComp) && (estimateSubTree > cost)) {
00715
00716
00717 C->Trans->BuildSource();
00718
00719
00720 projectDepth = C->Depth - 1;
00721
00722
00723 C->compID = MAX_compID;
00724 MAX_compID++;
00725 useComp = true;
00726 }
00727 }
00728
00729
00730
00731
00732
00733 while (iTAIL < C->tEnd) {
00734
00735 Bitmap *trans = TransBuffy[C->Depth];
00736 BaseBitmap *name = NameBuffy[C->Depth];
00737 TreeNode *newNode = NodeBuffy[C->Depth];
00738
00739
00740 name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name);
00741
00742
00743 if (useComp && (F1[gTail[iTAIL].Item]->compID != C->compID)) {
00744
00745 F1[gTail[iTAIL].Item]->Trans->BuildRelComp(
00746 *TransBuffy[projectDepth]);
00747 F1[gTail[iTAIL].Item]->compID = C->compID;
00748 CountRebuilds++;
00749 }
00750
00751 int theCount = 0;
00752
00753
00754 if (useComp) {
00755
00756 trans->AndCompOnly(
00757 *C->Trans,
00758 *F1[gTail[iTAIL].Item]->Trans,
00759 CountSmallAnds);
00760
00761 if (Reorder)
00762 trans->_count = gTail[iTAIL].Count;
00763 else
00764 theCount = trans->SmallCount(CountCounts);
00765 } else {
00766
00767 trans->AndOnly(
00768 *C->Trans,
00769 *F1[gTail[iTAIL].Item]->Trans,
00770 CountAnds);
00771
00772 if (Reorder)
00773 trans->_count = gTail[iTAIL].Count;
00774 else
00775 theCount = trans->Count(CountCounts);
00776 }
00777 if (!Reorder && PEPrune && (theCount == C->Trans->_count)) {
00778 CountPEPrunes++;
00779 C->Name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name);
00780 iTAIL++;
00781 continue;
00782 }
00783
00784
00785
00786 if ((iTAIL != C->tBegin) && (C != Root))
00787 HUT = 0;
00788 else
00789 HUT = 1;
00790
00791 if (!AllFreq)
00792 HUT = 0;
00793
00794 if (Reorder || (theCount >= MS)) {
00795
00796 newNode->setTreeNode(name,
00797 trans,
00798 C->Depth + 1,
00799 C->compID,
00800 F1[gTail[iTAIL].Item]->Prefix,
00801 iTAIL + 1,
00802 C->tEnd);
00803
00804
00805
00806
00807
00808
00809
00810
00811 newNode->rEnd = C->rEnd;
00812 newNode->rBegin = SortLMFI(C->rBegin, C->rEnd, newNode->Prefix);
00813
00814
00815 if (HUTMFI && newNode->tBegin != newNode->tEnd && !HUT) {
00816 CountHUTMFI++;
00817
00818 if (CheckHUTMFI(newNode, newNode->tBegin)) {
00819
00820 CountHUTMFISuccess++;
00821 AllFreq = false;
00822 break;
00823 }
00824 }
00825
00826 NoChild = false;
00827
00828
00829 MAFIA(newNode, HUT, FHUT, useComp);
00830
00831
00832
00833
00834 C->rEnd = newNode->rEnd;
00835 } else
00836 AllFreq = false;
00837
00838
00839 if (FHUT) {
00840
00841 if (HUT) {
00842 return ;
00843
00844
00845
00846 } else {
00847 FHUT = false;
00848 break;
00849 }
00850 }
00851
00852
00853 iTAIL++;
00854 }
00855 }
00856
00857
00858 if (GoFHUT && HUT && AllFreq) {
00859 FHUT = true;
00860 CountFHUT++;
00861 }
00862
00863
00864 if (MethodIsFI)
00865 AddToFI(C);
00866 else if (MethodIsFCI && C != Root)
00867 AddToFCI(C);
00868 else if (NoChild && !LMFISuperSet(C)) {
00869 AddToMFI(C);
00870 }
00871
00872 int subtreeSize = CountNodes - beforeCountNodes + 1;
00873 int estimateTail = (int)(frequentTailSize / (double)EstimateDiv);
00874 if (estimateTail > 0 && C->Trans->_count != TransCount) {
00875 EstimateBuffy[estimateTail].Count++;
00876 EstimateBuffy[estimateTail].Sum += subtreeSize;
00877 }
00878 }
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888 void MergeRepeatedItemsets() {
00889 NodeList::iterator bali = F1.begin();
00890 Bitmap out(*(*bali)->Trans);
00891 int blah = 0;
00892
00893
00894 while (bali != F1.end()) {
00895 out.FillOnes();
00896 NodeList::iterator noli = bali;
00897 noli++;
00898
00899
00900 while (noli != F1.end()) {
00901
00902 if ((*bali)->Trans->_count != (*noli)->Trans->_count)
00903 break;
00904 else {
00905
00906 out.AndOnly(*(*bali)->Trans, *(*noli)->Trans, CountAnds);
00907 out.Count(blah);
00908
00909
00910 if (out._count == (*noli)->Trans->_count) {
00911 (*bali)->Name->Or(*(*bali)->Name, *(*noli)->Name);
00912 F1.erase(noli);
00913 } else
00914 noli++;
00915 }
00916 }
00917 bali++;
00918 }
00919 }
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933 int main(int argc, char **argv) {
00934
00935
00936 if (argc < 5) {
00937 cerr << "Usage: " << argv[0] << " [-mfi/-fci/-fi] [min sup (percent)] " << endl;
00938 cerr << "\t[-ascii/-binary] [input filename] " << endl;
00939 cerr << "\t[output filename (optional)]" << endl;
00940 cerr << "Ex: " << argv[0] << " -mfi .5 -ascii connect4.ascii mfi.txt" << endl;
00941 cerr << "Ex: " << argv[0] << " -mfi .3 -binary chess.binary" << endl;
00942 exit(0);
00943 }
00944
00945
00946 time(&total_start);
00947
00948
00949 method = argv[1];
00950
00951
00952 MSF = atof(argv[2]);
00953
00954 if (strcmp(argv[3], "-ascii") == 0)
00955 F1FromFile(argv[4], true);
00956 else if (strcmp(argv[3], "-binary") == 0)
00957 F1FromFile(argv[4], false);
00958 else {
00959 cerr << "File format must be -ascii or -binary" << endl;
00960 exit(1);
00961 }
00962
00963 if (argc == 6) {
00964 outputMFI = true;
00965 outFilename = argv[5];
00966 }
00967
00968
00969 NullTrans = new Bitmap(TransCount);
00970 NullTrans->FillOnes();
00971 NullTrans->_count = TransCount;
00972 ItemSet NullList;
00973
00974 MFIBySizes = new int[MAX_ITEMSET_SIZE];
00975 for (int h = 0; h < MAX_ITEMSET_SIZE; h++) {
00976 MFIBySizes[h] = 0;
00977 }
00978
00979
00980 FullF1size = F1size = F1.size();
00981
00982
00983 if (FullF1size != 0) {
00984
00985 BaseBitmap *NullName = new BaseBitmap(FullF1size);
00986 MFI.reserve(100000);
00987
00988 int p = 0;
00989 ItemMap = new int[FullF1size];
00990 ItemsetBuffy = new int[FullF1size];
00991
00992
00993 for (NodeList::iterator nli = F1.begin(); nli != F1.end(); nli++) {
00994
00995 ItemMap[p] = (*nli)->Prefix;
00996
00997
00998 (*nli)->Prefix = p;
00999
01000
01001 (*nli)->Name = new BaseBitmap(FullF1size);
01002 (*nli)->Name->FillEmptyPosition(p);
01003 (*nli)->Name->_count = 1;
01004 p++;
01005 }
01006
01007
01008
01009 MergeRepeatedItemsets();
01010 F1size = F1.size();
01011
01012
01013 maxtail = F1size * (F1size + 1) / 2;
01014 gTail = new TailElement[maxtail];
01015
01016
01017 TailBuffy = new TailElement[F1size];
01018
01019
01020 EstimateSize = (int)ceil(F1size / (double)EstimateDiv);
01021 EstimateBuffy = new SubtreeEstimate[EstimateSize];
01022 for (int estimateIndex = 0; estimateIndex < EstimateSize; estimateIndex++) {
01023 EstimateBuffy[estimateIndex].Count = 1;
01024 EstimateBuffy[estimateIndex].Sum = estimateIndex * EstimateDiv * estimateIndex * EstimateDiv / 2;
01025 }
01026
01027
01028 int uu;
01029 for (uu = 0; uu < maxtail; uu++) {
01030 gTail[uu].Item = -1;
01031 gTail[uu].Count = 0;
01032 }
01033
01034
01035 for (uu = 0; uu < F1size; uu++) {
01036 gTail[uu].Item = uu;
01037 gTail[uu].Count = F1[uu]->Trans->_count;
01038
01039
01040 F1[uu]->tBegin = uu + 1;
01041 if (uu == F1size - 1)
01042 F1[uu]->tBegin = -1;
01043
01044 TempName = new BaseBitmap(FullF1size);
01045
01046
01047 BaseBitmap *name = new BaseBitmap(FullF1size);
01048 NameBuffy.push_back(name);
01049 Bitmap *buff = new Bitmap(TransCount);
01050 TransBuffy.push_back(buff);
01051 TreeNode *newNode = new TreeNode();
01052 NodeBuffy.push_back(newNode);
01053 }
01054
01055 srand(666);
01056 bool FHUT;
01057
01058
01059 clock_t start, finish;
01060 double duration = -1;
01061 start = clock();
01062
01063 time(&algorithm_start);
01064
01065
01066 Root = new TreeNode(NullName, NullTrans, 0, 0, -1, 0, F1size);
01067
01068
01069 Root->rBegin = 0;
01070 Root->rEnd = 0;
01071
01072
01073 if (method.compare("-fci") == 0) {
01074
01075
01076 GoFHUT = false;
01077 HUTMFI = false;
01078 PEPrune = true;
01079 Reorder = true;
01080 MethodIsFCI = true;
01081
01082 MAFIA(Root, false, FHUT, false);
01083 } else if (method.compare("-mfi") == 0) {
01084
01085 GoFHUT = true;
01086 HUTMFI = true;
01087 PEPrune = true;
01088 Reorder = true;
01089
01090 MAFIA(Root, true, FHUT, false);
01091 } else if (method.compare("-fi") == 0) {
01092 if (outputMFI) {
01093
01094 outFile = new ItemsetOutput(outFilename);
01095 if (!outFile->isOpen()) {
01096 cerr << "Output file not open!" << endl;
01097 exit(1);
01098 }
01099 }
01100
01101
01102 GoFHUT = false;
01103 HUTMFI = false;
01104 PEPrune = false;
01105 Reorder = false;
01106 MethodIsFI = true;
01107
01108 MAFIA(Root, false, FHUT, false);
01109
01110 if (outputMFI) {
01111 delete outFile;
01112 }
01113 } else {
01114 cerr << "Invalid algorithm option!" << endl;
01115 exit(0);
01116 }
01117
01118 finish = clock();
01119 duration = (finish - start) / (double)CLOCKS_PER_SEC;
01120
01121
01122
01123 time(&algorithm_finish);
01124 algorithm_time = difftime(algorithm_finish, algorithm_start);
01125 printf("Algorithm time: %.2f seconds.\n", algorithm_time);
01126 } else {
01127 MFIBySizes[0]++;
01128 }
01129
01130
01131
01132
01133
01134
01135
01136
01137 if (outputMFI && !MethodIsFI) {
01138
01139
01140 time(&print_start);
01141
01142 PrintMFI();
01143
01144 time(&print_finish);
01145 print_time = difftime(print_finish, print_start);
01146 printf("Printing output time: %.2f seconds.\n", print_time);
01147 }
01148
01149 time(&total_finish);
01150 total_time = difftime(total_finish, total_start);
01151 printf("Total time: %.2f seconds.\n\n", total_time);
01152
01153
01154 cout << "MinSup: " << MS << endl;
01155 cout << "TransCount: " << TransCount << endl;
01156 cout << "F1size: " << FullF1size << endl;
01157 cout << "MFISize: " << MFI.size() << endl;
01158 cout << "MFIAvg: " << MFIDepth / (double) MFISize << endl << endl;
01159
01160 #ifdef DEBUG
01161
01162 cout << "CountNodes: " << CountNodes << endl;
01163 cout << "CountAnds: " << CountAnds << endl;
01164 cout << "CountSmallAnds: " << CountSmallAnds << endl;
01165 cout << "CountRebuilds: " << CountRebuilds << endl;
01166 cout << "CountCounts: " << CountCounts << endl << endl;
01167 cout << "CountFHUT: " << CountFHUT << endl;
01168 cout << "CountHUTMFI: " << CountHUTMFI << endl;
01169 cout << "CountHUTMFISuccess: " << CountHUTMFISuccess << endl;
01170 cout << "CountCheckPosition: " << CountCheckPosition << endl;
01171 cout << "CountPEPrunes: " << CountPEPrunes << endl << endl;
01172
01173 #endif
01174
01175 return 0;
01176 }
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318