Android教程網
  1. 首頁
  2. Android 技術
  3. Android 手機
  4. Android 系統教程
  5. Android 游戲
 Android教程網 >> Android技術 >> 關於Android編程 >> 數據挖掘: Apriori算法

數據挖掘: Apriori算法

編輯:關於Android編程

 數據挖掘: Apriori算法

1 概述

1.1 Apriori算法是由Rakesh Agrawal和Ramakrishnan Srikan
發明的一種數據挖掘算法。最初是解決找到transaction數據庫中
不同item關聯規則的問題。

2 算法

2.1 基本概念
I = {i1, i2, ..., im} 是由m個item組成的集合;
D是多個transaction組成的集合;D中的每個transaction T都是item組成的集合,
每一個T都有一個唯一的標示:TID;
關聯規則:X, Y是T的子集,且X和Y的交集為空, 則X->Y是一個關聯規則;
支持度:關聯規則X->Y存在一個支持度s, 如果D中有s%的transaction含有
X和Y;

2.2 算法流程
假設每一個transaction有多個item組成,其中的item都是排序的;
每個transaction包括兩個部分:TID和item ;
k-itemset是個集合,它的元素是由k個item組成itemset,其中的item是排序的,itemset也是排序的;
L[k]用於存放較大的k-itemset, 每個元素包括兩個部分:itemset和支持度;
C[k]用於存放k-itemset的候選集合,每個元素包括itemset和TID;

輸入:transaction集合
輸出:L[k]

1) 初始化候選集合C[k]
2) 分別計算每個itemset的數量
3) 找到數量最小的itemset
4) 在C[k]中刪除數量最小的itemset,把剩下的itemset加入到k-itemset
5) 將k-itemset存放到L[k]中
6) 重新生成候選集合C[k],若C[k]的元素數量大於1,則執行1),否則退出

3 算法的簡單實現
3.1 後面的代碼只是演示算法的C++原型程序,設計和編寫上有很多不適當的地方。
沒有經過仔細的測試,但基本是正確的。main函數中的例子來自於Rakesh Agrawal
和Ramakrishnan Srikan的論文。

編譯:
    g++ -g -W -Wall -Wextra -o mytest apriori.cpp main.cpp
執行:
    ./mytest

3.2 代碼

apriori.h:
=========================================================
// 2012年 08月 01日 星期三 13:46:13 CST
// author: 李小丹(Li Shao Dan) 字 殊恆(shuheng)
// K.I.S.S
// S.P.O.T


#ifndef APRIORI_H
#define APRIORI_H


#include <map>
#include <vector>


typedef std::map<std::vector<int>, int> associate_rule_t;


class apriori {
public:
    apriori();
    ~apriori();

    int push_item(int, const int *, int);
    int cal_associate();
    void reset_associate_rule();
    bool next_associate_rule(associate_rule_t **);

private:
    struct transaction {
        int tid;
        std::vector<std::vector<int> > items;
    };

    typedef std::vector<struct transaction *> transactions_t;
    typedef std::vector<associate_rule_t *> itemset_t;

private:
    void clear_rule();
    void clear_tran();
    bool cal_nassociate();
    int recal_transaction();
    void cal_support(associate_rule_t *);
    int find_min_support(const associate_rule_t *, int *);
    void rm_min_support(associate_rule_t *, int);

private:
    transactions_t apr_tran;
    itemset_t apr_rules;
    itemset_t::iterator current;
};

#endif
==================================================

apriori.cpp:
==================================================
// 2012年 08月 01日 星期三 14:21:08 CST
// author: 李小丹(Li Shao Dan) 字 殊恆(shuheng)
// K.I.S.S
// S.P.O.T


#include <algorithm>
#include <utility>
#include <iostream>

#include <cassert>

#include "apriori.h"


using namespace std;


apriori::apriori()
:current(apr_rules.end())
{
}

apriori::~apriori()
{
    clear_tran();
    clear_rule();
}

int apriori::push_item(int tid, const int *items, int s)
{
    struct transaction *t = new struct transaction;

    t->tid = tid;
    for(int i = 0; i < s; ++i)
        t->items.push_back(vector<int>(1, items[i]));
    sort(t->items.begin(), t->items.end());

    apr_tran.push_back(t);
    return 0;
}

int apriori::cal_associate()
{
    while(cal_nassociate())
        recal_transaction();
    return 0;
}

void apriori::cal_support(associate_rule_t *rules)
{
    for(vector<struct transaction *>::const_iterator pos
            = apr_tran.begin(); pos != apr_tran.end(); ++pos) {
        const vector<vector<int> > &items = (*pos)->items;
        //cout << __LINE__ << endl;
        for(vector<vector<int> >::const_iterator p = items.begin();
                p != items.end(); ++p) {
            //cout << __LINE__ << endl;
            /*for(vector<int>::const_iterator ppp = p->begin();
                    ppp != p->end(); ++ppp)
                cout << *ppp << ' ';*/
            //cout << " : ";
            ++(*rules)[*p];
            //cout << (*rules)[*p] << endl;
            //cout << "-----------------" << endl;
        }
    }
    assert(rules->size());
}

int apriori::find_min_support(const associate_rule_t *rules, int *c)
{
    int min = rules->begin()->second;
    *c = 0; // XXX
    for(associate_rule_t::const_iterator pos = rules->begin();
            pos != rules->end(); ++pos) {
        if(pos->second == min) {
            ++*c;
        } else if(pos->second < min) {
            min = pos->second;
            *c = 1;
        }
    }
    return min;
}


bool apriori::cal_nassociate()
{
    //cout << __FILE__ << endl;
    if(!apr_tran.size())
        return false;

    associate_rule_t *rules = new associate_rule_t;

    cal_support(rules);

    const int s = rules->size();

    if(s < 1)
        return false;

    int c;
    const int min = find_min_support(rules, &c);
    if(c < s)
        rm_min_support(rules, min);
   
    if(rules->size())
        apr_rules.push_back(rules);
    else
        delete rules;

    if(apr_tran.size())
        return true;
    return false;
}

void apriori::rm_min_support(associate_rule_t *rules, int min)
{
    for(transactions_t::iterator pos = apr_tran.begin();
            pos != apr_tran.end();) {
        vector<vector<int> > &items = (*pos)->items;
        for(vector<vector<int> >::iterator p = items.begin();
                p != items.end();) {
            /*for(vector<int>::iterator ppp = p->begin();
                    ppp != p->end(); ++ppp)
                cout << *ppp << ' ';*/
            //cout << "\n=================" << endl;
            associate_rule_t::iterator f = rules->find(*p);
            //assert(f != rules->end());
            if(f != rules->end() && f->second == min) {
                //++p;
                items.erase(p);
                rules->erase(f);
                continue;
            } else if(f == rules->end()) {
                items.erase(p);
                continue;
            }
            ++p;
        }
        if(!items.size()) {
            delete *pos;
            apr_tran.erase(pos);
            continue;
        }
        ++pos;
    }
}

void apriori::clear_rule()
{
    for(itemset_t::iterator pos = apr_rules.begin();
            pos != apr_rules.end(); ++pos)
        delete *pos;

    apr_rules.clear();
}

void apriori::clear_tran()
{
    for(transactions_t::iterator pos = apr_tran.begin();
            pos != apr_tran.end(); ++pos)
        delete *pos;

    apr_tran.clear();
}

int apriori::recal_transaction()
{
    assert(apr_tran.size());

    const int s = (*apr_tran.begin())->items.begin()->size();
    //cout << "S: " << s << endl;
    for(transactions_t::iterator pos = apr_tran.begin();
            pos != apr_tran.end();) {
        vector<vector<int> > &items = (*pos)->items;
        if(items.size() > 1) {
            struct transaction *t = new struct transaction;
            t->tid = (*pos)->tid;
            for(vector<vector<int> >::iterator p1 = items.begin();
                    p1 != items.end() - 1; ++p1) {
                for(vector<vector<int> >::iterator p2
                        = p1 + 1; p2 != items.end(); ++p2) {
                    int i = 0;
                    vector<int> tmp;
                    for(vector<int>::iterator p1p = p1->begin(),
                            p2p = p2->begin(); i < s
                            && p1p != p1->end(); ++i, ++p1p, ++p2p) {
                        /*cout << "p1p: " << *p1p << endl;
                        cout << "p2p: " << *p2p << endl;
                        cout << "I: " << i << endl;*/
                        if(*p1p == *p2p) {
                            tmp.push_back(*p1p);
                        } else if(*p1p < *p2p && i == s - 1) {
                            tmp.push_back(*p1p);
                            tmp.push_back(*p2p);
                        } else if(*p1p > *p2p && i == s - 1) {
                            tmp.push_back(*p2p);
                            tmp.push_back(*p1p);
                        } else {
                            break;
                        }
                    }
                    if(i == s) {
                        /*for(vector<int>::iterator p
                                = tmp.begin(); p != tmp.end(); ++p)
                            cout << *p << ' ';
                        cout << endl;
                        cout << "++++++++++++++++++\n";*/
                        t->items.push_back(tmp);
                    }
                }
            }
            delete *pos;
            if(t->items.size()) {
                *pos = t;
                ++pos;
            } else {
                apr_tran.erase(pos);
            }
            continue;
        } else {
            delete *pos;
            apr_tran.erase(pos);
        }
    }
    return 0;
}

void apriori::reset_associate_rule()
{
    current = apr_rules.begin();
}

bool apriori::next_associate_rule(associate_rule_t **p)
{
    if(current == apr_rules.end())
        return false;
    *p = *current++;
    return true;
}
========================================================

main.cpp:
========================================================
// 2012年 08月 07日 星期二 09:17:28 CST
// author: 李小丹(Li Shao Dan) 字 殊恆(shuheng)
// K.I.S.S
// S.P.O.T


#include <iostream>
#include <vector>

#include "apriori.h"


using namespace std;


int main()
{
    apriori apr;
    int item[7] = {1, 3, 4};
    apr.push_item(100, item, 3);

    item[0] = 2; item[1] = 3; item[2] = 5;
    apr.push_item(200, item, 3);

    item[0] = 1; item[1] = 2; item[2] = 3; item[3] = 5;
    apr.push_item(300, item, 4);

    item[0] = 2; item[1] = 5;
    apr.push_item(400, item, 2);

    ////////////////////////////////////////
    /*item[0] = 1; item[1] = 2; item[2] = 4; item[3] = 7;
    apr.push_item(500, item, 4);

    item[0] = 1; item[1] = 2; item[2] = 3;
    apr.push_item(600, item, 3);

    item[0] = 1; item[1] = 2; item[2] = 4; item[3] = 7;
    apr.push_item(700, item, 4);

    item[0] = 1; item[1] = 2;
    apr.push_item(800, item, 2);

    item[0] = 1; item[1] = 2; item[2] = 3; item[3] = 5;
    apr.push_item(700, item, 4);

    item[0] = 1; item[1] = 2; item[2] = 3; item[3] = 5; item[4] = 6; item[5] = 7;
    apr.push_item(900, item, 6);

    item[0] = 5; item[1] = 6; item[2] = 7;
    apr.push_item(110, item, 3);

    item[0] = 2; item[1] = 3; item[2] = 7;
    apr.push_item(120, item, 3);

    item[0] = 1; item[1] = 4; item[2] = 7;
    apr.push_item(128, item, 3);

    item[0] = 1; item[1] = 2; item[2] = 3; item[3] = 5;
    apr.push_item(101, item, 4);*/
    //////////////////////////////////////

    apr.cal_associate();

    associate_rule_t *ar;
    apr.reset_associate_rule();
    while(apr.next_associate_rule(&ar)) {
        for(associate_rule_t::iterator p = ar->begin();
                p != ar->end(); ++p) {
            const vector<int> &items = p->first;
            for(vector<int>::const_iterator pp = items.begin();
                    pp != items.end(); ++pp)
                cout << *pp << ' ';
            cout << ": ";www.2cto.com
            cout << p->second << endl;
        }
        cout << "+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=\n";
    }
    return 0;
}
==========================================================

 


作者:leeshuheng
  1. 上一頁:
  2. 下一頁:
熱門文章
閱讀排行版
Copyright © Android教程網 All Rights Reserved