词频统计与排序(二)

C++/STL/Set容器实现

Posted by Jack on October 11, 2010

采用 STL 中的 set 容器实现词频统计;由于 set 按键有序,输出结果按词语的字典序排列(按词频排序的代码在下文中已被注释掉)。代码如下:

#include <iostream>  
#include <string>  
#include <vector>  
#include <set>  
#include <map>  
#include <algorithm>  
  

using namespace std;  
  
// Forward declarations
// Split: cuts strInput on spaces, filling vecWords with the tokens in order
// and setWords with the distinct tokens.
void Split(const string strInput, vector<string>& vecWords, set<string>& setWords);  
// Count: returns how many entries of vecWords equal strWord.
int Count(const string& strWord, const vector<string>& vecWords);  
// cmp: comparator meant for sort(); in this file both its definition and the
// sort() call that would use it are commented out, so it is only declared.
int cmp(const pair<string, int>& pWord1, const pair<string, int>& pWord2);  
  
// 主程序  
int main()  
{  
    // 输入  
    string strInput = "我 爱 吃 苹果 , 我 更 爱 吃 香蕉 。 ";  
  
  
    vector<string> vecWords;  
    set<string> setWords;  
    Split(strInput, vecWords, setWords); // 将句子按照空格切分成词语  
  
  
    set<string>::iterator iter = 0;  
    vector<pair<string, int> > vecpWords;// 存储词及其频率  
    pair<string, int> pWords;  
    string strWord;  
    int nCount = 0; //出现次数  
  
  
    for(iter = setWords.begin(); iter != setWords.end(); ++iter)  
    {  
        strWord = *iter;  
  
  
        // 针对每个strWord,在vecWords中查找strWord的出现次数  
        nCount = Count(strWord,vecWords);  
  
  
        pWords.first = strWord;  
        pWords.second = nCount;  
        vecpWords.push_back(pWords);  
    }  
  
  
/* 
    sort(vecpWords.begin(), vecpWords.end(),cmp); // 排序,泛型算法 
*/  
  
  
    // 将实验结果在屏幕上打印  
    unsigned int n = 0;  
    for(n = 0; n < vecpWords.size(); ++n)  
    {  
        cout << vecpWords[n].first << "     "  
             << vecpWords[n].second << endl;  
    }  
  
  
    return 0;  
}  
  
  
/* 
//排序部分 
int cmp(const pair<string, int>& pWord1, const pair<string, int>& pWord2) 
{ 
    if(pWord1.second > pWord2.second) 
       return 1; 
    else 
       return 0; 
} 
*/  
  
  
// Splits strInput into words separated by the space character.
//
// Parameters:
//   strInput - the text to split (taken by value so the definition keeps
//              matching the forward declaration).
//   vecWords - output; cleared, then filled with every word in input order,
//              duplicates included.
//   setWords - output; cleared, then filled with each distinct word once.
//
// Empty tokens (from leading, trailing or consecutive spaces) are skipped,
// and the final word is kept even when the input does not end with a space.
void Split(const string strInput, vector<string>& vecWords, set<string>& setWords)
{
    vecWords.clear();
    setWords.clear();

    // Positions must be string::size_type: narrowing the result of find() to
    // unsigned int makes the comparison with string::npos fail on platforms
    // where size_type is wider, which turns the loop into an endless one.
    string::size_type start = 0;
    while (start < strInput.size())
    {
        string::size_type stop = strInput.find(' ', start);
        if (stop == string::npos)
            stop = strInput.size(); // last word runs to the end of the input

        if (stop > start)
        {
            const string strWord = strInput.substr(start, stop - start);
            vecWords.push_back(strWord);
            setWords.insert(strWord);
        }
        start = stop + 1;
    }
}
  
  
// Returns the number of elements in vecWords that compare equal to strWord
// (0 when there are none or when vecWords is empty).
int Count(const string& strWord, const vector<string>& vecWords)
{
    return static_cast<int>(
        std::count(vecWords.begin(), vecWords.end(), strWord));
}