采用 STL 中的 set 容器实现词频统计与排序,代码如下:
#include <iostream>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
using namespace std;
// Function declarations
// Splits strInput on spaces: clears both outputs, then stores every token in
// vecWords (in input order) and each distinct token in setWords.
void Split(const string strInput, vector<string>& vecWords, set<string>& setWords);
// Returns the number of elements of vecWords that are equal to strWord.
int Count(const string& strWord, const vector<string>& vecWords);
// Comparator for (word, count) pairs, intended for sort().
// NOTE: the definition further down in this file is currently commented out.
int cmp(const pair<string, int>& pWord1, const pair<string, int>& pWord2);
// 主程序
int main()
{
// 输入
string strInput = "我 爱 吃 苹果 , 我 更 爱 吃 香蕉 。 ";
vector<string> vecWords;
set<string> setWords;
Split(strInput, vecWords, setWords); // 将句子按照空格切分成词语
set<string>::iterator iter = 0;
vector<pair<string, int> > vecpWords;// 存储词及其频率
pair<string, int> pWords;
string strWord;
int nCount = 0; //出现次数
for(iter = setWords.begin(); iter != setWords.end(); ++iter)
{
strWord = *iter;
// 针对每个strWord,在vecWords中查找strWord的出现次数
nCount = Count(strWord,vecWords);
pWords.first = strWord;
pWords.second = nCount;
vecpWords.push_back(pWords);
}
/*
sort(vecpWords.begin(), vecpWords.end(),cmp); // 排序,泛型算法
*/
// 将实验结果在屏幕上打印
unsigned int n = 0;
for(n = 0; n < vecpWords.size(); ++n)
{
cout << vecpWords[n].first << " "
<< vecpWords[n].second << endl;
}
return 0;
}
/*
// Sorting comparator (disabled): returns 1 when pWord1 has the larger count, otherwise 0.
int cmp(const pair<string, int>& pWord1, const pair<string, int>& pWord2)
{
if(pWord1.second > pWord2.second)
return 1;
else
return 0;
}
*/
// Splits strInput into space-delimited tokens.
//
// Parameters:
//   strInput - text whose words are separated by ASCII spaces.
//   vecWords - output; cleared, then receives every token in input order
//              (duplicates kept).
//   setWords - output; cleared, then receives each distinct token once.
//
// Every token that is followed by a space is emitted, including the empty
// token produced by a leading space or by two adjacent spaces. Text after the
// last space is emitted as a final token only when it is non-empty, so
// "a b" and "a b " both yield {"a", "b"}.
void Split(const std::string strInput, std::vector<std::string>& vecWords, std::set<std::string>& setWords)
{
    vecWords.clear();
    setWords.clear();

    // Positions must be std::string::size_type. Storing the result of find()
    // in an unsigned int truncates npos wherever size_t is wider than
    // unsigned int (typical 64-bit builds); the "!= npos" test then never
    // fails and the loop never terminates.
    std::string::size_type tokenStart = 0;
    std::string::size_type spacePos = strInput.find(' ', tokenStart);
    while (spacePos != std::string::npos)
    {
        const std::string strWord = strInput.substr(tokenStart, spacePos - tokenStart);
        vecWords.push_back(strWord);
        setWords.insert(strWord);
        tokenStart = spacePos + 1;
        spacePos = strInput.find(' ', tokenStart);
    }

    // Keep the last word when the input does not end with a space.
    if (tokenStart < strInput.size())
    {
        const std::string strWord = strInput.substr(tokenStart);
        vecWords.push_back(strWord);
        setWords.insert(strWord);
    }
}
// Returns the number of entries in vecWords that are equal to strWord.
//
// Parameters:
//   strWord  - the word to look for.
//   vecWords - the token list to scan; it is not modified.
//
// Returns 0 when vecWords is empty or contains no match.
int Count(const std::string& strWord, const std::vector<std::string>& vecWords)
{
    // std::count applies operator== to each element, which is the same
    // comparison a hand-written loop over the vector would perform.
    return static_cast<int>(std::count(vecWords.begin(), vecWords.end(), strWord));
}