c++ - I want to print the most visited sites/urls in the browser -
below code have written in c++ , printing wrong result 2nd , 3rd output line. not able figure out why happening.
below code have written , functional code on visual studio. code expects 1 input file named urlmgr.txt content should urls. below sample urls using it.
web.whatsapp.com web.whatsapp.com cplusplus.com/reference/algorithm/find_if stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop mail.google.com/mail/u/0/#inbox http://stackoverflow.com/questions/18085331/recursive-lambda-functions-in-c14 mail.google.com/mail/u/0/#inbox en.cppreference.com/w/cpp/language/lambda https://www.google.co.in/?ion=1&espv=2#q=invariant%20meaning mail.google.com/mail/u/0/#inbox http://stackoverflow.com/questions/11699083/where-can-i-find-all-the-exception-guarantees-for-the-standard-containers-and-al https://www.google.co.in/?ion=1&espv=2#q=array+of+references:quora&start=10 mail.google.com/mail/u/0/#inbox web.whatsapp.com quora.com/whats-the-purpose-of-load-factor-in-hash-tables https://www.quora.com/whats-the-difference-between-the-rehash-and-reserve- methods-of-the-c++-unordered_map cplusplus.com/reference/unordered_map/unordered_map/load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor cplusplus.com/max_load_factor
code pasted below.
#include <iostream> #include <string> #include <unordered_set> #include <algorithm> #include <fstream> #include <sstream> #include <functional> #include <unordered_map> #include <queue> using namespace std; class urlinfo { public: urlinfo(string &url):urlname(url),hitcount(1) { } int gethitcount() const { return hitcount; } string geturl() { return urlname; } string geturl() const { return urlname; } void updatehitcount() { hitcount++; } void sethitcount(int count) { hitcount = count; } private: string urlname; int hitcount; }; class urlinfomaxheap { public: bool operator() (urlinfo *url1, urlinfo *url2) const { if(url2->gethitcount() > url1->gethitcount()) return true; else return false; } }; bool operator==(const urlinfo &ui1,const urlinfo& ui2) { //return (ui1.geturl().compare(ui2.geturl()) == 0) ? 1:0; return (ui1.geturl() == ui2.geturl()); } namespace std { template <> struct hash<urlinfo> { size_t operator()(urlinfo const & ui) { return hash<string>()(ui.geturl()); } }; } class urlmgr { public: urlmgr(string &filename) { ifstream rdstr; string str; rdstr.open(filename.c_str(),ios::in); if(rdstr.is_open()) { int len; rdstr.seekg(0,ios::end); len = rdstr.tellg(); rdstr.seekg(0,ios::beg); str.reserve(len+1); char *buff = new char[len +1]; memset(buff,0,len+1); rdstr.read(buff,len); rdstr.close(); str.assign(buff); delete [] buff; } stringstream ss(str); string token; while(getline(ss,token,'\n')) { //cout<<endl<<token; addurl(token); } } void addurl(string &url) { unordered_map<string,urlinfo*>::iterator itr; itr = urls.find(url); if(itr == urls.end()) { urlinfo *u = new urlinfo(url); urls[url] = u; maxheap.push_back(u); } else { itr->second->updatehitcount(); urlinfo* u = itr->second; vector<urlinfo*>::iterator vitr; vitr = find(maxheap.begin(),maxheap.end(),u); if(vitr!=maxheap.end()) { maxheap.erase(vitr); maxheap.push_back(u); } } make_heap(maxheap.begin(),maxheap.end(),urlinfomaxheap()); } void releaseresources() { for_each(urls.begin(),urls.end(),[](pair<string,urlinfo*> p){ urlinfo* u = p.second; delete u; }); } void printheap() { for_each(maxheap.begin(),maxheap.end(),[](urlinfo* u){ cout<<endl<<u->gethitcount()<<" "<<u->geturl(); }); } private: unordered_map<string,urlinfo*> urls; vector<urlinfo*> maxheap; }; int main() { string filename("urlmgr.txt"); urlmgr um(filename); um.printheap(); um.releaseresources(); cout<<endl<<"successfully inserted data"<<endl; }
the output getting
8 cplusplus.com/max_load_factor 3 web.whatsapp.com 4 mail.google.com/mail/u/0/#inbox 1 en.cppreference.com/w/cpp/language/lambda 1 other url's , on. //all other url's show count 1.
what expect
8 cplusplus.com/max_load_factor 4 mail.google.com/mail/u/0/#inbox 3 web.whatsapp.com 1 en.cppreference.com/w/cpp/language/lambda 1 other url's , on. //all other url's show count 1.
finally after debugging have found problem.the problem in way interpreting how max_heap()
works.
consider this.
url1 occurs 8 times url2 occurs 4 times url3 occurs 3 times
after call max_heap()
is
maxheap[0]=8 8 maxheap[1]=4 4 3 maxheap[2]=3
or can get
maxheap[0]=8 8 maxheap[1]=3 3 4 maxheap[2]=4
both of above maxheaps considering first heap can occur , in below code printing maxheap
content without realizing may printing second heap.
void printheap() { for_each(maxheap.begin(),maxheap.end(),[](urlinfo* u){ cout<<endl<<u->gethitcount()<<" "<<u->geturl(); }); }
to resolve this.one way after picking maxheap[0]
.you delete first element , again call max_heap before picking maxheap[0]
again.or can have below.
while(maxheap.size()>0){ cout<<(*maxheap.begin())->gethitcount()<<" "<<(*maxheap.begin())->geturl()<<endl; std::pop_heap(maxheap.begin(),maxheap.end(),urlinfomaxheap());maxheap.pop_back();}
in above code pop_heap() move topmost element(which has highest priority according implementation of compare function pass make_heap()
) end , heapify again. can delete last element then.
also did not find use of below in code
vector<urlinfo*>::iterator vitr; vitr = find(maxheap.begin(),maxheap.end(),u); if(vitr!=maxheap.end()) { maxheap.erase(vitr); maxheap.push_back(u); }
Comments
Post a Comment