| 導購 | 订阅 | 在线投稿
分享
 
 
 

读取英文单词,按顺序排列(C++学习题目)

2006-01-09 11:13:17  編輯來源:互聯網  简体版  手機版  評論  字體: ||
 
 
  读取英文单词,按顺序排列(C++学习题目)

  题目:

  1. Implement a function that counts word frequency. It reads an English article from a user-specified text file (article.txt) and counts how many times each word occurs. Words on the exclusion list must not be counted. Output the words and their counts to two files: one (3_1_1out.txt) in lexicographic order, and the other (3_1_2out.txt) in descending order of frequency.

  – Exclusion words: read from pkg95.txt (its contents are listed at the end of this article)

  – Using map

  代码:

  #pragma warning(disable:4786)

  #include <algorithm>
  #include <cctype>
  #include <cstdlib>
  #include <fstream>
  #include <functional>
  #include <iostream>
  #include <map>
  #include <set>
  #include <string>
  #include <vector>

  using namespace std;

  typedef map<string, int>::value_type sival_type;

  // Loads the file named by file_name and returns its contents, one vector
  // element per line (line terminators removed by getline).
  //
  // If the file cannot be opened, a message is printed to standard output and
  // the process terminates with exit status 1.
  //
  // The returned vector is allocated with new; the caller is responsible for
  // deleting it.
  std::vector<std::string> *retrieve_text(std::string file_name)
  {
      std::ifstream input( file_name.c_str(), std::ios::in );
      if ( input.fail() )
      {
          std::cout << "Conn't open " << file_name.c_str() << " !" << std::endl;
          exit( 1 );
      }

      std::vector<std::string> *result = new std::vector<std::string>;

      // Pull lines until getline reports end of input or a read failure.
      for ( std::string current; std::getline( input, current, '\n' ); )
      {
          result->push_back( current );
      }

      return result;
  }

  // Converts every upper-case letter A-Z in each line of *text_file to lower
  // case, in place. All other characters are left untouched.
  void strip_caps( std::vector<std::string> *text_file )
  {
      const std::string upper_case( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );

      for ( std::vector<std::string>::size_type row = 0; row < text_file->size(); ++row )
      {
          std::string &line = (*text_file)[row];

          for ( std::string::size_type col = 0; col < line.size(); ++col )
          {
              // Only characters listed in upper_case are rewritten.
              if ( upper_case.find( line[col] ) != std::string::npos )
              {
                  line[col] = tolower( line[col] );
              }
          }
      }
  }

  // Splits every line of *text_file into words and returns them in reading
  // order.
  //
  // A word is a maximal run of lower-case letters a-z; every other character
  // (digits, punctuation, blanks, upper-case letters) acts as a separator, so
  // the input is expected to have been lower-cased already.
  //
  // The returned vector is allocated with new; the caller is responsible for
  // deleting it.
  std::vector<std::string> *separate_words( const std::vector<std::string> *text_file )
  {
      // The full alphabet, including 'z', so words such as "size" stay whole.
      const std::string letters( "abcdefghijklmnopqrstuvwxyz" );

      std::vector<std::string> *words = new std::vector<std::string>;

      for ( std::vector<std::string>::const_iterator line = text_file->begin();
            line != text_file->end(); ++line )
      {
          std::string::size_type word_begin = line->find_first_of( letters );

          while ( word_begin != std::string::npos )
          {
              // One past the last letter of the word, or npos when the word
              // runs to the end of the line.
              const std::string::size_type word_end =
                  line->find_first_not_of( letters, word_begin );

              // substr clamps the length to the end of the string, so the
              // npos case needs no special handling.
              words->push_back( line->substr( word_begin, word_end - word_begin ) );

              // Searching from npos (or any position past the end) yields
              // npos, which ends the loop.
              word_begin = line->find_first_of( letters, word_end );
          }
      }

      return words;
  }

  // Counts how often each word in *words occurs.
  //
  // Words shorter than three characters, and words listed in the exclusion
  // file "pkg95.txt" (one word per line, read from the current directory),
  // are not counted. Leading and trailing blanks and carriage returns are
  // removed from each exclusion line so that files with CRLF line endings or
  // indented entries still match.
  //
  // If "pkg95.txt" cannot be opened, an error is reported on standard error
  // and the process terminates with exit status 1.
  //
  // The returned map (word -> count) is allocated with new; the caller is
  // responsible for deleting it.
  std::map< std::string, int > *appear_total( const std::vector<std::string> *words )
  {
      // Build the set of excluded words.
      std::set<std::string> exclusion_set;

      std::ifstream exclusion_file( "pkg95.txt", std::ios::in );
      if ( !exclusion_file )
      {
          std::cerr << "Can't open pkg95.txt !" << std::endl;
          std::exit( 1 );
      }

      const std::string blanks( " \t\r\n" );
      std::string textline;
      while ( std::getline( exclusion_file, textline ) )
      {
          const std::string::size_type first = textline.find_first_not_of( blanks );
          if ( first == std::string::npos )
          {
              continue;   // empty or blank-only line
          }
          const std::string::size_type last = textline.find_last_not_of( blanks );
          exclusion_set.insert( textline.substr( first, last - first + 1 ) );
      }

      std::map<std::string, int> *word_map = new std::map<std::string, int>;

      for ( std::vector<std::string>::const_iterator iter = words->begin();
            iter != words->end(); ++iter )
      {
          // Skip short words and excluded words.
          if ( iter->size() < 3 || exclusion_set.count( *iter ) )
          {
              continue;
          }

          // operator[] value-initialises a missing entry to 0, so a single
          // lookup handles both the first and later occurrences.
          ++(*word_map)[ *iter ];
      }

      return word_map;
  }

  // Re-indexes *text_map (word -> count) by count, highest count first,
  // writes the result to "3_1_2out.txt" in the current directory and returns
  // the re-indexed container.
  //
  // Each output line is the word, padded with spaces to 15 columns when it is
  // shorter than that, followed by the count. Words of 15 characters or more
  // are written without padding.
  //
  // If the output file cannot be opened, an error is reported on standard
  // error, nothing is written, and the container is still returned.
  //
  // The returned multimap is allocated with new; the caller is responsible
  // for deleting it.
  std::multimap< int, std::string, std::greater<int> > * multimap_total( std::map<std::string, int> *text_map )
  {
      typedef std::multimap< int, std::string, std::greater<int> > freq_map;

      freq_map *word_map = new freq_map;

      for ( std::map<std::string, int>::const_iterator entry = text_map->begin();
            entry != text_map->end(); ++entry )
      {
          word_map->insert( freq_map::value_type( entry->second, entry->first ) );
      }

      const std::string ofile( "3_1_2out.txt" );
      std::ofstream outfile( ofile.c_str() );
      if ( !outfile )
      {
          std::cerr << "error: unable to open output file: "
                    << ofile << std::endl;
          return word_map;
      }

      const std::string::size_type column_width = 15;

      for ( freq_map::const_iterator row = word_map->begin();
            row != word_map->end(); ++row )
      {
          const std::string &word = row->second;
          outfile << word;

          // Compare before subtracting: size() is unsigned, so
          // column_width - size() would wrap around for long words.
          if ( word.size() < column_width )
          {
              outfile << std::string( column_width - word.size(), ' ' );
          }

          outfile << "出现 " << row->first << "\t次" << '\n';
      }

      std::cout << "程序已将处理结果写入3_1_2out.txt,该文件保存在当前目录"
                << std::endl;

      return word_map;
  }

  // Writes *text_map (word -> count) to "3_1_1out.txt" in the current
  // directory, one word per line in the map's key order.
  //
  // Each output line is the word, padded with spaces to 15 columns when it is
  // shorter than that, followed by the count. Words of 15 characters or more
  // are written without padding.
  //
  // If the output file cannot be opened, an error is reported on standard
  // error and nothing is written.
  void map_output( std::map<std::string, int> *text_map )
  {
      const std::string ofile( "3_1_1out.txt" );
      std::ofstream outfile( ofile.c_str() );
      if ( !outfile )
      {
          std::cerr << "error: unable to open output file: "
                    << ofile << std::endl;
          return;
      }

      const std::string::size_type column_width = 15;

      for ( std::map<std::string, int>::const_iterator row = text_map->begin();
            row != text_map->end(); ++row )
      {
          const std::string &word = row->first;
          outfile << word;

          // Compare before subtracting: size() is unsigned, so
          // column_width - size() would wrap around for long words.
          if ( word.size() < column_width )
          {
              outfile << std::string( column_width - word.size(), ' ' );
          }

          outfile << "出现 " << row->second << "\t次" << '\n';
      }

      std::cout << "程序已将处理结果写入3_1_1out.txt,该文件保存在当前目录"
                << std::endl;
  }

  // Program entry point: reads "article.txt", lower-cases it, splits it into
  // words, counts them, and writes the two report files.
  //
  // Every helper returns a heap-allocated container, so each one is deleted
  // here as soon as it is no longer needed.
  int main()
  {
      std::vector<std::string> *text_file = retrieve_text( "article.txt" );
      strip_caps( text_file );

      std::vector<std::string> *words = separate_words( text_file );
      delete text_file;

      std::map< std::string, int > *text_map = appear_total( words );
      delete words;

      map_output( text_map );

      // multimap_total writes its report as a side effect; the returned
      // container is not used further here.
      std::multimap< int, std::string, std::greater<int> > *by_frequency =
          multimap_total( text_map );
      delete by_frequency;
      delete text_map;

      return 0;
  }

  程序执行结果

  3_1_1out.txt

  article 出现 2 次

  counted 出现 1 次

  counting 出现 1 次

  counts 出现 2 次

  descending 出现 1 次

  english 出现 1 次

  exclusive 出现 1 次

  file 出现 1 次

  files 出现 1 次

  frequency 出现 2 次

  function 出现 1 次

  implement 出现 1 次

  lexicographic 出现 1 次

  output 出现 1 次

  reads 出现 1 次

  specified 出现 1 次

  txt 出现 4 次

  user 出现 1 次

  word 出现 1 次

  words 出现 2 次

  3_1_2out.txt

  txt 出现 4 次

  article 出现 2 次

  counts 出现 2 次

  frequency 出现 2 次

  words 出现 2 次

  counted 出现 1 次

  counting 出现 1 次

  descending 出现 1 次

  english 出现 1 次

  exclusive 出现 1 次

  file 出现 1 次

  files 出现 1 次

  function 出现 1 次

  implement 出现 1 次

  lexicographic 出现 1 次

  output 出现 1 次

  reads 出现 1 次

  specified 出现 1 次

  user 出现 1 次

  word 出现 1 次

  附PKG95.TXT 文件内容:

  different

  necessary

  need

  needed

  needing

  newest

  next

  no

  nobody

  non

  none

  not

  nothing

  now

  nowhere

  of

  off

  often

  new

  old

  older

  oldest

  on

  once

  one

  only

  open

  again

  among

  already

  about

  above

  against

  alone

  after

  also

  although

  along

  always

  an

  across

  and

  another

  ask

  asking

  asks

  backed

  away

  should

  show

  came

  all

  almost

  before

  began

  back

  backing

  be

  became

  because

  becomes

  been

  at

  behind

  being

  best

  better

  between

  big

  showed

  ended

  ending

  both

  but

  by

  asked

  backs

  can

  cannot

  number

  numbers

  case

  few

  find

  finds

  cases

  clearly

  her

  herself

  come

  could

  did

  here

  beings

  fact

  far

  felt

  become

  first

  for

  four

  from

  full

  fully

  furthers

  gave

  general

  generally

  get

  gets

  gives

  facts

  go

  going

  good

  goods

  certain

  certainly

  clear

  great

  greater

  greatest

  group

  grouped

  grouping

  groups

  got

  has

  have

  having

  he

  further

  furthered

  had

  furthering

  itself

  faces

  highest

  him

  himself

  his

  how

  however

  if

  important

  interests

  into

  is

  it

  its

  anyone

  anything

  anywhere

  are

  area

  areas

  around

  as

  seconds

  see

  seem

  seemed

  seeming

  seems

  sees

  right

  several

  shall

  she

  enough

  even

  evenly

  over

  part

  parted

  parting

  parts

  per

  down

  place

  places

  point

  pointed

  pointing

  points

  possible

  present

  presented

  presenting

  ends

  high

  mrs

  much

  must

  my

  myself

  presents

  down

  problem

  problems

  put

  puts

  quite

  will

  with

  within

  rather

  really

  room

  rooms

  said

  same

  right

  showing

  shows

  side

  sides

  since

  small

  smaller

  smallest

  so

  some

  somebody

  someone

  something

  somewhere

  state

  states

  such

  sure

  take

  taken

  than

  that

  the

  their

  then

  there

  therefore

  these

  thought

  thoughts

  three

  through

  thus

  to

  today

  together

  too

  took

  toward

  turn

  turned

  turning

  turns

  two

  still

  under

  until

  up

  others

  upon

  us

  use

  used

  uses

  very

  want

  wanted

  wanting

  wants

  was

  way

  we

  well

  wells

  went

  were

  what

  when

  where

  whether

  which

  while

  who

  whole

  year

  years

  yet

  you

  everyone

  everything

  everywhere

  young

  younger

  youngest

  your

  yours

  ever

  works

  every

  everybody

  face

  other

  our

  out

  just

  interesting

  high

  might

  keep

  keeps

  give

  given

  higher

  kind

  knew

  know

  known

  knows

  large

  largely

  last

  later

  latest

  least

  less

  needs

  never

  newer

  let

  lets

  like

  likely

  long

  high

  longer

  longest

  made

  make

  making

  man

  many

  may

  me

  member

  members

  men

  more

  in

  interest

  interested

  most

  mostly

  mr

  opened

  opening

  new

  opens

  or

  perhaps

  order

  ordered

  ordering

  orders

  differ

  differently

  do

  does

  done

  downed

  downing

  downs

  they

  thing

  things

  think

  thinks

  this

  those

  ways

  why

  without

  work

  worked

  working

  would

  during

  each

  early

  either

  end

  though

  still

  whose

  saw

  say

  says

  them

  second

  any

  anybody
 
 
 
读取英文单词,按顺序排列(C++学习题目) 题目: 1. Implement a function that counting word frequency. It reads an English article from an user-specified txt file(article.txt) and counts their number. Those exclusive words should not be counted. Output the words and counts to 2 files. One(3_1_1out.txt) is in lexicographic order, and the other (3_1_2out.txt) is in descending frequency order. – Exclusive words: – Using map 代码: #pragma warning(disable:4786) #include <string> #include <iostream> #include <fstream> #include <functional> #include <algorithm> #include <vector> #include <map> #include <set> using namespace std; typedef map<string, int>::value_type sival_type; vector<string> *retrieve_text(string file_name) { ifstream artcile_file( file_name.c_str(), ios::in ); if (!artcile_file) { cout << "Conn't open " << file_name.c_str() << " !" << endl; exit (1); } vector<string> *lines_of_text = new vector<string>; string textline; while ( getline(artcile_file, textline, '\n')) { //cout << " " << textline << '\n'; lines_of_text->push_back(textline); } return lines_of_text; } void strip_caps( vector<string> *text_file ) { string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ); vector<string>::iterator iter = text_file->begin(); for ( ; iter != text_file->end(); ++iter ) { string::size_type pos = 0; while ( (pos = (*iter).find_first_of( caps, pos )) != string::npos ) { (*iter)[ pos ] = tolower( (*iter)[pos] ); } } // end of for } vector<string> *separate_words( const vector<string> *text_file ) { string filter("abcdefghijklmnopqrstuvwxy"); // 包含独立的单词集合 vector<string> *words = new vector<string>; short line_pos = 0; for ( ; line_pos <text_file->size(); ++line_pos ) { string textline = (*text_file)[line_pos]; // 用来遍历所有的字母 string::size_type pos = 0; // 单词的开始位置 string::size_type prev_pos = textline.find_first_of(filter); // 单词末尾的下一空格位置 string::size_type temp_pos = textline.find_first_of(filter); // 一个小开关,其值为TURE时,prev_pos指向单词开始的位置 bool onoff = false; while ( (pos = textline.find_first_of(filter, pos)) != 
string::npos ) { if ( onoff ) { prev_pos = temp_pos - 1; // 将onoff值改为false,使单词开始的位置不会改变 onoff = false; } ++pos; if ( (pos - temp_pos) != 1 ) { // 为下一次的赋值做准备 onoff = true; // 将分离出的单词输入words words->push_back( textline.substr( prev_pos, temp_pos - prev_pos )); } temp_pos = pos; } // end of while // 输入最后一个单词,除非这一段没有找到任何字母 if ( prev_pos != string::npos ) { words->push_back( textline.substr( prev_pos, temp_pos - prev_pos )); } } // end of for return words; } map< string, int > *appear_total( const vector<string> *words ) { // 创建单词排除集合 set<string> exclusion_set; ifstream exclusion_file( "pkg95.txt", ios::in ); if (!exclusion_file) { cout << "Conn't open pkg95.txt !" << endl; exit (1); } string textline; while ( getline(exclusion_file, textline, '\n')) { //cout << " " << textline << '\n'; exclusion_set.insert(textline); } map<string, int> *word_map = new map<string, int>; // 开始向word_map中记录数据 vector<string>::const_iterator iter = words->begin(); for ( ; iter != words->end(); ++iter ) { // 如果少于3个字符或在排除集合中存在,则不输入到map中 if ( (*iter).size() < 3 || exclusion_set.count( *iter ) ) { continue; } // 如果count()返回0,则单词不存在,加入它 if ( !word_map->count(*iter) ) { word_map->insert( sival_type( (*iter), 1 ) ); } else { // 将单词的出现次数加1 (*word_map)[ (*iter) ] += 1; } } // end of for return word_map; } multimap< int, string, greater<int> > * multimap_total( map<string, int> *text_map ) { multimap<int, string, greater<int> > *word_map = new multimap< int, string, greater<int> >; map< string, int >::iterator map_siter = text_map->begin(); for ( ; map_siter != text_map->end(); ++map_siter ) { word_map->insert(make_pair((*map_siter).second, (*map_siter).first)); } { string ofile("3_1_2out.txt"); ofstream outfile( ofile.c_str() ); if (!outfile) { cerr << "error: unable to open output file: " << ofile << endl; } multimap< int, string, greater<int> >::iterator map_siter = word_map->begin(); for ( ; map_siter != word_map->end(); ++map_siter ) { outfile << (*map_siter).second; for ( int n = 0; n < 15 - 
(*map_siter).second.size(); ++n ) { outfile << ' '; } outfile << "出现 " << (*map_siter).first << "\t次" << endl; } // end of for cout << "程序已将处理结果写入3_1_2out.txt,该文件保存在当前目录" << endl; } return word_map; } void map_output( map<string, int> *text_map ) { string ofile("3_1_1out.txt"); ofstream outfile( ofile.c_str() ); if (!outfile) { cerr << "error: unable to open output file: " << ofile << endl; } map< string, int >::iterator map_siter = text_map->begin(); for ( ; map_siter != text_map->end(); ++map_siter ) { outfile << (*map_siter).first; for ( int n = 0; n < 15 - (*map_siter).first.size(); ++n ) { outfile << ' '; } outfile << "出现 " << (*map_siter).second << "\t次" << endl; } // end of for cout << "程序已将处理结果写入3_1_1out.txt,该文件保存在当前目录" << endl; } int main() { vector<string> *text_file = retrieve_text("article.txt"); strip_caps(text_file); vector<string> *words = separate_words(text_file); map< string, int > *text_map = appear_total(words); map_output( text_map ); multimap_total(text_map); return 0; } 程序执行结果 3_1_1out.txt article 出现 2 次 counted 出现 1 次 counting 出现 1 次 counts 出现 2 次 descending 出现 1 次 english 出现 1 次 exclusive 出现 1 次 file 出现 1 次 files 出现 1 次 frequency 出现 2 次 function 出现 1 次 implement 出现 1 次 lexicographic 出现 1 次 output 出现 1 次 reads 出现 1 次 specified 出现 1 次 txt 出现 4 次 user 出现 1 次 word 出现 1 次 words 出现 2 次 3_1_2out.txt txt 出现 4 次 article 出现 2 次 counts 出现 2 次 frequency 出现 2 次 words 出现 2 次 counted 出现 1 次 counting 出现 1 次 descending 出现 1 次 english 出现 1 次 exclusive 出现 1 次 file 出现 1 次 files 出现 1 次 function 出现 1 次 implement 出现 1 次 lexicographic 出现 1 次 output 出现 1 次 reads 出现 1 次 specified 出现 1 次 user 出现 1 次 word 出现 1 次 附PKG95.TXT 文件内容: different necessary need needed needing newest next no nobody non none not nothing now nowhere of off often new old older oldest on once one only open again among already about above against alone after also although along always an across and another ask asking asks backed away should show came all almost before began back backing be became 
because becomes been at behind being best better between big showed ended ending both but by asked backs can cannot number numbers case few find finds cases clearly her herself come could did here beings fact far felt become first for four from full fully furthers gave general generally get gets gives facts go going good goods certain certainly clear great greater greatest group grouped grouping groups got has have having he further furthered had furthering itself faces highest him himself his how however if important interests into is it its anyone anything anywhere are area areas around as seconds see seem seemed seeming seems sees right several shall she enough even evenly over part parted parting parts per down place places point pointed pointing points possible present presented presenting ends high mrs much must my myself presents down problem problems put puts quite will with within rather really room rooms said same right showing shows side sides since small smaller smallest so some somebody someone something somewhere state states such sure take taken than that the their then there therefore these thought thoughts three through thus to today together too took toward turn turned turning turns two still under until up others upon us use used uses very want wanted wanting wants was way we well wells went were what when where whether which while who whole year years yet you everyone everything everywhere young younger youngest your yours ever works every everybody face other our out just interesting high might keep keeps give given higher kind knew know known knows large largely last later latest least less needs never newer let lets like likely long high longer longest made make making man many may me member members men more in interest interested most mostly mr opened opening new opens or perhaps order ordered ordering orders differ differently do does done downed downing downs they thing things think thinks this those ways why without work worked working 
would during each early either end though still whose saw say says them second any anybody
󰈣󰈤
 
 
>>返回首頁<<
 
 
 
 
 熱帖排行
 
 
王朝网络微信公众号
微信扫码关注本站公众号 wangchaonetcn
 
  免责声明:本文仅代表作者个人观点,与王朝网络无关。王朝网络登载此文出于传递更多信息之目的,并不意味著赞同其观点或证实其描述,其原创性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容、文字的真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
© 2005- 王朝網路 版權所有