分享
 
 
 

读取英文单词,按顺序排列(C++学习题目)

王朝c/c++·作者佚名  2006-01-09
窄屏简体版  字體: |||超大  

读取英文单词,按顺序排列(C++学习题目)

题目:

1. Implement a function for counting word frequency. It reads an English article from a user-specified txt file (article.txt) and counts their number. Those exclusive words should not be counted. Output the words and counts to 2 files. One (3_1_1out.txt) is in lexicographic order, and the other (3_1_2out.txt) is in descending frequency order.

– Exclusive words:

– Using map

代码:

#pragma warning(disable:4786)

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using namespace std;

// Element type of the word -> occurrence-count map, i.e. the (key, value)
// pair stored in a map<string, int>.
typedef map<string, int>::value_type sival_type;

// Reads the text file named by file_name and returns its contents line by
// line.
//
// file_name: path of the file to read.
// Returns:   a heap-allocated vector holding one string per line, without the
//            terminating '\n'. The caller owns the vector and must delete it.
// On failure to open the file a diagnostic is written to stderr and the
// program terminates with exit status 1.
vector<string> *retrieve_text(string file_name)
{
    ifstream article_file( file_name.c_str(), ios::in );
    if ( !article_file )
    {
        // Diagnostics go to stderr, like the output-file errors elsewhere in
        // this program, so they never mix with regular output.
        cerr << "Can't open " << file_name << " !" << endl;
        exit( 1 );
    }

    vector<string> *lines_of_text = new vector<string>;
    string textline;
    while ( getline( article_file, textline ) )
    {
        lines_of_text->push_back( textline );
    }

    return lines_of_text;
}

// Lower-cases the letters 'A'..'Z' in every line, modifying the strings in
// place. All other characters are left untouched.
//
// text_file: the lines to convert; must not be null.
void strip_caps( vector<string> *text_file )
{
    const string upper_case( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );

    for ( vector<string>::size_type row = 0; row < text_file->size(); ++row )
    {
        string &line = (*text_file)[row];

        for ( string::size_type col = 0; col < line.size(); ++col )
        {
            // Only characters listed in upper_case are converted.
            if ( upper_case.find( line[col] ) != string::npos )
            {
                line[col] = tolower( line[col] );
            }
        }
    }
}

// Splits every line into words. A word is a maximal run of the lower-case
// letters 'a'..'z'; any other character (digits, punctuation, whitespace,
// upper-case letters) acts as a separator.
//
// text_file: the lines to split; must not be null.
// Returns:   a heap-allocated vector with the words in order of appearance.
//            The caller owns the vector and must delete it.
vector<string> *separate_words( const vector<string> *text_file )
{
    // Characters that may appear inside a word (all 26 letters).
    const string letters( "abcdefghijklmnopqrstuvwxyz" );

    // Collection of the individual words found so far.
    vector<string> *words = new vector<string>;

    // The index uses the vector's own size_type so that inputs with any
    // number of lines are handled.
    for ( vector<string>::size_type line_pos = 0;
          line_pos < text_file->size(); ++line_pos )
    {
        const string &textline = (*text_file)[line_pos];

        // Start of the current word, or npos when no letters remain.
        string::size_type word_start = textline.find_first_of( letters );

        while ( word_start != string::npos )
        {
            // Position just past the last letter of the current word.
            const string::size_type word_end =
                textline.find_first_not_of( letters, word_start );

            if ( word_end == string::npos )
            {
                // The word runs to the end of the line.
                words->push_back( textline.substr( word_start ) );
                break;
            }

            words->push_back(
                textline.substr( word_start, word_end - word_start ) );

            // Skip the separators and locate the next word.
            word_start = textline.find_first_of( letters, word_end );
        }
    }

    return words;
}

// Counts how often each word occurs, ignoring short words and words listed
// in an exclusion file.
//
// words:          the words to count; must not be null.
// exclusion_path: text file with one excluded word per line. Trailing
//                 spaces, tabs and carriage returns on a line are ignored,
//                 so files with CRLF line endings work too. Defaults to
//                 "pkg95.txt".
// Returns:        a heap-allocated map from word to occurrence count. Words
//                 shorter than three characters and excluded words are not
//                 present. The caller owns the map and must delete it.
// On failure to open the exclusion file a diagnostic is written to stderr
// and the program terminates with exit status 1.
map< string, int > *appear_total( const vector<string> *words,
                                  const string &exclusion_path = "pkg95.txt" )
{
    // Words shorter than this are never counted.
    const string::size_type min_word_length = 3;

    ifstream exclusion_file( exclusion_path.c_str(), ios::in );
    if ( !exclusion_file )
    {
        cerr << "Can't open " << exclusion_path << " !" << endl;
        exit( 1 );
    }

    // Build the set of excluded words.
    set<string> exclusion_set;
    string textline;
    while ( getline( exclusion_file, textline ) )
    {
        // Drop trailing whitespace (notably the '\r' of a CRLF line ending);
        // otherwise the entry would never compare equal to a word.
        const string::size_type last = textline.find_last_not_of( " \t\r" );
        if ( last == string::npos )
        {
            continue;   // blank line, nothing to exclude
        }
        textline.erase( last + 1 );
        exclusion_set.insert( textline );
    }

    map<string, int> *word_map = new map<string, int>;

    // Record the occurrences in word_map.
    for ( vector<string>::const_iterator iter = words->begin();
          iter != words->end(); ++iter )
    {
        if ( iter->size() < min_word_length || exclusion_set.count( *iter ) )
        {
            continue;
        }

        // operator[] creates a missing entry with the value 0, so a single
        // lookup covers both the first and every later occurrence.
        ++(*word_map)[ *iter ];
    }

    return word_map;
}

// Re-indexes the word counts by frequency and writes them, most frequent
// first, to the file "3_1_2out.txt" in the current directory.
//
// Each output line is the word, padded with spaces to 15 columns (words of
// 15 or more characters are written without padding), followed by its count.
//
// text_map: map from word to occurrence count; must not be null.
// Returns:  a heap-allocated multimap from count to word, ordered by
//           descending count. The caller owns it and must delete it. The
//           multimap is returned even when the output file cannot be opened;
//           in that case an error is written to stderr and no file output or
//           success message is produced.
multimap< int, string, greater<int> > * multimap_total( map<string, int> *text_map )
{
    typedef multimap< int, string, greater<int> > frequency_map;

    // Width of the word column in the output file.
    const string::size_type column_width = 15;

    frequency_map *word_map = new frequency_map;

    const map<string, int> &counts = *text_map;
    for ( map<string, int>::const_iterator entry = counts.begin();
          entry != counts.end(); ++entry )
    {
        word_map->insert( frequency_map::value_type( entry->second, entry->first ) );
    }

    const string ofile( "3_1_2out.txt" );
    ofstream outfile( ofile.c_str() );
    if ( !outfile )
    {
        cerr << "error: unable to open output file: "
             << ofile << endl;
        // Nothing can be written, so do not report success below.
        return word_map;
    }

    for ( frequency_map::const_iterator entry = word_map->begin();
          entry != word_map->end(); ++entry )
    {
        const string &word = entry->second;

        outfile << word;
        // Pad only when the word is shorter than the column; subtracting
        // from an unsigned size without this check would wrap around for
        // long words.
        if ( word.size() < column_width )
        {
            outfile << string( column_width - word.size(), ' ' );
        }
        outfile << "出现 " << entry->first << "\t次" << '\n';
    }

    cout << "程序已将处理结果写入3_1_2out.txt,该文件保存在当前目录"
         << endl;

    return word_map;
}

// Writes the word counts in lexicographic word order to the file
// "3_1_1out.txt" in the current directory.
//
// Each output line is the word, padded with spaces to 15 columns (words of
// 15 or more characters are written without padding), followed by its count.
//
// text_map: map from word to occurrence count; must not be null.
// If the output file cannot be opened, an error is written to stderr and the
// function returns without producing file output or a success message.
void map_output( map<string, int> *text_map )
{
    // Width of the word column in the output file.
    const string::size_type column_width = 15;

    const string ofile( "3_1_1out.txt" );
    ofstream outfile( ofile.c_str() );
    if ( !outfile )
    {
        cerr << "error: unable to open output file: "
             << ofile << endl;
        // Nothing can be written, so do not report success below.
        return;
    }

    const map<string, int> &counts = *text_map;
    for ( map<string, int>::const_iterator entry = counts.begin();
          entry != counts.end(); ++entry )
    {
        const string &word = entry->first;

        outfile << word;
        // Pad only when the word is shorter than the column; subtracting
        // from an unsigned size without this check would wrap around for
        // long words.
        if ( word.size() < column_width )
        {
            outfile << string( column_width - word.size(), ' ' );
        }
        outfile << "出现 " << entry->second << "\t次" << '\n';
    }

    cout << "程序已将处理结果写入3_1_1out.txt,该文件保存在当前目录"
         << endl;
}

// Program entry point: reads "article.txt", lower-cases it, splits it into
// words, counts the words that are not excluded, and writes the two result
// files via map_output and multimap_total.
int main()
{
    vector<string> *text_file = retrieve_text( "article.txt" );
    strip_caps( text_file );

    vector<string> *words = separate_words( text_file );
    map< string, int > *text_map = appear_total( words );

    map_output( text_map );

    // multimap_total writes its file as a side effect; the index it returns
    // is heap-allocated and owned by the caller, so keep it to release it.
    multimap< int, string, greater<int> > *by_frequency = multimap_total( text_map );

    // Every helper hands back a container allocated with new; release them.
    delete by_frequency;
    delete text_map;
    delete words;
    delete text_file;

    return 0;
}

程序执行结果

3_1_1out.txt

article 出现 2 次

counted 出现 1 次

counting 出现 1 次

counts 出现 2 次

descending 出现 1 次

english 出现 1 次

exclusive 出现 1 次

file 出现 1 次

files 出现 1 次

frequency 出现 2 次

function 出现 1 次

implement 出现 1 次

lexicographic 出现 1 次

output 出现 1 次

reads 出现 1 次

specified 出现 1 次

txt 出现 4 次

user 出现 1 次

word 出现 1 次

words 出现 2 次

3_1_2out.txt

txt 出现 4 次

article 出现 2 次

counts 出现 2 次

frequency 出现 2 次

words 出现 2 次

counted 出现 1 次

counting 出现 1 次

descending 出现 1 次

english 出现 1 次

exclusive 出现 1 次

file 出现 1 次

files 出现 1 次

function 出现 1 次

implement 出现 1 次

lexicographic 出现 1 次

output 出现 1 次

reads 出现 1 次

specified 出现 1 次

user 出现 1 次

word 出现 1 次

附PKG95.TXT 文件内容:

different

necessary

need

needed

needing

newest

next

no

nobody

non

none

not

nothing

now

nowhere

of

off

often

new

old

older

oldest

on

once

one

only

open

again

among

already

about

above

against

alone

after

also

although

along

always

an

across

and

another

ask

asking

asks

backed

away

should

show

came

all

almost

before

began

back

backing

be

became

because

becomes

been

at

behind

being

best

better

between

big

showed

ended

ending

both

but

by

asked

backs

can

cannot

number

numbers

case

few

find

finds

cases

clearly

her

herself

come

could

did

here

beings

fact

far

felt

become

first

for

four

from

full

fully

furthers

gave

general

generally

get

gets

gives

facts

go

going

good

goods

certain

certainly

clear

great

greater

greatest

group

grouped

grouping

groups

got

has

have

having

he

further

furthered

had

furthering

itself

faces

highest

him

himself

his

how

however

if

important

interests

into

is

it

its

anyone

anything

anywhere

are

area

areas

around

as

seconds

see

seem

seemed

seeming

seems

sees

right

several

shall

she

enough

even

evenly

over

part

parted

parting

parts

per

down

place

places

point

pointed

pointing

points

possible

present

presented

presenting

ends

high

mrs

much

must

my

myself

presents

down

problem

problems

put

puts

quite

will

with

within

rather

really

room

rooms

said

same

right

showing

shows

side

sides

since

small

smaller

smallest

so

some

somebody

someone

something

somewhere

state

states

such

sure

take

taken

than

that

the

their

then

there

therefore

these

thought

thoughts

three

through

thus

to

today

together

too

took

toward

turn

turned

turning

turns

two

still

under

until

up

others

upon

us

use

used

uses

very

want

wanted

wanting

wants

was

way

we

well

wells

went

were

what

when

where

whether

which

while

who

whole

year

years

yet

you

everyone

everything

everywhere

young

younger

youngest

your

yours

ever

works

every

everybody

face

other

our

out

just

interesting

high

might

keep

keeps

give

given

higher

kind

knew

know

known

knows

large

largely

last

later

latest

least

less

needs

never

newer

let

lets

like

likely

long

high

longer

longest

made

make

making

man

many

may

me

member

members

men

more

in

interest

interested

most

mostly

mr

opened

opening

new

opens

or

perhaps

order

ordered

ordering

orders

differ

differently

do

does

done

downed

downing

downs

they

thing

things

think

thinks

this

those

ways

why

without

work

worked

working

would

during

each

early

either

end

though

still

whose

saw

say

says

them

second

any

anybody

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
2023年上半年GDP全球前十五强
 百态   2023-10-24
美众议院议长启动对拜登的弹劾调查
 百态   2023-09-13
上海、济南、武汉等多地出现不明坠落物
 探索   2023-09-06
印度或要将国名改为“巴拉特”
 百态   2023-09-06
男子为女友送行,买票不登机被捕
 百态   2023-08-20
手机地震预警功能怎么开?
 干货   2023-08-06
女子4年卖2套房花700多万做美容:不但没变美脸,面部还出现变形
 百态   2023-08-04
住户一楼被水淹 还冲来8头猪
 百态   2023-07-31
女子体内爬出大量瓜子状活虫
 百态   2023-07-25
地球连续35年收到神秘规律性信号,网友:不要回答!
 探索   2023-07-21
全球镓价格本周大涨27%
 探索   2023-07-09
钱都流向了那些不缺钱的人,苦都留给了能吃苦的人
 探索   2023-07-02
倩女手游刀客魅者强控制(强混乱强眩晕强睡眠)和对应控制抗性的关系
 百态   2020-08-20
美国5月9日最新疫情:美国确诊人数突破131万
 百态   2020-05-09
荷兰政府宣布将集体辞职
 干货   2020-04-30
倩女幽魂手游师徒任务情义春秋猜成语答案逍遥观:鹏程万里
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案神机营:射石饮羽
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案昆仑山:拔刀相助
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案天工阁:鬼斧神工
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案丝路古道:单枪匹马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:与虎谋皮
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:李代桃僵
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:指鹿为马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:小鸟依人
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:千金买邻
 干货   2019-11-12
 
推荐阅读
 
 
 
>>返回首頁<<
 
靜靜地坐在廢墟上,四周的荒凉一望無際,忽然覺得,淒涼也很美
© 2005- 王朝網路 版權所有