如何将全文检索中的“干扰词”去除

王朝other·作者佚名  2006-01-08
窄屏简体版  字體: |||超大  

在页面中引入以下脚本,然后调用 remove_noise_word() 即可得到去除干扰词后的检索字符串:

<SCRIPT LANGUAGE=javascript>

<!--

// Noise (stop) words that are stripped from full-text search input:
// punctuation and digits, single Latin letters, common English words,
// and common single-character Chinese words. All Latin entries are
// lower-case, matching the lower-cased keyword they are compared against.
// Declared with `var` so the list is an explicit global instead of an
// implicit one (an undeclared assignment throws in strict mode).
var noise_word_list_ch = [
    "?","about","$","1","2","3","4","5","6","7","8","9","0","_",
    "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
    "p","q","r","s","t","u","v","w","x","y","z","after","all","also",
    "an","and","another","any","are","as","at","be","because","been",
    "before","being","between","both","but","by","came","can","come",
    "could","did","do","each","for","from","get","got","had","has",
    "have","he","her","here","him","himself","his","how","if","in","into",
    "is","it","like","make","many","me","might","more","most","much","must",
    "my","never","now","of","on","only","or","other","our","out","over","said",
    "same","see","should","since","some","still","such","take","than","that",
    "the","their","them","then","there","these","they","this","those","through",
    "to","too","under","up","very","was","way","we","well","were","what","where",
    "which","while","who","with","would","you","your",
    "的","一","不","在","人","有","是","为","以","于","上","他","而","后","之","来",
    "及","了","因","下","可","到","由","这","与","也","此","但","并","个","其","已",
    "无","小","我","们","起","最","再","今","去","好","只","又","或","很","亦","某",
    "把","那","你","乃","它"
];

/**
 * Strips leading and trailing space characters (" ") from a value.
 *
 * Only the ASCII space is removed; tabs, newlines and full-width spaces
 * are left untouched.
 *
 * @param {*} inputVal  value to trim; converted with toString().
 *                      null and undefined are treated as an empty string.
 * @returns {string} the text without surrounding spaces
 */
function trim_str_key(inputVal){
    // null/undefined have no toString(); treat them as an empty keyword
    if (inputVal === null || inputVal === undefined) {
        return "";
    }

    // Local variable: an undeclared assignment here would leak a global
    // and throw in strict mode.
    var text = inputVal.toString();
    var start = 0;
    var end = text.length;

    // Move the boundaries inward and cut once, instead of rebuilding the
    // string for every removed space.
    while (start < end && text.charAt(start) === " ") {
        start++;
    }
    while (end > start && text.charAt(end - 1) === " ") {
        end--;
    }

    return text.substring(start, end);
}

/**
 * Reports whether a single keyword is a noise (stop) word.
 *
 * The keyword has its surrounding spaces removed by trim_str_key and is
 * lower-cased, then compared against every entry of noise_word_list_ch.
 *
 * @param {*} str_key  keyword to test
 * @returns {boolean} true when the normalized keyword equals a list entry
 */
function is_ch_noise_word(str_key){
    var key_word = trim_str_key(str_key).toLowerCase();
    var listlength = noise_word_list_ch.length;
    // Keep the counter local so the loop cannot clobber a global `i`
    // used elsewhere on the page (and so it works in strict mode).
    var i;

    for (i = 0; i < listlength; i++) {
        if (noise_word_list_ch[i] === key_word) {
            return true;
        }
    }

    return false;
}

/**
 * Removes noise (stop) words from a space-separated search string.
 *
 * The input is split on the space character. Each keyword is kept unless
 * is_ch_noise_word reports it as a noise word. Empty tokens produced by
 * consecutive spaces are dropped, and the final keyword is checked like
 * every other one. Kept keywords retain their original letter case.
 *
 * @param {*} str_source  raw search text; converted to a string and
 *                        stripped of surrounding spaces by trim_str_key
 * @returns {string} the remaining keywords joined by single spaces, or
 *                   "" when the input is empty or contains only noise words
 */
function remove_noise_word(str_source){
    var words = trim_str_key(str_source).split(" ");
    var kept = [];
    var word;
    var i;

    for (i = 0; i < words.length; i++) {
        word = words[i];

        // Consecutive spaces yield empty tokens; skip them so the output
        // never contains doubled spaces.
        if (word === "") {
            continue;
        }

        // Every keyword, including the last one, goes through the filter.
        if (!is_ch_noise_word(word)) {
            kept.push(word);
        }
    }

    return kept.join(" ");
}

//下面是一个测试

//var abc = "av n";

//var nnnn = remove_noise_word(abc);

//alert(nnnn);

//-->

</SCRIPT>

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
© 2005- 王朝網路 版權所有 導航