我们经常将Excel格式的文件保存为csv格式以方便上传和修改,可是当数据中包含逗号和双引号的时候Excel会把该字段用双引号括住并把数据中的"改为"",从而给解析带来了困难。我写了以下函数来解析这样的字符串:
testSplitCSV.java:
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
class testSplitCSV{
    /**
     * Splits one line of a CSV file into its fields.
     *
     * <p>Fields are separated by commas. A field may be wrapped in double
     * quotes, in which case it may contain commas, and a literal double quote
     * inside it is written as two consecutive double quotes. The surrounding
     * quotes are removed and doubled quotes are collapsed in the result.
     *
     * <p>Every comma that is not inside a quoted field separates two fields,
     * so a trailing comma produces a trailing empty field
     * (for example {@code "a,b,"} yields {@code {"a", "b", ""}}).
     *
     * @param src the CSV line to split; may be {@code null}
     * @return the fields in order of appearance; an empty array when
     *         {@code src} is {@code null} or the empty string
     * @throws Exception if a double quote appears inside a field that does not
     *         start with one, if a closing quote is followed by anything other
     *         than a comma, another quote, or the end of the line, or if the
     *         last field starts with a quote that is never closed
     */
    public static String[] splitCSV(String src) throws Exception{
        if (src == null || src.length() == 0) {
            return new String[0];
        }

        List<String> fields = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        final int length = src.length();

        for (int i = 0; i < length; i++) {
            char ch = src.charAt(i);
            if (ch == '"') {
                if (inQuotes) {
                    // Look at the character after this quote to decide whether it
                    // is an escaped quote or the end of the quoted field.
                    i++;
                    if (i >= length) {
                        // The closing quote is the last character of the line:
                        // the field is complete and nothing else follows.
                        fields.add(current.toString());
                        return fields.toArray(new String[fields.size()]);
                    }
                    ch = src.charAt(i);
                    if (ch == '"') {
                        // Two quotes in a row stand for one literal quote.
                        current.append(ch);
                    } else if (ch == ',') {
                        // Closing quote followed by the separator.
                        fields.add(current.toString());
                        current.setLength(0);
                        inQuotes = false;
                    } else {
                        throw new Exception("Single double-quote char mustn't exist in filed "+(fields.size()+1)+" while it is begined with quote\nchar at:"+i);
                    }
                } else if (current.length() == 0) {
                    // A quote as the first character opens a quoted field.
                    inQuotes = true;
                } else {
                    throw new Exception("Quote cannot exist in a filed which doesn't begin with quote!\nfield:"+(fields.size()+1));
                }
            } else if (ch == ',') {
                if (inQuotes) {
                    // Commas inside a quoted field are ordinary data.
                    current.append(ch);
                } else {
                    fields.add(current.toString());
                    current.setLength(0);
                }
            } else {
                current.append(ch);
            }
        }

        // Reaching this point means the line did not end on a closing quote.
        // Check the quote state first so that an unterminated field is rejected
        // even when its content is empty (e.g. a lone opening quote).
        if (inQuotes) {
            throw new Exception("last field is begin with but not end with double quote");
        }
        // The text after the last separator is always a field, even when it is
        // empty; otherwise a trailing comma would lose its final column.
        fields.add(current.toString());
        return fields.toArray(new String[fields.size()]);
    }

    /**
     * Demo entry point: splits a sample line that mixes plain fields, quoted
     * fields containing commas, and quoted fields containing escaped quotes,
     * then prints each field with its index.
     *
     * @param args unused
     */
    public static void main(String[] args){
        String src1= "\"fh,zg\",sdf,\"asfs,\",\",dsdf\",\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\",hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\",\"gfhg\"\"\"\"hb\"";
        try {
            String[] fields = splitCSV(src1);
            for (int i = 0; i < fields.length; i++) {
                System.out.println(i+": "+fields[i]);
            }
        }
        catch(Exception e) {
            e.printStackTrace();
        }
    }
}