文章来源:
http://zzzhc.spaces.MSN.com/blog/cns!3905D34B10C3C381!129.entry
对于简单的图形验证码(字体规则,没有杂点或杂点容易过滤掉),
用模板匹配的方式可以比较容易地识别出来.
0.图片黑白化,用1表示有字的像素,0表示无字的像素
1.字块分隔,将图片分隔成只包含单字的最小块
2.生成模板,将字块与字符关联
3.识别,将新图片分块并与模板匹配
Java(jdk1.5)实现:
//先运行TemplateCreator创建模板,再运行Recognize2识别
//图片数据表示,也用来表示字块
package pay365;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Pixel matrix of an image; also used to represent a single block cut out of a larger image.
 *
 * <p>The constructor loads raw RGB values and then hands the matrix to a {@link Filter}.
 * Every other method in this class only compares cells against 0 and 1.
 */
public class ImageData {
    /** Pixel values indexed as {@code data[row][column]}. */
    public int[][] data;
    /** Width in pixels (number of columns). */
    public int w;
    /** Height in pixels (number of rows). */
    public int h;
    /** Character label of this block; stays at the default char value until assigned. */
    public char code;

    /** Creates an empty instance; callers fill in the fields themselves. */
    public ImageData() {
    }

    /** Loads {@code bi} and filters it with a new {@link WhiteFilter}. */
    public ImageData(BufferedImage bi) {
        this(bi, new WhiteFilter());
    }

    /**
     * Copies every pixel of {@code bi} into {@link #data} and then lets {@code filter}
     * rewrite the matrix.
     *
     * @param bi     source image
     * @param filter filter applied to the raw RGB matrix
     */
    public ImageData(BufferedImage bi, Filter filter) {
        h = bi.getHeight();
        w = bi.getWidth();
        data = new int[h][w];
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                data[row][col] = bi.getRGB(col, row);
            }
        }
        filter.doFilter(data);
    }

    /**
     * Cuts this image into blocks, scanning columns from left to right.
     *
     * @return the blocks in scan order; empty when no block is found
     */
    public ImageData[] split() {
        ArrayList<ImageData> blocks = new ArrayList<ImageData>();
        ImageIterator it = new ImageIterator(this);
        while (it.hasNext()) {
            blocks.add(it.next());
        }
        return blocks.toArray(new ImageData[blocks.size()]);
    }

    /**
     * Finds the first column (when {@code isX}) or row (otherwise) at or after {@code begin}
     * that contains at least one cell different from {@code value}.
     *
     * @return the index found, or -1 when there is none
     */
    int skipEmpty(int begin, boolean isX, int value) {
        int outer = isX ? w : h;
        int inner = isX ? h : w;
        for (int i = begin; i < outer; i++) {
            for (int j = 0; j < inner; j++) {
                int cell = isX ? data[j][i] : data[i][j];
                if (cell != value) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Finds the first column (when {@code isX}) or row (otherwise) at or after {@code begin}
     * in which no cell equals {@code value}.
     *
     * @return the index found, or -1 when there is none
     */
    int skipEntity(int begin, boolean isX, int value) {
        int outer = isX ? w : h;
        int inner = isX ? h : w;
        for (int i = begin; i < outer; i++) {
            boolean found = false;
            for (int j = 0; j < inner; j++) {
                int cell = isX ? data[j][i] : data[i][j];
                if (cell == value) {
                    found = true;
                    break;
                }
            }
            // A line with zero cells never counts as a hit, matching the scan's original rule
            // that the last cell of the line must have been inspected.
            if (!found && inner > 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Iterates over the blocks of the enclosing image from left to right.
     *
     * <p>Scanning uses the enclosing instance's matrix; the block itself is copied out of the
     * {@code ia} passed to the constructor.
     */
    class ImageIterator implements Iterator<ImageData> {
        /** Column at which the next scan starts. */
        int x;
        ImageData ia;
        /** Block found by {@link #hasNext()} but not yet handed out. */
        ImageData next;

        public ImageIterator(ImageData ia) {
            this.ia = ia;
        }

        public boolean hasNext() {
            if (next == null) {
                next = getNext();
            }
            return next != null;
        }

        /**
         * Locates the next block and advances {@link #x} past it.
         *
         * <p>The horizontal range runs from the first column holding a non-zero cell to the
         * first following column holding no cell equal to 1 (or the right edge). The vertical
         * range is computed the same way over whole rows starting at row 0, so all blocks of
         * one image share the same top and bottom.
         *
         * @return the next block, or {@code null} when none is left
         */
        ImageData getNext() {
            int x1 = skipEmpty(x, true, 0);
            if (x1 == -1) {
                return null;
            }
            int x2 = skipEntity(x1, true, 1);
            if (x2 == -1) {
                x2 = w;
            }
            x = x2;
            int y1 = skipEmpty(0, false, 0);
            if (y1 == -1) {
                return null;
            }
            int y2 = skipEntity(y1, false, 1);
            if (y2 == -1) {
                y2 = h;
            }
            return ia.clone(x1, y1, x2 - x1, y2 - y1);
        }

        /**
         * Returns the next block.
         *
         * @throws java.util.NoSuchElementException when no block is left
         */
        public ImageData next() {
            // Look ahead here as well so next() works without a preceding hasNext() call.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            ImageData result = next;
            next = null;
            return result;
        }

        /** Removal is not supported. */
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Copies the rectangle starting at column {@code x}, row {@code y} into a new instance.
     * The {@link #code} of this instance is not copied.
     *
     * @param w0 width of the rectangle
     * @param h0 height of the rectangle
     */
    ImageData clone(int x, int y, int w0, int h0) {
        ImageData part = new ImageData();
        part.w = w0;
        part.h = h0;
        part.data = new int[h0][w0];
        for (int row = 0; row < h0; row++) {
            System.arraycopy(data[row + y], x, part.data[row], 0, w0);
        }
        return part;
    }

    /** Prints the matrix to standard output: '1' for cells equal to 1, a space otherwise. */
    public void show() {
        System.out.println();
        for (int row = 0; row < h; row++) {
            StringBuilder line = new StringBuilder();
            for (int col = 0; col < w; col++) {
                line.append(data[row][col] == 1 ? '1' : ' ');
            }
            System.out.println(line);
        }
        System.out.println();
    }

    /** Hash built from the width, the height and the number of cells equal to 1. */
    @Override
    public int hashCode() {
        int ones = 0;
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                if (data[row][col] == 1) {
                    ones++;
                }
            }
        }
        return w ^ h ^ ones;
    }

    /** Two instances are equal when they have the same size and identical cells; the code is ignored. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ImageData)) {
            return false;
        }
        ImageData other = (ImageData) obj;
        if (other.h != h || other.w != w) {
            return false;
        }
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                if (other.data[row][col] != data[row][col]) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Reads one encoded block per line from {@code path}; lines that {@link #decode(String)}
     * rejects are skipped.
     *
     * @throws IOException when the file cannot be opened or read
     */
    public static ImageData[] decodeFromFile(String path) throws IOException {
        ArrayList<ImageData> list = new ArrayList<ImageData>();
        BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                ImageData ia = decode(line);
                if (ia != null) {
                    list.add(ia);
                }
            }
        } finally {
            // Release the file handle even when reading fails part-way through.
            reader.close();
        }
        return list.toArray(new ImageData[list.size()]);
    }

    /**
     * Parses a line of the form {@code code,w,h,bits} as produced by {@link #encode()}.
     *
     * @return the decoded block, or {@code null} when the line is malformed: wrong field
     *         count, a code that is not exactly one character, a non-numeric or negative
     *         size, or a bit string whose length is not {@code w * h}
     */
    public static ImageData decode(String s) {
        String[] ss = s.split("\\,", 4);
        if (ss.length != 4) {
            return null;
        }
        if (ss[0].length() != 1) {
            return null;
        }
        ImageData ia = new ImageData();
        ia.code = ss[0].charAt(0);
        try {
            ia.w = Integer.parseInt(ss[1]);
            ia.h = Integer.parseInt(ss[2]);
        } catch (NumberFormatException e) {
            // Treat a non-numeric size like every other malformed line.
            return null;
        }
        if (ia.w < 0 || ia.h < 0) {
            return null;
        }
        String bits = ss[3];
        if (bits.length() != ia.w * ia.h) {
            return null;
        }
        ia.data = new int[ia.h][ia.w];
        for (int row = 0; row < ia.h; row++) {
            for (int col = 0; col < ia.w; col++) {
                ia.data[row][col] = bits.charAt(row * ia.w + col) == '1' ? 1 : 0;
            }
        }
        return ia;
    }

    /**
     * Serializes this block as {@code code,w,h,bits}, where {@code bits} lists the cells row
     * by row as '1' (cell equals 1) or '0' (anything else).
     */
    public String encode() {
        StringBuilder sb = new StringBuilder();
        sb.append(code).append(",");
        sb.append(w).append(",");
        sb.append(h).append(",");
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                sb.append(data[row][col] == 1 ? '1' : '0');
            }
        }
        return sb.toString();
    }
}
//像素过滤接口
package pay365;
/**
 * Pixel filter applied to an image's pixel matrix.
 */
public interface Filter {
/**
 * Processes the given pixel matrix.
 *
 * @param data pixel values; ImageData passes a matrix indexed as data[row][column]
 */
void doFilter(int[][] data);
}
//
package pay365;
/**
 * Base class for filters that map every pixel independently of its neighbours.
 */
public abstract class AbstractFilter implements Filter {
    /**
     * Replaces each cell of {@code data} with the result of {@link #filter(int)}.
     * The width is taken from the first row; an empty matrix or an empty first row is
     * left untouched.
     */
    public void doFilter(int[][] data) {
        if (data.length == 0) {
            return;
        }
        int width = data[0].length;
        if (width <= 0) {
            return;
        }
        for (int[] row : data) {
            for (int col = 0; col < width; col++) {
                row[col] = filter(row[col]);
            }
        }
    }

    /**
     * Maps a single pixel value to its filtered value.
     *
     * @param p the current value of the cell
     * @return the value to store in its place
     */
    protected abstract int filter(int p);
}
//过滤csdn验证码的过滤器
package pay365;
public class CsdnFilter extends AbstractFilter {
protected int filter(int p) {
return isNotWhite(p)?1:0;
}
private boolean isNotWhite(int p) {
boolean b = (p & 0×0ff) == 255 && (p >> 8 & 0×0ff) == 255
&& (p >> 16 & 0xff) == 255;
return !b;
}
}
//过滤前景色为白色的过滤器
package pay365;
/**
 * Filter for images whose foreground is white: near-white pixels become 1, all others 0.
 */
public class WhiteFilter extends AbstractFilter {
    /**
     * @param p packed pixel value
     * @return 1 when {@code p} is near-white, otherwise 0
     */
    protected int filter(int p) {
        return isWhite(p) ? 1 : 0;
    }

    /** A pixel counts as white when each of its three low-order bytes is above 240. */
    private boolean isWhite(int p) {
        int byte0 = p & 0xff;
        int byte1 = (p >> 8) & 0xff;
        int byte2 = (p >> 16) & 0xff;
        return byte0 > 240 && byte1 > 240 && byte2 > 240;
    }
}
//模板创建类
package pay365;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import javax.imageio.ImageIO;
/**
 * Interactive tool that builds the template file used for recognition.
 *
 * <p>It downloads several captcha images, splits them into blocks, shows each distinct block
 * on the console, asks for the character it represents and writes all blocks to
 * {@code template.data}.
 */
public class TemplateCreator {
    /** Console input used to read the label of each block. */
    private static BufferedReader reader = new BufferedReader(
            new InputStreamReader(System.in));

    /**
     * @param args optional: {@code args[0]} is the captcha URL, {@code args[1]} the fully
     *             qualified name of the {@link Filter} class to use
     * @throws Exception when the filter cannot be instantiated, an image cannot be fetched
     *                   or decoded, or the template file cannot be written
     */
    public static void main(String[] args) throws Exception {
        Set<ImageData> set = new HashSet<ImageData>();
        // Same spelling as the default URL in Recognize2, so both tools hit the same endpoint.
        String url = "http://passport.csdn.net/member/ShowExPwd.aspx";
        String filterClazz = "pay365.CsdnFilter";
        if (args.length >= 1) {
            url = args[0];
        }
        if (args.length >= 2) {
            filterClazz = args[1];
        }
        Filter csdnFilter = (Filter) Class.forName(filterClazz).newInstance();

        // Fetch nine samples; the set keeps one copy of each distinct block.
        for (int i = 1; i < 10; i++) {
            BufferedImage bi = ImageIO.read(new URL(url));
            if (bi == null) {
                // ImageIO.read returns null when no registered reader understands the data.
                throw new java.io.IOException("No image could be decoded from " + url);
            }
            ImageData[] blocks = new ImageData(bi, csdnFilter).split();
            for (int x = 0; x < blocks.length; x++) {
                set.add(blocks[x]);
            }
        }
        System.out.println(set.size());

        // Ask for a label per block; anything but a single character leaves the code unset.
        for (Iterator<ImageData> iter = set.iterator(); iter.hasNext();) {
            ImageData ele = iter.next();
            ele.show();
            System.out.print("char:");
            String s = readLine();
            if (s.length() == 1) {
                ele.code = s.charAt(0);
            }
        }

        PrintWriter pw = new PrintWriter(new File("template.data"));
        try {
            for (Iterator<ImageData> iter = set.iterator(); iter.hasNext();) {
                pw.println(iter.next().encode());
            }
            pw.flush();
        } finally {
            // Close the file even when encoding a block fails.
            pw.close();
        }
    }

    /**
     * Reads one line from the console.
     *
     * @return the line, or an empty string on end of input or on a read error
     */
    private static String readLine() {
        try {
            String line = reader.readLine();
            // readLine() yields null at end of input; callers expect a non-null string.
            return line == null ? "" : line;
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }
}
//识别类
package pay365;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import javax.imageio.ImageIO;
/**
 * Recognizes captcha images by exact lookup of their blocks in {@code template.data}.
 */
public class Recognize2 {
    static String url = "http://passport.csdn.net/member/ShowExPwd.aspx";
    static Filter filter;
    /** Templates keyed by block, loaded from {@code template.data} on first use. */
    private static HashMap<ImageData, Character> templates;

    /**
     * Runs ten recognition attempts and prints the share of attempts that produced at least
     * one character.
     *
     * @param args optional: {@code args[0]} is the captcha URL, {@code args[1]} the fully
     *             qualified name of the {@link Filter} class to use
     */
    public static void main(String[] args) throws Exception {
        if (args.length >= 1) {
            url = args[0];
        }
        String filterClazz = "pay365.CsdnFilter";
        if (args.length >= 2) {
            filterClazz = args[1];
        }
        filter = (Filter) Class.forName(filterClazz).newInstance();
        int total = 10;
        int count = 0;
        for (int i = 0; i < total; i++) {
            if (recognize(i)) {
                count++;
            }
        }
        System.out.println("rate:" + (count * 1.0 / total * 100) + "%100");
    }

    /**
     * Downloads one captcha, saves it as {@code <num>.png}, and prints the characters of all
     * blocks that match a template exactly.
     *
     * @param num attempt number, used for the file name and the output prefix
     * @return {@code true} when at least one block was matched
     * @throws IOException when the image cannot be fetched or decoded, or a file cannot be
     *                     read or written
     */
    private static boolean recognize(int num) throws IOException {
        BufferedImage bi = ImageIO.read(new URL(url));
        if (bi == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("No image could be decoded from " + url);
        }
        ImageIO.write(bi, "png", new File(num + ".png"));
        ImageData[] blocks = new ImageData(bi, filter).split();
        HashMap<ImageData, Character> map = loadTemplates();

        StringBuilder s = new StringBuilder();
        for (int x = 0; x < blocks.length; x++) {
            ImageData block = blocks[x];
            // Blocks wider than 15 pixels are not looked up.
            if (block.w > 15) {
                continue;
            }
            Character c = map.get(block);
            if (c != null) {
                s.append(c.charValue());
            }
        }
        System.out.println(num + ":" + s);
        return s.length() != 0;
    }

    /** Parses {@code template.data} once and reuses the result for later attempts. */
    private static HashMap<ImageData, Character> loadTemplates() throws IOException {
        if (templates == null) {
            ImageData[] template = ImageData.decodeFromFile("template.data");
            HashMap<ImageData, Character> map = new HashMap<ImageData, Character>();
            for (int i = 0; i < template.length; i++) {
                map.put(template[i], Character.valueOf(template[i].code));
            }
            templates = map;
        }
        return templates;
    }
}
(出处:http://www.knowsky.com)