1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
| package com.colin.tool.img;
import java.nio.charset.StandardCharsets;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.leptonica.PIX;
import org.bytedeco.leptonica.global.lept;
import org.bytedeco.tesseract.TessBaseAPI;
/**
 * Image helpers built on the bytedeco Tesseract and Leptonica bindings.
 */
public class ImageUtil {

    /**
     * Runs OCR on an image file and returns the recognized text.
     *
     * <p>All native resources acquired along the way (the Tesseract engine,
     * the decoded image and the result buffer) are released before this
     * method returns, on both the success and the failure paths.
     *
     * @param imagePath path of the image file handed to {@code lept.pixRead}
     * @param language  supplies the trained-data directory and the language name
     *                  passed to {@code TessBaseAPI.Init}
     * @return the recognized text decoded as UTF-8, or an empty string when the
     *         engine fails to initialize, the image cannot be read, or the engine
     *         returns no text
     * @throws NullPointerException if {@code language} is {@code null}
     */
    public static String ocr(String imagePath, OcrLanguage language) {
        // Dereference the language first so a null argument fails before any
        // native object has been allocated.
        String dataPath = language.getPath();
        String langName = language.getLangName();

        TessBaseAPI api = new TessBaseAPI();
        PIX image = null;
        BytePointer outText = null;
        try {
            if (api.Init(dataPath, langName) != 0) {
                return "";
            }

            image = lept.pixRead(imagePath);
            if (image == null) {
                return "";
            }

            api.SetImage(image);
            outText = api.GetUTF8Text();
            if (outText == null || outText.isNull()) {
                return "";
            }

            // GetUTF8Text yields UTF-8 bytes. Decode them explicitly instead of
            // relying on BytePointer.getString(), which uses the platform default
            // charset and would garble non-ASCII text on non-UTF-8 platforms.
            return new String(outText.getStringBytes(), StandardCharsets.UTF_8);
        } finally {
            // Same release order as before: engine, text buffer, image.
            api.End();
            if (outText != null) {
                outText.deallocate();
            }
            if (image != null) {
                lept.pixDestroy(image);
            }
        }
    }

    /**
     * Manual smoke test: runs OCR with {@link OcrLanguage#ZH_CN} and prints the result.
     *
     * @param args optional; {@code args[0]} is the image path. When absent, the
     *             built-in sample path is used.
     */
    public static void main(String[] args) {
        String imagePath = args.length > 0 ? args[0] : "F:\\test\\img\\0.jpg";
        String text = ocr(imagePath, OcrLanguage.ZH_CN);
        System.out.println(text);
    }

    /**
     * Languages available to {@link #ocr(String, OcrLanguage)}: each constant pairs
     * a Tesseract language name with the directory holding its trained data.
     */
    public enum OcrLanguage {
        /** Uses the {@code chi_sim} trained data. */
        ZH_CN("chi_sim", "F:\\test\\img\\traineddata"),
        /** Uses the {@code eng} trained data. */
        EN("eng", "F:\\test\\img\\traineddata");

        private final String langName;
        private final String path;

        OcrLanguage(String langName, String path) {
            this.langName = langName;
            this.path = path;
        }

        /** @return the language name passed to {@code TessBaseAPI.Init} */
        public String getLangName() {
            return langName;
        }

        /** @return the trained-data directory passed to {@code TessBaseAPI.Init} */
        public String getPath() {
            return path;
        }
    }
}
|