# 点击查看代码 ("click to view code" - leftover web-page label, not part of the program)
import pytesseract
from PIL import Image
import cv2
import numpy as np


class TesseractHandwritingRecognizer:
    """Recognize text in an image file using OpenCV preprocessing and Tesseract.

    The image is converted to grayscale, median-blurred and Otsu-binarized
    before being handed to ``pytesseract.image_to_string``.
    """

    def __init__(self):
        """Store the Tesseract command-line options used for recognition."""
        # If the Tesseract binary is not on PATH, point pytesseract at it
        # (adjust to your install location), e.g. on Windows:
        # pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        # On Linux/macOS this is usually unnecessary; `which tesseract` shows the path.

        # --oem 3: default OCR engine mode; --psm 8: treat the image as a
        # single word; -l eng: English language data.
        # NOTE(review): `eng` is the stock English data, not a handwriting-specific
        # model, and --psm 8 assumes one word per image - confirm both are intended.
        self.config = '--oem 3 --psm 8 -l eng'

    def preprocess_image(self, image_path):
        """Load an image and return a denoised, binarized single-channel copy.

        Args:
            image_path: Path of the image file to read with ``cv2.imread``.

        Returns:
            The binary image produced by Otsu thresholding.

        Raises:
            ValueError: If ``cv2.imread`` could not load ``image_path``.
        """
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None rather than raising; fail here with a clear message instead
            # of letting cvtColor raise an opaque assertion error.
            raise ValueError(f"Could not read image: {image_path!r}")

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Noise removal: median filter with aperture size 5.
        denoised = cv2.medianBlur(gray, 5)

        # Binarization: with THRESH_OTSU the threshold is chosen automatically,
        # so the 0 passed as the threshold value is only a placeholder.
        _, binary = cv2.threshold(
            denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        return binary

    def recognize_text(self, image_path):
        """Run OCR on the image at ``image_path``.

        Args:
            image_path: Path of the image file to recognize.

        Returns:
            On success, ``{'text': <stripped OCR output>, 'confidence': 'N/A'}``.
            On any failure, ``{'error': <exception message>}``; exceptions are
            never propagated to the caller.
        """
        # The broad except is deliberate: the method's contract is to report
        # every failure through the returned dict.
        try:
            processed_image = self.preprocess_image(image_path)

            # Wrap the preprocessed array in a PIL image for pytesseract.
            pil_image = Image.fromarray(processed_image)

            text = pytesseract.image_to_string(pil_image, config=self.config)

            return {
                'text': text.strip(),
                # Placeholder: obtaining a Tesseract confidence needs extra processing.
                'confidence': 'N/A',
            }
        except Exception as e:
            return {'error': str(e)}


# Usage example
def test_tesseract():
    """Recognize ``handwritten_sample.jpg`` and print the resulting dict."""
    recognizer = TesseractHandwritingRecognizer()
    result = recognizer.recognize_text('handwritten_sample.jpg')
    print(f"识别结果: {result}")