简介:本文详细介绍PaddleOCR的安装、配置与使用方法,涵盖基础识别、多语言支持及工业级部署方案,助力开发者快速实现图片文字提取需求。
PaddleOCR是百度开源的OCR工具库,基于PaddlePaddle深度学习框架开发,提供全流程文字识别解决方案。其核心优势体现在三方面:
典型应用场景包括:金融票据识别、医疗报告数字化、工业仪表读数、古籍文献电子化等。某物流企业通过部署PaddleOCR,实现快递面单信息自动录入,单票处理时间从15秒降至2秒,错误率下降87%。
# Create a virtual environment (recommended)
python -m venv paddle_env
source paddle_env/bin/activate   # Linux/macOS
paddle_env\Scripts\activate      # Windows

# Install the base build of PaddlePaddle
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple

# Install the full PaddleOCR package
pip install paddleocr -i https://mirror.baidu.com/pypi/simple
# Pick the install command that matches your CUDA version
# Example for CUDA 11.2
pip install paddlepaddle-gpu==2.4.2.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html

# Verify that the GPU build is usable; should print "PaddlePaddle is installed successfully!"
python -c "import paddle; paddle.utils.run_check()"
运行以下命令检查环境完整性:
from paddleocr import PaddleOCR

# Constructing the engine with the Chinese model is the environment check:
# if this line raises, the installation is incomplete.
ocr = PaddleOCR(use_angle_cls=True, lang="ch")
print("PaddleOCR初始化成功")
from paddleocr import PaddleOCR

# Initialise the model (handles mixed Chinese/English text)
ocr = PaddleOCR(use_angle_cls=True, lang="ch")

# Recognise an image given by its path
img_path = "test_images/demo.jpg"
result = ocr.ocr(img_path, cls=True)

# Parse the result.
# NOTE: this assumes the PaddleOCR 2.6+ return shape, where `result` holds one
# entry per input image and each entry is a list of [box, (text, confidence)]
# items, or None when no text was detected in that image.
for page in result:
    for box, (text, score) in page or []:
        print(f"坐标: {box}, 文本: {text}, 置信度: {score:.2f}")
输出示例:
坐标: [[12, 34], [200, 56], [198, 78], [10, 60]], 文本: 百度飞桨, 置信度: 0.98
import os

from paddleocr import PaddleOCR

ocr = PaddleOCR()
image_dir = "batch_images/"
output_file = "results.txt"
image_suffixes = (".png", ".jpg", ".jpeg")

# The recognised text is mostly Chinese, so pin the encoding instead of
# relying on the platform default.
with open(output_file, "w", encoding="utf-8") as f:
    # Sorted so the output order is reproducible across runs.
    for img_name in sorted(os.listdir(image_dir)):
        if not img_name.lower().endswith(image_suffixes):
            continue
        img_path = os.path.join(image_dir, img_name)
        result = ocr.ocr(img_path)
        # NOTE: assumes the PaddleOCR 2.6+ return shape: one entry per image,
        # each a list of [box, (text, confidence)] items or None if no text
        # was found.
        for page in result:
            for _box, (text, _score) in page or []:
                f.write(f"{img_name}: {text}\n")
使用OpenCV绘制识别框:
import cv2
import numpy as np
from paddleocr import PaddleOCR

# use_angle_cls=True so that the cls=True request below actually has an
# angle classifier to run.
ocr = PaddleOCR(use_angle_cls=True)

img = cv2.imread("demo.jpg")
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError("demo.jpg")

result = ocr.ocr(img, cls=True)

# NOTE: assumes the PaddleOCR 2.6+ return shape: one entry per image, each a
# list of [box, (text, confidence)] items or None if no text was found.
for page in result:
    for box, (text, _score) in page or []:
        # Draw the quadrilateral around the text line
        pts = np.array(box, np.int32)
        cv2.polylines(img, [pts], True, (0, 255, 0), 2)

        # Add the text label. Box corners may be floats, but putText needs
        # integer pixel coordinates.
        # NOTE(review): OpenCV's Hershey fonts only cover ASCII, so Chinese
        # text will be drawn as '?'; render via an image library with a CJK
        # font if readable labels are required.
        x, y = int(box[0][0]), int(box[0][1])
        cv2.putText(img, text, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

cv2.imwrite("result.jpg", img)
from paddleocr import PaddleOCR

# Japanese recognition
ocr_jp = PaddleOCR(lang="japan")

# Korean recognition
ocr_kr = PaddleOCR(lang="korean")

# Traditional Chinese recognition (lang="chinese_cht" selects a single
# language model; it is not a Chinese/English/German mixed model)
ocr_multi = PaddleOCR(lang="chinese_cht")
import os

import cv2
from paddleocr import PPStructure, draw_structure_result, save_structure_res

table_engine = PPStructure(recovery=True)

img_path = "table.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError(img_path)

result = table_engine(img)

# Save the results (tables are exported as Excel files).
# save_structure_res takes (result, save_folder, img_name); the files are
# written under save_folder/img_name/.
img_name = os.path.splitext(os.path.basename(img_path))[0]
save_structure_res(result, "table_result", img_name)

# Render a visualisation.
# NOTE(review): draw_structure_result needs a font file to draw text; point
# font_path at a CJK-capable .ttf on your system (the PaddleOCR repository
# ships doc/fonts/simfang.ttf).
font_path = "doc/fonts/simfang.ttf"
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
vis_result = draw_structure_result(rgb_img, result, font_path=font_path)
# NOTE(review): the drawing helper presumably returns RGB data, while
# cv2.imwrite expects BGR, hence the conversion — confirm channel order.
cv2.imwrite("table_vis.jpg", cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR))
import base64
import binascii

import cv2
import numpy as np
from flask import Flask, jsonify, request
from paddleocr import PaddleOCR

app = Flask(__name__)
ocr = PaddleOCR()


@app.route('/ocr', methods=['POST'])
def ocr_api():
    """Run OCR on a base64-encoded image posted as JSON.

    Expects a JSON body of the form {"image": "<base64>"}; the value may be
    either raw base64 or a data URL ("data:image/...;base64,<payload>").

    Returns {"result": ...} on success, or {"error": ...} with HTTP 400 when
    the request body or the image cannot be decoded.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get('image'), str):
        return jsonify({"error": "JSON body with a string 'image' field is required"}), 400

    # Drop an optional data-URL prefix; raw base64 contains no comma, so it
    # passes through unchanged.
    img_base64 = data['image'].split(',', 1)[-1]
    try:
        img_data = base64.b64decode(img_base64)
    except (binascii.Error, ValueError):
        return jsonify({"error": "'image' is not valid base64"}), 400

    nparr = np.frombuffer(img_data, np.uint8)
    # Guard the empty buffer explicitly, then let imdecode report
    # non-image data by returning None.
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
    if img is None:
        return jsonify({"error": "'image' could not be decoded as an image"}), 400

    result = ocr.ocr(img)
    return jsonify({"result": result})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
FROM python:3.8-slim

WORKDIR /app

# The slim image lacks the shared libraries that opencv-python (libGL, glib)
# and paddlepaddle (libgomp) load at import time.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes.
# NOTE: requirements.txt must list gunicorn, since the CMD below runs it.
COPY requirements.txt .
RUN pip install -r requirements.txt --no-cache-dir

COPY . .

CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "app:app"]
模型选择:
PaddleOCR(rec_model_dir='ch_PP-OCRv3_rec_infer')
PaddleOCR(rec_algorithm='SVTR_LCNet')
参数优化:
ocr = PaddleOCR(
    det_db_thresh=0.3,  # text detection threshold
    det_db_box_thresh=0.5,  # box filtering threshold
    rec_char_dict_path='ppocr/utils/dict/chinese_cht_dict.txt',  # custom dictionary
)
use_gpu=True且CUDA环境正确配置
# Load the recognition model from the given directory.
# NOTE(review): the directory name suggests a quantised PP-OCRv3 recognition
# model — confirm the model has been downloaded to this path.
ocr = PaddleOCR(rec_model_dir='ch_PP-OCRv3_rec_quant_infer')
并行处理:多进程批量预测
from multiprocessing import Pool

from paddleocr import PaddleOCR

# One engine per worker process, created by _init_worker. An engine built in
# the parent should not be shared with the workers.
ocr = None


def _init_worker():
    """Create this worker process's own PaddleOCR engine."""
    global ocr
    ocr = PaddleOCR()


def process_image(img_path):
    """Run OCR on one image path using the current worker's engine."""
    return ocr.ocr(img_path)


# The guard is required wherever child processes are started with "spawn"
# (Windows, macOS): each child re-imports this module and must not start
# another pool.
if __name__ == "__main__":
    # Replace with the image paths you want to process.
    image_list = ["test_images/demo.jpg"]

    with Pool(4, initializer=_init_worker) as p:  # 4 worker processes
        results = p.map(process_image, image_list)
# Weighted upstream pool for the OCR service instances
upstream ocr_servers {
    server 10.0.0.1:5000 weight=3;
    server 10.0.0.2:5000 weight=2;
}
# Scale the ocr-service Deployment between 2 and 10 replicas, targeting 70%
# average CPU utilisation.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: ocr-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: ocr-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
nvidia-smi确认版本,安装对应PaddlePaddle-GPU包
pip install --ignore-installed
from paddlehub.module.module import Module

# NOTE(review): `img` is not defined in this snippet; it must already hold the
# image to enhance. Also confirm that a module named "ESRGAN_x4_plus" is
# available in your PaddleHub version.
esrgan = Module(name="ESRGAN_x4_plus")
enhanced_img = esrgan.enhance(img)[0]
use_angle_cls=True
num_workers=4
PaddleHub集成:一键调用预训练模型
import cv2
import paddlehub as hub

# Load the pretrained OCR module and run it on a single image.
ocr = hub.Module(name="chinese_ocr_db_crnn_server")
results = ocr.recognize_text(images=[cv2.imread("img.jpg")])
EasyEdge端侧部署:生成Android/iOS SDK
# Convert the inference model for ARM targets.
# The optimiser tool installed with Paddle-Lite is named paddle_lite_opt (underscores).
paddle_lite_opt --model_file=inference.pdmodel \
    --param_file=inference.pdiparams \
    --optimize_out=ocr_opt \
    --valid_targets=arm
数据标注工具:使用LabelImg进行OCR数据标注
git clone https://github.com/tzutalin/labelImg.git
cd labelImg && pip install .
labelImg  # launch the annotation tool
通过本教程的系统学习,开发者可掌握从环境搭建到工业级部署的全流程技能。实际测试数据显示,采用优化后的PaddleOCR方案,在NVIDIA T4 GPU上可实现700FPS的识别速度,满足大多数实时场景需求。建议持续关注PaddleOCR GitHub仓库的更新,及时获取最新算法改进和性能优化方案。