简介:本文详细介绍PaddleOCR的安装、配置及使用方法,涵盖基础识别、多语言支持、版面分析等核心功能,提供完整代码示例与优化建议。
PaddleOCR是百度开源的OCR工具库,基于PaddlePaddle深度学习框架构建,集成了文本检测、方向分类和文字识别三大核心模块。其技术架构采用CRNN(卷积循环神经网络)与DB(Differentiable Binarization)算法的组合,在保持高精度的同时实现高效推理。
典型应用场景包括:
# Base environment (Python 3.7+ recommended)
conda create -n paddleocr python=3.8
conda activate paddleocr

# Core dependencies
pip install paddlepaddle-gpu==2.4.0.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html  # GPU build
pip install paddleocr
pip install opencv-python shapely pyclipper
PaddleOCR提供预训练模型仓库,可通过以下方式获取:
# Download the general Chinese/English models (detection + recognition)
mkdir -p ./inference
cd ./inference
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar xvf ch_PP-OCRv3_det_infer.tar
tar xvf ch_PP-OCRv3_rec_infer.tar
from paddleocr import PaddleOCR

# Initialise the OCR engine with the locally downloaded PP-OCRv3 models.
ocr = PaddleOCR(
    use_angle_cls=True,  # enable the text-direction classifier
    lang="ch",  # Chinese recognition
    det_model_dir="./inference/ch_PP-OCRv3_det_infer",
    rec_model_dir="./inference/ch_PP-OCRv3_rec_infer",
)

# Recognise a single image.
img_path = "test.jpg"
result = ocr.ocr(img_path, cls=True)

# Parse the result. PaddleOCR 2.6+ returns one entry per input image; each
# entry is that image's list of [box, (text, confidence)] lines, or None when
# no text was found. Older releases returned the flat list of lines directly.
for page in result:
    for box, (text, score) in page or []:
        print(f"坐标: {box}, 文本: {text}, 置信度: {score:.2f}")
PaddleOCR支持通过lang参数切换语言模型:
# English recognition
ocr_en = PaddleOCR(
    lang="en",
    det_model_dir="./en_PP-OCRv3_det_infer",
    rec_model_dir="./en_PP-OCRv3_rec_infer",
)

# Japanese recognition (the matching model must be downloaded first)
ocr_jp = PaddleOCR(
    lang="japan",
    use_gpu=True,
    rec_char_dict_path="./ppocr/utils/dict/japan_dict.txt",
)
启用版面分析可获取文字区域层级关系:
# NOTE(review): `use_layout=` and `ocr(..., layout=True)` do not appear in the
# documented PaddleOCR 2.x API; layout analysis is normally exposed through
# `paddleocr.PPStructure`. Confirm against the installed version before use.
ocr_layout = PaddleOCR(use_layout=True)
result = ocr_layout.ocr("document.jpg", layout=True)

# Walk the layout information attached to each detected region.
for idx, (box, (text, prob), layout) in enumerate(result):
    print(f"区域{idx}: 类型={layout['type']}, 置信度={layout['score']:.2f}")
import os

from paddleocr import PaddleOCR


def batch_ocr(img_dir, output_dir):
    """Run OCR on every PNG/JPEG in ``img_dir`` and save the text per image.

    Args:
        img_dir: Directory that is scanned (non-recursively) for images.
        output_dir: Directory receiving one ``<image name>.txt`` file per
            image; created if it does not exist.
    """
    ocr = PaddleOCR()
    os.makedirs(output_dir, exist_ok=True)

    for img_name in os.listdir(img_dir):
        if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        result = ocr.ocr(os.path.join(img_dir, img_name))
        out_path = os.path.join(output_dir, f"{img_name}.txt")
        # Explicit UTF-8: the recognised text is typically Chinese and the
        # platform default encoding may not be able to represent it.
        with open(out_path, 'w', encoding='utf-8') as f:
            # PaddleOCR 2.6+ nests the lines one level deeper (one list per
            # input image, None when nothing was detected).
            for page in result or []:
                for line in page or []:
                    f.write(f"{line[1][0]}\n")


batch_ocr("./images", "./results")
from paddle.inference import Config, PrecisionType, create_predictor
from paddleocr import PaddleOCR

# Example TensorRT INT8 configuration for the detection model.
# The PP-OCRv3 inference archives unpack to inference.pdmodel/.pdiparams.
config = Config(
    "./inference/ch_PP-OCRv3_det_infer/inference.pdmodel",
    "./inference/ch_PP-OCRv3_det_infer/inference.pdiparams",
)
config.enable_use_gpu(100, 0)  # initial GPU memory pool (MB), device id
config.switch_ir_optim(True)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    precision_mode=PrecisionType.Int8,  # enable INT8 quantisation
    max_batch_size=1,
)
predictor = create_predictor(config)

# NOTE(review): `_predictor` is not a documented PaddleOCR argument; the
# documented way to request TensorRT is `use_tensorrt=True` together with
# `precision=...`. Confirm against the installed PaddleOCR version.
ocr = PaddleOCR(use_angle_cls=True, _predictor=predictor)
import threading
from concurrent.futures import ThreadPoolExecutor

from paddleocr import PaddleOCR

# Per-thread storage so each worker loads the OCR models only once.
_worker_state = threading.local()


def process_image(img_path):
    """Run OCR on ``img_path`` using the calling thread's own engine.

    The engine is created on a thread's first call and reused afterwards,
    instead of constructing a new ``PaddleOCR`` for every image.
    """
    engine = getattr(_worker_state, "ocr", None)
    if engine is None:
        engine = _worker_state.ocr = PaddleOCR()
    return engine.ocr(img_path)


with ThreadPoolExecutor(max_workers=4) as executor:
    img_paths = ["img1.jpg", "img2.jpg", "img3.jpg", "img4.jpg"]
    results = list(executor.map(process_image, img_paths))
调整batch_size参数(默认1);启用use_tensorrt并设置precision_mode=Config.Precision.FP16;选用轻量模型ch_PP-OCRv3_det_lite_infer。
# Pre-processing enhancement
import cv2
import numpy as np


def preprocess_image(img_path):
    """Load an image and return a cleaned-up binary version of it.

    Steps: grayscale conversion, Otsu thresholding, then a 3x3 morphological
    closing.

    Raises:
        FileNotFoundError: If ``img_path`` cannot be read as an image.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"cannot read image: {img_path}")
    # Grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binarisation (threshold chosen automatically by Otsu's method)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Morphological closing
    kernel = np.ones((3, 3), np.uint8)
    return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)


# Run the pre-processing before OCR (assumes an `ocr` engine already exists).
processed_img = preprocess_image("complex_bg.jpg")
result = ocr.ocr(processed_img)
import re

# Everything that is not a word character is stripped. `\w` already covers
# CJK characters in Python 3; the explicit range is kept from the original.
_NON_WORD = re.compile(r'[^\w\u4e00-\u9fff]')


def post_process(ocr_result):
    """Clean the recognised text of each OCR line.

    Args:
        ocr_result: Iterable of ``[box, (text, confidence)]`` entries.

    Returns:
        A list of ``(box, cleaned_text, confidence)`` tuples, in input order.
    """
    processed = []
    for line in ocr_result:
        # Stripping non-word characters also removes all whitespace, so digits
        # separated by spaces ("1 2 3") are already joined here; the separate
        # digit-spacing substitution could never match and was dropped.
        text = _NON_WORD.sub('', line[1][0])
        processed.append((line[0], text, line[1][1]))
    return processed
容器化部署:
FROM python:3.8-slim
# NOTE(review): paddlepaddle-gpu needs CUDA runtime libraries and opencv-python
# usually needs system packages such as libGL/glib, none of which ship in the
# slim base image. Confirm the container actually starts.
RUN pip install paddlepaddle-gpu paddleocr opencv-python
COPY ./app /app
WORKDIR /app
CMD ["python", "ocr_service.py"]
REST API封装(使用FastAPI):
```python
from fastapi import FastAPI, File, HTTPException, UploadFile
from paddleocr import PaddleOCR
import cv2
import numpy as np

app = FastAPI()
ocr = PaddleOCR()  # created once at import time and shared by all requests


@app.post("/ocr")
async def ocr_endpoint(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the raw PaddleOCR result.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image.
    """
    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="empty upload")
    nparr = np.frombuffer(contents, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imdecode returns None when the bytes are not a valid image.
        raise HTTPException(status_code=400, detail="file is not a decodable image")
    result = ocr.ocr(img)
    return {"result": result}
```
最新V3.0版本主要改进:
生态扩展工具:
通过本教程的系统学习,开发者可快速掌握PaddleOCR的核心功能,并根据实际业务需求进行定制开发。建议持续关注PaddleOCR官方GitHub仓库获取最新模型和功能更新。