简介:本文详细介绍如何使用cnocr库实现简体中文与繁体中文竖排文字识别,涵盖环境搭建、参数配置、代码实现及优化技巧,为开发者提供可落地的技术方案。
cnocr是由国内开发者开源的OCR工具库,基于深度学习框架PyTorch实现,支持多语言、多方向文字识别。其核心技术优势体现在三个方面:
内置简体中文字典(cn_dict.txt)和繁体中文字典(tw_dict.txt),通过lang参数动态切换;采用轻量级识别模型(densenet_lite_136-gru),在CPU环境下可实现实时识别。竖排文字识别相较于横排存在三大技术挑战:字符间距不规则、行间干扰严重、标点符号位置特殊。cnocr通过改进的CTC损失函数和方向感知特征提取模块,在古籍、书法等竖排场景中保持较高准确率。
# Create an isolated conda environment (optional)
conda create -n cnocr_env python=3.8
conda activate cnocr_env

# Install core dependencies
pip install torch torchvision torchaudio
pip install opencv-python numpy

# Install cnocr (latest stable release)
pip install cnocr -U
# Verify the installation; expect version 2.2.x or later.
import cnocr

print(cnocr.__version__)
| 参数 | 类型 | 默认值 | 功能说明 |
|---|---|---|---|
| lang | str | ch_sim | 语言选择:ch_sim(简体)、ch_tra(繁体) |
| det_model_name | str | ch_PP-OCRv3_det | 检测模型,竖排场景建议用ch_PP-OCRv3_det_vertical |
| rec_model_name | str | densenet_lite_136-gru | 识别模型 |
| context | str | cpu | 计算设备:cpu/cuda |
| root_engine | str | ./cnocr_models | 模型下载目录 |
# NOTE: the original snippet used torch.cuda.is_available() without
# importing torch, which raises NameError when run standalone.
import torch

from cnocr import CnOcr

# Configuration tuned for vertical-text recognition.
config = {
    'lang': 'ch_sim',  # or 'ch_tra' for traditional Chinese
    'det_model_name': 'ch_PP-OCRv3_det_vertical',  # vertical-text detection model
    'rec_model_name': 'densenet_lite_136-gru',
    # Prefer GPU when available, otherwise fall back to CPU.
    'context': 'cuda' if torch.cuda.is_available() else 'cpu',
    'root_engine': './models',
}

ocr = CnOcr(**config)
import cv2
import numpy as np


def preprocess_image(img_path):
    """Load an image and prepare it for vertical OCR.

    Pipeline: BGR->RGB conversion, Otsu binarization (recommended for
    vertical layouts), then downscaling so the longer side is <= 1200 px.
    """
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Binarize: Otsu picks the threshold automatically.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Cap the longer side at 1200 px to keep inference fast.
    h, w = binary.shape
    longest = max(h, w)
    if longest > 1200:
        scale = 1200 / longest
        binary = cv2.resize(binary, None, fx=scale, fy=scale)
    return binary
def recognize_vertical_text(img_paths, lang='ch_sim'):
    """Run vertical-text OCR over a batch of image files.

    Args:
        img_paths: iterable of image file paths.
        lang: 'ch_sim' (simplified) or 'ch_tra' (traditional).

    Returns:
        list of dicts, one per image:
        {'image_path': str, 'texts': [{'text', 'box', 'confidence'}, ...]}
    """
    # The vertical detection model is fixed at construction time.
    # BUG FIX: the original also passed det_model_name to ocr(), but that
    # is a constructor-only parameter — redundant here and not accepted
    # by the per-call API.
    ocr = CnOcr(lang=lang, det_model_name='ch_PP-OCRv3_det_vertical')
    results = []
    for path in img_paths:
        img = preprocess_image(path)
        res = ocr.ocr(img)
        # cnocr returns per-line dicts with position, text, and score.
        vertical_texts = []
        for line in res:
            vertical_texts.append({
                'text': line['text'],
                'box': line['position'],  # vertical boxes may need transposing downstream
                'confidence': line['score'],
            })
        results.append({'image_path': path, 'texts': vertical_texts})
    return results
# Dedicated configuration for traditional-Chinese recognition.
def recognize_traditional(img_path):
    """Recognize traditional-Chinese vertical text, adding a simplified form."""
    ocr = CnOcr(
        lang='ch_tra',
        det_model_name='ch_PP-OCRv3_det_vertical',
        rec_model_name='resnet_stn_densenet-gru',  # recognizer suited to traditional glyphs
    )
    img = preprocess_image(img_path)
    # Post-processing is recommended for traditional-text results.
    res = ocr.ocr(img)

    # Optional traditional -> simplified conversion.
    from zhconv import convert  # pip install zhconv

    processed_res = []
    for line in res:
        processed_res.append({
            'original': line['text'],
            'simplified': convert(line['text'], 'zh-cn'),
            'confidence': line['score'],
        })
    return processed_res
模型选择:
推荐识别模型:densenet_lite_136-gru(平衡速度与精度)、resnet_stn_densenet-gru(带空间变换网络)。

图像增强:
def augment_image(img):
    """Apply light random augmentation: small rotation plus contrast jitter."""
    h, w = img.shape[:2]

    # Random rotation in [-15, +15] degrees around the image center.
    angle = np.random.uniform(-15, 15)
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h))

    # Random contrast scaling in [0.9, 1.1]; brightness unchanged.
    alpha = np.random.uniform(0.9, 1.1)
    return cv2.convertScaleAbs(rotated, alpha=alpha, beta=0)
后处理优化:
def postprocess(texts):
    """Merge vertically adjacent text fragments into lines.

    Two fragments belong to the same line when the bottom y-coordinate of
    the previous fragment and the top y-coordinate of the next one differ
    by less than 10 pixels.
    """
    merged_lines = []
    current_line = []

    def flush():
        # Emit the accumulated fragments as one joined line, if any.
        if current_line:
            merged_lines.append(''.join(item['text'] for item in current_line))

    for fragment in texts:
        same_line = (
            current_line
            and abs(current_line[-1]['box'][1][1] - fragment['box'][0][1]) < 10
        )
        if same_line:
            current_line.append(fragment)
        else:
            flush()
            current_line = [fragment]
    flush()
    return merged_lines
识别乱码:
检查lang参数是否与文字类型匹配。

方向误判:
# Force a fixed orientation (not recommended for routine use).
from cnocr.utils import set_seed

set_seed(42)  # pin the random seed for reproducibility
ocr = CnOcr(det_rotate_degrees=90)  # force 90-degree recognition
性能瓶颈:
优先选用densenet_lite系列识别模型;在GPU环境下将context设为cuda并设置batch_size=4。
# Example: recognize a vertical-layout scan of the Analects.
img_path = 'lunyu_vertical.jpg'
results = recognize_vertical_text([img_path], lang='ch_sim')

for res in results:
    print(f"图像: {res['image_path']}")
    for line in res['texts']:
        print(f"位置: {line['box']}, 文本: {line['text']}, 置信度: {line['confidence']:.2f}")
# Example: recognize a traditional-Chinese calligraphy work.
from zhconv import convert


def recognize_calligraphy(img_path):
    """Recognize traditional calligraphy and attach a simplified rendering."""
    ocr = CnOcr(lang='ch_tra', rec_model_name='resnet_stn_densenet-gru')
    img = preprocess_image(img_path)
    res = ocr.ocr(img)

    # Convert each line to simplified Chinese alongside the original.
    simplified_res = []
    for line in res:
        simplified_res.append({
            'traditional': line['text'],
            'simplified': convert(line['text'], 'zh-cn'),
            'confidence': line['score'],
        })
    return simplified_res


# Usage example: show the first five recognized lines.
calligraphy_res = recognize_calligraphy('shufajia_vertical.jpg')
for item in calligraphy_res[:5]:
    print(f"繁体: {item['traditional']}")
    print(f"简体: {item['simplified']}")
    print(f"置信度: {item['confidence']:.2f}\n")
混合排版处理:
# Auto-detect horizontal vs. vertical layout (custom detection logic).
def auto_detect_orientation(img):
    """Crude orientation guess: taller-than-wide images count as vertical.

    Returns 90 for vertical layouts, 0 otherwise. A production version
    would plug an orientation-classification model in here instead.
    """
    h, w = img.shape[:2]
    if h > w:
        return 90
    return 0
多语言混合识别:
# Recognize mixed Chinese/English text (requires a custom dictionary file).
ocr = CnOcr(
    lang='custom',
    dict_path='./custom_dict.txt',  # must contain both Chinese and English vocabulary
    rec_model_name='resnet_stn_densenet-gru',
)
服务化部署:
# FastAPI deployment example.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class OCRRequest(BaseModel):
    image_path: str
    lang: str = 'ch_sim'


@app.post("/ocr/vertical")
async def vertical_ocr(request: OCRRequest):
    ocr = CnOcr(lang=request.lang)
    img = preprocess_image(request.image_path)
    res = ocr.ocr(img)
    return {"result": res}
通过以上技术方案,开发者可以高效实现简体中文与繁体中文的竖排文字识别。实际部署时,建议根据具体场景调整预处理参数和模型选择,并通过持续的数据积累优化识别效果。对于古籍、书法等特殊领域,可考虑微调预训练模型以获得更好的适应性。