python - PaddleOCR

发布时间 2023-03-22 21:11:42 作者: wstong

1. 安装

# Install the paddleocr package; -i points pip at the Tsinghua PyPI mirror.
pip3 install paddleocr -i https://pypi.tuna.tsinghua.edu.cn/simple
# Install the paddlepaddle package; -i points pip at the Baidu PyPI mirror.
pip3 install paddlepaddle -i https://mirror.baidu.com/pypi/simple

2. 使用

from paddleocr import PaddleOCR
# Build a CPU-only recognizer for Chinese text with the angle classifier enabled.
ocr = PaddleOCR(use_angle_cls=True, use_gpu=False, lang='ch')
res = ocr.ocr('test.png', cls=True)
# res[0] holds the detections for 'test.png'.
# NOTE(review): newer PaddleOCR releases appear to return None here (instead of
# an empty list) when no text is detected -- `or []` covers both cases so the
# loop simply prints nothing rather than raising TypeError.
for r in res[0] or []:
    # Each r is [four corner points, (text, confidence)].
    print(r)

结果
image

[[[47.0, 48.0], [128.0, 48.0], [128.0, 90.0], [47.0, 90.0]], ('姓名', 0.9996767044067383)]
[[[152.0, 46.0], [232.0, 46.0], [232.0, 88.0], [152.0, 88.0]], ('性别', 0.8384654521942139)]
[[[390.0, 47.0], [508.0, 47.0], [508.0, 87.0], [390.0, 87.0]], ('身份证', 0.9947993159294128)]
[[[661.0, 44.0], [744.0, 44.0], [744.0, 90.0], [661.0, 90.0]], ('年龄', 0.7915459871292114)]
[[[46.0, 103.0], [129.0, 106.0], [127.0, 152.0], [45.0, 149.0]], ('张三', 0.93336421251297)]
[[[171.0, 106.0], [212.0, 106.0], [212.0, 150.0], [171.0, 150.0]], ('男', 0.9995383024215698)]
[[[280.0, 114.0], [615.0, 117.0], [615.0, 149.0], [279.0, 146.0]], ('12345678901234500', 0.9003013968467712)]
[[[682.0, 113.0], [723.0, 113.0], [723.0, 151.0], [682.0, 151.0]], ('32', 0.9932941794395447)]
[[[48.0, 165.0], [127.0, 165.0], [127.0, 208.0], [48.0, 208.0]], ('李四', 0.997494637966156)]
[[[169.0, 164.0], [216.0, 164.0], [216.0, 210.0], [169.0, 210.0]], ('女', 0.6857110857963562)]
[[[279.0, 176.0], [616.0, 176.0], [616.0, 206.0], [279.0, 206.0]], ('98765432109876543', 0.9408526420593262)]
[[[679.0, 171.0], [722.0, 171.0], [722.0, 212.0], [679.0, 212.0]], ('21', 0.789783239364624)]

3. 简单表格识别

import pandas as pd
from paddleocr import PaddleOCR

# Build a CPU-only recognizer for Chinese text with the angle classifier enabled.
ocr = PaddleOCR(use_angle_cls=True, use_gpu=False, lang='ch')
res = ocr.ocr('test.png', cls=True)

# Keep only the recognized strings; each detection is [box, (text, confidence)].
# NOTE(review): newer PaddleOCR releases appear to return None for an image with
# no detected text -- `or []` treats that the same as an empty result.
texts = [item[1][0] for item in (res[0] or [])]

# The table is read as rows of four cells (name, sex, ID card, age).
# Assumes the detections arrive left-to-right, top-to-bottom -- verify on your
# own images. Fail early with a clear message instead of an IndexError halfway
# through a partial row.
if not texts or len(texts) % 4 != 0:
    raise ValueError(
        'expected a non-empty multiple of 4 recognized cells, got %d' % len(texts)
    )

# Every 4th cell, starting at the column offset, belongs to the same column.
name_list = texts[0::4]
sex_list = texts[1::4]
idCard_list = texts[2::4]
age_list = texts[3::4]

# The first cell of each column is its header; the remaining cells are the data.
tmp = pd.DataFrame({
    name_list[0]: name_list[1:],
    sex_list[0]: sex_list[1:],
    idCard_list[0]: idCard_list[1:],
    age_list[0]: age_list[1:],
})
# index=False keeps the DataFrame row index out of the spreadsheet.
tmp.to_excel('./test.xlsx', index=False)

image