kmuthudurai committed on
Commit
39fc86b
·
verified ·
1 Parent(s): b43ecb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -30
app.py CHANGED
@@ -7,6 +7,7 @@ from paddleocr import PaddleOCR, PPStructure, save_structure_res
7
  from PIL import Image
8
  import io
9
  import numpy as np
 
10
 
11
  app = FastAPI(docs_url='/')
12
  use_gpu = False
@@ -19,40 +20,59 @@ class LangEnum(str, Enum):
19
  # cache with ocr
20
  ocr_cache = {}
21
 
22
- # get ocr ins
23
  def get_ocr(lang, use_gpu=False):
24
  if not ocr_cache.get(lang):
25
  ocr_cache[lang] = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
26
 
27
  return ocr_cache.get(lang)
28
 
29
-
 
 
 
 
 
 
 
 
 
30
  @app.post("/ocr")
31
  async def create_upload_file(
32
  file: UploadFile = File(...),
33
  lang: LangEnum = LangEnum.ch,
34
- # use_gpu: bool = False
35
  ):
36
  contents = await file.read()
37
- image = Image.open(io.BytesIO(contents))
 
 
 
 
 
 
 
38
  ocr = get_ocr(lang=lang, use_gpu=use_gpu)
39
- img2np = np.array(image)
40
- result = ocr.ocr(img2np, cls=True)[0]
41
 
42
- boxes = [line[0] for line in result]
43
- txts = [line[1][0] for line in result]
44
- scores = [line[1][1] for line in result]
 
 
 
 
 
 
45
 
46
- # 识别结果
47
- final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
48
- return final_result
49
 
 
50
 
51
  @app.post("/ocr_table")
52
- async def create_upload_file(
53
  file: UploadFile = File(...),
54
  lang: LangEnum = LangEnum.ch,
55
- # use_gpu: bool = False
56
  ):
57
  table_engine = PPStructure(show_log=True, table=True, lang=lang)
58
 
@@ -60,30 +80,37 @@ async def create_upload_file(
60
  # 计算文件内容的哈希值
61
  file_hash = hashlib.sha256(contents).hexdigest()
62
 
63
- image = Image.open(io.BytesIO(contents))
64
- img2np = np.array(image)
65
- result = table_engine(img2np)
 
 
 
 
 
 
66
 
67
- save_structure_res(result, output_dir, f'{file_hash}')
 
 
68
 
69
- htmls = []
70
- types = []
71
- bboxes = []
72
 
73
- for item in result:
74
- item_res = item.get('res', {})
75
- htmls.append(item_res.get('html', ''))
76
- types.append(item.get('type', ''))
77
- bboxes.append(item.get('bbox', ''))
78
 
79
  return {
80
- 'htmls': htmls,
81
  'hash': file_hash,
82
- 'bboxes': bboxes,
83
- 'types': types,
84
  }
85
 
 
86
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
87
 
88
  if __name__ == '__main__':
89
- uvicorn.run(app=app)
 
7
  from PIL import Image
8
  import io
9
  import numpy as np
10
+ import fitz # PyMuPDF for PDF handling
11
 
12
  app = FastAPI(docs_url='/')
13
  use_gpu = False
 
20
  # cache with ocr
21
  ocr_cache = {}
22
 
# get ocr instance
def get_ocr(lang, use_gpu=False):
    """Return the PaddleOCR engine cached for ``lang``, creating it on first use.

    The cache is keyed by ``lang`` alone, so ``use_gpu`` only takes effect
    when the engine for that language is constructed for the first time.
    """
    engine = ocr_cache.get(lang)
    if not engine:
        engine = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
        ocr_cache[lang] = engine
    return engine
29
 
# Function to extract images from PDF
def pdf_to_images(file_contents, dpi=None):
    """Render every page of an in-memory PDF to a PIL image.

    Args:
        file_contents: Raw bytes of the PDF document.
        dpi: Optional render resolution forwarded to ``page.get_pixmap``.
            ``None`` keeps PyMuPDF's default resolution.

    Returns:
        A list with one ``PIL.Image`` per page, in page order.
    """
    render_kwargs = {} if dpi is None else {"dpi": dpi}
    images = []
    # In-memory data must be passed through ``stream=``: the first positional
    # argument of ``fitz.open`` is a file name. The ``with`` block closes the
    # document once every page has been rendered.
    with fitz.open(stream=file_contents, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(**render_kwargs)
            # The PNG bytes are copied into their own buffer, so the image
            # stays usable after the document is closed.
            img = Image.open(io.BytesIO(pix.tobytes("png")))
            images.append(img)
    return images
39
+
@app.post("/ocr")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run OCR on an uploaded image or PDF.

    A PDF is rendered page by page via ``pdf_to_images``; any other upload is
    opened as a single image.

    Returns:
        A flat list of ``{"boxes": ..., "txt": ..., "score": ...}`` dicts,
        one per recognized text line, concatenated across all pages.
    """
    contents = await file.read()

    # Determine if the uploaded file is a PDF or image. The declared content
    # type comes from the client and may be missing or generic, so the PDF
    # magic header is checked as well.
    if file.content_type == "application/pdf" or contents.startswith(b"%PDF-"):
        images = pdf_to_images(contents)
    else:
        # If it's an image file
        images = [Image.open(io.BytesIO(contents))]

    ocr = get_ocr(lang=lang, use_gpu=use_gpu)

    final_results = []

    for image in images:
        img2np = np.array(image)
        pages = ocr.ocr(img2np, cls=True)
        # PaddleOCR can report ``None`` for an image in which no text was
        # detected; treat that as "no lines" instead of iterating over None.
        lines = (pages[0] if pages else None) or []

        # Recognition results: each line is [box, (text, score)].
        final_results.extend(
            dict(boxes=line[0], txt=line[1][0], score=line[1][1])
            for line in lines
        )

    return final_results
71
 
72
  @app.post("/ocr_table")
73
+ async def create_upload_file_for_table(
74
  file: UploadFile = File(...),
75
  lang: LangEnum = LangEnum.ch,
 
76
  ):
77
  table_engine = PPStructure(show_log=True, table=True, lang=lang)
78
 
 
80
  # 计算文件内容的哈希值
81
  file_hash = hashlib.sha256(contents).hexdigest()
82
 
83
+ # Determine if the uploaded file is a PDF or image
84
+ if file.content_type == "application/pdf":
85
+ images = pdf_to_images(contents)
86
+ else:
87
+ images = [Image.open(io.BytesIO(contents))]
88
+
89
+ final_htmls = []
90
+ final_bboxes = []
91
+ final_types = []
92
 
93
+ for image in images:
94
+ img2np = np.array(image)
95
+ result = table_engine(img2np)
96
 
97
+ save_structure_res(result, output_dir, f'{file_hash}')
 
 
98
 
99
+ for item in result:
100
+ item_res = item.get('res', {})
101
+ final_htmls.append(item_res.get('html', ''))
102
+ final_types.append(item.get('type', ''))
103
+ final_bboxes.append(item.get('bbox', ''))
104
 
105
  return {
106
+ 'htmls': final_htmls,
107
  'hash': file_hash,
108
+ 'bboxes': final_bboxes,
109
+ 'types': final_types,
110
  }
111
 
112
+ # Serve the output folder as static files
113
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
114
 
115
  if __name__ == '__main__':
116
+ uvicorn.run(app=app)