kmuthudurai committed on
Commit
dca3ec3
·
verified ·
1 Parent(s): d3f944d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -69
app.py CHANGED
@@ -2,12 +2,17 @@ import uvicorn
2
  from fastapi.staticfiles import StaticFiles
3
  import hashlib
4
  from enum import Enum
5
- from fastapi import FastAPI, UploadFile, File
6
  from paddleocr import PaddleOCR, PPStructure, save_structure_res
7
  from PIL import Image
8
  import io
9
  import numpy as np
10
  import fitz # PyMuPDF for PDF handling
 
 
 
 
 
11
 
12
  app = FastAPI(docs_url='/')
13
  use_gpu = False
@@ -17,10 +22,10 @@ class LangEnum(str, Enum):
17
  ch = "ch"
18
  en = "en"
19
 
20
- # cache with ocr
21
  ocr_cache = {}
22
 
23
- # get ocr instance
24
  def get_ocr(lang, use_gpu=False):
25
  if not ocr_cache.get(lang):
26
  ocr_cache[lang] = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
@@ -29,85 +34,54 @@ def get_ocr(lang, use_gpu=False):
29
 
30
  # Function to extract images from PDF
31
  def pdf_to_images(file_contents):
32
- doc = fitz.open(io.BytesIO(file_contents))
33
- images = []
34
- for page in doc:
35
- pix = page.get_pixmap()
36
- img = Image.open(io.BytesIO(pix.tobytes("png")))
37
- images.append(img)
38
- return images
 
 
 
 
39
 
40
  @app.post("/ocr")
41
  async def create_upload_file(
42
  file: UploadFile = File(...),
43
  lang: LangEnum = LangEnum.ch,
44
  ):
45
- contents = await file.read()
 
46
 
47
- # Determine if the uploaded file is a PDF or image
48
- if file.content_type == "application/pdf":
49
- images = pdf_to_images(contents)
50
- else:
51
- # If it's an image file
52
- images = [Image.open(io.BytesIO(contents))]
53
 
54
- ocr = get_ocr(lang=lang, use_gpu=use_gpu)
55
 
56
- final_results = []
57
-
58
- for image in images:
59
- img2np = np.array(image)
60
- result = ocr.ocr(img2np, cls=True)[0]
61
 
62
- boxes = [line[0] for line in result]
63
- txts = [line[1][0] for line in result]
64
- scores = [line[1][1] for line in result]
65
 
66
- # 识别结果
67
- final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
68
- final_results.extend(final_result)
69
 
70
- return final_results
71
 
72
- @app.post("/ocr_table")
73
- async def create_upload_file_for_table(
74
- file: UploadFile = File(...),
75
- lang: LangEnum = LangEnum.ch,
76
- ):
77
- table_engine = PPStructure(show_log=True, table=True, lang=lang)
78
-
79
- contents = await file.read()
80
- # 计算文件内容的哈希值
81
- file_hash = hashlib.sha256(contents).hexdigest()
82
-
83
- # Determine if the uploaded file is a PDF or image
84
- if file.content_type == "application/pdf":
85
- images = pdf_to_images(contents)
86
- else:
87
- images = [Image.open(io.BytesIO(contents))]
88
-
89
- final_htmls = []
90
- final_bboxes = []
91
- final_types = []
92
-
93
- for image in images:
94
- img2np = np.array(image)
95
- result = table_engine(img2np)
96
-
97
- save_structure_res(result, output_dir, f'{file_hash}')
98
-
99
- for item in result:
100
- item_res = item.get('res', {})
101
- final_htmls.append(item_res.get('html', ''))
102
- final_types.append(item.get('type', ''))
103
- final_bboxes.append(item.get('bbox', ''))
104
-
105
- return {
106
- 'htmls': final_htmls,
107
- 'hash': file_hash,
108
- 'bboxes': final_bboxes,
109
- 'types': final_types,
110
- }
111
 
112
  # Serve the output folder as static files
113
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
 
2
  from fastapi.staticfiles import StaticFiles
3
  import hashlib
4
  from enum import Enum
5
+ from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from paddleocr import PaddleOCR, PPStructure, save_structure_res
7
  from PIL import Image
8
  import io
9
  import numpy as np
10
  import fitz # PyMuPDF for PDF handling
11
+ import logging
12
+
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
 
17
  app = FastAPI(docs_url='/')
18
  use_gpu = False
 
22
  ch = "ch"
23
  en = "en"
24
 
25
+ # Cache of PaddleOCR instances, keyed by language
26
  ocr_cache = {}
27
 
28
+ # Get OCR instance
29
  def get_ocr(lang, use_gpu=False):
30
  if not ocr_cache.get(lang):
31
  ocr_cache[lang] = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
 
34
 
35
  # Function to extract images from PDF
36
  def pdf_to_images(file_contents):
37
+ try:
38
+ doc = fitz.open(io.BytesIO(file_contents))
39
+ images = []
40
+ for page in doc:
41
+ pix = page.get_pixmap()
42
+ img = Image.open(io.BytesIO(pix.tobytes("png")))
43
+ images.append(img)
44
+ return images
45
+ except Exception as e:
46
+ logger.error(f"Error processing PDF: {str(e)}")
47
+ raise HTTPException(status_code=500, detail="Error processing PDF file")
48
 
49
  @app.post("/ocr")
50
  async def create_upload_file(
51
  file: UploadFile = File(...),
52
  lang: LangEnum = LangEnum.ch,
53
  ):
54
+ try:
55
+ contents = await file.read()
56
 
57
+ # Determine if the uploaded file is a PDF or image
58
+ if file.content_type == "application/pdf":
59
+ images = pdf_to_images(contents)
60
+ else:
61
+ # If it's an image file
62
+ images = [Image.open(io.BytesIO(contents))]
63
 
64
+ ocr = get_ocr(lang=lang, use_gpu=use_gpu)
65
 
66
+ final_results = []
67
+
68
+ for image in images:
69
+ img2np = np.array(image)
70
+ result = ocr.ocr(img2np, cls=True)[0]
71
 
72
+ boxes = [line[0] for line in result]
73
+ txts = [line[1][0] for line in result]
74
+ scores = [line[1][1] for line in result]
75
 
76
+ # Recognition results
77
+ final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
78
+ final_results.extend(final_result)
79
 
80
+ return final_results
81
 
82
+ except Exception as e:
83
+ logger.error(f"Error processing file: {str(e)}")
84
+ raise HTTPException(status_code=500, detail="Internal server error while processing the file")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  # Serve the output folder as static files
87
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")