kmuthudurai committed on
Commit
3aa7abf
·
verified ·
1 Parent(s): 58b796b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -35,11 +35,19 @@ def get_ocr(lang, use_gpu=False):
35
  # Function to extract images from PDF
36
  def pdf_to_images(uploaded_file):
37
  try:
38
- # Read the uploaded file as bytes
39
- file_data = uploaded_file.file.read() # This returns the file as bytes
 
 
 
 
40
 
41
- # Open the PDF using fitz (PyMuPDF) from the bytes data
42
  doc = fitz.open(stream=file_data, filetype="pdf")
 
 
 
 
43
  logger.info(f"PDF loaded successfully with {len(doc)} pages.")
44
 
45
  image_parts = []
@@ -70,34 +78,53 @@ async def create_upload_file(
70
  lang: LangEnum = LangEnum.ch,
71
  ):
72
  try:
 
73
  contents = await file.read()
74
 
75
- # Determine if the uploaded file is a PDF or image
 
 
 
 
 
 
 
76
  if file.content_type == "application/pdf":
77
  images = pdf_to_images(file)
 
 
 
 
78
  else:
79
- # If it's an image file
80
- images = [Image.open(io.BytesIO(contents))]
81
 
 
82
  ocr = get_ocr(lang=lang, use_gpu=use_gpu)
83
-
84
- final_results = []
85
 
 
 
 
86
  for image in images:
87
  img2np = np.array(image)
88
- result = ocr.ocr(img2np, cls=True)[0]
89
-
90
- boxes = [line[0] for line in result]
91
- txts = [line[1][0] for line in result]
92
- scores = [line[1][1] for line in result]
93
-
94
- # 识别结果
95
- final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
96
- final_results.extend(final_result)
 
 
 
 
 
97
 
98
  return final_results
99
 
100
  except Exception as e:
 
101
  logger.error(f"Error processing file: {str(e)}")
102
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
103
 
 
35
  # Function to extract images from PDF
36
  def pdf_to_images(uploaded_file):
37
  try:
38
+ # Read file content and log the size of the file
39
+ file_data = uploaded_file.file.read()
40
+ logger.info(f"Received file of size {len(file_data)} bytes.")
41
+
42
+ if len(file_data) == 0:
43
+ raise HTTPException(status_code=400, detail="Uploaded PDF is empty.")
44
 
45
+ # Open the PDF using fitz (PyMuPDF) from the byte stream
46
  doc = fitz.open(stream=file_data, filetype="pdf")
47
+
48
+ if len(doc) == 0:
49
+ raise HTTPException(status_code=400, detail="The PDF document is empty.")
50
+
51
  logger.info(f"PDF loaded successfully with {len(doc)} pages.")
52
 
53
  image_parts = []
 
78
  lang: LangEnum = LangEnum.ch,
79
  ):
80
  try:
81
+ # Read the file contents
82
  contents = await file.read()
83
 
84
+ # Log the file size
85
+ logger.info(f"Received file of size {len(contents)} bytes.")
86
+
87
+ # Ensure file is not empty
88
+ if len(contents) == 0:
89
+ raise HTTPException(status_code=400, detail="Uploaded file is empty.")
90
+
91
+ # Determine if the uploaded file is a PDF or an image
92
  if file.content_type == "application/pdf":
93
  images = pdf_to_images(file)
94
+ elif file.content_type.startswith("image/"):
95
+ # If it's an image file, process it
96
+ image = Image.open(io.BytesIO(contents))
97
+ images = [image]
98
  else:
99
+ raise HTTPException(status_code=400, detail="Unsupported file type")
 
100
 
101
+ # Initialize OCR model for the chosen language
102
  ocr = get_ocr(lang=lang, use_gpu=use_gpu)
 
 
103
 
104
+ final_results = []
105
+
106
+ # Iterate over the images and process with OCR
107
  for image in images:
108
  img2np = np.array(image)
109
+ result = ocr.ocr(img2np, cls=True)
110
+
111
+ if result:
112
+ result = result[0] # Extract the result for this image
113
+
114
+ boxes = [line[0] for line in result]
115
+ txts = [line[1][0] for line in result]
116
+ scores = [line[1][1] for line in result]
117
+
118
+ # Combine results into a list of dictionaries
119
+ final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
120
+ final_results.extend(final_result)
121
+ else:
122
+ logger.warning("OCR did not return any results for the image.")
123
 
124
  return final_results
125
 
126
  except Exception as e:
127
+ # Log the error and raise a 500 HTTP error
128
  logger.error(f"Error processing file: {str(e)}")
129
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
130