kmuthudurai committed on
Commit
9bcc761
·
verified ·
1 Parent(s): 53a4f24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -4
app.py CHANGED
@@ -33,10 +33,11 @@ def get_ocr(lang, use_gpu=False):
33
  return ocr_cache.get(lang)
34
 
35
  # Function to extract images from PDF
36
- async def pdf_to_images(uploaded_file):
 
37
  try:
38
- # Read file content and log the size of the file
39
- file_data = await uploaded_file.read()
40
  logger.info(f"Received file of size {len(file_data)} bytes.")
41
 
42
  if len(file_data) == 0:
@@ -45,6 +46,7 @@ async def pdf_to_images(uploaded_file):
45
  # Open the PDF using fitz (PyMuPDF) from the byte stream
46
  doc = fitz.open(stream=file_data, filetype="pdf")
47
 
 
48
  if len(doc) == 0:
49
  raise HTTPException(status_code=400, detail="The PDF document is empty.")
50
 
@@ -90,7 +92,63 @@ async def create_upload_file(
90
 
91
  # Determine if the uploaded file is a PDF or an image
92
  if file.content_type == "application/pdf":
93
- images = pdf_to_images(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  elif file.content_type.startswith("image/"):
95
  # If it's an image file, process it
96
  image = Image.open(io.BytesIO(contents))
 
33
  return ocr_cache.get(lang)
34
 
35
  # Function to extract images from PDF
36
+ # Function to extract images from PDF
37
+ def pdf_to_images(uploaded_file):
38
  try:
39
+ # Read the file content
40
+ file_data = uploaded_file.file.read()
41
  logger.info(f"Received file of size {len(file_data)} bytes.")
42
 
43
  if len(file_data) == 0:
 
46
  # Open the PDF using fitz (PyMuPDF) from the byte stream
47
  doc = fitz.open(stream=file_data, filetype="pdf")
48
 
49
+ # Check if the document has pages
50
  if len(doc) == 0:
51
  raise HTTPException(status_code=400, detail="The PDF document is empty.")
52
 
 
92
 
93
  # Determine if the uploaded file is a PDF or an image
94
  if file.content_type == "application/pdf":
95
+ images = pdf_to_images(file) # No need to await this since it's not async
96
+ elif file.content_type.startswith("image/"):
97
+ # If it's an image file, process it
98
+ image = Image.open(io.BytesIO(contents))
99
+ images = [image]
100
+ else:
101
+ raise HTTPException(status_code=400, detail="Unsupported file type")
102
+
103
+ # Initialize OCR model for the chosen language
104
+ ocr = get_ocr(lang=lang, use_gpu=use_gpu)
105
+
106
+ final_results = []
107
+
108
+ # Iterate over the images and process with OCR
109
+ for image in images:
110
+ img2np = np.array(image)
111
+ result = ocr.ocr(img2np, cls=True)
112
+
113
+ if result:
114
+ result = result[0] # Extract the result for this image
115
+
116
+ boxes = [line[0] for line in result]
117
+ txts = [line[1][0] for line in result]
118
+ scores = [line[1][1] for line in result]
119
+
120
+ # Combine results into a list of dictionaries
121
+ final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
122
+ final_results.extend(final_result)
123
+ else:
124
+ logger.warning("OCR did not return any results for the image.")
125
+
126
+ return final_results
127
+
128
+ except Exception as e:
129
+ # Log the error and raise a 500 HTTP error
130
+ logger.error(f"Error processing file: {str(e)}")
131
+ raise HTTPException(status_code=500, detail="Internal server error while processing the file")
132
+
133
+ @app.post("/ocr")
134
+ async def create_upload_file(
135
+ file: UploadFile = File(...),
136
+ lang: LangEnum = LangEnum.ch,
137
+ ):
138
+ try:
139
+ # Read the file contents
140
+ contents = await file.read()
141
+
142
+ # Log the file size
143
+ logger.info(f"Received file of size {len(contents)} bytes.")
144
+
145
+ # Ensure file is not empty
146
+ if len(contents) == 0:
147
+ raise HTTPException(status_code=400, detail="Uploaded file is empty.")
148
+
149
+ # Determine if the uploaded file is a PDF or an image
150
+ if file.content_type == "application/pdf":
151
+ images = pdf_to_images(file) # No need to await this since it's not async
152
  elif file.content_type.startswith("image/"):
153
  # If it's an image file, process it
154
  image = Image.open(io.BytesIO(contents))