kmuthudurai committed on
Commit
18902ad
·
verified ·
1 Parent(s): 81259ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -53
app.py CHANGED
@@ -130,61 +130,46 @@ async def create_upload_file(
130
  logger.error(f"Error processing file: {str(e)}")
131
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
132
 
133
@app.post("/ocr")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run OCR on an uploaded PDF or image and return the recognised lines.

    Args:
        file: The uploaded document. It is dispatched on its declared content
            type: ``application/pdf`` or any ``image/*``.
        lang: Language passed to ``get_ocr`` to select the OCR model.

    Returns:
        A flat list of ``{"boxes": ..., "txt": ..., "score": ...}`` dicts, one
        per detected text line across all processed images.

    Raises:
        HTTPException: 400 for an empty upload or an unsupported content type;
            500 for any unexpected failure while processing the file.
    """
    try:
        contents = await file.read()
        logger.info(f"Received file of size {len(contents)} bytes.")

        if not contents:
            raise HTTPException(status_code=400, detail="Uploaded file is empty.")

        # The content type is optional on an upload; a missing one is treated
        # as unsupported instead of crashing on ``None.startswith``.
        content_type = file.content_type or ""
        if content_type == "application/pdf":
            # read() above consumed the stream; rewind so pdf_to_images sees
            # the document from the start.
            # NOTE(review): assumes pdf_to_images reads from the upload object - confirm.
            await file.seek(0)
            images = pdf_to_images(file)
        elif content_type.startswith("image/"):
            images = [Image.open(io.BytesIO(contents))]
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        ocr = get_ocr(lang=lang, use_gpu=use_gpu)

        final_results = []
        for image in images:
            result = ocr.ocr(np.array(image), cls=True)

            # The per-image entry can be None/empty when no text is detected.
            page = result[0] if result else None
            if not page:
                logger.warning("OCR did not return any results for the image.")
                continue

            # Each line is (box, (text, score)).
            final_results.extend(
                dict(boxes=line[0], txt=line[1][0], score=line[1][1])
                for line in page
            )

        return final_results

    except HTTPException:
        # Deliberate 4xx responses raised above must reach the client as-is,
        # not be rewritten into a 500 by the generic handler below.
        raise
    except Exception as e:
        logger.exception("Error processing file: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Internal server error while processing the file",
        ) from e
188
 
# Expose the contents of the "output" directory under the /output URL prefix.
app.mount(
    "/output",
    StaticFiles(directory="output", follow_symlink=True, html=True),
    name="output",
)
 
130
  logger.error(f"Error processing file: {str(e)}")
131
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
132
 
133
# Shared OCR engine: English recognition with the text-angle classifier enabled.
# Built once at import time and reused by every request.
ocr = PaddleOCR(use_angle_cls=True, lang="en")

# Summarization checkpoint; the tokenizer and the seq2seq model are both
# loaded from the same name so they stay in sync.
MODEL_NAME = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
140
+
141
def extract_text_with_paddleocr(image: Image.Image) -> str:
    """Extract text from an image with the module-level PaddleOCR engine.

    Args:
        image: The image to run OCR on; it is converted to a NumPy array
            before being handed to the engine.

    Returns:
        The recognised text lines joined with newlines, or an empty string
        when the engine detects no text.
    """
    image_np = np.array(image)
    result = ocr.ocr(image_np, cls=True)

    # Only the first entry is used. It can be None (or the whole result
    # empty) when nothing is detected, so guard before iterating.
    page = result[0] if result else None
    if not page:
        return ""

    # Each detected line is (box, (text, score)); only the text is kept.
    return "\n".join(line[1][0] for line in page)
147
+
148
def summarize_text(text: str) -> str:
    """Produce a summary of ``text`` with the module-level seq2seq model.

    The input is prefixed with ``"summarize: "`` and truncated to 512 tokens
    before generation; the first generated sequence is decoded and returned.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    generation_options = dict(
        max_length=150,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated = model.generate(input_ids, **generation_options)

    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
153
+
154
@app.post("/ocr-summarize/")
async def ocr_and_summarize(file: UploadFile = File(...)):
    """Extract text from an uploaded image and return it with a summary.

    Args:
        file: The uploaded image file.

    Returns:
        On success, a dict with ``filename``, ``extracted_text`` and
        ``summary``. On any failure (empty upload, no text detected, or a
        processing error), a dict with a single ``error`` key describing
        the problem.
    """
    try:
        contents = await file.read()
        if not contents:
            # Report an empty upload explicitly instead of surfacing the
            # image decoder's opaque failure message.
            return {"error": "Uploaded file is empty."}

        image = Image.open(io.BytesIO(contents))
        extracted_text = extract_text_with_paddleocr(image)
        if not extracted_text.strip():
            return {"error": "No text detected in the uploaded image."}

        summary = summarize_text(extracted_text)
        return {
            "filename": file.filename,
            "extracted_text": extracted_text,
            "summary": summary,
        }
    except Exception as e:
        # Keep the existing response contract, but record the traceback so
        # failures are no longer swallowed without a trace on the server.
        logger.exception("Failed to process uploaded image: %s", e)
        return {"error": f"Failed to process the image. Details: {str(e)}"}
 
 
173
 
# Expose the contents of the "output" directory under the /output URL prefix.
app.mount(
    "/output",
    StaticFiles(directory="output", follow_symlink=True, html=True),
    name="output",
)