document-extraction

Sleeping

App Files Files Community

kmuthudurai committed on Dec 15, 2024

Commit

18902ad

verified ·

1 Parent(s): 81259ba

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -53

app.py CHANGED Viewed

@@ -130,61 +130,46 @@ async def create_upload_file(
         logger.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Internal server error while processing the file")
-@app.post("/ocr")
-async def create_upload_file(
-    file: UploadFile = File(...),
-    lang: LangEnum = LangEnum.ch,
-):
     try:
-        # Read the file contents
-        contents = await file.read()
-        # Log the file size
-        logger.info(f"Received file of size {len(contents)} bytes.")
-        # Ensure file is not empty
-        if len(contents) == 0:
-            raise HTTPException(status_code=400, detail="Uploaded file is empty.")
-        # Determine if the uploaded file is a PDF or an image
-        if file.content_type == "application/pdf":
-            images = pdf_to_images(file)  # No need to await this since it's not async
-        elif file.content_type.startswith("image/"):
-            # If it's an image file, process it
-            image = Image.open(io.BytesIO(contents))
-            images = [image]
-        else:
-            raise HTTPException(status_code=400, detail="Unsupported file type")
-        # Initialize OCR model for the chosen language
-        ocr = get_ocr(lang=lang, use_gpu=use_gpu)
-        final_results = []
-        # Iterate over the images and process with OCR
-        for image in images:
-            img2np = np.array(image)
-            result = ocr.ocr(img2np, cls=True)
-            if result:
-                result = result[0]  # Extract the result for this image
-                boxes = [line[0] for line in result]
-                txts = [line[1][0] for line in result]
-                scores = [line[1][1] for line in result]
-                # Combine results into a list of dictionaries
-                final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
-                final_results.extend(final_result)
-            else:
-                logger.warning("OCR did not return any results for the image.")
-        return final_results
     except Exception as e:
-        # Log the error and raise a 500 HTTP error
-        logger.error(f"Error processing file: {str(e)}")
-        raise HTTPException(status_code=500, detail="Internal server error while processing the file")
 # Serve the output folder as static files
 app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")

         logger.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Internal server error while processing the file")
+# Initialize one shared, module-level PaddleOCR instance (English, angle classification enabled); extract_text_with_paddleocr reuses it
+ocr = PaddleOCR(use_angle_cls=True, lang="en")
+# Load the summarization tokenizer and seq2seq model once, at module import; summarize_text reads both globals
+MODEL_NAME = "t5-small"  # Lightweight summarization model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+def extract_text_with_paddleocr(image: Image.Image) -> str:
+    """Extract text from an image with the module-level PaddleOCR instance.
+
+    Args:
+        image: PIL image to run OCR on.
+
+    Returns:
+        The recognized text lines joined with newlines, or an empty string
+        when OCR detects no text.
+    """
+    image_np = np.array(image)
+    result = ocr.ocr(image_np, cls=True)
+    # The per-image entry can be None (or empty) when nothing is detected.
+    # Iterating it directly would raise TypeError and bypass the caller's
+    # "No text detected" handling, so report "no text" explicitly instead.
+    if not result or not result[0]:
+        return ""
+    # Each detected line is [box, (text, confidence)]; keep only the text.
+    return "\n".join(line[1][0] for line in result[0])
+def summarize_text(text: str) -> str:
+    """Return a summary of ``text`` generated by the module-level seq2seq model.
+
+    The input is prefixed with "summarize: " and truncated to 512 tokens;
+    generation uses 4 beams and yields between 30 and 150 tokens.
+    """
+    prompt = "summarize: " + text
+    input_ids = tokenizer.encode(
+        prompt,
+        return_tensors="pt",
+        max_length=512,
+        truncation=True,
+    )
+    generation_options = dict(
+        max_length=150,
+        min_length=30,
+        length_penalty=2.0,
+        num_beams=4,
+        early_stopping=True,
+    )
+    generated = model.generate(input_ids, **generation_options)
+    first_sequence = generated[0]
+    return tokenizer.decode(first_sequence, skip_special_tokens=True)
+@app.post("/ocr-summarize/")
+async def ocr_and_summarize(file: UploadFile = File(...)):
+    """
+    Upload an image file, extract text using PaddleOCR, and return the summarized content.
+
+    Returns a dict with "filename", "extracted_text" and "summary" on success,
+    or a dict with a single "error" key when no text is found or processing fails.
+    """
     try:
+        image = Image.open(io.BytesIO(await file.read()))
+        extracted_text = extract_text_with_paddleocr(image)
+        if not extracted_text.strip():
+            return {"error": "No text detected in the uploaded image."}
+        summary = summarize_text(extracted_text)
+        return {
+            "filename": file.filename,
+            "extracted_text": extracted_text,
+            "summary": summary,
+        }
     except Exception as e:
+        # Log the failure (with traceback) before answering, so errors are
+        # not silently swallowed; the response payload is left unchanged.
+        # NOTE(review): this still answers HTTP 200 with an "error" payload,
+        # whereas the upload endpoint above raises HTTPException(500) - confirm
+        # which contract clients rely on before aligning the two.
+        logger.exception(f"Error processing file: {str(e)}")
+        return {"error": f"Failed to process the image. Details: {str(e)}"}
 # Serve the output folder as static files
 app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")