document-extraction

Sleeping

App Files Community

kmuthudurai committed on Dec 15, 2024

Commit

c088f72

verified ·

1 Parent(s): 18902ad

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -41

app.py CHANGED Viewed

@@ -130,47 +130,6 @@ async def create_upload_file(
         logger.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Internal server error while processing the file")
-# Initialize PaddleOCR
-ocr = PaddleOCR(use_angle_cls=True, lang="en")
-# Load the summarization model
-MODEL_NAME = "t5-small"  # Lightweight summarization model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-def extract_text_with_paddleocr(image: Image.Image) -> str:
-    """Perform OCR using PaddleOCR to extract text from an image."""
-    image_np = np.array(image)
-    result = ocr.ocr(image_np, cls=True)
-    extracted_text = "\n".join([line[1][0] for line in result[0]])
-    return extracted_text
-def summarize_text(text: str) -> str:
-    """Summarize the extracted text using a transformer model."""
-    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512, truncation=True)
-    outputs = model.generate(inputs, max_length=150, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-@app.post("/ocr-summarize/")
-async def ocr_and_summarize(file: UploadFile = File(...)):
-    """
-    Upload an image file, extract text using PaddleOCR, and return the summarized content.
-    """
-    try:
-        image = Image.open(io.BytesIO(await file.read()))
-        extracted_text = extract_text_with_paddleocr(image)
-        if not extracted_text.strip():
-            return {"error": "No text detected in the uploaded image."}
-        summary = summarize_text(extracted_text)
-        return {
-            "filename": file.filename,
-            "extracted_text": extracted_text,
-            "summary": summary,
-        }
-    except Exception as e:
-        return {"error": f"Failed to process the image. Details: {str(e)}"}
 # Serve the output folder as static files
 app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")

         logger.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Internal server error while processing the file")
 # Serve the output folder as static files
 app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")