Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -130,47 +130,6 @@ async def create_upload_file(
|
|
| 130 |
logger.error(f"Error processing file: {str(e)}")
|
| 131 |
raise HTTPException(status_code=500, detail="Internal server error while processing the file")
|
| 132 |
|
| 133 |
-
# Shared OCR engine, created once at import time: English language model
# with the text-angle classifier switched on.
ocr = PaddleOCR(use_angle_cls=True, lang="en")

# Summarization checkpoint; the tokenizer and the seq2seq model are both
# loaded from this same name so they stay in sync.
MODEL_NAME = "t5-small"  # Lightweight summarization model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
|
| 140 |
-
|
| 141 |
-
def extract_text_with_paddleocr(image: Image.Image) -> str:
    """Extract the text found in an image using the module-level PaddleOCR engine.

    Args:
        image: PIL image to run OCR on.

    Returns:
        The recognized text lines joined with newlines, or an empty string
        when the engine reports no text for the image.
    """
    image_np = np.array(image)
    result = ocr.ocr(image_np, cls=True)
    # The engine returns one entry per input image. That entry is None (or
    # the outer list is empty) when nothing was detected; iterating it used
    # to raise instead of letting the caller report "no text detected".
    if not result or not result[0]:
        return ""
    # Per PaddleOCR's documented result format each detected line is
    # [bounding_box, (text, confidence)]; only the text is kept.
    return "\n".join(line[1][0] for line in result[0])
|
| 147 |
-
|
| 148 |
-
def summarize_text(text: str) -> str:
    """Return a model-generated summary of ``text``.

    Uses the module-level ``tokenizer`` and ``model``. The prompt is
    truncated to 512 tokens and the summary is generated with 4-beam search,
    bounded to between 30 and 150 tokens.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=150,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Only the first returned sequence is decoded.
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
|
| 153 |
-
|
| 154 |
-
@app.post("/ocr-summarize/")
async def ocr_and_summarize(file: UploadFile = File(...)):
    """Upload an image file, extract text using PaddleOCR, and return the summarized content.

    Args:
        file: The uploaded image.

    Returns:
        On success, a dict with ``filename``, ``extracted_text`` and
        ``summary``. When no text is found or processing fails, a dict with
        a single ``error`` key describing the problem.
    """
    try:
        payload = await file.read()
        # Close the image as soon as OCR has consumed it instead of leaving
        # cleanup to the garbage collector.
        with Image.open(io.BytesIO(payload)) as image:
            extracted_text = extract_text_with_paddleocr(image)
        if not extracted_text.strip():
            return {"error": "No text detected in the uploaded image."}
        summary = summarize_text(extracted_text)
    except Exception as e:
        # Top-level request boundary: log the traceback so the failure is
        # visible server-side, then answer with the same error payload
        # existing clients already receive.
        logger.exception("Failed to process the image")
        return {"error": f"Failed to process the image. Details: {str(e)}"}

    return {
        "filename": file.filename,
        "extracted_text": extracted_text,
        "summary": summary,
    }
|
| 173 |
-
|
| 174 |
# Publish the local "output" directory under the /output URL prefix.
# StaticFiles runs in HTML mode and is allowed to follow symlinks.
app.mount(
    "/output",
    StaticFiles(directory="output", follow_symlink=True, html=True),
    name="output",
)
|
| 176 |
|
|
|
|
| 130 |
logger.error(f"Error processing file: {str(e)}")
|
| 131 |
raise HTTPException(status_code=500, detail="Internal server error while processing the file")
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# Publish the local "output" directory under the /output URL prefix.
# StaticFiles runs in HTML mode and is allowed to follow symlinks.
app.mount(
    "/output",
    StaticFiles(directory="output", follow_symlink=True, html=True),
    name="output",
)
|
| 135 |
|