Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -130,61 +130,46 @@ async def create_upload_file(
|
|
| 130 |
logger.error(f"Error processing file: {str(e)}")
|
| 131 |
raise HTTPException(status_code=500, detail="Internal server error while processing the file")
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
try:
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
if file.content_type == "application/pdf":
|
| 151 |
-
images = pdf_to_images(file) # No need to await this since it's not async
|
| 152 |
-
elif file.content_type.startswith("image/"):
|
| 153 |
-
# If it's an image file, process it
|
| 154 |
-
image = Image.open(io.BytesIO(contents))
|
| 155 |
-
images = [image]
|
| 156 |
-
else:
|
| 157 |
-
raise HTTPException(status_code=400, detail="Unsupported file type")
|
| 158 |
-
|
| 159 |
-
# Initialize OCR model for the chosen language
|
| 160 |
-
ocr = get_ocr(lang=lang, use_gpu=use_gpu)
|
| 161 |
-
|
| 162 |
-
final_results = []
|
| 163 |
-
|
| 164 |
-
# Iterate over the images and process with OCR
|
| 165 |
-
for image in images:
|
| 166 |
-
img2np = np.array(image)
|
| 167 |
-
result = ocr.ocr(img2np, cls=True)
|
| 168 |
-
|
| 169 |
-
if result:
|
| 170 |
-
result = result[0] # Extract the result for this image
|
| 171 |
-
|
| 172 |
-
boxes = [line[0] for line in result]
|
| 173 |
-
txts = [line[1][0] for line in result]
|
| 174 |
-
scores = [line[1][1] for line in result]
|
| 175 |
-
|
| 176 |
-
# Combine results into a list of dictionaries
|
| 177 |
-
final_result = [dict(boxes=box, txt=txt, score=score) for box, txt, score in zip(boxes, txts, scores)]
|
| 178 |
-
final_results.extend(final_result)
|
| 179 |
-
else:
|
| 180 |
-
logger.warning("OCR did not return any results for the image.")
|
| 181 |
-
|
| 182 |
-
return final_results
|
| 183 |
-
|
| 184 |
except Exception as e:
|
| 185 |
-
|
| 186 |
-
logger.error(f"Error processing file: {str(e)}")
|
| 187 |
-
raise HTTPException(status_code=500, detail="Internal server error while processing the file")
|
| 188 |
|
| 189 |
# Serve the output folder as static files
|
| 190 |
app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
|
|
|
|
| 130 |
logger.error(f"Error processing file: {str(e)}")
|
| 131 |
raise HTTPException(status_code=500, detail="Internal server error while processing the file")
|
| 132 |
|
| 133 |
+
# Shared PaddleOCR engine (English, with angle classification enabled),
# built once at import time and reused by every request.
ocr = PaddleOCR(lang="en", use_angle_cls=True)

# Summarization model: "t5-small" is chosen as a lightweight checkpoint.
# The tokenizer and the seq2seq model are loaded from the same name so
# they always stay in sync.
MODEL_NAME = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
|
| 140 |
+
|
| 141 |
+
def extract_text_with_paddleocr(image: Image.Image) -> str:
    """Extract text from an image with the module-level PaddleOCR engine.

    Args:
        image: Image to run OCR on. It is converted to a NumPy array before
            being handed to the engine.

    Returns:
        The recognized text lines joined with newlines, or an empty string
        when the engine reports no text for the image.
    """
    image_np = np.array(image)
    result = ocr.ocr(image_np, cls=True)

    # The engine returns one entry per input image. When nothing is detected
    # that entry can be None (or the result itself can be empty); indexing and
    # iterating it blindly raises instead of letting the caller see "no text".
    if not result or not result[0]:
        return ""

    # Each detected line is [box, (text, score)]; only the text is kept.
    return "\n".join(line[1][0] for line in result[0])
|
| 147 |
+
|
| 148 |
+
def summarize_text(text: str) -> str:
    """Return a summary of ``text`` produced by the module-level seq2seq model.

    The input is prefixed with the ``summarize: `` task prompt and truncated
    to at most 512 tokens before beam-search generation.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Generation settings are kept together so they are easy to audit.
    generation_options = dict(
        max_length=150,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated = model.generate(input_ids, **generation_options)

    # Only the first (best) sequence is decoded.
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
|
| 153 |
+
|
| 154 |
+
@app.post("/ocr-summarize/")
async def ocr_and_summarize(file: UploadFile = File(...)):
    """Upload an image, extract its text with PaddleOCR, and return a summary.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with ``filename``, ``extracted_text`` and ``summary`` on
        success, or a dict with a single ``error`` key when no text was
        detected in the image.

    Raises:
        HTTPException: Status 500 when reading the upload, running OCR, or
            summarizing fails.
    """
    try:
        image = Image.open(io.BytesIO(await file.read()))
        extracted_text = extract_text_with_paddleocr(image)
        if not extracted_text.strip():
            # An image without text is an expected outcome, not a failure.
            return {"error": "No text detected in the uploaded image."}
        summary = summarize_text(extracted_text)
    except Exception as e:
        # Log the details server-side and answer with a real error status and
        # a generic message, so failures are not reported as HTTP 200 and
        # internal exception text is not exposed to the client.
        logger.error(f"Error processing file: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail="Internal server error while processing the file",
        ) from e

    return {
        "filename": file.filename,
        "extracted_text": extracted_text,
        "summary": summary,
    }
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# Serve the output folder as static files
|
| 175 |
app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
|