kmuthudurai committed on
Commit
c088f72
·
verified ·
1 Parent(s): 18902ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -41
app.py CHANGED
@@ -130,47 +130,6 @@ async def create_upload_file(
130
  logger.error(f"Error processing file: {str(e)}")
131
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
132
 
133
- # Initialize PaddleOCR
134
- ocr = PaddleOCR(use_angle_cls=True, lang="en")
135
-
136
- # Load the summarization model
137
- MODEL_NAME = "t5-small" # Lightweight summarization model
138
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
139
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
140
-
141
- def extract_text_with_paddleocr(image: Image.Image) -> str:
142
- """Perform OCR using PaddleOCR to extract text from an image."""
143
- image_np = np.array(image)
144
- result = ocr.ocr(image_np, cls=True)
145
- extracted_text = "\n".join([line[1][0] for line in result[0]])
146
- return extracted_text
147
-
148
- def summarize_text(text: str) -> str:
149
- """Summarize the extracted text using a transformer model."""
150
- inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512, truncation=True)
151
- outputs = model.generate(inputs, max_length=150, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
152
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
153
-
154
- @app.post("/ocr-summarize/")
155
- async def ocr_and_summarize(file: UploadFile = File(...)):
156
- """
157
- Upload an image file, extract text using PaddleOCR, and return the summarized content.
158
- """
159
- try:
160
- image = Image.open(io.BytesIO(await file.read()))
161
- extracted_text = extract_text_with_paddleocr(image)
162
- if not extracted_text.strip():
163
- return {"error": "No text detected in the uploaded image."}
164
- summary = summarize_text(extracted_text)
165
-
166
- return {
167
- "filename": file.filename,
168
- "extracted_text": extracted_text,
169
- "summary": summary,
170
- }
171
- except Exception as e:
172
- return {"error": f"Failed to process the image. Details: {str(e)}"}
173
-
174
  # Serve the output folder as static files
175
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
176
 
 
130
  logger.error(f"Error processing file: {str(e)}")
131
  raise HTTPException(status_code=500, detail="Internal server error while processing the file")
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  # Serve the output folder as static files
134
  app.mount("/output", StaticFiles(directory="output", follow_symlink=True, html=True), name="output")
135