Spaces:
Sleeping
Sleeping
Changes updated
Browse files
app.py
CHANGED
|
@@ -238,11 +238,7 @@ def ocr_from_s3(
|
|
| 238 |
document_type: str = Query(..., description="Type of document"),
|
| 239 |
entity_ref_key: str = Query(..., description="Entity Reference Key")
|
| 240 |
):
|
| 241 |
-
"""
|
| 242 |
-
Extract text from a PDF or Image stored in S3 and process it based on document size.
|
| 243 |
-
If more than 2 pages, skip Base64 conversion and summarization.
|
| 244 |
-
Store extracted data in MongoDB.
|
| 245 |
-
"""
|
| 246 |
try:
|
| 247 |
# Fetch file from S3
|
| 248 |
file_data, content_type = fetch_file_from_s3_file(file_key)
|
|
@@ -278,7 +274,6 @@ def ocr_from_s3(
|
|
| 278 |
if num_pages <= 2:
|
| 279 |
full_text = " ".join(extracted_text)
|
| 280 |
summary = summarize_text(full_text)
|
| 281 |
-
|
| 282 |
else:
|
| 283 |
return {"error": f"Unsupported file type: {content_type}"}
|
| 284 |
|
|
|
|
| 238 |
document_type: str = Query(..., description="Type of document"),
|
| 239 |
entity_ref_key: str = Query(..., description="Entity Reference Key")
|
| 240 |
):
|
| 241 |
+
"""Extract text from a PDF or Image stored in S3 and process it based on document size."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
try:
|
| 243 |
# Fetch file from S3
|
| 244 |
file_data, content_type = fetch_file_from_s3_file(file_key)
|
|
|
|
| 274 |
if num_pages <= 2:
|
| 275 |
full_text = " ".join(extracted_text)
|
| 276 |
summary = summarize_text(full_text)
|
|
|
|
| 277 |
else:
|
| 278 |
return {"error": f"Unsupported file type: {content_type}"}
|
| 279 |
|