vkumartr committed on
Commit
010967e
·
verified ·
1 Parent(s): b15de05

Changes updated

Browse files
Files changed (1) hide show
  1. app.py +1 -6
app.py CHANGED
@@ -238,11 +238,7 @@ def ocr_from_s3(
238
  document_type: str = Query(..., description="Type of document"),
239
  entity_ref_key: str = Query(..., description="Entity Reference Key")
240
  ):
241
- """
242
- Extract text from a PDF or Image stored in S3 and process it based on document size.
243
- If more than 2 pages, skip Base64 conversion and summarization.
244
- Store extracted data in MongoDB.
245
- """
246
  try:
247
  # Fetch file from S3
248
  file_data, content_type = fetch_file_from_s3_file(file_key)
@@ -278,7 +274,6 @@ def ocr_from_s3(
278
  if num_pages <= 2:
279
  full_text = " ".join(extracted_text)
280
  summary = summarize_text(full_text)
281
-
282
  else:
283
  return {"error": f"Unsupported file type: {content_type}"}
284
 
 
238
  document_type: str = Query(..., description="Type of document"),
239
  entity_ref_key: str = Query(..., description="Entity Reference Key")
240
  ):
241
+ """Extract text from a PDF or Image stored in S3 and process it based on document size."""
 
 
 
 
242
  try:
243
  # Fetch file from S3
244
  file_data, content_type = fetch_file_from_s3_file(file_key)
 
274
  if num_pages <= 2:
275
  full_text = " ".join(extracted_text)
276
  summary = summarize_text(full_text)
 
277
  else:
278
  return {"error": f"Unsupported file type: {content_type}"}
279