document-extraction

Sleeping

vkumartr committed on Feb 6, 2025

Commit

03771f0

verified ·

1 Parent(s): 3d971ee

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import uvicorn
 from fastapi.staticfiles import StaticFiles
 import hashlib
-from enum import Enum
 from fastapi import FastAPI, Header, Query, Depends, HTTPException
 from PIL import Image
 import io
@@ -92,7 +91,7 @@ def extract_invoice_data(file_data, content_type):
     if content_type.startswith("image/"):
         mime_type = content_type  # e.g., image/png, image/jpeg
     elif content_type == "application/pdf":
-        mime_type = "application/pdf"
     else:
         raise ValueError(f"Unsupported content type: {content_type}")
@@ -230,15 +229,6 @@ def extract_invoice_data(file_data, content_type):
         logger.error(f"Error in data extraction: {e}")
         return {"error": str(e)}
-# def extract_text_from_s3(file_key, content_type):
-#     return "Extracted text from file", 1  # Placeholder for real extraction logic
-# def convert_to_base64(file_key):
-#     return "Base64 encoded data"  # Placeholder
-# def generate_summary(extracted_text):
-#     return "Summarized text"  # Placeholder
 def get_content_type_from_s3(file_key):
     """Fetch the content type (MIME type) of a file stored in S3."""
     try:
@@ -297,6 +287,7 @@ def extract_text_from_file(
         return {
             "message": "Document successfully stored in MongoDB",
             "document_id": document_id,
             "extracted_data": extracted_data
         }

 import uvicorn
 from fastapi.staticfiles import StaticFiles
 import hashlib
 from fastapi import FastAPI, Header, Query, Depends, HTTPException
 from PIL import Image
 import io
     if content_type.startswith("image/"):
         mime_type = content_type  # e.g., image/png, image/jpeg
     elif content_type == "application/pdf":
+        mime_type = content_type
     else:
         raise ValueError(f"Unsupported content type: {content_type}")
         logger.error(f"Error in data extraction: {e}")
         return {"error": str(e)}
 def get_content_type_from_s3(file_key):
     """Fetch the content type (MIME type) of a file stored in S3."""
     try:
         return {
             "message": "Document successfully stored in MongoDB",
             "document_id": document_id,
+            "entityrefkey":entity_ref_key,
             "extracted_data": extracted_data
         }