Cybersecurity-Panel

Running

App Files Community

Sohan Kshirsagar committed on Jul 1, 2025

Commit

ba760bb

1 Parent(s): 6bbf91b

file upload feature

Browse files

Files changed (6) hide show

multi_llm_chatbot_backend/app/api/routes.py +18 -19
multi_llm_chatbot_backend/app/core/context.py +17 -0
multi_llm_chatbot_backend/app/llm/gemini_client.py +1 -0
multi_llm_chatbot_backend/app/tests/test_context_propagation.py +1 -1
multi_llm_chatbot_backend/app/utils/document_extractor.py +3 -6
multi_llm_chatbot_backend/app/utils/file_limits.py +11 -0

multi_llm_chatbot_backend/app/api/routes.py CHANGED Viewed

@@ -6,10 +6,12 @@ from app.llm.gemini_client import GeminiClient
 from app.models.persona import Persona
 from app.core.orchestrator import ChatOrchestrator
 from app.core.seamless_orchestrator import SeamlessOrchestrator
 from pydantic import BaseModel
 from typing import Optional, List
 from fastapi import UploadFile, File
 from app.utils.document_extractor import extract_text_from_file
 router = APIRouter()
@@ -135,20 +137,6 @@ llm = create_llm_client()
 chat_orchestrator = ChatOrchestrator()
 seamless_orchestrator = SeamlessOrchestrator(llm=llm)
-# Global context storage
-class GlobalSessionContext:
-    def __init__(self):
-        self.full_log: list[dict] = []
-    def append(self, role: str, content: str):
-        self.full_log.append({"role": role, "content": content})
-    def filter_by_persona(self, persona_id: str):
-        return self.full_log
-    def clear(self):
-        self.full_log = []
 session_context = GlobalSessionContext()
 def create_default_personas(llm_client: LLMClient):
@@ -498,6 +486,7 @@ async def get_current_model():
 @router.post("/upload-document")
 async def upload_document(file: UploadFile = File(...)):
     if file.content_type not in [
         "application/pdf",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -506,16 +495,22 @@ async def upload_document(file: UploadFile = File(...)):
         raise HTTPException(status_code=400, detail="Unsupported file type.")
     try:
-        # Read file content into memory
-        contents = await file.read()
-        # Now pass raw contents and file type to extractor
-        content = extract_text_from_file(contents, file.content_type)
         if not content.strip():
             raise HTTPException(status_code=400, detail="Document is empty or unreadable.")
         session_context.append("user", f"[Uploaded Document Content]\n{content.strip()}")
         return {"message": "Document uploaded and added to context successfully."}
@@ -534,4 +529,8 @@ async def debug_personas():
         },
         "context_length": len(session_context.full_log),
         "current_provider": current_provider
-    }

 from app.models.persona import Persona
 from app.core.orchestrator import ChatOrchestrator
 from app.core.seamless_orchestrator import SeamlessOrchestrator
+from app.core.context import GlobalSessionContext
 from pydantic import BaseModel
 from typing import Optional, List
 from fastapi import UploadFile, File
 from app.utils.document_extractor import extract_text_from_file
+from app.utils.file_limits import is_within_upload_limit
 router = APIRouter()
 chat_orchestrator = ChatOrchestrator()
 seamless_orchestrator = SeamlessOrchestrator(llm=llm)
 session_context = GlobalSessionContext()
 def create_default_personas(llm_client: LLMClient):
 @router.post("/upload-document")
 async def upload_document(file: UploadFile = File(...)):
+    # Validate file type
     if file.content_type not in [
         "application/pdf",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
         raise HTTPException(status_code=400, detail="Unsupported file type.")
     try:
+        # Read file bytes
+        file_bytes = await file.read()
+        # Check file size limit
+        if not is_within_upload_limit("default", file_bytes, session_context):
+            raise HTTPException(status_code=400, detail="Upload exceeds session document size limit (10 MB).")
+        # Extract and validate text
+        content = extract_text_from_file(file_bytes, file.content_type)
         if not content.strip():
             raise HTTPException(status_code=400, detail="Document is empty or unreadable.")
+        # Track file size and name
         session_context.append("user", f"[Uploaded Document Content]\n{content.strip()}")
+        session_context.uploaded_files.append(file.filename)
+        session_context.total_upload_size += len(file_bytes)
         return {"message": "Document uploaded and added to context successfully."}
         },
         "context_length": len(session_context.full_log),
         "current_provider": current_provider
+    }
+@router.get("/uploaded-files")
+def get_uploaded_filenames():
+    return {"files": session_context.uploaded_files}

multi_llm_chatbot_backend/app/core/context.py CHANGED Viewed

	@@ -0,0 +1,17 @@

+# Global context storage
+class GlobalSessionContext:
+    def __init__(self):
+        self.full_log: list[dict] = []
+        self.uploaded_files: list[str] = []
+        self.total_upload_size: int = 0
+    def append(self, role: str, content: str):
+        self.full_log.append({"role": role, "content": content})
+    def filter_by_persona(self, persona_id: str):
+        return self.full_log
+    def clear(self):
+        self.full_log = []
+        self.uploaded_files = []
+        self.total_upload_size = 0

multi_llm_chatbot_backend/app/llm/gemini_client.py CHANGED Viewed

@@ -10,6 +10,7 @@ class GeminiClient(LLMClient):
         self.model_name = model_name
         self.api_key = os.getenv("GEMINI_API_KEY")
         if not self.api_key:
             raise ValueError("GEMINI_API_KEY environment variable is required")

         self.model_name = model_name
         self.api_key = os.getenv("GEMINI_API_KEY")
+        print("DEBUG: GEMINI_API_KEY =", os.getenv("GEMINI_API_KEY"))
         if not self.api_key:
             raise ValueError("GEMINI_API_KEY environment variable is required")

multi_llm_chatbot_backend/app/tests/test_context_propagation.py CHANGED Viewed

@@ -6,7 +6,7 @@ BASE_URL = "http://localhost:8000"
 def test_unified_chat():
     print("\nSending unified chat request to /chat...\n")
     payload = {
-        "user_input": "I'm a second year PhD student in Machine Learning. Any advice for my research paper presentation?"
     }
     try:

 def test_unified_chat():
     print("\nSending unified chat request to /chat...\n")
     payload = {
+        "user_input": "I'm a second year PhD student in Machine Learning. Any advice for my research paper presentation? I am preparing for final QnA session."
     }
     try:

multi_llm_chatbot_backend/app/utils/document_extractor.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import tempfile
 import docx2txt
-import PyPDF2
-from io import BytesIO
 def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
     if content_type == "application/pdf":
@@ -12,10 +12,7 @@ def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
             tmp.write(file_bytes)
             tmp_path = tmp.name
-        try:
-            return docx2txt.process(tmp_path)
-        finally:
-            os.unlink(tmp_path)  # Clean up temp file
     elif content_type == "text/plain":
         return file_bytes.decode("utf-8")

+from io import BytesIO
+import PyPDF2
 import tempfile
 import docx2txt
 def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
     if content_type == "application/pdf":
         with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
             tmp.write(file_bytes)
             tmp_path = tmp.name
+        return docx2txt.process(tmp_path)
     elif content_type == "text/plain":
         return file_bytes.decode("utf-8")

multi_llm_chatbot_backend/app/utils/file_limits.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# utils/file_limits.py
+from app.core.context import GlobalSessionContext
+MAX_SESSION_UPLOAD_SIZE_MB = 10
+MAX_TOTAL_UPLOAD_MB = 10
+def is_within_upload_limit(session_id: str, new_file_bytes: bytes, session_context: GlobalSessionContext) -> bool:
+    size_mb = (session_context.total_upload_size + len(new_file_bytes)) / (1024 * 1024)
+    return size_mb <= MAX_TOTAL_UPLOAD_MB