Sohan Kshirsagar committed on
Commit
ba760bb
·
1 Parent(s): 6bbf91b

file upload feature

Browse files
multi_llm_chatbot_backend/app/api/routes.py CHANGED
@@ -6,10 +6,12 @@ from app.llm.gemini_client import GeminiClient
6
  from app.models.persona import Persona
7
  from app.core.orchestrator import ChatOrchestrator
8
  from app.core.seamless_orchestrator import SeamlessOrchestrator
 
9
  from pydantic import BaseModel
10
  from typing import Optional, List
11
  from fastapi import UploadFile, File
12
  from app.utils.document_extractor import extract_text_from_file
 
13
 
14
  router = APIRouter()
15
 
@@ -135,20 +137,6 @@ llm = create_llm_client()
135
  chat_orchestrator = ChatOrchestrator()
136
  seamless_orchestrator = SeamlessOrchestrator(llm=llm)
137
 
138
- # Global context storage
139
- class GlobalSessionContext:
140
- def __init__(self):
141
- self.full_log: list[dict] = []
142
-
143
- def append(self, role: str, content: str):
144
- self.full_log.append({"role": role, "content": content})
145
-
146
- def filter_by_persona(self, persona_id: str):
147
- return self.full_log
148
-
149
- def clear(self):
150
- self.full_log = []
151
-
152
  session_context = GlobalSessionContext()
153
 
154
  def create_default_personas(llm_client: LLMClient):
@@ -498,6 +486,7 @@ async def get_current_model():
498
 
499
  @router.post("/upload-document")
500
  async def upload_document(file: UploadFile = File(...)):
 
501
  if file.content_type not in [
502
  "application/pdf",
503
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -506,16 +495,22 @@ async def upload_document(file: UploadFile = File(...)):
506
  raise HTTPException(status_code=400, detail="Unsupported file type.")
507
 
508
  try:
509
- # Read file content into memory
510
- contents = await file.read()
511
 
512
- # Now pass raw contents and file type to extractor
513
- content = extract_text_from_file(contents, file.content_type)
 
514
 
 
 
515
  if not content.strip():
516
  raise HTTPException(status_code=400, detail="Document is empty or unreadable.")
517
 
 
518
  session_context.append("user", f"[Uploaded Document Content]\n{content.strip()}")
 
 
519
 
520
  return {"message": "Document uploaded and added to context successfully."}
521
 
@@ -534,4 +529,8 @@ async def debug_personas():
534
  },
535
  "context_length": len(session_context.full_log),
536
  "current_provider": current_provider
537
- }
 
 
 
 
 
6
  from app.models.persona import Persona
7
  from app.core.orchestrator import ChatOrchestrator
8
  from app.core.seamless_orchestrator import SeamlessOrchestrator
9
+ from app.core.context import GlobalSessionContext
10
  from pydantic import BaseModel
11
  from typing import Optional, List
12
  from fastapi import UploadFile, File
13
  from app.utils.document_extractor import extract_text_from_file
14
+ from app.utils.file_limits import is_within_upload_limit
15
 
16
  router = APIRouter()
17
 
 
137
  chat_orchestrator = ChatOrchestrator()
138
  seamless_orchestrator = SeamlessOrchestrator(llm=llm)
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  session_context = GlobalSessionContext()
141
 
142
  def create_default_personas(llm_client: LLMClient):
 
486
 
487
  @router.post("/upload-document")
488
  async def upload_document(file: UploadFile = File(...)):
489
+ # Validate file type
490
  if file.content_type not in [
491
  "application/pdf",
492
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 
495
  raise HTTPException(status_code=400, detail="Unsupported file type.")
496
 
497
  try:
498
+ # Read file bytes
499
+ file_bytes = await file.read()
500
 
501
+ # Check file size limit
502
+ if not is_within_upload_limit("default", file_bytes, session_context):
503
+ raise HTTPException(status_code=400, detail="Upload exceeds session document size limit (10 MB).")
504
 
505
+ # Extract and validate text
506
+ content = extract_text_from_file(file_bytes, file.content_type)
507
  if not content.strip():
508
  raise HTTPException(status_code=400, detail="Document is empty or unreadable.")
509
 
510
+ # Track file size and name
511
  session_context.append("user", f"[Uploaded Document Content]\n{content.strip()}")
512
+ session_context.uploaded_files.append(file.filename)
513
+ session_context.total_upload_size += len(file_bytes)
514
 
515
  return {"message": "Document uploaded and added to context successfully."}
516
 
 
529
  },
530
  "context_length": len(session_context.full_log),
531
  "current_provider": current_provider
532
+ }
533
+
534
@router.get("/uploaded-files")
def get_uploaded_filenames():
    """Return the filenames recorded on the shared session context.

    Returns:
        A dict with a single ``"files"`` key holding the context's
        ``uploaded_files`` list.
    """
    filenames = session_context.uploaded_files
    return {"files": filenames}
multi_llm_chatbot_backend/app/core/context.py CHANGED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Global context storage
class GlobalSessionContext:
    """Conversation log plus bookkeeping for uploaded documents.

    Attributes are plain public fields:
      * ``full_log`` - ordered list of ``{"role", "content"}`` message dicts.
      * ``uploaded_files`` - names of the files added so far.
      * ``total_upload_size`` - running size counter maintained by callers.
    """

    def __init__(self):
        self.full_log: list[dict] = []
        self.uploaded_files: list[str] = []
        self.total_upload_size: int = 0

    def append(self, role: str, content: str):
        """Add one message to the end of the log."""
        entry = {"role": role, "content": content}
        self.full_log.append(entry)

    def filter_by_persona(self, persona_id: str):
        """Return the log for ``persona_id``.

        No filtering is performed: ``persona_id`` is ignored and the live
        ``full_log`` list itself (not a copy) is returned.
        """
        return self.full_log

    def clear(self):
        """Reset the log and upload bookkeeping to their initial state.

        Fresh list objects are bound, so references to the previous lists
        held elsewhere are left untouched.
        """
        self.full_log, self.uploaded_files = [], []
        self.total_upload_size = 0
multi_llm_chatbot_backend/app/llm/gemini_client.py CHANGED
@@ -10,6 +10,7 @@ class GeminiClient(LLMClient):
10
 
11
  self.model_name = model_name
12
  self.api_key = os.getenv("GEMINI_API_KEY")
 
13
  if not self.api_key:
14
  raise ValueError("GEMINI_API_KEY environment variable is required")
15
 
 
10
 
11
  self.model_name = model_name
12
  self.api_key = os.getenv("GEMINI_API_KEY")
13
+ print("DEBUG: GEMINI_API_KEY =", os.getenv("GEMINI_API_KEY"))
14
  if not self.api_key:
15
  raise ValueError("GEMINI_API_KEY environment variable is required")
16
 
multi_llm_chatbot_backend/app/tests/test_context_propagation.py CHANGED
@@ -6,7 +6,7 @@ BASE_URL = "http://localhost:8000"
6
  def test_unified_chat():
7
  print("\nSending unified chat request to /chat...\n")
8
  payload = {
9
- "user_input": "I'm a second year PhD student in Machine Learning. Any advice for my research paper presentation?"
10
  }
11
 
12
  try:
 
6
  def test_unified_chat():
7
  print("\nSending unified chat request to /chat...\n")
8
  payload = {
9
+ "user_input": "I'm a second year PhD student in Machine Learning. Any advice for my research paper presentation? I am preparing for final QnA session."
10
  }
11
 
12
  try:
multi_llm_chatbot_backend/app/utils/document_extractor.py CHANGED
@@ -1,7 +1,7 @@
 
 
1
  import tempfile
2
  import docx2txt
3
- import PyPDF2
4
- from io import BytesIO
5
 
6
  def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
7
  if content_type == "application/pdf":
@@ -12,10 +12,7 @@ def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
12
  with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
13
  tmp.write(file_bytes)
14
  tmp_path = tmp.name
15
- try:
16
- return docx2txt.process(tmp_path)
17
- finally:
18
- os.unlink(tmp_path) # Clean up temp file
19
 
20
  elif content_type == "text/plain":
21
  return file_bytes.decode("utf-8")
 
1
+ from io import BytesIO
2
+ import PyPDF2
3
  import tempfile
4
  import docx2txt
 
 
5
 
6
  def extract_text_from_file(file_bytes: bytes, content_type: str) -> str:
7
  if content_type == "application/pdf":
 
12
  with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
13
  tmp.write(file_bytes)
14
  tmp_path = tmp.name
15
+ return docx2txt.process(tmp_path)
 
 
 
16
 
17
  elif content_type == "text/plain":
18
  return file_bytes.decode("utf-8")
multi_llm_chatbot_backend/app/utils/file_limits.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/file_limits.py
2
+ from app.core.context import GlobalSessionContext
3
+
4
# Cap on the cumulative size of uploads tracked by a session context, in MB.
MAX_TOTAL_UPLOAD_MB = 10

# Backward-compatible alias. It used to repeat the literal 10 independently,
# which allowed the two names to drift apart; it now follows the value above.
MAX_SESSION_UPLOAD_SIZE_MB = MAX_TOTAL_UPLOAD_MB

_BYTES_PER_MB = 1024 * 1024


def is_within_upload_limit(
    session_id: str,
    new_file_bytes: bytes,
    session_context: "GlobalSessionContext",
) -> bool:
    """Tell whether accepting a new upload keeps the context within its cap.

    Args:
        session_id: Not consulted by this function; the limit is evaluated
            against ``session_context`` only.
        new_file_bytes: Raw bytes of the file about to be added.
        session_context: Object exposing ``total_upload_size``, the number of
            bytes already accepted.

    Returns:
        ``True`` when the already-accepted bytes plus ``len(new_file_bytes)``
        do not exceed ``MAX_TOTAL_UPLOAD_MB`` megabytes (the boundary itself
        is allowed), ``False`` otherwise.
    """
    # Compare in whole bytes so the check is exact integer arithmetic rather
    # than a float megabyte value.
    projected_bytes = session_context.total_upload_size + len(new_file_bytes)
    return projected_bytes <= MAX_TOTAL_UPLOAD_MB * _BYTES_PER_MB