rohannsinghal commited on
Commit
c4c5f40
Β·
1 Parent(s): 9d7e0bb

few changes

Browse files
Files changed (1) hide show
  1. app/main_api.py +26 -48
app/main_api.py CHANGED
@@ -985,20 +985,10 @@ doc_processor = UniversalDocumentProcessor()
985
  kaggle_client = LazyKaggleModelClient()
986
 
987
  # --- API MODELS ---
988
- # In main_api.py
989
- from pydantic import BaseModel, validator
990
- from typing import List
991
-
992
  class SubmissionRequest(BaseModel):
993
- documents: List[str]
994
  questions: List[str]
995
 
996
- @validator('documents', pre=True)
997
- def allow_single_string(cls, v):
998
- if isinstance(v, str):
999
- return [v] # Automatically convert string to a list
1000
- return v
1001
-
1002
  class SubmissionResponse(BaseModel):
1003
  answers: List[str]
1004
 
@@ -1023,7 +1013,8 @@ def test_endpoint():
1023
  @app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
1024
  async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
1025
  start_time = time.time()
1026
- logger.info(f"🎯 DEADLOCK-FREE KAGGLE-POWERED PROCESSING: {len(submission_request.documents)} docs, {len(submission_request.questions)} questions")
 
1027
 
1028
  try:
1029
  # LAZY INITIALIZATION: Only now do we connect to Kaggle!
@@ -1040,47 +1031,34 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1040
  session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
1041
  rag_pipeline = DeadlockFreeRAGPipeline(session_id, multi_llm, kaggle_client)
1042
 
1043
- # Process all documents with higher concurrency
1044
  all_chunks = []
1045
 
1046
  async with httpx.AsyncClient(
1047
  timeout=45.0,
1048
  headers={"ngrok-skip-browser-warning": "true"}
1049
- ) as client: # Tighter timeout + ngrok header
1050
- # SPEED OPTIMIZATION: Higher concurrency
1051
- semaphore = asyncio.Semaphore(5) # Increased from 3
1052
 
1053
  async def process_single_document(doc_idx: int, doc_url: str):
1054
- async with semaphore:
1055
- try:
1056
- logger.info(f"πŸ“₯ Downloading document {doc_idx + 1}")
1057
- response = await client.get(doc_url, follow_redirects=True)
1058
- response.raise_for_status()
1059
-
1060
- # Get filename from URL or generate one
1061
- filename = os.path.basename(doc_url.split('?')[0]) or f"document_{doc_idx}"
1062
-
1063
- # Process document with caching
1064
- chunks = await doc_processor.process_document(filename, response.content)
1065
-
1066
- logger.info(f"βœ… Document {doc_idx + 1}: {len(chunks)} chunks")
1067
- return chunks
1068
-
1069
- except Exception as e:
1070
- logger.error(f"❌ Document {doc_idx + 1} failed: {e}")
1071
- return []
1072
-
1073
- # Process all documents concurrently
1074
- tasks = [
1075
- process_single_document(i, url)
1076
- for i, url in enumerate(submission_request.documents)
1077
- ]
1078
-
1079
- results = await asyncio.gather(*tasks)
1080
 
1081
- # Flatten results
1082
- for chunks in results:
1083
- all_chunks.extend(chunks)
 
 
 
1084
 
1085
  logger.info(f"πŸ“Š Total chunks processed: {len(all_chunks)}")
1086
 
@@ -1097,8 +1075,7 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1097
  # SPEED OPTIMIZATION: Full parallel question answering
1098
  logger.info(f"⚑ Answering questions in parallel...")
1099
 
1100
- # INCREASED concurrency for questions
1101
- semaphore = asyncio.Semaphore(4) # Increased from 2
1102
 
1103
  async def answer_single_question(question: str) -> str:
1104
  async with semaphore:
@@ -1120,7 +1097,8 @@ async def run_submission(request: Request, submission_request: SubmissionRequest
1120
  "Processing error occurred. Please try again."
1121
  for _ in submission_request.questions
1122
  ])
1123
-
 
1124
  # --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL + DEADLOCK-FREE INFO) ---
1125
  @app.get("/")
1126
  def read_root():
 
985
  kaggle_client = LazyKaggleModelClient()
986
 
987
  # --- API MODELS ---
 
 
 
 
988
  class SubmissionRequest(BaseModel):
989
+ documents: str # <-- This now correctly expects a single string
990
  questions: List[str]
991
 
 
 
 
 
 
 
992
  class SubmissionResponse(BaseModel):
993
  answers: List[str]
994
 
 
1013
  @app.post("/api/v1/hackrx/run", response_model=SubmissionResponse, dependencies=[Depends(verify_bearer_token)])
1014
  async def run_submission(request: Request, submission_request: SubmissionRequest = Body(...)):
1015
  start_time = time.time()
1016
+ # This log is changed to reflect one document
1017
+ logger.info(f"🎯 DEADLOCK-FREE KAGGLE-POWERED PROCESSING: 1 doc, {len(submission_request.questions)} questions")
1018
 
1019
  try:
1020
  # LAZY INITIALIZATION: Only now do we connect to Kaggle!
 
1031
  session_id = f"kaggle_{uuid.uuid4().hex[:6]}" # Shorter UUID
1032
  rag_pipeline = DeadlockFreeRAGPipeline(session_id, multi_llm, kaggle_client)
1033
 
1034
+ # Process the single document
1035
  all_chunks = []
1036
 
1037
  async with httpx.AsyncClient(
1038
  timeout=45.0,
1039
  headers={"ngrok-skip-browser-warning": "true"}
1040
+ ) as client:
 
 
1041
 
1042
  async def process_single_document(doc_idx: int, doc_url: str):
1043
+ # This inner function remains the same
1044
+ try:
1045
+ logger.info(f"πŸ“₯ Downloading document {doc_idx + 1}")
1046
+ response = await client.get(doc_url, follow_redirects=True)
1047
+ response.raise_for_status()
1048
+ filename = os.path.basename(doc_url.split('?')[0]) or f"document_{doc_idx}"
1049
+ chunks = await doc_processor.process_document(filename, response.content)
1050
+ logger.info(f"βœ… Document {doc_idx + 1}: {len(chunks)} chunks")
1051
+ return chunks
1052
+ except Exception as e:
1053
+ logger.error(f"❌ Document {doc_idx + 1} failed: {e}")
1054
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
 
1056
+ # --- THIS IS THE CORRECTED LOGIC ---
1057
+ # It now processes only the single string from submission_request.documents
1058
+ single_doc_url = submission_request.documents
1059
+ chunks_for_single_doc = await process_single_document(0, single_doc_url)
1060
+ all_chunks.extend(chunks_for_single_doc)
1061
+ # ------------------------------------
1062
 
1063
  logger.info(f"πŸ“Š Total chunks processed: {len(all_chunks)}")
1064
 
 
1075
  # SPEED OPTIMIZATION: Full parallel question answering
1076
  logger.info(f"⚑ Answering questions in parallel...")
1077
 
1078
+ semaphore = asyncio.Semaphore(4)
 
1079
 
1080
  async def answer_single_question(question: str) -> str:
1081
  async with semaphore:
 
1097
  "Processing error occurred. Please try again."
1098
  for _ in submission_request.questions
1099
  ])
1100
+
1101
+
1102
  # --- HEALTH ENDPOINTS (YOUR EXCELLENT ORIGINAL + DEADLOCK-FREE INFO) ---
1103
  @app.get("/")
1104
  def read_root():