Spaces:

PercivalFletcher
/

Shreyansh-HackRx

Sleeping

App Files Community

PercivalFletcher committed on Aug 7, 2025

Commit

f44abf4

verified ·

1 Parent(s): 9bce7bd

Update main.py

Browse files

Files changed (1) hide show

main.py +24 -64

main.py CHANGED Viewed

@@ -30,7 +30,7 @@ from rag_utils import (
     process_markdown_with_manual_sections,
     generate_answer_with_groq,
     HybridSearchManager,
-    EmbeddingClient, # This might not be needed directly in main.py, but good to have
     CHUNK_SIZE,
     CHUNK_OVERLAP,
     TOP_K_CHUNKS,
@@ -47,15 +47,13 @@ app = FastAPI(
 )
 # --- Global instance for the HybridSearchManager ---
-# This will be initialized on startup
 hybrid_search_manager: Optional[HybridSearchManager] = None
 @app.on_event("startup")
 async def startup_event():
     global hybrid_search_manager
-    # Initialize the HybridSearchManager at startup
     hybrid_search_manager = HybridSearchManager()
-    #initialize_llama_extract_agent()  # From processing_utility
     print("Application startup complete. HybridSearchManager is ready.")
 # --- Groq API Key Setup ---
@@ -65,16 +63,9 @@ if GROQ_API_KEY == "NOT_FOUND":
         "WARNING: GROQ_API_KEY is using a placeholder or hardcoded value. Please set GROQ_API_KEY environment variable for production."
     )
-# --- Authorization Token Setup ---
-# EXPECTED_AUTH_TOKEN = os.getenv("AUTHORIZATION_TOKEN")
-# if not EXPECTED_AUTH_TOKEN:
-#     print(
-#         "WARNING: AUTHORIZATION_TOKEN environment variable is not set. Authorization will not work as expected."
-#     )
 # --- Pydantic Models for Request and Response ---
 class RunRequest(BaseModel):
-    documents: str  # URL to the PDF document
     questions: List[str]
 class Answer(BaseModel):
@@ -82,33 +73,12 @@ class Answer(BaseModel):
 class RunResponse(BaseModel):
     answers: List[str]
-    #processing_time: float
-    #step_timings: dict  # New field for detailed timings
-# --- Security Dependency ---
-security = HTTPBearer()
-# async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
-#     """
-#     Verifies the Bearer token in the Authorization header.
-#     """
-#     if not EXPECTED_AUTH_TOKEN:
-#         raise HTTPException(
-#             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-#             detail="Authorization token not configured on the server.",
-#         )
-#     if credentials.scheme != "Bearer" or credentials.credentials != EXPECTED_AUTH_TOKEN:
-#         raise HTTPException(
-#             status_code=status.HTTP_401_UNAUTHORIZED,
-#             detail="Invalid or missing authentication token",
-#             headers={"WWW-Authenticate": "Bearer"},
-#         )
-#     return True
 @app.post("/hackrx/run", response_model=RunResponse)
 async def run_rag_pipeline(
-    request: RunRequest,
-    # authorized: bool = Depends(verify_token)
 ):
     """
     Runs the RAG pipeline for a given PDF document (converted to Markdown internally)
@@ -118,11 +88,8 @@ async def run_rag_pipeline(
     questions = request.questions
     local_markdown_path = None
     step_timings = {}
     start_time_total = time.perf_counter()
     try:
-        # Ensure the HybridSearchManager is initialized
         if hybrid_search_manager is None:
             raise HTTPException(
                 status_code=500, detail="HybridSearchManager not initialized."
@@ -142,20 +109,6 @@ async def run_rag_pipeline(
             f"Parsing to Markdown took {step_timings['parsing_to_markdown']:.2f} seconds."
         )
-        # 2. Headings Generation: Extract headings JSON
-        '''start_time = time.perf_counter()
-        headings_json = extract_schema_from_file(local_markdown_path)
-        if not headings_json or not headings_json.get("headings"):
-            raise HTTPException(
-                status_code=400,
-                detail="Could not retrieve valid headings from the provided document.",
-            )
-        end_time = time.perf_counter()
-        step_timings["headings_generation"] = end_time - start_time
-        print(
-            f"Headings Generation took {step_timings['headings_generation']:.2f} seconds."
-        )'''
         headings_json = {"headings":["p"]}
         # 3. Chunk Generation: Process Markdown into chunks
@@ -178,7 +131,6 @@ async def run_rag_pipeline(
         # 4. Model Initialization and Embeddings Pre-computation
         start_time = time.perf_counter()
-        # --- FIX: Await the async function call ---
         await hybrid_search_manager.initialize_models(processed_documents)
         end_time = time.perf_counter()
         step_timings["model_initialization"] = end_time - start_time
@@ -188,29 +140,36 @@ async def run_rag_pipeline(
         # 5. Concurrent Query Processing (Search and Generation)
         start_time_query_processing = time.perf_counter()
         # Search Phase
         batch_size = 3
         all_retrieved_results = []
         print(f"Starting concurrent search in batches of {batch_size}...")
         for i in range(0, len(questions), batch_size):
             current_batch_questions = questions[i : i + batch_size]
             print(
                 f"Processing batch {i // batch_size + 1} with {len(current_batch_questions)} queries."
             )
-            # --- FIX: Directly create a list of coroutines, no asyncio.to_thread needed here ---
             search_tasks = [
                 hybrid_search_manager.perform_hybrid_search(
                     question, TOP_K_CHUNKS
                 )
                 for question in current_batch_questions
             ]
-            batch_results = await asyncio.gather(*search_tasks)
-            all_retrieved_results.extend(batch_results)
         print("Search phase completed for all queries.")
         # Generation Phase
         print(f"Starting concurrent answer generation for {len(questions)} questions...")
@@ -230,7 +189,6 @@ async def run_rag_pipeline(
                 generation_tasks.append(no_info_future)
         all_answer_texts = await asyncio.gather(*generation_tasks)
         end_time_query_processing = time.perf_counter()
         step_timings["query_processing"] = (
             end_time_query_processing - start_time_query_processing
@@ -241,12 +199,13 @@ async def run_rag_pipeline(
         end_time_total = time.perf_counter()
         total_processing_time = end_time_total - start_time_total
         print("All questions processed.")
         all_answers = [answer_text for answer_text in all_answer_texts]
         return RunResponse(
-            answers=all_answers
         )
     except HTTPException as e:
@@ -259,4 +218,5 @@ async def run_rag_pipeline(
     finally:
         if local_markdown_path and os.path.exists(local_markdown_path):
             os.unlink(local_markdown_path)
-            print(f"Cleaned up temporary markdown file: {local_markdown_path}")

     process_markdown_with_manual_sections,
     generate_answer_with_groq,
     HybridSearchManager,
+    EmbeddingClient,
     CHUNK_SIZE,
     CHUNK_OVERLAP,
     TOP_K_CHUNKS,
 )
 # --- Global instance for the HybridSearchManager ---
 hybrid_search_manager: Optional[HybridSearchManager] = None
 @app.on_event("startup")
 async def startup_event():
     global hybrid_search_manager
     hybrid_search_manager = HybridSearchManager()
+    #initialize_llama_extract_agent()
     print("Application startup complete. HybridSearchManager is ready.")
 # --- Groq API Key Setup ---
         "WARNING: GROQ_API_KEY is using a placeholder or hardcoded value. Please set GROQ_API_KEY environment variable for production."
     )
 # --- Pydantic Models for Request and Response ---
 class RunRequest(BaseModel):
+    documents: str
     questions: List[str]
 class Answer(BaseModel):
 class RunResponse(BaseModel):
     answers: List[str]
+    step_timings: Dict[str, float] # Added field for timing information
 @app.post("/hackrx/run", response_model=RunResponse)
 async def run_rag_pipeline(
+    request: RunRequest
+    # authorized: bool = Depends(verify_token)):
 ):
     """
     Runs the RAG pipeline for a given PDF document (converted to Markdown internally)
     questions = request.questions
     local_markdown_path = None
     step_timings = {}
     start_time_total = time.perf_counter()
     try:
         if hybrid_search_manager is None:
             raise HTTPException(
                 status_code=500, detail="HybridSearchManager not initialized."
             f"Parsing to Markdown took {step_timings['parsing_to_markdown']:.2f} seconds."
         )
         headings_json = {"headings":["p"]}
         # 3. Chunk Generation: Process Markdown into chunks
         # 4. Model Initialization and Embeddings Pre-computation
         start_time = time.perf_counter()
         await hybrid_search_manager.initialize_models(processed_documents)
         end_time = time.perf_counter()
         step_timings["model_initialization"] = end_time - start_time
         # 5. Concurrent Query Processing (Search and Generation)
         start_time_query_processing = time.perf_counter()
         # Search Phase
         batch_size = 3
         all_retrieved_results = []
+        all_rerank_times = []
         print(f"Starting concurrent search in batches of {batch_size}...")
         for i in range(0, len(questions), batch_size):
             current_batch_questions = questions[i : i + batch_size]
             print(
                 f"Processing batch {i // batch_size + 1} with {len(current_batch_questions)} queries."
             )
+            # The search method now returns a tuple of results and rerank time
             search_tasks = [
                 hybrid_search_manager.perform_hybrid_search(
                     question, TOP_K_CHUNKS
                 )
                 for question in current_batch_questions
             ]
+            batch_results_and_times = await asyncio.gather(*search_tasks)
+            # Unpack results and timings
+            for results, rerank_time in batch_results_and_times:
+                all_retrieved_results.append(results)
+                all_rerank_times.append(rerank_time)
         print("Search phase completed for all queries.")
+        # Add the total reranking time to the step timings
+        step_timings["reranking_total_time"] = sum(all_rerank_times)
+        step_timings["reranking_avg_time_per_query"] = sum(all_rerank_times) / len(all_rerank_times)
         # Generation Phase
         print(f"Starting concurrent answer generation for {len(questions)} questions...")
                 generation_tasks.append(no_info_future)
         all_answer_texts = await asyncio.gather(*generation_tasks)
         end_time_query_processing = time.perf_counter()
         step_timings["query_processing"] = (
             end_time_query_processing - start_time_query_processing
         end_time_total = time.perf_counter()
         total_processing_time = end_time_total - start_time_total
+        step_timings["total_processing_time"] = total_processing_time
         print("All questions processed.")
         all_answers = [answer_text for answer_text in all_answer_texts]
         return RunResponse(
+            answers=all_answers,
+            step_timings=step_timings
         )
     except HTTPException as e:
     finally:
         if local_markdown_path and os.path.exists(local_markdown_path):
             os.unlink(local_markdown_path)
+            print(f"Cleaned up temporary markdown file: {local_markdown_path}")