Spaces:

PercivalFletcher
/

ParentHackRx

Sleeping

App Files Files Community

PercivalFletcher commited on Aug 8, 2025

Commit

d4b4587

verified ·

1 Parent(s): 0c7fe1b

Update main3.py

Browse files

Files changed (1) hide show

main3.py +39 -7

main3.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# file: main.py
 import time
 import os
 import asyncio
@@ -8,7 +8,7 @@ from typing import List, Dict, Any
 from dotenv import load_dotenv
 from document_processor import ingest_and_parse_document
-from chunking_parent import create_parent_child_chunks # <-- Using the parent-child function
 from embedding import EmbeddingClient
 from retrieval_parent import Retriever, generate_hypothetical_document
 from generation import generate_answer
@@ -18,7 +18,7 @@ load_dotenv()
 app = FastAPI(
     title="Modular RAG API",
     description="A modular API for Retrieval-Augmented Generation with Parent-Child Retrieval.",
-    version="2.2.1", # Updated version
 )
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
@@ -33,7 +33,7 @@ class RunRequest(BaseModel):
 class RunResponse(BaseModel):
     answers: List[str]
-class TestRequest(BaseModel): # <-- Model for test endpoints
     document_url: HttpUrl
 # --- NEW: Test Endpoint for Parent-Child Chunking ---
@@ -84,37 +84,69 @@ async def test_chunking_endpoint(request: TestRequest):
 @app.post("/hackrx/run", response_model=RunResponse)
 async def run_rag_pipeline(request: RunRequest):
     try:
         print("--- Kicking off RAG Pipeline with Parent-Child Strategy ---")
-        # --- STAGE 1: DOCUMENT INGESTION ---
         markdown_content = await ingest_and_parse_document(request.document_url)
         # --- STAGE 2: PARENT-CHILD CHUNKING ---
         child_documents, docstore, _ = create_parent_child_chunks(markdown_content)
         if not child_documents:
             raise HTTPException(status_code=400, detail="Document could not be processed into chunks.")
-        # --- STAGE 3: INDEXING ---
         retriever.index(child_documents, docstore)
         # --- CONCURRENT WORKFLOW ---
         hyde_tasks = [generate_hypothetical_document(q, GROQ_API_KEY) for q in request.questions]
         all_hyde_docs = await asyncio.gather(*hyde_tasks)
         retrieval_tasks = [
             retriever.retrieve(q, hyde_doc)
             for q, hyde_doc in zip(request.questions, all_hyde_docs)
         ]
         all_retrieved_chunks = await asyncio.gather(*retrieval_tasks)
         answer_tasks = [
             generate_answer(q, chunks, GROQ_API_KEY)
             for q, chunks in zip(request.questions, all_retrieved_chunks)
         ]
         final_answers = await asyncio.gather(*answer_tasks)
-        print("--- RAG Pipeline Completed Successfully ---")
         return RunResponse(answers=final_answers)
     except Exception as e:

+# file: main3.py
 import time
 import os
 import asyncio
 from dotenv import load_dotenv
 from document_processor import ingest_and_parse_document
+from chunking_parent import create_parent_child_chunks
 from embedding import EmbeddingClient
 from retrieval_parent import Retriever, generate_hypothetical_document
 from generation import generate_answer
 app = FastAPI(
     title="Modular RAG API",
     description="A modular API for Retrieval-Augmented Generation with Parent-Child Retrieval.",
+    version="2.2.2", # Version updated
 )
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 class RunResponse(BaseModel):
     answers: List[str]
+class TestRequest(BaseModel):
     document_url: HttpUrl
 # --- NEW: Test Endpoint for Parent-Child Chunking ---
 @app.post("/hackrx/run", response_model=RunResponse)
 async def run_rag_pipeline(request: RunRequest):
     try:
+        total_pipeline_start_time = time.perf_counter()
+        timings = {}
         print("--- Kicking off RAG Pipeline with Parent-Child Strategy ---")
+        # --- STAGE 1: DOCUMENT INGESTION (Parsing) ---
+        parse_start = time.perf_counter()
         markdown_content = await ingest_and_parse_document(request.document_url)
+        timings["1_parsing"] = time.perf_counter() - parse_start
+        print(f"Time taken for Parsing: {timings['1_parsing']:.4f} seconds.")
         # --- STAGE 2: PARENT-CHILD CHUNKING ---
+        chunk_start = time.perf_counter()
         child_documents, docstore, _ = create_parent_child_chunks(markdown_content)
+        timings["2_chunking"] = time.perf_counter() - chunk_start
+        print(f"Time taken for Parent-Child Chunking: {timings['2_chunking']:.4f} seconds.")
         if not child_documents:
             raise HTTPException(status_code=400, detail="Document could not be processed into chunks.")
+        # --- STAGE 3: INDEXING (Embedding) ---
+        index_start = time.perf_counter()
         retriever.index(child_documents, docstore)
+        timings["3_indexing_and_embedding"] = time.perf_counter() - index_start
+        print(f"Time taken for Indexing (incl. Embeddings): {timings['3_indexing_and_embedding']:.4f} seconds.")
         # --- CONCURRENT WORKFLOW ---
+        # Step A: Concurrently generate hypothetical documents (HyDE)
+        print("Generating hypothetical documents...")
+        hyde_start = time.perf_counter()
         hyde_tasks = [generate_hypothetical_document(q, GROQ_API_KEY) for q in request.questions]
         all_hyde_docs = await asyncio.gather(*hyde_tasks)
+        timings["4_hyde_generation_total"] = time.perf_counter() - hyde_start
+        print(f"Time taken for HyDE Generation (total): {timings['4_hyde_generation_total']:.4f} seconds.")
+        # Step B: Concurrently retrieve relevant chunks
+        print("Retrieving chunks...")
+        retrieval_start = time.perf_counter()
         retrieval_tasks = [
             retriever.retrieve(q, hyde_doc)
             for q, hyde_doc in zip(request.questions, all_hyde_docs)
         ]
         all_retrieved_chunks = await asyncio.gather(*retrieval_tasks)
+        timings["5_retrieval_total"] = time.perf_counter() - retrieval_start
+        print(f"Time taken for Retrieval (total): {timings['5_retrieval_total']:.4f} seconds.")
+        # Step C: Concurrently generate final answers
+        print("Generating final answers...")
+        generation_start = time.perf_counter()
         answer_tasks = [
             generate_answer(q, chunks, GROQ_API_KEY)
             for q, chunks in zip(request.questions, all_retrieved_chunks)
         ]
         final_answers = await asyncio.gather(*answer_tasks)
+        timings["6_answer_generation_total"] = time.perf_counter() - generation_start
+        print(f"Time taken for Answer Generation (total): {timings['6_answer_generation_total']:.4f} seconds.")
+        timings["total_pipeline_time"] = time.perf_counter() - total_pipeline_start_time
+        print("\n--- RAG Pipeline Completed Successfully ---")
+        print(f"--- Total Pipeline Time: {timings['total_pipeline_time']:.4f} seconds ---")
+        print("--- Timing Breakdown ---")
+        for stage, duration in timings.items():
+            print(f"- {stage}: {duration:.4f} seconds")
         return RunResponse(answers=final_answers)
     except Exception as e: