Rivalcoder committed
Commit: 6bc8549
Parent(s): ea49415

Update The Model issues and Prompt

Files changed:
- app.py +22 -20
- embedder.py +3 -30
- llm.py +3 -52
- main.py +22 -20
- parser.py +0 -21
- retriever.py +0 -22
app.py
CHANGED

@@ -81,6 +81,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Documents: {request.documents}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing {len(request.questions)} questions...")

         # Time PDF parsing
@@ -88,7 +93,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         text_chunks = parse_pdf_from_url(request.documents)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")

         # Time FAISS index building
@@ -96,7 +100,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -105,12 +108,10 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -119,7 +120,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -140,13 +140,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -157,9 +155,12 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-
-
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
@@ -172,6 +173,11 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Document Path: {request.document_path}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")

@@ -180,7 +186,6 @@ async def run_local_query(request: LocalQueryRequest):
         text_chunks = parse_pdf_from_file(request.document_path)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")

         # Time FAISS index building
@@ -188,7 +193,6 @@ async def run_local_query(request: LocalQueryRequest):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -197,12 +201,10 @@ async def run_local_query(request: LocalQueryRequest):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -211,7 +213,6 @@ async def run_local_query(request: LocalQueryRequest):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -232,13 +233,11 @@ async def run_local_query(request: LocalQueryRequest):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -249,9 +248,12 @@ async def run_local_query(request: LocalQueryRequest):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-
-
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
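After this change both endpoints build the response dict once, log it as OUTPUT JSON, and return it, instead of the earlier inline return block. A minimal client sketch against the cleaned-up run_query endpoint; the route path, port, and token format are placeholders, since the route decorators and verify_token sit outside this diff:

import requests

# All endpoint details below are assumptions; only the request fields
# (documents, questions) and the response shape come from the diff.
payload = {
    "documents": "https://example.com/policy.pdf",   # placeholder document URL
    "questions": ["What is the waiting period for pre-existing diseases?"],
}
resp = requests.post(
    "http://localhost:7860/run",                     # placeholder route and port
    json=payload,
    headers={"Authorization": "Bearer <token>"},     # placeholder for verify_token
)
print(resp.json())   # expected shape after this commit: {"answers": [...]}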
embedder.py
CHANGED

@@ -2,7 +2,6 @@ import faiss
 from sentence_transformers import SentenceTransformer
 import numpy as np
 import os
-import time

 # Set up cache directory in a writable location
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -17,19 +16,16 @@ def preload_model():
     """Preload the sentence transformer model at startup"""
     global _model
     if _model is None:
-        model_start = time.time()
         print("Preloading sentence transformer model...")
         try:
             _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
-
-            print(f"Model preloading completed in {model_time:.2f} seconds")
+            print("Model preloading completed")
         except Exception as e:
             print(f"Error loading model: {e}")
             # Fallback to a different model if the first one fails
             try:
                 _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
-
-                print(f"Fallback model preloading completed in {model_time:.2f} seconds")
+                print("Fallback model preloading completed")
             except Exception as e2:
                 print(f"Error loading fallback model: {e2}")
                 raise
@@ -39,37 +35,14 @@ def get_model():
     """Get the sentence transformer model, loading it lazily if needed"""
     global _model
     if _model is None:
-        # If model is not preloaded, load it now (should not happen in production)
         print("Warning: Model not preloaded, loading now...")
         return preload_model()
     return _model

 def build_faiss_index(chunks):
-    start_time = time.time()
-    print(f"Building FAISS index for {len(chunks)} chunks...")
-
-    # Time model retrieval (should be instant now)
-    model_start = time.time()
     model = get_model()
-    model_time = time.time() - model_start
-    print(f"Model retrieval took: {model_time:.3f} seconds")
-
-    # Time embedding generation
-    embed_start = time.time()
     embeddings = model.encode(chunks)
-    embed_time = time.time() - embed_start
-    print(f"Embedding generation took: {embed_time:.2f} seconds")
-    print(f"Generated embeddings shape: {embeddings.shape}")
-
-    # Time FAISS index creation
-    index_start = time.time()
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(np.array(embeddings))
-
-    print(f"FAISS index creation took: {index_time:.2f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total FAISS index building took: {total_time:.2f} seconds")
-
-    return index, chunks
+    return index, chunks
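build_faiss_index now reduces to encode-then-index, with all timing scaffolding gone. A standalone sketch of the same pattern, with illustrative chunks; IndexFlatL2 is an exact L2 index, so no training step is needed before add():

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

chunks = ["First page text...", "Second page text..."]   # illustrative input
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks)                # float32 array, 384 dims for this model
index = faiss.IndexFlatL2(embeddings.shape[1])   # exact L2 search, no training needed
index.add(np.array(embeddings))

query_vec = model.encode(["query text"])
distances, indices = index.search(np.array(query_vec), 1)
print(chunks[indices[0][0]])                     # nearest chunk to the query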
llm.py
CHANGED

@@ -1,7 +1,6 @@
 import google.generativeai as genai
 import os
 import json
-import time
 from dotenv import load_dotenv
 load_dotenv()

@@ -13,22 +12,9 @@ print(f"Google API Key loaded: {api_key[:10]}..." if api_key else "No API key fo
 genai.configure(api_key=api_key)

 def query_gemini(questions, contexts):
-    start_time = time.time()
-    print(f"Starting LLM processing for {len(questions)} questions with {len(contexts)} context chunks")
-
     try:
-        # Time context preparation
-        context_start = time.time()
         context = "\n\n".join(contexts)
-        context_time = time.time() - context_start
-        print(f"Context preparation took: {context_time:.2f} seconds")
-        print(f"Total context length: {len(context)} characters")
-
-        # Time prompt preparation
-        prompt_start = time.time()
-        # Create a numbered list of questions
         questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
-
         prompt = f"""
 You are a skilled insurance policy assistant. Based only on the provided context, answer each question clearly and briefly.

@@ -69,54 +55,19 @@ Respond in the exact JSON format below — no extra text or explanations.
 Your task: Answer each question concisely and professionally. Use plain phrasing, stay within 1–2 clear sentences, and avoid unnecessary detail or repetition.
 """

-
-
-
-
-        prompt_time = time.time() - prompt_start
-        print(f"Prompt preparation took: {prompt_time:.2f} seconds")
-        print(f"Total prompt length: {len(prompt)} characters")
-
-        # Time model initialization and API call
-        api_start = time.time()
-        model = genai.GenerativeModel('gemini-2.0-flash-exp')
+        model = genai.GenerativeModel('gemini-2.5-flash')
         response = model.generate_content(prompt)
-        api_time = time.time() - api_start
-        print(f"Gemini API call took: {api_time:.2f} seconds")
-
-        # Time response processing
-        process_start = time.time()
         response_text = response.text.strip()
-        print(f"Raw response length: {len(response_text)} characters")
-
-        # Try to parse the response as JSON
         try:
-            # Remove any markdown code blocks if present
             if response_text.startswith("```json"):
                 response_text = response_text.replace("```json", "").replace("```", "").strip()
             elif response_text.startswith("```"):
                 response_text = response_text.replace("```", "").strip()
-
             parsed_response = json.loads(response_text)
-            process_time = time.time() - process_start
-            print(f"Response processing took: {process_time:.2f} seconds")
-
-            total_time = time.time() - start_time
-            print(f"Total LLM processing took: {total_time:.2f} seconds")
-
             return parsed_response
         except json.JSONDecodeError:
-            # If JSON parsing fails, return a structured response
-            process_time = time.time() - process_start
-            print(f"Response processing took: {process_time:.2f} seconds (JSON parsing failed)")
             print(f"Failed to parse JSON response: {response_text}")
-
-            total_time = time.time() - start_time
-            print(f"Total LLM processing took: {total_time:.2f} seconds")
-
             return {"answers": ["Error parsing response"] * len(questions)}
-
     except Exception as e:
-
-
-        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
+        print(f"Error in query_gemini: {str(e)}")
+        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
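Besides swapping 'gemini-2.0-flash-exp' for 'gemini-2.5-flash', the commit keeps the fence-stripping JSON fallback intact. That retained pattern, as a standalone sketch with an illustrative input:

import json

def parse_llm_json(response_text, n_questions):
    text = response_text.strip()
    # Models often wrap JSON in markdown code fences; strip them before parsing.
    if text.startswith("```json"):
        text = text.replace("```json", "").replace("```", "").strip()
    elif text.startswith("```"):
        text = text.replace("```", "").strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Same degraded-but-well-formed shape the module returns on failure.
        return {"answers": ["Error parsing response"] * n_questions}

print(parse_llm_json('```json\n{"answers": ["Yes."]}\n```', 1))   # {'answers': ['Yes.']}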
main.py
CHANGED

@@ -75,6 +75,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Documents: {request.documents}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing {len(request.questions)} questions...")

         # Time PDF parsing
@@ -82,7 +87,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         text_chunks = parse_pdf_from_url(request.documents)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")

         # Time FAISS index building
@@ -90,7 +94,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -99,12 +102,10 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -113,7 +114,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -134,13 +134,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -151,9 +149,12 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-
-
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
@@ -166,6 +167,11 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Document Path: {request.document_path}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")

@@ -174,7 +180,6 @@ async def run_local_query(request: LocalQueryRequest):
         text_chunks = parse_pdf_from_file(request.document_path)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")

         # Time FAISS index building
@@ -182,7 +187,6 @@ async def run_local_query(request: LocalQueryRequest):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -191,12 +195,10 @@ async def run_local_query(request: LocalQueryRequest):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -205,7 +207,6 @@ async def run_local_query(request: LocalQueryRequest):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -226,13 +227,11 @@ async def run_local_query(request: LocalQueryRequest):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -243,9 +242,12 @@ async def run_local_query(request: LocalQueryRequest):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-
-
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
parser.py
CHANGED

@@ -4,15 +4,7 @@ from io import BytesIO
 import time

 def parse_pdf_from_url(url):
-    start_time = time.time()
-    print(f"Starting PDF download and parsing from URL...")
-
-    download_start = time.time()
     res = requests.get(url)
-    download_time = time.time() - download_start
-    print(f"PDF Download took: {download_time:.2f} seconds")
-
-    parse_start = time.time()
     doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
     chunks = []
     for page in doc:
@@ -20,18 +12,10 @@ def parse_pdf_from_url(url):
         if text.strip():
             chunks.append(text)
     doc.close()
-    parse_time = time.time() - parse_start
-    print(f"PDF Text Extraction took: {parse_time:.2f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total PDF parsing from URL took: {total_time:.2f} seconds")
     return chunks

 def parse_pdf_from_file(file_path):
     """Parse a local PDF file and extract text chunks"""
-    start_time = time.time()
-    print(f"Starting PDF parsing from local file: {file_path}")
-
     try:
         doc = fitz.open(file_path)
         chunks = []
@@ -40,11 +24,6 @@ def parse_pdf_from_file(file_path):
         if text.strip():
             chunks.append(text)
         doc.close()
-
-        total_time = time.time() - start_time
-        print(f"Total PDF parsing from file took: {total_time:.2f} seconds")
         return chunks
     except Exception as e:
-        total_time = time.time() - start_time
-        print(f"Error parsing PDF file after {total_time:.2f} seconds: {str(e)}")
        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}")
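Both parsers are now a plain open/extract/close loop; note that import time at line 4 survives even though no timing code remains. The per-page extraction line sits between the hunks, so this self-contained sketch assumes it is page.get_text(), the usual PyMuPDF call:

import fitz  # PyMuPDF

def pdf_to_chunks(path):
    doc = fitz.open(path)
    chunks = []
    for page in doc:
        text = page.get_text()   # assumed: the extraction line hidden between hunks
        if text.strip():
            chunks.append(text)  # one chunk per page that contains any text
    doc.close()
    return chunks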
retriever.py
CHANGED

@@ -5,30 +5,8 @@ from embedder import get_model

 # Use the preloaded model from embedder instead of creating a new instance
 def retrieve_chunks(index, texts, query, k=5):
-    start_time = time.time()
-    print(f"Retrieving chunks for query: '{query[:50]}...'")
-
-    # Time query embedding
-    embed_start = time.time()
     model = get_model()  # Use the preloaded model
     query_vec = model.encode([query])
-    embed_time = time.time() - embed_start
-    print(f"Query embedding took: {embed_time:.3f} seconds")
-
-    # Time FAISS search
-    search_start = time.time()
     distances, indices = index.search(np.array(query_vec), k)
-    search_time = time.time() - search_start
-    print(f"FAISS search took: {search_time:.3f} seconds")
-
-    # Time result processing
-    process_start = time.time()
     results = [texts[i] for i in indices[0]]
-    process_time = time.time() - process_start
-    print(f"Result processing took: {process_time:.3f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total chunk retrieval took: {total_time:.3f} seconds")
-    print(f"Retrieved {len(results)} chunks")
-
     return results
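retrieve_chunks is likewise reduced to encode, search, gather. The endpoints then union the per-question results into one deduplicated context, as in the all_chunks.update(...) loops above; a short sketch of that flow using the repo's own helpers:

from embedder import build_faiss_index
from retriever import retrieve_chunks

texts_in = ["chunk about coverage", "chunk about waiting periods", "unrelated chunk"]
index, texts = build_faiss_index(texts_in)

all_chunks = set()
for question in ["What is covered?", "What is the waiting period?"]:
    all_chunks.update(retrieve_chunks(index, texts, question, k=2))  # set union dedupes

print(f"{len(all_chunks)} unique chunks for the LLM context")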