Spaces:

Rivalcoder
/

Issurance_Agent_Rag_Pinecone

Runtime error

App Files Files Community

Rivalcoder committed on Aug 4, 2025

Commit

192b91e

0 Parent(s):

Add First basic Version

Browse files

Files changed (10) hide show

.dockerignore +26 -0
.gitignore +61 -0
Dockerfile +32 -0
README.md +10 -0
app.py +260 -0
embedder.py +56 -0
llm.py +82 -0
pdf_parser.py +50 -0
requirements.txt +12 -0
retriever.py +9 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,26 @@

+.git
+.gitignore
+README.md
+DEPLOYMENT.md
+render.yaml
+start.sh
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+env
+pip-log.txt
+pip-delete-this-directory.txt
+.tox
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.log
+.git
+.mypy_cache
+.pytest_cache
+.hypothesis

.gitignore ADDED Viewed

	@@ -0,0 +1,61 @@

+# Environment variables
+.env
+.env.local
+.env.production
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+.cache
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+# Logs
+*.log
+# Temporary files
+*.tmp
+*.temp
+# FAISS index files
+*.index
+*.faiss
+# PDF files (if you don't want to commit them)
+*.pdf
+DEPLOYMENT.md

Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

+FROM python:3.9-slim
+WORKDIR /app
+# Flush print() output immediately so application logs appear in the container logs
+ENV PYTHONUNBUFFERED=1
+# Install system dependencies (skip "recommended" extras to keep the image small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Create a non-root user
+RUN useradd --create-home --shell /bin/bash appuser
+# Copy requirements first for better caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create cache directory with proper permissions
+RUN mkdir -p /app/.cache && chown -R appuser:appuser /app
+# Switch to non-root user
+USER appuser
+# Expose port
+EXPOSE 7860
+# Run the application
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: Issurance Agent Rag
+emoji: 💻
+colorFrom: red
+colorTo: pink
+sdk: docker
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,260 @@

+import os
+import warnings
+import logging
+import time
+from datetime import datetime
+from fastapi import FastAPI, Request, HTTPException, Depends, Header
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from pdf_parser import parse_pdf_from_url_multithreaded as parse_pdf_from_url, parse_pdf_from_file_multithreaded as parse_pdf_from_file
+from embedder import build_pinecone_index, preload_model
+from retriever import retrieve_chunks
+from llm import query_gemini
+import uvicorn
+# Set up cache directory for HuggingFace models
+cache_dir = os.path.join(os.getcwd(), ".cache")
+os.makedirs(cache_dir, exist_ok=True)
+os.environ['HF_HOME'] = cache_dir
+os.environ['TRANSFORMERS_CACHE'] = cache_dir
+# Suppress TensorFlow warnings
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+os.environ['TF_LOGGING_LEVEL'] = 'ERROR'
+os.environ['TF_ENABLE_DEPRECATION_WARNINGS'] = '0'
+warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+app = FastAPI(title="HackRx Insurance Policy Assistant", version="1.0.0")
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Preload the model at startup
+@app.on_event("startup")
+async def startup_event():
+    print("Starting up HackRx Insurance Policy Assistant...")
+    print("Preloading sentence transformer model...")
+    preload_model()
+    print("Model preloading completed. API is ready to serve requests!")
+@app.get("/")
+async def root():
+    return {"message": "HackRx Insurance Policy Assistant API is running!"}
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "message": "API is ready to process requests"}
+# Request body for POST /api/v1/hackrx/run
+class QueryRequest(BaseModel):
+    # Location of the PDF; handed to parse_pdf_from_url, which downloads it
+    documents: str
+    # Questions to answer about the document
+    questions: list[str]
+# Request body for POST /api/v1/hackrx/local
+class LocalQueryRequest(BaseModel):
+    # Filesystem path of the PDF on the server; handed to parse_pdf_from_file
+    document_path: str
+    # Questions to answer about the document
+    questions: list[str]
+def verify_token(authorization: str = Header(None)):
+    if not authorization or not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Invalid authorization header")
+    token = authorization.replace("Bearer ", "")
+    # For demo purposes, accept any token. In production, validate against a database
+    if not token:
+        raise HTTPException(status_code=401, detail="Invalid token")
+    return token
+@app.post("/api/v1/hackrx/run")
+async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
+    start_time = time.time()
+    timing_data = {}
+    try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Documents: {request.documents}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+        print(f"Processing {len(request.questions)} questions...")
+        # Time PDF parsing
+        pdf_start = time.time()
+        text_chunks = parse_pdf_from_url(request.documents)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"Extracted {len(text_chunks)} text chunks from PDF")
+        # Time Pinecone index building/upsert
+        index_start = time.time()
+        pinecone_index = build_pinecone_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['pinecone_index_building'] = round(index_time, 2)
+        texts = text_chunks  # for retrieve_chunks
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
+        all_chunks = set()
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
+            top_chunks = retrieve_chunks(pinecone_index, texts, question)
+            question_time = time.time() - question_start
+            all_chunks.update(top_chunks)
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+        # Time LLM processing
+        llm_start = time.time()
+        print(f"Processing all {len(request.questions)} questions in batch...")
+        response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        # Time response processing
+        response_start = time.time()
+        # Extract answers from the JSON response
+        if isinstance(response, dict) and "answers" in response:
+            answers = response["answers"]
+            while len(answers) < len(request.questions):
+                answers.append("Not Found")
+            answers = answers[:len(request.questions)]
+        else:
+            answers = [response] if isinstance(response, str) else []
+            while len(answers) < len(request.questions):
+                answers.append("Not Found")
+            answers = answers[:len(request.questions)]
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Generated {len(answers)} answers")
+        # Calculate total time
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"Pinecone Index Building: {timing_data['pinecone_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+        return result
+    except Exception as e:
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+@app.post("/api/v1/hackrx/local")
+async def run_local_query(request: LocalQueryRequest):
+    start_time = time.time()
+    timing_data = {}
+    try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Document Path: {request.document_path}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+        print(f"Processing local document: {request.document_path}")
+        print(f"Processing {len(request.questions)} questions...")
+        # Time local PDF parsing
+        pdf_start = time.time()
+        text_chunks = parse_pdf_from_file(request.document_path)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"Extracted {len(text_chunks)} text chunks from local PDF")
+        # Time Pinecone index building/upsert
+        index_start = time.time()
+        pinecone_index = build_pinecone_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['pinecone_index_building'] = round(index_time, 2)
+        texts = text_chunks
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
+        all_chunks = set()
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
+            top_chunks = retrieve_chunks(pinecone_index, texts, question)
+            question_time = time.time() - question_start
+            all_chunks.update(top_chunks)
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+        # Time LLM processing
+        llm_start = time.time()
+        print(f"Processing all {len(request.questions)} questions in batch...")
+        response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        # Time response processing
+        response_start = time.time()
+        if isinstance(response, dict) and "answers" in response:
+            answers = response["answers"]
+            while len(answers) < len(request.questions):
+                answers.append("Not Found")
+            answers = answers[:len(request.questions)]
+        else:
+            answers = [response] if isinstance(response, str) else []
+            while len(answers) < len(request.questions):
+                answers.append("Not Found")
+            answers = answers[:len(request.questions)]
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Generated {len(answers)} answers")
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"Pinecone Index Building: {timing_data['pinecone_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+        return result
+    except Exception as e:
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    uvicorn.run("app:app", host="0.0.0.0", port=port)

embedder.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+from pinecone import Pinecone, ServerlessSpec
+from sentence_transformers import SentenceTransformer
+from dotenv import load_dotenv
+load_dotenv()
+cache_dir = os.path.join(os.getcwd(), ".cache")
+os.makedirs(cache_dir, exist_ok=True)
+os.environ['HF_HOME'] = cache_dir
+os.environ['TRANSFORMERS_CACHE'] = cache_dir
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+PINECONE_ENV = os.getenv("PINECONE_ENV")  # Not used in new SDK, keep cloud+region below instead
+PINECONE_INDEX_NAME = 'bajaj-rag-assistant'
+PINECONE_CLOUD = 'aws'           # or 'gcp', or your choice, must match Pinecone project
+PINECONE_REGION = 'us-east-1'    # or your choice, must match Pinecone project
+# Create Pinecone client globally
+pc = Pinecone(api_key=PINECONE_API_KEY)
+_model = None
+def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
+    global _model
+    if _model is not None:
+        return _model
+    _model = SentenceTransformer(model_name, cache_folder=cache_dir)
+    return _model
+def get_model():
+    """Return the shared embedding model by delegating to preload_model() with its default arguments."""
+    return preload_model()
+def build_pinecone_index(chunks, index_name=PINECONE_INDEX_NAME):
+    model = get_model()
+    embeddings = model.encode(
+        chunks,
+        batch_size=128,
+        convert_to_numpy=True,
+        normalize_embeddings=True
+    )
+    # Create index if it doesn't exist
+    if index_name not in pc.list_indexes().names():
+        pc.create_index(
+            name=index_name,
+            dimension=embeddings.shape[1],
+            metric='cosine',
+            spec=ServerlessSpec(
+                cloud=PINECONE_CLOUD,
+                region=PINECONE_REGION
+            )
+        )
+    index = pc.Index(index_name)
+    # Upsert embeddings in Pinecone
+    vectors = [(f"id-{i}", emb.tolist(), {"text": chunk}) for i, (emb, chunk) in enumerate(zip(embeddings, chunks))]
+    index.upsert(vectors)
+    return index

llm.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import google.generativeai as genai
+import os
+import json
+from dotenv import load_dotenv
+load_dotenv()
+api_key = os.getenv("GOOGLE_API_KEY")
+if not api_key:
+    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please add it to your .env file")
+print(f"Google API Key loaded: {api_key[:10]}..." if api_key else "No API key found")
+genai.configure(api_key=api_key)
+def query_gemini(questions, contexts):
+    try:
+        context = "\n\n".join(contexts)
+        questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
+        prompt = f"""
+You are an expert insurance assistant generating formal yet user-facing answers to policy questions and Other Human Questions. Your goal is to write professional, structured answers that reflect the language of policy documents — but are still human-readable and easy to understand.
+🧠 FORMAT & TONE GUIDELINES:
+- Write in professional third-person language (no "you", no "we").
+- Use clear sentence structure with proper punctuation and spacing.
+- Do NOT write in legalese or robotic passive constructions.
+- Include eligibility, limits, and waiting periods explicitly where relevant.
+- Keep it factual, neutral, and easy to follow.
+- First, try to answer each question using information from the provided context.
+- If the question is NOT covered by the context Provide Then Give The General Answer It Not Be In Context if Nothing Found Give Normal Ai Answer for The Question Correctly
+- Limit each answer to 2–3 sentences, and do not repeat unnecessary information.
+- If a question can be answered with a simple "Yes", "No", "Can apply", or "Cannot apply", then begin the answer with that phrase, followed by a short supporting Statement In Natural Human Like response.So Give A Good Answer For The Question With Correct Information.
+- Avoid giving  theory Based Long Long answers Try to Give Short Good Reasonable Answers.
+🛑 DO NOT:
+- Use words like "context", "document", or "text".
+- Output markdown, bullets, emojis, or markdown code blocks.
+- Say "helpful", "available", "allowed", "indemnified", "excluded", etc.
+- Use overly robotic passive constructions like "shall be indemnified".
+- Dont Give In Message Like "Based On The Context "Or "Nothing Refered In The context" Like That Dont Give In Response Try To Give Answer For The Question Alone
+✅ DO:
+- Write in clean, informative language.
+- Give complete answers in 2–3 sentences maximum.
+📤 OUTPUT FORMAT (strict):
+Respond with only the following JSON — no explanations, no comments, no markdown:
+{{
+  "answers": [
+    "Answer to question 1",
+    "Answer to question 2",
+    ...
+  ]
+}}
+📚 CONTEXT:
+{context}
+❓ QUESTIONS:
+{questions_text}
+Your task: For each question, provide a complete, professional, and clearly written answer in 2–3 sentences using a formal but readable tone.
+"""
+        model = genai.GenerativeModel('gemini-2.5-flash-lite')
+        response = model.generate_content(prompt)
+        response_text = response.text.strip()
+        try:
+            if response_text.startswith("```json"):
+                response_text = response_text.replace("```json", "").replace("```", "").strip()
+            elif response_text.startswith("```"):
+                response_text = response_text.replace("```", "").strip()
+            parsed_response = json.loads(response_text)
+            return parsed_response
+        except json.JSONDecodeError:
+            print(f"Failed to parse JSON response: {response_text}")
+            return {"answers": ["Error parsing response"] * len(questions)}
+    except Exception as e:
+        print(f"Error in query_gemini: {str(e)}")
+        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}

pdf_parser.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import fitz  # PyMuPDF
+import requests
+from io import BytesIO
+from concurrent.futures import ThreadPoolExecutor
+def _extract_text(page):
+    text = page.get_text()
+    return text.strip() if text and text.strip() else None
+def parse_pdf_from_url_multithreaded(url, max_workers=2, chunk_size=1):
+    """
+    Download PDF from URL, extract text in parallel, optionally chunk pages.
+    """
+    res = requests.get(url)
+    with fitz.open(stream=BytesIO(res.content), filetype="pdf") as doc:
+        num_pages = len(doc)
+        pages = list(doc)
+        # Step 1: Parallel text extraction
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            texts = list(executor.map(_extract_text, pages))
+        # Step 2: Optional chunking
+        if chunk_size > 1:
+            chunks = []
+            for i in range(0, len(texts), chunk_size):
+                chunk = ' '.join([t for t in texts[i:i+chunk_size] if t])
+                if chunk:
+                    chunks.append(chunk)
+            return chunks
+        # Default: return one chunk per page
+        return [t for t in texts if t]
+def parse_pdf_from_file_multithreaded(file_path, max_workers=2, chunk_size=1):
+    """
+    Parse a local PDF file, extract text in parallel, optionally chunk pages.
+    """
+    with fitz.open(file_path) as doc:
+        num_pages = len(doc)
+        pages = list(doc)
+        # Step 1: Parallel text extraction
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            texts = list(executor.map(_extract_text, pages))
+        # Step 2: Optional chunking
+        if chunk_size > 1:
+            chunks = []
+            for i in range(0, len(texts), chunk_size):
+                chunk = ' '.join([t for t in texts[i:i+chunk_size] if t])
+                if chunk:
+                    chunks.append(chunk)
+            return chunks
+        return [t for t in texts if t]

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi
+uvicorn
+requests
+faiss-cpu
+sentence-transformers
+PyMuPDF
+python-dotenv
+tf-keras
+google-generativeai
+pinecone

retriever.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from embedder import get_model
+def retrieve_chunks(index, texts, question, top_k=15):
+    model = get_model()
+    q_embedding = model.encode([question], convert_to_numpy=True, normalize_embeddings=True)[0]
+    # Use Pinecone v3 index query
+    res = index.query(vector=q_embedding.tolist(), top_k=top_k, include_metadata=True)
+    selected_texts = [match['metadata']['text'] for match in res['matches']]
+    return selected_texts