# Spaces: meet12341234 / testdeployment (Sleeping)
# File size: 11,747 Bytes
# 5fa5f30

"""

Hybrid RAG Chatbot for Jain Philosophy

Features:

1. Neo4j Graph + Vector Search for Book Knowledge

2. Fallback to LLM Internal Knowledge (Llama 3.3) if needed

3. Uses llama-3.3-70b-versatile model

"""

import asyncio
import os
import sys
from typing import List, Dict, Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from groq import Groq
from neo4j import GraphDatabase
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

# On Windows, switch stdout to UTF-8 so the non-ASCII status glyphs and
# transliterated terms printed by this module can be encoded.
if sys.platform == 'win32':
    sys.stdout.reconfigure(encoding='utf-8')

# Load variables from a .env file into the environment before reading settings.
load_dotenv()

# Connection settings; each one can be overridden through the environment.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_AUTH = (
    os.environ.get("NEO4J_USERNAME", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "password"),
)
# No default here: get_resources() raises ValueError when the key is missing.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Term mappings
# Plain-ASCII spellings a user is likely to type, each mapped to the
# diacritic spellings and English glosses used for query expansion.
TERM_MAPPINGS = dict(
    anekantavada=["Anekāntavāda", "Anekānta", "non-absolutism"],
    syadvada=["Syādvāda", "Syād", "conditional predication"],
    saptabhangi=["Saptabhaṅgī", "seven-fold predication"],
    naya=["Nayas", "viewpoints", "7 Naya"],
    gunasthana=["Guṇasthānaka", "stages of spiritual development"],
    tirthankara=["Tīrthaṅkara", "Jina", "Arihanta"],
    mahavira=["Mahāvīra", "Vardhamana"],
    jiva=["Jīvāstikāya", "soul"],
    ajiva=["Ajīva", "non-soul"],
    karma=["Karma", "karmic matter"],
)

# Initialize FastAPI
app = FastAPI(title="Jain Philosophy Chatbot API")

# Allow cross-origin calls from any site so browser front-ends can be tested.
# Credentials stay disabled: combining a wildcard origin with
# allow_credentials=True would let every website issue credentialed requests,
# and this API does not rely on cookies or HTTP auth. If credentialed requests
# are ever needed, replace "*" with an explicit list of trusted origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # The user's question text.
    query: str

# Response body returned by POST /chat (used as the route's response_model).
class ChatResponse(BaseModel):
    # The generated answer text.
    answer: str

# Global resources
# Process-wide cache. It stays empty until get_resources() succeeds, after
# which it holds the keys 'driver', 'retriever' and 'client'.
resources = {}


def get_resources():
    """Return the shared Neo4j driver, retriever and Groq client, building them once.

    The first successful call connects to Neo4j, loads the sentence-embedding
    model, wraps both in a HybridRetriever and creates the Groq client. Later
    calls return the cached dictionary unchanged.

    Returns:
        The module-level ``resources`` dict with keys ``'driver'``,
        ``'retriever'`` and ``'client'``.

    Raises:
        ValueError: if GROQ_API_KEY is not configured.
        Exception: any error raised while initialising a dependency is
            printed and re-raised; the cache is left empty so a later call
            can retry.
    """
    if resources:
        return resources

    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY is not set in .env file or environment variables")

    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        driver.verify_connectivity()
        embedder = SentenceTransformer('all-mpnet-base-v2')
        retriever = HybridRetriever(driver, embedder)
        client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        print(f"✗ Error loading resources: {e}")
        # The driver may already hold open connections when a later step
        # fails; release it so repeated retries do not leak connection pools.
        if driver is not None:
            try:
                driver.close()
            except Exception as close_error:
                print(f"✗ Error closing Neo4j driver: {close_error}")
        raise

    # Publish all entries together so the cache is never partially filled.
    resources.update(driver=driver, retriever=retriever, client=client)
    print("✓ Resources loaded")
    return resources

# Characters with special meaning in Lucene's classic query syntax. Each is
# backslash-escaped before user text is handed to a full-text index.
_LUCENE_ESCAPES = str.maketrans({ch: "\\" + ch for ch in '+-!(){}[]^"~*?:\\/&|'})

# Vector indexes probed by the semantic step and neighbours fetched from each.
_VECTOR_INDEXES = ('chunk_embeddings', 'gunasthana_embeddings')
_VECTOR_TOP_K = 7

# Words that signal a question about the book's structure.
_STRUCTURE_KEYWORDS = ("chapter", "summary", "outline", "structure", "table of contents", "book")

# Maximum number of passages handed back to the caller.
_MAX_RESULTS = 7


def _expand_query_terms(query_lower: str) -> List[str]:
    """Return the TERM_MAPPINGS variants of every mapped term found in the query."""
    expanded = []
    for term, variants in TERM_MAPPINGS.items():
        if term in query_lower:
            expanded.extend(variants)
    return expanded


def _build_lucene_query(query: str, expanded_terms: List[str]) -> str:
    """Build an escaped Lucene query: the user text OR-ed with quoted variants."""
    cleaned = query.replace("?", "").replace("!", "").strip()
    clauses = []
    if cleaned:
        clauses.append("(" + cleaned.translate(_LUCENE_ESCAPES) + ")")
    clauses.extend('"' + term.translate(_LUCENE_ESCAPES) + '"' for term in expanded_terms)
    return " OR ".join(clauses)


def _run_query(session, label: str, cypher: str, **params) -> List[Dict]:
    """Run one best-effort search; on failure report it and return no rows."""
    try:
        return [dict(record) for record in session.run(cypher, **params)]
    except Exception as exc:
        # A missing index or malformed query must not sink the other
        # strategies, but it should be visible to whoever operates the service.
        print(f"Warning: {label} search skipped: {exc}", file=sys.stderr)
        return []


def _rank_unique_chunks(chunks: List[Dict], limit: int) -> List[Dict]:
    """Sort by score (highest first), drop blank and duplicate texts, keep `limit`."""
    seen = set()
    unique = []
    for chunk in sorted(chunks, key=lambda c: c.get('score') or 0.0, reverse=True):
        text = chunk.get('text')
        # Two passages count as duplicates when their first 100 characters match.
        signature = text[:100] if text else ""
        if signature and signature not in seen:
            seen.add(signature)
            unique.append(chunk)
    return unique[:limit]


def search_neo4j_comprehensive(driver, embedder, query: str) -> List[Dict]:
    """Collect book passages for `query` from several Neo4j search strategies.

    Strategies, each best-effort (a failing one is reported on stderr and
    skipped):

    1. Concept full-text search on 'concept_name_index' (score boosted by 1.0)
    2. Gunasthana name match (fixed score 2.0)
    3. Vector search on each index in _VECTOR_INDEXES
    4. Chunk full-text search on 'chunk_text_index'
    5. Chapter listing, only for questions containing a structure keyword
       (fixed score 2.5)

    Both full-text steps receive the user text with Lucene special characters
    escaped, OR-ed with the TERM_MAPPINGS variants of any mapped term the
    query mentions, so an ASCII spelling can match diacritic spellings.

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``run(cypher, **params)``.
        embedder: object whose ``encode(text)`` result has ``tolist()``.
        query: the user's question.

    Returns:
        Up to 7 dicts with keys ``'text'`` and ``'score'``, highest score
        first, de-duplicated on the first 100 characters of the text.
    """
    query_lower = query.lower()
    expanded_terms = _expand_query_terms(query_lower)
    lucene_query = _build_lucene_query(query, expanded_terms)

    embedding = embedder.encode(query).tolist()
    chunks = []

    with driver.session() as session:
        # 1. Concept Node Search (High Priority)
        if lucene_query:
            chunks.extend(_run_query(session, "concept", """
                CALL db.index.fulltext.queryNodes('concept_name_index', $q)
                YIELD node, score
                RETURN
                    'Concept: ' + node.name + ' (' + coalesce(node.category, 'General') + ')\n' +
                    'Variants: ' + coalesce(toString(node.variants), 'None') as text,
                    score + 1.0 as score
                LIMIT 3
            """, q=lucene_query))

        # 2. Gunasthana Specific Search
        # NOTE(review): this matches only when a name contains the whole
        # query string, so full-sentence questions rarely hit - confirm intent.
        chunks.extend(_run_query(session, "gunasthana", """
            MATCH (g:Gunasthana)
            WHERE toLower(g.sanskrit_name) CONTAINS toLower($q)
               OR toLower(g.english_name) CONTAINS toLower($q)
            RETURN g.sanskrit_name + ' (' + g.english_name + ')\n' + g.description as text, 2.0 as score
        """, q=query))

        # 3. Vector Search
        for index_name in _VECTOR_INDEXES:
            chunks.extend(_run_query(session, f"vector index '{index_name}'", f"""
                CALL db.index.vector.queryNodes('{index_name}', {_VECTOR_TOP_K}, $emb)
                YIELD node, score
                RETURN coalesce(node.text, node.description) as text, score
            """, emb=embedding))

        # 4. Fulltext Keyword Search
        if lucene_query:
            chunks.extend(_run_query(session, "full-text", """
                CALL db.index.fulltext.queryNodes('chunk_text_index', $q)
                YIELD node, score
                RETURN node.text as text, score LIMIT 5
            """, q=lucene_query))

        # 5. Structure/Chapter Search (Table of Contents)
        if any(keyword in query_lower for keyword in _STRUCTURE_KEYWORDS):
            rows = _run_query(session, "chapter listing", """
                MATCH (c:Chapter)
                RETURN c.number as number, c.title as title
                ORDER BY c.number ASC
            """)
            chapters = [f"Chapter {row['number']}: {row['title']}" for row in rows]
            if chapters:
                chunks.append({
                    "text": "Book Table of Contents (All Chapters):\n" + "\n".join(chapters),
                    "score": 2.5,  # Very high relevance for structural questions
                })

    # With a score of 2.5 the table of contents, when present, sorts to the top.
    return _rank_unique_chunks(chunks, _MAX_RESULTS)

class HybridRetriever:
    """Pair a Neo4j driver with a sentence embedder behind one search call."""

    def __init__(self, driver, embedder):
        # Both collaborators are stored as given; nothing is opened or closed here.
        self.embedder = embedder
        self.driver = driver

    def search_book(self, query: str) -> List[Dict]:
        """Return the passages search_neo4j_comprehensive() finds for `query`."""
        hits = search_neo4j_comprehensive(self.driver, self.embedder, query)
        return hits

# Instructions sent as the system message on every request. Built once at
# import time; every sentence ends with a space so adjacent literals do not
# fuse into run-on text.
_SYSTEM_PROMPT = (
    "You are an expert scholar on Jain philosophy. "
    "Use the provided context from the book 'Anekant Syadvad' to answer the question. "
    "If the book context is insufficient, use your own broad knowledge of Jainism and religion to answer comprehensively. "
    "Do NOT mention 'According to the text' just give the answer naturally. "
    "Always define Sanskrit terms. "
    "Ensure the response is logically structured, concise yet comprehensive, and suitable for both "
    "academic and general readers. "
    "If the available book context is partial or insufficient, responsibly supplement the answer "
    "using well-established principles of Jain philosophy and comparative religious knowledge, "
    "without introducing speculation. "
    "Whenever Sanskrit or Prakrit terms appear, always: "
    "1) Write the term in standard IAST-style transliteration, "
    "2) Clearly define the term in simple and precise language at its first occurrence. "
    "Use the following transliteration standard consistently: "
    "Vowels: "
    "अ a, आ ā, इ i, ई ī, उ u, ऊ ū, ऋ ṛ, ए e, ऐ ai, ओ o, औ au, अं ṁ/ṅ, अः ḥ. "
    "Consonants: "
    "क् k, ख् kh, ग् g, घ् gh, ङ् ṅ; "
    "च् c, छ् ch, ज् j, झ् jh, ञ् ñ; "
    "ट् ṭ, ठ् ṭh, ड् ḍ, ढ् ḍh, ण् ṇ; "
    "त् t, थ् th, द् d, ध् dh, न् n; "
    "प् p, फ् ph, ब् b, भ् bh, म् m; "
    "य् y, र् r, ल् l, व् v; "
    "श् ś, ष् ṣ, स् s, ह् h. "
)

# Placed in the context section when retrieval returns nothing, so the model
# is told explicitly instead of receiving a blank block.
_NO_CONTEXT_NOTE = "(No relevant passages were retrieved from the book.)"


def ask_jain_sage(user_query: str, retriever: "HybridRetriever", client: "Groq") -> str:
    """Answer `user_query` with llama-3.3-70b-versatile, grounded in retrieved book text.

    Passages come from ``retriever.search_book``. The system prompt tells the
    model to fall back on its own knowledge when that context is insufficient.

    Args:
        user_query: the question to answer.
        retriever: object exposing ``search_book(query)`` that returns dicts
            with a ``'text'`` entry.
        client: Groq client (or compatible object) exposing
            ``chat.completions.create``.

    Returns:
        The model's reply text, or an empty string if the reply has no content.

    Raises:
        Exception: errors from retrieval or from the completion call propagate
            to the caller unchanged.
    """
    # 1. Retrieve from Book
    book_chunks = retriever.search_book(user_query)
    passages = [chunk.get('text') for chunk in book_chunks]
    book_text = "\n\n".join(text for text in passages if text) or _NO_CONTEXT_NOTE

    # 2. Ask the model
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": f"Context from Book:\n{book_text}\n\nQuestion: {user_query}"},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.5,
        max_completion_tokens=2048,
        top_p=0.95,
    )

    # The content field can be None; keep the declared `str` contract.
    return completion.choices[0].message.content or ""

@app.on_event("startup")
async def startup_event():
    # Warm the shared resources when the server boots so configuration
    # problems show up in the deploy log straight away. A failure is only
    # reported, not fatal: /chat calls get_resources() again on each request.
    try:
        get_resources()
    except Exception as e:
        print(f"Warning: Could not initialize resources on startup: {e}")


@app.on_event("shutdown")
async def shutdown_event():
    # Release the Neo4j driver that get_resources() opened; without this the
    # server never closes it. The cache is emptied so nothing can pick up a
    # closed driver afterwards.
    driver = resources.pop('driver', None)
    resources.clear()
    if driver is not None:
        try:
            driver.close()
        except Exception as e:
            print(f"Warning: Could not close Neo4j driver on shutdown: {e}")

@app.get("/")
def read_root():
    # Landing payload: a readiness message plus the path of the chat route.
    payload = {"status": "Jain Sage AI is API Ready", "endpoints": "/chat"}
    return payload

@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer one question about Jain philosophy.

    Blank questions are rejected with HTTP 400. Any failure while loading
    resources or producing the answer is reported as HTTP 500, with the error
    text in ``detail``.
    """
    # Mirror the console mode, which skips empty input instead of querying.
    if not request.query.strip():
        raise HTTPException(status_code=400, detail="Query must not be empty")

    try:
        res = get_resources()
        # Retrieval, embedding and the LLM call are all blocking. Run them in
        # the default thread pool so this coroutine does not stall the event
        # loop (and every other request) while waiting.
        loop = asyncio.get_running_loop()
        answer = await loop.run_in_executor(
            None, ask_jain_sage, request.query, res['retriever'], res['client']
        )
        return ChatResponse(answer=answer)
    except HTTPException:
        raise
    except Exception as e:
        print(f"Error handling /chat request: {e}", file=sys.stderr)
        # NOTE(review): `detail` exposes the raw exception text to clients;
        # kept for backward compatibility - consider a generic message.
        raise HTTPException(status_code=500, detail=str(e)) from e

def main():
    """Run the interactive console chat until the user exits.

    Typing 'exit' or 'quit', pressing Ctrl-C, or reaching end of input
    (Ctrl-D or a closed pipe) ends the session. Errors while answering a
    question are printed and the loop continues. The Neo4j driver is closed
    on the way out.
    """
    print("="*60)
    print("  Jain Philosophy AI Expert")
    print("  (Neo4j Graph + Llama 3.3 Internal Knowledge)")
    print("="*60)

    try:
        res = get_resources()
        retriever = res['retriever']
        client = res['client']
    except Exception as e:
        print(f"\n✗ Configuration Error: {e}")
        return

    try:
        # Loop
        while True:
            try:
                q = input("\nQ: ").strip()
                if q.lower() in ('exit', 'quit'):
                    break
                if not q:
                    continue

                print("  Thinking...", end='\r')
                ans = ask_jain_sage(q, retriever, client)
                print(" "*30, end='\r')
                print(f"A: {ans}\n")

            except (KeyboardInterrupt, EOFError):
                # EOFError must end the session: once stdin is exhausted,
                # input() raises it on every call and the generic handler
                # below would spin forever.
                break
            except Exception as e:
                print(f"\nError: {e}")
    finally:
        res['driver'].close()

if __name__ == "__main__":
    # "run-server" as the first argument starts the HTTP API; any other
    # invocation opens the console chat.
    server_mode = sys.argv[1:2] == ["run-server"]
    if server_mode:
        uvicorn.run(app, host="0.0.0.0", port=10000)
    else:
        main()