"""
Hybrid RAG Chatbot for Jain Philosophy

Features:
1. Neo4j Graph + Vector Search for Book Knowledge
2. Fallback to LLM Internal Knowledge (Llama 3.3) if needed
3. Uses llama-3.3-70b-versatile model
"""

import logging
import os
import sys
import threading
from typing import List, Dict, Optional

from dotenv import load_dotenv
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from groq import Groq
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

# Force UTF-8 output on Windows
if sys.platform == 'win32':
    sys.stdout.reconfigure(encoding='utf-8')

load_dotenv()

logger = logging.getLogger(__name__)

# Configuration
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_AUTH = (os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "password"))
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Term mappings: ASCII spelling found in a user query -> variants (diacritic
# spellings / English glosses) that are added to the full-text searches.
TERM_MAPPINGS = {
    "anekantavada": ["Anekāntavāda", "Anekānta", "non-absolutism"],
    "syadvada": ["Syādvāda", "Syād", "conditional predication"],
    "saptabhangi": ["Saptabhaṅgī", "seven-fold predication"],
    "naya": ["Nayas", "viewpoints", "7 Naya"],
    "gunasthana": ["Guṇasthānaka", "stages of spiritual development"],
    "tirthankara": ["Tīrthaṅkara", "Jina", "Arihanta"],
    "mahavira": ["Mahāvīra", "Vardhamana"],
    "jiva": ["Jīvāstikāya", "soul"],
    "ajiva": ["Ajīva", "non-soul"],
    "karma": ["Karma", "karmic matter"],
}

# Characters with special meaning in the Lucene query syntax; each one is
# replaced by a space before a user query is sent to a full-text index.
_LUCENE_SPECIAL = str.maketrans({ch: " " for ch in '+-&|!(){}[]^"~*?:\\/'})
# Upper-case boolean operators would be interpreted by the query parser;
# lower-casing turns them back into ordinary words.
_LUCENE_OPERATORS = {"AND", "OR", "NOT"}

# System prompt sent with every completion request.
SYSTEM_PROMPT = (
    "You are an expert scholar on Jain philosophy. "
    "Use the provided context from the book 'Anekant Syadvad' to answer the question. "
    "If the book context is insufficient, use your own broad knowledge of Jainism and religion to answer comprehensively. "
    "Do NOT mention 'According to the text' just give the answer naturally. "
    "Always define Sanskrit terms. "
    "Ensure the response is logically structured, concise yet comprehensive, and suitable for both "
    "academic and general readers. "
    "If the available book context is partial or insufficient, responsibly supplement the answer "
    "using well-established principles of Jain philosophy and comparative religious knowledge, "
    "without introducing speculation. "
    "Whenever Sanskrit or Prakrit terms appear, always: "
    "1) Write the term in standard IAST-style transliteration, "
    "2) Clearly define the term in simple and precise language at its first occurrence. "
    "Use the following transliteration standard consistently: "
    "Vowels: "
    "अ a, आ ā, इ i, ई ī, उ u, ऊ ū, ऋ ṛ, ए e, ऐ ai, ओ o, औ au, अं ṁ/ṅ, अः ḥ. "
    "Consonants: "
    "क् k, ख् kh, ग् g, घ् gh, ङ् ṅ; "
    "च् c, छ् ch, ज् j, झ् jh, ञ् ñ; "
    "ट् ṭ, ठ् ṭh, ड् ḍ, ढ् ḍh, ण् ṇ; "
    "त् t, थ् th, द् d, ध् dh, न् n; "
    "प् p, फ् ph, ब् b, भ् bh, म् m; "
    "य् y, र् r, ल् l, व् v; "
    "श् ś, ष् ṣ, स् s, ह् h. "
)

# Initialize FastAPI
app = FastAPI(title="Jain Philosophy Chatbot API")

# Allow CORS for testing
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended before exposing the API publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatRequest(BaseModel):
    """Request body of the /chat endpoint."""

    query: str


class ChatResponse(BaseModel):
    """Response body of the /chat endpoint."""

    answer: str


# Global resources (driver, retriever, client), filled by get_resources().
resources = {}
# Serialises lazy initialisation: /chat runs get_resources() in worker threads.
_resources_lock = threading.Lock()


def _close_quietly(driver) -> None:
    """Close a Neo4j driver, logging (not raising) any error from close()."""
    try:
        driver.close()
    except Exception as exc:  # cleanup must never mask the original error
        logger.warning("Error while closing Neo4j driver: %s", exc)


def get_resources():
    """Lazy load resources.

    Returns:
        The module-level ``resources`` dict with the keys ``'driver'``,
        ``'retriever'`` and ``'client'``.

    Raises:
        ValueError: if GROQ_API_KEY is not configured.
        Exception: whatever the Neo4j driver, the embedding model or the Groq
            client raise during initialisation (re-raised unchanged).
    """
    if resources:
        return resources

    with _resources_lock:
        # Another thread may have finished loading while we waited.
        if resources:
            return resources

        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY is not set in .env file or environment variables")

        driver = None
        try:
            driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
            driver.verify_connectivity()

            embedder = SentenceTransformer('all-mpnet-base-v2')
            retriever = HybridRetriever(driver, embedder)
            client = Groq(api_key=GROQ_API_KEY)
        except Exception as e:
            print(f"✗ Error loading resources: {e}")
            # Do not leak the driver when a later initialisation step fails.
            if driver is not None:
                _close_quietly(driver)
            raise

        # Publish everything in one step so readers never see a partial dict.
        resources.update({'driver': driver, 'retriever': retriever, 'client': client})
        print("✓ Resources loaded")

    return resources


def close_resources() -> None:
    """Close the Neo4j driver (if one was loaded) and forget all resources."""
    with _resources_lock:
        driver = resources.get('driver')
        resources.clear()
    if driver is not None:
        _close_quietly(driver)


def _sanitize_lucene(text: str) -> str:
    """Return *text* with Lucene query-syntax characters and operators neutralised."""
    words = text.translate(_LUCENE_SPECIAL).split()
    return " ".join(w.lower() if w in _LUCENE_OPERATORS else w for w in words)


def _build_lucene_query(query: str, variants: List[str]) -> str:
    """Combine the sanitised user query with quoted phrase variants."""
    parts = [_sanitize_lucene(query)]
    for variant in variants:
        cleaned = _sanitize_lucene(variant)
        if cleaned:
            parts.append(f'"{cleaned}"')
    return " ".join(part for part in parts if part)


def _run_search(session, label: str, cypher: str, **params) -> List[Dict]:
    """Run one best-effort search stage; a failure is logged and yields []."""
    try:
        return [dict(record) for record in session.run(cypher, **params)]
    except Exception as exc:
        # Each stage is optional (e.g. an index may not exist), so a failure
        # must not abort the other stages -- but it should be visible.
        logger.warning("Neo4j %s search failed: %s", label, exc)
        return []


def _rank_and_dedupe(chunks: List[Dict], limit: int = 7) -> List[Dict]:
    """Sort chunks by score (descending), drop near-duplicates, keep *limit*."""
    seen = set()
    unique_chunks = []
    for chunk in sorted(chunks, key=lambda c: c.get('score') or 0.0, reverse=True):
        content = chunk.get('text')
        # Simple dedupe (using first 100 chars signature); chunks without
        # text are dropped.
        sig = content[:100] if content else ""
        if sig and sig not in seen:
            seen.add(sig)
            unique_chunks.append(chunk)
    return unique_chunks[:limit]


def search_neo4j_comprehensive(driver, embedder, query: str) -> List[Dict]:
    """
    Enhanced Neo4j Search Strategy:
    1. Concept Search (full-text index 'concept_name_index')
    2. Gunasthana name match
    3. Vector Search (Chunks)
    4. Keyword/Text Search (full-text index 'chunk_text_index')
    5. Chapter listing for structural questions

    Every stage is best-effort: a failing stage is logged and skipped.

    Args:
        driver: Neo4j driver used to open a session.
        embedder: object whose ``encode(query).tolist()`` gives the query
            embedding passed to the vector indexes.
        query: the user's question.

    Returns:
        Up to 7 dicts with the keys ``'text'`` and ``'score'``, highest
        score first.
    """
    query_lower = query.lower()

    # Simple query expansion: collect variants for every known term that
    # occurs in the query (dict.fromkeys dedupes while keeping order).
    expanded_terms = []
    for term, variants in TERM_MAPPINGS.items():
        if term in query_lower:
            expanded_terms.extend(variants)
    expanded_terms = list(dict.fromkeys(expanded_terms))

    # The full-text indexes parse their argument as a Lucene query, so the
    # raw user text must be sanitised first.
    lucene_query = _build_lucene_query(query, expanded_terms)

    embedding = embedder.encode(query).tolist()

    chunks = []

    with driver.session() as session:
        # 1. Concept Node Search (High Priority: score is boosted by 1.0)
        if lucene_query:
            chunks.extend(_run_search(session, "concept", """
                CALL db.index.fulltext.queryNodes('concept_name_index', $q)
                YIELD node, score
                RETURN 'Concept: ' + node.name + ' (' + coalesce(node.category, 'General') + ')\n' +
                       'Variants: ' + coalesce(toString(node.variants), 'None') as text,
                       score + 1.0 as score
                LIMIT 3
            """, q=lucene_query))

        # 2. Gunasthana Specific Search (substring match on the raw query)
        chunks.extend(_run_search(session, "gunasthana", """
            MATCH (g:Gunasthana)
            WHERE toLower(g.sanskrit_name) CONTAINS toLower($q)
               OR toLower(g.english_name) CONTAINS toLower($q)
            RETURN g.sanskrit_name + ' (' + g.english_name + ')\n' + g.description as text,
                   2.0 as score
        """, q=query))

        # 3. Vector Search (index names come from this fixed list only)
        for idx in ('chunk_embeddings', 'gunasthana_embeddings'):
            chunks.extend(_run_search(session, f"vector[{idx}]", f"""
                CALL db.index.vector.queryNodes('{idx}', 7, $emb)
                YIELD node, score
                RETURN coalesce(node.text, node.description) as text, score
            """, emb=embedding))

        # 4. Fulltext Keyword Search
        if lucene_query:
            chunks.extend(_run_search(session, "chunk full-text", """
                CALL db.index.fulltext.queryNodes('chunk_text_index', $q)
                YIELD node, score
                RETURN node.text as text, score
                LIMIT 5
            """, q=lucene_query))

        # 5. Structure/Chapter Search (Table of Contents)
        # If the user asks for "chapters", "summary", "outline", "structure"
        structure_keywords = ["chapter", "summary", "outline", "structure", "table of contents", "book"]
        if any(k in query_lower for k in structure_keywords):
            # Fetch all chapters sorted by number
            rows = _run_search(session, "chapter listing", """
                MATCH (c:Chapter)
                RETURN c.number as number, c.title as title
                ORDER BY c.number ASC
            """)
            chapters = [f"Chapter {r['number']}: {r['title']}" for r in rows]
            if chapters:
                toc_text = "Book Table of Contents (All Chapters):\n" + "\n".join(chapters)
                chunks.append({
                    "text": toc_text,
                    "score": 2.5  # Very high relevance for structural questions
                })

    # Deduplicate, sort and return the top results.
    return _rank_and_dedupe(chunks)


class HybridRetriever:
    """Bundles a Neo4j driver and an embedder behind one search method."""

    def __init__(self, driver, embedder):
        self.driver = driver
        self.embedder = embedder

    def search_book(self, query: str) -> List[Dict]:
        """Return the ranked book chunks for *query*."""
        return search_neo4j_comprehensive(self.driver, self.embedder, query)


def ask_jain_sage(user_query: str, retriever: HybridRetriever, client: Groq) -> str:
    """
    Call llama-3.3-70b-versatile directly with book context + internal knowledge fallback.

    Args:
        user_query: the user's question.
        retriever: retriever used to fetch book context.
        client: Groq client used for the chat completion.

    Returns:
        The model's answer text ("" if the model returned no content).
    """
    # 1. Retrieve from Book
    book_chunks = retriever.search_book(user_query)
    book_text = "\n\n".join(c['text'] for c in book_chunks)

    user_message_content = f"Context from Book:\n{book_text}\n\nQuestion: {user_query}"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message_content}
    ]

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.5,
        max_completion_tokens=2048,
        top_p=0.95,
    )

    # content can be None; callers (and ChatResponse) expect a str.
    return completion.choices[0].message.content or ""


@app.on_event("startup")
async def startup_event():
    """Load resources at startup; a failure is reported but not fatal."""
    # Attempt to load resources on startup (useful for Render to fail early if missing env vars)
    try:
        get_resources()
    except Exception as e:
        print(f"Warning: Could not initialize resources on startup: {e}")


@app.on_event("shutdown")
def shutdown_event():
    """Release the Neo4j driver when the server stops."""
    close_resources()


@app.get("/")
def read_root():
    """Health/status endpoint."""
    return {"status": "Jain Sage AI is API Ready", "endpoints": "/chat"}


@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer one question.

    Raises:
        HTTPException: 400 for an empty query, 500 for any failure while
            loading resources or producing the answer.
    """
    query = request.query.strip()
    if not query:
        raise HTTPException(status_code=400, detail="Query must not be empty")

    try:
        # Retrieval, embedding and the LLM call are all blocking; run them in
        # the thread pool so the event loop keeps serving other requests.
        res = await run_in_threadpool(get_resources)
        answer = await run_in_threadpool(ask_jain_sage, query, res['retriever'], res['client'])
        return ChatResponse(answer=answer)
    except HTTPException:
        # Keep deliberate HTTP errors instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.exception("Chat request failed")
        raise HTTPException(status_code=500, detail=str(e))


def main():
    """Interactive console loop; type 'exit' or 'quit' (or send EOF) to leave."""
    print("="*60)
    print(" Jain Philosophy AI Expert")
    print(" (Neo4j Graph + Llama 3.3 Internal Knowledge)")
    print("="*60)

    try:
        res = get_resources()
        retriever = res['retriever']
        client = res['client']
    except Exception as e:
        print(f"\n✗ Configuration Error: {e}")
        return

    try:
        # Loop
        while True:
            try:
                q = input("\nQ: ").strip()
                if q.lower() in ['exit', 'quit']:
                    break
                if not q:
                    continue

                print(" Thinking...", end='\r')
                ans = ask_jain_sage(q, retriever, client)
                print(" "*30, end='\r')
                print(f"A: {ans}\n")

            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin closed (Ctrl-D / piped input). Without this
                # the generic handler below would retry input() forever.
                break
            except Exception as e:
                print(f"\nError: {e}")
    finally:
        # Always release the driver, even on an unexpected exit.
        close_resources()


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "run-server":
        # Honour a platform-provided PORT; fall back to the previous fixed port.
        uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "10000")))
    else:
        main()