# File size: 11,747 Bytes
# 5fa5f30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
"""
Hybrid RAG Chatbot for Jain Philosophy

Features:
1. Neo4j graph + vector search over the book's knowledge base
2. Falls back to the LLM's internal knowledge (Llama 3.3) when the book context is insufficient
3. Uses the llama-3.3-70b-versatile model
"""

import logging
import os
import sys
from typing import List, Dict, Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from groq import Groq
from neo4j import GraphDatabase
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

# Force UTF-8 output on Windows: this module prints non-ASCII characters
# (the ✓ / ✗ status marks and transliterated terms with diacritics).
if sys.platform == 'win32':
    sys.stdout.reconfigure(encoding='utf-8')

# Load variables from a .env file (if any) before the os.getenv() calls below.
load_dotenv()

# Configuration
# NOTE(review): the fallbacks point at a local Neo4j with neo4j/password
# credentials — make sure the real values are set via environment variables
# in any deployed environment.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_AUTH = (os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "password"))
# No default on purpose: get_resources() raises ValueError when this is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Term mappings
# Lowercase ASCII key -> list of alternative spellings (IAST diacritics,
# related names, English glosses) for the same Jain concept.
TERM_MAPPINGS = {
    "anekantavada": ["Anekāntavāda", "Anekānta", "non-absolutism"],
    "syadvada": ["Syādvāda", "Syād", "conditional predication"],
    "saptabhangi": ["Saptabhaṅgī", "seven-fold predication"],
    "naya": ["Nayas", "viewpoints", "7 Naya"],
    "gunasthana": ["Guṇasthānaka", "stages of spiritual development"],
    "tirthankara": ["Tīrthaṅkara", "Jina", "Arihanta"],
    "mahavira": ["Mahāvīra", "Vardhamana"],
    "jiva": ["Jīvāstikāya", "soul"],
    "ajiva": ["Ajīva", "non-soul"],
    "karma": ["Karma", "karmic matter"],
}

# Initialize FastAPI
app = FastAPI(title="Jain Philosophy Chatbot API")

# Allow CORS for testing
# NOTE(review): every origin, method and header is allowed *and* credentials
# are enabled. That is acceptable for local testing only — restrict
# allow_origins to the known front-end origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # The user's question; chat_endpoint passes it to ask_jain_sage().
    query: str

# Response body returned by POST /chat.
class ChatResponse(BaseModel):
    # The text produced by ask_jain_sage() for the submitted query.
    answer: str

# Global resources
# Process-wide cache filled by get_resources(). Empty until the first
# successful load; afterwards it holds the keys 'driver', 'retriever' and
# 'client'.
resources = {}

def get_resources():
    """Return the shared resource cache, creating its contents on first use.

    On the first successful call this opens the Neo4j driver, loads the
    sentence-embedding model, builds the ``HybridRetriever`` and the Groq
    client, and stores them in the module-level ``resources`` dict under the
    keys ``'driver'``, ``'retriever'`` and ``'client'``. Later calls return
    the cached dict untouched.

    Returns:
        The module-level ``resources`` dict.

    Raises:
        ValueError: if ``GROQ_API_KEY`` is not configured.
        Exception: whatever the Neo4j driver, the embedding model or the Groq
            client raised while being set up (re-raised unchanged).
    """
    if resources:
        return resources

    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY is not set in .env file or environment variables")

    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        driver.verify_connectivity()
        embedder = SentenceTransformer('all-mpnet-base-v2')
        retriever = HybridRetriever(driver, embedder)
        client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        print(f"✗ Error loading resources: {e}")
        # Nothing has been cached yet, so a later call will retry from
        # scratch; release the half-initialised driver instead of leaking it.
        if driver is not None:
            try:
                driver.close()
            except Exception as close_error:
                print(f"✗ Error closing Neo4j driver: {close_error}")
        raise

    # Publish only after every piece was created, so the cache is never
    # observed in a partially filled state.
    resources['driver'] = driver
    resources['retriever'] = retriever
    resources['client'] = client
    print("✓ Resources loaded")
    return resources

_search_log = logging.getLogger(__name__)

# Characters that act as operators in Lucene query syntax. They are replaced
# by spaces before user text is handed to a fulltext index, otherwise a
# trailing "?" becomes a single-character wildcard and an unbalanced quote or
# parenthesis makes the whole query fail to parse.
_LUCENE_OPERATOR_CHARS = str.maketrans({ch: " " for ch in '+-&|!(){}[]^"~*?:\\/'})

# "\\n" reaches Cypher as the escape sequence \n inside its string literal.
_CONCEPT_CYPHER = """
    CALL db.index.fulltext.queryNodes('concept_name_index', $q)
    YIELD node, score
    RETURN
        'Concept: ' + node.name + ' (' + coalesce(node.category, 'General') + ')\\n' +
        'Variants: ' + coalesce(toString(node.variants), 'None') AS text,
        score + 1.0 AS score
    LIMIT 3
"""

_GUNASTHANA_CYPHER = """
    MATCH (g:Gunasthana)
    WHERE toLower(g.sanskrit_name) CONTAINS toLower($q)
       OR toLower(g.english_name) CONTAINS toLower($q)
    RETURN g.sanskrit_name + ' (' + g.english_name + ')\\n' + g.description AS text,
           2.0 AS score
"""

_VECTOR_CYPHER = """
    CALL db.index.vector.queryNodes($index_name, 7, $emb)
    YIELD node, score
    RETURN coalesce(node.text, node.description) AS text, score
"""

_KEYWORD_CYPHER = """
    CALL db.index.fulltext.queryNodes('chunk_text_index', $q)
    YIELD node, score
    RETURN node.text AS text, score
    LIMIT 5
"""

_CHAPTERS_CYPHER = """
    MATCH (c:Chapter)
    RETURN c.number AS number, c.title AS title
    ORDER BY c.number ASC
"""


def _to_lucene_text(query: str) -> str:
    """Return *query* with Lucene operator characters removed and whitespace collapsed."""
    return " ".join(query.translate(_LUCENE_OPERATOR_CHARS).split())


def _run_query(session, label: str, cypher: str, **params) -> List[Dict]:
    """Run one best-effort retrieval query; return its rows as dicts, or [] if it fails."""
    try:
        return [dict(record) for record in session.run(cypher, **params)]
    except Exception as exc:
        # A missing index or label must not abort the other strategies, but
        # it should be visible instead of silently yielding nothing.
        _search_log.warning("Neo4j %s search skipped: %s", label, exc)
        return []


def search_neo4j_comprehensive(driver, embedder, query: str) -> List[Dict]:
    """Collect the most relevant book passages for *query* from Neo4j.

    Every strategy is best-effort: a strategy that fails is logged and
    skipped, the others still contribute.

    1. Concept search on the ``concept_name_index`` fulltext index
       (index score + 1.0, at most 3 rows).
    2. Gunasthana name match (substring, case-insensitive, fixed score 2.0).
    3. Vector search on the ``chunk_embeddings`` and ``gunasthana_embeddings``
       indexes (7 nearest neighbours each).
    4. Keyword search on the ``chunk_text_index`` fulltext index (at most 5).
    5. Table of contents (fixed score 2.5), only when the query mentions a
       structural keyword such as "chapter" or "outline".

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``run(cypher, **params)`` (a Neo4j driver).
        embedder: object whose ``encode(text)`` returns something with
            ``tolist()`` (a SentenceTransformer model).
        query: the user's question.

    Returns:
        At most 7 dicts with ``'text'`` and ``'score'`` keys, highest score
        first. Rows without text are dropped and rows sharing the same first
        100 characters are de-duplicated (the highest-scored one wins).
    """
    # NOTE(review): a TERM_MAPPINGS-based expansion list used to be built here
    # but was never passed to any query; that dead code was removed. Wire the
    # variants into the fulltext queries if query expansion is wanted.
    query_lower = query.lower()
    lucene_query = _to_lucene_text(query)
    embedding = embedder.encode(query).tolist()
    chunks: List[Dict] = []

    with driver.session() as session:
        # 1. Concept Node Search (High Priority)
        if lucene_query:
            chunks.extend(_run_query(session, "concept", _CONCEPT_CYPHER, q=lucene_query))

        # 2. Gunasthana Specific Search (plain substring match, so the raw
        #    query is used — no Lucene parsing is involved here)
        chunks.extend(_run_query(session, "gunasthana", _GUNASTHANA_CYPHER, q=query))

        # 3. Vector Search
        for index_name in ("chunk_embeddings", "gunasthana_embeddings"):
            chunks.extend(
                _run_query(
                    session,
                    f"vector[{index_name}]",
                    _VECTOR_CYPHER,
                    index_name=index_name,
                    emb=embedding,
                )
            )

        # 4. Fulltext Keyword Search
        if lucene_query:
            chunks.extend(_run_query(session, "keyword", _KEYWORD_CYPHER, q=lucene_query))

        # 5. Structure/Chapter Search (Table of Contents)
        structure_keywords = ("chapter", "summary", "outline", "structure", "table of contents", "book")
        if any(keyword in query_lower for keyword in structure_keywords):
            rows = _run_query(session, "chapter", _CHAPTERS_CYPHER)
            chapters = [f"Chapter {row['number']}: {row['title']}" for row in rows]
            if chapters:
                chunks.append({
                    "text": "Book Table of Contents (All Chapters):\n" + "\n".join(chapters),
                    "score": 2.5,  # Very high relevance for structural questions
                })

    # Deduplicate and sort: highest score first, so the best-scored copy of a
    # duplicated passage is the one that survives.
    seen = set()
    unique_chunks = []
    for chunk in sorted(chunks, key=lambda c: c.get("score") or 0.0, reverse=True):
        text = chunk.get("text")
        # Simple dedupe (using the first 100 chars as a signature); rows with
        # no text are useless as context and are dropped.
        signature = text[:100] if text else ""
        if signature and signature not in seen:
            seen.add(signature)
            unique_chunks.append(chunk)

    # If the TOC (score 2.5) was added, it will be at the top.
    return unique_chunks[:7]

class HybridRetriever:
    """Pairs a Neo4j driver with an embedding model for book retrieval."""

    def __init__(self, driver, embedder):
        """Store the two handles that every search needs."""
        self.driver, self.embedder = driver, embedder

    def search_book(self, query: str) -> List[Dict]:
        """Return the passages found by ``search_neo4j_comprehensive`` for *query*."""
        hits = search_neo4j_comprehensive(self.driver, self.embedder, query)
        return hits

def ask_jain_sage(user_query: str, retriever: "HybridRetriever", client: "Groq") -> str:
    """Answer *user_query* with llama-3.3-70b-versatile, grounded in book context.

    The passages returned by ``retriever.search_book`` are placed in the user
    message; the system prompt tells the model to fall back on its own
    knowledge when that context is insufficient.

    Args:
        user_query: the question to answer.
        retriever: object exposing ``search_book(query)`` that returns dicts
            with a ``'text'`` key.
        client: Groq client exposing ``chat.completions.create``.

    Returns:
        The model's answer, or an empty string when the completion carries no
        text content.

    Raises:
        Exception: anything raised by the retriever or by the Groq API call.
    """
    # 1. Retrieve from Book (rows without text would break the join)
    book_chunks = retriever.search_book(user_query)
    book_text = "\n\n".join(c['text'] for c in book_chunks if c.get('text'))
    if not book_text:
        # Tell the model explicitly that retrieval came back empty instead of
        # sending a blank context section.
        book_text = "(no relevant passages were retrieved)"

    # Adjacent literals are concatenated as-is, so every sentence must end
    # with a space — two of them previously ran into the next sentence.
    system_prompt = (
        "You are an expert scholar on Jain philosophy. "
        "Use the provided context from the book 'Anekant Syadvad' to answer the question. "
        "If the book context is insufficient, use your own broad knowledge of Jainism and religion to answer comprehensively. "
        "Do NOT mention 'According to the text' just give the answer naturally. "
        "Always define Sanskrit terms. "
        "Ensure the response is logically structured, concise yet comprehensive, and suitable for both "
        "academic and general readers. "
        "If the available book context is partial or insufficient, responsibly supplement the answer "
        "using well-established principles of Jain philosophy and comparative religious knowledge, "
        "without introducing speculation. "
        "Whenever Sanskrit or Prakrit terms appear, always: "
        "1) Write the term in standard IAST-style transliteration, "
        "2) Clearly define the term in simple and precise language at its first occurrence. "
        "Use the following transliteration standard consistently: "
        "Vowels: "
        "अ a, आ ā, इ i, ई ī, उ u, ऊ ū, ऋ ṛ, ए e, ऐ ai, ओ o, औ au, अं ṁ/ṅ, अः ḥ. "
        "Consonants: "
        "क् k, ख् kh, ग् g, घ् gh, ङ् ṅ; "
        "च् c, छ् ch, ज् j, झ् jh, ञ् ñ; "
        "ट् ṭ, ठ् ṭh, ड् ḍ, ढ् ḍh, ण् ṇ; "
        "त् t, थ् th, द् d, ध् dh, न् n; "
        "प् p, फ् ph, ब् b, भ् bh, म् m; "
        "य् y, र् r, ल् l, व् v; "
        "श् ś, ष् ṣ, स् s, ह् h. "
    )

    user_message_content = f"Context from Book:\n{book_text}\n\nQuestion: {user_query}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message_content},
    ]

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.5,
        max_completion_tokens=2048,
        top_p=0.95,
    )

    # The API may return a message without text content; keep the declared
    # ``str`` return type instead of leaking ``None`` to callers.
    return completion.choices[0].message.content or ""

@app.on_event("startup")
async def startup_event():
    """Warm up the shared resources when the server starts.

    A failure here is only reported: the application still starts, and
    ``get_resources()`` is attempted again by the first ``/chat`` request.
    The warning makes a missing environment variable or an unreachable
    database visible in the deployment logs right away.
    """
    try:
        get_resources()
    except Exception as e:
        print(f"Warning: Could not initialize resources on startup: {e}")


@app.on_event("shutdown")
async def shutdown_event():
    """Close the Neo4j driver, if one was opened, when the server stops."""
    driver = resources.get('driver')
    # Empty the cache so nothing keeps using a driver that is being closed.
    resources.clear()
    if driver is not None:
        driver.close()

@app.get("/")
def read_root():
    """Landing endpoint: reports that the API is up and names the chat route."""
    payload = dict(status="Jain Sage AI is API Ready", endpoints="/chat")
    return payload

@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer one question via the retrieval + LLM pipeline.

    Args:
        request: body carrying the user's ``query``.

    Returns:
        ``ChatResponse`` with the generated answer.

    Raises:
        HTTPException: 400 when the query is blank; 500 when resource loading,
            retrieval or the LLM call fails.
    """
    # Validate outside the try block: the handler below would otherwise turn
    # this 400 into a 500.
    question = request.query.strip()
    if not question:
        raise HTTPException(status_code=400, detail="Query must not be empty")

    try:
        res = get_resources()
        retriever = res['retriever']
        client = res['client']

        # ask_jain_sage() is fully synchronous (embedding, Neo4j, Groq HTTP
        # call). Run it in the worker thread pool so one slow answer does not
        # block the event loop for every other request.
        answer = await run_in_threadpool(ask_jain_sage, question, retriever, client)
        return ChatResponse(answer=answer)
    except Exception as e:
        logging.getLogger(__name__).exception("Chat request failed")
        # NOTE(review): str(e) exposes internal error text to the client;
        # replace it with a generic message before going to production.
        raise HTTPException(status_code=500, detail=str(e)) from e

def main():
    """Run the interactive console chat until the user quits.

    Reads questions from stdin, prints the answer produced by
    ``ask_jain_sage`` and stops on ``exit`` / ``quit``, Ctrl-C or end of
    input. The Neo4j driver is closed on every exit path.
    """
    print("="*60)
    print("  Jain Philosophy AI Expert")
    print("  (Neo4j Graph + Llama 3.3 Internal Knowledge)")
    print("="*60)

    try:
        res = get_resources()
        retriever = res['retriever']
        client = res['client']
    except Exception as e:
        print(f"\n✗ Configuration Error: {e}")
        return

    try:
        # Loop
        while True:
            try:
                q = input("\nQ: ").strip()
            except (EOFError, KeyboardInterrupt):
                # End of input (closed or piped stdin) must end the session;
                # treated as an ordinary error it would repeat forever.
                break

            if q.lower() in ('exit', 'quit'):
                break
            if not q:
                continue

            try:
                print("  Thinking...", end='\r')
                ans = ask_jain_sage(q, retriever, client)
                print(" "*30, end='\r')  # wipe the "Thinking..." line
                print(f"A: {ans}\n")
            except KeyboardInterrupt:
                break
            except Exception as e:
                # A failed question should not end the session.
                print(f"\nError: {e}")
    finally:
        res['driver'].close()

if __name__ == "__main__":
    # "run-server" as the first argument starts the HTTP API on port 10000;
    # any other invocation starts the interactive console chat.
    server_requested = sys.argv[1:2] == ["run-server"]
    if not server_requested:
        main()
    else:
        uvicorn.run(app, host="0.0.0.0", port=10000)