meet12341234 committed on
Commit
5fa5f30
·
verified ·
1 Parent(s): 11d1adc

Upload 9 files

Browse files
Files changed (9) hide show
  1. .env +4 -0
  2. .gitignore +1 -0
  3. Anekant Syadvad - Final.txt +0 -0
  4. Dockerfile +28 -0
  5. chatbot.py +316 -0
  6. ingest.py +540 -0
  7. requirements.txt +7 -0
  8. sample_output.txt +122 -0
  9. test_chatbot.html +351 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ GROQ_API_KEY=<redacted: revoke this key and inject the new one as a deployment secret>
2
+ NEO4J_URI=neo4j+ssc://3c78cdc0.databases.neo4j.io
3
+ NEO4J_USERNAME=neo4j
4
+ NEO4J_PASSWORD=<redacted: rotate this password and inject the new one as a deployment secret>
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Anekant Syadvad - Final.txt ADDED
The diff for this file is too large to render. See raw diff
 
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use official Python runtime
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Set environment variables
8
+ ENV PYTHONDONTWRITEBYTECODE=1 \
9
+ PYTHONUNBUFFERED=1
10
+
11
+ # Install system dependencies
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ gcc \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Install python dependencies - explicit copy for caching
17
+ COPY requirements.txt .
18
+ RUN pip install --no-cache-dir -r requirements.txt
19
+
20
+ # Copy application code
21
+ COPY chatbot.py .
22
+ COPY ingest.py .
23
+
24
+ # Expose port (Hugging Face Spaces uses 7860 by default)
25
+ EXPOSE 7860
26
+
27
+ # Command to run the application
28
+ CMD ["uvicorn", "chatbot:app", "--host", "0.0.0.0", "--port", "7860"]
chatbot.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hybrid RAG Chatbot for Jain Philosophy
3
+ Features:
4
+ 1. Neo4j Graph + Vector Search for Book Knowledge
5
+ 2. Fallback to LLM Internal Knowledge (Llama 3.3) if needed
6
+ 3. Uses llama-3.3-70b-versatile model
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ from typing import List, Dict, Optional
12
+ from dotenv import load_dotenv
13
+ from neo4j import GraphDatabase
14
+ from sentence_transformers import SentenceTransformer
15
+ from groq import Groq
16
+ from fastapi import FastAPI, HTTPException
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ from pydantic import BaseModel
19
+ import uvicorn
20
+
21
+ # Force UTF-8 output on Windows
22
+ if sys.platform == 'win32':
23
+ sys.stdout.reconfigure(encoding='utf-8')
24
+
25
+ load_dotenv()
26
+
27
+ # Configuration
28
+ NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
29
+ NEO4J_AUTH = (os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "password"))
30
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
31
+
32
+ # Term mappings
33
+ TERM_MAPPINGS = {
34
+ "anekantavada": ["Anekāntavāda", "Anekānta", "non-absolutism"],
35
+ "syadvada": ["Syādvāda", "Syād", "conditional predication"],
36
+ "saptabhangi": ["Saptabhaṅgī", "seven-fold predication"],
37
+ "naya": ["Nayas", "viewpoints", "7 Naya"],
38
+ "gunasthana": ["Guṇasthānaka", "stages of spiritual development"],
39
+ "tirthankara": ["Tīrthaṅkara", "Jina", "Arihanta"],
40
+ "mahavira": ["Mahāvīra", "Vardhamana"],
41
+ "jiva": ["Jīvāstikāya", "soul"],
42
+ "ajiva": ["Ajīva", "non-soul"],
43
+ "karma": ["Karma", "karmic matter"],
44
+ }
45
+
46
+ # Initialize FastAPI
47
+ app = FastAPI(title="Jain Philosophy Chatbot API")
48
+
49
+ # Allow CORS for testing
50
# CORS: any origin may call this API, but credentialed requests (cookies,
# Authorization headers) are NOT enabled. A wildcard origin must not be combined
# with allow_credentials=True; nothing in this service relies on credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
57
+
58
class ChatRequest(BaseModel):
    """JSON body accepted by the chat endpoint."""

    # Free-text question from the user.
    query: str
60
+
61
class ChatResponse(BaseModel):
    """JSON body returned by the chat endpoint."""

    # Text of the generated answer.
    answer: str
63
+
64
+ # Global resources
65
+ resources = {}
66
+
67
def get_resources():
    """Return the shared resources, creating them on first use.

    The module-level ``resources`` dict is filled with three entries:
    ``'driver'`` (Neo4j driver), ``'retriever'`` (HybridRetriever) and
    ``'client'`` (Groq client). Later calls return the same dict.

    Raises:
        ValueError: if GROQ_API_KEY is not configured.
        Exception: whatever the Neo4j driver, the embedding model or the Groq
            client raised during initialisation (re-raised after logging).
    """
    if resources:
        return resources

    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY is not set in .env file or environment variables")

    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
        driver.verify_connectivity()
        embedder = SentenceTransformer('all-mpnet-base-v2')
        retriever = HybridRetriever(driver, embedder)
        client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        print(f"✗ Error loading resources: {e}")
        # Do not leak the connection pool when a later step fails.
        if driver is not None:
            driver.close()
        raise

    # Publish only after every component was built, so the cache is never
    # left half-populated.
    resources['driver'] = driver
    resources['retriever'] = retriever
    resources['client'] = client
    print("✓ Resources loaded")
    return resources
88
+
89
# Characters that carry meaning in Lucene query syntax (used by Neo4j full-text indexes).
_LUCENE_SPECIAL_CHARS = frozenset('+-&|!(){}[]^"~*?:\\/')


def _escape_lucene(text: str) -> str:
    """Backslash-escape Lucene operators so *text* is matched as literal words."""
    return "".join("\\" + ch if ch in _LUCENE_SPECIAL_CHARS else ch for ch in text)


def _run_best_effort(session, label: str, cypher: str, params: Optional[Dict] = None) -> List[Dict]:
    """Run one retrieval query and return its rows as dicts.

    Each search strategy is optional (an index may not exist), so a failure is
    reported and treated as "no rows" instead of aborting the whole search.
    """
    try:
        return [dict(record) for record in session.run(cypher, params or {})]
    except Exception as e:
        print(f"Warning: {label} search failed: {e}")
        return []


def search_neo4j_comprehensive(driver, embedder, query: str) -> List[Dict]:
    """Collect candidate passages for *query* from several Neo4j searches.

    Strategies, all best-effort:
      1. Full-text search over concept names (score boosted by 1.0)
      2. Substring match on Gunasthana names (fixed score 2.0)
      3. Vector search on the 'chunk_embeddings' and 'gunasthana_embeddings' indexes
      4. Full-text keyword search over chunk text
      5. Table of contents (fixed score 2.5) when the query mentions structure words

    Args:
        driver: Neo4j driver used to open a session.
        embedder: object whose ``encode(query).tolist()`` yields the query embedding.
        query: the user's question.

    Returns:
        Up to 7 dicts with 'text' and 'score', sorted by score descending and
        de-duplicated on the first 100 characters of the text.
    """
    query_lower = query.lower()

    # Query expansion: ASCII spellings typed by users are mapped to the
    # transliterated / English variants listed in TERM_MAPPINGS.
    expanded_terms = []
    for term, variants in TERM_MAPPINGS.items():
        if term in query_lower:
            expanded_terms.extend(variants)

    # '?' and '!' are dropped; every other Lucene operator is escaped so that
    # ordinary punctuation in a question cannot cause a query parse error.
    lucene_query = _escape_lucene(query.replace("?", "").replace("!", "")).strip()
    # Expanded variants are appended as quoted phrases (terms are OR-ed by default).
    keyword_query = " ".join(
        [lucene_query] + [f'"{_escape_lucene(variant)}"' for variant in expanded_terms]
    )

    embedding = embedder.encode(query).tolist()
    chunks = []

    with driver.session() as session:
        # 1. Concept Node Search (High Priority)
        # NOTE(review): this procedure requires 'concept_name_index' to be a
        # FULLTEXT index; confirm that the ingestion schema creates it as such.
        if lucene_query:
            chunks.extend(_run_best_effort(session, "concept", """
                CALL db.index.fulltext.queryNodes('concept_name_index', $q)
                YIELD node, score
                RETURN
                'Concept: ' + node.name + ' (' + coalesce(node.category, 'General') + ')\n' +
                'Variants: ' + coalesce(toString(node.variants), 'None') as text,
                score + 1.0 as score
                LIMIT 3
            """, {"q": keyword_query}))

        # 2. Gunasthana Specific Search
        chunks.extend(_run_best_effort(session, "gunasthana", """
            MATCH (g:Gunasthana)
            WHERE toLower(g.sanskrit_name) CONTAINS toLower($q)
            OR toLower(g.english_name) CONTAINS toLower($q)
            RETURN g.sanskrit_name + ' (' + g.english_name + ')\n' + g.description as text, 2.0 as score
        """, {"q": query}))

        # 3. Vector Search (index name passed as a parameter, not interpolated)
        for idx in ('chunk_embeddings', 'gunasthana_embeddings'):
            chunks.extend(_run_best_effort(session, f"vector ({idx})", """
                CALL db.index.vector.queryNodes($idx, 7, $emb)
                YIELD node, score
                RETURN coalesce(node.text, node.description) as text, score
            """, {"idx": idx, "emb": embedding}))

        # 4. Fulltext Keyword Search
        if lucene_query:
            chunks.extend(_run_best_effort(session, "keyword", """
                CALL db.index.fulltext.queryNodes('chunk_text_index', $q)
                YIELD node, score
                RETURN node.text as text, score LIMIT 5
            """, {"q": keyword_query}))

        # 5. Structure/Chapter Search (Table of Contents)
        # If the user asks for "chapters", "summary", "outline", "structure"
        structure_keywords = ["chapter", "summary", "outline", "structure", "table of contents", "book"]
        if any(k in query_lower for k in structure_keywords):
            rows = _run_best_effort(session, "table of contents", """
                MATCH (c:Chapter)
                RETURN c.number as number, c.title as title
                ORDER BY c.number ASC
            """)
            chapters = [f"Chapter {r['number']}: {r['title']}" for r in rows]
            if chapters:
                chunks.append({
                    "text": "Book Table of Contents (All Chapters):\n" + "\n".join(chapters),
                    "score": 2.5,  # Very high relevance for structural questions
                })

    # Deduplicate (signature = first 100 chars) while walking best score first.
    seen = set()
    unique_chunks = []
    for c in sorted(chunks, key=lambda x: x['score'], reverse=True):
        content = c['text']
        sig = content[:100] if content else ""
        if sig and sig not in seen:
            seen.add(sig)
            unique_chunks.append(c)

    # If the TOC (score 2.5) is present it ends up at the top.
    return unique_chunks[:7]
194
+
195
class HybridRetriever:
    """Bundles a Neo4j driver with an embedding model behind one search call."""

    def __init__(self, driver, embedder):
        self.driver, self.embedder = driver, embedder

    def search_book(self, query: str) -> List[Dict]:
        """Return the passages found by ``search_neo4j_comprehensive`` for *query*."""
        hits = search_neo4j_comprehensive(self.driver, self.embedder, query)
        return hits
202
+
203
def ask_jain_sage(user_query: str, retriever: HybridRetriever, client: Groq) -> str:
    """Answer *user_query* with llama-3.3-70b-versatile, grounded in book passages.

    Args:
        user_query: the user's question.
        retriever: supplies passages via ``search_book(user_query)``.
        client: Groq client used for the chat completion.

    Returns:
        The model's answer text ("" if the API returned no content).
    """
    # 1. Retrieve from Book
    book_chunks = retriever.search_book(user_query)
    book_text = "\n\n".join([c['text'] for c in book_chunks])

    # Adjacent literals are concatenated verbatim, so every fragment must end
    # with a space to keep sentences from running together.
    system_prompt = (
        "You are an expert scholar on Jain philosophy. "
        "Use the provided context from the book 'Anekant Syadvad' to answer the question. "
        "If the book context is insufficient, use your own broad knowledge of Jainism and religion to answer comprehensively. "
        "Do NOT mention 'According to the text' just give the answer naturally. "
        "Always define Sanskrit terms. "
        "Ensure the response is logically structured, concise yet comprehensive, and suitable for both "
        "academic and general readers. "
        "If the available book context is partial or insufficient, responsibly supplement the answer "
        "using well-established principles of Jain philosophy and comparative religious knowledge, "
        "without introducing speculation. "
        "Whenever Sanskrit or Prakrit terms appear, always: "
        "1) Write the term in standard IAST-style transliteration, "
        "2) Clearly define the term in simple and precise language at its first occurrence. "
        "Use the following transliteration standard consistently: "
        "Vowels: "
        "अ a, आ ā, इ i, ई ī, उ u, ऊ ū, ऋ ṛ, ए e, ऐ ai, ओ o, औ au, अं ṁ/ṅ, अः ḥ. "
        "Consonants: "
        "क् k, ख् kh, ग् g, घ् gh, ङ् ṅ; "
        "च् c, छ् ch, ज् j, झ् jh, ञ् ñ; "
        "ट् ṭ, ठ् ṭh, ड् ḍ, ढ् ḍh, ण् ṇ; "
        "त् t, थ् th, द् d, ध् dh, न् n; "
        "प् p, फ् ph, ब् b, भ् bh, म् m; "
        "य् y, र् r, ल् l, व् v; "
        "श् ś, ष् ṣ, स् s, ह् h. "
    )

    user_message_content = f"Context from Book:\n{book_text}\n\nQuestion: {user_query}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message_content}
    ]

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.5,
        max_completion_tokens=2048,
        top_p=0.95,
    )

    # The content field can be None; honour the declared ``str`` return type.
    return completion.choices[0].message.content or ""
254
+
255
@app.on_event("startup")
async def startup_event():
    """Try to build the shared resources as soon as the server starts.

    A failure here is only printed as a warning; the server keeps starting and
    the resources are requested again by the chat endpoint.
    """
    try:
        get_resources()
    except Exception as exc:
        print(f"Warning: Could not initialize resources on startup: {exc}")
262
+
263
@app.get("/")
def read_root():
    """Landing route: reports that the API is up and names the chat endpoint."""
    info = {"status": "Jain Sage AI is API Ready", "endpoints": "/chat"}
    return info
266
+
267
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer one question.

    Returns:
        ChatResponse carrying the generated answer.

    Raises:
        HTTPException 400: the query is empty or whitespace only.
        HTTPException 500: resource initialisation, retrieval or the LLM call failed.
    """
    import asyncio  # local import keeps this handler self-contained

    # Same rule as the interactive loop in main(): blank questions are not processed.
    if not request.query.strip():
        raise HTTPException(status_code=400, detail="Query must not be empty")

    try:
        res = get_resources()
        # ask_jain_sage is synchronous (embedding, Neo4j and Groq calls). Running
        # it in a worker thread keeps the event loop free for other requests.
        answer = await asyncio.to_thread(
            ask_jain_sage, request.query, res['retriever'], res['client']
        )
        return ChatResponse(answer=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
278
+
279
def main():
    """Interactive console loop: read a question, print the answer.

    Typing 'exit' or 'quit', pressing Ctrl+C, or reaching end of input
    (Ctrl+D / closed stdin) ends the loop. The Neo4j driver is always closed
    on the way out.
    """
    print("="*60)
    print(" Jain Philosophy AI Expert")
    print(" (Neo4j Graph + Llama 3.3 Internal Knowledge)")
    print("="*60)

    try:
        res = get_resources()
        retriever = res['retriever']
        client = res['client']
    except Exception as e:
        print(f"\n✗ Configuration Error: {e}")
        return

    try:
        while True:
            try:
                q = input("\nQ: ").strip()
                if q.lower() in ['exit', 'quit']:
                    break
                if not q:
                    continue

                print(" Thinking...", end='\r')
                ans = ask_jain_sage(q, retriever, client)
                print(" "*30, end='\r')
                print(f"A: {ans}\n")

            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: once stdin is exhausted every
                # further input() raises again, which would spin forever.
                break
            except Exception as e:
                print(f"\nError: {e}")
    finally:
        res['driver'].close()
311
+
312
+ if __name__ == "__main__":
313
+ if len(sys.argv) > 1 and sys.argv[1] == "run-server":
314
+ uvicorn.run(app, host="0.0.0.0", port=10000)
315
+ else:
316
+ main()
ingest.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hybrid RAG Ingestion Pipeline for "Anekant Syadvad" - Jain Philosophy Book
3
+
4
+ This script implements a sophisticated knowledge graph-based RAG system with:
5
+ 1. Hierarchical chunking (Book → Chapter → Section → Chunk)
6
+ 2. Rich metadata extraction
7
+ 3. Entity recognition for key Jain concepts
8
+ 4. Proper Unicode/transliteration handling
9
+ 5. Semantic embeddings with contextual overlap
10
+ """
11
+
12
+ import os
13
+ import re
14
+ from typing import List, Dict, Tuple, Optional
15
+ from dataclasses import dataclass, field
16
+ from pydantic import BaseModel, Field
17
+ from dotenv import load_dotenv
18
+ from neo4j import GraphDatabase
19
+ from sentence_transformers import SentenceTransformer
20
+
21
+ load_dotenv()
22
+
23
+ # Configuration
24
+ NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
25
+ NEO4J_AUTH = (os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "password"))
26
+ FILE_PATH = "Anekant Syadvad - Final.txt"
27
+
28
+ # Key Jain Philosophy Concepts for Entity Recognition
29
+ JAIN_CONCEPTS = {
30
+ # Core Doctrines
31
+ "Anekāntavāda": ["Anekantavada", "Anekāntvāda", "Anekānta", "many-sidedness", "non-absolutism"],
32
+ "Syādvāda": ["Syadvada", "Syād", "conditional predication", "theory of conditioned predication"],
33
+ "Saptabhaṅgī": ["Saptabhangi", "seven-fold predication", "seven propositions"],
34
+ "Naya": ["Nayas", "viewpoints", "perspectives", "7 Naya", "seven viewpoints"],
35
+
36
+ # Six Substances (Dravya)
37
+ "Jīvāstikāya": ["Jivastikaya", "Jīva", "Jiva", "soul", "living being", "consciousness"],
38
+ "Pudgalāstīkāya": ["Pudgalastikaya", "Pudgala", "matter"],
39
+ "Dharmāstikāya": ["Dharmastikaya", "Dharma", "motion principle"],
40
+ "Adharmāstikāya": ["Adharmastikaya", "Adharma", "rest principle"],
41
+ "Ākāśāstikāya": ["Akashastikaya", "Ākāṡa", "Akasha", "space"],
42
+ "Kāla": ["Kala", "time"],
43
+
44
+ # Nine Tattvas
45
+ "Tattvas": ["Tattva", "nine elements", "fundamental principles"],
46
+ "Āsrava": ["Asrava", "influx of karma"],
47
+ "Bandha": ["Bandh", "bondage of karma"],
48
+ "Saṃvara": ["Samvara", "stoppage"],
49
+ "Nirjarā": ["Nirjara", "shedding of karma"],
50
+ "Mokṣa": ["Moksha", "liberation", "salvation"],
51
+ "Punya": ["merit", "good karma"],
52
+ "Pāpa": ["Papa", "demerit", "sin"],
53
+
54
+ # Religious Terms
55
+ "Tīrthaṅkara": ["Tirthankara", "Tīrthankara", "Jina", "Arhat", "Arihaṅta"],
56
+ "Mahāvīra": ["Mahavira", "Vardhamana"],
57
+ "Ṛṣabhadeva": ["Rishabhadeva", "Adinatha", "first Tirthankara"],
58
+ "Karma": ["Karmas", "karmic matter"],
59
+ "Saṅgha": ["Sangha", "fourfold community"],
60
+ "Sādhu": ["Sadhu", "monk"],
61
+ "Sādhvī": ["Sadhvi", "nun"],
62
+ "Śrāvaka": ["Shravak", "layman"],
63
+ "Śrāvikā": ["Shravika", "laywoman"],
64
+
65
+ # Types of Knowledge
66
+ "Kēvalajñāna": ["Kevala Jnana", "Kevalajnana", "omniscience", "absolute knowledge"],
67
+ "Mati Jñāna": ["Mati Jnana", "sensory knowledge"],
68
+ "Śruta Jñāna": ["Shruta Jnana", "scriptural knowledge"],
69
+ "Avadhi Jñāna": ["Avadhi Jnana", "clairvoyance"],
70
+ "Manaḥparyaya": ["Manahparyaya", "telepathy"],
71
+
72
+ # Seven Nayas
73
+ "Naigama Naya": ["Naigama", "common viewpoint"],
74
+ "Saṅgraha Naya": ["Sangraha Naya", "collective viewpoint"],
75
+ "Vyavahāra Naya": ["Vyavahara Naya", "practical viewpoint"],
76
+ "Ṛjusūtra Naya": ["Rijusutra Naya", "linear viewpoint"],
77
+ "Śabda Naya": ["Shabda Naya", "verbal viewpoint"],
78
+ "Samabhirūḍha Naya": ["Samabhirudha Naya", "etymological viewpoint"],
79
+ "Evambhūta Naya": ["Evambhuta Naya", "actuality viewpoint"],
80
+
81
+ # Practices
82
+ "Ahiṃsā": ["Ahimsa", "non-violence"],
83
+ "Anuvratas": ["Anuvrata", "small vows", "five vows"],
84
+ "Namaskāra Mahāmantra": ["Namaskar Mantra", "Navkar Mantra"],
85
+ "Guṇasthānaka": ["Gunasthana", "stages of spiritual development"],
86
+ }
87
+
88
+ # Chapter structure from the book's table of contents
89
+ CHAPTERS = [
90
+ {"number": 1, "title": "The Quest for Truth", "start_page": 1, "end_page": 13},
91
+ {"number": 2, "title": "Perspectives and Paradoxes", "start_page": 14, "end_page": 19},
92
+ {"number": 3, "title": "Beyond Judgement", "start_page": 20, "end_page": 26},
93
+ {"number": 4, "title": "Jainism: A First Look", "start_page": 27, "end_page": 39},
94
+ {"number": 5, "title": "Religion and Philosophy", "start_page": 40, "end_page": 48},
95
+ {"number": 6, "title": "Anekāntavāda", "start_page": 49, "end_page": 65},
96
+ {"number": 7, "title": "Syādvāda", "start_page": 66, "end_page": 75},
97
+ {"number": 8, "title": "Four Bases", "start_page": 76, "end_page": 84},
98
+ {"number": 9, "title": "Five Reasons", "start_page": 85, "end_page": 100},
99
+ {"number": 10, "title": "Various aspects of Knowledge", "start_page": 101, "end_page": 115},
100
+ {"number": 11, "title": "7 Naya", "start_page": 116, "end_page": 139},
101
+ {"number": 12, "title": "A point of view", "start_page": 140, "end_page": 147},
102
+ {"number": 13, "title": "Saptabhaṅgī", "start_page": 148, "end_page": 167},
103
+ {"number": 14, "title": "Barrister Chakravarti", "start_page": 168, "end_page": 181},
104
+ {"number": 15, "title": "Five Types of Knowledge", "start_page": 182, "end_page": 198},
105
+ {"number": 16, "title": "Karma", "start_page": 199, "end_page": 222},
106
+ {"number": 17, "title": "Development of the Soul", "start_page": 223, "end_page": 256},
107
+ {"number": 18, "title": "Life is a hassle", "start_page": 257, "end_page": 282},
108
+ {"number": 19, "title": "Confirmation and Refutation", "start_page": 283, "end_page": 292},
109
+ {"number": 20, "title": "Namaskāra Mahāmantra", "start_page": 293, "end_page": 309},
110
+ ]
111
+
112
+
113
class ChunkMetadata(BaseModel):
    """Descriptive fields stored alongside each text chunk."""

    # --- identity and position ---
    chunk_id: str = Field(description="Unique identifier for the chunk")
    chapter_number: int = Field(description="Chapter number (1-20)")
    chapter_title: str = Field(description="Title of the chapter")
    page_number: str = Field(description="Page number or range")
    chunk_index: int = Field(description="Index of chunk within chapter")
    total_chunks_in_chapter: int = Field(default=0, description="Total chunks in this chapter")

    # --- content statistics ---
    word_count: int = Field(description="Number of words in chunk")
    has_sanskrit_terms: bool = Field(default=False, description="Whether chunk contains Sanskrit/transliterated terms")
    key_concepts: List[str] = Field(default_factory=list, description="Key Jain concepts mentioned")
124
+
125
+
126
@dataclass
class ProcessedChunk:
    """One chunk of book text together with its metadata and embedding."""

    text: str  # chunk body
    metadata: ChunkMetadata  # descriptive fields for this chunk
    embedding: List[float] = field(default_factory=list)  # empty until an embedding is assigned
132
+
133
+
134
def clean_text(text: str) -> str:
    """Tidy raw page text without touching Unicode letters.

    Form feeds and NUL bytes are dropped, runs of spaces/tabs collapse to one
    space, three or more newlines collapse to a single blank line, and the
    result is stripped.
    """
    # One pass to delete the two unwanted control characters.
    without_controls = text.translate({0x0C: None, 0x00: None})

    # Whitespace normalisation; paragraph breaks (blank lines) survive.
    single_spaced = re.sub(r'[ \t]+', ' ', without_controls)
    compact = re.sub(r'\n{3,}', '\n\n', single_spaced)

    return compact.strip()
145
+
146
+
147
# Well-formed roman numerals built from i, v, x, l, c (1-399). The lookahead
# rejects the empty string, which every group could otherwise match.
_ROMAN_PAGE_RE = re.compile(
    r'(?=[ivxlc])c{0,3}(?:xc|xl|l?x{0,3})(?:ix|iv|v?i{0,3})',
    re.IGNORECASE,
)


def extract_page_number(chunk: str) -> Optional[str]:
    """Return the page label found on the last line of *chunk*, if any.

    Args:
        chunk: text of one page; the page number is expected on its last line.

    Returns:
        The 1-3 digit arabic page number as a string, a lower-cased roman
        numeral (front matter), or None when the last line is neither.
    """
    stripped = chunk.strip()
    if not stripped:
        return None

    last_line = stripped.rsplit('\n', 1)[-1].strip()

    # ASCII digits only: str.isdigit() alone also accepts characters such as
    # '²' that int() cannot parse.
    if last_line.isascii() and last_line.isdigit() and len(last_line) <= 3:
        return last_line

    # Require a well-formed numeral so that ordinary words made of the same
    # letters (e.g. "civil") are not mistaken for a page number.
    if _ROMAN_PAGE_RE.fullmatch(last_line):
        return last_line.lower()

    return None
164
+
165
+
166
def identify_chapter(page_num: str) -> Optional[Dict]:
    """Look up the CHAPTERS entry whose page range contains *page_num*.

    Returns None when *page_num* is not an integer string (e.g. a roman
    numeral or None) or when no chapter range covers the page.
    """
    try:
        page = int(page_num)
    except (ValueError, TypeError):
        return None

    return next(
        (entry for entry in CHAPTERS if entry["start_page"] <= page <= entry["end_page"]),
        None,
    )
176
+
177
+
178
def _mentions_term(text: str, term: str) -> bool:
    """True if *term* occurs in *text* as a whole word (optionally pluralised)."""
    # Word boundaries stop short variants from matching inside unrelated words
    # ("sin" in "since", "time" in "sometimes", "Dharma" in "Adharma").
    # The optional suffix still accepts plain plurals such as "souls".
    pattern = rf'(?<!\w){re.escape(term)}(?:e?s)?(?!\w)'
    return re.search(pattern, text, re.IGNORECASE) is not None


def extract_key_concepts(text: str, concepts: Optional[Dict[str, List[str]]] = None) -> List[str]:
    """Return the canonical names of the concepts mentioned in *text*.

    Args:
        text: passage to scan.
        concepts: mapping of canonical name -> list of variants; defaults to
            the module-level JAIN_CONCEPTS.

    Returns:
        Canonical names, each at most once, in the mapping's iteration order.
        Matching is case-insensitive and on whole words.
    """
    vocabulary = JAIN_CONCEPTS if concepts is None else concepts

    found_concepts = []
    for main_term, variants in vocabulary.items():
        if any(_mentions_term(text, candidate) for candidate in (main_term, *variants)):
            found_concepts.append(main_term)
    return found_concepts
196
+
197
+
198
def has_transliterated_terms(text: str) -> bool:
    """Report whether *text* contains any diacritic letter used in Sanskrit transliteration."""
    diacritics = frozenset("āīūṛṅñṭḍṇśṣḥṁĀĪŪṚṄÑṬḌṆŚṢḤṀ")
    for ch in text:
        if ch in diacritics:
            return True
    return False
202
+
203
+
204
def _split_oversized(para: str, max_size: int) -> List[str]:
    """Break a paragraph longer than *max_size* into pieces at sentence ends."""
    if len(para) <= max_size:
        return [para]

    pieces = []
    current = ""
    for sentence in re.split(r'(?<=[.!?])\s+', para):
        # A single sentence above the limit is broken on word boundaries.
        units = [sentence] if len(sentence) <= max_size else sentence.split()
        for unit in units:
            if current and len(current) + 1 + len(unit) > max_size:
                pieces.append(current)
                current = unit
            else:
                current = f"{current} {unit}" if current else unit
    if current:
        pieces.append(current)
    return pieces


def semantic_chunk(text: str, max_chunk_size: int = 1500, overlap: int = 200) -> List[str]:
    """Split *text* into chunks along paragraph (and, if needed, sentence) boundaries.

    Paragraphs are packed into a chunk until adding the next one would exceed
    *max_chunk_size* characters. Each following chunk starts with the last
    ``overlap // 10`` words of the previous chunk for context. A paragraph
    that is itself longer than *max_chunk_size* is first divided at sentence
    ends.

    Args:
        text: text to split; paragraphs are separated by blank lines.
        max_chunk_size: soft character limit per chunk (the overlap prefix is
            added on top of it).
        overlap: overlap budget; ``overlap // 10`` trailing words are repeated.
            0 disables the overlap.

    Returns:
        The list of chunk strings (empty for blank input).
    """
    overlap_word_count = overlap // 10

    # Paragraph split first, then subdivide only the paragraphs that cannot fit.
    units = []
    for para in re.split(r'\n\n+', text):
        para = para.strip()
        if para:
            units.extend(_split_oversized(para, max_chunk_size))

    chunks = []
    current_chunk = ""

    for unit in units:
        if current_chunk and len(current_chunk) + len(unit) + 2 > max_chunk_size:
            chunks.append(current_chunk.strip())

            # Seed the next chunk with the tail of the one just closed.
            # Guard the zero case explicitly: words[-0:] would be the WHOLE list.
            words = current_chunk.split()
            overlap_words = words[-overlap_word_count:] if overlap_word_count > 0 else []
            if overlap_words:
                current_chunk = " ".join(overlap_words) + "\n\n" + unit
            else:
                current_chunk = unit
        elif current_chunk:
            current_chunk += "\n\n" + unit
        else:
            current_chunk = unit

    # Don't forget the last chunk
    if current_chunk.strip():
        chunks.append(current_chunk.strip())

    return chunks
239
+
240
+
241
def parse_book(file_path: str) -> List[ProcessedChunk]:
    """Parse the book file into per-chapter chunks with metadata.

    The file is split into pages on form feeds. Each page is assigned to a
    chapter by the page number on its last line; the pages of a chapter are
    joined and then chunked with ``semantic_chunk``.

    Args:
        file_path: path to the UTF-8 text file of the book.

    Returns:
        ProcessedChunk objects (embeddings not yet computed), ordered by
        chapter and by position within the chapter.
    """
    print(f"Reading file: {file_path}")

    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # Verify transliteration characters are present
    test_chars = ["ā", "ī", "ū", "ṛ", "ṅ", "ś", "ṣ", "ḥ"]
    found_chars = [c for c in test_chars if c in content]
    print(f"✓ Found transliterated characters: {found_chars}")

    # Split by form feed (page breaks)
    raw_pages = content.split('\x0c')
    print(f"Found {len(raw_pages)} raw pages")

    # Group content by chapter: chapter number -> [(page label, page text), ...]
    chapter_content: Dict[int, List[Tuple[str, str]]] = {c["number"]: [] for c in CHAPTERS}
    unassigned_pages = 0

    for page in raw_pages:
        page = clean_text(page)
        if not page:
            continue

        page_num = extract_page_number(page)
        chapter = identify_chapter(page_num) if page_num else None

        if chapter is None:
            # Front/back matter or a page without a recognisable number.
            unassigned_pages += 1
            continue

        # Remove page number from end for cleaner text
        lines = page.split('\n')
        if lines and lines[-1].strip() == page_num:
            page = '\n'.join(lines[:-1])

        chapter_content[chapter["number"]].append((page_num, page))

    if unassigned_pages:
        print(f"Skipped {unassigned_pages} pages outside the chapter page ranges")

    # Process chapters into chunks
    all_chunks: List[ProcessedChunk] = []
    chapters_with_content = 0

    for chapter in CHAPTERS:
        chapter_pages = chapter_content[chapter["number"]]
        if not chapter_pages:
            continue
        chapters_with_content += 1

        # Combine all text from chapter
        chapter_text = "\n\n".join([p[1] for p in chapter_pages])
        page_range = f"{chapter_pages[0][0]}-{chapter_pages[-1][0]}" if len(chapter_pages) > 1 else chapter_pages[0][0]

        # Semantic chunking
        text_chunks = semantic_chunk(chapter_text, max_chunk_size=1500, overlap=200)

        for idx, chunk_text in enumerate(text_chunks):
            metadata = ChunkMetadata(
                chunk_id=f"ch{chapter['number']:02d}_chunk{idx:03d}",
                chapter_number=chapter["number"],
                chapter_title=chapter["title"],
                page_number=page_range,
                chunk_index=idx,
                # All chunks of a chapter come from this one semantic_chunk call,
                # so this count is already final and needs no later recount.
                total_chunks_in_chapter=len(text_chunks),
                word_count=len(chunk_text.split()),
                has_sanskrit_terms=has_transliterated_terms(chunk_text),
                key_concepts=extract_key_concepts(chunk_text),
            )

            all_chunks.append(ProcessedChunk(text=chunk_text, metadata=metadata))

    print(f"Created {len(all_chunks)} semantic chunks across {chapters_with_content} chapters")
    return all_chunks
320
+
321
+
322
def create_graph_schema(session):
    """Create the Neo4j constraints and indexes used by ingestion and search.

    Every statement uses IF NOT EXISTS and is run independently; a failure is
    printed as a warning (unless it is an "already exists" error) and the
    remaining statements still run.

    Args:
        session: an open Neo4j session.
    """
    queries = [
        # Constraints
        "CREATE CONSTRAINT book_title IF NOT EXISTS FOR (b:Book) REQUIRE b.title IS UNIQUE",
        "CREATE CONSTRAINT chapter_id IF NOT EXISTS FOR (c:Chapter) REQUIRE c.chapter_id IS UNIQUE",
        "CREATE CONSTRAINT chunk_id IF NOT EXISTS FOR (ch:Chunk) REQUIRE ch.chunk_id IS UNIQUE",
        "CREATE CONSTRAINT concept_name IF NOT EXISTS FOR (con:Concept) REQUIRE con.name IS UNIQUE",

        # Vector index for semantic search
        """CREATE VECTOR INDEX chunk_embeddings IF NOT EXISTS
        FOR (n:Chunk)
        ON (n.embedding)
        OPTIONS {indexConfig: {
        `vector.dimensions`: 768,
        `vector.similarity_function`: 'cosine'
        }}""",

        # Full-text index for keyword search
        "CREATE FULLTEXT INDEX chunk_text_index IF NOT EXISTS FOR (n:Chunk) ON EACH [n.text, n.chapter_title]",

        # Full-text index for concept lookup. It has to be FULLTEXT because it is
        # queried by name through db.index.fulltext.queryNodes; plain lookups on
        # Concept.name are already served by the uniqueness constraint above.
        # NOTE(review): a database that already holds a non-fulltext index named
        # 'concept_name_index' needs that index dropped once before this applies.
        "CREATE FULLTEXT INDEX concept_name_index IF NOT EXISTS FOR (c:Concept) ON EACH [c.name]",
    ]

    for query in queries:
        try:
            # consume() forces the statement to finish here, so any error is
            # raised inside this try and not by a later, unrelated call.
            session.run(query).consume()
        except Exception as e:
            if "already exists" not in str(e).lower():
                print(f"Warning: {e}")
353
+
354
+
355
def ingest_to_neo4j(chunks: List[ProcessedChunk], embedder: SentenceTransformer):
    """Rebuild the book graph in Neo4j from *chunks*.

    Steps: delete existing Book/Chapter/Chunk/Concept nodes, create the schema,
    create the Book, Chapter and Concept nodes, then store every chunk with its
    embedding and link it to its chapter, its predecessor (NEXT) and the
    concepts it mentions (MENTIONS). Finally concept pairs mentioned together
    in more than one chunk are connected with CO_OCCURS_WITH.

    Args:
        chunks: parsed chunks; each chunk's ``embedding`` is filled in here.
        embedder: model whose ``encode(text).tolist()`` yields the embedding.

    Raises:
        Any exception raised by the Neo4j driver or the embedder; the driver
        is closed before the exception propagates.
    """
    print(f"Connecting to Neo4j at {NEO4J_URI}...")

    driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
    try:
        driver.verify_connectivity()
        print("✓ Connected to Neo4j")

        with driver.session() as session:
            # Clear existing data
            print("Clearing existing book data...")
            session.run("MATCH (n) WHERE n:Book OR n:Chapter OR n:Chunk OR n:Concept DETACH DELETE n")

            # Create schema
            print("Creating graph schema...")
            create_graph_schema(session)

            # Create Book node
            session.run("""
                CREATE (b:Book {
                title: 'Anekāntavāda: The Heart of Jainism',
                author: 'Late Mr. Chandulal S. Shah',
                translator: 'Ms. Nimisha Vora',
                language: 'English',
                genre: 'Philosophy',
                total_chapters: $total_chapters
                })
            """, total_chapters=len(CHAPTERS))

            # Create Chapter nodes
            for chapter in CHAPTERS:
                session.run("""
                    MATCH (b:Book {title: 'Anekāntavāda: The Heart of Jainism'})
                    CREATE (c:Chapter {
                    chapter_id: $chapter_id,
                    number: $number,
                    title: $title,
                    start_page: $start_page,
                    end_page: $end_page
                    })
                    CREATE (b)-[:HAS_CHAPTER]->(c)
                """,
                    chapter_id=f"chapter_{chapter['number']}",
                    number=chapter["number"],
                    title=chapter["title"],
                    start_page=chapter["start_page"],
                    end_page=chapter["end_page"]
                )

            # Create sequential chapter relationships
            session.run("""
                MATCH (c1:Chapter), (c2:Chapter)
                WHERE c2.number = c1.number + 1
                CREATE (c1)-[:NEXT_CHAPTER]->(c2)
            """)

            # Create Concept nodes
            print("Creating concept nodes...")
            for concept_name, variants in JAIN_CONCEPTS.items():
                session.run("""
                    CREATE (con:Concept {
                    name: $name,
                    variants: $variants,
                    category: $category
                    })
                """,
                    name=concept_name,
                    variants=variants,
                    category=categorize_concept(concept_name)
                )

            # Ingest chunks with embeddings
            print("Ingesting chunks with embeddings...")
            prev_chunk_id = None

            for i, chunk in enumerate(chunks):
                # Generate embedding
                embedding = embedder.encode(chunk.text).tolist()
                chunk.embedding = embedding

                # Create Chunk node
                session.run("""
                    MATCH (ch:Chapter {number: $chapter_number})
                    CREATE (c:Chunk {
                    chunk_id: $chunk_id,
                    text: $text,
                    chapter_number: $chapter_number,
                    chapter_title: $chapter_title,
                    page_number: $page_number,
                    chunk_index: $chunk_index,
                    word_count: $word_count,
                    has_sanskrit_terms: $has_sanskrit_terms,
                    key_concepts: $key_concepts,
                    embedding: $embedding
                    })
                    CREATE (ch)-[:CONTAINS]->(c)
                """,
                    chunk_id=chunk.metadata.chunk_id,
                    text=chunk.text,
                    chapter_number=chunk.metadata.chapter_number,
                    chapter_title=chunk.metadata.chapter_title,
                    page_number=chunk.metadata.page_number,
                    chunk_index=chunk.metadata.chunk_index,
                    word_count=chunk.metadata.word_count,
                    has_sanskrit_terms=chunk.metadata.has_sanskrit_terms,
                    key_concepts=chunk.metadata.key_concepts,
                    embedding=embedding
                )

                # Create NEXT relationship for sequential reading
                if prev_chunk_id:
                    session.run("""
                        MATCH (c1:Chunk {chunk_id: $prev_id}), (c2:Chunk {chunk_id: $curr_id})
                        CREATE (c1)-[:NEXT]->(c2)
                    """, prev_id=prev_chunk_id, curr_id=chunk.metadata.chunk_id)

                prev_chunk_id = chunk.metadata.chunk_id

                # Link to concepts
                for concept_name in chunk.metadata.key_concepts:
                    session.run("""
                        MATCH (c:Chunk {chunk_id: $chunk_id}), (con:Concept {name: $concept_name})
                        CREATE (c)-[:MENTIONS]->(con)
                    """, chunk_id=chunk.metadata.chunk_id, concept_name=concept_name)

                if (i + 1) % 20 == 0:
                    print(f" Ingested {i + 1}/{len(chunks)} chunks...", end='\r')

            # Create concept co-occurrence relationships.
            # Ordering the pair by the unique concept name counts each unordered
            # pair exactly once without relying on internal node ids.
            print("\nCreating concept co-occurrence relationships...")
            session.run("""
                MATCH (c1:Concept)<-[:MENTIONS]-(chunk:Chunk)-[:MENTIONS]->(c2:Concept)
                WHERE c1.name < c2.name
                WITH c1, c2, count(chunk) as co_occurrences
                WHERE co_occurrences > 1
                MERGE (c1)-[r:CO_OCCURS_WITH]->(c2)
                SET r.count = co_occurrences
            """)
    finally:
        # Release the connection pool on success and on failure alike.
        driver.close()

    print(f"\n✓ Ingestion complete! {len(chunks)} chunks stored in Neo4j.")
496
+
497
+
498
def categorize_concept(concept_name: str) -> str:
    """Map a canonical concept name to its category label ("Other" if unlisted)."""
    members_by_category = {
        "Core Doctrine": ["Anekāntavāda", "Syādvāda", "Saptabhaṅgī", "Naya"],
        "Six Substances": ["Jīvāstikāya", "Pudgalāstīkāya", "Dharmāstikāya", "Adharmāstikāya", "Ākāśāstikāya", "Kāla"],
        "Nine Tattvas": ["Tattvas", "Āsrava", "Bandha", "Saṃvara", "Nirjarā", "Mokṣa", "Punya", "Pāpa"],
        "Religious Terms": ["Tīrthaṅkara", "Mahāvīra", "Ṛṣabhadeva", "Karma", "Saṅgha", "Sādhu", "Sādhvī", "Śrāvaka", "Śrāvikā"],
        "Types of Knowledge": ["Kēvalajñāna", "Mati Jñāna", "Śruta Jñāna", "Avadhi Jñāna", "Manaḥparyaya"],
        "Seven Nayas": ["Naigama Naya", "Saṅgraha Naya", "Vyavahāra Naya", "Ṛjusūtra Naya", "Śabda Naya", "Samabhirūḍha Naya", "Evambhūta Naya"],
        "Practices": ["Ahiṃsā", "Anuvratas", "Namaskāra Mahāmantra", "Guṇasthānaka"],
    }

    # First category (in declaration order) that lists the name wins.
    return next(
        (label for label, members in members_by_category.items() if concept_name in members),
        "Other",
    )
514
+
515
+
516
def main():
    """Run the ingestion end to end: load the model, parse the book, write to Neo4j."""
    banner = "=" * 60

    print(banner)
    print("Hybrid RAG Ingestion for Anekant Syadvad")
    print(banner)

    # Step 1: embedding model
    print("\nLoading embedding model (all-mpnet-base-v2)...")
    embedder = SentenceTransformer('all-mpnet-base-v2')
    print("✓ Model loaded")

    # Step 2: book -> chunks
    print("\nParsing book...")
    chunks = parse_book(FILE_PATH)

    # Step 3: chunks -> graph
    print("\nIngesting to Neo4j...")
    ingest_to_neo4j(chunks, embedder)

    print("\n" + banner)
    print("Ingestion Complete!")
    print(banner)
537
+
538
+
539
+ if __name__ == "__main__":
540
+ main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-dotenv
4
+ neo4j
5
+ sentence-transformers
6
+ groq
7
+ pydantic
sample_output.txt ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ### Question
3
+ explain 5 rationales described by the Jain philosophers
4
+
5
+ ### Answer
6
+ The 5 rationales described by Jain philosophers are essential for understanding the functioning of the world and the occurrence of events. These rationales are:
7
+
8
+ 1. **Kāla** (Time): This rationale defines the maturity and immaturity periods of a thing or work, serving as a cause for events to take place. Kāla refers to the concept of time, which is considered a fundamental aspect of the universe. In Jain philosophy, time is seen as a continuous process that governs the evolution and devolution of all things.
9
+
10
+ 2. **Svabhāva** (Intrinsic Nature): This rationale refers to the inherent nature or characteristics of an entity, which can be recognized by its natural merit or features. Svabhāva is the unique essence or identity of a thing, which distinguishes it from others. It is the intrinsic property that defines the nature of an entity and determines its behavior.
11
+
12
+ 3. **Bhavitavyatā** (Destiny or Niyati): Also known as Niyati, this rationale is considered one of the eternal, infinite, and independent fundamentals. Bhavitavyatā refers to the concept of destiny or the natural order of things, which governs the course of events in the universe. It is the inherent tendency or predisposition of an entity to behave in a certain way.
13
+
14
+ 4. **Prārabdha** (Karma): This rationale refers to the actions of an individual and the community, which shape their destiny. Prārabdha is the accumulated karma or the consequences of past actions, which influence the present and future. In Jain philosophy, karma is seen as a fundamental principle that governs the universe, and it is believed that every action has consequences that affect an individual's spiritual journey.
15
+
16
+ 5. **Puruṣārtha** (Human Effort or Udyama): This rationale refers to the efforts made by a living being to achieve their goals. Puruṣārtha is the human endeavor or striving that enables individuals to overcome challenges and attain their objectives. It is the conscious effort or exertion that individuals make to shape their own destiny and achieve spiritual growth.
17
+
18
+ These 5 rationales are interconnected and interdependent, and they work together to shape the world and the events that occur within it. By understanding these rationales, individuals can gain insight into the workings of the universe and the principles that govern human existence.
19
+
20
+ ---
21
+
22
+ ### Question
23
+ explain 7 Naya
24
+
25
+ ### Answer
26
+ The 7 Naya, also known as the seven modes of knowing or perspectives, are a fundamental concept in Jain philosophy. Naya (नय) is a Sanskrit term that means "approach," "method," or "way of knowing." It refers to the different ways of understanding or perceiving an object, concept, or reality. The 7 Naya provide a framework for analyzing and categorizing the various aspects of an object, allowing for a more nuanced and comprehensive understanding.
27
+
28
+ The 7 Naya are:
29
+
30
+ 1. **Naigama Naya** (नैगम नय): This perspective involves understanding both the general and specific aspects of an object separately. Naigama is derived from the word Nigam (निगम), which means "resolution" or "decision." It refers to the process of distinguishing between the general and specific characteristics of an object.
31
+
32
+ 2. **Saṅgraha Naya** (संग्रह नय): This perspective involves understanding the object as a whole, considering its various aspects and attributes. Saṅgraha means "collection" or "gathering," and this Naya involves collecting and synthesizing information about the object.
33
+
34
+ 3. **Vyavahāra Naya** (व्यवहार नय): This perspective involves understanding the object in terms of its practical or functional aspects. Vyavahāra means "practical" or "worldly," and this Naya involves considering how the object is used or applied in everyday life.
35
+
36
+ 4. **Ṛjusūtra Naya** (ऋजुसूत्र नय): This perspective involves understanding the object in terms of its essential or intrinsic nature. Ṛjusūtra means "straight" or "direct," and this Naya involves considering the object's inherent characteristics or properties.
37
+
38
+ 5. **Śabda Naya** (शब्द नय): This perspective involves understanding the object through language or verbal description. Śabda means "word" or "sound," and this Naya involves considering the object's name, definition, or linguistic representation.
39
+
40
+ 6. **Samabhirūḍha Naya** (समभिरूढ नय): This perspective involves understanding the object in terms of its relationships or interactions with other objects. Samabhirūḍha means "equally" or "simultaneously," and this Naya involves considering the object's connections or correlations with other entities.
41
+
42
+ 7. **Evambhūta Naya** (एवंभूत नय): This perspective involves understanding the object in terms of its unique or distinctive characteristics. Evambhūta means "thus" or "in this way," and this Naya involves considering the object's individual or peculiar aspects.
43
+
44
+ The 7 Naya are not mutually exclusive, and they can be used in combination to gain a more comprehensive understanding of an object or concept. By applying these different perspectives, one can develop a more nuanced and multifaceted understanding of reality, which is a key goal of Jain philosophy. The 7 Naya are often used in conjunction with other Jain concepts, such as Anekāntavāda (the doctrine of multiple perspectives) and Syādvāda (the doctrine of conditional assertion), to provide a more complete and balanced understanding of the world.
45
+
46
+ ---
47
+
48
+ ### Question
49
+ Why does the author compare Anekāntavāda to Einstein’s Theory of Relativity?
50
+
51
+ ### Answer
52
+ The author compares Anekāntavāda to Einstein's Theory of Relativity because both concepts deal with the idea of relativity. In Jain philosophy, Anekāntavāda (Sanskrit: अनेकान्तवाद, meaning "non-absolutism" or "many-sidedness") refers to the doctrine that truth and reality are complex and multifaceted, and can be viewed from different perspectives. This concept is central to Jain epistemology and ontology.
53
+
54
+ Similarly, Einstein's Theory of Relativity challenged the traditional notion of absolute space and time, introducing the concept of relative motion and the equivalence of all inertial frames of reference. The author suggests that the idea of relativity in Jain philosophy, as expressed in Anekāntavāda, predates Einstein's theory by thousands of years.
55
+
56
+ By drawing this comparison, the author highlights the parallels between the two concepts and implies that Jain philosophy has long recognized the importance of considering multiple perspectives and the relative nature of reality. The author is emphasizing that Jainism's Anekāntavāda is a fundamental and unchallengeable principle that acknowledges the complexity and multifaceted nature of reality, much like Einstein's Theory of Relativity revolutionized our understanding of space and time.
57
+
58
+ In essence, the comparison serves to illustrate the depth and profundity of Jain philosophical thought, demonstrating that ancient Jain philosophers had already grasped the concept of relativity, which was later developed in modern physics. The term Anekāntavāda itself is derived from the Sanskrit words "anekānta," meaning "many-sided" or "non-absolutist," and "vāda," meaning "doctrine" or "theory."
59
+
60
+ The concept of Anekāntavāda is closely related to other key Jain terms, such as Syādvāda (Sanskrit: स्याद्वाद, meaning "maybe-ism" or "conditionalism"), which refers to the practice of qualifying statements with conditions or possibilities, acknowledging the complexity and contextuality of truth. By invoking these concepts, the author aims to showcase the richness and sophistication of Jain philosophical thought, which has been developed over centuries.
61
+
62
+ ---
63
+
64
+ ### Question
65
+ Name the six substances (Dravyas) described in Jain philosophy.
66
+
67
+ ### Answer
68
+ The six substances (Dravyas) described in Jain philosophy are:
69
+
70
+ 1. **Jīvāstikāya** (Living substance): Refers to the substance that comprises all living beings, including humans, animals, and microorganisms. The term 'Jīva' (जीव) means a living being or a soul, and 'Astikāya' (अस्तिकाय) refers to a substance that has a physical presence.
71
+
72
+ 2. **Dharmāstikāya** (Substance of motion): Helps things to move from one place to another. The word 'Dharma' (धर्म) in this context means motion or movement.
73
+
74
+ 3. **Adharmāstikāya** (Substance of rest): Helps things to stay still or stationary. The term 'Adharma' (अधर्म) in this context means rest or stillness.
75
+
76
+ 4. **Ākāśāstikāya** (Substance of space): Provides space for all other substances to exist. The term 'Ākāśa' (आकाश) means space or ether.
77
+
78
+ 5. **Pudgalāstīkāya** (Substance of matter): Comprises all non-living things, including matter and energy. The term 'Pudgala' (पुद्गल) means matter or material substance.
79
+
80
+ 6. **Kāla** (Time): Is the sixth substance, which exists independently and is considered an inanimate object. The term 'Kāla' (काल) means time.
81
+
82
+ These six substances are the fundamental components of the universe according to Jain philosophy, and they interact with each other to create the complex world we experience.
83
+
84
+ ---
85
+
86
+ ### Question
87
+ What is the highest form of knowledge according to Jainism?
88
+
89
+ ### Answer
90
+ The highest form of knowledge according to Jainism is **Kevala Jñāna** (केवल ज्ञान), which refers to omniscience or absolute knowledge. Kevala Jñāna is the state of complete knowledge where nothing remains unknown to the soul. A soul that has attained Kevala Jñāna is known as a Kevalī (केवली) or Sarvajña (सर्वज्ञ), and has the ability to comprehend and visualize everything in the universe, past, present, and future.
91
+
92
+ In this state, the soul has direct and self-evident knowledge without the need for any external medium or assistance. Kevala Jñāna is considered the ultimate goal of spiritual development in Jainism, where the soul has attained complete self-realization and liberation from the cycle of birth and death.
93
+
94
+ It is worth noting that Kevala Jñāna is different from other forms of knowledge, such as Mati (मति) and Śruta (श्रुत), which are indirect forms of knowledge that rely on the senses and the mind as intermediaries. In contrast, Kevala Jñāna is a direct and intuitive form of knowledge that arises from the soul itself, without the need for external sources or mediums.
95
+
96
+ ---
97
+
98
+ ### Question
99
+ What are the nine fundamental principles (Tattvas) in Jainism?
100
+
101
+ ### Answer
102
+ The nine fundamental principles (Tattvas) in Jainism are the essential concepts that describe the nature of the universe, the soul, and the path to liberation. These principles are as follows:
103
+
104
+ 1. **Jiva** (living beings): Jiva refers to the sentient beings that possess consciousness. In Jainism, Jiva is considered to be eternal and indestructible, and it is the essence of life.
105
+
106
+ 2. **Ajiva** (non-living things): Ajiva refers to non-sentient matter, which includes all inanimate objects, such as rocks, water, and air.
107
+
108
+ 3. **Punya** (virtue): Punya refers to the accumulation of positive karma, which results from good deeds and actions. It is considered to be a factor that contributes to the spiritual growth and development of the soul.
109
+
110
+ 4. **Pāpa** (sin): Pāpa refers to the accumulation of negative karma, which results from bad deeds and actions. It is considered to be a factor that hinders the spiritual growth and development of the soul.
111
+
112
+ 5. **Āsrava** (influx of karmic matter): Āsrava refers to the process by which karma accumulates and attaches to the soul. This influx of karmic matter is considered to be the cause of the soul's bondage to the cycle of birth and death.
113
+
114
+ 6. **Samvara** (stoppage of karmic matter): Samvara refers to the process of stopping the influx of karmic matter, which is essential for the soul's liberation. This can be achieved through the practice of austerities, self-control, and detachment.
115
+
116
+ 7. **Bandha** (bondage): Bandha refers to the state of being bound to the cycle of birth and death, which is caused by the accumulation of karma. This bondage is considered to be the root cause of suffering and unhappiness.
117
+
118
+ 8. **Nirjarā** (shedding of karmic matter): Nirjarā refers to the process of shedding or destroying the accumulated karma, which is essential for the soul's liberation. This can be achieved through the practice of austerities, self-control, and detachment.
119
+
120
+ 9. **Mokṣa** (liberation): Mokṣa refers to the state of ultimate liberation, where the soul is freed from the cycle of birth and death and achieves eternal bliss and happiness. This is considered to be the ultimate goal of Jainism, and it can be achieved through the practice of the above principles and the cultivation of spiritual knowledge and self-awareness.
121
+
122
+ These nine fundamental principles provide a framework for understanding the nature of the universe, the soul, and the path to liberation in Jainism. They offer a comprehensive and holistic approach to spiritual growth and development, and they are considered to be essential for achieving the ultimate goal of Mokṣa.
test_chatbot.html ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Jain Sage AI | Interactive Interface</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
11
+ <style>
12
+ :root {
13
+ --primary: #FF9F1C;
14
+ /* Saffron/Gold */
15
+ --secondary: #2EC4B6;
16
+ /* Teal/Turquoise */
17
+ --dark: #0F172A;
18
+ --darker: #020617;
19
+ --light: #F8FAFC;
20
+ --glass: rgba(255, 255, 255, 0.05);
21
+ --glass-border: rgba(255, 255, 255, 0.1);
22
+ }
23
+
24
+ * {
25
+ box-sizing: border-box;
26
+ margin: 0;
27
+ padding: 0;
28
+ }
29
+
30
+ body {
31
+ font-family: 'Outfit', sans-serif;
32
+ background-color: var(--darker);
33
+ color: var(--light);
34
+ min-height: 100vh;
35
+ display: flex;
36
+ justify-content: center;
37
+ align-items: center;
38
+ background: radial-gradient(circle at top right, #1e293b, var(--darker));
39
+ overflow-x: hidden;
40
+ }
41
+
42
+ .container {
43
+ width: 100%;
44
+ max-width: 900px;
45
+ padding: 2rem;
46
+ position: relative;
47
+ z-index: 10;
48
+ }
49
+
50
+ /* Ambient Background blobs */
51
+ .blob {
52
+ position: absolute;
53
+ width: 400px;
54
+ height: 400px;
55
+ background: var(--primary);
56
+ filter: blur(150px);
57
+ opacity: 0.2;
58
+ border-radius: 50%;
59
+ z-index: -1;
60
+ animation: float 10s infinite alternate;
61
+ }
62
+
63
+ .blob-2 {
64
+ background: var(--secondary);
65
+ top: 10%;
66
+ right: 10%;
67
+ animation-delay: -5s;
68
+ }
69
+
70
+ @keyframes float {
71
+ 0% {
72
+ transform: translate(0, 0);
73
+ }
74
+
75
+ 100% {
76
+ transform: translate(20px, 40px);
77
+ }
78
+ }
79
+
80
+ header {
81
+ text-align: center;
82
+ margin-bottom: 3rem;
83
+ }
84
+
85
+ h1 {
86
+ font-size: 3.5rem;
87
+ font-weight: 700;
88
+ background: linear-gradient(135deg, #fff 0%, var(--primary) 100%);
89
+ -webkit-background-clip: text;
90
+ -webkit-text-fill-color: transparent;
91
+ margin-bottom: 0.5rem;
92
+ letter-spacing: -1px;
93
+ }
94
+
95
+ .subtitle {
96
+ color: #94A3B8;
97
+ font-size: 1.1rem;
98
+ }
99
+
100
+ .chat-card {
101
+ background: var(--glass);
102
+ backdrop-filter: blur(16px);
103
+ border: 1px solid var(--glass-border);
104
+ border-radius: 24px;
105
+ padding: 2rem;
106
+ box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
107
+ }
108
+
109
+ .api-config {
110
+ margin-bottom: 1.5rem;
111
+ display: flex;
112
+ gap: 1rem;
113
+ align-items: center;
114
+ background: rgba(0, 0, 0, 0.2);
115
+ padding: 1rem;
116
+ border-radius: 12px;
117
+ }
118
+
119
+ .input-group {
120
+ flex: 1;
121
+ }
122
+
123
+ label {
124
+ display: block;
125
+ margin-bottom: 0.5rem;
126
+ font-size: 0.85rem;
127
+ color: var(--secondary);
128
+ font-weight: 600;
129
+ text-transform: uppercase;
130
+ letter-spacing: 0.5px;
131
+ }
132
+
133
+ input[type="text"],
134
+ textarea {
135
+ width: 100%;
136
+ background: rgba(15, 23, 42, 0.6);
137
+ border: 1px solid var(--glass-border);
138
+ color: white;
139
+ padding: 1rem;
140
+ border-radius: 12px;
141
+ font-family: inherit;
142
+ font-size: 1rem;
143
+ transition: all 0.3s ease;
144
+ }
145
+
146
+ input[type="text"]:focus,
147
+ textarea:focus {
148
+ outline: none;
149
+ border-color: var(--primary);
150
+ box-shadow: 0 0 0 2px rgba(255, 159, 28, 0.2);
151
+ }
152
+
153
+ .chat-box {
154
+ min-height: 200px;
155
+ max-height: 500px;
156
+ overflow-y: auto;
157
+ margin-bottom: 1.5rem;
158
+ display: flex;
159
+ flex-direction: column;
160
+ gap: 1rem;
161
+ }
162
+
163
+ .message {
164
+ padding: 1rem 1.5rem;
165
+ border-radius: 16px;
166
+ max-width: 80%;
167
+ line-height: 1.6;
168
+ animation: slideIn 0.3s ease-out;
169
+ }
170
+
171
+ @keyframes slideIn {
172
+ from {
173
+ opacity: 0;
174
+ transform: translateY(10px);
175
+ }
176
+
177
+ to {
178
+ opacity: 1;
179
+ transform: translateY(0);
180
+ }
181
+ }
182
+
183
+ .message.user {
184
+ align-self: flex-end;
185
+ background: rgba(46, 196, 182, 0.15);
186
+ border: 1px solid rgba(46, 196, 182, 0.3);
187
+ color: #d1fae5;
188
+ border-bottom-right-radius: 4px;
189
+ }
190
+
191
+ .message.bot {
192
+ align-self: flex-start;
193
+ background: rgba(255, 255, 255, 0.05);
194
+ border: 1px solid var(--glass-border);
195
+ color: #e2e8f0;
196
+ border-bottom-left-radius: 4px;
197
+ }
198
+
199
+ .controls {
200
+ display: flex;
201
+ gap: 1rem;
202
+ }
203
+
204
+ button {
205
+ background: var(--primary);
206
+ color: var(--darker);
207
+ border: none;
208
+ padding: 1rem 2rem;
209
+ border-radius: 12px;
210
+ font-weight: 700;
211
+ cursor: pointer;
212
+ transition: all 0.3s ease;
213
+ display: flex;
214
+ align-items: center;
215
+ gap: 0.5rem;
216
+ }
217
+
218
+ button:hover {
219
+ transform: translateY(-2px);
220
+ box-shadow: 0 10px 20px -5px rgba(255, 159, 28, 0.4);
221
+ }
222
+
223
+ button:disabled {
224
+ background: #64748b;
225
+ cursor: not-allowed;
226
+ transform: none;
227
+ }
228
+
229
+ .spinner {
230
+ width: 20px;
231
+ height: 20px;
232
+ border: 3px solid rgba(0, 0, 0, 0.3);
233
+ border-radius: 50%;
234
+ border-top-color: var(--darker);
235
+ animation: spin 1s linear infinite;
236
+ display: none;
237
+ }
238
+
239
+ @keyframes spin {
240
+ to {
241
+ transform: rotate(360deg);
242
+ }
243
+ }
244
+
245
+ /* Formatting for bot responses */
246
+ .message.bot strong {
247
+ color: var(--primary);
248
+ }
249
+ </style>
250
+ </head>
251
+
252
+ <body>
253
+ <div class="blob"></div>
254
+ <div class="blob blob-2"></div>
255
+
256
+ <div class="container">
257
+ <header>
258
+ <h1>Jain Sage AI</h1>
259
+ <p class="subtitle">Deep Wisdom from Anekant Syadvad &amp; The Tirthankaras</p>
260
+ </header>
261
+
262
+ <div class="chat-card">
263
+
264
+ <div class="api-config">
265
+ <div class="input-group">
266
+ <label for="apiUrl">API Endpoint URL</label>
267
+ <input type="text" id="apiUrl" value="https://deploymentcode.onrender.com/chat"
268
+ placeholder="http://localhost:10000/chat">
269
+ </div>
270
+ <div style="font-size: 0.8rem; color: #64748b; max-width: 300px;">
271
+ Replace with your Render URL after deployment.
272
+ </div>
273
+ </div>
274
+
275
+ <div class="chat-box" id="chatBox">
276
+ <div class="message bot">
277
+ Namaste. I am an AI scholar of Jain Philosophy. Ask me about Anekantavada, Karma, or the path to
278
+ Moksha.
279
+ </div>
280
+ </div>
281
+
282
+ <div class="controls">
283
+ <input type="text" id="queryInput" placeholder="Ask a question..."
284
+ onkeydown="if(event.key==='Enter') sendMessage()">
285
+ <button onclick="sendMessage()" id="sendBtn">
286
+ <span>Ask</span>
287
+ <div class="spinner" id="spinner"></div>
288
+ </button>
289
+ </div>
290
+ </div>
291
+ </div>
292
+
293
+ <script>
294
/**
 * Send the current query to the configured chat endpoint and render the reply.
 *
 * Reads the question from #queryInput and the endpoint from #apiUrl, POSTs
 * `{ query }` as JSON, and appends the `answer` field of the JSON response to
 * the chat box. Failures are shown in the chat box as a bot message.
 *
 * The send button's `disabled` flag doubles as the "request in flight" marker.
 */
async function sendMessage() {
    const input = document.getElementById('queryInput');
    const chatBox = document.getElementById('chatBox');
    const btn = document.getElementById('sendBtn');
    const spinner = document.getElementById('spinner');
    const apiUrl = document.getElementById('apiUrl').value;

    // The Enter-key handler on the input calls this function directly, so a
    // disabled button does not stop it. Ignore the call while a request is
    // pending; otherwise overlapping requests are sent and the first one to
    // finish re-enables the button while the other is still running.
    if (btn.disabled) return;

    const query = input.value.trim();
    if (!query) return;

    // Add User Message
    addMessage(query, 'user');
    input.value = '';

    // Loading State
    btn.disabled = true;
    spinner.style.display = 'block';

    try {
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query: query })
        });

        if (!response.ok) {
            // statusText is frequently empty (e.g. over HTTP/2), so include
            // the numeric status to keep the message informative.
            throw new Error(`API Error: ${response.status} ${response.statusText}`.trim());
        }

        const data = await response.json();
        addMessage(formatResponse(data.answer), 'bot');

    } catch (error) {
        addMessage(`⚠️ Error: ${error.message}. Please check if the API URL is correct and the server is running.`, 'bot');
    } finally {
        // Always leave the UI usable again, whether the request succeeded or not.
        btn.disabled = false;
        spinner.style.display = 'none';
        chatBox.scrollTop = chatBox.scrollHeight;
    }
}
334
+
335
/**
 * Append a message bubble to the chat box and scroll it into view.
 *
 * @param {string} text - Message content. For 'user' it is treated as plain
 *     text; for any other type it is treated as HTML, because callers pass
 *     bot replies that already contain formatting tags such as <strong>.
 * @param {string} type - Bubble kind, used as a CSS class ('user' or 'bot').
 */
function addMessage(text, type) {
    const chatBox = document.getElementById('chatBox');
    const div = document.createElement('div');
    div.className = `message ${type}`;

    if (type === 'user') {
        // User input is untrusted: build it from text nodes so characters
        // like < and & are displayed literally and can never become markup.
        // Newlines still become <br> elements, as before.
        text.split('\n').forEach((line, index) => {
            if (index > 0) {
                div.appendChild(document.createElement('br'));
            }
            div.appendChild(document.createTextNode(line));
        });
    } else {
        div.innerHTML = text.replace(/\n/g, '<br>');
    }

    chatBox.appendChild(div);
    chatBox.scrollTop = chatBox.scrollHeight;
}
343
+
344
/**
 * Convert a plain-text answer into HTML that is safe to assign to innerHTML.
 *
 * HTML-special characters are escaped first, then `**bold**` spans are turned
 * into <strong> elements. Escaping before formatting means the only tags in
 * the result are the <strong> tags added here.
 *
 * @param {string} text - Raw answer text.
 * @returns {string} Escaped HTML with bold spans marked up.
 */
function formatResponse(text) {
    // & must be replaced first so the entities produced below are not
    // themselves re-escaped.
    const escaped = text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Simple markdown-like formatting for bold text
    return escaped.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
}
348
+ </script>
349
+ </body>
350
+
351
+ </html>