Guru-25 committed on
Commit
200f6ed
·
verified ·
1 Parent(s): a97f5e0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +92 -40
main.py CHANGED
@@ -5,11 +5,15 @@ import json
5
  from sentence_transformers import SentenceTransformer
6
  import google.generativeai as genai
7
  import os
 
8
 
9
  # ---------------------
10
- # Config
11
  # ---------------------
12
 
 
 
 
13
  print("Loading songs data...")
14
  with open("songs.json", encoding="utf-8") as f:
15
  songs = json.load(f)
@@ -17,28 +21,28 @@ with open("songs.json", encoding="utf-8") as f:
17
  print("Loading embeddings...")
18
  embeddings = np.load("song_embeddings_e5_final.npy")
19
 
20
- print("Loading model...")
21
  model = SentenceTransformer("intfloat/multilingual-e5-large")
22
 
23
- # Configure Gemini API
24
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
25
  gemini_model = genai.GenerativeModel("gemini-2.5-flash")
26
 
27
  print("API ready!")
28
 
29
  # ---------------------
30
- # FastAPI app
31
  # ---------------------
 
32
  app = FastAPI(
33
  title="Thirumandiram Search API",
34
- description="Semantic search API for Thirumandiram verses with AI summaries",
35
- version="1.0.0"
36
  )
37
 
38
- # Allow CORS for your frontend
39
  app.add_middleware(
40
  CORSMiddleware,
41
- allow_origins=["*"], # Change to your frontend domain in production
42
  allow_methods=["*"],
43
  allow_headers=["*"],
44
  )
@@ -46,6 +50,7 @@ app.add_middleware(
46
  # ---------------------
47
  # Payiram Mapper
48
  # ---------------------
 
49
  def get_payiram(song_number: int) -> str:
50
  if 1 <= song_number <= 336:
51
  return "First Payiram"
@@ -68,8 +73,9 @@ def get_payiram(song_number: int) -> str:
68
  return "Unknown Payiram"
69
 
70
  # ---------------------
71
- # Utility: Search top-k matches
72
  # ---------------------
 
73
  def search_songs(query: str, top_k: int = 3):
74
  query_text = "query: " + query
75
  query_vec = model.encode([query_text])[0]
@@ -96,15 +102,40 @@ def search_songs(query: str, top_k: int = 3):
96
 
97
  return results
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  # ---------------------
100
  # API Endpoints
101
  # ---------------------
 
102
  @app.get("/")
103
  def root():
104
- """API information and available endpoints"""
105
  return {
106
  "name": "Thirumandiram Search API",
107
- "version": "1.0.0",
108
  "endpoints": {
109
  "search": "/search?q=<query>&top_k=3",
110
  "chat_search": "/chat_search?q=<query>&top_k=3",
@@ -114,54 +145,75 @@ def root():
114
  }
115
 
116
  @app.get("/health")
117
- def health_check():
118
- """Health check endpoint"""
119
- return {"status": "healthy", "model_loaded": model is not None}
 
 
 
 
 
 
 
120
 
121
  @app.get("/search")
122
- def search(q: str = Query(..., description="Search query in Tamil or English"),
123
- top_k: int = Query(3, ge=1, le=10, description="Number of results to return")):
124
- """
125
- Semantic search for Thirumandiram verses
126
-
127
- Returns matching verses with similarity scores
128
- """
129
- results = search_songs(q, top_k)
130
- return {"query": q, "results": results}
 
 
 
131
 
132
  @app.get("/chat_search")
133
- def chat_search(q: str = Query(..., description="Search query in Tamil or English"),
134
- top_k: int = Query(3, ge=1, le=10, description="Number of results to return")):
135
- """
136
- Semantic search with AI-generated summary
137
-
138
- Returns matching verses along with a summarized explanation using Google Gemini
139
- """
 
 
 
 
 
 
 
 
140
  results = search_songs(q, top_k)
141
 
142
- # Build context
143
  context = "\n\n".join(
144
- [
145
- f"Song {r['song_number']} ({r['payiram']}):\nVerse:\n{r['padal']}\nExplanation:\n{r['vilakam_en']}"
146
- for r in results
147
- ]
148
  )
149
 
150
  prompt = f"""
151
- You are a helpful assistant. A user searched for: "{q}".
152
- Here are some relevant verses from Thirumandiram:
 
 
 
153
 
 
154
  {context}
155
 
156
- Please summarize these results in a clear, chatbot-friendly way.
157
- Explain the key ideas and how they relate to the query.
158
  """
159
 
160
- # Generate summary using Gemini
161
  response = gemini_model.generate_content(prompt)
162
 
163
  return {
164
  "query": q,
 
165
  "summary": response.text,
166
  "results": results
167
  }
 
5
  from sentence_transformers import SentenceTransformer
6
  import google.generativeai as genai
7
  import os
8
+ from dotenv import load_dotenv
9
 
10
  # ---------------------
11
+ # Startup Config
12
  # ---------------------
13
 
14
+ print("Loading environment variables...")
15
+ load_dotenv()
16
+
17
  print("Loading songs data...")
18
  with open("songs.json", encoding="utf-8") as f:
19
  songs = json.load(f)
 
21
  print("Loading embeddings...")
22
  embeddings = np.load("song_embeddings_e5_final.npy")
23
 
24
+ print("Loading embedding model...")
25
  model = SentenceTransformer("intfloat/multilingual-e5-large")
26
 
27
+ print("Configuring Gemini API...")
28
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
29
  gemini_model = genai.GenerativeModel("gemini-2.5-flash")
30
 
31
  print("API ready!")
32
 
33
  # ---------------------
34
+ # FastAPI App
35
  # ---------------------
36
+
37
  app = FastAPI(
38
  title="Thirumandiram Search API",
39
+ description="Semantic search and AI-assisted explanations for Thirumandiram verses",
40
+ version="2.0.0"
41
  )
42
 
 
43
  app.add_middleware(
44
  CORSMiddleware,
45
+ allow_origins=["*"],
46
  allow_methods=["*"],
47
  allow_headers=["*"],
48
  )
 
50
  # ---------------------
51
  # Payiram Mapper
52
  # ---------------------
53
+
54
  def get_payiram(song_number: int) -> str:
55
  if 1 <= song_number <= 336:
56
  return "First Payiram"
 
73
  return "Unknown Payiram"
74
 
75
  # ---------------------
76
+ # Semantic Search
77
  # ---------------------
78
+
79
  def search_songs(query: str, top_k: int = 3):
80
  query_text = "query: " + query
81
  query_vec = model.encode([query_text])[0]
 
102
 
103
  return results
104
 
105
+ # ---------------------
106
+ # Gemini Scope Classifier
107
+ # ---------------------
108
+
109
def is_thirumandiram_scope(query: str) -> bool:
    """Ask Gemini whether *query* is within the Thirumandiram domain.

    Sends a YES/NO classification prompt to the module-level
    ``gemini_model`` and interprets the reply.

    Args:
        query: Raw user query text; it is interpolated into the prompt
            verbatim.

    Returns:
        True only when the model's reply, after trimming whitespace and
        surrounding punctuation/markdown, is "YES" (case-insensitive).
        False for any other reply, and False when the response carries
        no usable text.

    Raises:
        Whatever ``gemini_model.generate_content`` raises (for example on
        transport or authentication failure); those are not swallowed here.
    """
    prompt = f"""
You are a strict classifier.

Decide whether the following user query is related to:
- Thirumandiram
- Thirumoolar
- Saivism, Siddha philosophy, Yoga
- Spiritual concepts explained in Thirumandiram verses

Respond with ONLY:
YES or NO

If unsure, respond NO.

User query:
"{query}"
"""
    response = gemini_model.generate_content(prompt)

    try:
        answer = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response has no
        # valid text part (e.g. the reply was blocked). Fail closed, which
        # matches the prompt's own "If unsure, respond NO" policy.
        return False

    # Models often decorate a one-word answer ("YES.", "**Yes**", "'yes'").
    # Trim that decoration before comparing, but still require the whole
    # reply to be the single word YES so the classifier stays strict.
    verdict = answer.strip().strip(" .!\"'`*").upper()
    return verdict == "YES"
129
+
130
  # ---------------------
131
  # API Endpoints
132
  # ---------------------
133
+
134
  @app.get("/")
135
  def root():
 
136
  return {
137
  "name": "Thirumandiram Search API",
138
+ "version": "2.0.0",
139
  "endpoints": {
140
  "search": "/search?q=<query>&top_k=3",
141
  "chat_search": "/chat_search?q=<query>&top_k=3",
 
145
  }
146
 
147
  @app.get("/health")
148
def health():
    """Report basic liveness information for the service.

    Returns:
        A dict with:
        - "status": always the string "healthy".
        - "embedding_model_loaded": whether the module-level ``model``
          object is not None.
        - "gemini_configured": whether the GEMINI_API_KEY environment
          variable is set to a non-empty value. This does not verify
          that the key is actually valid.
    """
    # An empty value (e.g. a placeholder "GEMINI_API_KEY=" line in .env)
    # is not a usable key, so check truthiness rather than mere presence.
    gemini_key = os.getenv("GEMINI_API_KEY")
    return {
        "status": "healthy",
        "embedding_model_loaded": model is not None,
        "gemini_configured": bool(gemini_key),
    }
154
+
155
+ # ---------------------
156
+ # Endpoint 1: Raw Semantic Search
157
+ # ---------------------
158
 
159
  @app.get("/search")
160
def search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Run a raw semantic search and return the matches unmodified.

    Args:
        q: Required search query.
        top_k: Number of matches requested; constrained to 1..10 by the
            Query declaration, default 3.

    Returns:
        A dict echoing the query under "query" and carrying whatever
        ``search_songs(q, top_k)`` returns under "results".
    """
    matches = search_songs(q, top_k)
    payload = dict(query=q, results=matches)
    return payload
168
+
169
+ # ---------------------
170
+ # Endpoint 2: Chat Search (Gemini-Gated)
171
+ # ---------------------
172
 
173
  @app.get("/chat_search")
174
def chat_search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10)
):
    """Semantic search plus a Gemini-written explanation, gated by scope.

    Flow:
      1. ``is_thirumandiram_scope(q)`` decides whether the query is on
         topic; off-topic queries return immediately with no results.
      2. ``search_songs(q, top_k)`` retrieves the matching verses.
      3. The matches are formatted into a context block and sent to
         ``gemini_model`` together with the query.

    Args:
        q: Required search query.
        top_k: Number of matches requested; constrained to 1..10 by the
            Query declaration, default 3.

    Returns:
        Out of scope: a dict with "query", "out_of_scope" (True),
        "message", "summary" (None) and "results" (empty list).
        In scope: a dict with "query", "out_of_scope" (False), "summary"
        and "results". "summary" is the model's text, or None when the
        model response contains no usable text; the search results are
        still returned in that case.
    """
    # STEP 1: Scope check
    if not is_thirumandiram_scope(q):
        return {
            "query": q,
            "out_of_scope": True,
            "message": "The query is not within the scope of Thirumandiram.",
            "summary": None,
            "results": []
        }

    # STEP 2: Semantic search
    results = search_songs(q, top_k)

    # STEP 3: Context building
    # Each result is read by the keys 'song_number', 'payiram', 'padal'
    # and 'vilakam_en'.
    context = "\n\n".join(
        f"Song {r['song_number']} ({r['payiram']}):\n"
        f"Verse:\n{r['padal']}\n"
        f"Explanation:\n{r['vilakam_en']}"
        for r in results
    )

    prompt = f"""
You are a Thirumandiram expert assistant.
Answer ONLY using Thirumandiram philosophy.

User query:
"{q}"

Relevant verses:
{context}

Explain clearly how these verses address the query.

"""

    response = gemini_model.generate_content(prompt)

    try:
        summary = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response has no
        # valid text part (e.g. the reply was blocked). The retrieved
        # verses are still useful, so degrade to a missing summary rather
        # than failing the whole request.
        summary = None

    return {
        "query": q,
        "out_of_scope": False,
        "summary": summary,
        "results": results
    }