tm / main.py
Guru-25's picture
Update main.py
200f6ed verified
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
import os
from dotenv import load_dotenv
# ---------------------
# Startup Config
# ---------------------
# Everything below runs once at import time and populates the module-level
# globals (songs, embeddings, model, gemini_model) used by the endpoints.
print("Loading environment variables...")
load_dotenv()

print("Loading songs data...")
with open("songs.json", encoding="utf-8") as f:
    songs = json.load(f)

print("Loading embeddings...")
embeddings = np.load("song_embeddings_e5_final.npy")
# search_songs() uses the row index of `embeddings` to index `songs`, so the
# two must line up one-to-one. Surface a mismatch now instead of at query time.
if len(songs) != len(embeddings):
    print(
        f"WARNING: songs.json has {len(songs)} entries but the embedding "
        f"matrix has {len(embeddings)} rows; search results may be wrong."
    )

print("Loading embedding model...")
model = SentenceTransformer("intfloat/multilingual-e5-large")

print("Configuring Gemini API...")
_gemini_api_key = os.getenv("GEMINI_API_KEY")
if not _gemini_api_key:
    # The value is still passed through unchanged; this only makes the
    # misconfiguration visible in the startup log.
    print("WARNING: GEMINI_API_KEY is not set; Gemini requests may fail.")
genai.configure(api_key=_gemini_api_key)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")

print("API ready!")
# ---------------------
# FastAPI App
# ---------------------
# Metadata handed to the FastAPI constructor.
_app_metadata = {
    "title": "Thirumandiram Search API",
    "description": "Semantic search and AI-assisted explanations for Thirumandiram verses",
    "version": "2.0.0",
}
app = FastAPI(**_app_metadata)

# CORS: every origin, method and header is allowed (wildcard for each option).
_cors_options = {
    option: ["*"]
    for option in ("allow_origins", "allow_methods", "allow_headers")
}
app.add_middleware(CORSMiddleware, **_cors_options)
# ---------------------
# Payiram Mapper
# ---------------------
# Inclusive (first_song, last_song, label) ranges, checked top to bottom.
# NOTE(review): boundaries are reproduced as-is; confirm them against the
# edition of the text that songs.json was built from.
_PAYIRAM_RANGES = (
    (1, 336, "First Payiram"),
    (337, 548, "Second Payiram"),
    (549, 883, "Third Payiram"),
    (884, 1033, "Fourth Payiram"),
    (1034, 1560, "Fifth Payiram"),
    (1561, 1783, "Sixth Payiram"),
    (1784, 1980, "Seventh Payiram"),
    (1981, 2121, "Eighth Payiram"),
    (2122, 3000, "Ninth Payiram"),
)


def get_payiram(song_number: int) -> str:
    """Return the Payiram label for a song number.

    Args:
        song_number: Song number to classify.

    Returns:
        The label of the first range that contains ``song_number`` (bounds
        inclusive), or ``"Unknown Payiram"`` when no range matches.
    """
    for first_song, last_song, label in _PAYIRAM_RANGES:
        if first_song <= song_number <= last_song:
            return label
    return "Unknown Payiram"
# ---------------------
# Semantic Search
# ---------------------
def search_songs(query: str, top_k: int = 3):
    """Return the songs whose embeddings are most cosine-similar to ``query``.

    The query is encoded with the module-level ``model`` and compared against
    every row of the module-level ``embeddings`` matrix; row ``i`` is taken to
    describe ``songs[i]``.

    Args:
        query: Free-text search query.
        top_k: Maximum number of results to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of dicts ordered from most to least similar, each with the
        keys ``song_number``, ``padal``, ``vilakam``, ``vilakam_en``,
        ``payiram`` and ``similarity`` (a plain ``float``).
    """
    # Without this guard a negative top_k would slice [:top_k] and return
    # almost the entire corpus.
    if top_k <= 0:
        return []

    # The text is prefixed with "query: " before encoding (presumably the
    # prefix convention of the E5 model family; confirm on the model card).
    query_vec = model.encode(["query: " + query])[0]

    dots = np.dot(embeddings, query_vec)
    denom = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_vec)

    # Cosine similarity. Rows with a zero denominator keep similarity 0.0
    # instead of becoming NaN/inf, which cannot be serialised as JSON.
    sims = np.zeros(dots.shape, dtype=np.result_type(dots.dtype, denom.dtype))
    np.divide(dots, denom, out=sims, where=denom > 0)

    top_idx = np.argsort(-sims)[:top_k]

    results = []
    for idx in top_idx:
        song = songs[idx]
        song_number = song["song_number"]
        results.append({
            "song_number": song_number,
            "padal": song["padal"],
            "vilakam": song["vilakam"],
            "vilakam_en": song["vilakam_en"],
            "payiram": get_payiram(song_number),
            "similarity": float(sims[idx]),
        })
    return results
# ---------------------
# Gemini Scope Classifier
# ---------------------
def is_thirumandiram_scope(query: str) -> bool:
    """Ask Gemini whether ``query`` is within the Thirumandiram topic area.

    Sends a YES/NO classification prompt through the module-level
    ``gemini_model``.

    Args:
        query: The raw user query; it is embedded verbatim in the prompt.

    Returns:
        ``True`` only when the model's reply is "YES" (case-insensitive,
        ignoring surrounding whitespace and punctuation). Any other reply,
        or a reply with no readable text, is treated as ``False``.
    """
    prompt = f"""
You are a strict classifier.
Decide whether the following user query is related to:
- Thirumandiram
- Thirumoolar
- Saivism, Siddha philosophy, Yoga
- Spiritual concepts explained in Thirumandiram verses
Respond with ONLY:
YES or NO
If unsure, respond NO.
User query:
"{query}"
"""
    response = gemini_model.generate_content(prompt)
    try:
        verdict = response.text
    except ValueError:
        # Accessing .text raises ValueError when the response carries no text
        # part (for example a blocked or empty candidate). Fail closed, in
        # line with the prompt's "if unsure, respond NO".
        return False
    # Tolerate decorations such as 'YES.' or '"Yes"' around the verdict.
    return verdict.strip().strip(".!\"'`*").strip().upper() == "YES"
# ---------------------
# API Endpoints
# ---------------------
@app.get("/")
def root():
    """Describe the API: its name, version and the available endpoints."""
    endpoint_map = {
        "search": "/search?q=<query>&top_k=3",
        "chat_search": "/chat_search?q=<query>&top_k=3",
        "docs": "/docs",
        "health": "/health",
    }
    return {
        "name": "Thirumandiram Search API",
        "version": "2.0.0",
        "endpoints": endpoint_map,
    }
@app.get("/health")
def health():
    """Report basic service status.

    Returns:
        A dict with a fixed ``status`` of ``"healthy"``, whether the
        module-level embedding ``model`` is set, and whether a non-empty
        ``GEMINI_API_KEY`` is present in the environment.
    """
    return {
        "status": "healthy",
        "embedding_model_loaded": model is not None,
        # Truthiness rather than `is not None`: an empty GEMINI_API_KEY must
        # not be reported as a configured key.
        "gemini_configured": bool(os.getenv("GEMINI_API_KEY")),
    }
# ---------------------
# Endpoint 1: Raw Semantic Search
# ---------------------
@app.get("/search")
def search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Run a plain semantic search and echo the query alongside the matches.

    Args:
        q: Required query string.
        top_k: Number of results to return; validated to 1..10, default 3.

    Returns:
        A dict with the original ``query`` and the ``results`` list produced
        by ``search_songs``.
    """
    matches = search_songs(q, top_k)
    return {"query": q, "results": matches}
# ---------------------
# Endpoint 2: Chat Search (Gemini-Gated)
# ---------------------
@app.get("/chat_search")
def chat_search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10)
):
    """Scope-gated search that also returns a Gemini-written summary.

    Args:
        q: Required query string.
        top_k: Number of verses to retrieve; validated to 1..10, default 3.

    Returns:
        If the scope classifier rejects the query: a dict with
        ``out_of_scope=True``, an explanatory ``message``, ``summary=None``
        and an empty ``results`` list.
        Otherwise: a dict with ``out_of_scope=False``, the retrieved
        ``results`` and a ``summary`` string. ``summary`` is ``None`` when
        Gemini returned no readable text.
    """
    # STEP 1: Scope check
    if not is_thirumandiram_scope(q):
        return {
            "query": q,
            "out_of_scope": True,
            "message": "The query is not within the scope of Thirumandiram.",
            "summary": None,
            "results": []
        }

    # STEP 2: Semantic search
    results = search_songs(q, top_k)

    # STEP 3: Context building (one section per retrieved verse)
    context = "\n\n".join(
        f"Song {r['song_number']} ({r['payiram']}):\n"
        f"Verse:\n{r['padal']}\n"
        f"Explanation:\n{r['vilakam_en']}"
        for r in results
    )
    prompt = f"""
You are a Thirumandiram expert assistant.
Answer ONLY using Thirumandiram philosophy.
User query:
"{q}"
Relevant verses:
{context}
Explain clearly how these verses address the query.
"""
    response = gemini_model.generate_content(prompt)
    try:
        summary = response.text
    except ValueError:
        # .text raises ValueError when the response has no text part (for
        # example a blocked or empty candidate). The verses were already
        # found, so return them without a summary instead of failing the
        # request; clients already see summary=None on the out-of-scope path.
        summary = None

    return {
        "query": q,
        "out_of_scope": False,
        "summary": summary,
        "results": results
    }