"""Thirumandiram Search API.

FastAPI service exposing two read-only endpoints over a pre-embedded corpus
of Thirumandiram verses:

* ``/search``      - raw semantic (cosine-similarity) search.
* ``/chat_search`` - the same search, gated by a Gemini scope classifier and
  followed by a Gemini-written summary of the matching verses.

Songs, embeddings, the embedding model and the Gemini client are all loaded
at import time.
"""

import json
import os
from typing import Optional

import google.generativeai as genai
import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from sentence_transformers import SentenceTransformer

# ---------------------
# Startup Config
# ---------------------
print("Loading environment variables...")
load_dotenv()

print("Loading songs data...")
with open("songs.json", encoding="utf-8") as f:
    songs = json.load(f)

print("Loading embeddings...")
embeddings = np.load("song_embeddings_e5_final.npy")
# The corpus is fixed after load, so its row norms are computed once here
# instead of on every search request.
_embedding_norms = np.linalg.norm(embeddings, axis=1)

print("Loading embedding model...")
model = SentenceTransformer("intfloat/multilingual-e5-large")

print("Configuring Gemini API...")
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel("gemini-2.5-flash")

print("API ready!")

# ---------------------
# FastAPI App
# ---------------------
app = FastAPI(
    title="Thirumandiram Search API",
    description="Semantic search and AI-assisted explanations for Thirumandiram verses",
    version="2.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------------
# Payiram Mapper
# ---------------------
# (first song, last song, label); both bounds are inclusive.
_PAYIRAM_RANGES = (
    (1, 336, "First Payiram"),
    (337, 548, "Second Payiram"),
    (549, 883, "Third Payiram"),
    (884, 1033, "Fourth Payiram"),
    (1034, 1560, "Fifth Payiram"),
    (1561, 1783, "Sixth Payiram"),
    (1784, 1980, "Seventh Payiram"),
    (1981, 2121, "Eighth Payiram"),
    (2122, 3000, "Ninth Payiram"),
)


def get_payiram(song_number: int) -> str:
    """Return the Payiram label for *song_number*.

    Returns ``"Unknown Payiram"`` when the number falls outside every
    range in ``_PAYIRAM_RANGES``.
    """
    for first, last, label in _PAYIRAM_RANGES:
        if first <= song_number <= last:
            return label
    return "Unknown Payiram"


# ---------------------
# Semantic Search
# ---------------------
def search_songs(query: str, top_k: int = 3):
    """Return the *top_k* songs most similar to *query*.

    The query is embedded with the module-level ``model`` and ranked by
    cosine similarity against ``embeddings``; row ``i`` of ``embeddings``
    is looked up as ``songs[i]``.

    Returns:
        A list of dicts, best match first, each with the keys
        ``song_number``, ``padal``, ``vilakam``, ``vilakam_en``,
        ``payiram`` and ``similarity``.
    """
    # NOTE(review): the "query: " prefix looks like the E5 query convention;
    # confirm against the model card before changing it.
    query_vec = model.encode(["query: " + query])[0]

    dots = np.dot(embeddings, query_vec)
    denom = _embedding_norms * np.linalg.norm(query_vec)
    # Where a norm is zero the similarity is reported as 0.0 instead of NaN,
    # so a degenerate row can neither disturb the ranking nor leak NaN into
    # the JSON response.
    sims = np.divide(dots, denom, out=np.zeros_like(dots), where=denom > 0)

    top_idx = np.argsort(-sims)[:top_k]

    results = []
    for idx in top_idx:
        song = songs[idx]
        song_number = song["song_number"]
        results.append({
            "song_number": song_number,
            "padal": song["padal"],
            "vilakam": song["vilakam"],
            "vilakam_en": song["vilakam_en"],
            "payiram": get_payiram(song_number),
            "similarity": float(sims[idx]),
        })
    return results


# ---------------------
# Gemini Helpers
# ---------------------
def _response_text(response) -> Optional[str]:
    """Return ``response.text``, or ``None`` if the accessor raises.

    The ``text`` quick accessor raises ``ValueError`` when the response
    carries no text part (for example, a blocked candidate); that case is
    mapped to ``None`` so callers can degrade gracefully.
    """
    try:
        return response.text
    except ValueError:
        return None


# ---------------------
# Gemini Scope Classifier
# ---------------------
def is_thirumandiram_scope(query: str) -> bool:
    """Ask Gemini whether *query* is within the Thirumandiram domain.

    Returns ``True`` only when the model's reply is ``YES`` (ignoring case
    and surrounding whitespace, quotes or sentence punctuation). A reply
    without any text counts as ``False``, in line with the prompt's
    "if unsure, respond NO" rule.
    """
    # NOTE: the query is interpolated verbatim, so a crafted query can try
    # to steer the classifier.
    prompt = f"""
You are a strict classifier.

Decide whether the following user query is related to:
- Thirumandiram
- Thirumoolar
- Saivism, Siddha philosophy, Yoga
- Spiritual concepts explained in Thirumandiram verses

Respond with ONLY:
YES or NO

If unsure, respond NO.

User query:
"{query}"
"""
    answer = _response_text(gemini_model.generate_content(prompt))
    if answer is None:
        return False
    # Tolerate replies such as "Yes." or '"YES"'.
    return answer.strip().strip(".!\"'").strip().upper() == "YES"


# ---------------------
# API Endpoints
# ---------------------
@app.get("/")
def root():
    """Describe the API and list its endpoints."""
    return {
        "name": "Thirumandiram Search API",
        "version": "2.0.0",
        "endpoints": {
            "search": "/search?q=&top_k=3",
            "chat_search": "/chat_search?q=&top_k=3",
            "docs": "/docs",
            "health": "/health",
        },
    }


@app.get("/health")
def health():
    """Report whether the embedding model is loaded and the Gemini key is set."""
    return {
        "status": "healthy",
        "embedding_model_loaded": model is not None,
        "gemini_configured": os.getenv("GEMINI_API_KEY") is not None,
    }


# ---------------------
# Endpoint 1: Raw Semantic Search
# ---------------------
@app.get("/search")
def search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Return the *top_k* verses most similar to *q*, with no LLM involved."""
    return {
        "query": q,
        "results": search_songs(q, top_k),
    }


# ---------------------
# Endpoint 2: Chat Search (Gemini-Gated)
# ---------------------
@app.get("/chat_search")
def chat_search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Scope-check *q*, search the verses, and summarise them with Gemini.

    Out-of-scope queries return ``out_of_scope: True`` with no results.
    In-scope queries return the matching verses plus a Gemini-written
    ``summary``; the summary is ``None`` when Gemini returns no text.
    """
    # STEP 1: Scope check
    if not is_thirumandiram_scope(q):
        return {
            "query": q,
            "out_of_scope": True,
            "message": "The query is not within the scope of Thirumandiram.",
            "summary": None,
            "results": [],
        }

    # STEP 2: Semantic search
    results = search_songs(q, top_k)

    # STEP 3: Context building
    context = "\n\n".join(
        f"Song {r['song_number']} ({r['payiram']}):\n"
        f"Verse:\n{r['padal']}\n"
        f"Explanation:\n{r['vilakam_en']}"
        for r in results
    )

    prompt = f"""
You are a Thirumandiram expert assistant.
Answer ONLY using Thirumandiram philosophy.

User query:
"{q}"

Relevant verses:
{context}

Explain clearly how these verses address the query.
"""
    response = gemini_model.generate_content(prompt)

    return {
        "query": q,
        "out_of_scope": False,
        # The verses are still returned when Gemini yields no text.
        "summary": _response_text(response),
        "results": results,
    }