|
|
from fastapi import FastAPI, Query |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
import numpy as np |
|
|
import json |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import google.generativeai as genai |
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# One-time startup work. Everything here runs at import time and populates the
# module-level objects (songs, embeddings, model, gemini_model) that the
# functions further down read.
# ---------------------------------------------------------------------------

print("Loading environment variables...")
# NOTE(review): presumably this is where GEMINI_API_KEY comes from (.env) — confirm.
load_dotenv()

print("Loading songs data...")
with open("songs.json", encoding="utf-8") as songs_file:
    songs = json.load(songs_file)

print("Loading embeddings...")
# NOTE(review): search_songs indexes `songs` with row positions of this array,
# so row i is assumed to belong to songs[i] — confirm when regenerating the file.
embeddings = np.load("song_embeddings_e5_final.npy")

print("Loading embedding model...")
model = SentenceTransformer("intfloat/multilingual-e5-large")

print("Configuring Gemini API...")
# os.getenv returns None when the variable is missing; that None is passed
# straight through, and /health reports it as "gemini_configured": False.
gemini_api_key = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=gemini_api_key)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")

print("API ready!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The ASGI application object that all route decorators below attach to.
app = FastAPI(
    version="2.0.0",
    title="Thirumandiram Search API",
    description="Semantic search and AI-assisted explanations for Thirumandiram verses",
)

# CORS is left fully open: every origin, method and header is allowed.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_payiram(song_number: int) -> str:
    """Return the Payiram label for a song number.

    The label is chosen from a fixed table of inclusive song-number ranges.

    Args:
        song_number: The number of the song to classify.

    Returns:
        One of "First Payiram" ... "Ninth Payiram", or "Unknown Payiram" when
        the number falls outside every range (below 1 or above 3000).
    """
    # (first song, last song, label) — both bounds are inclusive.
    ranges = (
        (1, 336, "First Payiram"),
        (337, 548, "Second Payiram"),
        (549, 883, "Third Payiram"),
        (884, 1033, "Fourth Payiram"),
        (1034, 1560, "Fifth Payiram"),
        (1561, 1783, "Sixth Payiram"),
        (1784, 1980, "Seventh Payiram"),
        (1981, 2121, "Eighth Payiram"),
        (2122, 3000, "Ninth Payiram"),
    )
    for first, last, label in ranges:
        if first <= song_number <= last:
            return label
    return "Unknown Payiram"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def search_songs(query: str, top_k: int = 3):
    """Return the songs whose embeddings are most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared to every
    row of the module-level ``embeddings`` array by cosine similarity.

    Args:
        query: Free-text search query.
        top_k: Maximum number of songs to return. Values <= 0 yield an empty
            list.

    Returns:
        A list of at most ``top_k`` dicts, best match first, each with the keys
        ``song_number``, ``padal``, ``vilakam``, ``vilakam_en``, ``payiram``
        and ``similarity`` (a plain ``float``).
    """
    # Guard first: a negative top_k would otherwise reach the slice below and
    # return almost the entire corpus (``[:-2]`` drops only the last two), and
    # top_k == 0 would pay for a model encode just to return nothing.
    if top_k <= 0:
        return []

    # NOTE(review): the "query: " prefix follows the E5 model-card convention
    # for search queries; the stored embeddings are presumably "passage: "
    # encoded — confirm against the script that produced the .npy file.
    query_vec = model.encode(["query: " + query])[0]

    # Cosine similarity between the query and every embedding row.
    dot_products = np.dot(embeddings, query_vec)
    norm_products = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_vec)
    sims = dot_products / norm_products

    # Negating the scores makes argsort return indices in descending order.
    top_idx = np.argsort(-sims)[:top_k]

    # Row i of `embeddings` is assumed to describe songs[i].
    return [
        {
            "song_number": songs[idx]["song_number"],
            "padal": songs[idx]["padal"],
            "vilakam": songs[idx]["vilakam"],
            "vilakam_en": songs[idx]["vilakam_en"],
            "payiram": get_payiram(songs[idx]["song_number"]),
            # Convert from a NumPy scalar so the value is JSON-serializable.
            "similarity": float(sims[idx]),
        }
        for idx in top_idx
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_thirumandiram_scope(query: str) -> bool:
    """Ask Gemini whether ``query`` is within the Thirumandiram subject area.

    Sends a YES/NO classification prompt through the module-level
    ``gemini_model`` and interprets the reply.

    Args:
        query: The raw user query to classify.

    Returns:
        True only when the first word of the reply is "YES" (ignoring case and
        surrounding punctuation/markdown). Any other reply, an empty reply, or
        a reply with no usable text counts as out of scope.

    Raises:
        Whatever ``gemini_model.generate_content`` raises for transport or API
        errors; those are not handled here.
    """
    prompt = f"""
You are a strict classifier.

Decide whether the following user query is related to:
- Thirumandiram
- Thirumoolar
- Saivism, Siddha philosophy, Yoga
- Spiritual concepts explained in Thirumandiram verses

Respond with ONLY:
YES or NO

If unsure, respond NO.

User query:
"{query}"
"""
    response = gemini_model.generate_content(prompt)

    try:
        answer = response.text
    except ValueError:
        # The `text` accessor raises ValueError when the response holds no
        # text part (for example a blocked response). The prompt's own rule is
        # "if unsure, respond NO", so treat that as out of scope.
        return False

    # Accept harmless decorations such as "YES.", "Yes" or "**YES**" instead of
    # requiring the reply to be exactly "YES".
    words = answer.split()
    if not words:
        return False
    return words[0].strip(".,!:;\"'*`").upper() == "YES"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/")
def root():
    """Return a small self-description of the API: name, version, endpoints."""
    endpoint_map = {
        "search": "/search?q=<query>&top_k=3",
        "chat_search": "/chat_search?q=<query>&top_k=3",
        "docs": "/docs",
        "health": "/health",
    }
    return {
        "name": "Thirumandiram Search API",
        "version": "2.0.0",
        "endpoints": endpoint_map,
    }
|
|
|
|
|
@app.get("/health")
def health():
    """Report service health.

    ``status`` is always "healthy"; the two flags say whether the module-level
    embedding model object exists and whether the GEMINI_API_KEY environment
    variable is set.
    """
    embedding_ready = model is not None
    gemini_key_present = os.getenv("GEMINI_API_KEY") is not None
    return {
        "status": "healthy",
        "embedding_model_loaded": embedding_ready,
        "gemini_configured": gemini_key_present,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/search")
def search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Semantic search endpoint.

    Args:
        q: Required search query.
        top_k: Number of results to return, validated to 1..10 (default 3).

    Returns:
        A dict echoing the query under ``query`` and the ``search_songs``
        matches under ``results``.
    """
    matches = search_songs(q, top_k)
    return {"query": q, "results": matches}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_chat_prompt(q: str, results: list) -> str:
    """Build the Gemini prompt that pairs the user query with the matched verses."""
    context = "\n\n".join(
        f"Song {r['song_number']} ({r['payiram']}):\n"
        f"Verse:\n{r['padal']}\n"
        f"Explanation:\n{r['vilakam_en']}"
        for r in results
    )
    return f"""
You are a Thirumandiram expert assistant.
Answer ONLY using Thirumandiram philosophy.

User query:
"{q}"

Relevant verses:
{context}

Explain clearly how these verses address the query.
"""


@app.get("/chat_search")
def chat_search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    """Semantic search plus a Gemini-written explanation of the matches.

    Flow: classify the query with ``is_thirumandiram_scope``; if it is out of
    scope, return immediately without searching. Otherwise run
    ``search_songs`` and ask Gemini to explain how the matched verses address
    the query.

    Args:
        q: Required search query.
        top_k: Number of verses to retrieve, validated to 1..10 (default 3).

    Returns:
        A dict with ``query``, ``out_of_scope``, ``summary`` and ``results``.
        Out-of-scope replies additionally carry ``message`` and have
        ``summary`` set to None and ``results`` empty. ``summary`` is also None
        when Gemini returns a response with no usable text.
    """
    if not is_thirumandiram_scope(q):
        return {
            "query": q,
            "out_of_scope": True,
            "message": "The query is not within the scope of Thirumandiram.",
            "summary": None,
            "results": [],
        }

    results = search_songs(q, top_k)
    response = gemini_model.generate_content(_build_chat_prompt(q, results))

    try:
        summary = response.text
    except ValueError:
        # The `text` accessor raises ValueError when the response holds no
        # text part (for example a blocked response). The verses were already
        # found, so return them without a summary rather than failing the
        # whole request with a 500.
        summary = None

    return {
        "query": q,
        "out_of_scope": False,
        "summary": summary,
        "results": results,
    }
|
|
|