Guessbot / rag_faiss.py
Mihirsingh1101's picture
Update rag_faiss.py
37de86d verified
import os
import pickle
import re

import faiss
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
# -------------------------------
# CONFIG
# -------------------------------
# Name of the SentenceTransformer model used to embed queries.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Chat model requested from OpenRouter, and the endpoint requests are sent to.
OPENROUTER_MODEL = "mistralai/mistral-7b-instruct"
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Directory holding the persisted FAISS index and its pickled metadata.
FAISS_DIR = "faiss_store"
INDEX_FILE = os.path.join(FAISS_DIR, "index.faiss")
META_FILE = os.path.join(FAISS_DIR, "meta.pkl")

# Fail fast at import time when the key is unset or empty, rather than on
# the first request.
API_KEY = os.environ.get("OPENROUTER_API_KEY")
if API_KEY is None or API_KEY == "":
    raise RuntimeError("OPENROUTER_API_KEY not set")

# -------------------------------
# EMBEDDINGS
# -------------------------------
# Created once at import so every query reuses the same model object.
print("🔹 Loading embedding model...")
embedder = SentenceTransformer(EMBED_MODEL)
# -------------------------------
# SMALL TALK / IDENTITY INTENTS
# -------------------------------
# Canned replies for conversational queries. Each intent maps a list of
# lowercase trigger phrases ("keywords") to a fixed "answer". Intents are
# checked in declaration order and the first match wins.
SMALL_TALK_INTENTS = {
    "greeting": {
        "keywords": [
            "hello", "hi", "hey", "hii", "hai",
            "good morning", "good evening", "good afternoon"
        ],
        "answer": "Hello! I’m the official virtual assistant for GUESSS India. How can I help you today?"
    },
    "wellbeing": {
        "keywords": [
            "how are you", "how r you", "how are u",
            "how do you do", "are you okay"
        ],
        "answer": "I’m doing well, thank you! How can I assist you with GUESSS India today?"
    },
    "identity": {
        "keywords": [
            "who are you", "what are you",
            "tell me about yourself", "introduce yourself"
        ],
        "answer": "I’m a virtual assistant created to provide accurate and reliable information about GUESSS India."
    },
    "name": {
        "keywords": [
            "what is your name", "your name",
            "do you have a name", "who should i call you"
        ],
        "answer": "I don’t have a personal name, but you can think of me as the GUESSS India Assistant."
    },
    "capabilities": {
        "keywords": [
            "what do you do", "what can you do",
            "how can you help", "what help can you provide"
        ],
        "answer": (
            "I answer questions related to GUESSS India, including surveys, programs, "
            "campus ambassadors, podcasts, and general information."
        )
    },
    "trust": {
        "keywords": [
            "can i trust you", "is your information reliable",
            "are you reliable", "is this official"
        ],
        "answer": "Yes. My responses are based on approved and verified information provided by the GUESSS India team."
    },
    "human": {
        "keywords": [
            "are you human", "are you a real person",
            "are you ai", "are you a bot"
        ],
        "answer": "No. I’m an AI-based assistant designed to share verified information from GUESSS India."
    },
    "availability": {
        "keywords": [
            "are you available", "are you always available",
            "when are you available"
        ],
        "answer": "Yes. I’m available 24/7 on the GUESSS India website."
    },
    "thanks": {
        "keywords": [
            "thank you", "thanks", "thank u", "thx"
        ],
        "answer": "You’re welcome! If you have more questions about GUESSS India, I’m here to help."
    }
}


def _contains_phrase(text: str, phrase: str) -> bool:
    """Return True when *phrase* occurs in *text* as a whole word or phrase."""
    # Lookarounds instead of a plain substring test: "hi" must not match
    # inside "this", nor "hey" inside "they". The `re` module caches compiled
    # patterns, so rebuilding the pattern string on each call stays cheap.
    pattern = rf"(?<!\w){re.escape(phrase)}(?!\w)"
    return re.search(pattern, text) is not None


def handle_small_talk(query: str):
    """Return the canned reply for a small-talk query, or None.

    The query is lowercased and its whitespace collapsed, then compared
    against every keyword of every intent in ``SMALL_TALK_INTENTS``.
    Keywords only match as whole words or phrases, so ordinary questions
    that merely contain a trigger as a substring (for example "this"
    containing "hi") are not treated as small talk.

    Args:
        query: Raw user input.

    Returns:
        The ``"answer"`` string of the first matching intent, or ``None``
        when nothing matches or the query is empty or not a string.
    """
    if not isinstance(query, str) or not query.strip():
        return None

    # split() + join trims the ends and collapses runs of whitespace, so
    # "how   are you" still matches the phrase "how are you".
    normalized = " ".join(query.lower().split())

    for intent in SMALL_TALK_INTENTS.values():
        if any(_contains_phrase(normalized, kw) for kw in intent["keywords"]):
            return intent["answer"]
    return None
# -------------------------------
# VECTOR STORE
# -------------------------------
class VectorStore:
    """Read-only wrapper around the persisted FAISS index and its texts.

    Attributes:
        index: The FAISS index read from ``INDEX_FILE``.
        texts: The object unpickled from ``META_FILE``; it is indexed by the
            positions FAISS returns, so it is presumably a sequence of text
            chunks in index order (verify against the build script).
    """

    def __init__(self):
        """Load the index and metadata from disk.

        Raises:
            FileNotFoundError: If either ``INDEX_FILE`` or ``META_FILE`` is
                missing.
        """
        print(f"🔹 Loading FAISS index from {INDEX_FILE}")

        # Checked in this order so a missing index is reported first.
        required_files = (
            (INDEX_FILE, "❌ index.faiss not found"),
            (META_FILE, "❌ meta.pkl not found"),
        )
        for path, complaint in required_files:
            if not os.path.exists(path):
                raise FileNotFoundError(complaint)

        self.index = faiss.read_index(INDEX_FILE)

        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe while meta.pkl comes from a trusted build step.
        with open(META_FILE, "rb") as meta_fh:
            self.texts = pickle.load(meta_fh)

        print(f"✅ Loaded {len(self.texts)} documents")

    def search(self, query, k=4, threshold=0.35):
        """Return the stored texts that best match *query*.

        Args:
            query: Text to embed and look up.
            k: Number of neighbours requested from the index.
            threshold: Minimum score a neighbour needs to be kept.

        Returns:
            A list of at most *k* entries from ``self.texts``, in the order
            the index returned them.
        """
        query_vec = embedder.encode([query], normalize_embeddings=True)
        query_vec = query_vec.astype("float32")
        scores, indices = self.index.search(query_vec, k)

        # -1 is the placeholder FAISS uses when it has fewer than k
        # neighbours. The bounds check guards against an index/metadata
        # mismatch.
        # NOTE(review): `score >= threshold` assumes larger scores mean more
        # similar (e.g. an inner-product index) -- confirm how it was built.
        matches = [
            self.texts[position]
            for score, position in zip(scores[0], indices[0])
            if position != -1
            and position < len(self.texts)
            and score >= threshold
        ]

        print(f"🔎 FAISS returned {len(matches)} high-confidence chunks")
        return matches
# -------------------------------
# OPENROUTER CALL
# -------------------------------
def call_openrouter(prompt, max_tokens=300):
    """Send *prompt* to the OpenRouter chat endpoint and return the reply text.

    Args:
        prompt: Content of the user message.
        max_tokens: Value sent as the request's ``max_tokens`` field.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.HTTPError: If the response status signals an error
            (raised by ``raise_for_status``).
        KeyError, IndexError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    system_message = {
        "role": "system",
        "content": "You are a factual assistant for GUESSS India. Answer strictly using the provided context.",
    }
    user_message = {"role": "user", "content": prompt}

    response = requests.post(
        OPENROUTER_URL,
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "http://localhost",
            "X-Title": "GuesssBot",
        },
        json={
            "model": OPENROUTER_MODEL,
            "messages": [system_message, user_message],
            "temperature": 0.6,
            "max_tokens": max_tokens,
        },
        timeout=60,  # seconds
    )
    response.raise_for_status()

    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"]
# -------------------------------
# MAIN ANSWER FUNCTION
# -------------------------------
def answer_question(vs, question: str):
    """Answer *question* with a canned reply, a RAG answer, or a fallback.

    Small-talk queries are answered first from the canned replies. Otherwise
    the vector store is searched; if it returns any context, the context and
    the question are sent to the language model. With no context, a fixed
    message describing what the assistant can help with is returned.

    Args:
        vs: Object exposing ``search(question, k=...)`` that returns a list
            of context strings (a ``VectorStore``).
        question: The user's question.

    Returns:
        The reply text as a string.
    """
    # Small talk / identity questions never reach the vector store.
    canned_reply = handle_small_talk(question)
    if canned_reply:
        return canned_reply

    # Knowledge-based (RAG) path.
    contexts = vs.search(question, k=4)
    if contexts:
        context_block = "\n---\n".join(contexts)
        prompt = f"""
Answer ONLY using the context below.
If the answer is not present, say:
"I do not have information on this in my database."
---
CONTEXT:
{context_block}
---
QUESTION: {question}
ANSWER:
"""
        return call_openrouter(prompt)

    # Nothing relevant retrieved: steer the user toward supported topics.
    return (
        "I can help with questions related to GUESSS India, such as surveys, programs, "
        "campus ambassadors, or official contact information."
    )