import os import json import warnings warnings.filterwarnings("ignore") from dotenv import load_dotenv from langchain_community.embeddings import HuggingFaceEmbeddings from langchain_groq import ChatGroq from langchain_core.prompts import PromptTemplate from langchain_core.output_parsers import StrOutputParser from langchain_community.document_loaders import PyPDFLoader from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_pinecone import PineconeVectorStore from pinecone import Pinecone, ServerlessSpec from fastapi import FastAPI, UploadFile, File, Form, Request from fastapi.responses import HTMLResponse, RedirectResponse from pydantic import BaseModel from typing import List from datetime import datetime from collections import Counter import uvicorn load_dotenv() # ── Config ── ADMIN_PASSWORD = "admin123" CHAT_LOG_FILE = "chat_log.json" FEEDBACK_FILE = "feedback.json" INDEX_NAME = "college-chatbot" PINECONE_API_KEY = os.getenv("PINECONE_API_KEY") print("🔄 Loading embedding model...") embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") print("☁️ Connecting to Pinecone...") pc = Pinecone(api_key=PINECONE_API_KEY) vectorstore = PineconeVectorStore( index_name=INDEX_NAME, embedding=embeddings, pinecone_api_key=PINECONE_API_KEY ) retriever = vectorstore.as_retriever(search_kwargs={"k": 6}) print("🤖 Connecting to Groq LLM...") llm = ChatGroq( model_name="llama-3.3-70b-versatile", temperature=0.2, api_key=os.getenv("GROQ_API_KEY") ) prompt = PromptTemplate.from_template("""You are a helpful and friendly college enquiry assistant. Use ONLY the context below to answer the question. If the answer is not in the context, say "I don't have that information, please contact the college directly." Keep your answers clear and concise. IMPORTANT LANGUAGE RULE: - Detect the language of the "Current Question" below. - If the question is in Hindi (or contains Hindi/Devanagari words), respond FULLY in Hindi. - If the question is in English, respond in English. - Never mix languages in a single response. - If responding in Hindi, also translate the "I don't have that information" message to Hindi. Context: {context} Conversation History: {history} Current Question: {question} Answer:""") def format_docs(docs): return "\n\n".join(doc.page_content for doc in docs) def format_history(history): if not history: return "No previous conversation." lines = [] for h in history: lines.append(f"Student: {h['user']}") lines.append(f"Assistant: {h['bot']}") return "\n".join(lines) def ask_with_memory(question: str, history: list) -> str: docs = retriever.invoke(question) context = format_docs(docs) formatted_history = format_history(history) chain = prompt | llm | StrOutputParser() return chain.invoke({"context": context, "history": formatted_history, "question": question}) def rebuild_knowledge_base(pdf_path: str): global vectorstore, retriever loader = PyPDFLoader(pdf_path) documents = loader.load() splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) chunks = splitter.split_documents(documents) seen, unique_chunks = set(), [] for chunk in chunks: text = chunk.page_content.strip() if text not in seen: seen.add(text) unique_chunks.append(chunk) # ── Clear existing Pinecone index and re-upload ── try: pc.Index(INDEX_NAME).delete(delete_all=True) print("✅ Old Pinecone data cleared") except Exception as e: print(f"⚠️ Could not clear index: {e}") vectorstore = PineconeVectorStore.from_documents( documents=unique_chunks, embedding=embeddings, index_name=INDEX_NAME, pinecone_api_key=PINECONE_API_KEY ) retriever = vectorstore.as_retriever(search_kwargs={"k": 6}) print(f"✅ Knowledge base rebuilt with {len(unique_chunks)} chunks in Pinecone") def load_feedback(): if os.path.exists(FEEDBACK_FILE): with open(FEEDBACK_FILE, "r") as f: return json.load(f) return [] def save_feedback(data): with open(FEEDBACK_FILE, "w") as f: json.dump(data, f, indent=2) def load_chat_log(): if os.path.exists(CHAT_LOG_FILE): with open(CHAT_LOG_FILE, "r") as f: return json.load(f) return [] def save_chat_log(data): with open(CHAT_LOG_FILE, "w") as f: json.dump(data, f, indent=2) app = FastAPI() class Message(BaseModel): message: str history: List[dict] = [] class Feedback(BaseModel): question: str answer: str rating: str @app.post("/ask") async def ask(payload: Message): if not payload.message.strip(): return {"answer": "Please ask a question! / कृपया एक प्रश्न पूछें!"} print(f"❓ Question: {payload.message}") answer = ask_with_memory(payload.message, payload.history) print(f"✅ Answer: {answer}") log = load_chat_log() log.append({"question": payload.message, "answer": answer, "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")}) save_chat_log(log) return {"answer": answer} @app.post("/feedback") async def feedback(payload: Feedback): data = load_feedback() data.append({"question": payload.question, "answer": payload.answer, "rating": payload.rating, "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")}) save_feedback(data) print(f"{'👍' if payload.rating == 'up' else '👎'} Feedback: {payload.question[:50]}") return {"status": "saved"} @app.get("/admin", response_class=HTMLResponse) async def admin_login(): return HTMLResponse(content="""
| Time | Question | Answer |
|---|
Upload a new college PDF to update the Pinecone knowledge base in the cloud.
Manage stored feedback and chat logs. These actions cannot be undone.
| Time | Question | Rating |
|---|
College Enquiry Assistant — Live Feedback Stats
Ask me anything about the college