# lbrce-chatbot / app.py
# deepak0991's picture
# Update app.py
# 82c20f9 verified
import gradio as gr
import os
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
load_dotenv()  # make variables from a local .env file visible through os.environ
# --- Initialize LLM
# The Groq API key is a secret and must never live in source control: read it
# from the environment (populated by the host or by the .env file loaded
# above). Any key that was ever committed to the repository should be treated
# as compromised and rotated.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Fail at startup with an actionable message rather than on the first
    # user request with an opaque authentication error.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment or in a .env file."
    )
llm = ChatGroq(groq_api_key=groq_api_key, model_name="meta-llama/llama-4-scout-17b-16e-instruct")
# --- Prompt Template
# Two placeholders are filled in at query time: {context} with the retrieved
# document text and {input} with the user's question. The context section is
# wrapped in a properly closed <context>...</context> pair so the model can
# tell where the retrieved material ends and the question begins.
prompt = ChatPromptTemplate.from_template(
"""
You are Clara, the chatbot for Lakireddy Bali Reddy College of Engineering (LBRCE). Your role is to provide friendly, helpful, and clear responses to questions about the college. Always respond professionally, using phrases like "Currently, I understand," or "Based on recent information," to keep the interaction conversational.
Answer general queries concisely and clearly. Avoid technical terms and keep answers user-friendly. Include info on:
- Courses, faculty, facilities, and placements
- Admissions, eligibility, and campus life
<context>
{context}
</context>
Question:{input}
"""
)
# --- Build the vector DB at startup
def load_embeddings(
    data_dir="data",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    chunk_size=1000,
    chunk_overlap=200,
    max_docs=50,
):
    """Build an in-memory FAISS vector store from the PDFs under *data_dir*.

    Called with no arguments this behaves as before: it reads ``data/``,
    keeps the first 50 loaded documents, splits them into 1000-character
    chunks with 200 characters of overlap, and embeds the chunks with the
    MiniLM sentence-transformer.

    Args:
        data_dir: Directory scanned for PDF files.
        model_name: Hugging Face embedding model identifier.
        chunk_size: Maximum size of each text chunk passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks.
        max_docs: Upper bound on how many loaded documents are indexed
            (presumably one document per PDF page -- confirm against the
            loader). Pass ``None`` to index everything.

    Returns:
        The FAISS vector store built from the resulting chunks.

    Raises:
        ValueError: If no PDF documents are found, or if splitting yields
            no chunks.
    """
    docs = PyPDFDirectoryLoader(data_dir).load()
    if not docs:
        raise ValueError(f"No PDF documents found in the '{data_dir}/' directory.")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # docs[:None] is the whole list, so max_docs=None disables the cap.
    final_docs = splitter.split_documents(docs[:max_docs])
    if not final_docs:
        raise ValueError("Text splitting returned no valid document chunks.")

    # The embedding model is created only after the cheap input checks have
    # passed, so a missing or empty data directory fails fast without first
    # loading the model.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(final_docs, embeddings)
# --- Prepare retriever and chain
# Built once at import time so every request reuses the same index and chain.
vector_store = load_embeddings()
retriever = vector_store.as_retriever()
# Stuffs the retrieved documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
# Retrieval step + answer step combined into a single runnable.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)
# --- Gradio inference function
def chat_with_clara(query):
    """Answer *query* through the retrieval chain and preview the chunks used.

    Args:
        query: The question typed by the user. ``None`` and whitespace-only
            strings are treated as "no question".

    Returns:
        A 2-tuple ``(answer, similar_chunks)``. Both elements are strings;
        the interface that calls this function declares two output
        textboxes, so every code path must return exactly two values.
    """
    # The original early return produced a single string, which does not
    # match the two declared outputs; return a pair here as well.
    if not query or not query.strip():
        return "Please enter a valid question.", ""

    response = retrieval_chain.invoke({'input': query})
    answer = response['answer']
    docs = response.get('context', [])
    # Show at most the first 300 characters of each retrieved chunk.
    similar_chunks = "\n\n".join(f"• {doc.page_content[:300]}..." for doc in docs)
    return answer, similar_chunks or "No similar document chunks found."
# --- Launch Gradio Interface
# Components are declared first so the Interface call below is pure wiring:
# one input box for the question, two output boxes for the answer and for the
# document excerpts returned alongside it.
_question_box = gr.Textbox(
    label="Ask Clara (LBRCE Bot)",
    placeholder="E.g., What is the CSE cutoff for EAPCET?",
)
_answer_box = gr.Textbox(label="Clara's Answer")
_context_box = gr.Textbox(label="Similar Document Context (from PDFs)", lines=6)

iface = gr.Interface(
    fn=chat_with_clara,
    inputs=_question_box,
    outputs=[_answer_box, _context_box],
    title="📚 RAG Chatbot: Clara for LBRCE",
    description="Ask anything about LBRCE—admissions, faculty, courses, placements, campus life, and more!",
)
iface.launch()