Ai_ML_Tutor / app.py
HuzaifaTech's picture
Update app.py
8cbfc80 verified
# =========================
# IMPORTS
# =========================
import os
import tempfile
import gradio as gr
from groq import Groq
from duckduckgo_search import DDGS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# =========================
# CONFIG
# =========================
# The Groq credential is read from the environment; it is None when unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # HF SECRET KEY
client = Groq(api_key=GROQ_API_KEY)

# Shared index handles: process_pdf() assigns both, ask_rag() reads the
# retriever. They stay None until a PDF has been processed.
vectorstore = retriever = None
# =========================
# PROMPT
# =========================
def build_prompt(context, question):
    """Assemble the LLM prompt for a retrieval-grounded answer.

    Args:
        context: Text the model is told to rely on exclusively.
        question: The user's question.

    Returns:
        The prompt string: fixed instructions followed by the CONTEXT,
        QUESTION and ANSWER sections, one item per line, with a leading and
        a trailing newline.
    """
    sections = (
        "",  # leading newline
        "You are an expert AI assistant.",
        "Use ONLY the context below.",
        'If answer is not present, say "Not found in document".',
        "CONTEXT:",
        f"{context}",
        "QUESTION:",
        f"{question}",
        "ANSWER:",
        "",  # trailing newline
    )
    return "\n".join(sections)
# =========================
# WEB SEARCH (FALLBACK)
# =========================
def web_search(query):
    """Return DuckDuckGo result snippets for *query* as one block of text.

    Requests up to three text results and joins their ``body`` snippets with
    blank lines. Results that carry no usable ``body`` are skipped instead of
    raising ``KeyError`` or breaking the join.

    Args:
        query: Search string forwarded to DuckDuckGo.

    Returns:
        The snippets separated by blank lines, or ``""`` when no result
        provided a snippet.
    """
    with DDGS() as ddgs:
        # Consume the results while the search session is still open.
        hits = ddgs.text(query, max_results=3) or []
        snippets = [hit.get("body") for hit in hits]
    return "\n\n".join(snippet for snippet in snippets if snippet)
# =========================
# PROCESS PDF (HF SAFE)
# =========================
def process_pdf(file):
    """Index an uploaded PDF so questions can be answered from its content.

    Loads the PDF, splits it into overlapping chunks, embeds the chunks with
    ``sentence-transformers/all-MiniLM-L6-v2`` and stores them in a Chroma
    vector store. On success the module-level ``vectorstore`` and
    ``retriever`` are replaced by the new index.

    Args:
        file: Value delivered by the upload component: ``None`` when nothing
            is selected, a path string, or an object whose ``name`` attribute
            holds the path of the uploaded file.

    Returns:
        A status message for the UI.
    """
    global vectorstore, retriever

    # The change event can deliver None (e.g. when the upload is cleared);
    # there is nothing to index then, and any existing index is left alone.
    if file is None:
        return "⚠️ No PDF uploaded."

    # Accept both a plain path string and a file object exposing ``.name``.
    file_path = file if isinstance(file, str) else file.name
    documents = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
    )
    chunks = splitter.split_documents(documents)
    if not chunks:
        # E.g. a scanned PDF without a text layer: keep the current index
        # rather than building a store from an empty chunk list.
        return "⚠️ No extractable text found in this PDF."

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Every call uses Chroma's default collection, so drop the previous one
    # first; otherwise chunks from an earlier upload could still be retrieved
    # for questions about the new document.
    if vectorstore is not None:
        vectorstore.delete_collection()
        vectorstore = retriever = None

    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
    return "✅ PDF successfully processed"
# =========================
# RAG ENGINE (HYBRID)
# =========================
def ask_rag(query):
    """Answer *query* from the indexed PDF, with a web-search fallback.

    Retrieves chunks for the query from the module-level ``retriever``. When
    the retrieved text is shorter than 50 characters, snippets from
    ``web_search`` are appended under a ``WEB:`` heading. The combined
    context is sent to the Groq chat model through ``build_prompt``.

    Args:
        query: The user's question.

    Returns:
        The model's answer, or a warning string when the question is empty
        or no PDF has been processed yet.
    """
    # Skip retrieval and the LLM call entirely for an empty question.
    if not query or not query.strip():
        return "⚠️ Please enter a question."

    if retriever is None:
        return "⚠️ Please upload a PDF first."

    docs = retriever.invoke(query)
    pdf_context = "\n\n".join(d.page_content for d in docs)

    # hybrid fallback: too little PDF text, so add web snippets
    if len(pdf_context.strip()) < 50:
        context = pdf_context + "\n\nWEB:\n" + web_search(query)
    else:
        context = pdf_context

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": build_prompt(context, query)}],
        # NOTE(review): confirm this model id is still served by Groq;
        # provider model ids get retired over time.
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
# =========================
# CHAT FUNCTION (FIXED FORMAT)
# =========================
def chat(user_message, history):
    """Answer *user_message* and record the exchange in the history.

    Args:
        user_message: Text submitted by the user.
        history: List of ``(user_message, response)`` tuples collected so
            far, or ``None`` to start a new one. It is extended in place.

    Returns:
        The updated history twice, as a 2-tuple.
    """
    answer = ask_rag(user_message)
    turns = [] if history is None else history
    turns.append((user_message, answer))
    return turns, turns
# =========================
# UI (HUGGING FACE SAFE)
# =========================
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Hybrid RAG Chatbot (PDF + Web)")

    # Upload area and a read-out of the processing result.
    file = gr.File(label="Upload PDF")
    status = gr.Textbox(label="Status")

    # Conversation view, question box and the stored chat history.
    chatbot = gr.Chatbot()  # IMPORTANT FIX
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])

    # Process the PDF whenever the upload changes and show the status text.
    file.change(fn=process_pdf, inputs=file, outputs=status)
    # Submitting a question updates both the chat view and the history.
    msg.submit(fn=chat, inputs=[msg, state], outputs=[chatbot, state])

app.launch()