# Source: Hugging Face Space file "app.py" by Mahrukhh (commit e9a0d05, verified)
# app.py β€” Hugging Face deployable RAG Study Supervisor
import os
import gradio as gr
import fitz # PyMuPDF
import docx
import numpy as np
import faiss
import requests
from sentence_transformers import SentenceTransformer
# πŸ” Read Groq API Key from environment (set it in HF secrets)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MODEL = "llama3-8b-8192"
# πŸ’¬ Prompt templates
system_template = "You are a helpful and knowledgeable study supervisor. You are given excerpts from a document, and your job is to answer student questions based on that information. Be precise and explain clearly like a teacher."
user_template = """Context: {context}
Question: {question}
Answer like a teacher:"""
# πŸ“š Embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")
def extract_text(file):
    """Extract plain text from an uploaded PDF or Word document.

    Args:
        file: Uploaded-file object exposing a ``.name`` path attribute
            (as passed in by ``gr.File``).

    Returns:
        The extracted text, or ``""`` for unsupported extensions.
    """
    ext = file.name.split(".")[-1].lower()
    if ext == "pdf":
        # Context manager ensures the PyMuPDF document handle is closed
        # even if extraction raises (the original leaked the handle).
        with fitz.open(file.name) as doc:
            return "\n".join(page.get_text() for page in doc)
    elif ext in ["docx", "doc"]:
        # NOTE(review): python-docx parses .docx only; a legacy binary .doc
        # will likely raise here — confirm whether .doc support is needed.
        doc = docx.Document(file.name)
        return "\n".join(p.text for p in doc.paragraphs)
    return ""
def chunk_text(text, size=300, overlap=50):
    """Split *text* into overlapping word-window chunks.

    Args:
        text: Source text; split on whitespace.
        size: Number of words per chunk.
        overlap: Words shared between consecutive chunks.

    Returns:
        List of chunk strings (empty list for empty/whitespace text).

    Raises:
        ValueError: If ``size <= 0`` or ``overlap`` is not in ``[0, size)``.
            (The original crashed with an opaque ``range() arg 3 must not be
            zero`` when ``overlap == size``, and silently returned nothing
            when ``overlap > size``.)
    """
    if size <= 0:
        raise ValueError("size must be positive")
    if not 0 <= overlap < size:
        raise ValueError("overlap must satisfy 0 <= overlap < size")
    words = text.split()
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]
def embed_chunks(chunks):
    """Encode text chunks into float32 vectors with the module-level embedder."""
    vectors = embedder.encode(chunks)
    return vectors.astype("float32")
def store_faiss(chunks, vectors):
    """Build a flat L2 FAISS index over *vectors* and bundle it with its chunks.

    Returns a state dict with keys "documents", "vectors", and "index".
    """
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)
    state = {"documents": chunks, "vectors": vectors, "index": index}
    return state
def get_context(query, state, k=3):
    """Retrieve the *k* stored chunks most similar to *query*.

    Args:
        query: The user's question.
        state: State dict built by ``store_faiss`` (or ``None``).
        k: Number of neighbours to fetch.

    Returns:
        ``(chunks, state)`` on success, or ``(error_message, state)`` when
        no document has been indexed yet.
    """
    if state is None or "index" not in state:
        # Preserve whatever state the caller had instead of wiping it to None.
        return "⚠️ Please upload a document first.", state
    q_vec = embedder.encode([query]).astype("float32")
    # Never ask FAISS for more neighbours than there are stored chunks.
    k = min(k, len(state["documents"]))
    _, I = state["index"].search(q_vec, k)
    # FAISS pads missing neighbours with -1; filter them out rather than
    # accidentally indexing documents[-1].
    return [state["documents"][i] for i in I[0] if i >= 0], state
def query_llm(context, question):
    """Ask the Groq chat-completions API to answer *question* from *context*.

    Args:
        context: List of retrieved chunk strings.
        question: The user's question.

    Returns:
        The model's answer string, or an ``❌ Error: ...`` string on any
        network or API failure (callers display it directly in the UI).
    """
    prompt = user_template.format(context="\n".join(context), question=question)
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
            json={
                "model": MODEL,
                "messages": [
                    {"role": "system", "content": system_template},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3
            },
            timeout=60,  # the original had no timeout and could hang the UI forever
        )
    except requests.RequestException as e:
        return f"❌ Error: {e}"
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    # A non-JSON error body would make response.json() raise; fall back to raw text.
    try:
        detail = response.json()
    except ValueError:
        detail = response.text
    return f"❌ Error: {detail}"
def upload_file(file):
    """Process an uploaded document into a retrievable FAISS state.

    Args:
        file: Uploaded-file object from ``gr.File`` (``None`` if the user
            clicked the button without selecting a file).

    Returns:
        ``(status_message, state)`` — *state* is ``None`` on failure.
    """
    if file is None:
        # gr.File passes None when nothing was selected; the original
        # crashed here with AttributeError on file.name.
        return "⚠️ Please upload a file first.", None
    text = extract_text(file)
    if not text.strip():
        return "⚠️ File is empty or unreadable.", None
    chunks = chunk_text(text)
    vectors = embed_chunks(chunks)
    state = store_faiss(chunks, vectors)
    return "βœ… Document processed!", state
def ask_question(question, state):
    """Answer *question* against the indexed document, threading state through.

    Returns ``(answer_or_message, state)`` for the Gradio event handler.
    """
    if not question.strip():
        return "❗ Please enter a question.", state
    retrieved, state = get_context(question, state)
    # get_context signals "no document indexed yet" with a plain string.
    if isinstance(retrieved, str):
        return retrieved, state
    reply = query_llm(retrieved, question)
    return reply, state
# πŸŽ›οΈ Gradio Interface
with gr.Blocks() as demo:
gr.Markdown("# πŸ“˜ Study Supervisor Chatbot\nUpload a document and ask questions about it.")
state = gr.State()
with gr.Row():
file = gr.File(file_types=[".pdf", ".docx", ".doc"], label="πŸ“‚ Upload Document")
upload_btn = gr.Button("πŸ“€ Upload and Process")
status = gr.Textbox(label="Status", interactive=False)
question = gr.Textbox(label="❓ Ask a Question")
answer = gr.Textbox(label="πŸ“š Answer", lines=8)
upload_btn.click(upload_file, inputs=file, outputs=[status, state])
question.submit(ask_question, inputs=[question, state], outputs=[answer, state])
demo.launch()