# NOTE: removed Hugging Face Space status banner ("Spaces: Sleeping") —
# a web-scrape artifact, not part of the program.
# app.py — RAG Study Supervisor, deployable as a Hugging Face Space.
import os
import gradio as gr
import fitz  # PyMuPDF
import docx
import numpy as np
import faiss
import requests
from sentence_transformers import SentenceTransformer

# Groq API key is read from the environment (configure it in HF Space secrets).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MODEL = "llama3-8b-8192"

# Prompt templates: a fixed system persona plus a per-question user prompt.
system_template = "You are a helpful and knowledgeable study supervisor. You are given excerpts from a document, and your job is to answer student questions based on that information. Be precise and explain clearly like a teacher."
user_template = """Context: {context}
Question: {question}
Answer like a teacher:"""

# Sentence-embedding model shared by document indexing and query encoding.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
def extract_text(file):
    """Extract plain text from an uploaded PDF or Word document.

    Args:
        file: Uploaded-file object exposing a ``.name`` path attribute
            (as Gradio's ``gr.File`` provides).

    Returns:
        str: Page/paragraph texts joined with newlines, or "" for
        unsupported extensions.
    """
    ext = file.name.split(".")[-1].lower()
    if ext == "pdf":
        # Fix: close the PyMuPDF document — the original leaked the handle.
        with fitz.open(file.name) as doc:
            return "\n".join(page.get_text() for page in doc)
    elif ext in ["docx", "doc"]:
        # NOTE(review): python-docx only parses .docx; a legacy .doc file will
        # raise here even though the UI accepts it — TODO confirm intent.
        doc = docx.Document(file.name)
        return "\n".join(p.text for p in doc.paragraphs)
    return ""
def chunk_text(text, size=300, overlap=50):
    """Split *text* into word windows of ``size`` words, consecutive windows
    sharing ``overlap`` words.

    Args:
        text: Source text; whitespace-tokenized.
        size: Words per chunk.
        overlap: Words shared between adjacent chunks; must be < ``size``.

    Returns:
        list[str]: Chunks in document order ([] for empty/whitespace text).

    Raises:
        ValueError: If ``overlap >= size`` (the original silently returned []
            or raised an opaque range() error in that case).
    """
    if overlap >= size:
        raise ValueError("overlap must be smaller than size")
    words = text.split()
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]
def embed_chunks(chunks):
    """Encode text chunks into float32 vectors suitable for FAISS."""
    raw = embedder.encode(chunks)
    return raw.astype("float32")
def store_faiss(chunks, vectors):
    """Build an exact L2 FAISS index over *vectors* and bundle it with the
    chunk texts into the app's retrieval state dict."""
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)
    return {
        "documents": chunks,
        "vectors": vectors,
        "index": index,
    }
def get_context(query, state, k=3):
    """Retrieve the *k* stored chunks nearest to *query*.

    Args:
        query: The user's question.
        state: Retrieval dict produced by ``store_faiss`` (or None before
            any document has been uploaded).
        k: Number of chunks to retrieve.

    Returns:
        tuple: ``(chunks, state)`` on success, or ``(error_message, state)``
        when no index is available.
    """
    if state is None or "index" not in state:
        # Fix: hand the caller's state back untouched instead of clobbering
        # it with None.
        return "β οΈ Please upload a document first.", state
    q_vec = embedder.encode([query]).astype("float32")
    _, I = state["index"].search(q_vec, k)
    return [state["documents"][i] for i in I[0]], state
def query_llm(context, question):
    """Ask the Groq chat-completions API to answer *question* given the
    retrieved *context* chunks.

    Args:
        context: List of context chunk strings.
        question: The user's question.

    Returns:
        str: The model's answer, or an error string on HTTP failure.
    """
    prompt = user_template.format(context="\n".join(context), question=question)
    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
        json={
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_template},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.3,
        },
        timeout=60,  # fix: without a timeout a stalled request hangs the UI forever
    )
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    # Fix: an error body is not guaranteed to be JSON — fall back to raw text
    # instead of raising a secondary decode error.
    try:
        detail = response.json()
    except ValueError:
        detail = response.text
    return f"β Error: {detail}"
def upload_file(file):
    """Process an uploaded document end-to-end: extract text, chunk it,
    embed the chunks, and build the FAISS retrieval state."""
    raw_text = extract_text(file)
    if not raw_text.strip():
        return "β οΈ File is empty or unreadable.", None
    pieces = chunk_text(raw_text)
    embeddings = embed_chunks(pieces)
    new_state = store_faiss(pieces, embeddings)
    return "β Document processed!", new_state
def ask_question(question, state):
    """Answer *question* from the indexed document, or report what is
    missing (blank question / no document uploaded)."""
    if not question.strip():
        return "β Please enter a question.", state
    retrieved, state = get_context(question, state)
    # get_context signals failure by returning a message string instead of
    # a list of chunks.
    if isinstance(retrieved, str):
        return retrieved, state
    return query_llm(retrieved, question), state
# Gradio interface: one upload/process row, then status, question, and answer.
with gr.Blocks() as demo:
    gr.Markdown("# π Study Supervisor Chatbot\nUpload a document and ask questions about it.")

    session = gr.State()

    with gr.Row():
        doc_input = gr.File(file_types=[".pdf", ".docx", ".doc"], label="π Upload Document")
        process_btn = gr.Button("π€ Upload and Process")

    status_box = gr.Textbox(label="Status", interactive=False)
    question_box = gr.Textbox(label="β Ask a Question")
    answer_box = gr.Textbox(label="π Answer", lines=8)

    process_btn.click(upload_file, inputs=doc_input, outputs=[status_box, session])
    question_box.submit(ask_question, inputs=[question_box, session], outputs=[answer_box, session])

demo.launch()