# ============================================ # 📘 Study Supervisor RAG App (Colab Compatible) # ============================================ # 🔧 STEP 1: Install Required Packages !pip -q install gradio faiss-cpu sentence-transformers PyPDF2 requests # 🔧 STEP 2: Imports import gradio as gr import faiss import os, requests from io import BytesIO from PyPDF2 import PdfReader from sentence_transformers import SentenceTransformer # ✅ CONFIGURATION GROQ_API_KEY = "" # 🔐 Set your Groq API key here GROQ_MODEL = "llama3-70b-8192" GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions" EMBED_MODEL = "all-MiniLM-L6-v2" CHUNK_SIZE = 500 TOP_K = 5 # ✅ Load Embedding Model embedder = SentenceTransformer(EMBED_MODEL) # ✅ Global Variables faiss_index = None chunk_texts = [] # 🔄 STEP 3: Process PDF, Chunk, Embed, Index def process_pdf(file_obj): global faiss_index, chunk_texts pdf_stream = BytesIO(file_obj.read()) pdf = PdfReader(pdf_stream) full_text = "" for page in pdf.pages: text = page.extract_text() if text: full_text += text + "\n" if not full_text.strip(): return "❌ No extractable text found." # Split into chunks chunk_texts = [full_text[i:i+CHUNK_SIZE] for i in range(0, len(full_text), CHUNK_SIZE)] embeddings = embedder.encode(chunk_texts) # FAISS index dim = embeddings.shape[1] faiss_index = faiss.IndexFlatL2(dim) faiss_index.add(embeddings) return f"✅ PDF processed: {len(chunk_texts)} chunks indexed." # 🧠 STEP 4: Call Groq LLaMA 3 for Answer Generation def call_groq_llm(context, question): if not GROQ_API_KEY: return "❌ API Key is missing. Set your GROQ_API_KEY." prompt = f"""You are an academic supervisor helping a student understand a research paper. Context: {context} Student's Question: {question} Answer:""" headers = { "Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json" } data = { "model": GROQ_MODEL, "messages": [ {"role": "system", "content": "You are a knowledgeable and supportive supervisor guiding a student through a research paper. Respond clearly and academically."}, {"role": "user", "content": prompt} ] } try: response = requests.post(GROQ_API_URL, headers=headers, json=data, timeout=60) if response.status_code == 200: return response.json()['choices'][0]['message']['content'] else: return f"❌ Groq API Error {response.status_code}: {response.text}" except Exception as e: return f"❌ Exception: {str(e)}" # 💬 STEP 5: Query Handler def ask_question(query): if faiss_index is None or not chunk_texts: return "❌ Please upload and process a PDF first." query_embed = embedder.encode([query]) D, I = faiss_index.search(query_embed, TOP_K) retrieved_chunks = [chunk_texts[i] for i in I[0]] context = "\n---\n".join(retrieved_chunks) return call_groq_llm(context[:3000], query) # 🎨 STEP 6: Gradio Interface with gr.Blocks() as app: gr.Markdown("📘 **Student Study Assistant** - Upload a research paper and ask questions.") with gr.Row(): file_input = gr.File(label="📎 Upload PDF") process_button = gr.Button("📥 Process Document") status_output = gr.Textbox(label="Processing Status") chatbot = gr.ChatInterface( fn=ask_question, title="🎓 Study Supervisor", description="Ask your supervisor questions about the uploaded paper.", theme="soft" ) process_button.click(fn=process_pdf, inputs=file_input, outputs=status_output) app.launch(share=True)