# NOTE: Captured from a Hugging Face Spaces page whose build status was
# "Runtime error"; the application source begins below.
# ============================================
# Study Supervisor RAG App (Colab Compatible)
# ============================================
# STEP 1: Install required packages — run this in a Colab/notebook cell:
#   !pip -q install gradio faiss-cpu sentence-transformers PyPDF2 requests
# (IPython `!` magic is not valid in a plain .py file, so it is kept as a
# comment here.)

# STEP 2: Imports
import os
from io import BytesIO

import faiss
import gradio as gr
import requests
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# CONFIGURATION
# Prefer the environment variable so the key never lands in source control;
# the empty-string fallback preserves the original "set it here" behavior.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")  # Set your Groq API key here
GROQ_MODEL = "llama3-70b-8192"
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
EMBED_MODEL = "all-MiniLM-L6-v2"
CHUNK_SIZE = 500  # characters per retrieval chunk
TOP_K = 5         # chunks retrieved per query

# Load the embedding model once at startup (downloads weights on first run).
embedder = SentenceTransformer(EMBED_MODEL)

# Global state shared by the handlers: the FAISS index and the chunk texts
# it indexes (parallel: index id i -> chunk_texts[i]).
faiss_index = None
chunk_texts = []
# STEP 3: Process PDF, chunk, embed, index
def process_pdf(file_obj):
    """Extract text from an uploaded PDF, chunk it, embed the chunks, and
    build a FAISS L2 index in the module globals.

    Args:
        file_obj: The uploaded file. Accepts a file-like object exposing
            ``read()``, a filesystem path string, or a Gradio file wrapper
            exposing ``.name`` (the upload's temp-file path) — Gradio's
            ``File`` component returns different objects across versions.

    Returns:
        str: A human-readable status message.
    """
    global faiss_index, chunk_texts
    if file_obj is None:
        return "β No file uploaded."
    # Normalize the three possible input shapes to one in-memory stream.
    if hasattr(file_obj, "read"):
        pdf_stream = BytesIO(file_obj.read())
    else:
        path = getattr(file_obj, "name", file_obj)
        with open(path, "rb") as fh:
            pdf_stream = BytesIO(fh.read())
    pdf = PdfReader(pdf_stream)
    full_text = ""
    for page in pdf.pages:
        text = page.extract_text()
        if text:  # extract_text() returns None/"" for image-only pages
            full_text += text + "\n"
    if not full_text.strip():
        return "β No extractable text found."
    # Fixed-size character chunks — simple but adequate for retrieval.
    chunk_texts = [full_text[i:i + CHUNK_SIZE]
                   for i in range(0, len(full_text), CHUNK_SIZE)]
    # FAISS requires float32 vectors; be explicit rather than relying on
    # the encoder's default dtype.
    embeddings = embedder.encode(chunk_texts, convert_to_numpy=True).astype("float32")
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
    return f"β PDF processed: {len(chunk_texts)} chunks indexed."
# STEP 4: Call Groq LLaMA 3 for answer generation
def call_groq_llm(context, question):
    """Ask the Groq chat-completions API to answer ``question`` grounded in
    ``context``, in the persona of an academic supervisor.

    Args:
        context (str): Retrieved passages from the indexed paper.
        question (str): The student's question.

    Returns:
        str: The model's answer, or a human-readable error message (this
        function never raises — the Gradio UI displays whatever it returns).
    """
    if not GROQ_API_KEY:
        return "β API Key is missing. Set your GROQ_API_KEY."
    prompt = f"""You are an academic supervisor helping a student understand a research paper.
Context:
{context}
Student's Question:
{question}
Answer:"""
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a knowledgeable and supportive supervisor guiding a student through a research paper. Respond clearly and academically."},
            {"role": "user", "content": prompt},
        ],
    }
    try:
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=60)
        if response.status_code == 200:
            # A 200 with a malformed body (missing keys, non-JSON) falls
            # through to the except clause instead of crashing the UI.
            return response.json()['choices'][0]['message']['content']
        return f"β Groq API Error {response.status_code}: {response.text}"
    except (requests.RequestException, KeyError, IndexError, ValueError) as e:
        # Narrowed from a bare `except Exception` so programming errors
        # (NameError etc.) are not silently swallowed.
        return f"β Exception: {e}"
# STEP 5: Query handler
def ask_question(query, history=None):
    """Answer ``query`` using the top-K indexed chunks as context.

    Args:
        query (str): The student's question.
        history: Chat history supplied by ``gr.ChatInterface`` (unused).
            ChatInterface invokes ``fn(message, history)``, so the original
            one-argument signature raised TypeError on every chat turn.

    Returns:
        str: The LLM's answer, or a status/error message.
    """
    if faiss_index is None or not chunk_texts:
        return "β Please upload and process a PDF first."
    query_embed = embedder.encode([query])
    # Never ask FAISS for more neighbors than the index holds.
    k = min(TOP_K, len(chunk_texts))
    D, I = faiss_index.search(query_embed, k)
    # FAISS pads missing results with -1, which would silently wrap to
    # chunk_texts[-1] — filter invalid ids.
    retrieved_chunks = [chunk_texts[i] for i in I[0] if 0 <= i < len(chunk_texts)]
    context = "\n---\n".join(retrieved_chunks)
    # Truncate context to keep the prompt within the model's budget.
    return call_groq_llm(context[:3000], query)
# STEP 6: Gradio interface
with gr.Blocks() as app:
    gr.Markdown("π **Student Study Assistant** - Upload a research paper and ask questions.")

    # Upload row: file picker plus the trigger button.
    with gr.Row():
        file_input = gr.File(label="π Upload PDF")
        process_button = gr.Button("π₯ Process Document")

    # Feedback from process_pdf (chunk count or error text).
    status_output = gr.Textbox(label="Processing Status")

    # Chat panel backed by the RAG query handler.
    chatbot = gr.ChatInterface(
        fn=ask_question,
        title="π Study Supervisor",
        description="Ask your supervisor questions about the uploaded paper.",
        theme="soft",
    )

    # Wire the button: upload -> index -> status message.
    process_button.click(fn=process_pdf, inputs=file_input, outputs=status_output)

# share=True exposes a public Gradio URL (needed on Colab).
app.launch(share=True)