Ethanshibu committed on
Commit
e48e7db
Β·
verified Β·
1 Parent(s): 5689cd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +237 -192
app.py CHANGED
@@ -1,239 +1,284 @@
1
  import os
2
- import gradio as gr
3
  import tempfile
 
 
 
 
 
 
 
 
4
  from typing import List, Optional
5
- import shutil
6
- import uuid
7
-
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
9
- from langchain_community.vectorstores import Chroma
10
- from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from langchain_community.document_loaders import PyPDFLoader
12
- from langchain.chains import RetrievalQA
13
- from langchain.llms.base import LLM
14
- from groq import Groq
15
-
16
- # ---- Custom LLM using Groq ----
17
- class GroqLLM(LLM):
18
- model: str = "llama3-8b-8192"
19
- api_key: str = ""
20
- temperature: float = 0.7
21
-
22
- def __init__(self, api_key: str, **kwargs):
23
- super().__init__(**kwargs)
24
- self.api_key = api_key
25
-
26
- def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
27
- if not self.api_key:
28
- raise ValueError("GROQ API key is required")
29
-
30
- client = Groq(api_key=self.api_key)
31
- messages = [
32
- {"role": "system", "content": "You are Stitch, a friendly and intelligent academic tutor."},
33
- {"role": "user", "content": prompt}
34
- ]
35
- response = client.chat.completions.create(
36
- model=self.model,
37
- messages=messages,
38
- temperature=self.temperature,
39
- )
40
- return response.choices[0].message.content
41
 
42
- @property
43
- def _llm_type(self) -> str:
44
- return "groq-llm"
 
45
 
46
- # --- Global Context ---
47
- rag_context = {"retriever": None, "mode": "Simple"}
 
 
 
48
 
49
- # --- PDF Processor ---
50
- def process_pdf(file, api_key):
51
- if file is None:
52
- return "❌ Please upload a PDF."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- if not api_key:
55
- return "❌ Please enter your Groq API key."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  try:
58
- # Create a more persistent temp directory
59
- temp_dir = f"./temp_{uuid.uuid4().hex}"
60
- os.makedirs(temp_dir, exist_ok=True)
61
-
62
- temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
63
- shutil.copy(file.name, temp_pdf_path)
64
 
65
- loader = PyPDFLoader(temp_pdf_path)
66
- documents = loader.load()
67
-
68
- if not documents:
69
- return "❌ No content found in PDF."
70
-
71
- splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
72
- chunks = splitter.split_documents(documents)
73
 
74
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
75
- vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=temp_dir)
76
- vectorstore.persist()
77
 
78
- rag_context["retriever"] = vectorstore.as_retriever()
79
- rag_context["api_key"] = api_key
80
- return "βœ… PDF processed successfully!"
 
 
 
 
 
81
 
 
82
  except Exception as e:
83
- return f"❌ Failed to process PDF: {str(e)}"
 
84
 
85
- # --- QA Chain Handler ---
86
- def ask_stitch(question, mode, temperature, show_sources):
87
- retriever = rag_context.get("retriever")
88
- api_key = rag_context.get("api_key")
89
-
90
- if not retriever:
91
- return "❌ Please upload and process a PDF first.", ""
92
 
93
- if not api_key:
94
- return "❌ API key not found. Please re-upload PDF with API key.", ""
95
-
96
  try:
97
- llm = GroqLLM(api_key=api_key, temperature=float(temperature))
98
- qa = RetrievalQA.from_chain_type(
99
- llm=llm,
100
- retriever=retriever,
101
- return_source_documents=show_sources
102
- )
103
-
104
- mode_prompt = "Explain simply and clearly." if mode == "Simple" else "Explain in detail with reasoning, examples, and context."
105
- result = qa({"query": f"{mode_prompt}\n\nQuestion: {question}"})
106
- answer = result["result"]
107
 
108
- sources = ""
109
- if show_sources and result.get("source_documents"):
110
- sources = "\n\nπŸ”Ž **Sources:**\n"
111
- for i, doc in enumerate(result.get("source_documents", [])[:3]): # Limit to 3 sources
112
- source_info = doc.metadata.get("source", "Unknown")
113
- page = doc.metadata.get("page", "Unknown")
114
- sources += f"- Source {i+1}: {source_info} (Page: {page})\n"
115
 
116
- return answer, sources
 
 
 
 
 
 
117
 
118
  except Exception as e:
119
- return f"❌ Error: {str(e)}", ""
120
 
121
- def chat_interface(message, history, mode, temperature, show_sources):
 
 
 
 
 
 
122
  if not message.strip():
123
- return history, ""
124
 
125
- answer, sources = ask_stitch(message, mode, temperature, show_sources)
126
- history.append([message, answer + sources])
127
- return history, ""
128
-
129
- # ---- Gradio UI ----
130
- def create_interface():
131
- with gr.Blocks(
132
- theme=gr.themes.Soft(primary_hue="indigo"),
133
- title="Stitch AI Academic Tutor"
134
- ) as app:
135
 
136
- gr.Markdown("""
137
- ## πŸ“˜ Meet **Stitch** β€” Your AI Academic Tutor
138
- Upload academic PDFs and ask questions naturally. Stitch will help you understand complex concepts!
 
139
 
140
- **Note:** You need a Groq API key to use this service. Get one free at [console.groq.com](https://console.groq.com)
141
- """)
142
-
143
- with gr.Row():
144
- with gr.Column(scale=2):
145
- api_key_input = gr.Textbox(
146
- label="πŸ”‘ Groq API Key",
147
- placeholder="Enter your Groq API key here...",
148
- type="password"
149
- )
150
- with gr.Column(scale=2):
151
- pdf_input = gr.File(label="πŸ“„ Upload PDF", file_types=[".pdf"])
152
- with gr.Column(scale=1):
153
- upload_btn = gr.Button("πŸ“₯ Process PDF", variant="primary")
154
 
155
- status = gr.Textbox(label="πŸ“Š Status", interactive=False, value="Ready to process PDF...")
 
 
 
 
 
 
 
 
 
156
 
157
- upload_btn.click(
158
- fn=process_pdf,
159
- inputs=[pdf_input, api_key_input],
160
- outputs=status
 
 
 
 
 
161
  )
162
-
163
- gr.Markdown("### πŸ’¬ Chat with Stitch")
164
 
165
  with gr.Row():
166
  with gr.Column(scale=1):
167
- mode = gr.Radio(
168
- ["Simple", "Deep"],
169
- label="πŸ“š Explanation Mode",
170
- value="Simple"
171
- )
172
- temperature = gr.Slider(
173
- 0.0, 1.0,
174
- value=0.7,
175
- label="πŸŽ›οΈ Creativity Level",
176
- info="Higher = more creative, Lower = more focused"
177
- )
178
- show_sources = gr.Checkbox(
179
- label="πŸ” Show Sources",
180
- value=True,
181
- info="Display source references"
182
  )
183
-
184
- with gr.Column(scale=3):
185
- chatbot = gr.Chatbot(
186
- label="Chat with Stitch",
187
- height=400,
188
- avatar_images=("πŸ‘¨β€πŸŽ“", "πŸ€–")
189
  )
190
 
191
- msg = gr.Textbox(
192
- label="Ask a question",
193
- placeholder="e.g., 'Summarize the main points of chapter 2' or 'Explain the methodology used'",
194
- lines=2
 
195
  )
196
 
197
  with gr.Row():
198
- submit_btn = gr.Button("Send", variant="primary")
199
- clear_btn = gr.Button("Clear Chat")
200
-
 
 
 
 
 
 
 
201
  # Event handlers
202
- def respond(message, history, mode, temp, sources):
203
- return chat_interface(message, history, mode, temp, sources)
204
-
205
- submit_btn.click(
206
- respond,
207
- inputs=[msg, chatbot, mode, temperature, show_sources],
208
- outputs=[chatbot, msg]
209
  )
210
 
211
- msg.submit(
212
- respond,
213
- inputs=[msg, chatbot, mode, temperature, show_sources],
214
- outputs=[chatbot, msg]
 
 
 
215
  )
216
 
217
- clear_btn.click(lambda: [], outputs=chatbot)
218
-
219
- gr.Markdown("""
220
- ### πŸ“‹ How to Use:
221
- 1. **Get API Key**: Sign up at [console.groq.com](https://console.groq.com) for a free Groq API key
222
- 2. **Enter API Key**: Paste your API key in the field above
223
- 3. **Upload PDF**: Choose an academic paper, textbook chapter, or research document
224
- 4. **Process**: Click "Process PDF" and wait for confirmation
225
- 5. **Ask Questions**: Start chatting with Stitch about your document!
226
 
227
- ### πŸ’‘ Example Questions:
228
- - "Summarize the main findings"
229
- - "Explain the methodology in simple terms"
230
- - "What are the key conclusions?"
231
- - "How does this relate to [specific concept]?"
232
- """)
233
-
234
- return app
 
 
 
 
235
 
236
  # Launch the app
237
  if __name__ == "__main__":
238
- app = create_interface()
239
- app.launch()
 
 
 
 
 
1
  import os
 
2
  import tempfile
3
+ import gradio as gr
4
+ import PyPDF2
5
+ import faiss
6
+ import numpy as np
7
+ from gtts import gTTS
8
+ from sentence_transformers import SentenceTransformer
9
+ import requests
10
+ import json
11
  from typing import List, Optional
12
+ import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# Configuration
# SECURITY: never commit API keys to source control — the previous revision
# hard-coded a live Groq key here. Read it from the environment instead
# (set GROQ_API_KEY as a secret in the hosting environment).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_MODEL = "llama3-8b-8192"

# Global variables to store the current session (one document at a time)
current_index = None       # FAISS index over the uploaded document's chunks
current_texts = None       # list[str] of chunk texts, parallel to the index rows
current_embeddings = None  # numpy array of chunk embeddings
embed_model = None         # lazily-initialized SentenceTransformer
24
 
25
def initialize_model():
    """Return the shared sentence-embedding model, creating it on first use."""
    global embed_model
    # Lazy singleton: the model download/load is expensive, do it once.
    embed_model = embed_model or SentenceTransformer('all-MiniLM-L6-v2')
    return embed_model
31
+
32
def load_pdf_text(file_path):
    """Extract all text from a PDF file.

    Args:
        file_path: path to the PDF on disk.

    Returns:
        The concatenated per-page text (newline-terminated per page), or an
        "Error reading PDF: ..." string on failure — callers detect failure
        by checking for the "Error" prefix.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Bug fix: extract_text() can return None for image-only pages,
            # which previously raised TypeError on string concatenation.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
43
+
44
def split_text(text, chunk_size=500, overlap=50):
    """Split text into overlapping chunks.

    Args:
        text: source string (may be empty).
        chunk_size: maximum characters per chunk.
        overlap: characters shared between consecutive chunks.

    Returns:
        list[str] of chunks; [] for empty text.

    Raises:
        ValueError: if overlap >= chunk_size — the previous version would
            spin forever because the scan position could not advance.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    chunks = []
    step = chunk_size - overlap
    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
        # Stop once the text is fully covered; the old loop could emit a
        # trailing chunk wholly contained in the previous one.
        if start + chunk_size >= len(text):
            break
    return chunks
53
+
54
def build_faiss_index(texts):
    """Embed the chunk texts and load them into a flat L2 FAISS index.

    Side effect: replaces the module-level session state (index, texts,
    embeddings) with this document's data.
    """
    global embed_model, current_index, current_texts, current_embeddings

    model = initialize_model()
    embed_model = model
    vectors = model.encode(texts, convert_to_numpy=True)

    # Exact (brute-force) L2 index — fine at this scale, no training needed.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors.astype('float32'))

    current_index = index
    current_texts = texts
    current_embeddings = vectors

    return index
70
+
71
def search_relevant_chunks(question, top_k=3):
    """Return up to top_k chunk texts most similar to the question.

    Returns [] when no document has been indexed yet.
    """
    global current_index, current_texts, embed_model

    if current_index is None or current_texts is None:
        return []

    query_vec = embed_model.encode([question], convert_to_numpy=True)
    _, indices = current_index.search(query_vec.astype('float32'), top_k)
    # Guard against out-of-range ids (e.g. top_k larger than the corpus).
    return [current_texts[i] for i in indices[0] if i < len(current_texts)]
83
+
84
def generate_tutor_response(context, question):
    """Generate a tutor-style answer via the Groq chat-completions API.

    Args:
        context: text of the retrieved document chunks (or a "not found" note).
        question: the student's raw question.

    Returns:
        The model's reply string, or an apologetic error string if the API
        returned an error payload or the request failed — this function
        never raises.
    """
    # The retrieved context and question are folded into a single user prompt.
    prompt = (
        f"You are a friendly and knowledgeable AI tutor. Based on the provided context from the student's notes, "
        f"answer their question in a clear, educational manner. If the context doesn't contain enough information, "
        f"say so and provide general guidance.\n\n"
        f"Context from notes:\n{context}\n\n"
        f"Student Question: {question}\n\n"
        f"Please provide a helpful, tutor-like response:"
    )

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    # OpenAI-compatible chat payload expected by the Groq endpoint.
    data = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful and friendly AI tutor who explains concepts clearly and encourages learning."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 1000
    }

    try:
        response = requests.post(GROQ_URL, headers=headers, json=data, timeout=30)
        result = response.json()

        # Groq surfaces failures (bad key, rate limit) as an "error" object
        # in the JSON body rather than only via HTTP status.
        if "error" in result:
            return f"I'm sorry, I encountered an error: {result['error']['message']}"

        return result["choices"][0]["message"]["content"]
    except Exception as e:
        # Network errors, non-JSON bodies, or unexpected response shapes.
        return f"I'm sorry, I couldn't process your question right now. Error: {str(e)}"
120
 
121
def create_audio_response(text):
    """Render text to speech with gTTS and return the path of a temp MP3 file.

    Returns None on failure. The file is created with delete=False so the
    Gradio Audio component can read it after this function returns.
    """
    try:
        tts = gTTS(text=text, lang='en', slow=False)

        # Create the temp file, then close the handle BEFORE gTTS writes to
        # its path — the previous version kept the descriptor open (a leak),
        # and reopening an open NamedTemporaryFile fails on Windows.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        temp_file.close()
        tts.save(temp_file.name)

        return temp_file.name
    except Exception as e:
        print(f"Error creating audio: {e}")
        return None
134
 
135
def upload_pdf(file):
    """Process an uploaded PDF: extract its text, chunk it, and index it.

    Returns a (status message, None) pair — the trailing None clears the
    audio output component wired to this handler.
    """
    if file is None:
        return "Please upload a PDF file.", None

    try:
        text = load_pdf_text(file.name)
        if text.startswith("Error"):
            # load_pdf_text signals failure via an "Error ..." string.
            return text, None

        chunks = split_text(text, chunk_size=500, overlap=50)
        if not chunks:
            return "No text found in the PDF. Please upload a different file.", None

        build_faiss_index(chunks)
        return f"βœ… PDF processed successfully! Found {len(chunks)} text chunks. You can now ask questions about your document.", None

    except Exception as e:
        return f"Error processing PDF: {str(e)}", None
160
 
161
def chat_with_pdf(message, history):
    """Answer a question about the indexed PDF and return the updated chat.

    Bug fix: this handler is wired as the sole output of the gr.Chatbot
    component, which expects a list of [user, assistant] pairs. The previous
    version returned a bare string, so replies were never rendered in the
    chat window. Every path now appends the exchange to *history* and
    returns it.
    """
    global current_index, current_texts

    history = history or []

    def _reply(text):
        # Append the exchange so the Chatbot component can render it.
        return history + [[message, text]]

    if current_index is None or current_texts is None:
        return _reply("Please upload a PDF file first before asking questions.")

    if not message.strip():
        return _reply("Please ask a question about your uploaded document.")

    try:
        # Retrieve the most relevant chunks to ground the answer.
        relevant_chunks = search_relevant_chunks(message, top_k=3)

        if not relevant_chunks:
            context = "No relevant information found in the uploaded document."
        else:
            context = " ".join(relevant_chunks)

        response = generate_tutor_response(context, message)
        return _reply(response)

    except Exception as e:
        return _reply(f"I'm sorry, I encountered an error while processing your question: {str(e)}")
 
 
 
 
 
 
 
187
 
188
def get_audio_response(message, history):
    """Synthesize audio for the most recent bot reply in the chat history.

    Returns the audio file path, or None when there is no history or the
    latest reply is empty. *message* is unused but kept for the Gradio wiring.
    """
    if not history:
        return None

    last_bot_msg = history[-1][1]  # bot half of the latest [user, bot] pair
    return create_audio_response(last_bot_msg) if last_bot_msg else None
198
 
199
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the PDF tutor.

    Layout: a two-column row (PDF upload + status on the left, chat +
    question box + audio on the right), followed by example questions.
    Event wiring connects the module-level handlers defined above.
    """
    with gr.Blocks(title="AI PDF Tutor", theme=gr.themes.Soft()) as iface:
        gr.Markdown(
            """
            # πŸ“š AI PDF Tutor with Voice
            Upload your PDF study materials and chat with an AI tutor that can answer questions about your documents.
            The tutor can also read responses aloud!
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### πŸ“„ Upload Your PDF")
                file_upload = gr.File(
                    label="Upload PDF Document",
                    file_types=[".pdf"],
                    type="filepath"
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="No file uploaded yet.",
                    interactive=False
                )

            with gr.Column(scale=2):
                gr.Markdown("### πŸ’¬ Chat with Your Document")
                chatbot = gr.Chatbot(
                    label="AI Tutor Chat",
                    height=400
                )

                with gr.Row():
                    msg_input = gr.Textbox(
                        label="Ask a question about your document",
                        placeholder="e.g., What are the key concepts in chapter 1?",
                        lines=2,
                        scale=4
                    )
                    audio_btn = gr.Button("πŸ”Š Listen", scale=1)

                audio_output = gr.Audio(label="Tutor Response (Audio)", visible=True)

        # Event handlers
        # Uploading a file immediately processes it; the second output slot
        # clears any stale audio from a previous document.
        file_upload.change(
            fn=upload_pdf,
            inputs=[file_upload],
            outputs=[upload_status, audio_output]
        )

        # Submit a question, then clear the input box once answered.
        # NOTE(review): chat_with_pdf's return value feeds the Chatbot
        # component, which expects a list of [user, bot] pairs — confirm the
        # handler returns updated history, not a bare string.
        msg_input.submit(
            fn=chat_with_pdf,
            inputs=[msg_input, chatbot],
            outputs=[chatbot]
        ).then(
            lambda: "",
            outputs=[msg_input]
        )

        # Read the latest bot reply aloud on demand.
        audio_btn.click(
            fn=get_audio_response,
            inputs=[msg_input, chatbot],
            outputs=[audio_output]
        )

        # Examples
        gr.Examples(
            examples=[
                ["What are the main topics covered in this document?"],
                ["Can you summarize the key points?"],
                ["Explain the methodology used in this research."],
                ["What are the conclusions of this study?"]
            ],
            inputs=[msg_input]
        )

    return iface
276
 
277
  # Launch the app
278
  if __name__ == "__main__":
279
+ interface = create_interface()
280
+ interface.launch(
281
+ server_name="0.0.0.0",
282
+ server_port=7860,
283
+ share=True
284
+ )