Spaces:

Lesterchia1
/

FPOC2_AI-Tutor_Chatbot

Sleeping

App Files Files Community

Chia Woon Yap committed on Nov 21, 2025

Commit

dfc0561

verified ·

1 Parent(s): afbd38c

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -554

app.py DELETED Viewed

@@ -1,554 +0,0 @@
-# -*- coding: utf-8 -*-
-"""app
-Automatically generated by Colab.
-Original file is located at
-    https://colab.research.google.com/drive/1pwwcBb5Zlw1DA3u5K8W8mjrwBTBWXc1L
-"""
-import gradio as gr
-import numpy as np
-from transformers import pipeline
-import os
-import time
-import groq
-import uuid  # For generating unique filenames
-# LangChain imports with compatibility handling
-try:
-    from langchain_groq import ChatGroq
-    from langchain_core.messages import HumanMessage
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
-    from langchain_community.vectorstores import Chroma
-    from langchain_community.embeddings import HuggingFaceEmbeddings
-    from langchain_core.documents import Document
-except ImportError:
-    # Fallback for older versions
-    try:
-        from langchain_groq import ChatGroq
-        from langchain.schema import HumanMessage
-        from langchain.text_splitter import RecursiveCharacterTextSplitter
-        from langchain_community.vectorstores import Chroma
-        from langchain_community.embeddings import HuggingFaceEmbeddings
-        from langchain.docstore.document import Document
-    except ImportError as e:
-        print(f"Import warning: {e}")
-        # Define fallback classes
-        class HumanMessage:
-            def __init__(self, content):
-                self.content = content
-        class Document:
-            def __init__(self, page_content):
-                self.page_content = page_content
-# Basic imports
-import chardet
-import fitz  # PyMuPDF for PDFs
-import docx  # python-docx for Word files
-import gtts  # Google Text-to-Speech library
-from pptx import Presentation  # python-pptx for PowerPoint files
-import re
-print("🚀 Initializing AI Tutor Application...")
-# Initialize Whisper for speech-to-text
-# On any failure `transcriber` is set to None; transcribe_audio() checks for
-# that value before attempting a transcription.
-try:
-    transcriber = pipeline(
-        "automatic-speech-recognition",
-        model="openai/whisper-base.en"
-    )
-    print("✅ Whisper model loaded successfully")
-except Exception as e:
-    print(f"❌ Error loading Whisper: {e}")
-    transcriber = None
-# Initialize Groq
-# `chat_model` is only bound when the key is present and construction
-# succeeds, so callers must gate on CHAT_MODEL_AVAILABLE before using it.
-groq_api_key = os.getenv("GROQ_API_KEY")
-if groq_api_key:
-    try:
-        chat_model = ChatGroq(
-            model_name="llama-3.3-70b-versatile",
-            api_key=groq_api_key,
-            temperature=0.7
-        )
-        CHAT_MODEL_AVAILABLE = True
-        print("✅ Groq chat model initialized")
-    except Exception as e:
-        print(f"❌ Error initializing Groq: {e}")
-        CHAT_MODEL_AVAILABLE = False
-else:
-    print("⚠️ GROQ_API_KEY not found in environment variables")
-    CHAT_MODEL_AVAILABLE = False
-# Initialize Vector Store
-# `vectorstore` is left unbound if anything here raises; callers must gate on
-# VECTORSTORE_AVAILABLE. The store is created with the local "chroma_db"
-# directory as its persist_directory.
-try:
-    os.makedirs("chroma_db", exist_ok=True)
-    embedding_model = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2"
-    )
-    vectorstore = Chroma(
-        embedding_function=embedding_model,
-        persist_directory="chroma_db"
-    )
-    VECTORSTORE_AVAILABLE = True
-    print("✅ Vector store initialized")
-except Exception as e:
-    print(f"❌ Error initializing vector store: {e}")
-    VECTORSTORE_AVAILABLE = False
-# Application state
-# In the code visible here this list is only ever touched by clear_chat().
-chat_memory = []
-# Quiz generation prompt
-# generate_quiz() prepends this text to the (possibly truncated) document body.
-quiz_prompt = """
-You are an AI assistant specialized in education. Given document content, generate a quiz with 10 questions mixing multiple-choice and fill-in-the-blank.
-Requirements:
-- 10 total questions
-- Mix of MCQs and fill-in-the-blank
-- Based on key concepts from the document
-- Include answer key
-- Remove all markdown formatting
-Output format:
-1. [Question text]
-   Options (if MCQ): a) b) c) d)
-   Answer: [Correct answer]
-"""
-def clean_response(response):
-    """Strip reasoning blocks and markdown symbols from a model reply.
-
-    A falsy ``response`` yields an empty string. Otherwise every
-    ``<think>...</think>`` span (which may cover several lines) is removed
-    first, then each ``**``, ``*``, ``[``, ``]``, run of ``#`` and backslash
-    is deleted, and surrounding whitespace is trimmed from the result.
-    """
-    if response:
-        without_reasoning = re.sub(
-            r"<think>.*?</think>", "", response, flags=re.DOTALL
-        )
-        without_markup = re.sub(r"(\*\*|\*|\[|\]|#+|\\)", "", without_reasoning)
-        return without_markup.strip()
-    return ""
-def generate_quiz(content):
-    """Generate a quiz from document text using the Groq chat model.
-
-    Args:
-        content: Plain text extracted from the uploaded document.
-
-    Returns:
-        The model's quiz passed through clean_response(), or a
-        human-readable "❌ ..." message when there is no content, the chat
-        model is unavailable, or the request fails. Never raises.
-    """
-    # Guard first: len(None) below would otherwise raise outside the try block.
-    if not content or not content.strip():
-        return "❌ No document content available to generate a quiz."
-    if not CHAT_MODEL_AVAILABLE:
-        return "❌ Chat model not available. Please check GROQ_API_KEY configuration."
-    # Limit content length to avoid token limits
-    if len(content) > 8000:
-        content = content[:8000] + "... [content truncated for efficiency]"
-    try:
-        prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
-        # Use the Runnable `invoke` API: calling the chat model object
-        # directly is deprecated in LangChain.
-        response = chat_model.invoke([HumanMessage(content=prompt)])
-        return clean_response(response.content)
-    except Exception as e:
-        return f"❌ Error generating quiz: {str(e)}"
-def retrieve_documents(query):
-    """Return the text of the two stored chunks most similar to ``query``.
-
-    An empty list is returned when the vector store is unavailable, when the
-    query is blank, or when the similarity search raises (the error is
-    printed rather than propagated).
-    """
-    if VECTORSTORE_AVAILABLE and query.strip():
-        try:
-            matches = vectorstore.similarity_search(query, k=2)
-            return [match.page_content for match in matches]
-        except Exception as e:
-            print(f"Document retrieval error: {e}")
-    return []
-def chat_with_groq(user_input, chat_history):
-    """Answer one user message and append the exchange to the chat history.
-
-    Args:
-        user_input: Text typed (or transcribed) by the user; None is treated
-            as an empty message.
-        chat_history: List of ``{"role": ..., "content": ...}`` dicts; it is
-            extended in place. None is treated as an empty history.
-
-    Returns:
-        A 3-tuple ``(chat_history, "", audio_path_or_None)``: the updated
-        history, an empty string for the input textbox, and the path of the
-        spoken reply when speech synthesis produced one.
-    """
-    # Normalise missing values up front so that neither the blank-message
-    # check nor the error handler below can fail on None.
-    user_input = user_input or ""
-    if chat_history is None:
-        chat_history = []
-    try:
-        if not user_input.strip():
-            return chat_history, "", None
-        if not CHAT_MODEL_AVAILABLE:
-            error_msg = "🤖 Chat service is currently unavailable. Please check your API configuration."
-            chat_history.append({"role": "user", "content": user_input})
-            chat_history.append({"role": "assistant", "content": error_msg})
-            return chat_history, "", None
-        # Get relevant context from documents
-        relevant_docs = retrieve_documents(user_input)
-        context = "\n".join(relevant_docs) if relevant_docs else "No specific context available."
-        # Build enhanced prompt
-        system_msg = "You are a helpful AI tutor. Provide accurate, educational, and concise responses. If you don't know something, admit it honestly."
-        prompt = f"{system_msg}\n\nRelevant Context:\n{context}\n\nUser Question: {user_input}\n\nAssistant Response:"
-        # Get AI response via the Runnable `invoke` API; calling the chat
-        # model object directly is deprecated in LangChain.
-        response = chat_model.invoke([HumanMessage(content=prompt)])
-        cleaned_response = clean_response(response.content)
-        # Update chat history
-        chat_history.append({"role": "user", "content": user_input})
-        chat_history.append({"role": "assistant", "content": cleaned_response})
-        # Generate speech output
-        audio_file = speech_playback(cleaned_response)
-        return chat_history, "", audio_file
-    except Exception as e:
-        error_msg = f"❌ Error processing your request: {str(e)}"
-        chat_history.append({"role": "user", "content": user_input})
-        chat_history.append({"role": "assistant", "content": error_msg})
-        return chat_history, "", None
-def speech_playback(text):
-    """Convert text to speech with gTTS and return the path of the MP3.
-
-    Args:
-        text: Text to speak. Only the first 400 characters are synthesised.
-
-    Returns:
-        Path of the generated MP3 file, or None when the text is missing,
-        shorter than 10 non-blank characters, or synthesis fails (the error
-        is printed rather than raised).
-    """
-    import tempfile  # local import: only this function needs it
-
-    try:
-        if not text or len(text.strip()) < 10:
-            return None
-        # Limit text length for audio generation
-        if len(text) > 400:
-            text = text[:400] + "..."
-        # Write into the system temp directory instead of the working
-        # directory so generated clips do not pile up next to the app's
-        # source files; the full UUID avoids name collisions.
-        audio_file = os.path.join(
-            tempfile.gettempdir(), f"audio_{uuid.uuid4().hex}.mp3"
-        )
-        tts = gtts.gTTS(text=text, lang='en', slow=False)
-        tts.save(audio_file)
-        return audio_file
-    except Exception as e:
-        print(f"🔇 TTS Error: {e}")
-        return None
-def detect_encoding(file_path):
-    """Guess the text encoding of a file from its first 4096 bytes.
-
-    Args:
-        file_path: Path of the file to inspect.
-
-    Returns:
-        The encoding name reported by chardet, or "utf-8" when chardet
-        cannot decide or the file cannot be read.
-    """
-    try:
-        with open(file_path, "rb") as f:
-            raw_data = f.read(4096)
-        detected = chardet.detect(raw_data)
-        # chardet reports {"encoding": None} when it cannot decide, so the
-        # key is present and a .get() default would never be used.
-        return detected.get("encoding") or "utf-8"
-    except Exception:
-        return "utf-8"
-def extract_text_from_pdf(pdf_path):
-    """Extract the text of every page of a PDF.
-
-    Args:
-        pdf_path: Path of the PDF file.
-
-    Returns:
-        The concatenated page text with surrounding whitespace removed,
-        "No extractable text found in PDF." when the pages yield no text, or
-        a "PDF extraction error: ..." message when opening or reading fails.
-    """
-    try:
-        # The context manager closes the document even if a page fails.
-        with fitz.open(pdf_path) as doc:
-            text = "".join(page.get_text() for page in doc).strip()
-        return text if text else "No extractable text found in PDF."
-    except Exception as e:
-        return f"PDF extraction error: {str(e)}"
-def extract_text_from_docx(docx_path):
-    """Collect the non-blank paragraphs of a Word document.
-
-    Paragraphs are joined with newlines. Returns "No text found in Word
-    document." when nothing remains after trimming, or a "Word extraction
-    error: ..." message when the file cannot be processed.
-    """
-    try:
-        document = docx.Document(docx_path)
-        non_blank = (
-            para.text for para in document.paragraphs if para.text.strip()
-        )
-        joined = "\n".join(non_blank).strip()
-        if joined:
-            return joined
-        return "No text found in Word document."
-    except Exception as e:
-        return f"Word extraction error: {str(e)}"
-def extract_text_from_pptx(pptx_path):
-    """Collect the text of every shape on every slide of a presentation.
-
-    Each shape that exposes a non-empty ``text`` attribute contributes one
-    newline-terminated entry. Returns "No text found in PowerPoint." when
-    nothing remains after trimming, or a "PowerPoint extraction error: ..."
-    message when the file cannot be processed.
-    """
-    try:
-        deck = Presentation(pptx_path)
-        pieces = [
-            shape.text + "\n"
-            for slide in deck.slides
-            for shape in slide.shapes
-            if hasattr(shape, "text") and shape.text
-        ]
-        combined = "".join(pieces).strip()
-        return combined or "No text found in PowerPoint."
-    except Exception as e:
-        return f"PowerPoint extraction error: {str(e)}"
-def process_document(file):
-    """Extract text from an uploaded document, index it, and build a quiz.
-
-    Args:
-        file: The uploaded file, either a path string or an object whose
-            ``name`` attribute holds the path.
-
-    Returns:
-        A status message containing the file type, a 200-character preview
-        and the generated quiz, or a "❌ ..." / "📁 ..." message describing
-        why the document could not be processed. Never raises.
-    """
-    try:
-        if not file:
-            return "📁 Please upload a document file first."
-        # Accept both a plain path string and an object exposing `.name`.
-        filename = getattr(file, "name", file)
-        file_ext = os.path.splitext(filename)[-1].lower()
-        print(f"Processing {file_ext} file: {filename}")
-        # Extract text based on file type
-        if file_ext == ".pdf":
-            content = extract_text_from_pdf(filename)
-        elif file_ext == ".docx":
-            content = extract_text_from_docx(filename)
-        elif file_ext == ".pptx":
-            content = extract_text_from_pptx(filename)
-        elif file_ext in [".txt", ".md"]:
-            encoding = detect_encoding(filename)
-            with open(filename, "r", encoding=encoding, errors="ignore") as f:
-                content = f.read()
-        else:
-            return f"❌ Unsupported file type: {file_ext}. Please upload PDF, Word, PowerPoint, or text files."
-        # The extractors signal failure by returning one of these messages.
-        # Match them as prefixes only: searching the whole text for "error"
-        # or "no text" would reject valid documents that merely contain
-        # those words.
-        failure_prefixes = (
-            "PDF extraction error:",
-            "Word extraction error:",
-            "PowerPoint extraction error:",
-            "No extractable text found in PDF.",
-            "No text found in Word document.",
-            "No text found in PowerPoint.",
-        )
-        if not content or not content.strip() or content.startswith(failure_prefixes):
-            return f"❌ Could not extract meaningful content from this file. Error: {content}"
-        # Store in vector database for future queries
-        if VECTORSTORE_AVAILABLE and len(content) > 100:
-            try:
-                text_splitter = RecursiveCharacterTextSplitter(
-                    chunk_size=500,
-                    chunk_overlap=50
-                )
-                texts = text_splitter.split_text(content)
-                documents = [Document(page_content=text) for text in texts]
-                vectorstore.add_documents(documents)
-            except Exception as e:
-                # Indexing is best-effort: the quiz is still generated.
-                print(f"Vector store addition warning: {e}")
-        # Generate quiz from content
-        quiz = generate_quiz(content)
-        success_msg = f"""
-✅ **Document Processed Successfully!**
-📄 **File Type**: {file_ext.upper()}
-📝 **Content Preview**: {content[:200]}...
-📋 **Generated Quiz**:
-{quiz}
-        """
-        return success_msg
-    except Exception as e:
-        return f"❌ Error processing document: {str(e)}"
-def transcribe_audio(audio):
-    """Turn a recorded clip into text with the Whisper pipeline.
-
-    ``audio`` is unpacked as a ``(sample_rate, samples)`` pair. The samples
-    are averaged down to one channel when they have more than one dimension,
-    converted to float32 and peak-normalised. Clips shorter than 0.5 s or
-    longer than 30 s are refused. Every outcome, including failures, is
-    returned as a user-facing string.
-    """
-    try:
-        if audio is None:
-            return "🎤 No audio detected. Please record or upload audio."
-        if transcriber is None:
-            return "🔇 Speech-to-text service is currently unavailable."
-        rate, samples = audio
-        # Collapse multi-channel recordings to a single channel
-        mono = np.mean(samples, axis=1) if samples.ndim > 1 else samples
-        mono = mono.astype(np.float32)
-        # Scale so the loudest sample has magnitude 1 (skipped for silence)
-        peak = np.max(np.abs(mono))
-        if peak > 0:
-            mono = mono / peak
-        # Reject clips outside the accepted duration window
-        seconds = len(mono) / rate
-        if seconds < 0.5:
-            return "⏱️ Audio too short. Please record at least 1 second."
-        if seconds > 30:
-            return "⏱️ Audio too long. Please keep under 30 seconds."
-        # Run the speech-to-text model
-        outcome = transcriber({"sampling_rate": rate, "raw": mono})
-        spoken = outcome.get("text", "").strip()
-        if spoken:
-            return f"🎤 Transcribed: {spoken}"
-        return "🔇 No speech detected. Please try again with clearer audio."
-    except Exception as e:
-        return f"❌ Transcription error: {str(e)}"
-def clear_chat():
-    """Empty the module-level chat memory and reset the chat widgets.
-
-    Returns an empty list and None, in that order.
-    """
-    del chat_memory[:]
-    return ([], None)
-def create_interface():
-    """Create and configure the Gradio interface.
-
-    Builds a Blocks app with three tabs (chatbot, document upload / quiz
-    generation, usage instructions), wires the event handlers to the
-    module-level callbacks, and returns the app without launching it.
-    """
-    with gr.Blocks(
-        theme=gr.themes.Soft(),
-        title="AI Tutor - Learning Assistant",
-        css="""
-        .gradio-container {
-            max-width: 1200px !important;
-        }
-        """
-    ) as app:
-        gr.Markdown("""
-        # 🎓 AI Tutor Assistant
-        *Your personal learning companion with speech-to-text capabilities*
-        """)
-        # Main chat interface
-        with gr.Tab("💬 AI Chatbot"):
-            gr.Markdown("Chat with your AI tutor using text or voice input!")
-            with gr.Row():
-                with gr.Column(scale=3):
-                    # type="messages" matches the {"role", "content"} dicts
-                    # that chat_with_groq() appends to the history.
-                    # NOTE(review): avatar_images is given emoji strings here;
-                    # Gradio documents this argument as image paths or URLs -
-                    # confirm these values are accepted by the installed version.
-                    chatbot = gr.Chatbot(
-                        label="Conversation History",
-                        height=500,
-                        type="messages",
-                        show_copy_button=True,
-                        avatar_images=("👤", "🤖")
-                    )
-                with gr.Column(scale=1):
-                    # Receives the file path returned by chat_with_groq().
-                    audio_output = gr.Audio(
-                        label="Audio Response",
-                        type="filepath",
-                        visible=True,
-                        autoplay=True
-                    )
-            with gr.Row():
-                msg = gr.Textbox(
-                    label="Your message",
-                    placeholder="Type your question here or use voice input below...",
-                    scale=4,
-                    container=False,
-                    max_lines=3
-                )
-                send_btn = gr.Button("🚀 Send", scale=1, variant="primary")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    # type="numpy" yields the (sample_rate, samples) pair that
-                    # transcribe_audio() unpacks.
-                    audio_input = gr.Audio(
-                        sources=["microphone"],
-                        type="numpy",
-                        label="🎤 Record Audio Question",
-                        show_download_button=False
-                    )
-            with gr.Accordion("💡 Tips for Better Experience", open=False):
-                gr.Markdown("""
-                **🎤 Voice Input Tips:**
-                - Speak clearly in a quiet environment
-                - Keep microphone 10-15 cm from your mouth
-                - Record for 2-5 seconds for best results
-                **📚 Document Tips:**
-                - Upload PDF, Word, or PowerPoint files
-                - Clear text documents work best
-                - Process documents before asking questions about them
-                **💬 Chat Tips:**
-                - Ask specific questions for better answers
-                - Use the clear button to start fresh conversations
-                - The AI remembers context from uploaded documents
-                """)
-            with gr.Row():
-                clear_btn = gr.Button("🧹 Clear Chat History", variant="secondary")
-                # The Python callback is a no-op; the reload is done by the
-                # JavaScript snippet passed as `js`.
-                gr.Button("🔄 Refresh Page").click(
-                    lambda: None,
-                    None,
-                    None,
-                    js="() => window.location.reload()"
-                )
-        # Document processing tab
-        with gr.Tab("📚 Upload & Generate Quiz"):
-            gr.Markdown("Upload your study materials and generate custom quizzes automatically!")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    # The accepted extensions mirror the branches handled in
-                    # process_document().
-                    file_upload = gr.File(
-                        label="📁 Upload Study Materials",
-                        file_types=[".pdf", ".docx", ".pptx", ".txt", ".md"],
-                        file_count="single",
-                        height=100
-                    )
-                    process_btn = gr.Button("⚡ Process & Generate Quiz", variant="primary")
-                    gr.Markdown("""
-                    **Supported Formats:**
-                    - PDF documents
-                    - Word documents (.docx)
-                    - PowerPoint (.pptx)
-                    - Text files (.txt, .md)
-                    """)
-                with gr.Column(scale=2):
-                    quiz_display = gr.Textbox(
-                        label="📋 Generated Quiz",
-                        lines=20,
-                        max_lines=25,
-                        show_copy_button=True,
-                        placeholder="Your generated quiz will appear here after processing a document..."
-                    )
-        # Instructions tab
-        with gr.Tab("ℹ️ How to Use"):
-            gr.Markdown("""
-            ## 🎓 Getting Started with AI Tutor
-            ### 🎤 Using Voice Input
-            1. Go to the **AI Chatbot** tab
-            2. Click the microphone button
-            3. Allow microphone access in your browser
-            4. Speak clearly and wait for transcription
-            5. Review the text and click Send
-            ### 📚 Processing Documents
-            1. Go to the **Upload & Generate Quiz** tab
-            2. Upload your study materials (PDF, Word, PowerPoint)
-            3. Click "Process & Generate Quiz"
-            4. Get instant quiz questions based on your content
-            5. Use the chat to ask questions about your documents
-            ### 💬 Chat Features
-            - Ask questions about uploaded documents
-            - Get detailed explanations
-            - Receive audio responses
-            - Clear chat when needed
-            ### 🔧 Technical Requirements
-            - Modern web browser with microphone access
-            - Stable internet connection
-            - Groq API key (set as environment variable)
-            """)
-        # Event handlers
-        # The Send button and pressing Enter in the textbox run the same
-        # callback; its three return values update the chatbot, clear the
-        # textbox and set the audio player.
-        send_btn.click(
-            fn=chat_with_groq,
-            inputs=[msg, chatbot],
-            outputs=[chatbot, msg, audio_output]
-        )
-        msg.submit(
-            fn=chat_with_groq,
-            inputs=[msg, chatbot],
-            outputs=[chatbot, msg, audio_output]
-        )
-        # The string returned by transcribe_audio() (transcript or status
-        # message) is written into the message textbox.
-        audio_input.change(
-            fn=transcribe_audio,
-            inputs=[audio_input],
-            outputs=[msg]
-        )
-        process_btn.click(
-            fn=process_document,
-            inputs=[file_upload],
-            outputs=[quiz_display]
-        )
-        # clear_chat() takes no inputs; its two return values reset the
-        # chatbot and the audio player.
-        clear_btn.click(
-            fn=clear_chat,
-            outputs=[chatbot, audio_output]
-        )
-    return app
-# Entry point: build the UI and serve it when run as a script
-if __name__ == "__main__":
-    print("🌈 Starting AI Tutor Application...")
-    # Listen on all interfaces at port 7860, without a public share link,
-    # with error display and debug mode switched on.
-    launch_options = {
-        "server_name": "0.0.0.0",
-        "server_port": 7860,
-        "share": False,
-        "show_error": True,
-        "debug": True,
-    }
-    app = create_interface()
-    app.launch(**launch_options)