Chia Woon Yap commited on
Commit
3a97a58
Β·
verified Β·
1 Parent(s): 42c8ccb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +419 -477
app.py CHANGED
@@ -16,597 +16,539 @@ import time
16
  import groq
17
  import uuid # For generating unique filenames
18
 
19
- # Updated imports to address LangChain deprecation warnings:
20
- from langchain_groq import ChatGroq
21
- from langchain.schema import HumanMessage
22
- from langchain.text_splitter import RecursiveCharacterTextSplitter
23
- from langchain_community.vectorstores import Chroma
24
- from langchain_community.embeddings import HuggingFaceEmbeddings
25
- from langchain.docstore.document import Document
26
-
27
- # Importing chardet (make sure to add chardet to your requirements.txt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  import chardet
29
-
30
  import fitz # PyMuPDF for PDFs
31
  import docx # python-docx for Word files
32
  import gtts # Google Text-to-Speech library
33
  from pptx import Presentation # python-pptx for PowerPoint files
34
  import re
35
 
36
- # FastAPI imports
37
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
38
- from fastapi.responses import JSONResponse, FileResponse
39
- from fastapi.middleware.cors import CORSMiddleware
40
- import uvicorn
41
- from typing import Optional
42
- import io
43
- import soundfile as sf
44
- import librosa
45
 
46
- # Enhanced Whisper model for speech-to-text with better configuration
47
  try:
48
  transcriber = pipeline(
49
- "automatic-speech-recognition",
50
- model="openai/whisper-small.en", # Upgraded from base to small for better accuracy
51
- device=-1, # Use CPU (-1) or GPU (0)
52
- chunk_length_s=30,
53
- stride_length_s=5,
54
- batch_size=8
55
  )
 
56
  except Exception as e:
57
- print(f"Warning: Could not load enhanced Whisper model: {e}")
58
- # Fallback to basic model
59
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
60
-
61
- # Set API Key (Ensure it's stored securely in an environment variable)
62
- groq.api_key = os.getenv("GROQ_API_KEY")
63
 
64
- # Initialize Chat Model
65
- chat_model = ChatGroq(model_name="llama-3.3-70b-versatile", api_key=groq.api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- # Initialize Embeddings and chromaDB
68
- os.makedirs("chroma_db", exist_ok=True)
69
- embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
70
- vectorstore = Chroma(
71
- embedding_function=embedding_model,
72
- persist_directory="chroma_db"
73
- )
 
 
 
 
 
 
 
 
74
 
75
- # Short-term memory for the LLM
76
  chat_memory = []
77
 
78
- # Audio processing parameters
79
- AUDIO_SAMPLE_RATE = 16000 # Whisper works best with 16kHz
80
-
81
- # Prompt for quiz generation with added remark
82
  quiz_prompt = """
83
- You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
84
-
85
- Generate 20 Questions.
86
-
87
- Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.
88
-
89
- For each question:
90
- - Provide 4 answer choices (for MCQs), with only one correct answer.
91
- - Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
92
- - Include an answer key for all questions.
93
- - Ensure questions vary in difficulty and encourage comprehension rather than memorization.
94
- - Additionally, implement an instant feedback mechanism:
95
- - When a user selects an answer, indicate whether it is correct or incorrect.
96
- - If incorrect, provide a brief explanation from the document to guide learning.
97
- - Ensure responses are concise and educational to enhance understanding.
98
-
99
- Output Example:
100
- 1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.
101
-
102
- Answer: Agent Core
103
-
104
- Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.
105
-
106
- 2. What is the main limitation of LLM-based applications?
107
- a) Limited token capacity
108
- b) Lack of domain expertise
109
- c) Prone to hallucination
110
- d) All of the above
111
-
112
- Answer: d) All of the above
113
-
114
- Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.
115
-
116
- 3. Given the following info, what is the value of P(jam|Rain)?
117
- P(no Rain) = 0.8;
118
- P(no Jam) = 0.2;
119
- P(Rain|Jam) = 0.1
120
-
121
- a) 0.016
122
- b) 0.025
123
- c) 0.1
124
- d) 0.4
125
-
126
- Answer: d) 0.4
127
-
128
- Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
129
  """
130
 
131
- # Function to clean AI response by removing unwanted formatting
132
  def clean_response(response):
133
- """Removes <think> tags, asterisks, and markdown formatting."""
134
- cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
135
- cleaned_text = re.sub(r"(\*\*|\*|\[|\])", "", cleaned_text)
136
- cleaned_text = re.sub(r"^##+\s*", "", cleaned_text, flags=re.MULTILINE)
137
- cleaned_text = re.sub(r"\\", "", cleaned_text)
138
- cleaned_text = re.sub(r"---", "", cleaned_text)
139
- return cleaned_text.strip()
140
 
141
- # Function to generate quiz based on content
142
  def generate_quiz(content):
143
- prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
144
- response = chat_model([HumanMessage(content=prompt)])
145
- cleaned_response = clean_response(response.content)
146
- return cleaned_response
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Function to retrieve relevant documents from vectorstore based on user query
149
  def retrieve_documents(query):
150
- results = vectorstore.similarity_search(query, k=3)
151
- return [doc.page_content for doc in results]
152
-
153
- # Function to convert tuple format to message format
154
- def convert_to_message_format(chat_history):
155
- """Convert from [(user, bot)] format to [{"role": "user", "content": user}, {"role": "assistant", "content": bot}] format"""
156
- message_format = []
157
- for user_msg, bot_msg in chat_history:
158
- message_format.append({"role": "user", "content": user_msg})
159
- message_format.append({"role": "assistant", "content": bot_msg})
160
- return message_format
161
-
162
- # Function to convert message format to tuple format for processing
163
- def convert_to_tuple_format(chat_history):
164
- """Convert from message format back to tuple format for processing"""
165
- tuple_format = []
166
- for i in range(0, len(chat_history), 2):
167
- if i+1 < len(chat_history):
168
- user_msg = chat_history[i]["content"]
169
- bot_msg = chat_history[i+1]["content"]
170
- tuple_format.append((user_msg, bot_msg))
171
- return tuple_format
172
 
173
- # Function to handle chatbot interactions with short-term memory
174
  def chat_with_groq(user_input, chat_history):
 
175
  try:
176
- # Convert message format to tuple format for processing
177
- tuple_history = convert_to_tuple_format(chat_history)
178
-
179
- # Retrieve relevant documents for additional context
180
- relevant_docs = retrieve_documents(user_input)
181
- context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."
182
-
183
- # Construct proper prompting with conversation history
184
- system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
185
- conversation_history = "\n".join(chat_memory[-10:]) # Keep the last 10 exchanges
186
- prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
187
 
188
- # Call the chat model
 
 
 
 
 
 
 
 
189
  response = chat_model([HumanMessage(content=prompt)])
190
-
191
- # Clean response to remove any unwanted formatting
192
- cleaned_response_text = clean_response(response.content)
193
-
194
- # Append conversation history
195
- chat_memory.append(f"User: {user_input}")
196
- chat_memory.append(f"AI: {cleaned_response_text}")
197
-
198
- # Update chat history - add new messages in the correct format
199
  chat_history.append({"role": "user", "content": user_input})
200
- chat_history.append({"role": "assistant", "content": cleaned_response_text})
201
-
202
- # Convert response to speech
203
- audio_file = speech_playback(cleaned_response_text)
204
-
205
  return chat_history, "", audio_file
 
206
  except Exception as e:
207
- error_msg = f"Error: {str(e)}"
208
  chat_history.append({"role": "user", "content": user_input})
209
  chat_history.append({"role": "assistant", "content": error_msg})
210
  return chat_history, "", None
211
 
212
- # Function to play response as speech using gTTS
213
  def speech_playback(text):
 
214
  try:
215
- # Generate a unique filename for each audio file
216
- unique_id = str(uuid.uuid4())
217
- audio_file = f"output_audio_{unique_id}.mp3"
218
-
219
- # Convert text to speech
220
- tts = gtts.gTTS(text, lang='en')
 
 
 
 
 
221
  tts.save(audio_file)
222
-
223
- # Return the path to the audio file
224
  return audio_file
225
  except Exception as e:
226
- print(f"Error in speech_playback: {e}")
227
  return None
228
 
229
- # Function to detect encoding safely
230
  def detect_encoding(file_path):
 
231
  try:
232
  with open(file_path, "rb") as f:
233
  raw_data = f.read(4096)
234
  detected = chardet.detect(raw_data)
235
- encoding = detected["encoding"]
236
- return encoding if encoding else "utf-8"
237
  except Exception:
238
  return "utf-8"
239
 
240
- # Function to extract text from PDF
241
  def extract_text_from_pdf(pdf_path):
 
242
  try:
243
  doc = fitz.open(pdf_path)
244
- text = "\n".join([page.get_text("text") for page in doc])
245
- return text if text.strip() else "No extractable text found."
 
 
246
  except Exception as e:
247
- return f"Error extracting text from PDF: {str(e)}"
248
 
249
- # Function to extract text from Word files (.docx)
250
  def extract_text_from_docx(docx_path):
 
251
  try:
252
  doc = docx.Document(docx_path)
253
- text = "\n".join([para.text for para in doc.paragraphs])
254
- return text if text.strip() else "No extractable text found."
255
  except Exception as e:
256
- return f"Error extracting text from Word document: {str(e)}"
257
 
258
- # Function to extract text from PowerPoint files (.pptx)
259
  def extract_text_from_pptx(pptx_path):
 
260
  try:
261
- presentation = Presentation(pptx_path)
262
  text = ""
263
- for slide in presentation.slides:
264
  for shape in slide.shapes:
265
- if hasattr(shape, "text"):
266
  text += shape.text + "\n"
267
- return text if text.strip() else "No extractable text found."
268
  except Exception as e:
269
- return f"Error extracting text from PowerPoint: {str(e)}"
270
 
271
- # Function to process documents safely
272
  def process_document(file):
 
273
  try:
274
- file_extension = os.path.splitext(file.name)[-1].lower()
275
- if file_extension in [".png", ".jpg", ".jpeg"]:
276
- return "Error: Images cannot be processed for text extraction."
277
- if file_extension == ".pdf":
278
- content = extract_text_from_pdf(file.name)
279
- elif file_extension == ".docx":
280
- content = extract_text_from_docx(file.name)
281
- elif file_extension == ".pptx":
282
- content = extract_text_from_pptx(file.name)
283
- else:
284
- encoding = detect_encoding(file.name)
285
- with open(file.name, "r", encoding=encoding, errors="replace") as f:
 
 
 
 
 
 
286
  content = f.read()
287
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
288
- documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
289
- vectorstore.add_documents(documents)
290
-
291
- quiz = generate_quiz(content)
292
- return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
293
- except Exception as e:
294
- return f"Error processing document: {str(e)}"
295
-
296
- # Enhanced function to handle speech-to-text conversion with audio preprocessing
297
- def preprocess_audio(audio_data, sample_rate):
298
- """
299
- Enhanced audio preprocessing for better STT accuracy
300
- """
301
- try:
302
- # Convert to mono if stereo
303
- if audio_data.ndim > 1:
304
- audio_data = np.mean(audio_data, axis=1)
305
 
306
- # Convert to float32
307
- audio_data = audio_data.astype(np.float32)
308
 
309
- # Normalize audio
310
- max_val = np.max(np.abs(audio_data))
311
- if max_val > 0:
312
- audio_data = audio_data / max_val
 
 
 
 
 
 
 
 
313
 
314
- # Resample to 16kHz if needed (Whisper works best with 16kHz)
315
- if sample_rate != AUDIO_SAMPLE_RATE:
316
- audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=AUDIO_SAMPLE_RATE)
317
- sample_rate = AUDIO_SAMPLE_RATE
318
 
319
- # Apply noise reduction (simple high-pass filter)
320
- import scipy.signal as sp
321
- nyquist = sample_rate / 2
322
- cutoff = 80 # High-pass filter cutoff frequency in Hz
323
- b, a = sp.butter(2, cutoff/nyquist, btype='high')
324
- audio_data = sp.filtfilt(b, a, audio_data)
 
 
 
 
 
325
 
326
- return audio_data, sample_rate
327
-
328
  except Exception as e:
329
- print(f"Audio preprocessing error: {e}")
330
- # Return original audio if preprocessing fails
331
- return audio_data, sample_rate
332
 
333
  def transcribe_audio(audio):
334
- """
335
- Enhanced speech-to-text transcription with better error handling and preprocessing
336
- """
337
  try:
338
  if audio is None:
339
- return "No audio input detected."
 
 
 
340
 
341
  sample_rate, audio_data = audio
342
 
343
- # Preprocess audio
344
- audio_data, sample_rate = preprocess_audio(audio_data, sample_rate)
345
-
346
- # Ensure audio is not too short
347
- if len(audio_data) / sample_rate < 0.5: # Less than 0.5 seconds
348
- return "Audio too short. Please record at least 1 second of audio."
349
-
350
- # Ensure audio is not too long (to prevent timeouts)
351
- max_duration = 30 # seconds
352
- if len(audio_data) / sample_rate > max_duration:
353
- # Truncate audio
354
- max_samples = max_duration * sample_rate
355
- audio_data = audio_data[:max_samples]
356
-
357
- # Use Whisper with better configuration
358
- result = transcriber({
359
- "sampling_rate": sample_rate,
360
- "raw": audio_data
361
- })
362
-
363
- transcription = result["text"].strip()
364
-
365
- if not transcription:
366
- return "No speech detected. Please try again with clearer audio."
367
-
368
- return transcription
369
 
370
- except Exception as e:
371
- error_msg = f"Transcription error: {str(e)}"
372
- print(error_msg)
373
- return f"Sorry, I couldn't process the audio. Please try again. Error: {str(e)}"
374
-
375
- # FastAPI Application
376
- app = FastAPI(title="Tutor AI API", description="Enhanced Speech-to-Text Tutor AI API")
377
-
378
- # CORS middleware
379
- app.add_middleware(
380
- CORSMiddleware,
381
- allow_origins=["*"],
382
- allow_credentials=True,
383
- allow_methods=["*"],
384
- allow_headers=["*"],
385
- )
386
-
387
- # FastAPI Routes
388
- @app.get("/")
389
- async def root():
390
- return {"message": "Tutor AI API is running", "version": "1.0"}
391
-
392
- @app.post("/api/transcribe")
393
- async def api_transcribe_audio(file: UploadFile = File(...)):
394
- """
395
- Enhanced API endpoint for speech-to-text transcription
396
- """
397
- try:
398
- # Check if file is audio
399
- if not file.content_type.startswith('audio/'):
400
- raise HTTPException(status_code=400, detail="File must be an audio file")
401
 
402
- # Read audio file
403
- contents = await file.read()
 
 
404
 
405
- # Convert to numpy array using soundfile
406
- audio_io = io.BytesIO(contents)
407
- audio_data, sample_rate = sf.read(audio_io)
 
 
 
408
 
409
  # Transcribe
410
- transcription = transcribe_audio((sample_rate, audio_data))
411
-
412
- return JSONResponse({
413
- "success": True,
414
- "transcription": transcription,
415
- "audio_duration": len(audio_data) / sample_rate if audio_data is not None else 0
416
- })
417
-
418
- except Exception as e:
419
- return JSONResponse({
420
- "success": False,
421
- "error": str(e)
422
- }, status_code=500)
423
-
424
- @app.post("/api/chat")
425
- async def api_chat(message: str = Form(...)):
426
- """
427
- API endpoint for chat interactions
428
- """
429
- try:
430
- # Simple chat response without memory for API
431
- prompt = f"You are a helpful AI tutor. Answer the following question accurately and concisely: {message}"
432
- response = chat_model([HumanMessage(content=prompt)])
433
- cleaned_response = clean_response(response.content)
434
-
435
- return JSONResponse({
436
- "success": True,
437
- "response": cleaned_response
438
- })
439
-
440
- except Exception as e:
441
- return JSONResponse({
442
- "success": False,
443
- "error": str(e)
444
- }, status_code=500)
445
-
446
- @app.post("/api/process-document")
447
- async def api_process_document(file: UploadFile = File(...)):
448
- """
449
- API endpoint for document processing
450
- """
451
- try:
452
- # Save uploaded file temporarily
453
- file_extension = os.path.splitext(file.filename)[-1].lower()
454
- temp_path = f"temp_{uuid.uuid4()}{file_extension}"
455
-
456
- with open(temp_path, "wb") as f:
457
- f.write(await file.read())
458
-
459
- # Process document based on type
460
- if file_extension == ".pdf":
461
- content = extract_text_from_pdf(temp_path)
462
- elif file_extension == ".docx":
463
- content = extract_text_from_docx(temp_path)
464
- elif file_extension == ".pptx":
465
- content = extract_text_from_pptx(temp_path)
466
- else:
467
- # Try text file
468
- encoding = detect_encoding(temp_path)
469
- with open(temp_path, "r", encoding=encoding, errors="replace") as f:
470
- content = f.read()
471
-
472
- # Clean up temp file
473
- os.remove(temp_path)
474
 
475
- # Generate quiz
476
- quiz = generate_quiz(content)
477
 
478
- return JSONResponse({
479
- "success": True,
480
- "content_preview": content[:500] + "..." if len(content) > 500 else content,
481
- "quiz": quiz
482
- })
483
 
484
  except Exception as e:
485
- return JSONResponse({
486
- "success": False,
487
- "error": str(e)
488
- }, status_code=500)
489
 
490
- @app.get("/api/health")
491
- async def health_check():
492
- """Health check endpoint"""
493
- return {"status": "healthy", "timestamp": time.time()}
494
-
495
- # Clear chat history function
496
- def clear_chat_history():
497
  chat_memory.clear()
498
  return [], None
499
 
500
- def tutor_ai_chatbot():
501
- """Main Gradio interface for the Tutor AI Chatbot."""
502
- with gr.Blocks() as gradio_app:
503
- gr.Markdown("# πŸ“š AI Tutor - We.(POC)")
504
- gr.Markdown("An interactive Personal AI Tutor chatbot to help with your learning needs.")
505
-
506
- # Chatbot Tab
507
- with gr.Tab("AI Chatbot"):
 
 
 
 
 
 
 
 
 
 
 
 
508
  with gr.Row():
509
  with gr.Column(scale=3):
510
- chatbot = gr.Chatbot(height=500, type="messages")
511
-
 
 
 
 
 
 
512
  with gr.Column(scale=1):
513
- audio_playback = gr.Audio(label="Audio Response", type="filepath")
 
 
 
 
 
514
 
515
- # Move the input controls here to span full width
516
  with gr.Row():
517
  msg = gr.Textbox(
518
- label="Ask a question",
519
- placeholder="Type your question here...",
520
- container=False # Removes the default container styling
 
 
521
  )
522
- submit = gr.Button("Send")
523
 
524
  with gr.Row():
525
  with gr.Column(scale=1):
526
- audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
 
 
 
 
 
527
 
528
- # Voice recording tips - ONLY in AI Chatbot tab
529
- with gr.Accordion("🎀 Voice Recording Tips", open=False):
530
  gr.Markdown("""
531
- **For better speech recognition accuracy:**
532
- - πŸŽ™οΈ Speak clearly and at a moderate pace
533
- - πŸ”‡ Record in a quiet environment
534
- - πŸ“ Keep the microphone close to your mouth (10-15 cm)
535
- - 🎧 Use a good quality microphone if possible
536
- - πŸ“ Review the transcribed text before sending
537
- - πŸ”„ If transcription is poor, try recording again or type manually
 
 
 
 
 
 
 
538
  """)
539
 
540
- # Clear chat history button
541
- clear_btn = gr.Button("Clear Chat")
542
-
543
- # Handle chat interaction
544
- submit.click(
545
- chat_with_groq,
546
- inputs=[msg, chatbot],
547
- outputs=[chatbot, msg, audio_playback]
548
- )
549
-
550
- # Clear chat history function
551
- clear_btn.click(
552
- lambda: [], # Return empty list in message format
553
- inputs=None,
554
- outputs=[chatbot]
555
- )
556
-
557
- # Also allow Enter key to submit
558
- msg.submit(
559
- chat_with_groq,
560
- inputs=[msg, chatbot],
561
- outputs=[chatbot, msg, audio_playback]
562
- )
563
-
564
- # Add some examples of questions students might ask
565
- with gr.Accordion("Example Questions", open=False):
566
- gr.Examples(
567
- examples=[
568
- "Can you explain the concept of RLHF AI?",
569
- "What are AI transformers?",
570
- "What is MoE AI?",
571
- "What's gate networks AI?",
572
- "I am making a switch, please generating baking recipe?"
573
- ],
574
- inputs=msg
575
- )
576
-
577
- # Connect audio input to transcription
578
- audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=msg)
579
-
580
- # Upload Notes & Generate Quiz Tab
581
- with gr.Tab("Upload Notes & Generate Quiz"):
582
  with gr.Row():
583
- with gr.Column(scale=2):
584
- file_input = gr.File(label="Upload Lecture Notes (PDF, DOCX, PPTX)")
585
- with gr.Column(scale=3):
586
- quiz_output = gr.Textbox(label="Generated Quiz", lines=10)
587
-
588
- # Connect file input to document processing
589
- file_input.change(process_document, inputs=file_input, outputs=quiz_output)
590
-
591
- # Introduction Video Tab - Now with the working video
592
- with gr.Tab("Introduction Video"):
 
 
593
  with gr.Row():
594
  with gr.Column(scale=1):
595
- gr.Markdown("### Welcome to the Introduction Video")
596
- gr.Markdown("Music from Xu Mengyuan - China-O, musician Xu Mengyuan YUAN! | εΎζ’¦εœ† - China-O ιŸ³δΉδΊΊεΎζ’¦εœ†YUAN!")
597
- # Use the local video file that's stored in your Space
598
- gr.Video("We_not_me_video.mp4", label="Introduction Video")
599
-
600
- # Launch the application
601
- gradio_app.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
 
603
- # Run both FastAPI and Gradio
604
  if __name__ == "__main__":
605
- import threading
606
-
607
- # Start Gradio in a separate thread
608
- gradio_thread = threading.Thread(target=tutor_ai_chatbot, daemon=True)
609
- gradio_thread.start()
610
-
611
- # Start FastAPI
612
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
16
  import groq
17
  import uuid # For generating unique filenames
18
 
19
+ # LangChain imports with compatibility handling
20
+ try:
21
+ from langchain_groq import ChatGroq
22
+ from langchain_core.messages import HumanMessage
23
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
24
+ from langchain_community.vectorstores import Chroma
25
+ from langchain_community.embeddings import HuggingFaceEmbeddings
26
+ from langchain_core.documents import Document
27
+ except ImportError:
28
+ # Fallback for older versions
29
+ try:
30
+ from langchain_groq import ChatGroq
31
+ from langchain.schema import HumanMessage
32
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
33
+ from langchain_community.vectorstores import Chroma
34
+ from langchain_community.embeddings import HuggingFaceEmbeddings
35
+ from langchain.docstore.document import Document
36
+ except ImportError as e:
37
+ print(f"Import warning: {e}")
38
+ # Define fallback classes
39
+ class HumanMessage:
40
+ def __init__(self, content):
41
+ self.content = content
42
+ class Document:
43
+ def __init__(self, page_content):
44
+ self.page_content = page_content
45
+
46
+ # Basic imports
47
  import chardet
 
48
  import fitz # PyMuPDF for PDFs
49
  import docx # python-docx for Word files
50
  import gtts # Google Text-to-Speech library
51
  from pptx import Presentation # python-pptx for PowerPoint files
52
  import re
53
 
54
+ print("πŸš€ Initializing AI Tutor Application...")
 
 
 
 
 
 
 
 
55
 
56
+ # Initialize Whisper for speech-to-text
57
  try:
58
  transcriber = pipeline(
59
+ "automatic-speech-recognition",
60
+ model="openai/whisper-base.en"
 
 
 
 
61
  )
62
+ print("βœ… Whisper model loaded successfully")
63
  except Exception as e:
64
+ print(f"❌ Error loading Whisper: {e}")
65
+ transcriber = None
 
 
 
 
66
 
67
+ # Initialize Groq
68
+ groq_api_key = os.getenv("GROQ_API_KEY")
69
+ if groq_api_key:
70
+ try:
71
+ chat_model = ChatGroq(
72
+ model_name="llama-3.3-70b-versatile",
73
+ api_key=groq_api_key,
74
+ temperature=0.7
75
+ )
76
+ CHAT_MODEL_AVAILABLE = True
77
+ print("βœ… Groq chat model initialized")
78
+ except Exception as e:
79
+ print(f"❌ Error initializing Groq: {e}")
80
+ CHAT_MODEL_AVAILABLE = False
81
+ else:
82
+ print("⚠️ GROQ_API_KEY not found in environment variables")
83
+ CHAT_MODEL_AVAILABLE = False
84
 
85
+ # Initialize Vector Store
86
+ try:
87
+ os.makedirs("chroma_db", exist_ok=True)
88
+ embedding_model = HuggingFaceEmbeddings(
89
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
90
+ )
91
+ vectorstore = Chroma(
92
+ embedding_function=embedding_model,
93
+ persist_directory="chroma_db"
94
+ )
95
+ VECTORSTORE_AVAILABLE = True
96
+ print("βœ… Vector store initialized")
97
+ except Exception as e:
98
+ print(f"❌ Error initializing vector store: {e}")
99
+ VECTORSTORE_AVAILABLE = False
100
 
101
+ # Application state
102
  chat_memory = []
103
 
104
+ # Quiz generation prompt
 
 
 
105
  quiz_prompt = """
106
+ You are an AI assistant specialized in education. Given document content, generate a quiz with 10 questions mixing multiple-choice and fill-in-the-blank.
107
+
108
+ Requirements:
109
+ - 10 total questions
110
+ - Mix of MCQs and fill-in-the-blank
111
+ - Based on key concepts from the document
112
+ - Include answer key
113
+ - Remove all markdown formatting
114
+
115
+ Output format:
116
+ 1. [Question text]
117
+ Options (if MCQ): a) b) c) d)
118
+ Answer: [Correct answer]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  """
120
 
 
121
  def clean_response(response):
122
+ """Clean AI response from unwanted formatting."""
123
+ if not response:
124
+ return ""
125
+
126
+ cleaned = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
127
+ cleaned = re.sub(r"(\*\*|\*|\[|\]|#+|\\)", "", cleaned)
128
+ return cleaned.strip()
129
 
 
130
  def generate_quiz(content):
131
+ """Generate quiz from document content."""
132
+ if not CHAT_MODEL_AVAILABLE:
133
+ return "❌ Chat model not available. Please check GROQ_API_KEY configuration."
134
+
135
+ # Limit content length to avoid token limits
136
+ if len(content) > 8000:
137
+ content = content[:8000] + "... [content truncated for efficiency]"
138
+
139
+ try:
140
+ prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
141
+ response = chat_model([HumanMessage(content=prompt)])
142
+ return clean_response(response.content)
143
+ except Exception as e:
144
+ return f"❌ Error generating quiz: {str(e)}"
145
 
 
146
  def retrieve_documents(query):
147
+ """Retrieve relevant documents for context."""
148
+ if not VECTORSTORE_AVAILABLE or not query.strip():
149
+ return []
150
+
151
+ try:
152
+ results = vectorstore.similarity_search(query, k=2)
153
+ return [doc.page_content for doc in results]
154
+ except Exception as e:
155
+ print(f"Document retrieval error: {e}")
156
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
158
  def chat_with_groq(user_input, chat_history):
159
+ """Handle chat interactions with the AI."""
160
  try:
161
+ if not user_input.strip():
162
+ return chat_history, "", None
163
+
164
+ if not CHAT_MODEL_AVAILABLE:
165
+ error_msg = "πŸ€– Chat service is currently unavailable. Please check your API configuration."
166
+ chat_history.append({"role": "user", "content": user_input})
167
+ chat_history.append({"role": "assistant", "content": error_msg})
168
+ return chat_history, "", None
 
 
 
169
 
170
+ # Get relevant context from documents
171
+ relevant_docs = retrieve_documents(user_input)
172
+ context = "\n".join(relevant_docs) if relevant_docs else "No specific context available."
173
+
174
+ # Build enhanced prompt
175
+ system_msg = "You are a helpful AI tutor. Provide accurate, educational, and concise responses. If you don't know something, admit it honestly."
176
+ prompt = f"{system_msg}\n\nRelevant Context:\n{context}\n\nUser Question: {user_input}\n\nAssistant Response:"
177
+
178
+ # Get AI response
179
  response = chat_model([HumanMessage(content=prompt)])
180
+ cleaned_response = clean_response(response.content)
181
+
182
+ # Update chat history
 
 
 
 
 
 
183
  chat_history.append({"role": "user", "content": user_input})
184
+ chat_history.append({"role": "assistant", "content": cleaned_response})
185
+
186
+ # Generate speech output
187
+ audio_file = speech_playback(cleaned_response)
188
+
189
  return chat_history, "", audio_file
190
+
191
  except Exception as e:
192
+ error_msg = f"❌ Error processing your request: {str(e)}"
193
  chat_history.append({"role": "user", "content": user_input})
194
  chat_history.append({"role": "assistant", "content": error_msg})
195
  return chat_history, "", None
196
 
 
197
  def speech_playback(text):
198
+ """Convert text to speech using gTTS."""
199
  try:
200
+ if not text or len(text.strip()) < 10:
201
+ return None
202
+
203
+ # Limit text length for audio generation
204
+ if len(text) > 400:
205
+ text = text[:400] + "..."
206
+
207
+ unique_id = str(uuid.uuid4())[:8]
208
+ audio_file = f"audio_{unique_id}.mp3"
209
+
210
+ tts = gtts.gTTS(text=text, lang='en', slow=False)
211
  tts.save(audio_file)
212
+
 
213
  return audio_file
214
  except Exception as e:
215
+ print(f"πŸ”‡ TTS Error: {e}")
216
  return None
217
 
 
218
  def detect_encoding(file_path):
219
+ """Detect file encoding."""
220
  try:
221
  with open(file_path, "rb") as f:
222
  raw_data = f.read(4096)
223
  detected = chardet.detect(raw_data)
224
+ return detected.get("encoding", "utf-8")
 
225
  except Exception:
226
  return "utf-8"
227
 
 
228
  def extract_text_from_pdf(pdf_path):
229
+ """Extract text from PDF files."""
230
  try:
231
  doc = fitz.open(pdf_path)
232
+ text = ""
233
+ for page in doc:
234
+ text += page.get_text()
235
+ return text.strip() if text.strip() else "No extractable text found in PDF."
236
  except Exception as e:
237
+ return f"PDF extraction error: {str(e)}"
238
 
 
239
  def extract_text_from_docx(docx_path):
240
+ """Extract text from Word documents."""
241
  try:
242
  doc = docx.Document(docx_path)
243
+ text = "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
244
+ return text.strip() if text.strip() else "No text found in Word document."
245
  except Exception as e:
246
+ return f"Word extraction error: {str(e)}"
247
 
 
248
  def extract_text_from_pptx(pptx_path):
249
+ """Extract text from PowerPoint files."""
250
  try:
251
+ prs = Presentation(pptx_path)
252
  text = ""
253
+ for slide in prs.slides:
254
  for shape in slide.shapes:
255
+ if hasattr(shape, "text") and shape.text:
256
  text += shape.text + "\n"
257
+ return text.strip() if text.strip() else "No text found in PowerPoint."
258
  except Exception as e:
259
+ return f"PowerPoint extraction error: {str(e)}"
260
 
 
261
def process_document(file):
    """Process an uploaded document and generate a quiz from its content.

    Extracts text according to the file extension, optionally indexes it
    into the vector store for later chat queries, then asks the quiz
    generator to produce questions.

    Args:
        file: A Gradio file object (exposes a ``.name`` path attribute),
            or None when nothing was uploaded.

    Returns:
        str: A formatted status message containing either the generated
        quiz or a user-facing error description.
    """
    try:
        if not file:
            return "πŸ“ Please upload a document file first."

        filename = file.name
        file_ext = os.path.splitext(filename)[-1].lower()

        # Fixed: the log line previously contained a literal "(unknown)"
        # placeholder instead of the actual file path.
        print(f"Processing {file_ext} file: {filename}")

        # Extract text based on file type
        if file_ext == ".pdf":
            content = extract_text_from_pdf(filename)
        elif file_ext == ".docx":
            content = extract_text_from_docx(filename)
        elif file_ext == ".pptx":
            content = extract_text_from_pptx(filename)
        elif file_ext in [".txt", ".md"]:
            encoding = detect_encoding(filename)
            with open(filename, "r", encoding=encoding, errors="ignore") as f:
                content = f.read()
        else:
            return f"❌ Unsupported file type: {file_ext}. Please upload PDF, Word, PowerPoint, or text files."

        # Only the extractors' own sentinel strings count as failures.
        # The previous substring check ("error" / "no text" anywhere in
        # the lower-cased content) rejected valid documents that merely
        # mention the word "error", while letting the "No extractable
        # text found in PDF." sentinel through as real content.
        failure_markers = (
            "PDF extraction error:",
            "Word extraction error:",
            "PowerPoint extraction error:",
            "No extractable text found",
            "No text found",
        )
        if not content or content.startswith(failure_markers):
            return f"❌ Could not extract meaningful content from this file. Error: {content}"

        # Store in vector database for future queries (best effort).
        if VECTORSTORE_AVAILABLE and len(content) > 100:
            try:
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=500,
                    chunk_overlap=50
                )
                texts = text_splitter.split_text(content)
                documents = [Document(page_content=text) for text in texts]
                vectorstore.add_documents(documents)
            except Exception as e:
                # Indexing failure shouldn't block quiz generation.
                print(f"Vector store addition warning: {e}")

        # Generate quiz from content
        quiz = generate_quiz(content)

        success_msg = f"""
βœ… **Document Processed Successfully!**

πŸ“„ **File Type**: {file_ext.upper()}
πŸ“ **Content Preview**: {content[:200]}...

πŸ“‹ **Generated Quiz**:
{quiz}
"""
        return success_msg

    except Exception as e:
        return f"❌ Error processing document: {str(e)}"
 
 
319
 
320
def transcribe_audio(audio):
    """Transcribe recorded audio to text using the Whisper pipeline.

    Args:
        audio: A ``(sample_rate, numpy_array)`` tuple as produced by a
            Gradio Audio component with ``type="numpy"``, or None when
            nothing was recorded.

    Returns:
        str: The transcription prefixed with an emoji marker, or an
        explanatory error/help message.
    """
    try:
        if audio is None:
            return "🎀 No audio detected. Please record or upload audio."

        if transcriber is None:
            return "πŸ”‡ Speech-to-text service is currently unavailable."

        sample_rate, audio_data = audio

        # Basic audio preprocessing: average channels to mono.
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)

        audio_data = audio_data.astype(np.float32)

        # Normalize peak amplitude to the [-1, 1] range Whisper expects;
        # skip silent clips to avoid division by zero.
        max_val = np.max(np.abs(audio_data))
        if max_val > 0:
            audio_data = audio_data / max_val

        # Check audio length. Fixed: the message previously claimed a
        # 1-second minimum while the actual threshold is 0.5 s.
        audio_duration = len(audio_data) / sample_rate
        if audio_duration < 0.5:
            return "⏱️ Audio too short. Please record at least 0.5 seconds."
        if audio_duration > 30:
            return "⏱️ Audio too long. Please keep under 30 seconds."

        # Transcribe
        result = transcriber({"sampling_rate": sample_rate, "raw": audio_data})
        text = result.get("text", "").strip()

        if not text:
            return "πŸ”‡ No speech detected. Please try again with clearer audio."

        return f"🎀 Transcribed: {text}"

    except Exception as e:
        return f"❌ Transcription error: {str(e)}"
 
 
 
360
 
361
def clear_chat():
    """Reset the conversation: wipe stored memory and blank the UI.

    Returns:
        tuple: An empty chat history list and None for the audio widget.
    """
    chat_memory.clear()
    return ([], None)
365
 
366
def create_interface():
    """Create and configure the Gradio interface.

    Builds a three-tab Blocks app (chat, document/quiz, help) and wires
    the UI events to the module-level handler functions.

    Returns:
        gr.Blocks: The fully wired (but not yet launched) Gradio app.
    """
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="AI Tutor - Learning Assistant",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        """
    ) as app:
        gr.Markdown("""
        # πŸŽ“ AI Tutor Assistant
        *Your personal learning companion with speech-to-text capabilities*
        """)

        # Main chat interface
        with gr.Tab("πŸ’¬ AI Chatbot"):
            gr.Markdown("Chat with your AI tutor using text or voice input!")

            with gr.Row():
                with gr.Column(scale=3):
                    # type="messages" selects the role/content dict format
                    # (the tuple format is deprecated in recent Gradio).
                    chatbot = gr.Chatbot(
                        label="Conversation History",
                        height=500,
                        type="messages",
                        show_copy_button=True,
                        avatar_images=("πŸ‘€", "πŸ€–")
                    )

                with gr.Column(scale=1):
                    # Receives the TTS file path from the chat handler and
                    # autoplays it on update.
                    audio_output = gr.Audio(
                        label="Audio Response",
                        type="filepath",
                        visible=True,
                        autoplay=True
                    )

            with gr.Row():
                msg = gr.Textbox(
                    label="Your message",
                    placeholder="Type your question here or use voice input below...",
                    scale=4,
                    container=False,
                    max_lines=3
                )
                send_btn = gr.Button("πŸš€ Send", scale=1, variant="primary")

            with gr.Row():
                with gr.Column(scale=1):
                    # type="numpy" yields the (sample_rate, ndarray) tuple
                    # that transcribe_audio expects.
                    audio_input = gr.Audio(
                        sources=["microphone"],
                        type="numpy",
                        label="🎀 Record Audio Question",
                        show_download_button=False
                    )

            with gr.Accordion("πŸ’‘ Tips for Better Experience", open=False):
                gr.Markdown("""
                **🎀 Voice Input Tips:**
                - Speak clearly in a quiet environment
                - Keep microphone 10-15 cm from your mouth
                - Record for 2-5 seconds for best results

                **πŸ“š Document Tips:**
                - Upload PDF, Word, or PowerPoint files
                - Clear text documents work best
                - Process documents before asking questions about them

                **πŸ’¬ Chat Tips:**
                - Ask specific questions for better answers
                - Use the clear button to start fresh conversations
                - The AI remembers context from uploaded documents
                """)

            with gr.Row():
                clear_btn = gr.Button("🧹 Clear Chat History", variant="secondary")
                # Full page reload is done client-side via the js hook;
                # the Python fn is a no-op.
                gr.Button("πŸ”„ Refresh Page").click(
                    lambda: None,
                    None,
                    None,
                    js="() => window.location.reload()"
                )

        # Document processing tab
        with gr.Tab("πŸ“š Upload & Generate Quiz"):
            gr.Markdown("Upload your study materials and generate custom quizzes automatically!")

            with gr.Row():
                with gr.Column(scale=1):
                    file_upload = gr.File(
                        label="πŸ“ Upload Study Materials",
                        file_types=[".pdf", ".docx", ".pptx", ".txt", ".md"],
                        file_count="single",
                        height=100
                    )
                    process_btn = gr.Button("⚑ Process & Generate Quiz", variant="primary")

                    gr.Markdown("""
                    **Supported Formats:**
                    - PDF documents
                    - Word documents (.docx)
                    - PowerPoint (.pptx)
                    - Text files (.txt, .md)
                    """)

                with gr.Column(scale=2):
                    quiz_display = gr.Textbox(
                        label="πŸ“‹ Generated Quiz",
                        lines=20,
                        max_lines=25,
                        show_copy_button=True,
                        placeholder="Your generated quiz will appear here after processing a document..."
                    )

        # Instructions tab
        with gr.Tab("ℹ️ How to Use"):
            gr.Markdown("""
            ## πŸŽ“ Getting Started with AI Tutor

            ### 🎀 Using Voice Input
            1. Go to the **AI Chatbot** tab
            2. Click the microphone button
            3. Allow microphone access in your browser
            4. Speak clearly and wait for transcription
            5. Review the text and click Send

            ### πŸ“š Processing Documents
            1. Go to the **Upload & Generate Quiz** tab
            2. Upload your study materials (PDF, Word, PowerPoint)
            3. Click "Process & Generate Quiz"
            4. Get instant quiz questions based on your content
            5. Use the chat to ask questions about your documents

            ### πŸ’¬ Chat Features
            - Ask questions about uploaded documents
            - Get detailed explanations
            - Receive audio responses
            - Clear chat when needed

            ### πŸ”§ Technical Requirements
            - Modern web browser with microphone access
            - Stable internet connection
            - Groq API key (set as environment variable)
            """)

        # Event handlers
        # Both the Send button and pressing Enter in the textbox route
        # through the same chat handler.
        send_btn.click(
            fn=chat_with_groq,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg, audio_output]
        )

        msg.submit(
            fn=chat_with_groq,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg, audio_output]
        )

        # A new recording is transcribed straight into the message box so
        # the user can review/edit before sending.
        audio_input.change(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[msg]
        )

        process_btn.click(
            fn=process_document,
            inputs=[file_upload],
            outputs=[quiz_display]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot, audio_output]
        )

    return app
543
 
544
+ # Launch the application
545
if __name__ == "__main__":
    # Entry point: build the interface and serve it on all interfaces,
    # port 7860 (the Hugging Face Spaces default).
    print("🌈 Starting AI Tutor Application...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "debug": True,
    }
    create_interface().launch(**launch_options)
+ )