AK97GAMERZ committed on
Commit 1cc081e · verified · 1 Parent(s): 85be1e4

Update app.py

Files changed (1)
  1. app.py +183 -186
app.py CHANGED
@@ -4,50 +4,41 @@ import os
4
  import fitz # PyMuPDF
5
  import tempfile
6
  import subprocess # For calling Piper TTS
7
- import wave # For saving WAV files
8
  import pathlib
9
  import whisper # For Speech-to-Text
10
  import numpy as np
11
  import soundfile as sf # To read audio data for Whisper
12
 
13
  # --- Configuration ---
14
- # 1. Hugging Face Spaces Secrets:
15
- # - GOOGLE_API_KEY: Your Gemini API Key
16
- # - PIPER_VOICE_PATH: Path to the piper voice model (.onnx file).
17
- # You'll need to upload the voice model and its .json config to your Space.
18
- # Example: "voices/en_US-lessac-medium.onnx"
19
- # Download voices from: https://huggingface.co/rhasspy/piper-voices/tree/main
20
-
21
  try:
22
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
23
- PIPER_VOICE_PATH_ONNX = os.environ.get("PIPER_VOICE_PATH_ONNX") # e.g., voices/en_US-lessac-medium.onnx
24
  PIPER_VOICE_PATH_JSON = PIPER_VOICE_PATH_ONNX + ".json" if PIPER_VOICE_PATH_ONNX else None
25
 
26
  if not GOOGLE_API_KEY:
27
  print("Warning: GOOGLE_API_KEY not found in secrets.")
28
  if not PIPER_VOICE_PATH_ONNX or not os.path.exists(PIPER_VOICE_PATH_ONNX):
29
  print(f"Warning: Piper voice ONNX model not found at specified path: {PIPER_VOICE_PATH_ONNX}. TTS will not work.")
30
- PIPER_VOICE_PATH_ONNX = None # Disable TTS if model not found
31
- if PIPER_VOICE_PATH_JSON and not os.path.exists(PIPER_VOICE_PATH_JSON):
32
- print(f"Warning: Piper voice JSON config not found at specified path: {PIPER_VOICE_PATH_JSON}. TTS might have issues.")
33
-
34
 
35
  except KeyError as e:
36
  print(f"Please set the following environment variables in Hugging Face Space secrets: {e}")
37
  GOOGLE_API_KEY = None
38
  PIPER_VOICE_PATH_ONNX = None
 
39
 
40
 
41
  # Initialize Gemini
42
  if GOOGLE_API_KEY:
43
  genai.configure(api_key=GOOGLE_API_KEY)
44
- gemini_model = genai.GenerativeModel('gemini-1.5-flash-latest') # Using 1.5 Flash
45
  else:
46
  gemini_model = None
47
 
48
- # Initialize Whisper STT model (load it once)
49
- # You can choose model size: "tiny", "base", "small", "medium", "large"
50
- # Smaller models are faster but less accurate. "base" or "small" is a good start.
51
  try:
52
  stt_model = whisper.load_model("base")
53
  print("Whisper STT model loaded successfully.")
@@ -58,19 +49,22 @@ except Exception as e:
58
  # --- Helper Functions ---
59
 
60
  def pdf_to_text(pdf_file_path):
61
- """Extracts text from a PDF file."""
62
  if not pdf_file_path:
63
  return ""
64
- doc = fitz.open(pdf_file_path)
65
- text = ""
66
- for page_num in range(len(doc)):
67
- page = doc.load_page(page_num)
68
- text += page.get_text()
69
- doc.close()
70
  - return text
71
 
72
  def generate_lecture_prompt(chapter_text):
73
- """Creates a detailed prompt for Gemini."""
74
  prompt = f"""
75
  You are an expert, engaging, and slightly humorous AI tutor, like the best human teacher one could ask for.
76
  Your goal is to generate a comprehensive and interactive lecture based on the following PDF chapter text.
@@ -115,162 +109,176 @@ def text_to_speech_piper(text, output_filename="lecture_audio.wav"):
115
  print("Piper TTS model not available or no text provided. Skipping TTS.")
116
  return None
117
 
118
- # Ensure piper executable is in PATH or provide full path
119
- # On Hugging Face Spaces, 'piper' might need to be installed via packages.txt or built.
120
- # Assuming 'piper' is available:
121
- piper_executable = "piper" # Or full path if not in PATH
122
-
123
- # Create a temporary file for the text input if text is very long
124
- with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".txt", encoding='utf-8') as tmp_text_file:
125
- tmp_text_file.write(text)
126
- text_input_path = tmp_text_file.name
127
 
128
  command = [
129
  piper_executable,
130
  "--model", PIPER_VOICE_PATH_ONNX,
131
- "--output_file", output_filename,
132
- "--text_file", text_input_path # Using text file for potentially long inputs
133
  ]
134
- # If your voice has a JSON config, Piper usually finds it if it's next to the ONNX file.
135
- # If not, you might need to add: "--config", PIPER_VOICE_PATH_JSON
 
 
136
 
137
- print(f"Running Piper TTS command: {' '.join(command)}")
138
  try:
139
- process = subprocess.run(command, capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore')
140
- print("Piper TTS STDOUT:", process.stdout)
141
- print("Piper TTS STDERR:", process.stderr)
142
  - os.remove(text_input_path) # Clean up temp file
143
  if os.path.exists(output_filename) and os.path.getsize(output_filename) > 0:
 
 
 
144
  return output_filename
145
  else:
146
- print(f"Piper TTS failed to create or created an empty output file: {output_filename}")
147
- if process.stderr: print("Piper Error:", process.stderr)
 
148
  return None
149
- except subprocess.CalledProcessError as e:
150
- print(f"Error during Piper TTS execution: {e}")
151
- print("Piper STDOUT:", e.stdout)
152
- print("Piper STDERR:", e.stderr)
153
- os.remove(text_input_path) # Clean up temp file
154
- return None
155
  except FileNotFoundError:
156
  print(f"Error: '{piper_executable}' command not found. Make sure Piper is installed and in your PATH.")
157
- print("On Hugging Face Spaces, you might need to add 'piper-tts' to requirements.txt (if it installs CLI) or install via packages.txt.")
158
- os.remove(text_input_path) # Clean up temp file
 
 
159
  return None
160
 
161
 
162
- def transcribe_audio(audio_filepath):
163
- """Transcribes audio file to text using Whisper."""
164
- if not stt_model or not audio_filepath:
165
- print("Whisper STT model not available or no audio file. Skipping transcription.")
166
  return "Error: STT not available."
 
 
 
 
167
  try:
168
- # Whisper expects a NumPy array or path to file.
169
- # Gradio mic input provides (sample_rate, data_numpy_array) or a filepath string
170
-
171
- if isinstance(audio_filepath, tuple): # If (rate, data) format
172
- sample_rate, data = audio_filepath
173
- # Ensure data is float32, as whisper expects
174
- if data.dtype != np.float32:
175
- data = data.astype(np.float32) / np.iinfo(data.dtype).max # Normalize if int
176
-
177
- # Save to a temporary WAV file because whisper.transcribe() is easier with file paths
178
- # for some backend configurations or if there are issues with direct array processing.
179
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio_file:
180
- sf.write(tmp_audio_file.name, data, sample_rate)
181
- temp_audio_path = tmp_audio_file.name
182
 
183
- result = stt_model.transcribe(temp_audio_path)
184
- os.remove(temp_audio_path) # Clean up temp audio file
185
-
186
- elif isinstance(audio_filepath, str): # If it's already a file path
187
- result = stt_model.transcribe(audio_filepath)
 
188
  else:
189
  return "Error: Invalid audio input format for transcription."
190
 
 
 
 
 
 
191
  return result["text"]
192
  except Exception as e:
193
  print(f"Error during audio transcription: {e}")
 
 
 
 
 
194
  return f"Error during transcription: {str(e)}"
195
 
196
  # --- Main Gradio App Logic ---
197
 
198
- lecture_state = {"full_lecture_text": "", "current_segment_index": 0, "segments": []}
199
-
200
  def process_pdf_and_generate_lecture(pdf_file_obj, progress=gr.Progress(track_tqdm=True)):
201
- """Processes PDF, generates lecture text, and converts to speech."""
202
  if not gemini_model:
203
- return "Gemini API not configured.", None, "Error: Gemini API key missing.", None
204
  if not pdf_file_obj:
205
- return "Please upload a PDF file.", None, "No PDF uploaded.", None
206
 
207
  progress(0.1, desc="Extracting text from PDF...")
208
- pdf_text = pdf_to_text(pdf_file_obj.name) # .name gives the temp path of uploaded file
 
209
 
210
  if not pdf_text.strip():
211
- return "Could not extract text from PDF or PDF is empty.", None, "Empty PDF content.", None
 
 
 
 
 
 
212
 
213
  progress(0.3, desc="Generating lecture script with Gemini...")
214
- lecture_prompt = generate_lecture_prompt(pdf_text[:15000]) # Limit context window for safety
215
 
 
216
  try:
217
  response = gemini_model.generate_content(lecture_prompt)
218
  lecture_text = response.text
219
  except Exception as e:
220
  print(f"Error calling Gemini API: {e}")
221
- return f"Error generating lecture: {e}", None, "Gemini API Error.", None
222
 
223
- lecture_state["full_lecture_text"] = lecture_text
224
- # Simple segmentation for now (e.g., by paragraphs) for potential future "resume"
225
- lecture_state["segments"] = [s.strip() for s in lecture_text.split("\n\n") if s.strip()]
226
- lecture_state["current_segment_index"] = 0
227
-
228
- # For whiteboard: extract parts starting with "Imagine on our whiteboard:"
229
  whiteboard_content = ""
230
  for line in lecture_text.split('\n'):
231
  if line.lower().startswith("imagine on our whiteboard:"):
232
  whiteboard_content += line.replace("Imagine on our whiteboard:", "").strip() + "\n\n"
233
  if not whiteboard_content:
234
- whiteboard_content = "No specific whiteboard content described for this section. The AI will verbally describe visuals."
235
 
236
  progress(0.7, desc="Converting lecture to speech (TTS)...")
237
  # Create a unique filename for audio to avoid caching issues if files are static
238
- audio_output_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
 
 
239
 
240
- lecture_audio_path = text_to_speech_piper(lecture_text, output_filename=audio_output_path)
241
 
242
- if not lecture_audio_path:
243
  progress(1.0, desc="TTS failed. Displaying text only.")
244
- return lecture_text, None, whiteboard_content, "TTS failed. Audio not available."
 
245
 
246
  progress(1.0, desc="Lecture ready!")
247
- return lecture_text, lecture_audio_path, whiteboard_content, "Lecture generated successfully!"
 
248
 
249
 
250
- def handle_student_doubt(student_audio_query, lecture_context_text, progress=gr.Progress(track_tqdm=True)):
251
- if not student_audio_query:
252
- return "No doubt recorded. Please record your question.", None, lecture_context_text
253
  if not gemini_model:
254
- return "Gemini API not configured. Cannot answer doubt.", None, lecture_context_text
255
 
256
  progress(0.2, desc="Transcribing your question...")
257
- # student_audio_query from gr.Audio is a tuple (sample_rate, numpy_array) or filepath
258
- # For Whisper, we often save it to a temp file if it's raw data.
259
- # The `transcribe_audio` function handles this.
260
-
261
- # Gradio's audio input (mic) typically gives a filepath to a temp WAV
262
- student_question_text = transcribe_audio(student_audio_query)
263
 
264
  if student_question_text.startswith("Error:"):
265
- return f"Could not understand your question: {student_question_text}", None, lecture_context_text
266
 
267
  progress(0.5, desc="Thinking about your question...")
268
 
 
 
 
269
  doubt_prompt = f"""
270
  A student has a doubt regarding the lecture.
271
- Current Lecture Context:
272
  ---
273
- {lecture_context_text[-2000:]}
274
  ---
275
  Student's Question: "{student_question_text}"
276
 
@@ -278,44 +286,56 @@ def handle_student_doubt(student_audio_query, lecture_context_text, progress=gr.
278
  1. Acknowledge the question.
279
  2. Provide a clear, concise, and helpful answer.
280
  3. Use analogies if helpful. Maintain your encouraging and slightly humorous tone.
281
- 4. After answering, gently prompt if they understood or if they'd like to continue the lecture. For example: "Does that make sense? Shall we get back to where we left off in the lecture?"
282
 
283
  Keep your answer focused on the question.
284
  """
 
285
  try:
286
  response = gemini_model.generate_content(doubt_prompt)
287
  answer_text = response.text
288
  except Exception as e:
289
  print(f"Error calling Gemini API for doubt: {e}")
290
- return f"Error processing doubt: {e}", None, lecture_context_text
291
 
292
  progress(0.8, desc="Preparing audio for the answer...")
293
- answer_audio_path_temp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
294
- answer_audio_path = text_to_speech_piper(answer_text, output_filename=answer_audio_path_temp)
 
 
 
 
295
 
296
  if not answer_audio_path:
297
  progress(1.0, desc="TTS for answer failed.")
298
- # Fallback: return text only if TTS fails
299
- return f"AI Tutor (Text): {answer_text}\n(Audio for answer failed to generate)", None, lecture_context_text
300
 
301
  progress(1.0, desc="Answer ready!")
302
- return f"Your Question: {student_question_text}\n\nAI Tutor: {answer_text}", answer_audio_path, lecture_context_text
303
 
304
 
305
  # --- Gradio UI ---
306
  css = """
307
  body { font-family: 'Arial', sans-serif; }
308
- .gradio-container { max-width: 900px !important; margin: auto !important; }
309
- .gr-button { background-color: #4CAF50; color: white; border-radius: 8px; }
310
- .gr-button:hover { background-color: #45a049; }
 
 
 
311
  .panel_description { padding: 10px; margin-bottom:10px; border-radius:5px; background-color:#f0f0f0; }
312
  .important_text { color: #D32F2F; font-weight: bold; }
313
  .markdown-output h1, .markdown-output h2 { color: #1976D2; }
314
- .markdown-output strong { color: #555; }
315
  - .whiteboard-display { border: 2px dashed #ccc; padding: 15px; margin-top: 15px; background-color: #f9f9f9; min-height: 150px; }
316
  """
317
 
318
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
319
  gr.Markdown("# 🤖 AI Human-Like Tutor", elem_id="app_title")
320
  gr.Markdown(
321
  "Upload a PDF chapter, and the AI will generate an engaging lecture with voice, "
@@ -324,42 +344,34 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
324
  )
325
 
326
  if not GOOGLE_API_KEY or not gemini_model:
327
- gr.Markdown("<p class='important_text'>🔴 Configuration Error: GOOGLE_API_KEY for Gemini is not set in Hugging Face Secrets. The app will not function correctly.</p>")
328
  if not PIPER_VOICE_PATH_ONNX:
329
- gr.Markdown("<p class='important_text'>🟡 Configuration Warning: PIPER_VOICE_PATH not set or model not found. TTS (Text-to-Speech) will be disabled.</p>")
330
  if not stt_model:
331
- gr.Markdown("<p class='important_text'>🟡 Configuration Warning: Whisper STT model failed to load. Mic input for doubts cannot be transcribed.</p>")
332
 
333
-
334
- # Store lecture context for doubt handling
335
  lecture_context_state = gr.State(value="")
336
 
337
  with gr.Row():
338
  with gr.Column(scale=1):
339
  pdf_upload = gr.File(label="Upload PDF Chapter", file_types=[".pdf"])
340
- generate_button = gr.Button("🚀 Generate Lecture", variant="primary")
341
- status_message = gr.Textbox(label="Status", interactive=False)
342
 
343
  gr.Markdown("---")
344
  gr.Markdown("### 🤔 Ask a Doubt")
345
- # "Raise Hand" button could toggle visibility of mic_input and ask_doubt_button
346
- # For simplicity, they are always visible here.
347
  raise_hand_button = gr.Button("✋ Raise Hand / Prepare to Ask")
348
-
349
- # Mic input for student's doubt
350
- # Using type="filepath" as Whisper model prefers file paths for robust processing.
351
- # Gradio will save the recorded audio to a temporary file and pass its path.
352
  student_mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Your Doubt (after clicking Raise Hand)")
353
-
354
  ask_doubt_button = gr.Button("💬 Ask My Doubt", variant="secondary")
 
355
 
356
  with gr.Column(scale=2):
357
- gr.Markdown("## giảng Lecture Output")
358
  lecture_display = gr.Markdown(label="Lecture Script")
359
  lecture_audio = gr.Audio(label="🎧 Listen to Lecture", type="filepath", autoplay=False)
360
 
361
- gr.Markdown("### 칠판 Whiteboard Area")
362
- whiteboard_display = gr.Textbox(label="Visuals & Notes (as described by AI)", lines=8, interactive=False, elem_classes=["whiteboard-display"])
363
 
364
  gr.Markdown("---")
365
  gr.Markdown("### 💡 Doubt Resolution")
@@ -372,63 +384,48 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
372
  inputs=[pdf_upload],
373
  outputs=[lecture_display, lecture_audio, whiteboard_display, status_message, lecture_context_state],
374
  api_name="generate_lecture"
375
- ).then(
376
- fn=lambda lecture_text: lecture_text, # Store full lecture text for context
377
- inputs=[lecture_display],
378
- outputs=[lecture_context_state]
379
  )
380
 
381
  - # Simple "Raise Hand" action - could be expanded
382
  raise_hand_button.click(
383
- lambda: gr.Info("Mic enabled! Record your question and click 'Ask My Doubt'.")
384
  )
385
 
386
  ask_doubt_button.click(
387
  fn=handle_student_doubt,
388
- inputs=[student_mic_input, lecture_context_state], # Pass current lecture context
389
- outputs=[doubt_answer_display, doubt_answer_audio, status_message], # status_message can be updated here too
390
  api_name="ask_doubt"
391
  )
392
 
393
- # --- Instructions for Hugging Face Spaces ---
394
- # 1. Create a new Space on Hugging Face.
395
- # 2. Choose "Gradio" as the SDK.
396
- # 3. Add your GOOGLE_API_KEY to the Space secrets (Settings -> Secrets).
397
- # - Name: GOOGLE_API_KEY
398
- # - Value: sk-yourActualGeminiApiKey...
399
- # 4. Download a Piper voice model:
400
- # - Go to: https://huggingface.co/rhasspy/piper-voices/tree/main
401
- # - Choose a voice, e.g., en_US-lessac-medium.onnx and its corresponding en_US-lessac-medium.onnx.json file.
402
- # - Upload these two files to a folder in your Space, e.g., create a `voices` folder and put them there.
403
- # 5. Add PIPER_VOICE_PATH_ONNX to Space secrets:
404
- # - Name: PIPER_VOICE_PATH_ONNX
405
- # - Value: voices/en_US-lessac-medium.onnx (or whatever path you used)
406
- # 6. Create a `requirements.txt` file in your Space repository with the content provided above.
407
- # 7. Create an `app.py` file in your Space repository with the Python code above.
408
- # 8. (Optional but Recommended) If `ffmpeg` is needed by Whisper or Piper for audio conversion/handling on Spaces:
409
- # Create a `packages.txt` file in your Space repository with the line:
410
- # ffmpeg
411
- # 9. The app should build and run. You might need to wait a bit for models (like Whisper) to download on first run.
412
-
413
  if __name__ == "__main__":
414
- # This part is for local execution, not strictly needed for HF Spaces
415
  # For local run, you'd set env vars:
416
  # os.environ['GOOGLE_API_KEY'] = 'YOUR_LOCAL_KEY'
417
  # os.environ['PIPER_VOICE_PATH_ONNX'] = 'path/to/your/local/voice.onnx'
418
- # Make sure 'piper' executable is in your PATH or adjust `piper_executable` variable.
419
- # And Whisper model will download to default cache.
 
 
420
 
421
- # Check if secrets are loaded for local testing (if you want to emulate HF secrets)
422
- if not GOOGLE_API_KEY:
423
- print("Local Run: GOOGLE_API_KEY not set as env variable.")
424
- if not PIPER_VOICE_PATH_ONNX:
425
- print("Local Run: PIPER_VOICE_PATH_ONNX not set as env variable. TTS might fail if piper executable is not found or model path is incorrect.")
426
- else:
427
- # Check if the local piper model files actually exist
428
- if not os.path.exists(PIPER_VOICE_PATH_ONNX):
429
- print(f"Local Run Warning: Piper ONNX model not found at {PIPER_VOICE_PATH_ONNX}")
430
- if not os.path.exists(PIPER_VOICE_PATH_ONNX + ".json"):
431
- print(f"Local Run Warning: Piper JSON config not found at {PIPER_VOICE_PATH_ONNX + '.json'}")
432
-
433
-
434
- app.launch(debug=True, share=False) # Set share=True to get a public link if running locally
 
4
  import fitz # PyMuPDF
5
  import tempfile
6
  import subprocess # For calling Piper TTS
7
+ # import wave # Not directly used now with Popen, but good to have if manipulating WAVs
8
  import pathlib
9
  import whisper # For Speech-to-Text
10
  import numpy as np
11
  import soundfile as sf # To read audio data for Whisper
12
 
13
  # --- Configuration ---
14
  try:
15
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
16
+ PIPER_VOICE_PATH_ONNX = os.environ.get("PIPER_VOICE_PATH_ONNX")
17
  PIPER_VOICE_PATH_JSON = PIPER_VOICE_PATH_ONNX + ".json" if PIPER_VOICE_PATH_ONNX else None
18
 
19
  if not GOOGLE_API_KEY:
20
  print("Warning: GOOGLE_API_KEY not found in secrets.")
21
  if not PIPER_VOICE_PATH_ONNX or not os.path.exists(PIPER_VOICE_PATH_ONNX):
22
  print(f"Warning: Piper voice ONNX model not found at specified path: {PIPER_VOICE_PATH_ONNX}. TTS will not work.")
23
+ PIPER_VOICE_PATH_ONNX = None
24
+ if PIPER_VOICE_PATH_ONNX and (not PIPER_VOICE_PATH_JSON or not os.path.exists(PIPER_VOICE_PATH_JSON)):
25
+ print(f"Warning: Piper voice JSON config not found at specified path: {PIPER_VOICE_PATH_JSON}. TTS might have issues if model needs explicit config.")
 
26
 
27
  except KeyError as e:
28
  print(f"Please set the following environment variables in Hugging Face Space secrets: {e}")
29
  GOOGLE_API_KEY = None
30
  PIPER_VOICE_PATH_ONNX = None
31
+ PIPER_VOICE_PATH_JSON = None
32
 
33
 
34
  # Initialize Gemini
35
  if GOOGLE_API_KEY:
36
  genai.configure(api_key=GOOGLE_API_KEY)
37
+ gemini_model = genai.GenerativeModel('gemini-1.5-flash-latest')
38
  else:
39
  gemini_model = None
40
 
41
+ # Initialize Whisper STT model
 
 
42
  try:
43
  stt_model = whisper.load_model("base")
44
  print("Whisper STT model loaded successfully.")
 
49
  # --- Helper Functions ---
50
 
51
  def pdf_to_text(pdf_file_path):
 
52
  if not pdf_file_path:
53
  return ""
54
+ try:
55
+ doc = fitz.open(pdf_file_path)
56
+ text = ""
57
+ for page_num in range(len(doc)):
58
+ page = doc.load_page(page_num)
59
+ text += page.get_text()
60
+ doc.close()
61
+ return text
62
+ except Exception as e:
63
+ print(f"Error reading PDF {pdf_file_path}: {e}")
64
+ return ""
65
+
66
 
67
  def generate_lecture_prompt(chapter_text):
 
68
  prompt = f"""
69
  You are an expert, engaging, and slightly humorous AI tutor, like the best human teacher one could ask for.
70
  Your goal is to generate a comprehensive and interactive lecture based on the following PDF chapter text.
 
109
  print("Piper TTS model not available or no text provided. Skipping TTS.")
110
  return None
111
 
112
  + piper_executable = "piper"
113
 
114
  command = [
115
  piper_executable,
116
  "--model", PIPER_VOICE_PATH_ONNX,
117
+ "--output_file", output_filename
 
118
  ]
119
+ # If your voice explicitly needs the .json config file passed (usually not if named correctly)
120
+ # and PIPER_VOICE_PATH_JSON and os.path.exists(PIPER_VOICE_PATH_JSON):
121
+ # command.extend(["--config", PIPER_VOICE_PATH_JSON])
122
+
123
 
124
+ print(f"Running Piper TTS command: {' '.join(command)} (text will be piped via stdin)")
125
  try:
126
+ process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8', errors='ignore')
127
+ stdout, stderr = process.communicate(input=text)
128
+
129
+ if process.returncode != 0:
130
+ print(f"Error during Piper TTS execution. Return code: {process.returncode}")
131
+ print("Piper STDOUT:", stdout)
132
+ print("Piper STDERR:", stderr)
133
+ # Attempt to remove potentially empty/corrupted output file
134
+ if os.path.exists(output_filename):
135
+ try:
136
+ os.remove(output_filename)
137
+ except OSError as e_rm:
138
+ print(f"Could not remove potentially corrupted output file {output_filename}: {e_rm}")
139
+ return None
140
+
141
  if os.path.exists(output_filename) and os.path.getsize(output_filename) > 0:
142
+ print("Piper TTS successful.")
143
+ if stdout: print("Piper TTS STDOUT:", stdout)
144
+ if stderr: print("Piper TTS STDERR:", stderr) # Should ideally be empty on success
145
  return output_filename
146
  else:
147
+ print(f"Piper TTS created an empty or no output file: {output_filename}")
148
+ if stdout: print("Piper STDOUT:", stdout)
149
+ if stderr: print("Piper Error:", stderr)
150
  return None
151
  +
152
  except FileNotFoundError:
153
  print(f"Error: '{piper_executable}' command not found. Make sure Piper is installed and in your PATH.")
154
+ print("On Hugging Face Spaces, ensure 'piper-tts' is in requirements.txt and properly installed, and 'piper' is available in the environment.")
155
+ return None
156
+ except Exception as e:
157
+ print(f"An unexpected error occurred during Piper TTS execution: {e}")
158
  return None
159
 
160
 
161
+ def transcribe_audio(audio_input):
162
+ if not stt_model:
163
+ print("Whisper STT model not available. Skipping transcription.")
 
164
  return "Error: STT not available."
165
+ if not audio_input:
166
+ print("No audio input provided for transcription.")
167
+ return "Error: No audio provided."
168
+
169
  try:
170
+ # Gradio audio input can be a filepath string or (sample_rate, numpy_array)
171
+ # Whisper's transcribe method directly accepts filepaths.
172
+ # If it's (rate, data), we need to save it to a temp file.
173
+ temp_audio_path = None
174
+ if isinstance(audio_input, tuple):
175
+ sample_rate, data = audio_input
176
+ if data.dtype != np.float32: # Whisper expects float32
177
  + data = data.astype(np.float32) / np.iinfo(data.dtype).max if np.issubdtype(data.dtype, np.integer) else data.astype(np.float32)
178
 
179
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_f:
180
+ sf.write(tmp_f.name, data, sample_rate)
181
+ temp_audio_path = tmp_f.name
182
+ audio_filepath_to_transcribe = temp_audio_path
183
+ elif isinstance(audio_input, str) and os.path.exists(audio_input):
184
+ audio_filepath_to_transcribe = audio_input
185
  else:
186
  return "Error: Invalid audio input format for transcription."
187
 
188
+ result = stt_model.transcribe(audio_filepath_to_transcribe, fp16=False) # fp16=False for CPU
189
+
190
+ if temp_audio_path: # Clean up temp file if we created one
191
+ os.remove(temp_audio_path)
192
+
193
  return result["text"]
194
  except Exception as e:
195
  print(f"Error during audio transcription: {e}")
196
+ if temp_audio_path and os.path.exists(temp_audio_path): # Ensure cleanup on error too
197
+ try:
198
+ os.remove(temp_audio_path)
199
+ except OSError:
200
+ pass
201
  return f"Error during transcription: {str(e)}"
202
 
203
  # --- Main Gradio App Logic ---
204
 
 
 
205
  def process_pdf_and_generate_lecture(pdf_file_obj, progress=gr.Progress(track_tqdm=True)):
 
206
  if not gemini_model:
207
+ return "Gemini API not configured. Please check secrets.", None, "Error: Gemini API key missing.", "API Error", ""
208
  if not pdf_file_obj:
209
+ return "Please upload a PDF file.", None, "No PDF uploaded.", "Input Error", ""
210
 
211
  progress(0.1, desc="Extracting text from PDF...")
212
+ # pdf_file_obj.name is the temporary path of the uploaded file
213
+ pdf_text = pdf_to_text(pdf_file_obj.name)
214
 
215
  if not pdf_text.strip():
216
+ return "Could not extract text from PDF or PDF is empty.", None, "Empty PDF content.", "PDF Error", ""
217
+
218
+ # Limit context window for safety/cost, adjust as needed
219
+ max_text_length = 25000 # Increased slightly, but be mindful of API limits/costs
220
+ if len(pdf_text) > max_text_length:
221
+ print(f"PDF text truncated from {len(pdf_text)} to {max_text_length} characters for Gemini prompt.")
222
+ pdf_text = pdf_text[:max_text_length]
223
 
224
  progress(0.3, desc="Generating lecture script with Gemini...")
225
+ lecture_prompt = generate_lecture_prompt(pdf_text)
226
 
227
+ lecture_text = "" # Initialize to ensure it's always defined
228
  try:
229
  response = gemini_model.generate_content(lecture_prompt)
230
  lecture_text = response.text
231
  except Exception as e:
232
  print(f"Error calling Gemini API: {e}")
233
+ return f"Error generating lecture: {e}", None, "Gemini API Error.", "API Error", "" # Return 5 values
234
235
  whiteboard_content = ""
236
  for line in lecture_text.split('\n'):
237
  if line.lower().startswith("imagine on our whiteboard:"):
238
  whiteboard_content += line.replace("Imagine on our whiteboard:", "").strip() + "\n\n"
239
  if not whiteboard_content:
240
+ whiteboard_content = "No specific whiteboard content described for this section. AI will verbally describe visuals."
241
 
242
  progress(0.7, desc="Converting lecture to speech (TTS)...")
243
  # Create a unique filename for audio to avoid caching issues if files are static
244
+ # Using a temporary file that Gradio will handle for serving
245
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio_file:
246
+ audio_output_path_for_piper = tmp_audio_file.name
247
 
248
+ lecture_audio_path = text_to_speech_piper(lecture_text, output_filename=audio_output_path_for_piper)
249
 
250
+ if not lecture_audio_path: # lecture_audio_path will be None if TTS failed
251
  progress(1.0, desc="TTS failed. Displaying text only.")
252
+ # Return 5 values, including the lecture_text for the context state
253
+ return lecture_text, None, whiteboard_content, "TTS failed. Audio not available.", lecture_text
254
 
255
  progress(1.0, desc="Lecture ready!")
256
+ # Return 5 values, including the lecture_text for the context state
257
+ return lecture_text, lecture_audio_path, whiteboard_content, "Lecture generated successfully!", lecture_text
258
 
259
 
260
+ def handle_student_doubt(student_audio_query, lecture_context_text_from_state, progress=gr.Progress(track_tqdm=True)):
261
+ if not student_audio_query: # student_audio_query is a filepath from gr.Audio
262
+ return "No doubt recorded. Please record your question.", None, "Please record your question first."
263
  if not gemini_model:
264
+ return "Gemini API not configured. Cannot answer doubt.", None, "API Error."
265
 
266
  progress(0.2, desc="Transcribing your question...")
267
  + student_question_text = transcribe_audio(student_audio_query) # student_audio_query is already filepath
268
 
269
  if student_question_text.startswith("Error:"):
270
+ return f"Could not understand your question: {student_question_text}", None, "STT Error."
271
 
272
  progress(0.5, desc="Thinking about your question...")
273
 
274
+ # Use only the last part of the lecture context to keep the prompt manageable
275
+ context_for_doubt = lecture_context_text_from_state[-3000:] if lecture_context_text_from_state else "No prior lecture context available."
276
+
277
  doubt_prompt = f"""
278
  A student has a doubt regarding the lecture.
279
+ Current Lecture Context (last part):
280
  ---
281
+ {context_for_doubt}
282
  ---
283
  Student's Question: "{student_question_text}"
284
 
 
286
  1. Acknowledge the question.
287
  2. Provide a clear, concise, and helpful answer.
288
  3. Use analogies if helpful. Maintain your encouraging and slightly humorous tone.
289
+ 4. After answering, gently prompt if they understood or if they'd like to continue the lecture. For example: "Does that make sense? Let me know if you have more questions or if we should resume!"
290
 
291
  Keep your answer focused on the question.
292
  """
293
+ answer_text = "" # Initialize
294
  try:
295
  response = gemini_model.generate_content(doubt_prompt)
296
  answer_text = response.text
297
  except Exception as e:
298
  print(f"Error calling Gemini API for doubt: {e}")
299
+ return f"Error processing doubt: {e}", None, "API Error."
300
 
301
  progress(0.8, desc="Preparing audio for the answer...")
302
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_ans_audio_f:
303
+ answer_audio_path_for_piper = tmp_ans_audio_f.name
304
+
305
+ answer_audio_path = text_to_speech_piper(answer_text, output_filename=answer_audio_path_for_piper)
306
+
307
+ full_response_text = f"**Your Question:** {student_question_text}\n\n**AI Tutor:** {answer_text}"
308
 
309
  if not answer_audio_path:
310
  progress(1.0, desc="TTS for answer failed.")
311
+ return full_response_text, None, "TTS for answer failed. Text only."
 
312
 
313
  progress(1.0, desc="Answer ready!")
314
+ return full_response_text, answer_audio_path, "Answer provided."
315
 
316
 
317
  # --- Gradio UI ---
318
  css = """
319
  body { font-family: 'Arial', sans-serif; }
320
+ .gradio-container { max-width: 950px !important; margin: auto !important; }
321
+ .gr-button { border-radius: 8px; }
322
+ .gr-button.gr-button-primary { background-color: #4CAF50; color: white; }
323
+ .gr-button.gr-button-primary:hover { background-color: #45a049; }
324
+ .gr-button.gr-button-secondary { background-color: #008CBA; color: white; }
325
+ .gr-button.gr-button-secondary:hover { background-color: #007ba7; }
326
  .panel_description { padding: 10px; margin-bottom:10px; border-radius:5px; background-color:#f0f0f0; }
327
  .important_text { color: #D32F2F; font-weight: bold; }
328
  .markdown-output h1, .markdown-output h2 { color: #1976D2; }
329
+ .markdown-output strong { color: #444; }
330
+ .whiteboard-display { border: 2px dashed #ccc; padding: 15px; margin-top: 15px; background-color: #f9f9f9; min-height: 150px; font-family: 'Courier New', Courier, monospace; white-space: pre-wrap;}
331
+ #app_title { text-align: center; color: #2c3e50; margin-bottom: 20px;}
332
+ .status-box { font-weight: bold; padding: 8px; border-radius: 4px; margin-top: 5px; text-align: center;}
333
+ .status-box-success { background-color: #e7f7e7; color: #28a745; }
334
+ .status-box-error { background-color: #fdecea; color: #dc3545; }
335
+ .status-box-info { background-color: #e7f3fe; color: #007bff; }
336
  """
337
 
338
+ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as app:
339
  gr.Markdown("# 🤖 AI Human-Like Tutor", elem_id="app_title")
340
  gr.Markdown(
341
  "Upload a PDF chapter, and the AI will generate an engaging lecture with voice, "
 
344
  )
345
 
346
  if not GOOGLE_API_KEY or not gemini_model:
347
+ gr.Markdown("<p class='important_text panel_description'>🔴 Configuration Error: GOOGLE_API_KEY for Gemini is not set in Hugging Face Secrets. The app will not function correctly.</p>")
348
  if not PIPER_VOICE_PATH_ONNX:
349
+ gr.Markdown("<p class='important_text panel_description'>🟡 Configuration Warning: PIPER_VOICE_PATH_ONNX not set or model not found. TTS (Text-to-Speech) will be disabled.</p>")
350
  if not stt_model:
351
+ gr.Markdown("<p class='important_text panel_description'>🟡 Configuration Warning: Whisper STT model failed to load. Mic input for doubts cannot be transcribed.</p>")
352
 
 
 
353
  lecture_context_state = gr.State(value="")
354
 
355
  with gr.Row():
356
  with gr.Column(scale=1):
357
  pdf_upload = gr.File(label="Upload PDF Chapter", file_types=[".pdf"])
358
+ generate_button = gr.Button("🚀 Generate Lecture", variant="primary", elem_id="generate_button")
359
+ status_message = gr.Textbox(label="Status", interactive=False, elem_classes=["status-box"]) # For general status
360
 
361
  gr.Markdown("---")
362
  gr.Markdown("### 🤔 Ask a Doubt")
 
 
363
  raise_hand_button = gr.Button("✋ Raise Hand / Prepare to Ask")
 
 
 
 
364
  student_mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Your Doubt (after clicking Raise Hand)")
 
365
  ask_doubt_button = gr.Button("💬 Ask My Doubt", variant="secondary")
366
+ doubt_status_message = gr.Textbox(label="Doubt Status", interactive=False, elem_classes=["status-box"]) # Specific for doubt
367
 
368
  with gr.Column(scale=2):
369
+ gr.Markdown("## giảng Lecture Output") # Kept your original header text
370
  lecture_display = gr.Markdown(label="Lecture Script")
371
  lecture_audio = gr.Audio(label="🎧 Listen to Lecture", type="filepath", autoplay=False)
372
 
373
+ gr.Markdown("### 칠판 Whiteboard Area") # Kept your original header text
374
+ whiteboard_display = gr.Textbox(label="Visuals & Notes (as described by AI)", lines=10, interactive=False, elem_classes=["whiteboard-display"])
375
 
376
  gr.Markdown("---")
377
  gr.Markdown("### 💡 Doubt Resolution")
 
384
  inputs=[pdf_upload],
385
  outputs=[lecture_display, lecture_audio, whiteboard_display, status_message, lecture_context_state],
386
  api_name="generate_lecture"
 
 
 
 
387
  )
388
 
389
+ # Update status_message based on the text content for better visual feedback
390
  + @status_message.change(inputs=status_message, outputs=status_message)
391
+ def update_status_styling(status_text):
392
+ if "Error" in status_text or "failed" in status_text or "not configured" in status_text or "not found" in status_text:
393
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-error"])
394
+ elif "successfully" in status_text or "ready" in status_text:
395
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-success"])
396
+ else:
397
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-info"])
398
+
399
  + @doubt_status_message.change(inputs=doubt_status_message, outputs=doubt_status_message)
400
+ def update_doubt_status_styling(status_text):
401
+ if "Error" in status_text or "failed" in status_text or "not configured" in status_text or "not found" in status_text:
402
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-error"])
403
+ elif "successfully" in status_text or "ready" in status_text or "provided" in status_text:
404
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-success"])
405
+ else:
406
+ return gr.Textbox(value=status_text, elem_classes=["status-box", "status-box-info"])
407
+
408
+
409
  raise_hand_button.click(
410
+ lambda: gr.Info("Mic enabled! Record your question then click 'Ask My Doubt'.")
411
  )
412
 
413
  ask_doubt_button.click(
414
  fn=handle_student_doubt,
415
+ inputs=[student_mic_input, lecture_context_state],
416
+ outputs=[doubt_answer_display, doubt_answer_audio, doubt_status_message],
417
  api_name="ask_doubt"
418
  )
419
 
420
  if __name__ == "__main__":
 
421
  # For local run, you'd set env vars:
422
  # os.environ['GOOGLE_API_KEY'] = 'YOUR_LOCAL_KEY'
423
  # os.environ['PIPER_VOICE_PATH_ONNX'] = 'path/to/your/local/voice.onnx'
424
+ if not GOOGLE_API_KEY: print("Local Run: GOOGLE_API_KEY not set.")
425
+ if not PIPER_VOICE_PATH_ONNX: print("Local Run: PIPER_VOICE_PATH_ONNX not set.")
426
+ elif not os.path.exists(PIPER_VOICE_PATH_ONNX) or (PIPER_VOICE_PATH_JSON and not os.path.exists(PIPER_VOICE_PATH_JSON)):
427
+ print("Local Run: Piper voice model/config files not found at specified path(s).")
428
 
429
  + # If issues arise with Gradio's experimental SSR, you can pass ssr_mode=False to launch() for broader compatibility
430
  + # Set share=True to get a public Gradio link; to reach the app from other machines on your local network, pass server_name="0.0.0.0"
431
+ app.launch(debug=True, share=False, server_port=7860) # Explicitly setting server_port
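
Note on deployment files: the setup comments removed in this commit reference a requirements.txt ("with the content provided above") and a packages.txt containing ffmpeg, but neither file appears in this diff. A plausible sketch of both, inferred only from the imports and CLI calls in app.py (exact package pins and the original file contents are not shown here):

requirements.txt (sketch)
    gradio
    google-generativeai
    PyMuPDF
    openai-whisper
    numpy
    soundfile
    piper-tts

packages.txt (sketch)
    ffmpeg

The Piper voice files themselves (the .onnx model plus its .json config) still have to be downloaded from https://huggingface.co/rhasspy/piper-voices/tree/main and uploaded to the Space at the path configured in the PIPER_VOICE_PATH_ONNX secret, as the removed notes describe.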