"""Gradio demo: transcribe a spoken question and answer it with a small language model.

Pipeline: audio (microphone or upload) -> Whisper transcription -> text
generation -> two text boxes (transcription and tutor response).
"""

import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import numpy as np
import scipy.io.wavfile as wav
from transformers import pipeline

# -------------------------------
# 1. Load Models (Lightweight)
# -------------------------------

# Speech-to-Text (Whisper)
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Tutor model (DistilGPT-2, used through the text-generation pipeline)
llm = pipeline("text-generation", model="distilgpt2")

# User-facing messages shared between the functions below.
NO_AUDIO_MESSAGE = "No audio provided."
INVALID_INPUT_MESSAGE = "Please provide valid input."

# Upper bound on the number of tokens generated for one answer.
MAX_ANSWER_TOKENS = 150


# -------------------------------
# 2. Core Functions
# -------------------------------

def speech_to_text(audio: Optional[Tuple[int, np.ndarray]]) -> str:
    """Convert speech (audio input) to text.

    Args:
        audio: ``(sample_rate, samples)`` as delivered by ``gr.Audio`` with
            ``type="numpy"``, or ``None`` when nothing was recorded/uploaded.

    Returns:
        The transcription with surrounding whitespace removed, or
        ``NO_AUDIO_MESSAGE`` when ``audio`` is ``None``.
    """
    if audio is None:
        return NO_AUDIO_MESSAGE

    sample_rate, data = audio

    # Reserve a unique path, then close the handle before writing: the file is
    # re-opened by name below, which is not possible on every platform while
    # the original handle is still open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        wav.write(wav_path, sample_rate, data)
        result = stt(wav_path)
    finally:
        # delete=False means nobody else cleans this up; remove it even when
        # writing or transcription fails so requests do not leak files.
        os.remove(wav_path)

    return result["text"].strip()


def generate_response(text: str) -> str:
    """Generate a tutor-style answer for ``text``.

    Args:
        text: The question to answer (normally the transcription).

    Returns:
        The generated answer, or ``INVALID_INPUT_MESSAGE`` when ``text`` is
        empty, whitespace-only, or the "no audio" placeholder.
    """
    if not text or not text.strip() or text == NO_AUDIO_MESSAGE:
        return INVALID_INPUT_MESSAGE

    # The prompt ends exactly at "Answer:" so the model continues from there.
    prompt = (
        "You are an AI tutor. Explain clearly and simply.\n\n"
        f"Question: {text}\n"
        "Answer:"
    )

    # max_new_tokens bounds only the generated part; max_length would also
    # count the prompt and leave little or no room after a long question.
    output = llm(prompt, max_new_tokens=MAX_ANSWER_TOKENS, num_return_sequences=1)
    generated = output[0]["generated_text"]

    # Clean output: the pipeline returns prompt + continuation by default.
    # Strip the known prompt instead of splitting on the last "Answer:", which
    # would cut the reply short whenever the model repeats that marker.
    if generated.startswith(prompt):
        answer = generated[len(prompt):]
    else:
        answer = generated.split("Answer:")[-1]
    return answer.strip()


# -------------------------------
# 3. Main Pipeline
# -------------------------------

def voice_tutor(audio: Optional[Tuple[int, np.ndarray]]) -> Tuple[str, str]:
    """Run the full pipeline and return ``(transcription, tutor_response)``."""
    transcription = speech_to_text(audio)
    response = generate_response(transcription)
    return transcription, response


# -------------------------------
# 4. Gradio UI
# -------------------------------

with gr.Blocks() as demo:
    gr.Markdown("## 🎓 AI Voice Tutor (No TTS Version)")

    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak or Upload Audio",
    )

    transcription_box = gr.Textbox(label="Transcription")
    response_box = gr.Textbox(label="Tutor Response")

    submit_btn = gr.Button("Generate Response")

    submit_btn.click(
        fn=voice_tutor,
        inputs=audio_input,
        outputs=[transcription_box, response_box],
    )


# -------------------------------
# 5. Launch
# -------------------------------

if __name__ == "__main__":
    demo.launch()