Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import tempfile | |
| import scipy.io.wavfile as wav | |
| # ------------------------------- | |
| # 1. Load Models (Lightweight) | |
| # ------------------------------- | |
| from transformers import pipeline | |
# Speech-to-text: Whisper (small checkpoint) through the ASR pipeline.
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
# Tutor model: DistilGPT-2 through the text-generation pipeline.
# NOTE(review): this comment previously said FLAN-T5, but the code loads
# distilgpt2 -- confirm which model is actually intended.
llm = pipeline("text-generation", model="distilgpt2")
| # ------------------------------- | |
| # 2. Core Functions | |
| # ------------------------------- | |
def speech_to_text(audio):
    """Transcribe recorded or uploaded audio to text.

    Args:
        audio: ``None`` when nothing was recorded, otherwise a
            ``(sample_rate, data)`` pair; the pair is written out as a WAV
            file and handed to the ``stt`` pipeline.

    Returns:
        The ``"text"`` field of the pipeline result, or
        ``"No audio provided."`` when ``audio`` is ``None``.
    """
    import os  # local import: only needed to clean up the temporary file

    if audio is None:
        return "No audio provided."

    sample_rate, data = audio

    # Reserve a unique path and close the handle right away, so the file can
    # be reopened by name for writing on every platform.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        wav.write(tmp.name, sample_rate, data)
        result = stt(tmp.name)
    finally:
        # delete=False means nothing removes the file automatically; without
        # this every request would leave a WAV file behind.
        os.remove(tmp.name)

    return result["text"]
def generate_response(text):
    """Generate a tutor-style answer for a question.

    Args:
        text: The question to answer, normally the transcription produced
            by ``speech_to_text``.

    Returns:
        The text generated after the final ``"Answer:"`` marker, stripped of
        surrounding whitespace, or ``"Please provide valid input."`` when
        ``text`` is empty, whitespace-only, or the ``"No audio provided."``
        placeholder.
    """
    # A whitespace-only transcription contains no question, so reject it the
    # same way as an empty one instead of sending it to the model.
    if not text or not text.strip() or text == "No audio provided.":
        return "Please provide valid input."

    prompt = (
        "\n"
        "You are an AI tutor.\n"
        "Explain clearly and simply.\n"
        f"Question: {text}\n"
        "Answer:\n"
    )

    # max_new_tokens limits only the generated continuation. max_length also
    # counts the prompt tokens, so a long question could leave little or no
    # room for the answer.
    output = llm(prompt, max_new_tokens=150, num_return_sequences=1)
    response = output[0]["generated_text"]

    # The generated text echoes the prompt; keep what follows "Answer:".
    return response.split("Answer:")[-1].strip()
| # ------------------------------- | |
| # 3. Main Pipeline | |
| # ------------------------------- | |
def voice_tutor(audio):
    """Run the full pipeline: transcribe the audio, then answer it.

    Args:
        audio: The audio value passed straight through to
            ``speech_to_text``.

    Returns:
        A ``(transcription, response)`` tuple, in the order the UI output
        boxes expect.
    """
    heard = speech_to_text(audio)
    return heard, generate_response(heard)
| # ------------------------------- | |
| # 4. Gradio UI | |
| # ------------------------------- | |
# Build the UI: one audio input, two read-out boxes, and a button that runs
# the whole transcription + answer pipeline.
with gr.Blocks() as demo:
    gr.Markdown("## 🎓 AI Voice Tutor (No TTS Version)")

    # type="numpy" delivers the recording as a (sample_rate, data) pair,
    # which is the shape speech_to_text unpacks.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak or Upload Audio",
    )

    transcription_box = gr.Textbox(label="Transcription")
    response_box = gr.Textbox(label="Tutor Response")

    submit_btn = gr.Button("Generate Response")

    # voice_tutor returns (transcription, response), matching the order of
    # the output components below.
    submit_btn.click(
        fn=voice_tutor,
        inputs=audio_input,
        outputs=[transcription_box, response_box],
    )
| # ------------------------------- | |
| # 5. Launch | |
| # ------------------------------- | |
# Launch the demo only when this file is run directly, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()