# Hugging Face Space source file (2,246 bytes).
import gradio as gr
import numpy as np
import tempfile
import scipy.io.wavfile as wav
# -------------------------------
# 1. Load Models (Lightweight)
# -------------------------------
from transformers import pipeline
# Speech-to-Text (Whisper)
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
# Tutor response model: distilgpt2 via the text-generation pipeline (not FLAN-T5)
llm = pipeline("text-generation", model="distilgpt2")
# -------------------------------
# 2. Core Functions
# -------------------------------
def speech_to_text(audio):
    """
    Convert speech (audio input) to text.

    Args:
        audio: ``None`` or a ``(sample_rate, data)`` pair, as unpacked below.

    Returns:
        The ``"text"`` field of the speech-to-text pipeline result, or the
        literal ``"No audio provided."`` when ``audio`` is ``None``.
    """
    import os  # local import: only needed for temp-file cleanup here

    if audio is None:
        return "No audio provided."

    sample_rate, data = audio

    # Reserve a unique path, then close the handle immediately: writing to a
    # still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        wav.write(wav_path, sample_rate, data)
        result = stt(wav_path)
    finally:
        # delete=False means nothing else removes the file; without this
        # every request would leave a .wav behind in the temp directory.
        os.remove(wav_path)

    return result["text"]
def generate_response(text):
    """
    Generate a tutor-style answer for a transcribed question.

    Args:
        text: The question text. ``None``, empty or whitespace-only strings,
            and the ``"No audio provided."`` sentinel are treated as invalid.

    Returns:
        The generated answer with surrounding whitespace removed, or
        ``"Please provide valid input."`` when ``text`` is invalid.
    """
    question = text.strip() if text else ""
    if not question or question == "No audio provided.":
        return "Please provide valid input."

    prompt = (
        "\n"
        "You are an AI tutor.\n"
        "Explain clearly and simply.\n"
        f"Question: {question}\n"
        "Answer:\n"
    )

    output = llm(
        prompt,
        # max_new_tokens budgets only the continuation; max_length also
        # counts the prompt, so a long question would leave no room to answer.
        max_new_tokens=150,
        num_return_sequences=1,
        # Return only the continuation instead of splitting on "Answer:",
        # which discards the real answer if the model repeats that marker.
        return_full_text=False,
    )
    return output[0]["generated_text"].strip()
# -------------------------------
# 3. Main Pipeline
# -------------------------------
def voice_tutor(audio):
    """
    Run the full pipeline: transcribe the audio, then answer the transcript.

    Returns:
        A ``(transcription, response)`` pair, in that order.
    """
    heard = speech_to_text(audio)
    return heard, generate_response(heard)
# -------------------------------
# 4. Gradio UI
# -------------------------------
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("## 🎓 AI Voice Tutor (No TTS Version)")

    # Audio input: microphone recording or file upload, passed on as numpy.
    audio_input = gr.Audio(
        label="Speak or Upload Audio",
        type="numpy",
        sources=["microphone", "upload"],
    )

    # Output fields, filled in the order voice_tutor returns its values.
    transcription_box = gr.Textbox(label="Transcription")
    response_box = gr.Textbox(label="Tutor Response")

    # The button runs the whole pipeline on the current audio input.
    submit_btn = gr.Button("Generate Response")
    submit_btn.click(
        voice_tutor,
        inputs=audio_input,
        outputs=[transcription_box, response_box],
    )
# -------------------------------
# 5. Launch
# -------------------------------
if __name__ == "__main__":
    # Launch the Gradio app only when this file is executed as a script.
    demo.launch()