Spaces:

HuzaifaTech
/

AI_tutor

Sleeping

App Files Files Community

HuzaifaTech committed on Apr 18

Commit

ca91e4b

verified ·

1 Parent(s): c737b83

Create app.py

Browse files

app file added

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+import numpy as np
+import tempfile
+import scipy.io.wavfile as wav
+# -------------------------------
+# 1. Load Models (Lightweight)
+# -------------------------------
+# All three models below are instantiated at module level, i.e. when this
+# file is imported or run, before the UI is built.
+# Whisper (Speech-to-Text): called with a WAV file path in speech_to_text()
+from transformers import pipeline
+stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+# Simple LLM (text generation): distilgpt2, called in generate_response()
+llm = pipeline("text-generation", model="distilgpt2")
+# TTS (Coqui TTS): called in text_to_speech() to write speech to a WAV file
+from TTS.api import TTS
+tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
+# -------------------------------
+# 2. Core Functions
+# -------------------------------
+def speech_to_text(audio):
+    """
+    Converts speech (audio file) to text using Whisper
+    """
+    if audio is None:
+        return "No audio provided."
+    sample_rate, data = audio
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        wav.write(tmp.name, sample_rate, data)
+        result = stt(tmp.name)
+    return result["text"]
+def generate_response(text):
+    """
+    Generates tutor-style response using LLM
+    """
+    if not text or text == "No audio provided.":
+        return "Please provide valid input."
+    # Simple AI tutor system prompt
+    prompt = f"""
+    You are a helpful AI tutor.
+    Explain clearly, simply, and step-by-step.
+    Question: {text}
+    Answer:
+    """
+    output = llm(prompt, max_length=150, num_return_sequences=1)
+    response = output[0]["generated_text"]
+    # Clean response (remove prompt repetition)
+    return response.split("Answer:")[-1].strip()
+def text_to_speech(text):
+    """
+    Converts text to speech using Coqui TTS
+    """
+    if not text:
+        return None
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        tts_model.tts_to_file(text=text, file_path=tmp.name)
+        return tmp.name
+# -------------------------------
+# 3. Pipeline Function
+# -------------------------------
+def voice_tutor(audio):
+    """
+    Full pipeline:
+    Audio → Text → Response → Voice
+    """
+    transcription = speech_to_text(audio)
+    response = generate_response(transcription)
+    audio_output = text_to_speech(response)
+    return transcription, response, audio_output
+# -------------------------------
+# 4. Gradio UI
+# -------------------------------
+# Build the page: heading, audio input, two text boxes, audio output, button.
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎓 AI Voice Tutor")
+    # type="numpy" matches speech_to_text(), which unpacks the value as
+    # (sample_rate, data).
+    audio_input = gr.Audio(
+        sources=["microphone", "upload"],
+        type="numpy",
+        label="Speak or Upload Audio"
+    )
+    transcription_box = gr.Textbox(label="Transcription")
+    response_box = gr.Textbox(label="Tutor Response")
+    # Receives the third value returned by voice_tutor (a file path or None).
+    audio_output = gr.Audio(label="Voice Output")
+    submit_btn = gr.Button("Generate Response")
+    # voice_tutor returns three values; they are mapped onto `outputs`
+    # in the same order.
+    submit_btn.click(
+        fn=voice_tutor,
+        inputs=audio_input,
+        outputs=[transcription_box, response_box, audio_output]
+    )
+# -------------------------------
+# 5. Launch
+# -------------------------------
+# Launch the Blocks app only when this file is executed as a script,
+# not when it is imported.
+if __name__ == "__main__":
+    demo.launch()