Spaces:

HuzaifaTech
/

AI_tutor

Sleeping

App Files Community

HuzaifaTech committed on Apr 18

Commit

f433513

verified ·

1 Parent(s): 9b327a8

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -41

app.py CHANGED Viewed

@@ -7,16 +7,14 @@ import scipy.io.wavfile as wav
 # 1. Load Models (Lightweight)
 # -------------------------------
-# Whisper (Speech-to-Text)
 from transformers import pipeline
 stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
-# Simple LLM (text generation)
-llm = pipeline("text-generation", model="distilgpt2")
-# TTS (Coqui TTS)
-from TTS.api import TTS
-tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
 # -------------------------------
 # 2. Core Functions
@@ -24,68 +22,52 @@ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=
 def speech_to_text(audio):
     """
-    Converts speech (audio file) to text using Whisper
     """
     if audio is None:
         return "No audio provided."
     sample_rate, data = audio
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
         wav.write(tmp.name, sample_rate, data)
         result = stt(tmp.name)
     return result["text"]
 def generate_response(text):
     """
-    Generates tutor-style response using LLM
     """
     if not text or text == "No audio provided.":
         return "Please provide valid input."
-    # Simple AI tutor system prompt
     prompt = f"""
-    You are a helpful AI tutor.
-    Explain clearly, simply, and step-by-step.
     Question: {text}
     Answer:
     """
-    output = llm(prompt, max_length=150, num_return_sequences=1)
-    response = output[0]["generated_text"]
-    # Clean response (remove prompt repetition)
-    return response.split("Answer:")[-1].strip()
-def text_to_speech(text):
-    """
-    Converts text to speech using Coqui TTS
-    """
-    if not text:
-        return None
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-        tts_model.tts_to_file(text=text, file_path=tmp.name)
-        return tmp.name
 # -------------------------------
-# 3. Pipeline Function
 # -------------------------------
 def voice_tutor(audio):
-    """
-    Full pipeline:
-    Audio → Text → Response → Voice
-    """
     transcription = speech_to_text(audio)
     response = generate_response(transcription)
-    audio_output = text_to_speech(response)
-    return transcription, response, audio_output
 # -------------------------------
@@ -93,7 +75,7 @@ def voice_tutor(audio):
 # -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎓 AI Voice Tutor")
     audio_input = gr.Audio(
         sources=["microphone", "upload"],
@@ -104,16 +86,15 @@ with gr.Blocks() as demo:
     transcription_box = gr.Textbox(label="Transcription")
     response_box = gr.Textbox(label="Tutor Response")
-    audio_output = gr.Audio(label="Voice Output")
     submit_btn = gr.Button("Generate Response")
     submit_btn.click(
         fn=voice_tutor,
         inputs=audio_input,
-        outputs=[transcription_box, response_box, audio_output]
     )
 # -------------------------------
 # 5. Launch
 # -------------------------------

 # 1. Load Models (Lightweight)
 # -------------------------------
 from transformers import pipeline
+# Speech-to-Text (Whisper)
 stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+# Better Tutor Model (FLAN-T5)
+llm = pipeline("text2text-generation", model="google/flan-t5-small")
 # -------------------------------
 # 2. Core Functions
 def speech_to_text(audio):
     """
+    Converts speech (audio input) to text
     """
     if audio is None:
         return "No audio provided."
     sample_rate, data = audio
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
         wav.write(tmp.name, sample_rate, data)
         result = stt(tmp.name)
     return result["text"]
 def generate_response(text):
     """
+    Generates tutor-style response
     """
     if not text or text == "No audio provided.":
         return "Please provide valid input."
     prompt = f"""
+    You are an expert AI tutor.
+    Explain:
+    - in simple words
+    - step by step
+    - with examples if possible
     Question: {text}
     Answer:
     """
+    output = llm(prompt, max_length=150)
+    return output[0]["generated_text"]
 # -------------------------------
+# 3. Main Pipeline
 # -------------------------------
 def voice_tutor(audio):
     transcription = speech_to_text(audio)
     response = generate_response(transcription)
+    return transcription, response
 # -------------------------------
 # -------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎓 AI Voice Tutor (No TTS Version)")
     audio_input = gr.Audio(
         sources=["microphone", "upload"],
     transcription_box = gr.Textbox(label="Transcription")
     response_box = gr.Textbox(label="Tutor Response")
     submit_btn = gr.Button("Generate Response")
     submit_btn.click(
         fn=voice_tutor,
         inputs=audio_input,
+        outputs=[transcription_box, response_box]
     )
 # -------------------------------
 # 5. Launch
 # -------------------------------