HuzaifaTech committed on
Commit
f433513
·
verified ·
1 Parent(s): 9b327a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -41
app.py CHANGED
@@ -7,16 +7,14 @@ import scipy.io.wavfile as wav
7
  # 1. Load Models (Lightweight)
8
  # -------------------------------
9
 
10
- # Whisper (Speech-to-Text)
11
  from transformers import pipeline
 
 
12
  stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
13
 
14
- # Simple LLM (text generation)
15
- llm = pipeline("text-generation", model="distilgpt2")
16
 
17
- # TTS (Coqui TTS)
18
- from TTS.api import TTS
19
- tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
20
 
21
  # -------------------------------
22
  # 2. Core Functions
@@ -24,68 +22,52 @@ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=
24
 
25
  def speech_to_text(audio):
26
  """
27
- Converts speech (audio file) to text using Whisper
28
  """
29
  if audio is None:
30
  return "No audio provided."
31
-
32
  sample_rate, data = audio
 
33
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
34
  wav.write(tmp.name, sample_rate, data)
35
  result = stt(tmp.name)
36
-
37
  return result["text"]
38
 
39
 
40
  def generate_response(text):
41
  """
42
- Generates tutor-style response using LLM
43
  """
44
  if not text or text == "No audio provided.":
45
  return "Please provide valid input."
46
 
47
- # Simple AI tutor system prompt
48
  prompt = f"""
49
- You are a helpful AI tutor.
50
- Explain clearly, simply, and step-by-step.
 
 
 
 
51
 
52
  Question: {text}
53
  Answer:
54
  """
55
 
56
- output = llm(prompt, max_length=150, num_return_sequences=1)
57
- response = output[0]["generated_text"]
58
-
59
- # Clean response (remove prompt repetition)
60
- return response.split("Answer:")[-1].strip()
61
-
62
-
63
- def text_to_speech(text):
64
- """
65
- Converts text to speech using Coqui TTS
66
- """
67
- if not text:
68
- return None
69
-
70
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
71
- tts_model.tts_to_file(text=text, file_path=tmp.name)
72
- return tmp.name
73
 
74
 
75
  # -------------------------------
76
- # 3. Pipeline Function
77
  # -------------------------------
78
 
79
  def voice_tutor(audio):
80
- """
81
- Full pipeline:
82
- Audio → Text → Response → Voice
83
- """
84
  transcription = speech_to_text(audio)
85
  response = generate_response(transcription)
86
- audio_output = text_to_speech(response)
87
 
88
- return transcription, response, audio_output
89
 
90
 
91
  # -------------------------------
@@ -93,7 +75,7 @@ def voice_tutor(audio):
93
  # -------------------------------
94
 
95
  with gr.Blocks() as demo:
96
- gr.Markdown("## 🎓 AI Voice Tutor")
97
 
98
  audio_input = gr.Audio(
99
  sources=["microphone", "upload"],
@@ -104,16 +86,15 @@ with gr.Blocks() as demo:
104
  transcription_box = gr.Textbox(label="Transcription")
105
  response_box = gr.Textbox(label="Tutor Response")
106
 
107
- audio_output = gr.Audio(label="Voice Output")
108
-
109
  submit_btn = gr.Button("Generate Response")
110
 
111
  submit_btn.click(
112
  fn=voice_tutor,
113
  inputs=audio_input,
114
- outputs=[transcription_box, response_box, audio_output]
115
  )
116
 
 
117
  # -------------------------------
118
  # 5. Launch
119
  # -------------------------------
 
7
  # 1. Load Models (Lightweight)
8
  # -------------------------------
9
 
 
10
  from transformers import pipeline
11
+
12
+ # Speech-to-Text (Whisper)
13
  stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")
14
 
15
+ # Better Tutor Model (FLAN-T5)
16
+ llm = pipeline("text2text-generation", model="google/flan-t5-small")
17
 
 
 
 
18
 
19
  # -------------------------------
20
  # 2. Core Functions
 
22
 
23
  def speech_to_text(audio):
24
  """
25
+ Converts speech (audio input) to text
26
  """
27
  if audio is None:
28
  return "No audio provided."
29
+
30
  sample_rate, data = audio
31
+
32
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
33
  wav.write(tmp.name, sample_rate, data)
34
  result = stt(tmp.name)
35
+
36
  return result["text"]
37
 
38
 
39
  def generate_response(text):
40
  """
41
+ Generates tutor-style response
42
  """
43
  if not text or text == "No audio provided.":
44
  return "Please provide valid input."
45
 
 
46
  prompt = f"""
47
+ You are an expert AI tutor.
48
+
49
+ Explain:
50
+ - in simple words
51
+ - step by step
52
+ - with examples if possible
53
 
54
  Question: {text}
55
  Answer:
56
  """
57
 
58
+ output = llm(prompt, max_length=150)
59
+ return output[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
  # -------------------------------
63
+ # 3. Main Pipeline
64
  # -------------------------------
65
 
66
  def voice_tutor(audio):
 
 
 
 
67
  transcription = speech_to_text(audio)
68
  response = generate_response(transcription)
 
69
 
70
+ return transcription, response
71
 
72
 
73
  # -------------------------------
 
75
  # -------------------------------
76
 
77
  with gr.Blocks() as demo:
78
+ gr.Markdown("## 🎓 AI Voice Tutor (No TTS Version)")
79
 
80
  audio_input = gr.Audio(
81
  sources=["microphone", "upload"],
 
86
  transcription_box = gr.Textbox(label="Transcription")
87
  response_box = gr.Textbox(label="Tutor Response")
88
 
 
 
89
  submit_btn = gr.Button("Generate Response")
90
 
91
  submit_btn.click(
92
  fn=voice_tutor,
93
  inputs=audio_input,
94
+ outputs=[transcription_box, response_box]
95
  )
96
 
97
+
98
  # -------------------------------
99
  # 5. Launch
100
  # -------------------------------