HuzaifaTech committed on
Commit
ca91e4b
·
verified ·
1 Parent(s): c737b83

Create app.py

Browse files

app file added

Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wav
from transformers import pipeline
from TTS.api import TTS

# -------------------------------
# 1. Load Models (Lightweight)
# -------------------------------
# Every model is instantiated once at import time and shared by all requests;
# the identifiers live in constants so they can be swapped in one place.
STT_MODEL_NAME = "openai/whisper-small"
LLM_MODEL_NAME = "distilgpt2"
TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"

# Whisper (Speech-to-Text)
stt = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME)

# Simple LLM (text generation)
llm = pipeline("text-generation", model=LLM_MODEL_NAME)

# TTS (Coqui TTS)
tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
20
+
21
+ # -------------------------------
22
+ # 2. Core Functions
23
+ # -------------------------------
24
+
25
def speech_to_text(audio):
    """Transcribe an audio clip to text with the Whisper pipeline.

    Args:
        audio: A ``(sample_rate, samples)`` pair, or ``None`` when no audio
            was supplied.

    Returns:
        The transcribed text, or ``"No audio provided."`` when ``audio`` is
        ``None``.
    """
    import os  # local import: only needed to remove the scratch file

    if audio is None:
        return "No audio provided."

    sample_rate, data = audio

    # Reserve a unique path and close the handle right away, so the wav
    # writer and the recognizer each open the file on their own.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        wav.write(wav_path, sample_rate, data)
        result = stt(wav_path)
    finally:
        # The file was created with delete=False, so it must be removed
        # explicitly; otherwise every request leaves a wav file behind.
        os.remove(wav_path)

    return result["text"]
38
+
39
+
40
def generate_response(text, max_new_tokens=150):
    """Generate a tutor-style answer to ``text`` with the language model.

    Args:
        text: The learner's question (typically the transcription).
        max_new_tokens: Upper bound on the number of tokens generated for
            the answer, not counting the prompt.

    Returns:
        The generated answer with surrounding whitespace removed, or
        ``"Please provide valid input."`` when ``text`` is empty, blank, or
        the "no audio" placeholder produced by ``speech_to_text``.
    """
    if not text or not text.strip() or text == "No audio provided.":
        return "Please provide valid input."

    # Simple AI tutor system prompt
    prompt = (
        "\n"
        "You are a helpful AI tutor.\n"
        "Explain clearly, simply, and step-by-step.\n"
        "\n"
        f"Question: {text}\n"
        "Answer:\n"
    )

    # max_new_tokens budgets only the answer; max_length would also count the
    # prompt, so a long question could leave no room for any reply.
    # return_full_text=False yields just the continuation, which avoids
    # splitting on "Answer:" (that breaks when the marker shows up again in
    # the question or in the generated text).
    output = llm(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        return_full_text=False,
    )
    return output[0]["generated_text"].strip()
61
+
62
+
63
def text_to_speech(text):
    """Synthesize ``text`` to a WAV file with the Coqui TTS model.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when ``text`` is
        empty or contains only whitespace. The caller owns the returned file.

    Raises:
        Exception: Whatever the TTS model raises; the partially written
            file is removed before the error propagates.
    """
    import os  # local import: only needed to clean up after a failure

    if not text or not text.strip():
        return None

    # Reserve a unique path and close the handle before the model writes to
    # it, so the file is never open twice at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name

    try:
        tts_model.tts_to_file(text=text, file_path=out_path)
    except Exception:
        # Do not leave an unusable file behind when synthesis fails.
        os.remove(out_path)
        raise

    return out_path
73
+
74
+
75
+ # -------------------------------
76
+ # 3. Pipeline Function
77
+ # -------------------------------
78
+
79
def voice_tutor(audio):
    """Run the full pipeline: Audio → Text → Response → Voice.

    Returns a ``(transcription, response, audio_output)`` triple, in the
    order produced by the three stages.
    """
    heard = speech_to_text(audio)
    reply = generate_response(heard)
    spoken = text_to_speech(reply)
    return heard, reply, spoken
89
+
90
+
91
+ # -------------------------------
92
+ # 4. Gradio UI
93
+ # -------------------------------
94
+
95
with gr.Blocks() as demo:
    # Heading restored to the intended graduation-cap emoji (the text had
    # been mis-decoded into mojibake).
    gr.Markdown("## 🎓 AI Voice Tutor")

    # type="numpy" matches speech_to_text, which unpacks its argument as a
    # (sample_rate, data) pair.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak or Upload Audio",
    )

    transcription_box = gr.Textbox(label="Transcription")
    response_box = gr.Textbox(label="Tutor Response")

    audio_output = gr.Audio(label="Voice Output")

    submit_btn = gr.Button("Generate Response")

    # The outputs are listed in the same order as voice_tutor's return tuple.
    submit_btn.click(
        fn=voice_tutor,
        inputs=audio_input,
        outputs=[transcription_box, response_box, audio_output],
    )

# -------------------------------
# 5. Launch
# -------------------------------

if __name__ == "__main__":
    demo.launch()