Stanley03 committed on
Commit
07788ea
·
verified ·
1 Parent(s): 3069c59

Create kiswahili_hf_app.py

Browse files
Files changed (1) hide show
  1. kiswahili_hf_app.py +276 -0
kiswahili_hf_app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kiswahili Voice Agent for Hugging Face Spaces
4
+ Natural conversational Kiswahili voice-to-voice assistant
5
+ """
6
+
7
+ import gradio as gr
8
+ import os
9
+ from datetime import datetime
10
+ import json
11
+
12
+ # Try to import optional dependencies
13
+ try:
14
+ from gtts import gTTS
15
+ HAS_GTTS = True
16
+ except ImportError:
17
+ HAS_GTTS = False
18
+
19
+ try:
20
+ import speech_recognition as sr
21
+ HAS_SR = True
22
+ except ImportError:
23
+ HAS_SR = False
24
+
25
+ try:
26
+ import requests
27
+ HAS_REQUESTS = True
28
+ except ImportError:
29
+ HAS_REQUESTS = False
30
+
31
# Module-level conversation state. `conversation_history` collects one dict
# per completed turn; `conversation_id` starts out unset.
conversation_id = None
conversation_history = []

# Kiswahili system prompt describing the assistant's persona ("Manus") and the
# expected reply style (short answers that end with a follow-up question).
# Built from its individual lines so each instruction is easy to edit.
SYSTEM_PROMPT = "\n".join(
    (
        "Wewe ni Manus, msaidizi wa sauti wa Kiswahili ambaye ana tabia nzuri na karimu.",
        "Unazungumza Kiswahili safi na asilia, na unafahamu utamaduni wa Kiswahili.",
        "Katika kila jibu, jaribu kuuliza swali la mfuatano ili kuendelea na mazungumzo.",
        "Jibu kwa ufupi lakini kwa maana - kwa kawaida 1-2 sentensi.",
        "Kila jibu lazima liwe na swali au kauli inayokamatia mazungumzo.",
    )
)
41
+
42
# Every user-facing Kiswahili string used by the interface and the handlers,
# keyed by purpose so the wording lives in one place.
UI_STRINGS = dict(
    # Page header
    title="🎙️ Manus - Msaidizi wa Sauti wa Kiswahili",
    subtitle="Mazungumzo ya asilia kwa Kiswahili",
    instruction="Bonyeza kurekodi, sema kitu kwa Kiswahili, kisha bonyeza kuacha.",
    # Status line values
    status_recording="🔴 Inasikiliza...",
    status_processing="⚙️ Inachakata...",
    status_ready="✅ Tayari",
    status_error="❌ Hitilafu",
    # Speaker labels in the transcript
    user_label="Wewe:",
    assistant_label="Manus:",
    # Reset control
    reset_button="🔄 Anza Upya",
    reset_confirm="Mazungumzo yamefutwa. Karibu tena!",
    # Error messages
    error_audio="Haiwezekani kusoma sauti. Tafadhali jaribu tena.",
    error_process="Haiwezekani kuchakata sauti. Tafadhali jaribu tena.",
    # Opening message shown before the first turn
    welcome="Habari! Naitwa Manus. Karibu sana! Unaweza kusema kitu chochote kwa Kiswahili, na nitakujibu.",
)
59
+
60
def transcribe_audio(audio_file):
    """Turn a recorded audio file into Kiswahili text.

    Args:
        audio_file: Audio source handed to ``sr.AudioFile`` (the recorder in
            this app is configured to supply a file path).

    Returns:
        The recognised text on success. Failures are reported in-band as a
        Kiswahili message beginning with "Haiwezekani" or "Hitilafu:" rather
        than by raising, so the caller must inspect the returned string.
    """
    if not HAS_SR:
        # The optional speech_recognition import failed at start-up.
        return "Haiwezekani kusoma sauti - moduli haipo"

    try:
        engine = sr.Recognizer()
        with sr.AudioFile(audio_file) as clip:
            captured = engine.record(clip)

        # Recognition is requested with the "sw-TZ" language code.
        return engine.recognize_google(captured, language="sw-TZ")
    except sr.UnknownValueError:
        # The recogniser could not make sense of the audio.
        return "Haiwezekani kuelewa sauti. Tafadhali jaribu tena."
    except sr.RequestError:
        # The recognition service could not be reached.
        return "Haiwezekani kuunganisha na huduma ya mtandao."
    except Exception as err:
        return f"Hitilafu: {str(err)}"
79
+
80
def generate_response(user_text):
    """Choose a canned Kiswahili reply for the user's utterance.

    A rule-based placeholder for a real language model: the lowercased
    utterance is scanned for known keywords in a fixed priority order, and the
    first keyword found anywhere in the text (substring match) selects the
    reply. If no keyword matches, a generic follow-up prompt is picked at
    random.

    Args:
        user_text: The transcribed user utterance.

    Returns:
        The reply text in Kiswahili.
    """
    normalized = user_text.lower()

    # (keyword, reply) pairs; earlier entries win when several keywords occur.
    keyword_replies = (
        ("habari", "Habari nzuri! Niko sawa. Wewe je, uko sawa?"),
        ("jina", "Naitwa Manus, msaidizi wako wa sauti. Jina lako nani?"),
        ("asante", "Karibu sana! Kuna kitu kingine ninachoweza kukusaidia?"),
        ("pole", "Pole pole! Kila kitu kitakuwa sawa. Unaweza kusema nini kinachokukosesha?"),
        ("ndiyo", "Nzuri! Unaweza kusema zaidi?"),
        ("hapana", "Sawa. Kuna kitu kingine?"),
    )

    matched = next(
        (reply for trigger, reply in keyword_replies if trigger in normalized),
        None,
    )
    if matched is not None:
        return matched

    # Nothing recognised: keep the conversation going with a generic prompt.
    fallback_replies = [
        "Hiyo ni kitu kizuri! Unaweza kusema zaidi kuhusu hilo?",
        "Nimeelewa. Na kisha nini?",
        "Sawa! Hiyo ni kitu muhimu. Unaweza kueneza?",
        "Nzuri sana! Unaweza kusema kitu kingine?",
        "Hiyo ni interesting! Unaweza kuniambia zaidi?",
    ]

    import random
    return random.choice(fallback_replies)
112
+
113
def text_to_speech_kiswahili(text):
    """Synthesise Kiswahili speech for ``text`` and return the MP3 file path.

    Each call writes to its own temporary file. A single fixed path would be
    overwritten by the next request, so overlapping requests could be served
    each other's audio.

    Args:
        text: The Kiswahili text to speak.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when gTTS is not
        installed or synthesis fails (the error is printed, not raised).
        The caller owns the returned file; this function does not delete it.
    """
    if not HAS_GTTS:
        return None

    import os
    import tempfile

    audio_file = None
    try:
        tts = gTTS(text=text, lang='sw', slow=False)

        # Reserve a unique file name, then close the handle before gTTS
        # reopens the path to write the audio.
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        ) as tmp:
            audio_file = tmp.name

        tts.save(audio_file)
        return audio_file
    except Exception as e:
        print(f"TTS Error: {e}")
        # Do not leave an empty or partial file behind after a failure.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                pass
        return None
126
+
127
def process_voice_input(audio_input):
    """Handle one voice turn: transcribe, reply, synthesise, refresh the UI.

    Args:
        audio_input: The recorder component's value, or ``None`` when nothing
            was recorded.

    Returns:
        A 4-tuple in the order the event handlers expect:
        ``(status text, conversation markdown or error message,
        reply audio path or None, update that clears the recorder)``.
    """
    # NOTE(review): `conversation_history` is a module-level list, so it is
    # presumably shared by every session served by this process; consider
    # per-session state (e.g. gr.State) if the app is used by several people.

    # A component is cleared with value=None. The previous value="" is a
    # string, which a filepath Audio component would treat as a path.
    clear_recorder = gr.update(value=None)

    def failure(message):
        """Build the error-shaped result tuple for ``message``."""
        return (UI_STRINGS["status_error"], message, None, clear_recorder)

    if audio_input is None:
        return failure(UI_STRINGS["error_audio"])

    try:
        # Step 1: Transcribe user audio
        user_text = transcribe_audio(audio_input)

        # transcribe_audio reports failures in-band, as messages that START
        # with one of these markers. Matching the prefix (not any occurrence)
        # keeps ordinary speech that merely contains the words from being
        # rejected; an utterance that begins with "Haiwezekani " is still
        # indistinguishable from an error.
        if user_text.startswith(("Hitilafu:", "Haiwezekani ")):
            return failure(user_text)

        # Step 2: Generate response
        assistant_response = generate_response(user_text)

        # Step 3: Convert response to speech (None when TTS is unavailable)
        audio_response = text_to_speech_kiswahili(assistant_response)

        # Step 4: Update conversation history
        conversation_history.append({
            "timestamp": datetime.now().isoformat(),
            "user": user_text,
            "assistant": assistant_response,
        })

        # Render the whole history as markdown, one user/assistant pair per turn.
        conversation_text = "".join(
            f"\n**{UI_STRINGS['user_label']}** {msg['user']}\n"
            f"**{UI_STRINGS['assistant_label']}** {msg['assistant']}\n"
            for msg in conversation_history
        )

        return (
            UI_STRINGS["status_ready"],
            conversation_text,
            audio_response,
            clear_recorder,
        )

    except Exception as e:
        # Top-level UI boundary: show the problem instead of crashing the event.
        return failure(f"{UI_STRINGS['status_error']}: {str(e)}")
185
+
186
def reset_conversation():
    """Discard the conversation history and return the UI to its idle state.

    Returns:
        A 4-tuple in the order the event handlers expect:
        ``(ready status, reset confirmation text, None to empty the reply
        player, update that clears the recorder)``.
    """
    global conversation_history
    conversation_history = []
    return (
        UI_STRINGS["status_ready"],
        UI_STRINGS["reset_confirm"],
        None,
        # A component is cleared with value=None. The previous value="" is a
        # string, which a filepath Audio component would treat as a path.
        gr.update(value=None),
    )
196
+
197
# Create Gradio interface: controls on the left, transcript and spoken reply
# on the right.
with gr.Blocks(title=UI_STRINGS["title"], theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {UI_STRINGS['title']}")
    gr.Markdown(f"### {UI_STRINGS['subtitle']}")
    gr.Markdown(f"> {UI_STRINGS['instruction']}")

    with gr.Row():
        with gr.Column(scale=1):
            # Status indicator
            status_display = gr.Textbox(
                value=UI_STRINGS["status_ready"],
                label="Hali",
                interactive=False,
                lines=1,
            )

            # Voice recorder
            audio_input = gr.Audio(
                label="🎤 Rekodi Sauti",
                type="filepath",
                sources=["microphone"],
            )

            # Process button
            process_btn = gr.Button(
                "📤 Tuma Sauti",
                variant="primary",
                size="lg",
            )

            # Reset button
            reset_btn = gr.Button(
                UI_STRINGS["reset_button"],
                variant="secondary",
            )

        with gr.Column(scale=1):
            # Conversation history
            conversation_display = gr.Markdown(
                value=f"**{UI_STRINGS['assistant_label']}** {UI_STRINGS['welcome']}\n",
                label="Mazungumzo",
            )

            # Audio response player
            audio_output = gr.Audio(
                label="🔊 Jibu la Sauti",
                type="filepath",
                interactive=False,
            )

    # Every handler fills the same four components, in this order.
    handler_outputs = [status_display, conversation_display, audio_output, audio_input]

    def _auto_process(audio):
        """Process a fresh recording; leave the UI untouched when it is empty.

        The handlers clear the recorder after each turn, which fires this
        `change` event again with an empty value. Returning no-op updates in
        that case keeps the transcript and reply audio that were just shown.
        The previous handler overwrote them with the initial welcome text and
        an empty player.
        """
        if not audio:
            return gr.update(), gr.update(), gr.update(), gr.update()
        return process_voice_input(audio)

    # Event handlers
    process_btn.click(
        fn=process_voice_input,
        inputs=[audio_input],
        outputs=handler_outputs,
    )

    reset_btn.click(
        fn=reset_conversation,
        outputs=handler_outputs,
    )

    # Auto-process when audio is recorded
    audio_input.change(
        fn=_auto_process,
        inputs=[audio_input],
        outputs=handler_outputs,
    )
270
+
271
if __name__ == "__main__":
    # Listen on all interfaces on port 7860 and request a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)