|
|
|
|
|
"""
Kiswahili Voice Agent for Hugging Face Spaces
Natural conversational Kiswahili voice-to-voice assistant
"""
|
|
|
|
|
import json
import os
import random
import tempfile
from datetime import datetime

import gradio as gr
|
|
|
|
|
|
|
|
# Optional dependencies: each import is attempted once at module load and the
# outcome is recorded in a HAS_* flag so callers can degrade gracefully
# instead of failing at import time.

# Text-to-speech backend.
try:
    from gtts import gTTS
except ImportError:
    HAS_GTTS = False
else:
    HAS_GTTS = True

# Speech-to-text backend.
try:
    import speech_recognition as sr
except ImportError:
    HAS_SR = False
else:
    HAS_SR = True

# HTTP client; HAS_REQUESTS is not consulted anywhere in the visible code.
try:
    import requests
except ImportError:
    HAS_REQUESTS = False
else:
    HAS_REQUESTS = True
|
|
|
|
|
|
|
|
# Module-level conversation state.
# conversation_id is declared but never assigned a non-None value in the
# visible code.
conversation_id = None
# One dict per completed exchange; appended to by process_voice_input and
# rebound to a fresh list by reset_conversation.
conversation_history = []
|
|
|
|
|
|
|
|
# Persona and answering guidelines for the assistant, written in Kiswahili.
# The visible rule-based responder does not read this constant.
SYSTEM_PROMPT = "\n".join(
    (
        "Wewe ni Manus, msaidizi wa sauti wa Kiswahili ambaye ana tabia nzuri na karimu.",
        "Unazungumza Kiswahili safi na asilia, na unafahamu utamaduni wa Kiswahili.",
        "Katika kila jibu, jaribu kuuliza swali la mfuatano ili kuendelea na mazungumzo.",
        "Jibu kwa ufupi lakini kwa maana - kwa kawaida 1-2 sentensi.",
        "Kila jibu lazima liwe na swali au kauli inayokamatia mazungumzo.",
    )
)
|
|
|
|
|
|
|
|
# User-facing Kiswahili strings, collected in one table so handlers and the
# layout reference them by key.
#
# The emoji prefixes are restored from mis-decoded UTF-8: the damaged text
# showed Greek letters in their place and, for "status_ready", a stray line
# break inside the literal that made it an unterminated string.
UI_STRINGS = {
    "title": "🎙️ Manus - Msaidizi wa Sauti wa Kiswahili",
    "subtitle": "Mazungumzo ya asilia kwa Kiswahili",
    "instruction": "Bonyeza kurekodi, sema kitu kwa Kiswahili, kisha bonyeza kuacha.",
    "status_recording": "🔴 Inasikiliza...",
    "status_processing": "⚙️ Inachakata...",
    "status_ready": "✅ Tayari",
    "status_error": "❌ Hitilafu",
    "user_label": "Wewe:",
    "assistant_label": "Manus:",
    "reset_button": "🔄 Anza Upya",
    "reset_confirm": "Mazungumzo yamefutwa. Karibu tena!",
    "error_audio": "Haiwezekani kusoma sauti. Tafadhali jaribu tena.",
    "error_process": "Haiwezekani kuchakata sauti. Tafadhali jaribu tena.",
    "welcome": "Habari! Naitwa Manus. Karibu sana! Unaweza kusema kitu chochote kwa Kiswahili, na nitakujibu.",
}
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe a recorded audio file to Kiswahili text.

    Args:
        audio_file: Value passed straight to ``sr.AudioFile`` (the UI supplies
            a file path).

    Returns:
        The recognised text on success. On any failure a Kiswahili error
        message is returned instead of raising; every such message begins
        with "Haiwezekani" or "Hitilafu".
    """
    # Without the speech_recognition package there is nothing to try.
    if not HAS_SR:
        return "Haiwezekani kusoma sauti - moduli haipo"

    try:
        engine = sr.Recognizer()
        # Read the whole clip into memory, then release the file before the
        # recognition request is made.
        with sr.AudioFile(audio_file) as clip:
            captured = engine.record(clip)
        return engine.recognize_google(captured, language="sw-TZ")
    except sr.UnknownValueError:
        # The recogniser could not make sense of the audio.
        return "Haiwezekani kuelewa sauti. Tafadhali jaribu tena."
    except sr.RequestError:
        # The recognition service could not be reached or refused the request.
        return "Haiwezekani kuunganisha na huduma ya mtandao."
    except Exception as exc:
        return f"Hitilafu: {exc}"
|
|
|
|
|
# Keyword -> canned reply. Entries are checked in insertion order and the
# first keyword found anywhere in the lowercased utterance wins.
_KEYWORD_REPLIES = {
    "habari": "Habari nzuri! Niko sawa. Wewe je, uko sawa?",
    "jina": "Naitwa Manus, msaidizi wako wa sauti. Jina lako nani?",
    "asante": "Karibu sana! Kuna kitu kingine ninachoweza kukusaidia?",
    "pole": "Pole pole! Kila kitu kitakuwa sawa. Unaweza kusema nini kinachokukosesha?",
    "ndiyo": "Nzuri! Unaweza kusema zaidi?",
    "hapana": "Sawa. Kuna kitu kingine?",
}

# Follow-up prompts used when no keyword matches.
_FALLBACK_REPLIES = (
    "Hiyo ni kitu kizuri! Unaweza kusema zaidi kuhusu hilo?",
    "Nimeelewa. Na kisha nini?",
    "Sawa! Hiyo ni kitu muhimu. Unaweza kueneza?",
    "Nzuri sana! Unaweza kusema kitu kingine?",
    "Hiyo ni interesting! Unaweza kuniambia zaidi?",
)


def generate_response(user_text):
    """Pick a Kiswahili reply for *user_text* using keyword rules.

    Args:
        user_text: The transcribed utterance. ``None`` or an empty string is
            treated as "no keyword present" rather than raising.

    Returns:
        The reply for the first keyword (in table order) that occurs as a
        substring of the lowercased input, otherwise a randomly chosen
        follow-up prompt.
    """
    # Tolerate missing input so a failed upstream step cannot crash the reply.
    lowered = (user_text or "").lower()

    for keyword, reply in _KEYWORD_REPLIES.items():
        if keyword in lowered:
            return reply

    return random.choice(_FALLBACK_REPLIES)
|
|
|
|
|
def text_to_speech_kiswahili(text):
    """Synthesize *text* as Kiswahili speech and return the MP3 file path.

    Args:
        text: The sentence to speak.

    Returns:
        Path of a newly created MP3 file, or ``None`` when gTTS is not
        installed, *text* is empty, or synthesis fails (the failure is
        printed, not raised).
    """
    if not HAS_GTTS or not text:
        return None

    audio_file = None
    try:
        tts = gTTS(text=text, lang='sw', slow=False)
        # Use a unique file per reply: a single fixed path would be
        # overwritten by any other request being answered at the same time.
        fd, audio_file = tempfile.mkstemp(prefix="response_", suffix=".mp3")
        os.close(fd)  # gTTS reopens the path itself; only the name is needed
        tts.save(audio_file)
        return audio_file
    except Exception as e:
        print(f"TTS Error: {e}")
        # Best-effort cleanup so a failed synthesis leaves no empty file behind.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                pass
        return None
|
|
|
|
|
def _voice_result(status, text, audio=None):
    """Build the 4-tuple returned for every outcome of a voice request.

    The last element clears the recorder; ``None`` is the empty value for an
    audio component, whereas an empty string would be read as a file path.
    """
    return (status, text, audio, gr.update(value=None))


def process_voice_input(audio_input):
    """Run one voice exchange: transcribe, reply, synthesize, record.

    Args:
        audio_input: The recording handed over by the UI, or ``None`` when
            nothing was recorded.

    Returns:
        A 4-tuple ``(status, text, audio, recorder_update)``:
        the status label, the rendered conversation (or an error message),
        the path of the spoken reply (or ``None``), and an update that
        clears the recorder.
    """
    if audio_input is None:
        return _voice_result(UI_STRINGS["status_error"], UI_STRINGS["error_audio"])

    try:
        user_text = transcribe_audio(audio_input)

        # transcribe_audio reports failures in-band as messages that START
        # with one of these words. Matching only the prefix keeps an utterance
        # that merely contains the word from being mistaken for an error.
        if user_text.startswith(("Hitilafu", "Haiwezekani")):
            return _voice_result(UI_STRINGS["status_error"], user_text)

        assistant_response = generate_response(user_text)
        audio_response = text_to_speech_kiswahili(assistant_response)

        conversation_history.append({
            "timestamp": datetime.now().isoformat(),
            "user": user_text,
            "assistant": assistant_response,
        })

        # Re-render the whole history as Markdown, oldest exchange first.
        conversation_text = "".join(
            f"\n**{UI_STRINGS['user_label']}** {msg['user']}\n"
            f"**{UI_STRINGS['assistant_label']}** {msg['assistant']}\n"
            for msg in conversation_history
        )

        return _voice_result(
            UI_STRINGS["status_ready"], conversation_text, audio_response
        )
    except Exception as e:
        # Last-resort boundary: surface the failure in the UI instead of
        # letting the event handler raise.
        error_msg = f"{UI_STRINGS['status_error']}: {e}"
        return _voice_result(UI_STRINGS["status_error"], error_msg)
|
|
|
|
|
def reset_conversation():
    """Discard the conversation history and return the UI to its idle state.

    Returns:
        A 4-tuple ``(status, text, audio, recorder_update)``: the ready
        status, the reset confirmation message, no reply audio, and an
        update that clears the recorder.
    """
    global conversation_history
    conversation_history = []
    return (
        UI_STRINGS["status_ready"],
        UI_STRINGS["reset_confirm"],
        None,
        # None is the empty value for an audio component; an empty string
        # would be read as a file path.
        gr.update(value=None),
    )
|
|
|
|
|
|
|
|
# Page layout and event wiring. Emoji in the three literal labels below are
# restored from mis-decoded UTF-8 in the damaged text.
with gr.Blocks(title=UI_STRINGS["title"], theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {UI_STRINGS['title']}")
    gr.Markdown(f"### {UI_STRINGS['subtitle']}")
    gr.Markdown(f"> {UI_STRINGS['instruction']}")

    with gr.Row():
        # Left column: status, recorder and action buttons.
        with gr.Column(scale=1):
            status_display = gr.Textbox(
                value=UI_STRINGS["status_ready"],
                label="Hali",
                interactive=False,
                lines=1,
            )

            audio_input = gr.Audio(
                label="🎤 Rekodi Sauti",
                type="filepath",
                sources=["microphone"],
            )

            process_btn = gr.Button(
                "📤 Tuma Sauti",
                variant="primary",
                size="lg",
            )

            reset_btn = gr.Button(
                UI_STRINGS["reset_button"],
                variant="secondary",
            )

        # Right column: running transcript and the spoken reply.
        with gr.Column(scale=1):
            conversation_display = gr.Markdown(
                value=f"**{UI_STRINGS['assistant_label']}** {UI_STRINGS['welcome']}\n",
                label="Mazungumzo",
            )

            audio_output = gr.Audio(
                label="🔊 Jibu la Sauti",
                type="filepath",
                interactive=False,
            )

    def _on_audio_change(audio):
        """Process a new recording; leave the UI untouched when it is cleared.

        The handlers clear the recorder through their own outputs, which
        fires this event again with an empty value. Returning no-op updates
        in that case keeps the transcript and spoken reply that were just
        produced, instead of overwriting them with the construction-time
        welcome text and an empty player.
        """
        if audio:
            return process_voice_input(audio)
        return gr.update(), gr.update(), gr.update(), gr.update()

    # All three events write to the same four components, in this order.
    handler_outputs = [status_display, conversation_display, audio_output, audio_input]

    process_btn.click(
        fn=process_voice_input,
        inputs=[audio_input],
        outputs=handler_outputs,
    )

    reset_btn.click(
        fn=reset_conversation,
        outputs=handler_outputs,
    )

    # Recording a clip processes it without needing the send button.
    audio_input.change(
        fn=_on_audio_change,
        inputs=[audio_input],
        outputs=handler_outputs,
    )
|
|
|
|
|
if __name__ == "__main__":
    # Listen on every network interface at port 7860 and ask Gradio for a
    # public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)
|
|
|