Spaces:

anaspro
/

chatbox

Runtime error

App Files Files Community

anaspro committed on Oct 29

Commit

cd37326

1 Parent(s): 9e78ee7

updatE

Browse files

Files changed (2) hide show

app.py +39 -1
test_tts.py +40 -0

app.py CHANGED Viewed

@@ -8,6 +8,9 @@ import av
 import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.generation.streamers import TextIteratorStreamer
@@ -154,9 +157,33 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
 @spaces.GPU()
 @torch.inference_mode()
-def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message):
         yield ""
         return
@@ -200,6 +227,16 @@ def generate(message: dict, history: list[dict], system_prompt: str = "", max_ne
         output += delta
         yield output
 # Examples for the chat interface (with additional inputs: system_prompt, max_new_tokens)
 examples = [
@@ -221,6 +258,7 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
     ],
     title="Shako IRAQI AI",
     examples=examples,

 import gradio as gr
 import spaces
 import torch
+from gtts import gTTS
+import io
+import base64
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.generation.streamers import TextIteratorStreamer
     return messages
+def generate_speech(text: str, lang: str = 'ar') -> tuple[str | None, str | None]:
+    """Synthesize *text* with Google TTS and return it in two forms.
+
+    The audio is requested once, held in memory, and then both written to a
+    temporary ``.mp3`` file and base64-encoded, so the two results always
+    carry the same bytes.
+
+    Args:
+        text: Text to speak.
+        lang: Language code passed to gTTS (defaults to ``'ar'``).
+
+    Returns:
+        ``(file_path, data_uri)`` on success, where ``file_path`` names the
+        temporary MP3 file and ``data_uri`` is a
+        ``data:audio/mp3;base64,...`` string. ``(None, None)`` when *text* is
+        blank or synthesis fails; failures are printed, not raised.
+
+    Note:
+        The temporary file is created with ``delete=False``; removing it is
+        left to the caller.
+    """
+    # NOTE(review): `tempfile` is not among the imports visible in this hunk;
+    # importing it here keeps the function self-contained either way.
+    import tempfile
+
+    # Nothing to synthesize; skip the TTS request entirely.
+    if not text or not text.strip():
+        return None, None
+    try:
+        tts = gTTS(text=text, lang=lang, slow=False)
+        # Synthesize once into memory: calling both save() and write_to_fp()
+        # would request the same audio from the service twice.
+        audio_buffer = io.BytesIO()
+        tts.write_to_fp(audio_buffer)
+        audio_bytes = audio_buffer.getvalue()
+        # Create the file only after synthesis succeeded, so a failed request
+        # does not leave an empty temporary file behind.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
+            temp_audio_file.write(audio_bytes)
+        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+        return temp_audio_file.name, f"data:audio/mp3;base64,{audio_base64}"
+    except Exception as e:
+        # Best-effort: report the failure and signal it with (None, None).
+        print(f"TTS Error: {e}")
+        return None, None
 @spaces.GPU()
 @torch.inference_mode()
+def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512, enable_voice: bool = False) -> Iterator[tuple[str, str | None]]:
     if not validate_media_constraints(message):
         yield ""
         return
         output += delta
         yield output
+    # Generate voice if enabled
+    if enable_voice and output.strip():
+        _, audio_data = generate_speech(output.strip(), lang='ar')
+        if audio_data:
+            yield {"text": output, "audio": audio_data}
+        else:
+            yield output
+    else:
+        yield output
 # Examples for the chat interface (with additional inputs: system_prompt, max_new_tokens)
 examples = [
     additional_inputs=[
         gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
+        gr.Checkbox(label="Enable Voice Output", value=False),
     ],
     title="Shako IRAQI AI",
     examples=examples,

test_tts.py ADDED Viewed

	@@ -0,0 +1,40 @@

+#!/usr/bin/env python3
+from gtts import gTTS
+import io
+import base64
+import tempfile
+def generate_speech(text: str, lang: str = 'ar') -> tuple[str | None, str | None]:
+    """Synthesize *text* with Google TTS and return it in two forms.
+
+    The audio is requested once, held in memory, and then both written to a
+    temporary ``.mp3`` file and base64-encoded, so the two results always
+    carry the same bytes.
+
+    Args:
+        text: Text to speak.
+        lang: Language code passed to gTTS (defaults to ``'ar'``).
+
+    Returns:
+        ``(file_path, data_uri)`` on success, where ``file_path`` names the
+        temporary MP3 file and ``data_uri`` is a
+        ``data:audio/mp3;base64,...`` string. ``(None, None)`` when *text* is
+        blank or synthesis fails; failures are printed, not raised.
+
+    Note:
+        The temporary file is created with ``delete=False``; removing it is
+        left to the caller.
+    """
+    # Nothing to synthesize; skip the TTS request entirely.
+    if not text or not text.strip():
+        return None, None
+    try:
+        tts = gTTS(text=text, lang=lang, slow=False)
+        # Synthesize once into memory: calling both save() and write_to_fp()
+        # would request the same audio from the service twice.
+        audio_buffer = io.BytesIO()
+        tts.write_to_fp(audio_buffer)
+        audio_bytes = audio_buffer.getvalue()
+        # Create the file only after synthesis succeeded, so a failed request
+        # does not leave an empty temporary file behind.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
+            temp_audio_file.write(audio_bytes)
+        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+        return temp_audio_file.name, f"data:audio/mp3;base64,{audio_base64}"
+    except Exception as e:
+        # Best-effort: report the failure and signal it with (None, None).
+        print(f"TTS Error: {e}")
+        return None, None
+if __name__ == "__main__":
+    # Manual smoke test for generate_speech(); presumably needs network
+    # access, since the audio comes from the Google TTS service.
+    text = "مرحبا، هذا اختبار للصوت"
+    file_path, audio_data = generate_speech(text)
+    if not (file_path and audio_data):
+        print("TTS test failed!")
+        # Exit non-zero so a shell or CI job can detect the failure instead of
+        # having to parse stdout.
+        raise SystemExit(1)
+    print(f"Audio file created: {file_path}")
+    print(f"Audio data length: {len(audio_data)}")
+    print("TTS test successful!")