anaspro committed on
Commit
4ecae52
·
1 Parent(s): 9fc653f
Files changed (1) hide show
  1. app.py +23 -133
app.py CHANGED
@@ -8,14 +8,11 @@ import av
8
  import gradio as gr
9
  import spaces
10
  import torch
11
- from gtts import gTTS
12
- import io
13
- import base64
14
  from transformers import AutoModelForImageTextToText, AutoProcessor
15
  from transformers.generation.streamers import TextIteratorStreamer
16
 
17
  # Model configuration
18
- model_id = "anaspro/Shako-4B-it-v2"
19
  processor = AutoProcessor.from_pretrained(model_id)
20
  model = AutoModelForImageTextToText.from_pretrained(
21
  model_id,
@@ -157,33 +154,9 @@ def process_history(history: list[dict]) -> list[dict]:
157
  return messages
158
 
159
 
160
- def generate_speech(text: str, lang: str = 'ar') -> tuple[str, str]:
161
- """Generate speech from text using Google TTS and return audio file path and base64 data."""
162
- try:
163
- # Create TTS object
164
- tts = gTTS(text=text, lang=lang, slow=False)
165
-
166
- # Save to temporary file
167
- temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
168
- temp_audio_file.close()
169
-
170
- tts.save(temp_audio_file.name)
171
-
172
- # Also create base64 version for direct playback
173
- audio_buffer = io.BytesIO()
174
- tts.write_to_fp(audio_buffer)
175
- audio_buffer.seek(0)
176
- audio_base64 = base64.b64encode(audio_buffer.read()).decode('utf-8')
177
-
178
- return temp_audio_file.name, f"data:audio/mp3;base64,{audio_base64}"
179
- except Exception as e:
180
- print(f"TTS Error: {e}")
181
- return None, None
182
-
183
-
184
  @spaces.GPU()
185
  @torch.inference_mode()
186
- def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512, enable_voice: bool = False) -> Iterator[tuple[str, str | None]]:
187
  if not validate_media_constraints(message):
188
  yield ""
189
  return
@@ -227,115 +200,32 @@ def generate(message: dict, history: list[dict], system_prompt: str = "", max_ne
227
  output += delta
228
  yield output
229
 
230
- # Generate voice if enabled
231
- if enable_voice and output.strip():
232
- _, audio_data = generate_speech(output.strip(), lang='ar')
233
- if audio_data:
234
- yield {"text": output, "audio": audio_data}
235
- else:
236
- yield output
237
- else:
238
- yield output
239
-
240
 
241
  # Examples for the chat interface (with additional inputs: system_prompt, max_new_tokens)
242
  examples = [
243
- ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
 
 
244
  ]
245
 
246
- # Create custom interface with voice recording
247
- def create_interface():
248
- with gr.Blocks(title="Shako IRAQI AI", theme=gr.themes.Soft()) as demo:
249
- gr.Markdown("# Shako IRAQI AI 🤖")
250
- gr.Markdown("تحدث مع الذكاء الاصطناعي العراقي - يدعم الصور والفيديو والصوت!")
251
-
252
- chatbot = gr.Chatbot(type="messages", height=500)
253
-
254
- with gr.Row():
255
- with gr.Column(scale=4):
256
- textbox = gr.MultimodalTextbox(
257
- file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
258
- file_count="multiple",
259
- placeholder="اكتب رسالتك هنا أو ارفع ملف...",
260
- show_label=False,
261
- autofocus=True,
262
- )
263
-
264
- with gr.Column(scale=1):
265
- voice_input = gr.Audio(
266
- sources=["microphone"],
267
- type="filepath",
268
- label="🎤 تسجيل صوتي",
269
- show_label=True,
270
- )
271
-
272
- with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
273
- system_prompt = gr.Textbox(
274
- label="System Prompt",
275
- value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا",
276
- lines=2
277
- )
278
- max_tokens = gr.Slider(
279
- label="Max New Tokens",
280
- minimum=100,
281
- maximum=2000,
282
- step=10,
283
- value=700
284
- )
285
- enable_voice = gr.Checkbox(
286
- label="تفعيل الصوت في الردود",
287
- value=False
288
- )
289
-
290
- def process_input(message, voice_file, history, system_prompt, max_tokens, enable_voice):
291
- """Process both text and voice inputs"""
292
- if voice_file:
293
- # If voice input is provided, create a message with the audio file
294
- voice_message = {"files": [voice_file], "text": message.get("text", "")}
295
- else:
296
- voice_message = message
297
-
298
- # Generate response
299
- response_text = ""
300
- for partial_response in generate(voice_message, history, system_prompt, max_tokens, enable_voice):
301
- if isinstance(partial_response, dict):
302
- # Handle audio response
303
- response_text = partial_response["text"]
304
- yield partial_response
305
- else:
306
- response_text = partial_response
307
- yield partial_response
308
-
309
- # Handle submission
310
- textbox.submit(
311
- fn=process_input,
312
- inputs=[textbox, voice_input, chatbot, system_prompt, max_tokens, enable_voice],
313
- outputs=[chatbot]
314
- ).then(
315
- fn=lambda: None,
316
- inputs=[],
317
- outputs=[voice_input] # Clear voice input after submission
318
- )
319
-
320
- # Clear voice input when text is submitted
321
- textbox.submit(
322
- fn=lambda: None,
323
- inputs=[],
324
- outputs=[voice_input]
325
- )
326
-
327
- # Examples
328
- gr.Examples(
329
- examples=[
330
- "مرحبا، كيف حالك؟",
331
- "شرح لي عن الذكاء الاصطناعي",
332
- "أخبرني نكتة عراقية"
333
- ],
334
- inputs=[textbox]
335
- )
336
-
337
- return demo
338
 
339
  if __name__ == "__main__":
340
- demo = create_interface()
341
  demo.launch()
 
8
  import gradio as gr
9
  import spaces
10
  import torch
 
 
 
11
  from transformers import AutoModelForImageTextToText, AutoProcessor
12
  from transformers.generation.streamers import TextIteratorStreamer
13
 
14
  # Model configuration
15
+ model_id = "anaspro/Shako-4B-it"
16
  processor = AutoProcessor.from_pretrained(model_id)
17
  model = AutoModelForImageTextToText.from_pretrained(
18
  model_id,
 
154
  return messages
155
 
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  @spaces.GPU()
158
  @torch.inference_mode()
159
+ def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
160
  if not validate_media_constraints(message):
161
  yield ""
162
  return
 
200
  output += delta
201
  yield output
202
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  # Examples for the chat interface (with additional inputs: system_prompt, max_new_tokens)
205
  examples = [
206
+ ["What is the capital of France?", "You are a helpful assistant.", 700],
207
+ ["Explain quantum computing in simple terms", "You are a helpful assistant.", 512],
208
+ ["Write a short story about a robot learning to paint", "You are a helpful assistant.", 1000]
209
  ]
210
 
211
+ # Create the chat interface
212
+ demo = gr.ChatInterface(
213
+ fn=generate,
214
+ type="messages",
215
+ textbox=gr.MultimodalTextbox(
216
+ file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
217
+ file_count="multiple",
218
+ autofocus=True,
219
+ ),
220
+ multimodal=True,
221
+ additional_inputs=[
222
+ gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
223
+ gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
224
+ ],
225
+ title="Shako IRAQI AI",
226
+ examples=examples,
227
+ stop_btn=False,
228
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  if __name__ == "__main__":
 
231
  demo.launch()