ibrahimlasfar committed on
Commit
1c08614
·
1 Parent(s): 2621d75

Fix model availability, audio/image submission, and enhance UI

Browse files
Files changed (3) hide show
  1. api/endpoints.py +1 -1
  2. main.py +191 -107
  3. utils/generation.py +1 -1
api/endpoints.py CHANGED
@@ -12,7 +12,7 @@ router = APIRouter()
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
15
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
16
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
17
  TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-20b:together")
18
 
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
15
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:together")
16
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
17
  TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-20b:together")
18
 
main.py CHANGED
@@ -31,74 +31,101 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
31
 
32
  # إعداد CSS
33
  css = """
34
- .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
 
 
 
 
 
35
  .chatbot {
36
- border: 1px solid #ccc;
37
- border-radius: 15px;
38
- padding: 20px;
39
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
 
 
 
40
  }
41
- .input-textbox {
42
- font-size: 18px;
43
- padding: 12px;
44
- border-radius: 8px;
45
- border: 1px solid #aaa;
 
 
 
 
 
 
 
46
  }
47
- .upload-button, .audio-input-button, .audio-record-button {
48
- background: #4CAF50;
49
- color: white;
50
- border-radius: 8px;
51
- padding: 10px 20px;
52
  font-size: 16px;
53
- cursor: pointer;
 
 
54
  }
55
- .upload-button:hover, .audio-input-button:hover, .audio-record-button:hover {
56
- background: #45a049;
 
 
 
 
 
 
57
  }
58
- .upload-button::before {
59
- content: '📷 ';
60
- font-size: 20px;
61
  }
62
- .audio-input-button::before {
63
- content: '🎤 ';
64
- font-size: 20px;
 
 
 
 
 
 
 
 
 
65
  }
66
- .audio-record-button::before {
67
- content: '🔊 ';
68
- font-size: 20px;
69
  }
70
- .loading::after {
71
- content: '';
72
- display: inline-block;
73
- width: 18px;
74
- height: 18px;
75
- border: 3px solid #333;
76
- border-top-color: transparent;
77
- border-radius: 50%;
78
- animation: spin 1s linear infinite;
79
- margin-left: 10px;
80
  }
81
- @keyframes spin {
82
- to { transform: rotate(360deg); }
 
 
 
 
83
  }
84
- .output-container {
85
- margin-top: 25px;
86
- padding: 15px;
87
- border: 1px solid #ddd;
 
 
 
88
  border-radius: 10px;
89
- background: #fff;
90
  }
91
- .audio-output-container {
92
- display: flex;
93
- align-items: center;
94
- gap: 15px;
95
- margin-top: 15px;
96
  }
97
- .output-format-radio {
98
- margin-top: 10px;
99
  }
100
  """
101
-
102
  # دالة لمعالجة الإدخال
103
  def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_format="text"):
104
  input_type = "text"
@@ -106,62 +133,93 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
106
  image_data = None
107
  if audio_input:
108
  input_type = "audio"
109
- with open(audio_input, "rb") as f:
110
- audio_data = f.read()
111
- message = "Transcribe this audio"
 
 
 
 
112
  elif image_input:
113
  input_type = "image"
114
- with open(image_input, "rb") as f:
115
- image_data = f.read()
116
- message = f"Analyze this image"
 
 
 
 
117
 
118
  response_text = ""
119
  audio_response = None
120
- for chunk in generate(
121
- message=message,
122
- history=history,
123
- system_prompt=system_prompt,
124
- temperature=temperature,
125
- reasoning_effort=reasoning_effort,
126
- enable_browsing=enable_browsing,
127
- max_new_tokens=max_new_tokens,
128
- input_type=input_type,
129
- audio_data=audio_data,
130
- image_data=image_data,
131
- output_format=output_format
132
- ):
133
- if isinstance(chunk, bytes):
134
- audio_response = io.BytesIO(chunk)
135
- audio_response.name = "response.wav"
136
- else:
137
- response_text += chunk
138
- yield response_text, audio_response
 
 
 
 
139
 
140
  # دالة لمعالجة زر إرسال الصوت
141
  def submit_audio(audio_input, output_format):
142
  if not audio_input:
143
  return "Please upload or record an audio file.", None
144
- return process_input(message="", audio_input=audio_input, output_format=output_format)
 
 
 
 
 
 
 
 
 
145
 
146
  # دالة لمعالجة زر إرسال الصورة
147
  def submit_image(image_input, output_format):
148
  if not image_input:
149
  return "Please upload an image.", None
150
- return process_input(message="", image_input=image_input, output_format=output_format)
 
 
 
 
 
 
 
 
 
151
 
 
152
  # إعداد واجهة Gradio
153
  with gr.Blocks(css=css, theme="gradio/soft") as chatbot_ui:
154
  gr.Markdown(
155
  """
156
  # MGZon Chatbot 🤖
157
- A versatile chatbot powered by DeepSeek, GPT-OSS, CLIP, Whisper, and Parler-TTS. Supports text, audio, and image inputs with text or voice outputs. Upload files, record audio, or type your query and choose your output format!
158
  """
159
  )
160
  with gr.Row():
161
  with gr.Column(scale=3):
162
- chatbot = gr.Chatbot(label="Chat", height=500, latex_delimiters=LATEX_DELIMS)
163
  with gr.Column(scale=1):
164
- with gr.Accordion("⚙️ Settings", open=True):
165
  system_prompt = gr.Textbox(
166
  label="System Prompt",
167
  value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze content appropriately. Respond in the requested output format (text or audio).",
@@ -171,40 +229,66 @@ with gr.Blocks(css=css, theme="gradio/soft") as chatbot_ui:
171
  reasoning_effort = gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium")
172
  enable_browsing = gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True)
173
  max_new_tokens = gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000)
174
- output_format = gr.Radio(
175
- label="Output Format",
176
- choices=["text", "audio"],
177
- value="text",
178
- elem_classes="output-format-radio"
179
- )
180
- with gr.Row():
181
- message = gr.Textbox(label="Type your message", placeholder="Enter your query or describe your request...", lines=2, elem_classes="input-textbox")
182
- submit_btn = gr.Button("Send", variant="primary")
183
  with gr.Row():
184
- with gr.Column(scale=1):
185
- audio_input = gr.Audio(label="Record or Upload Audio", type="filepath", elem_classes="audio-input")
186
- audio_submit_btn = gr.Button("Send Audio", elem_classes="audio-input-button")
187
- with gr.Column(scale=1):
188
- image_input = gr.File(label="Upload Image", file_types=["image"], elem_classes="upload-button")
189
- image_submit_btn = gr.Button("Send Image", elem_classes="upload-button")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  output_text = gr.Textbox(label="Response", lines=10, elem_classes="output-container")
191
  output_audio = gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)
192
 
193
- # ربط الأزرار
 
 
 
 
 
 
 
 
194
  submit_btn.click(
195
  fn=process_input,
196
- inputs=[message, audio_input, image_input, chatbot, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, output_format],
197
- outputs=[output_text, output_audio]
 
198
  )
199
- audio_submit_btn.click(
200
- fn=submit_audio,
201
- inputs=[audio_input, output_format],
202
- outputs=[output_text, output_audio]
 
203
  )
204
- image_submit_btn.click(
205
  fn=submit_image,
206
- inputs=[image_input, output_format],
207
- outputs=[output_text, output_audio]
 
 
 
 
 
 
 
208
  )
209
 
210
  # إعداد FastAPI
 
31
 
32
  # إعداد CSS
33
  css = """
34
+ .gradio-container {
35
+ max-width: 1000px;
36
+ margin: auto;
37
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
38
+ background: #f0f2f5;
39
+ }
40
  .chatbot {
41
+ border: none;
42
+ border-radius: 20px;
43
+ padding: 15px;
44
+ background: #fff;
45
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
46
+ height: 600px;
47
+ overflow-y: auto;
48
  }
49
+ .input-container {
50
+ display: flex;
51
+ align-items: center;
52
+ gap: 8px;
53
+ border: 1px solid #ddd;
54
+ border-radius: 25px;
55
+ padding: 8px;
56
+ background: #fff;
57
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1);
58
+ position: sticky;
59
+ bottom: 10px;
60
+ margin: 10px;
61
  }
62
+ .input-textbox {
63
+ flex-grow: 1;
64
+ border: none;
65
+ outline: none;
 
66
  font-size: 16px;
67
+ padding: 10px 15px;
68
+ border-radius: 20px;
69
+ background: transparent;
70
  }
71
+ .input-icon {
72
+ background: none;
73
+ border: none;
74
+ cursor: pointer;
75
+ font-size: 22px;
76
+ padding: 8px;
77
+ color: #555;
78
+ transition: color 0.2s;
79
  }
80
+ .input-icon:hover {
81
+ color: #0084ff;
 
82
  }
83
+ .submit-btn {
84
+ background: #0084ff;
85
+ color: white;
86
+ border-radius: 50%;
87
+ width: 36px;
88
+ height: 36px;
89
+ display: flex;
90
+ align-items: center;
91
+ justify-content: center;
92
+ font-size: 18px;
93
+ cursor: pointer;
94
+ box-shadow: 0 1px 3px rgba(0,0,0,0.2);
95
  }
96
+ .submit-btn:hover {
97
+ background: #0066cc;
 
98
  }
99
+ .output-container {
100
+ margin: 15px 0;
101
+ padding: 15px;
102
+ border-radius: 10px;
103
+ background: #f9f9f9;
104
+ border: 1px solid #e0e0e0;
 
 
 
 
105
  }
106
+ .settings-accordion {
107
+ background: #fff;
108
+ border-radius: 10px;
109
+ padding: 15px;
110
+ box-shadow: 0 1px 5px rgba(0,0,0,0.1);
111
+ margin-bottom: 10px;
112
  }
113
+ .audio-output-container {
114
+ display: flex;
115
+ align-items: center;
116
+ gap: 10px;
117
+ margin-top: 10px;
118
+ background: #fff;
119
+ padding: 10px;
120
  border-radius: 10px;
 
121
  }
122
+ .gr-button {
123
+ transition: background-color 0.2s, transform 0.1s;
 
 
 
124
  }
125
+ .gr-button:hover {
126
+ transform: scale(1.05);
127
  }
128
  """
 
129
# Input handler: process a single user submission (text, audio, or image)
# and stream the model response.
def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_format="text"):
    """Yield (response_text, audio_response) tuples as generation progresses.

    Args:
        message: User text; replaced by a fixed instruction when an audio or
            image file is supplied.
        audio_input: Optional path to an audio file to transcribe.
        image_input: Optional path to an image file to analyze.
        history, system_prompt, temperature, reasoning_effort,
        enable_browsing, max_new_tokens, output_format: forwarded verbatim
            to `generate` (defined in utils/generation.py — not visible here).

    Yields:
        (text, audio) where `audio` is an in-memory WAV (io.BytesIO) when the
        model returned speech bytes, otherwise None.
    """
    input_type = "text"
    audio_data = None
    image_data = None
    if audio_input:
        input_type = "audio"
        try:
            with open(audio_input, "rb") as f:
                audio_data = f.read()
            message = "Transcribe this audio"
        except Exception as e:
            logger.error(f"Failed to read audio file: {e}")
            # BUG FIX: this function is a generator, so `return value` raises
            # StopIteration and the caller never receives the error message.
            # Yield the error tuple instead, then stop.
            yield f"Error: Failed to read audio file: {e}", None
            return
    elif image_input:
        input_type = "image"
        try:
            with open(image_input, "rb") as f:
                image_data = f.read()
            message = "Analyze this image"
        except Exception as e:
            logger.error(f"Failed to read image file: {e}")
            # BUG FIX: same generator-return defect as the audio branch.
            yield f"Error: Failed to read image file: {e}", None
            return

    response_text = ""
    audio_response = None
    try:
        for chunk in generate(
            message=message,
            history=history,
            system_prompt=system_prompt,
            temperature=temperature,
            reasoning_effort=reasoning_effort,
            enable_browsing=enable_browsing,
            max_new_tokens=max_new_tokens,
            input_type=input_type,
            audio_data=audio_data,
            image_data=image_data,
            output_format=output_format
        ):
            # Audio chunks arrive as raw bytes; everything else is text.
            if isinstance(chunk, bytes):
                audio_response = io.BytesIO(chunk)
                audio_response.name = "response.wav"
            else:
                response_text += chunk
            yield response_text or "Processing...", audio_response
    except Exception as e:
        logger.error(f"Generation failed: {e}")
        yield f"Error: Generation failed: {e}", None
178
 
179
# Audio submit handler: drain the streaming generator, return final result.
def submit_audio(audio_input, output_format):
    """Run the full pipeline for a recorded/uploaded audio file.

    Returns (response_text, audio_path_or_None) suitable for the
    output_text / output_audio components.
    """
    if not audio_input:
        return "Please upload or record an audio file.", None
    response_text = ""
    audio_response = None
    try:
        for text, audio in process_input(message="", audio_input=audio_input, output_format=output_format):
            response_text = text or "No text response generated."
            audio_response = audio
        # BUG FIX: the wired output is gr.Audio(type="filepath"), which needs
        # a real file path — process_input yields an io.BytesIO. Persist any
        # in-memory audio to a temporary .wav file before returning it.
        if audio_response is not None and not isinstance(audio_response, str):
            import tempfile
            payload = audio_response.read() if hasattr(audio_response, "read") else audio_response
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp.write(payload)
                audio_response = tmp.name
        return response_text, audio_response
    except Exception as e:
        logger.error(f"Audio submission failed: {e}")
        return f"Error: Audio processing failed: {e}", None
193
 
194
# File submit handler. NOTE(review): the shared picker (file_input) also
# accepts .mp3/.wav, so audio uploads must be routed to the audio pipeline
# instead of being sent to the model as "Analyze this image".
def submit_image(image_input, output_format):
    """Run the full pipeline for an uploaded file.

    Returns (response_text, audio_path_or_None) suitable for the
    output_text / output_audio components.
    """
    if not image_input:
        return "Please upload an image.", None
    # gr.File may hand back a path string or a file-like object with .name.
    path = getattr(image_input, "name", image_input)
    if str(path).lower().endswith((".mp3", ".wav")):
        return submit_audio(path, output_format)
    response_text = ""
    audio_response = None
    try:
        for text, audio in process_input(message="", image_input=path, output_format=output_format):
            response_text = text or "No text response generated."
            audio_response = audio
        # BUG FIX: gr.Audio(type="filepath") needs a real path, not a
        # BytesIO — persist in-memory audio to a temporary .wav file.
        if audio_response is not None and not isinstance(audio_response, str):
            import tempfile
            payload = audio_response.read() if hasattr(audio_response, "read") else audio_response
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp.write(payload)
                audio_response = tmp.name
        return response_text, audio_response
    except Exception as e:
        logger.error(f"Image submission failed: {e}")
        return f"Error: Image processing failed: {e}", None
208
 
209
+ # إعداد واجهة Gradio
210
  # إعداد واجهة Gradio
211
  with gr.Blocks(css=css, theme="gradio/soft") as chatbot_ui:
212
  gr.Markdown(
213
  """
214
  # MGZon Chatbot 🤖
215
+ A versatile chatbot powered by DeepSeek, GPT-OSS, CLIP, Whisper, and Parler-TTS. Type your query, upload images/files, or record audio in one sleek input form!
216
  """
217
  )
218
  with gr.Row():
219
  with gr.Column(scale=3):
220
+ chatbot = gr.Chatbot(label="Chat", height=600, latex_delimiters=LATEX_DELIMS, elem_classes="chatbot")
221
  with gr.Column(scale=1):
222
+ with gr.Accordion("⚙️ Settings", open=False, elem_classes="settings-accordion"):
223
  system_prompt = gr.Textbox(
224
  label="System Prompt",
225
  value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze content appropriately. Respond in the requested output format (text or audio).",
 
229
  reasoning_effort = gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium")
230
  enable_browsing = gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True)
231
  max_new_tokens = gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000)
232
+ output_format = gr.Radio(label="Output Format", choices=["text", "audio"], value="text")
 
 
 
 
 
 
 
 
233
  with gr.Row():
234
+ with gr.Column():
235
+ with gr.Group(elem_classes="input-container"):
236
+ message = gr.Textbox(
237
+ placeholder="Type your message, or use icons to upload files/audio...",
238
+ lines=1,
239
+ elem_classes="input-textbox",
240
+ show_label=False
241
+ )
242
+ file_input = gr.File(
243
+ file_types=["image", ".mp3", ".wav"],
244
+ show_label=False,
245
+ elem_classes="input-icon",
246
+ visible=False
247
+ )
248
+ audio_input = gr.Audio(
249
+ type="filepath",
250
+ show_label=False,
251
+ elem_classes="input-icon",
252
+ visible=False
253
+ )
254
+ file_btn = gr.Button("📎", elem_classes="input-icon")
255
+ audio_btn = gr.Button("🎤", elem_classes="input-icon")
256
+ submit_btn = gr.Button("➡️", elem_classes="submit-btn")
257
  output_text = gr.Textbox(label="Response", lines=10, elem_classes="output-container")
258
  output_audio = gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)
259
 
260
+ # ربط الأحداث
261
+ file_btn.click(
262
+ fn=lambda: gr.update(visible=True),
263
+ outputs=file_input
264
+ )
265
+ audio_btn.click(
266
+ fn=lambda: gr.update(visible=True),
267
+ outputs=audio_input
268
+ )
269
  submit_btn.click(
270
  fn=process_input,
271
+ inputs=[message, audio_input, file_input, chatbot, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, output_format],
272
+ outputs=[output_text, output_audio, chatbot, message],
273
+ _js="() => { return ['', null, null, []]; }" # تنظيف المدخلات
274
  )
275
+ message.submit(
276
+ fn=process_input,
277
+ inputs=[message, audio_input, file_input, chatbot, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, output_format],
278
+ outputs=[output_text, output_audio, chatbot, message],
279
+ _js="() => { return ['', null, null, []]; }" # تنظيف المدخلات
280
  )
281
+ file_input.change(
282
  fn=submit_image,
283
+ inputs=[file_input, output_format],
284
+ outputs=[output_text, output_audio, chatbot, message],
285
+ _js="() => { return ['', null, null, []]; }" # تنظيف المدخلات
286
+ )
287
+ audio_input.change(
288
+ fn=submit_audio,
289
+ inputs=[audio_input, output_format],
290
+ outputs=[output_text, output_audio, chatbot, message],
291
+ _js="() => { return ['', null, null, []]; }" # تنظيف المدخلات
292
  )
293
 
294
  # إعداد FastAPI
utils/generation.py CHANGED
@@ -35,7 +35,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
37
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
38
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
39
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
40
  TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-20b:together")
41
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
 
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
37
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
38
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:together")
39
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
40
  TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-20b:together")
41
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")