Spaces:

SII-InnoMegrez
/

InnoMegrez2

Sleeping

App Files Community

SII-InnoMegrez committed on Sep 10, 2025

Commit

bc9db78

verified ·

1 Parent(s): 6b8c84c

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

README.md +6 -6
app.py +133 -263

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
 ---
-title: Megrez 3B Omni
-emoji: 🐠
-colorFrom: red
-colorTo: blue
 sdk: gradio
-sdk_version: 5.3.0
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Megrez-3B-Omni Chat Demo
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Megrez2-3x7B-A3B-Preview
+emoji: 👀
+colorFrom: purple
+colorTo: yellow
 sdk: gradio
+sdk_version: 5.30.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+short_description: Megrez2 Chat Demo
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,297 +1,167 @@
-# -*- encoding: utf-8 -*-
-# File: app.py
-# Description: None
-from copy import deepcopy
-from typing import Dict, List
-from PIL import Image
-import io
-import subprocess
 import requests
 import json
-import base64
-import gradio as gr
-import librosa
-import os
-IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp")
-VIDEO_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi", ".flv", ".wmv", ".webm", ".m4v")
-AUDIO_EXTENSIONS = (".mp3", ".wav", "flac", ".m4a", ".wma")
-DEFAULT_SAMPLING_PARAMS = {
-    "top_p": 0.8,
-    "top_k": 100,
-    "temperature": 0.7,
-    "do_sample": True,
-    "num_beams": 1,
-    "repetition_penalty": 1.2,
-}
-MAX_NEW_TOKENS = 1024
-def load_image_to_base64(image_path):
-    """Load image and convert to base64 string"""
-    with Image.open(image_path) as img:
-        if img.mode != 'RGB':
-            img = img.convert('RGB')
-        img_byte_arr = io.BytesIO()
-        img.save(img_byte_arr, format='PNG')
-        img_byte_arr = img_byte_arr.getvalue()
-        return base64.b64encode(img_byte_arr).decode('utf-8')
-def wav_to_bytes_with_ffmpeg(wav_file_path):
-    process = subprocess.Popen(
-        ['ffmpeg', '-i', wav_file_path, '-f', 'wav', '-'],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE
-    )
-    out, _ = process.communicate()
-    return base64.b64encode(out).decode('utf-8')
-def parse_sse_response(response):
-    for line in response.iter_lines():
-        print(line)
-        if line:
-            line = line.decode('utf-8')
-            if line.startswith('data: '):
-                data = line[6:].strip()  # Remove 'data: ' prefix
-                if data == '[DONE]':
                     break
                 try:
-                    json_data = json.loads(data)
-                    print(f"{json_data['text']}")
-                    yield json_data['text']
-                except json.JSONDecodeError:
-                    print(f"Failed to parse JSON: {data}")
-                    raise gr.Error(f"Failed to parse JSON: {data}")
-def history2messages(history: List[Dict]) -> List[Dict]:
-    """
-    Transform gradio history to chat messages.
-    """
-    messages = []
-    cur_message = dict()
-    for item in history:
-        if item["role"] == "assistant":
-            if len(cur_message) > 0:
-                messages.append(deepcopy(cur_message))
-                cur_message = dict()
-            messages.append(deepcopy(item))
-            continue
-        if "role" not in cur_message:
-            cur_message["role"] = "user"
-        if "content" not in cur_message:
-            cur_message["content"] = dict()
-        if "metadata" not in item or item["metadata"] is None:
-            item["metadata"] = {"title": ""}
-        if item["metadata"]["title"] == "":
-            cur_message["content"]["text"] = item["content"]
-        elif item["metadata"]["title"] == "image":
-            cur_message["content"]["image"] = load_image_to_base64(item["content"][0])
-        elif item["metadata"]["title"] == "audio":
-            cur_message["content"]["audio"] = wav_to_bytes_with_ffmpeg(item["content"][0])
-    if len(cur_message) > 0:
-        messages.append(cur_message)
-    return messages
-def check_messages(history, message, audio):
-    if not isinstance(message, dict):
-        raise gr.Error("消息格式错误")
-    has_text = message.get("text", "") and message["text"].strip()
-    has_files = len(message.get("files", [])) > 0
-    has_audio = audio is not None
-    if not (has_text or has_files or has_audio):
-        raise gr.Error("请输入文字或上传音频/图片后再发送。")
-    audios = []
-    images = []
-    for file_msg in message["files"]:
-        if file_msg.endswith(AUDIO_EXTENSIONS) or file_msg.endswith(VIDEO_EXTENSIONS):
-            duration = librosa.get_duration(filename=file_msg)
-            if duration > 30:
-                raise gr.Error("音频时长不能超过30秒。")
-            if duration == 0:
-                raise gr.Error("音频时长不能为0秒。")
-            audios.append(file_msg)
-        elif file_msg.endswith(IMAGE_EXTENSIONS):
-            images.append(file_msg)
-        else:
-            filename = file_msg.split("/")[-1]
-            raise gr.Error(f"Unsupported file type: {filename}. It should be an image or audio file.")
-    if len(audios) > 1:
-        raise gr.Error("Please upload only one audio file.")
-    if len(images) > 1:
-        raise gr.Error("Please upload only one image file.")
-    if audio is not None:
-        if len(audios) > 0:
-            raise gr.Error("Please upload only one audio file or record audio.")
-        audios.append(audio)
-    # Append the message to the history
-    for image in images:
-        history.append({"role": "user", "content": (image,), "metadata": {"title": "image"}})
-    for audio in audios:
-        history.append({"role": "user", "content": (audio,), "metadata": {"title": "audio"}})
-    if message["text"]:
-        history.append({"role": "user", "content": message["text"]})
-    return history, gr.MultimodalTextbox(value=None, interactive=False), None
-def bot(
-    history: list,
-    top_p: float,
-    top_k: int,
-    temperature: float,
-    repetition_penalty: float,
-    max_new_tokens: int = MAX_NEW_TOKENS,
-    regenerate: bool = False,
-):
-    if history and regenerate:
-        history = history[:-1]
-    if not history:
-        return history
-    msgs = history2messages(history)
-    print(msgs)
-    API_URL = os.getenv("API_URL", "http://8.141.126.196:28000/v1/chat")
     payload = {
-        "messages": msgs,
-        "sampling_params": {
-            "top_p": top_p,
-            "top_k": top_k,
-            "temperature": temperature,
-            "repetition_penalty": repetition_penalty,
-            "max_new_tokens": max_new_tokens,
-            "num_beams": 3,
-        }
     }
-    response = requests.get(
-        API_URL,
-        json=payload,
-        headers={'Accept': 'text/event-stream'},
-        stream=True
-    )
-    response_text = ""
-    for text in parse_sse_response(response):
-        response_text += text
-        yield history + [{"role": "assistant", "content": response_text}]
-    return response_text
-def change_state(state):
-    return gr.update(visible=not state), not state
 def reset_user_input():
     return gr.update(value="")
 if __name__ == "__main__":
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             f"""
-# 🪐 Chat with <a href="https://github.com/infinigence/Infini-Megrez-Omni">Megrez-3B-Omni</a>
 """
         )
-        chatbot = gr.Chatbot(elem_id="chatbot", bubble_full_width=False, type="messages", height='48vh')
-        sampling_params_group_hidden_state = gr.State(False)
-        with gr.Row(equal_height=True):
-            chat_input = gr.MultimodalTextbox(
-                file_count="multiple",
-                placeholder="Enter your prompt or upload image/audio here, then press ENTER...",
-                show_label=False,
-                scale=8,
-                file_types=["image", "audio"],
                 interactive=True,
-                # stop_btn=True,
             )
-        with gr.Row(equal_height=True):
-            audio_input = gr.Audio(
-                sources=["microphone", "upload"],
-                type="filepath",
-                scale=1,
-                max_length=30
             )
-        with gr.Row(equal_height=True):
-            with gr.Column(scale=1, min_width=150):
-                with gr.Row(equal_height=True):
-                    regenerate_btn = gr.Button("Regenerate", variant="primary")
-                    clear_btn = gr.ClearButton(
-                        [chat_input, audio_input, chatbot],
-                    )
-        with gr.Row():
-            sampling_params_toggle_btn = gr.Button("Sampling Parameters")
-        with gr.Group(visible=False) as sampling_params_group:
-            with gr.Row():
-                temperature = gr.Slider(
-                    minimum=0, maximum=1.2, value=DEFAULT_SAMPLING_PARAMS["temperature"], label="Temperature"
-                )
-                repetition_penalty = gr.Slider(
-                    minimum=0,
-                    maximum=2,
-                    value=DEFAULT_SAMPLING_PARAMS["repetition_penalty"],
-                    label="Repetition Penalty",
-                )
-            with gr.Row():
-                top_p = gr.Slider(minimum=0, maximum=1, value=DEFAULT_SAMPLING_PARAMS["top_p"], label="Top-p")
-                top_k = gr.Slider(minimum=0, maximum=1000, value=DEFAULT_SAMPLING_PARAMS["top_k"], label="Top-k")
-            with gr.Row():
-                max_new_tokens = gr.Slider(
-                    minimum=1,
-                    maximum=MAX_NEW_TOKENS,
-                    value=MAX_NEW_TOKENS,
-                    label="Max New Tokens",
-                    interactive=True,
-                )
-        sampling_params_toggle_btn.click(
-            change_state,
-            sampling_params_group_hidden_state,
-            [sampling_params_group, sampling_params_group_hidden_state],
         )
-        chat_msg = chat_input.submit(
-            check_messages,
-            [chatbot, chat_input, audio_input],
-            [chatbot, chat_input, audio_input],
         )
-        bot_msg = chat_msg.then(
-            bot,
-            inputs=[chatbot, top_p, top_k, temperature, repetition_penalty, max_new_tokens],
-            outputs=chatbot,
-            api_name="bot_response",
         )
-        bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
-        regenerate_btn.click(
-            bot,
-            inputs=[chatbot, top_p, top_k, temperature, repetition_penalty, max_new_tokens, gr.State(True)],
-            outputs=chatbot,
         )
-    demo.launch(server_name="0.0.0.0")

+from argparse import ArgumentParser
+import gradio as gr
 import requests
 import json
+import time
+def get_streaming_response(response: requests.Response):
+    """Yield the text fragments carried by a streamed chat-completion response.
+
+    Reads ``response`` line by line as Server-Sent Events. Each ``data:`` line
+    is parsed as JSON and its ``choices[0].delta.content`` value is yielded
+    when it is non-empty. The ``[DONE]`` sentinel ends the stream. Blank lines
+    and lines that do not start with ``data:`` are ignored, and payloads that
+    cannot be parsed are logged and skipped.
+
+    Args:
+        response: A ``requests`` response whose body is consumed through
+            ``iter_lines()``.
+
+    Yields:
+        Each non-empty piece of generated text, in arrival order.
+    """
+    for raw_line in response.iter_lines():
+        if raw_line:
+            data = raw_line.decode("utf-8")
+            if data.startswith('data:'):
+                # Accept "data:" with or without the space after the colon and
+                # tolerate stray whitespace around the payload.
+                json_str = data[5:].strip()
+                if json_str == '[DONE]':
                     break
                 try:
+                    event = json.loads(json_str)
+                    # `choices`, `delta` and `content` may each be missing or
+                    # null; all of those cases mean "no text in this event".
+                    choices = event.get('choices') or [{}]
+                    delta = choices[0].get('delta') or {}
+                    new_text = delta.get('content') or ''
+                except (json.JSONDecodeError, AttributeError, TypeError, KeyError, IndexError):
+                    # Not JSON, or not shaped like a completion chunk.
+                    print(f"Skipping malformed SSE line: {json_str}")
+                    continue
+                if new_text:
+                    yield new_text
+def _chat_stream(model, tokenizer, query, history, temperature, top_p, max_output_tokens):
+    """Stream the assistant's reply to ``query`` from the chat-completions API.
+
+    Builds a message list from ``history`` followed by ``query``, POSTs it with
+    ``stream=True`` and yields each text fragment produced by
+    ``get_streaming_response``. Request failures are not raised: they are
+    printed and yielded as a single ``"Error: ..."`` string instead.
+
+    Args:
+        model: Unused; kept so the call signature stays unchanged.
+        tokenizer: Unused; kept so the call signature stays unchanged.
+        query: The new user message.
+        history: Iterable of ``(user_text, assistant_text)`` pairs from
+            earlier turns.
+        temperature: Sampling temperature; negative values are clamped to 0.
+        top_p: Nucleus-sampling value forwarded to the API.
+        max_output_tokens: Forwarded to the API as ``max_tokens``.
+
+    Yields:
+        Text fragments of the reply, or one error message string.
+    """
+    import os  # local import so this block does not depend on the file header
+
+    conversation = []
+    for query_h, response_h in history:
+        conversation.append({"role": "user", "content": query_h})
+        conversation.append({"role": "assistant", "content": response_h})
+    conversation.append({"role": "user", "content": query})
+    headers = {
+        "Content-Type": "application/json"
+    }
     payload = {
+        "model": "megrez-moe-waic",
+        "messages": conversation,
+        "max_tokens": max_output_tokens,
+        "temperature": max(temperature, 0),
+        "top_p": top_p,
+        "stream": True
     }
+    # The endpoint can be overridden through the API_URL environment variable;
+    # the default is the address this demo was deployed against.
+    api_url = os.getenv("API_URL", "http://8.152.0.142:8080/v1/chat/completions")
+    try:
+        # The context manager releases the connection even if the consumer
+        # stops iterating early or an exception interrupts the stream.
+        with requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=60, stream=True) as response:
+            response.raise_for_status()
+            for chunk in get_streaming_response(response):
+                yield chunk
+                time.sleep(0.01)
+    except requests.exceptions.RequestException as e:
+        print(f"API request failed: {e}")
+        yield f"Error: Could not connect to the API. Details: {e}"
+    except (KeyError, IndexError) as e:
+        # Do not read response.text here: the streamed body may already be
+        # consumed, and accessing it would raise from inside this handler.
+        print(f"Failed to parse API response: {e}")
+        yield f"Error: Invalid response format from the API. Details: {e}"
+def predict(_query, _chatbot, _task_history, _temperature, _top_p, _max_output_tokens):
+    """Send ``_query`` to the model and stream the growing reply into the chat.
+
+    Appends a ``(_query, "")`` pair to ``_chatbot`` and rewrites that last pair
+    with the accumulated reply as fragments arrive from ``_chat_stream``,
+    yielding ``_chatbot`` after each change. When the stream ends, the
+    finished ``(_query, reply)`` pair is appended to ``_task_history``. Both
+    lists are modified in place.
+
+    Args:
+        _query: The user's message. Empty or whitespace-only input is ignored.
+        _chatbot: List of ``(user, assistant)`` pairs shown in the chat widget.
+        _task_history: List of completed ``(user, assistant)`` pairs that is
+            sent as context on the next request.
+        _temperature: Forwarded to ``_chat_stream``.
+        _top_p: Forwarded to ``_chat_stream``.
+        _max_output_tokens: Forwarded to ``_chat_stream``.
+
+    Yields:
+        ``_chatbot`` after every update.
+    """
+    if not _query or not _query.strip():
+        # Nothing to send: leave the transcript and history untouched instead
+        # of posting an empty user turn to the API.
+        yield _chatbot
+        return
+    print(f"User: {_query}")
+    _chatbot.append((_query, ""))
+    # Show the user's message right away, even if the first token is slow or
+    # the stream turns out to be empty.
+    yield _chatbot
+    full_response = ""
+    stream = _chat_stream(None, None, _query, history=_task_history, temperature=_temperature, top_p=_top_p, max_output_tokens=_max_output_tokens)
+    for new_text in stream:
+        full_response += new_text
+        _chatbot[-1] = (_query, full_response)
+        yield _chatbot
+    print(f"History: {_task_history}")
+    _task_history.append((_query, full_response))
+    print(f"Megrez (from API): {full_response}")
+def regenerate(_chatbot, _task_history, _temperature, _top_p, _max_output_tokens):
+    """Discard the most recent exchange and ask the model the same query again.
+
+    With no stored history there is nothing to redo, so ``_chatbot`` is
+    yielded unchanged. Otherwise the last pair is removed from
+    ``_task_history``, the last displayed pair is removed from ``_chatbot``
+    when there is one, and the removed query is passed back through
+    ``predict``.
+
+    Args:
+        _chatbot: List of ``(user, assistant)`` pairs shown in the chat widget.
+        _task_history: List of completed ``(user, assistant)`` pairs.
+        _temperature: Forwarded to ``predict``.
+        _top_p: Forwarded to ``predict``.
+        _max_output_tokens: Forwarded to ``predict``.
+
+    Yields:
+        ``_chatbot`` after every update.
+    """
+    if not _task_history:
+        yield _chatbot
+        return
+    last_query = _task_history.pop()[0]
+    # The displayed transcript may already be empty while history is not
+    # (for example if the chat view was cleared separately); popping an empty
+    # list would raise IndexError.
+    if _chatbot:
+        _chatbot.pop()
+    yield from predict(last_query, _chatbot, _task_history, _temperature, _top_p, _max_output_tokens)
 def reset_user_input():
+    """Return a Gradio update that sets the target component's value to an empty string."""
     return gr.update(value="")
+def reset_state(_chatbot, _task_history):
+    """Empty both the stored history and the displayed transcript in place.
+
+    Args:
+        _chatbot: List of pairs shown in the chat widget.
+        _task_history: List of completed pairs kept as request context.
+
+    Returns:
+        The same ``_chatbot`` list, now empty.
+    """
+    for store in (_task_history, _chatbot):
+        store.clear()
+    return _chatbot
 if __name__ == "__main__":
+    # Build the chat UI and wire its events; runs only when executed as a script.
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             f"""
+# 🎱 Chat with <a href="https://github.com/infinigence/Infini-Megrez">Megrez2</a>
 """
         )
+        chatbot = gr.Chatbot(label="Megrez2", elem_classes="control-height", height='48vh', show_copy_button=True,
+            latex_delimiters=[
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": "$", "right": "$", "display": False},
+            {"left": "\\(", "right": "\\)", "display": False},
+            {"left": "\\[", "right": "\\]", "display": True},
+        ])
+        with gr.Row():
+            with gr.Column(scale=20):
+                query = gr.Textbox(show_label=False, container=False, placeholder="Enter your prompt here and press ENTER")
+            with gr.Column(scale=1, min_width=100):
+                submit_btn = gr.Button("🚀 Send", variant="primary")
+        # Completed (user, assistant) pairs, sent as context with each request.
+        task_history = gr.State([])
+        with gr.Row():
+            empty_btn = gr.Button("🗑️ Clear History")
+            regen_btn = gr.Button("🔄 Regenerate")
+        with gr.Accordion("Parameters", open=False) as parameter_row:
+            temperature = gr.Slider(
+                minimum=0.0,
+                maximum=1.2,
+                value=0.7,
+                step=0.1,
                 interactive=True,
+                label="Temperature",
             )
+            top_p = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                value=0.9,
+                step=0.1,
+                interactive=True,
+                label="Top P",
+            )
+            # step=16 keeps the minimum (16), the default (4096) and the
+            # maximum (32768) all on the slider's step grid.
+            max_output_tokens = gr.Slider(
+                minimum=16,
+                maximum=32768,
+                value=4096,
+                step=16,
+                interactive=True,
+                label="Max output tokens",
             )
+        # The Send button and pressing ENTER in the textbox both run `predict`.
+        submit_btn.click(
+            predict, [query, chatbot, task_history, temperature, top_p, max_output_tokens], [chatbot], show_progress=True
         )
+        query.submit(
+            predict, [query, chatbot, task_history, temperature, top_p, max_output_tokens], [chatbot], show_progress=True
         )
+        # Separate listeners on the same events clear the textbox.
+        submit_btn.click(reset_user_input, [], [query])
+        query.submit(reset_user_input, [], [query])
+        empty_btn.click(
+            reset_state, [chatbot, task_history], outputs=[chatbot], show_progress=True
         )
+        regen_btn.click(
+            regenerate, [chatbot, task_history, temperature, top_p, max_output_tokens], [chatbot], show_progress=True
         )
+    # NOTE(review): share=True requests a public share link; confirm that is
+    # wanted when this app is hosted as a Space.
+    demo.launch(ssr_mode=False, share=True)