# Ministrals_demo / app.py
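#
# Gradio chat demo for the Ministral 3 model family. Models, processors,
# and system prompts are loaded lazily and cached per process; responses
# are streamed token by token from a background generation thread, and
# conversations can persist in the browser when `save_history` is enabled.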
import base64
import os
import uuid
import time
from datetime import datetime, timedelta
import torch
import gradio as gr
from gradio_client import utils as client_utils
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from config import (
    DEFAULT_THEME, LIGHT_THEME, DEFAULT_SYS_PROMPT, DEFAULT_MODEL_SIZE,
    save_history, user_config, bot_config, welcome_config, markdown_config,
    upload_config, MINISTRAL_MODELS,
)
from ui_components.thinking_button import ThinkingButton
import spaces
try:
from transformers import Mistral3ForConditionalGeneration, AutoProcessor, AutoTokenizer, TextIteratorStreamer
from huggingface_hub import hf_hub_download
from threading import Thread
TRANSFORMERS_AVAILABLE = True
except ImportError:
TRANSFORMERS_AVAILABLE = False
print("Warning: transformers not available. Running in demo mode only.")
MODEL_CACHE = {}
SYSTEM_PROMPT_CACHE = {}
PROCESSOR_CACHE = {}
print("=" * 50)
print("πŸš€ Ministral Demo Starting")
print(f" Model cache initialized (empty)")
print(f" Processor cache initialized (empty)")
print(f" System prompt cache initialized (empty)")
print("=" * 50)
def log_with_time(message: str):
timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
print(f"[{timestamp}] {message}")
def log_cache_status():
log_with_time(f"πŸ“¦ Cache status: {len(MODEL_CACHE)} models, {len(PROCESSOR_CACHE)} processors, {len(SYSTEM_PROMPT_CACHE)} prompts cached")
def load_system_prompt(model_id: str) -> str:
    model_name = model_id.split("/")[-1]
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    if model_id in SYSTEM_PROMPT_CACHE:
        log_with_time(f"πŸ“‹ System prompt cache hit for {model_name}")
        return SYSTEM_PROMPT_CACHE[model_id].format(name=model_name, today=today, yesterday=yesterday)
    try:
        log_with_time(f"πŸ“₯ Downloading system prompt for {model_name}...")
        start = time.time()
        file_path = hf_hub_download(repo_id=model_id, filename="SYSTEM_PROMPT.txt")
        with open(file_path, "r", encoding="utf-8") as file:
            system_prompt = file.read()
        SYSTEM_PROMPT_CACHE[model_id] = system_prompt
        log_with_time(f"βœ… System prompt loaded in {time.time() - start:.2f}s")
        return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
    except Exception as e:
        log_with_time(f"⚠️ Could not load system prompt: {e}")
        return DEFAULT_SYS_PROMPT
def get_processor_and_tokenizer(model_id: str):
if model_id in PROCESSOR_CACHE:
log_with_time(f"πŸ“‹ Processor cache hit for {model_id.split('/')[-1]}")
return PROCESSOR_CACHE[model_id]
try:
log_with_time(f"πŸ“₯ Loading processor for {model_id.split('/')[-1]}...")
start = time.time()
processor = AutoProcessor.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
PROCESSOR_CACHE[model_id] = (processor, tokenizer)
log_with_time(f"βœ… Processor loaded in {time.time() - start:.2f}s")
return processor, tokenizer
except Exception as e:
log_with_time(f"❌ Error loading processor: {e}")
return None, None
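# Lazily load a model plus its processor/tokenizer and cache the triple.
# Weights are loaded in bfloat16 with device_map="auto" so accelerate can
# place them on the available GPU(s); the first load can take minutes,
# after which switching back to a cached model is cheap.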
def get_model_and_processor(model_id: str, show_notification=False):
if not TRANSFORMERS_AVAILABLE:
log_with_time("⚠️ Transformers not available")
return None, None, None
if model_id in MODEL_CACHE:
log_with_time(f"πŸ“‹ Model cache hit for {model_id.split('/')[-1]}")
return MODEL_CACHE[model_id]
model_name = model_id.split("/")[-1]
try:
if show_notification:
gr.Info(f"πŸ“₯ Loading {model_name}... This may take a few minutes on first use.", duration=15)
total_start = time.time()
processor, tokenizer = get_processor_and_tokenizer(model_id)
if processor is None or tokenizer is None:
return None, None, None
log_with_time(f"πŸ“₯ Loading model weights for {model_name}...")
model_start = time.time()
model = Mistral3ForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map="auto"
).eval()
log_with_time(f"βœ… Model weights loaded in {time.time() - model_start:.2f}s")
MODEL_CACHE[model_id] = (model, processor, tokenizer)
total_time = time.time() - total_start
log_with_time(f"πŸŽ‰ {model_name} fully loaded in {total_time:.2f}s (cached for future use)")
if show_notification:
gr.Info(f"βœ… {model_name} loaded and ready!", duration=3)
return model, processor, tokenizer
except Exception as e:
log_with_time(f"❌ Error loading model {model_id}: {e}")
if show_notification:
gr.Warning(f"❌ Failed to load model: {str(e)}", duration=10)
return None, None, None
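# Turn a local file into a data: URI. Not referenced elsewhere in this
# module, but kept as a utility for components that expect inline base64
# payloads.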
def encode_file_to_base64(file_path):
    # Fall back to a generic MIME type when the extension is unknown.
    mime_type = client_utils.get_mimetype(file_path) or "application/octet-stream"
    with open(file_path, "rb") as file:
        base64_data = base64.b64encode(file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{base64_data}"
def format_history_for_transformers(history, model_id):
system_prompt = load_system_prompt(model_id)
messages = [{
"role": "system",
"content": [{"type": "text", "text": system_prompt}]
}]
for item in history:
if item["role"] == "user":
content = []
text_content = item["content"][1]["content"]
content.append({"type": "text", "text": text_content})
for file_path in item["content"][0]["content"]:
if file_path.startswith("http"):
content.append({"type": "image", "url": file_path})
elif os.path.exists(file_path):
mime_type = client_utils.get_mimetype(file_path)
if mime_type.startswith("image"):
content.append({"type": "image", "url": file_path})
messages.append({
"role": "user",
"content": content
})
elif item["role"] == "assistant":
text_contents = [content["content"] for content in item["content"] if content["type"] == "text"]
if text_contents:
messages.append({
"role": "assistant",
"content": [{"type": "text", "text": " ".join(text_contents)}]
})
return messages
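# Tokenize the conversation with the processor's chat template, returning
# PyTorch tensors ready for generate().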
def prepare_inputs(processor, messages):
log_with_time("πŸ“‹ Preparing inputs...")
inputs = processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt"
)
    # Some processors emit token_type_ids, which this generation call does not use.
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]
return inputs
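# Runs on ZeroGPU for up to 180s. generate() blocks, so it is launched on
# a background thread and tokens are consumed from a TextIteratorStreamer;
# each yielded chunk has the shape
#   {"token": str, "full_text": str, "done": bool}
# with a final done=True chunk once the thread finishes.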
@spaces.GPU(duration=180)
def generate_streaming(model, processor, tokenizer, messages):
log_with_time("πŸš€ Starting streaming generation...")
inputs = prepare_inputs(processor, messages)
log_with_time("πŸ“€ Moving tensors to GPU...")
inputs = {
k: (v.to(model.device, dtype=torch.bfloat16) if v.is_floating_point() else v.to(model.device))
for k, v in inputs.items()
}
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
**inputs,
streamer=streamer,
max_new_tokens=2048,
temperature=0.15,
do_sample=True,
)
log_with_time("🧡 Starting generation thread...")
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
log_with_time("πŸ“ Streaming tokens...")
generated_text = ""
for token in streamer:
generated_text += token
yield {"token": token, "full_text": generated_text, "done": False}
thread.join()
log_with_time(f"βœ… Generation complete: {len(generated_text)} chars")
yield {"token": "", "full_text": generated_text, "done": True}
class Gradio_Events:
@staticmethod
def submit(state_value):
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
enable_thinking = state_value["conversation_contexts"][state_value["conversation_id"]]["enable_thinking"]
model_size = state_value["conversation_contexts"][state_value["conversation_id"]].get("model_size", "14B")
model_type = "reasoning" if enable_thinking else "instruct"
model_id = MINISTRAL_MODELS[model_size][model_type]
log_cache_status()
log_with_time(f"πŸ“ Formatting {len(history)} messages for {model_id.split('/')[-1]}")
messages = format_history_for_transformers(history, model_id)
log_with_time(f"πŸ“¨ {len(messages)} messages prepared (including system prompt)")
history.append({
"role": "assistant",
"content": [],
"key": str(uuid.uuid4()),
"loading": True,
"header": f"Ministral-3-{model_size}",
"status": "pending"
})
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value),
}
try:
start_time = time.time()
answer_content = ""
if TRANSFORMERS_AVAILABLE:
needs_download = model_id not in MODEL_CACHE
model, processor, tokenizer = get_model_and_processor(model_id, show_notification=needs_download)
else:
model, processor, tokenizer = None, None, None
if model is not None and processor is not None and tokenizer is not None:
log_with_time(f"πŸš€ Starting inference with {model_id.split('/')[-1]}")
try:
inference_start = time.time()
history[-1]["content"] = [{
"type": "text",
"content": "",
}]
history[-1]["loading"] = False
for chunk in generate_streaming(model, processor, tokenizer, messages):
if chunk.get("token"):
answer_content = chunk["full_text"]
history[-1]["content"][0]["content"] = answer_content
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value)
}
if chunk.get("done"):
answer_content = chunk["full_text"]
break
inference_time = time.time() - inference_start
log_with_time(f"βœ… Streaming generation completed in {inference_time:.2f}s")
except Exception as e:
log_with_time(f"❌ Model inference error: {e}")
error_str = str(e)
if "timeout" in error_str.lower() or "aborted" in error_str.lower():
answer_content = "⏱️ GPU timeout: The request took too long to process. Please try:\n\n1. Using a shorter prompt\n2. Reducing image sizes\n3. Trying again in a moment"
elif "memory" in error_str.lower() or "oom" in error_str.lower():
answer_content = "πŸ’Ύ GPU out of memory. Try using a smaller model or reducing input size."
else:
answer_content = f"❌ Model inference failed: {error_str}\n\nPlease try again or check the console for more details."
history[-1]["content"] = [{
"type": "text",
"content": answer_content,
}]
history[-1]["loading"] = False
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value)
}
else:
log_with_time(f"⚠️ Using demo mode for: {model_id}")
demo_answer = f"This is a demo response from {model_id}. The application is running in demo mode.\n\nTo use real models, install transformers: `pip install transformers torch`\n\nYour message: {messages[-1]['content'][0]['text'] if messages and messages[-1]['content'] else 'N/A'}"
history[-1]["content"] = [{
"type": "text",
"content": "",
}]
for char in demo_answer:
answer_content += char
history[-1]["content"][0]["content"] = answer_content
history[-1]["loading"] = False
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value)
}
time.sleep(0.01)
log_with_time(f"πŸ“Š Response generated: {len(answer_content)} chars")
history[-1]["status"] = "done"
cost_time = "{:.2f}".format(time.time() - start_time)
log_with_time(f"⏱️ Total request time: {cost_time}s")
history[-1]["footer"] = f"{cost_time}s"
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value),
}
except Exception as e:
log_with_time(f"❌ Request failed for {model_id.split('/')[-1]}: {e}")
history[-1]["loading"] = False
history[-1]["status"] = "done"
if not history[-1]["content"]:
history[-1]["content"] = []
history[-1]["content"].append({
"type": "text",
"content": f'<span style="color: var(--color-red-500)">Error: {str(e)}</span>'
})
yield {
chatbot: gr.update(value=history),
state: gr.update(value=state_value)
}
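    # Entry point for a new user message: creates a conversation on first
    # use, records the text/files turn, then drives the submit pipeline.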
@staticmethod
def add_message(input_value, thinking_btn_state_value, model_selector_state_value, state_value):
text = input_value["text"]
files = input_value["files"]
if not state_value["conversation_id"]:
random_id = str(uuid.uuid4())
history = []
state_value["conversation_id"] = random_id
state_value["conversation_contexts"][state_value["conversation_id"]] = {"history": history}
state_value["conversations"].append({"label": text, "key": random_id})
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
state_value["conversation_contexts"][state_value["conversation_id"]] = {
"history": history,
"enable_thinking": thinking_btn_state_value["enable_thinking"],
"model_size": model_selector_state_value["model_size"]
}
history.append({
"key": str(uuid.uuid4()),
"role": "user",
"content": [{"type": "file", "content": [f for f in files]}, {"type": "text", "content": text}]
})
yield Gradio_Events.preprocess_submit(clear_input=True)(state_value)
try:
for chunk in Gradio_Events.submit(state_value):
yield chunk
finally:
yield Gradio_Events.postprocess_submit(state_value)
@staticmethod
def preprocess_submit(clear_input=True):
def preprocess_submit_handler(state_value):
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
return {
            input: gr.update(value=None, loading=True) if clear_input else gr.update(loading=True),
conversations: gr.update(active_key=state_value["conversation_id"],
items=list(map(lambda item: {**item, "disabled": True if item["key"] != state_value["conversation_id"] else False}, state_value["conversations"]))),
add_conversation_btn: gr.update(disabled=True),
clear_btn: gr.update(disabled=True),
conversation_delete_menu_item: gr.update(disabled=True),
chatbot: gr.update(value=history, bot_config=bot_config(disabled_actions=['edit', 'retry', 'delete']), user_config=user_config(disabled_actions=['edit', 'delete'])),
state: gr.update(value=state_value),
}
return preprocess_submit_handler
@staticmethod
def postprocess_submit(state_value):
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
return {
input: gr.update(loading=False),
conversation_delete_menu_item: gr.update(disabled=False),
clear_btn: gr.update(disabled=False),
conversations: gr.update(items=state_value["conversations"]),
add_conversation_btn: gr.update(disabled=False),
chatbot: gr.update(value=history, bot_config=bot_config(), user_config=user_config()),
state: gr.update(value=state_value),
}
@staticmethod
def cancel(state_value):
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
history[-1]["loading"] = False
history[-1]["status"] = "done"
history[-1]["footer"] = "Chat completion paused"
return Gradio_Events.postprocess_submit(state_value)
@staticmethod
def delete_message(state_value, e: gr.EventData):
index = e._data["payload"][0]["index"]
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
history = history[:index] + history[index + 1:]
state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = history
return gr.update(value=state_value)
@staticmethod
def edit_message(state_value, chatbot_value, e: gr.EventData):
index = e._data["payload"][0]["index"]
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
history[index]["content"] = chatbot_value[index]["content"]
if not history[index].get("edited"):
history[index]["edited"] = True
history[index]["footer"] = ((history[index]["footer"]) + " " if history[index].get("footer") else "") + "Edited"
return gr.update(value=state_value), gr.update(value=history)
@staticmethod
def regenerate_message(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData):
index = e._data["payload"][0]["index"]
history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
history = history[:index]
state_value["conversation_contexts"][state_value["conversation_id"]] = {
"history": history,
"enable_thinking": thinking_btn_state_value["enable_thinking"],
"model_size": model_selector_state_value["model_size"]
}
yield Gradio_Events.preprocess_submit()(state_value)
try:
for chunk in Gradio_Events.submit(state_value):
yield chunk
finally:
yield Gradio_Events.postprocess_submit(state_value)
@staticmethod
def apply_prompt(e: gr.EventData, input_value):
input_value["text"] = e._data["payload"][0]["value"]["description"]
input_value["files"] = e._data["payload"][0]["value"]["urls"]
return gr.update(value=input_value)
@staticmethod
def new_chat(thinking_btn_state, model_selector_state, state_value):
if not state_value["conversation_id"]:
return gr.skip()
state_value["conversation_id"] = ""
thinking_btn_state["enable_thinking"] = True
model_selector_state["model_size"] = "14B"
return gr.update(active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=thinking_btn_state), gr.update(value="14B"), gr.update(value=state_value)
@staticmethod
def select_conversation(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData):
active_key = e._data["payload"][0]
if state_value["conversation_id"] == active_key or (active_key not in state_value["conversation_contexts"]):
return gr.skip()
state_value["conversation_id"] = active_key
thinking_btn_state_value["enable_thinking"] = state_value["conversation_contexts"][active_key].get("enable_thinking", False)
model_size = state_value["conversation_contexts"][active_key].get("model_size", "14B")
model_selector_state_value["model_size"] = model_size
return gr.update(active_key=active_key), gr.update(value=state_value["conversation_contexts"][active_key]["history"]), gr.update(value=thinking_btn_state_value), gr.update(value=model_size), gr.update(value=state_value)
@staticmethod
def click_conversation_menu(state_value, e: gr.EventData):
conversation_id = e._data["payload"][0]["key"]
operation = e._data["payload"][1]["key"]
if operation == "delete":
del state_value["conversation_contexts"][conversation_id]
state_value["conversations"] = [item for item in state_value["conversations"] if item["key"] != conversation_id]
if state_value["conversation_id"] == conversation_id:
state_value["conversation_id"] = ""
return gr.update(items=state_value["conversations"], active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=state_value)
else:
return gr.update(items=state_value["conversations"]), gr.skip(), gr.update(value=state_value)
@staticmethod
def clear_conversation_history(state_value):
if not state_value["conversation_id"]:
return gr.skip()
state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = []
return gr.update(value=None), gr.update(value=state_value)
@staticmethod
def update_browser_state(state_value):
return gr.update(value=dict(conversations=state_value["conversations"], conversation_contexts=state_value["conversation_contexts"]))
@staticmethod
def apply_browser_state(browser_state_value, state_value):
state_value["conversations"] = browser_state_value["conversations"]
state_value["conversation_contexts"] = browser_state_value["conversation_contexts"]
return gr.update(items=browser_state_value["conversations"]), gr.update(value=state_value)
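# Light, cream-colored theme overriding both Gradio and modelscope-studio
# defaults; the `!important` rules keep third-party component styles from
# leaking through.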
css = """
body, html {
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
color: var(--ms-gr-ant-color-text, #000000) !important;
}
.gradio-container, .gradio-container.dark {
padding: 0 !important;
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
color: var(--ms-gr-ant-color-text, #000000) !important;
}
.gradio-container > main.fillable {
padding: 0 !important;
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}
#chatbot .ms-gr-ant-col,
#chatbot .ms-gr-antd-col,
#chatbot [class*="ms-gr-ant-col"] {
padding-left: 0 !important;
padding-right: 0 !important;
background-color: transparent !important;
}
#chatbot {
height: calc(100vh - 21px - 16px);
max-height: 1500px;
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}
#chatbot .chatbot-conversations {
height: 100vh;
background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
padding-left: 4px;
padding-right: 4px;
}
#chatbot .chatbot-conversations .chatbot-conversations-list {
padding-left: 0;
padding-right: 0;
}
#chatbot .chatbot-chat {
padding: 32px;
padding-bottom: 0;
height: 100%;
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
color: var(--ms-gr-ant-color-text, #000000) !important;
}
@media (max-width: 768px) {
#chatbot .chatbot-chat {
padding: 10px;
}
}
#chatbot .chatbot-chat .chatbot-chat-messages {
flex: 1;
background-color: transparent !important;
}
.gradio-container .contain {
background-color: transparent !important;
padding: 0 !important;
}
.user-message-content {
background-color: #ffffff !important;
background: #ffffff !important;
border-radius: 16px !important;
padding: 14px 18px !important;
border: 1px solid #E9E2CB !important;
color: #1E1E1E !important;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06) !important;
max-width: fit-content;
}
[class*="chatbot"] [class*="user"] [class*="content"],
[class*="chatbot"] [class*="user"] [class*="bubble"],
[class*="chatbot"] [class*="user"] [class*="message"],
[class*="pro-chatbot"] [class*="user"] {
background-color: transparent !important;
background: transparent !important;
}
.user-message-content,
.user-message-content *:not(code):not(pre) {
background-color: inherit !important;
}
.chatbot-welcome-prompts {
background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
}
[class*="welcome"] [class*="prompt"],
[class*="welcome"] [class*="prompts"],
[class*="prompts"] [class*="item"],
[class*="prompts"] [class*="card"],
.ms-gr-antdx-prompts-item,
.ms-gr-pro-chatbot-welcome-prompts {
color: #1E1E1E !important;
}
[class*="welcome"] span,
[class*="welcome"] p,
[class*="welcome"] div,
[class*="prompts"] span,
[class*="prompts"] p,
[class*="prompts"] div,
[class*="prompts"] [class*="title"],
[class*="prompts"] [class*="description"],
.ms-gr-antdx-prompts-item span,
.ms-gr-antdx-prompts-item p,
.ms-gr-antdx-prompts-item div {
color: #1E1E1E !important;
}
[class*="prompts"] [class*="item"] {
background-color: #FFFAEB !important;
border: 1px solid #E9E2CB !important;
}
.chatbot-conversations {
background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
}
.chatbot-conversations .ms-gr-ant-typography {
color: var(--ms-gr-ant-color-text, #000000) !important;
}
.chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled {
background-color: var(--ms-gr-ant-color-primary, #FF8205) !important;
color: #ffffff !important;
border: none !important;
}
.chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled:hover {
background-color: #FA500F !important;
transform: translateY(-1px);
}
.chatbot-conversations .ms-gr-ant-conversations {
color: var(--ms-gr-ant-color-text, #000000) !important;
}
.chatbot-conversations .ms-gr-ant-conversations-item {
color: var(--ms-gr-ant-color-text, #000000) !important;
background-color: transparent !important;
}
.chatbot-conversations .ms-gr-ant-conversations-item:hover {
background-color: var(--ms-gr-ant-color-bg-elevated, #E9E2CB) !important;
}
.ant-typography {
color: var(--ms-gr-ant-color-text, #000000) !important;
}
.ant-flex {
color: var(--ms-gr-ant-color-text, #000000) !important;
}
#chatbot > .ant-col {
background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}
h1, h2, h3, h4, h5, h6, p, span {
color: var(--ms-gr-ant-color-text);
}
.ms-gr-pro-chatbot-bot,
.ms-gr-pro-chatbot-bot *,
.ms-gr-pro-chatbot-bot .ms-gr-pro-chatbot-message-content,
.ms-gr-pro-chatbot-bot [class*="content"],
[class*="chatbot"] [class*="bot"],
[class*="chatbot"] [class*="bot"] * {
color: #1E1E1E !important;
}
.ms-gr-pro-chatbot-bot h1,
.ms-gr-pro-chatbot-bot h2,
.ms-gr-pro-chatbot-bot h3,
.ms-gr-pro-chatbot-bot h4,
.ms-gr-pro-chatbot-bot h5,
.ms-gr-pro-chatbot-bot h6,
.ms-gr-pro-chatbot-bot strong,
.ms-gr-pro-chatbot-bot b,
.ms-gr-pro-chatbot-bot em,
.ms-gr-pro-chatbot-bot i,
.ms-gr-pro-chatbot-bot p,
.ms-gr-pro-chatbot-bot span,
.ms-gr-pro-chatbot-bot li,
.ms-gr-pro-chatbot-bot ul,
.ms-gr-pro-chatbot-bot ol,
.ms-gr-pro-chatbot-bot a,
[class*="chatbot"] [class*="bot"] h1,
[class*="chatbot"] [class*="bot"] h2,
[class*="chatbot"] [class*="bot"] h3,
[class*="chatbot"] [class*="bot"] strong,
[class*="chatbot"] [class*="bot"] b,
[class*="chatbot"] [class*="bot"] p,
[class*="chatbot"] [class*="bot"] span,
[class*="chatbot"] [class*="bot"] li {
color: #1E1E1E !important;
}
.ms-gr-pro-chatbot-bot [style*="color"],
[class*="chatbot"] [class*="bot"] [style*="color"],
[class*="chatbot"] [class*="bot"] [style] {
color: #1E1E1E !important;
}
.ms-gr-pro-chatbot-bot pre,
.ms-gr-pro-chatbot-bot pre code {
background-color: #E9E2CB !important;
color: #1E1E1E !important;
}
.ms-gr-pro-chatbot-bot code:not(pre code) {
background-color: #E9E2CB !important;
color: #1E1E1E !important;
padding: 2px 6px;
border-radius: 4px;
}
footer {
display: none !important;
}
.footer {
display: none !important;
}
*, *::before, *::after {
transition: none !important;
}
"""
with gr.Blocks(
fill_width=True,
css=css,
theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray", neutral_hue="gray")
) as demo:
state = gr.State({"conversation_contexts": {}, "conversations": [], "conversation_id": "", "oss_cache": {}})
thinking_btn_state = gr.State({"enable_thinking": False})
model_selector_state = gr.State({"model_size": "14B"})
with ms.Application(), antdx.XProvider(theme=LIGHT_THEME), ms.AutoLoading():
with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
with antd.Col(md=dict(flex="0 0 260px", span=24, order=0), span=0, order=1, elem_style=dict(width=0)):
with ms.Div(elem_classes="chatbot-conversations"):
with antd.Flex(vertical=True, gap="small", elem_style=dict(height="100%")):
with antd.Flex(vertical=True, gap="small", align="center", elem_style=dict(padding=8)):
with antd.Typography.Title(level=1, elem_style=dict(fontSize=24, margin=0)):
with antd.Flex(align="center", gap="small", justify="center"):
antd.Image('./assets/m-boxed-rainbow.png', preview=False, alt="logo", width=24, height=24)
ms.Span("Ministrals Demo")
with antd.Button(value=None, color="primary", variant="filled", block=True) as add_conversation_btn:
ms.Text("New Conversation")
with ms.Slot("icon"):
antd.Icon("PlusOutlined")
with antdx.Conversations(elem_classes="chatbot-conversations-list") as conversations:
with ms.Slot('menu.items'):
with antd.Menu.Item(label="Delete", key="delete", danger=True) as conversation_delete_menu_item:
with ms.Slot("icon"):
antd.Icon("DeleteOutlined")
with antd.Col(flex=1, elem_style=dict(height="100%")):
with antd.Flex(vertical=True, gap="small", elem_classes="chatbot-chat"):
with antd.Flex(align="center", gap="large", elem_style=dict(paddingBottom=10)):
antd.Typography.Title("Hello, I'm Ministral", level=3, elem_style=dict(margin=0))
with antd.Flex(align="center", gap="small"):
ms.Span("currently using:", elem_style=dict(fontSize=12))
model_display = antd.Typography.Text(
value="mistralai/Ministral-3-14B-Instruct-2512",
copyable=True, code=True,
elem_style=dict(fontSize=12, color="var(--ms-gr-ant-color-text-secondary)")
)
chatbot = pro.Chatbot(elem_classes="chatbot-chat-messages", height=0,
markdown_config=markdown_config(), welcome_config=welcome_config(),
user_config=user_config(), bot_config=bot_config())
with pro.MultimodalInput(placeholder="How can I help you today?", upload_config=upload_config()) as input:
with ms.Slot("prefix"):
with antd.Flex(gap=4, wrap=True, elem_style=dict(maxWidth='40vw', display="inline-flex")):
with antd.Button(value=None, type="text") as clear_btn:
with ms.Slot("icon"):
antd.Icon("ClearOutlined")
model_selector = antd.Select(
value=DEFAULT_MODEL_SIZE, default_value=DEFAULT_MODEL_SIZE,
options=[{"label": "Ministral-3-14B", "value": "14B"}, {"label": "Ministral-3-8B", "value": "8B"}, {"label": "Ministral-3-3B", "value": "3B"}],
elem_style=dict(width=180)
)
with antd.Button("Thinking", shape="round", color="primary") as thinking_btn:
with ms.Slot("icon"):
antd.Icon("SunOutlined")
def toggle_thinking(state_value):
state_value["enable_thinking"] = not state_value["enable_thinking"]
return gr.update(value=state_value)
def apply_thinking_style(state_value):
return gr.update(variant="solid" if state_value["enable_thinking"] else "")
def update_model_size(value, state_value):
state_value["model_size"] = value
return gr.update(value=state_value)
def update_model_display(thinking_state, model_state):
model_size = model_state.get("model_size", "14B")
model_type = "reasoning" if thinking_state.get("enable_thinking", False) else "instruct"
model_name = MINISTRAL_MODELS[model_size][model_type]
return gr.update(value=model_name)
thinking_btn_state.change(fn=apply_thinking_style, inputs=[thinking_btn_state], outputs=[thinking_btn])
thinking_btn_state.change(fn=update_model_display, inputs=[thinking_btn_state, model_selector_state], outputs=[model_display])
thinking_btn.click(fn=toggle_thinking, inputs=[thinking_btn_state], outputs=[thinking_btn_state])
model_selector.change(fn=update_model_size, inputs=[model_selector, model_selector_state], outputs=[model_selector_state])
model_selector_state.change(fn=update_model_display, inputs=[thinking_btn_state, model_selector_state], outputs=[model_display])
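    # Optional persistence: mirror conversations into gr.BrowserState so a
    # page reload restores them (enabled via `save_history` in config).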
if save_history:
browser_state = gr.BrowserState({"conversation_contexts": {}, "conversations": []}, storage_key="ministral_demo_storage")
state.change(fn=Gradio_Events.update_browser_state, inputs=[state], outputs=[browser_state])
demo.load(fn=Gradio_Events.apply_browser_state, inputs=[browser_state, state], outputs=[conversations, state])
add_conversation_btn.click(fn=Gradio_Events.new_chat, inputs=[thinking_btn_state, model_selector_state, state], outputs=[conversations, chatbot, thinking_btn_state, model_selector, state])
conversations.active_change(fn=Gradio_Events.select_conversation, inputs=[thinking_btn_state, model_selector_state, state], outputs=[conversations, chatbot, thinking_btn_state, model_selector, state])
conversations.menu_click(fn=Gradio_Events.click_conversation_menu, inputs=[state], outputs=[conversations, chatbot, state])
chatbot.welcome_prompt_select(fn=Gradio_Events.apply_prompt, inputs=[input], outputs=[input])
chatbot.delete(fn=Gradio_Events.delete_message, inputs=[state], outputs=[state])
chatbot.edit(fn=Gradio_Events.edit_message, inputs=[state, chatbot], outputs=[state, chatbot])
regenerating_event = chatbot.retry(fn=Gradio_Events.regenerate_message, inputs=[thinking_btn_state, model_selector_state, state],
outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state])
submit_event = input.submit(fn=Gradio_Events.add_message, inputs=[input, thinking_btn_state, model_selector_state, state],
outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state])
input.cancel(fn=Gradio_Events.cancel, inputs=[state],
outputs=[input, conversation_delete_menu_item, clear_btn, conversations, add_conversation_btn, chatbot, state],
cancels=[submit_event, regenerating_event], queue=False)
clear_btn.click(fn=Gradio_Events.clear_conversation_history, inputs=[state], outputs=[chatbot, state])
if __name__ == "__main__":
demo.queue(default_concurrency_limit=100, max_size=100).launch(ssr_mode=False, max_threads=100, show_api=False)