Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -239,14 +239,9 @@ def _extract_text_content(content) -> Optional[str]:
|
|
| 239 |
return None
|
| 240 |
|
| 241 |
|
| 242 |
-
def format_messages(
|
| 243 |
"""Format message list for Step3-VL-10B."""
|
| 244 |
messages: List[dict] = []
|
| 245 |
-
if system:
|
| 246 |
-
messages.append({
|
| 247 |
-
"role": "system",
|
| 248 |
-
"content": [{"type": "text", "text": system}]
|
| 249 |
-
})
|
| 250 |
|
| 251 |
if not history:
|
| 252 |
history = []
|
|
@@ -311,7 +306,7 @@ def build_user_display(image_url: Optional[str], user_text: Optional[str]) -> st
|
|
| 311 |
|
| 312 |
|
| 313 |
@spaces.GPU
|
| 314 |
-
def chat(
|
| 315 |
"""Chat function for Step3-VL-10B."""
|
| 316 |
if model_name is None:
|
| 317 |
model_name = MODEL_NAME
|
|
@@ -334,7 +329,7 @@ def chat(system_prompt, user_text, image_file, history, max_tokens, temperature,
|
|
| 334 |
image = load_image(image_file)
|
| 335 |
image_url = image_to_data_url(image) if image is not None else None
|
| 336 |
|
| 337 |
-
messages = format_messages(
|
| 338 |
if not messages:
|
| 339 |
yield history or [], "", None
|
| 340 |
return
|
|
@@ -661,14 +656,6 @@ with gr.Blocks(title="Step3-VL-10B", css=custom_css, theme=gr.themes.Soft()) as
|
|
| 661 |
with gr.Column(scale=1, min_width=350):
|
| 662 |
# Configuration
|
| 663 |
with gr.Accordion("⚙️ Configuration", open=False):
|
| 664 |
-
system_prompt = gr.Textbox(
|
| 665 |
-
label="System Prompt",
|
| 666 |
-
lines=2,
|
| 667 |
-
value="You are a multimodal assistant with strong visual perception and reasoning.",
|
| 668 |
-
placeholder="Enter system prompt...",
|
| 669 |
-
elem_classes=["input-box"]
|
| 670 |
-
)
|
| 671 |
-
|
| 672 |
max_tokens = gr.Slider(
|
| 673 |
1, 56000,
|
| 674 |
value=16384,
|
|
@@ -677,13 +664,13 @@ with gr.Blocks(title="Step3-VL-10B", css=custom_css, theme=gr.themes.Soft()) as
|
|
| 677 |
)
|
| 678 |
temperature = gr.Slider(
|
| 679 |
0.0, 2.0,
|
| 680 |
-
value=0
|
| 681 |
label="Temperature",
|
| 682 |
info="Higher = more random"
|
| 683 |
)
|
| 684 |
top_p = gr.Slider(
|
| 685 |
0.0, 1.0,
|
| 686 |
-
value=0
|
| 687 |
label="Top P",
|
| 688 |
info="Nucleus sampling"
|
| 689 |
)
|
|
@@ -749,7 +736,7 @@ with gr.Blocks(title="Step3-VL-10B", css=custom_css, theme=gr.themes.Soft()) as
|
|
| 749 |
|
| 750 |
submit_btn.click(
|
| 751 |
fn=chat,
|
| 752 |
-
inputs=[
|
| 753 |
outputs=[chatbot, user_text, image_file]
|
| 754 |
)
|
| 755 |
|
|
|
|
| 239 |
return None
|
| 240 |
|
| 241 |
|
| 242 |
+
def format_messages(history, user_text, image: Optional[Image.Image] = None):
|
| 243 |
"""Format message list for Step3-VL-10B."""
|
| 244 |
messages: List[dict] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
if not history:
|
| 247 |
history = []
|
|
|
|
| 306 |
|
| 307 |
|
| 308 |
@spaces.GPU
|
| 309 |
+
def chat(user_text, image_file, history, max_tokens, temperature, top_p, show_thinking=True, model_name=None):
|
| 310 |
"""Chat function for Step3-VL-10B."""
|
| 311 |
if model_name is None:
|
| 312 |
model_name = MODEL_NAME
|
|
|
|
| 329 |
image = load_image(image_file)
|
| 330 |
image_url = image_to_data_url(image) if image is not None else None
|
| 331 |
|
| 332 |
+
messages = format_messages(history, user_text, image)
|
| 333 |
if not messages:
|
| 334 |
yield history or [], "", None
|
| 335 |
return
|
|
|
|
| 656 |
with gr.Column(scale=1, min_width=350):
|
| 657 |
# Configuration
|
| 658 |
with gr.Accordion("⚙️ Configuration", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
max_tokens = gr.Slider(
|
| 660 |
1, 56000,
|
| 661 |
value=16384,
|
|
|
|
| 664 |
)
|
| 665 |
temperature = gr.Slider(
|
| 666 |
0.0, 2.0,
|
| 667 |
+
value=1.0,
|
| 668 |
label="Temperature",
|
| 669 |
info="Higher = more random"
|
| 670 |
)
|
| 671 |
top_p = gr.Slider(
|
| 672 |
0.0, 1.0,
|
| 673 |
+
value=1.0,
|
| 674 |
label="Top P",
|
| 675 |
info="Nucleus sampling"
|
| 676 |
)
|
|
|
|
| 736 |
|
| 737 |
submit_btn.click(
|
| 738 |
fn=chat,
|
| 739 |
+
inputs=[user_text, image_file, chatbot, max_tokens, temperature, top_p, show_thinking],
|
| 740 |
outputs=[chatbot, user_text, image_file]
|
| 741 |
)
|
| 742 |
|