Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 23 |
processor = AutoProcessor.from_pretrained(MODEL_DIR)
|
| 24 |
|
| 25 |
# --------- Chat Inference Function ---------
|
| 26 |
-
def chat_qwen_vl(messages):
|
| 27 |
# —— 原有多模态输入构造 —— #
|
| 28 |
text = processor.apply_chat_template(
|
| 29 |
messages, tokenize=False, add_generation_prompt=True
|
|
@@ -51,8 +51,8 @@ def chat_qwen_vl(messages):
|
|
| 51 |
**inputs, # 包含 input_ids, pixel_values, attention_mask 等
|
| 52 |
streamer=streamer, # 关键:挂载 streamer
|
| 53 |
top_k=1024,
|
| 54 |
-
max_new_tokens=
|
| 55 |
-
temperature=
|
| 56 |
top_p=0.1,
|
| 57 |
eos_token_id=terminators, # 你的结束符 ID 列表
|
| 58 |
)
|
|
@@ -70,57 +70,6 @@ def chat_qwen_vl(messages):
|
|
| 70 |
# 每次拿到新片段就拼接并输出
|
| 71 |
yield "".join(buffer)
|
| 72 |
|
| 73 |
-
def chat_qwen_vl_(message: str, history: list, temperature: float = 0.7, max_new_tokens: int = 1024):
|
| 74 |
-
"""
|
| 75 |
-
Stream chat response from local Qwen2.5-VL model.
|
| 76 |
-
"""
|
| 77 |
-
# Build conversation prompt
|
| 78 |
-
conv = []
|
| 79 |
-
for u, a in history:
|
| 80 |
-
conv.append(f"<user> {u}")
|
| 81 |
-
conv.append(f"<assistant> {a}")
|
| 82 |
-
conv.append(f"<user> {message}")
|
| 83 |
-
conv.append("<assistant>")
|
| 84 |
-
|
| 85 |
-
# Tokenize
|
| 86 |
-
inputs = tokenizer(
|
| 87 |
-
"\n".join(conv),
|
| 88 |
-
return_tensors="pt",
|
| 89 |
-
truncation=True,
|
| 90 |
-
max_length=4096
|
| 91 |
-
).to(model.device)
|
| 92 |
-
|
| 93 |
-
# Create streamer
|
| 94 |
-
streamer = TextIteratorStreamer(
|
| 95 |
-
tokenizer,
|
| 96 |
-
timeout=10.0,
|
| 97 |
-
skip_prompt=True,
|
| 98 |
-
skip_special_tokens=True
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
# Generation kwargs
|
| 102 |
-
gen_kwargs = dict(
|
| 103 |
-
input_ids=inputs.input_ids,
|
| 104 |
-
attention_mask=inputs.attention_mask,
|
| 105 |
-
streamer=streamer,
|
| 106 |
-
do_sample=(temperature > 0),
|
| 107 |
-
temperature=temperature,
|
| 108 |
-
max_new_tokens=max_new_tokens,
|
| 109 |
-
eos_token_id=terminators,
|
| 110 |
-
)
|
| 111 |
-
if temperature == 0:
|
| 112 |
-
gen_kwargs["do_sample"] = False
|
| 113 |
-
|
| 114 |
-
# Launch generation in thread
|
| 115 |
-
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
| 116 |
-
thread.start()
|
| 117 |
-
|
| 118 |
-
# Stream outputs
|
| 119 |
-
output_chunks = []
|
| 120 |
-
for chunk in streamer:
|
| 121 |
-
output_chunks.append(chunk)
|
| 122 |
-
yield "".join(output_chunks)
|
| 123 |
-
|
| 124 |
# --------- 3D Mesh Coloring Function ---------
|
| 125 |
def apply_gradient_color(mesh_text: str) -> str:
|
| 126 |
"""
|
|
|
|
| 23 |
processor = AutoProcessor.from_pretrained(MODEL_DIR)
|
| 24 |
|
| 25 |
# --------- Chat Inference Function ---------
|
| 26 |
+
def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
|
| 27 |
# —— 原有多模态输入构造 —— #
|
| 28 |
text = processor.apply_chat_template(
|
| 29 |
messages, tokenize=False, add_generation_prompt=True
|
|
|
|
| 51 |
**inputs, # 包含 input_ids, pixel_values, attention_mask 等
|
| 52 |
streamer=streamer, # 关键:挂载 streamer
|
| 53 |
top_k=1024,
|
| 54 |
+
max_new_tokens=max_new_tokens,
|
| 55 |
+
temperature=temperature,
|
| 56 |
top_p=0.1,
|
| 57 |
eos_token_id=terminators, # 你的结束符 ID 列表
|
| 58 |
)
|
|
|
|
| 70 |
# 每次拿到新片段就拼接并输出
|
| 71 |
yield "".join(buffer)
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# --------- 3D Mesh Coloring Function ---------
|
| 74 |
def apply_gradient_color(mesh_text: str) -> str:
|
| 75 |
"""
|