Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,11 +5,11 @@ import os
|
|
| 5 |
import re
|
| 6 |
|
| 7 |
# ============================================================
|
| 8 |
-
# 配置
|
| 9 |
# ============================================================
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
MODEL_NAME = "step-3.5-flash"
|
| 13 |
HF_CONFIG_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/raw/main/config.json"
|
| 14 |
STEPFUN_LOGO_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/resolve/main/stepfun.svg"
|
| 15 |
STEPFUN_LOGO_PATH = "/tmp/stepfun_logo.svg"
|
|
@@ -90,28 +90,38 @@ def fetch_model_config():
|
|
| 90 |
|
| 91 |
|
| 92 |
def format_messages(history, system_prompt: str, user_message: str):
|
|
|
|
| 93 |
messages = []
|
| 94 |
if system_prompt.strip():
|
| 95 |
messages.append({"role": "system", "content": system_prompt})
|
| 96 |
for msg in history:
|
| 97 |
-
if msg["role"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
content = msg.get("content", "")
|
| 99 |
if content:
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
messages.append({"role": "user", "content": user_message})
|
| 102 |
return messages
|
| 103 |
|
| 104 |
|
| 105 |
def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
|
| 106 |
-
"""流式聊天,返回 (reasoning, content) 生成器"""
|
| 107 |
messages = format_messages(history, system_prompt, message)
|
| 108 |
|
| 109 |
reasoning = ""
|
| 110 |
content = ""
|
|
|
|
| 111 |
|
| 112 |
try:
|
| 113 |
headers = {
|
| 114 |
-
"Authorization": f"Bearer {
|
| 115 |
"Content-Type": "application/json",
|
| 116 |
}
|
| 117 |
payload = {
|
|
@@ -121,9 +131,10 @@ def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int
|
|
| 121 |
"max_tokens": max_tokens,
|
| 122 |
"temperature": temperature if temperature > 0 else 0.01,
|
| 123 |
"top_p": top_p,
|
|
|
|
| 124 |
}
|
| 125 |
|
| 126 |
-
with httpx.stream("POST", f"{
|
| 127 |
response.raise_for_status()
|
| 128 |
for line in response.iter_lines():
|
| 129 |
if not line or not line.startswith("data: "):
|
|
@@ -134,20 +145,26 @@ def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int
|
|
| 134 |
try:
|
| 135 |
chunk = json.loads(data_str)
|
| 136 |
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
|
|
|
| 137 |
if delta.get("reasoning"):
|
| 138 |
reasoning += delta["reasoning"]
|
| 139 |
-
yield reasoning, content
|
|
|
|
| 140 |
if delta.get("content"):
|
| 141 |
content += delta["content"]
|
| 142 |
-
yield reasoning, content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
except json.JSONDecodeError:
|
| 144 |
continue
|
| 145 |
-
yield reasoning, content
|
| 146 |
|
| 147 |
except httpx.HTTPStatusError as e:
|
| 148 |
-
yield reasoning, f"❌ API 错误: {e.response.status_code}"
|
| 149 |
except Exception as e:
|
| 150 |
-
yield reasoning, f"❌ 错误: {str(e)}"
|
| 151 |
|
| 152 |
|
| 153 |
def clean_thinking(text: str) -> str:
|
|
@@ -172,9 +189,9 @@ def main():
|
|
| 172 |
with st.sidebar:
|
| 173 |
st.header("⚙️ 设置")
|
| 174 |
system_prompt = st.text_area("系统提示词", value="你是一个有帮助的 AI 助手。", height=80)
|
| 175 |
-
max_tokens = st.slider("最大长度", 256,
|
| 176 |
-
temperature = st.slider("Temperature", 0.0, 1.
|
| 177 |
-
top_p = st.slider("Top-p", 0.1,
|
| 178 |
|
| 179 |
st.divider()
|
| 180 |
if st.button("🗑️ 清空对话", use_container_width=True):
|
|
@@ -246,8 +263,9 @@ def main():
|
|
| 246 |
|
| 247 |
full_response = ""
|
| 248 |
full_thinking = ""
|
|
|
|
| 249 |
|
| 250 |
-
for thinking, response in chat_stream(
|
| 251 |
prompt,
|
| 252 |
st.session_state.messages[:-1],
|
| 253 |
system_prompt,
|
|
@@ -257,6 +275,8 @@ def main():
|
|
| 257 |
):
|
| 258 |
full_thinking = thinking
|
| 259 |
full_response = response if response else "▌"
|
|
|
|
|
|
|
| 260 |
|
| 261 |
# 更新思考内容
|
| 262 |
if full_thinking:
|
|
@@ -266,11 +286,12 @@ def main():
|
|
| 266 |
# 更新回答内容
|
| 267 |
answer_placeholder.markdown(full_response)
|
| 268 |
|
| 269 |
-
# 保存消息
|
| 270 |
st.session_state.messages.append({
|
| 271 |
"role": "assistant",
|
| 272 |
"content": full_response,
|
| 273 |
"thinking": full_thinking,
|
|
|
|
| 274 |
})
|
| 275 |
st.rerun()
|
| 276 |
|
|
|
|
| 5 |
import re
|
| 6 |
|
| 7 |
# ============================================================
|
| 8 |
+
# 配置 - 使用 OpenRouter API
|
| 9 |
# ============================================================
|
| 10 |
+
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
|
| 11 |
+
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
| 12 |
+
MODEL_NAME = "stepfun/step-3.5-flash"
|
| 13 |
HF_CONFIG_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/raw/main/config.json"
|
| 14 |
STEPFUN_LOGO_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/resolve/main/stepfun.svg"
|
| 15 |
STEPFUN_LOGO_PATH = "/tmp/stepfun_logo.svg"
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def format_messages(history, system_prompt: str, user_message: str):
    """Build the ``messages`` list sent to the chat-completions endpoint.

    The result is, in order: an optional system message, the replayed
    history, and finally the new user message.

    Args:
        history: Previous turns as dicts with ``"role"`` and ``"content"``
            keys. Assistant turns may also carry ``"reasoning_details"``.
            ``None`` is treated as an empty history.
        system_prompt: System instruction. It is skipped when ``None``,
            empty, or whitespace-only.
        user_message: The new user input; always appended last.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts. Assistant entries
        additionally include ``"reasoning_details"`` when the stored turn
        has a truthy value for it.
    """
    messages = []

    # Guard against None as well as blank prompts.
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    for msg in history or []:
        # .get() so a malformed entry without "role" is skipped instead of
        # raising KeyError; roles other than user/assistant are ignored.
        role = msg.get("role")
        if role not in ("user", "assistant"):
            continue

        content = msg.get("content", "")
        if not content:
            # Empty turns are not replayed.
            continue

        entry = {"role": role, "content": content}
        # Keep reasoning_details on assistant turns so they are sent back
        # on later turns; other stored keys (e.g. "thinking") are dropped.
        if role == "assistant" and msg.get("reasoning_details"):
            entry["reasoning_details"] = msg["reasoning_details"]
        messages.append(entry)

    messages.append({"role": "user", "content": user_message})
    return messages
|
| 112 |
|
| 113 |
|
| 114 |
def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
|
| 115 |
+
"""流式聊天,返回 (reasoning, content, reasoning_details) 生成器"""
|
| 116 |
messages = format_messages(history, system_prompt, message)
|
| 117 |
|
| 118 |
reasoning = ""
|
| 119 |
content = ""
|
| 120 |
+
reasoning_details = None
|
| 121 |
|
| 122 |
try:
|
| 123 |
headers = {
|
| 124 |
+
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 125 |
"Content-Type": "application/json",
|
| 126 |
}
|
| 127 |
payload = {
|
|
|
|
| 131 |
"max_tokens": max_tokens,
|
| 132 |
"temperature": temperature if temperature > 0 else 0.01,
|
| 133 |
"top_p": top_p,
|
| 134 |
+
"reasoning": {"enabled": True}, # 启用推理模式
|
| 135 |
}
|
| 136 |
|
| 137 |
+
with httpx.stream("POST", f"{OPENROUTER_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=120.0) as response:
|
| 138 |
response.raise_for_status()
|
| 139 |
for line in response.iter_lines():
|
| 140 |
if not line or not line.startswith("data: "):
|
|
|
|
| 145 |
try:
|
| 146 |
chunk = json.loads(data_str)
|
| 147 |
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
| 148 |
+
# 处理 reasoning (流式思考内容)
|
| 149 |
if delta.get("reasoning"):
|
| 150 |
reasoning += delta["reasoning"]
|
| 151 |
+
yield reasoning, content, reasoning_details
|
| 152 |
+
# 处理 content (流式回答内容)
|
| 153 |
if delta.get("content"):
|
| 154 |
content += delta["content"]
|
| 155 |
+
yield reasoning, content, reasoning_details
|
| 156 |
+
# 处理完整的 reasoning_details (用于多轮保留)
|
| 157 |
+
message_obj = chunk.get("choices", [{}])[0].get("message", {})
|
| 158 |
+
if message_obj.get("reasoning_details"):
|
| 159 |
+
reasoning_details = message_obj["reasoning_details"]
|
| 160 |
except json.JSONDecodeError:
|
| 161 |
continue
|
| 162 |
+
yield reasoning, content, reasoning_details
|
| 163 |
|
| 164 |
except httpx.HTTPStatusError as e:
|
| 165 |
+
yield reasoning, f"❌ API 错误: {e.response.status_code}", None
|
| 166 |
except Exception as e:
|
| 167 |
+
yield reasoning, f"❌ 错误: {str(e)}", None
|
| 168 |
|
| 169 |
|
| 170 |
def clean_thinking(text: str) -> str:
|
|
|
|
| 189 |
with st.sidebar:
|
| 190 |
st.header("⚙️ 设置")
|
| 191 |
system_prompt = st.text_area("系统提示词", value="你是一个有帮助的 AI 助手。", height=80)
|
| 192 |
+
max_tokens = st.slider("最大长度", 256, 131072, 4096, step=256, help="最大 128k")
|
| 193 |
+
temperature = st.slider("Temperature", 0.0, 1.5, 0.7, step=0.1)
|
| 194 |
+
top_p = st.slider("Top-p", 0.1, 1.0, 0.9, step=0.05)
|
| 195 |
|
| 196 |
st.divider()
|
| 197 |
if st.button("🗑️ 清空对话", use_container_width=True):
|
|
|
|
| 263 |
|
| 264 |
full_response = ""
|
| 265 |
full_thinking = ""
|
| 266 |
+
full_reasoning_details = None
|
| 267 |
|
| 268 |
+
for thinking, response, reasoning_details in chat_stream(
|
| 269 |
prompt,
|
| 270 |
st.session_state.messages[:-1],
|
| 271 |
system_prompt,
|
|
|
|
| 275 |
):
|
| 276 |
full_thinking = thinking
|
| 277 |
full_response = response if response else "▌"
|
| 278 |
+
if reasoning_details:
|
| 279 |
+
full_reasoning_details = reasoning_details
|
| 280 |
|
| 281 |
# 更新思考内容
|
| 282 |
if full_thinking:
|
|
|
|
| 286 |
# 更新回答内容
|
| 287 |
answer_placeholder.markdown(full_response)
|
| 288 |
|
| 289 |
+
# 保存消息(包含 reasoning_details 用于多轮对话)
|
| 290 |
st.session_state.messages.append({
|
| 291 |
"role": "assistant",
|
| 292 |
"content": full_response,
|
| 293 |
"thinking": full_thinking,
|
| 294 |
+
"reasoning_details": full_reasoning_details,
|
| 295 |
})
|
| 296 |
st.rerun()
|
| 297 |
|