Spaces:

stepfun-ai
/

Step-3.5-Flash

Running

App Files Community

WinstonDeng committed on Feb 12

Commit

989e372

verified ·

1 Parent(s): 40d2a31

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -19

app.py CHANGED Viewed

@@ -5,11 +5,11 @@ import os
 import re
 # ============================================================
-# 配置
 # ============================================================
-STEPFUN_API_KEY = os.environ.get("STEPFUN_API_KEY", "")
-STEPFUN_BASE_URL = "https://api.stepfun.com/v1"
-MODEL_NAME = "step-3.5-flash"
 HF_CONFIG_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/raw/main/config.json"
 STEPFUN_LOGO_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/resolve/main/stepfun.svg"
 STEPFUN_LOGO_PATH = "/tmp/stepfun_logo.svg"
@@ -90,28 +90,38 @@ def fetch_model_config():
 def format_messages(history, system_prompt: str, user_message: str):
     messages = []
     if system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt})
     for msg in history:
-        if msg["role"] in ["user", "assistant"]:
             content = msg.get("content", "")
             if content:
-                messages.append({"role": msg["role"], "content": content})
     messages.append({"role": "user", "content": user_message})
     return messages
 def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
-    """流式聊天，返回 (reasoning, content) 生成器"""
     messages = format_messages(history, system_prompt, message)
     reasoning = ""
     content = ""
     try:
         headers = {
-            "Authorization": f"Bearer {STEPFUN_API_KEY}",
             "Content-Type": "application/json",
         }
         payload = {
@@ -121,9 +131,10 @@ def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int
             "max_tokens": max_tokens,
             "temperature": temperature if temperature > 0 else 0.01,
             "top_p": top_p,
         }
-        with httpx.stream("POST", f"{STEPFUN_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=120.0) as response:
             response.raise_for_status()
             for line in response.iter_lines():
                 if not line or not line.startswith("data: "):
@@ -134,20 +145,26 @@ def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int
                 try:
                     chunk = json.loads(data_str)
                     delta = chunk.get("choices", [{}])[0].get("delta", {})
                     if delta.get("reasoning"):
                         reasoning += delta["reasoning"]
-                        yield reasoning, content
                     if delta.get("content"):
                         content += delta["content"]
-                        yield reasoning, content
                 except json.JSONDecodeError:
                     continue
-        yield reasoning, content
     except httpx.HTTPStatusError as e:
-        yield reasoning, f"❌ API 错误: {e.response.status_code}"
     except Exception as e:
-        yield reasoning, f"❌ 错误: {str(e)}"
 def clean_thinking(text: str) -> str:
@@ -172,9 +189,9 @@ def main():
     with st.sidebar:
         st.header("⚙️ 设置")
         system_prompt = st.text_area("系统提示词", value="你是一个有帮助的 AI 助手。", height=80)
-        max_tokens = st.slider("最大长度", 256, 256000, 4096, step=256, help="最大 128k")
-        temperature = st.slider("Temperature", 0.0, 1.0, 0.7, step=0.1)
-        top_p = st.slider("Top-p", 0.1, 0.99, 0.9, step=0.05)
         st.divider()
         if st.button("🗑️ 清空对话", use_container_width=True):
@@ -246,8 +263,9 @@ def main():
             full_response = ""
             full_thinking = ""
-            for thinking, response in chat_stream(
                 prompt,
                 st.session_state.messages[:-1],
                 system_prompt,
@@ -257,6 +275,8 @@ def main():
             ):
                 full_thinking = thinking
                 full_response = response if response else "▌"
                 # 更新思考内容
                 if full_thinking:
@@ -266,11 +286,12 @@ def main():
                 # 更新回答内容
                 answer_placeholder.markdown(full_response)
-            # 保存消息
             st.session_state.messages.append({
                 "role": "assistant",
                 "content": full_response,
                 "thinking": full_thinking,
             })
             st.rerun()

 import re
 # ============================================================
+# 配置 - 使用 OpenRouter API
 # ============================================================
+OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+MODEL_NAME = "stepfun/step-3.5-flash"
 HF_CONFIG_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/raw/main/config.json"
 STEPFUN_LOGO_URL = "https://huggingface.co/stepfun-ai/Step-3.5-Flash/resolve/main/stepfun.svg"
 STEPFUN_LOGO_PATH = "/tmp/stepfun_logo.svg"
 def format_messages(history, system_prompt: str, user_message: str):
+    """格式化消息，保留 reasoning_details 用于多轮对话"""
     messages = []
     if system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt})
     for msg in history:
+        if msg["role"] == "user":
+            content = msg.get("content", "")
+            if content:
+                messages.append({"role": "user", "content": content})
+        elif msg["role"] == "assistant":
             content = msg.get("content", "")
             if content:
+                assistant_msg = {"role": "assistant", "content": content}
+                # 保留 reasoning_details 用于多轮对话
+                if msg.get("reasoning_details"):
+                    assistant_msg["reasoning_details"] = msg["reasoning_details"]
+                messages.append(assistant_msg)
     messages.append({"role": "user", "content": user_message})
     return messages
 def chat_stream(message: str, history: list, system_prompt: str, max_tokens: int, temperature: float, top_p: float):
+    """流式聊天，返回 (reasoning, content, reasoning_details) 生成器"""
     messages = format_messages(history, system_prompt, message)
     reasoning = ""
     content = ""
+    reasoning_details = None
     try:
         headers = {
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "Content-Type": "application/json",
         }
         payload = {
             "max_tokens": max_tokens,
             "temperature": temperature if temperature > 0 else 0.01,
             "top_p": top_p,
+            "reasoning": {"enabled": True},  # 启用推理模式
         }
+        with httpx.stream("POST", f"{OPENROUTER_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=120.0) as response:
             response.raise_for_status()
             for line in response.iter_lines():
                 if not line or not line.startswith("data: "):
                 try:
                     chunk = json.loads(data_str)
                     delta = chunk.get("choices", [{}])[0].get("delta", {})
+                    # 处理 reasoning (流式思考内容)
                     if delta.get("reasoning"):
                         reasoning += delta["reasoning"]
+                        yield reasoning, content, reasoning_details
+                    # 处理 content (流式回答内容)
                     if delta.get("content"):
                         content += delta["content"]
+                        yield reasoning, content, reasoning_details
+                    # 处理完整的 reasoning_details (用于多轮保留)
+                    message_obj = chunk.get("choices", [{}])[0].get("message", {})
+                    if message_obj.get("reasoning_details"):
+                        reasoning_details = message_obj["reasoning_details"]
                 except json.JSONDecodeError:
                     continue
+        yield reasoning, content, reasoning_details
     except httpx.HTTPStatusError as e:
+        yield reasoning, f"❌ API 错误: {e.response.status_code}", None
     except Exception as e:
+        yield reasoning, f"❌ 错误: {str(e)}", None
 def clean_thinking(text: str) -> str:
     with st.sidebar:
         st.header("⚙️ 设置")
         system_prompt = st.text_area("系统提示词", value="你是一个有帮助的 AI 助手。", height=80)
+        max_tokens = st.slider("最大长度", 256, 131072, 4096, step=256, help="最大 128k")
+        temperature = st.slider("Temperature", 0.0, 1.5, 0.7, step=0.1)
+        top_p = st.slider("Top-p", 0.1, 1.0, 0.9, step=0.05)
         st.divider()
         if st.button("🗑️ 清空对话", use_container_width=True):
             full_response = ""
             full_thinking = ""
+            full_reasoning_details = None
+            for thinking, response, reasoning_details in chat_stream(
                 prompt,
                 st.session_state.messages[:-1],
                 system_prompt,
             ):
                 full_thinking = thinking
                 full_response = response if response else "▌"
+                if reasoning_details:
+                    full_reasoning_details = reasoning_details
                 # 更新思考内容
                 if full_thinking:
                 # 更新回答内容
                 answer_placeholder.markdown(full_response)
+            # 保存消息（包含 reasoning_details 用于多轮对话）
             st.session_state.messages.append({
                 "role": "assistant",
                 "content": full_response,
                 "thinking": full_thinking,
+                "reasoning_details": full_reasoning_details,
             })
             st.rerun()