# Page header captured with the file (not part of the program):
# "WinstonDeng's picture" / "Update app.py" / commit 79229c9 (verified) /
# raw / history blame / 3.64 kB
import os
import json
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from gradio import Server
from openai import OpenAI
# The application object: a Gradio Server (which is a FastAPI subclass), so
# both Gradio API endpoints and plain FastAPI routes can be registered on it.
app = Server()

# Make sure a "static" directory is present before anything tries to use it;
# an already-existing directory is left untouched.
os.makedirs(name="static", exist_ok=True)
@app.api(name="chat_with_step")
def chat_with_step(
    messages_json: str,
    reasoning_effort: str = "medium",
    max_tokens: int = 2048,
    temperature: float = 0.7,
) -> str:
    """Send a conversation to the Step 3.7 Flash model and return its reply.

    The model is reached through StepFun's OpenAI-compatible endpoint using
    the API key stored in the ``STEP_API_KEY`` environment variable. Every
    outcome, including failures, is reported as a JSON string so the caller
    always receives a parseable payload.

    Args:
        messages_json: JSON-serialized, non-empty array of chat message
            objects, forwarded as the ``messages`` request field.
        reasoning_effort: ``"low"``, ``"medium"`` or ``"high"``. Any other
            value is ignored and the field is left out of the request.
        max_tokens: Forwarded as the ``max_tokens`` request field.
        temperature: Forwarded as the ``temperature`` request field.

    Returns:
        A JSON string. On success:
        ``{"status": "success", "content": str, "reasoning_content": str}``.
        On failure: ``{"status": "error", "message": str}``.
    """
    try:
        messages = json.loads(messages_json)

        # A payload can be valid JSON and still not be a conversation (an
        # object, a bare string, an empty array). Reject it here with a clear
        # message instead of forwarding it to the upstream API.
        is_conversation = (
            isinstance(messages, list)
            and bool(messages)
            and all(isinstance(item, dict) for item in messages)
        )
        if not is_conversation:
            return json.dumps({
                "status": "error",
                "message": "messages_json must be a non-empty JSON array of message objects."
            })

        # The key is read server-side only, so it never reaches the browser.
        key = os.environ.get("STEP_API_KEY", "").strip()
        if not key:
            return json.dumps({
                "status": "error",
                "message": "STEP_API_KEY environment variable is not configured on the server."
            })

        # OpenAI client pointed at the StepFun endpoint.
        client = OpenAI(
            api_key=key,
            base_url="https://api.stepfun.com/v1",
        )

        params = {
            "model": "step-3.7-flash",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        # Only forward a recognised effort level; anything else is dropped so
        # the request falls back to the service's own default.
        if reasoning_effort in ("low", "medium", "high"):
            params["reasoning_effort"] = reasoning_effort

        response = client.chat.completions.create(**params)

        # Without this check an empty result would surface as the opaque
        # "list index out of range" from the indexing below.
        if not response.choices:
            return json.dumps({
                "status": "error",
                "message": "The model returned no choices."
            })

        message = response.choices[0].message

        # ``content`` may be None; normalise it so the payload always carries
        # strings, matching how ``reasoning_content`` is handled.
        content = message.content or ""

        # Prefer a dedicated reasoning field when the response object has one.
        reasoning_content = getattr(message, "reasoning_content", "") or ""

        # Otherwise fall back to reasoning embedded inline in <think> tags:
        # everything before the first closing tag is treated as reasoning and
        # the remainder as the visible answer.
        if not reasoning_content and "<think>" in content and "</think>" in content:
            thoughts, _, answer = content.partition("</think>")
            reasoning_content = thoughts.replace("<think>", "").strip()
            content = answer.strip()

        return json.dumps({
            "status": "success",
            "content": content,
            "reasoning_content": reasoning_content
        })
    except Exception as e:
        # Endpoint boundary: malformed JSON, network and API errors are all
        # converted into an error payload rather than propagated.
        return json.dumps({
            "status": "error",
            "message": str(e)
        })
@app.get("/")
async def homepage():
    """Return ``static/index.html`` as the landing page.

    The file is looked up next to this module. When it is not there, a
    placeholder page asking the visitor to refresh is returned instead; both
    cases respond with HTTP 200.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "static", "index.html")

    # Guard clause: no built frontend yet, so answer with the placeholder.
    if not os.path.exists(index_file):
        return HTMLResponse(
            content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
            status_code=200,
        )

    with open(index_file, "r", encoding="utf-8") as handle:
        markup = handle.read()
    return HTMLResponse(content=markup, status_code=200)
# Serve the contents of the "static" folder (CSS, JS, and image assets)
# under the /static URL prefix.
static_files = StaticFiles(directory="static")
app.mount("/static", static_files, name="static")

if __name__ == "__main__":
    # Run as a script: launch the Gradio Server (default port is 7860)
    app.launch(show_error=True)