# Step-3.7-Flash / app.py
# Author: akhaliq (HF Staff)
# Commit 62beaf6: Remove credentials field for 100% secure server-side key loading
import os
import json
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from gradio import Server
from openai import OpenAI
# Application object. Server comes from gradio and is (per the original author)
# a FastAPI subclass; the API endpoint, the "/" route and the static mount
# below are all registered on this instance.
app = Server()
# Ensure a "static" directory exists so the StaticFiles mount further down has
# a directory to point at. exist_ok=True makes this a no-op on restarts.
# NOTE(review): this path is relative to the current working directory, while
# homepage() resolves "static" relative to this file -- confirm both agree in
# the deployment environment.
os.makedirs("static", exist_ok=True)
def _split_think_block(text):
    """Separate an inline ``<think>...</think>`` span from the visible answer.

    Returns a ``(reasoning, answer)`` tuple. When *text* does not contain an
    opening tag followed by a closing tag, reasoning is ``""`` and the answer
    is *text* unchanged.
    """
    before, opened, rest = text.partition("<think>")
    if not opened:
        return "", text
    thought, closed, after = rest.partition("</think>")
    if not closed:
        return "", text
    # Text ahead of the opening tag is part of the answer, not the reasoning.
    return thought.strip(), (before + after).strip()


@app.api(name="chat_with_step")
def chat_with_step(
    messages_json: str,
    reasoning_effort: str = "medium",
    max_tokens: int = 2048,
    temperature: float = 0.7
) -> str:
    """
    Call the Step 3.7 Flash model through its OpenAI-compatible API.

    Args:
        messages_json: JSON-serialized array of chat message objects; it is
            decoded and passed as ``messages`` to the completion request.
        reasoning_effort: Forwarded to the API only when it is one of
            "low", "medium" or "high"; any other value is omitted.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.

    Returns:
        A JSON string. On success:
        ``{"status": "success", "content": ..., "reasoning_content": ...}``.
        On any failure: ``{"status": "error", "message": ...}``. This function
        does not raise; every error is reported through the JSON payload.
    """
    def _error(message: str) -> str:
        return json.dumps({
            "status": "error",
            "message": message
        })

    # Validate the payload up front so callers get a precise message instead
    # of an opaque failure from the upstream API.
    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError) as exc:
        return _error(f"messages_json is not valid JSON: {exc}")
    if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
        return _error("messages_json must be a JSON array of message objects.")

    # The key is only ever read from the server-side environment.
    key = os.environ.get("STEP_API_KEY", "").strip()
    if not key:
        return _error("STEP_API_KEY environment variable is not configured on the server.")

    params = {
        "model": "step-3.7-flash",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    # Only recognised effort levels are sent; anything else is left out of
    # the request entirely.
    if reasoning_effort in ("low", "medium", "high"):
        params["reasoning_effort"] = reasoning_effort

    # Top-level API boundary: any failure while talking to the model or
    # shaping the reply is converted into an error payload.
    try:
        client = OpenAI(
            api_key=key,
            base_url="https://api.stepfun.ai/v1",
        )
        response = client.chat.completions.create(**params)

        if not response.choices:
            return _error("The model returned no completion choices.")

        message = response.choices[0].message
        content = message.content
        # Prefer a dedicated reasoning_content attribute when the response
        # message carries one.
        reasoning_content = getattr(message, "reasoning_content", None) or ""
        # Otherwise fall back to extracting an inline <think>...</think> span.
        if not reasoning_content and content:
            reasoning_content, content = _split_think_block(content)

        return json.dumps({
            "status": "success",
            "content": content,
            "reasoning_content": reasoning_content or ""
        })
    except Exception as e:
        return _error(str(e))
@app.get("/")
async def homepage():
    """Serve the main application landing page.

    Reads ``static/index.html`` located next to this file and returns it as an
    HTML response. If that file is not there, a placeholder page asking the
    user to refresh is returned instead (also with status 200).
    """
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static", "index.html")
    # Open directly rather than checking os.path.exists() first: the file can
    # disappear between the check and the open, so handle absence here.
    try:
        with open(html_path, "r", encoding="utf-8") as f:
            page = f.read()
    except (FileNotFoundError, NotADirectoryError):
        return HTMLResponse(
            content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
            status_code=200
        )
    return HTMLResponse(content=page, status_code=200)
# Serve everything under the "static" directory at the /static URL prefix
# (CSS, JS, and image assets). The directory name is relative to the current
# working directory, matching the os.makedirs("static") call earlier in this
# file.
app.mount("/static", StaticFiles(directory="static"), name="static")
# Script entry point: only start the server when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # Launch Gradio Server (default port is 7860); show_error=True is passed
    # through to launch() as in the original.
    app.launch(show_error=True)