Step-3.7-Flash-dev

Running

App Files Files Community

Step-3.7-Flash-dev / app.py

WinstonDeng

Update app.py

79229c9 verified 5 days ago

raw

history blame

3.64 kB

	import os
	import json
	from fastapi import FastAPI
	from fastapi.responses import HTMLResponse
	from fastapi.staticfiles import StaticFiles
	from gradio import Server
	from openai import OpenAI

	# Application object: a Gradio `Server` instance (which is a FastAPI subclass).
	# The `@app.api` / `@app.get` decorators and the `app.mount` call further down
	# all register against this object, so it must be created before them.
	app = Server()

	# Create the static directory if it doesn't exist. The path is relative, so it
	# resolves against the process's current working directory; `exist_ok=True`
	# makes the call a no-op when the directory is already present.
	os.makedirs("static", exist_ok=True)

	def _chat_error(message):
	    """Build the JSON error payload returned by ``chat_with_step``."""
	    return json.dumps({
	        "status": "error",
	        "message": message
	    })


	def _split_think_block(text):
	    """Pull the first well-formed ``<think>...</think>`` span out of *text*.

	    Returns ``(reasoning, remainder)`` where *reasoning* is the stripped text
	    between the tags and *remainder* is everything outside the span (text
	    before the opening tag plus text after the closing tag), stripped.
	    Returns ``None`` when there is no opening tag followed by a closing tag,
	    so the caller can leave the content untouched.
	    """
	    open_tag, close_tag = "<think>", "</think>"
	    start = text.find(open_tag)
	    if start == -1:
	        return None
	    # Search for the closing tag only after the opening one, so a stray
	    # "</think>" earlier in the text is not mistaken for the end of the span.
	    end = text.find(close_tag, start + len(open_tag))
	    if end == -1:
	        return None
	    reasoning = text[start + len(open_tag):end].strip()
	    # Keep any text that preceded "<think>" as part of the answer instead of
	    # folding it into the reasoning.
	    remainder = (text[:start] + text[end + len(close_tag):]).strip()
	    return reasoning, remainder


	@app.api(name="chat_with_step")
	def chat_with_step(
	    messages_json: str,
	    reasoning_effort: str = "medium",
	    max_tokens: int = 2048,
	    temperature: float = 0.7
	) -> str:
	    """
	    API endpoint to call the Step 3.7 Flash model via an OpenAI-compatible API.

	    Args:
	        messages_json: The conversation as a JSON-serialized string. It must
	            decode to a non-empty JSON array; the array is passed unchanged
	            as the ``messages`` argument of the completion request.
	        reasoning_effort: Forwarded to the API only when it is one of
	            ``"low"``, ``"medium"`` or ``"high"``; any other value is left
	            out of the request.
	        max_tokens: Forwarded as ``max_tokens``.
	        temperature: Forwarded as ``temperature``.

	    Returns:
	        A JSON string. On success:
	        ``{"status": "success", "content": ..., "reasoning_content": ...}``.
	        On any failure (bad input, missing ``STEP_API_KEY``, request error):
	        ``{"status": "error", "message": ...}``. The function reports
	        failures through this payload rather than raising.
	    """
	    # Decode and validate the conversation first, so malformed input is
	    # reported clearly and never reaches the remote API.
	    try:
	        messages = json.loads(messages_json)
	    except (TypeError, ValueError) as e:
	        return _chat_error(f"messages_json is not valid JSON: {e}")
	    if not isinstance(messages, list) or not messages:
	        return _chat_error("messages_json must decode to a non-empty JSON array of messages.")

	    # Load key from the server-side environment variable.
	    key = os.environ.get("STEP_API_KEY", "").strip()
	    if not key:
	        return _chat_error("STEP_API_KEY environment variable is not configured on the server.")

	    # Prepare parameters for the API call.
	    params = {
	        "model": "step-3.7-flash",
	        "messages": messages,
	        "max_tokens": max_tokens,
	        "temperature": temperature
	    }
	    if reasoning_effort in ("low", "medium", "high"):
	        params["reasoning_effort"] = reasoning_effort

	    # This is the API boundary: anything that goes wrong while talking to the
	    # service or reading its response is converted into an error payload.
	    try:
	        client = OpenAI(
	            api_key=key,
	            base_url="https://api.stepfun.com/v1",
	        )
	        response = client.chat.completions.create(**params)
	        reply = response.choices[0].message

	        content = reply.content
	        # The reasoning may arrive in a separate `reasoning_content` attribute;
	        # it is read defensively because the attribute may be absent.
	        reasoning_content = getattr(reply, "reasoning_content", "")

	        # Otherwise fall back to extracting an inline <think>...</think> span.
	        if not reasoning_content and content:
	            split = _split_think_block(content)
	            if split is not None:
	                reasoning_content, content = split

	        return json.dumps({
	            "status": "success",
	            "content": content,
	            "reasoning_content": reasoning_content or ""
	        })
	    except Exception as e:
	        return _chat_error(str(e))

	@app.get("/")
	async def homepage():
	    """Serve the main application landing page.

	    Reads ``static/index.html`` located next to this source file and returns
	    it as an HTML response. When that file is not there, a short placeholder
	    page is returned instead, also with status 200.
	    """
	    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static", "index.html")
	    try:
	        # Open directly instead of checking existence first: a file that
	        # disappears between a check and the open would otherwise surface as
	        # an unhandled error rather than the placeholder page.
	        with open(html_path, "r", encoding="utf-8") as f:
	            page = f.read()
	    except (FileNotFoundError, NotADirectoryError):
	        return HTMLResponse(
	            content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
	            status_code=200
	        )
	    return HTMLResponse(content=page, status_code=200)

	# Serve the contents of the "static" folder (CSS, JS, image assets) under the
	# /static URL prefix. The directory path is relative to the working directory.
	_static_assets = StaticFiles(directory="static")
	app.mount("/static", _static_assets, name="static")

	if __name__ == "__main__":
	    # Run as a script: launch the Gradio Server (default port is 7860).
	    app.launch(show_error=True)