"""Gradio/FastAPI server exposing a Step 3.7 Flash chat endpoint and a static frontend."""

import json
import os
from typing import Optional, Tuple

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from gradio import Server
from openai import OpenAI

# Initialize the Gradio Server (which is a FastAPI subclass)
app = Server()

# Create static directory if it doesn't exist.
# NOTE(review): this path (and the mount at the bottom of the file) is relative
# to the current working directory, while homepage() resolves "static" next to
# this module. They only agree when the server is started from the module's
# directory -- confirm that is always the case.
os.makedirs("static", exist_ok=True)

# Values forwarded to the API as `reasoning_effort`; anything else is dropped.
_REASONING_EFFORTS = ("low", "medium", "high")

# Delimiters for inline reasoning embedded in the answer text.
# NOTE(review): the tag literals were empty strings in the previous revision
# (which made `str.split` raise "empty separator"); `<think>` is assumed here
# as the conventional tag name -- confirm against the model's actual output.
_THINK_OPEN = "<think>"
_THINK_CLOSE = "</think>"


def _split_reasoning(content: Optional[str]) -> Tuple[str, Optional[str]]:
    """Separate inline reasoning from the visible answer.

    Returns a ``(reasoning, answer)`` pair. When ``content`` is empty or does
    not contain both the opening and the closing tag, the reasoning is ``""``
    and ``content`` is returned untouched.
    """
    if not content or _THINK_OPEN not in content or _THINK_CLOSE not in content:
        return "", content

    # Everything before the first closing tag is reasoning; the rest is the answer.
    thoughts, answer = content.split(_THINK_CLOSE, 1)
    return thoughts.replace(_THINK_OPEN, "").strip(), answer.strip()


@app.api(name="chat_with_step")
def chat_with_step(
    messages_json: str,
    reasoning_effort: str = "medium",
    max_tokens: int = 2048,
    temperature: float = 0.7,
) -> str:
    """Call the Step 3.7 Flash model through the OpenAI-compatible API.

    Args:
        messages_json: Conversation messages as a JSON-serialized string.
        reasoning_effort: Forwarded to the API only when it is one of
            "low", "medium" or "high"; any other value is ignored.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.

    Returns:
        A JSON string. On success:
        ``{"status": "success", "content": ..., "reasoning_content": ...}``.
        On any failure (invalid JSON input, missing API key, API error):
        ``{"status": "error", "message": ...}``. This function never raises.
    """
    # The broad handler is deliberate: this is the API boundary and every
    # failure is reported to the caller as an error payload.
    try:
        # Load messages from JSON string
        messages = json.loads(messages_json)

        # Load key from secure server-side environment variable
        key = os.environ.get("STEP_API_KEY", "").strip()
        if not key:
            return json.dumps({
                "status": "error",
                "message": "STEP_API_KEY environment variable is not configured on the server.",
            })

        # Initialize OpenAI client configured for StepFun
        client = OpenAI(
            api_key=key,
            base_url="https://api.stepfun.ai/v1",
        )

        # Prepare parameters for the API call
        params = {
            "model": "step-3.7-flash",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        if reasoning_effort in _REASONING_EFFORTS:
            params["reasoning_effort"] = reasoning_effort

        # Perform completion request
        response = client.chat.completions.create(**params)
        message = response.choices[0].message
        content = message.content

        # Prefer a dedicated reasoning field when the response carries one...
        reasoning_content = getattr(message, "reasoning_content", "")

        # ...otherwise fall back to reasoning embedded in the answer text.
        if not reasoning_content:
            reasoning_content, content = _split_reasoning(content)

        return json.dumps({
            "status": "success",
            "content": content,
            "reasoning_content": reasoning_content or "",
        })
    except Exception as e:
        return json.dumps({
            "status": "error",
            "message": str(e),
        })


@app.get("/")
async def homepage():
    """Serve ``static/index.html`` located next to this module.

    Falls back to a short placeholder page (still HTTP 200) when the file
    does not exist.
    """
    html_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "static", "index.html"
    )
    if os.path.exists(html_path):
        with open(html_path, "r", encoding="utf-8") as f:
            return HTMLResponse(content=f.read(), status_code=200)

    # NOTE(review): the original markup around this text was lost; <h1> is assumed.
    return HTMLResponse(
        content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
        status_code=200,
    )


# Mount static folder for CSS, JS, and image assets
app.mount("/static", StaticFiles(directory="static"), name="static")


if __name__ == "__main__":
    # Launch Gradio Server (default port is 7860)
    app.launch(show_error=True)