Spaces:
Running
Running
| import os | |
| import json | |
| from fastapi import FastAPI | |
| from fastapi.responses import HTMLResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from gradio import Server | |
| from openai import OpenAI | |
# The Gradio Server instance is the application object for this module
# (per the original note, Server is a FastAPI subclass).
app = Server()

# Ensure the "static" asset folder is present; a no-op when it already exists.
os.makedirs("static", exist_ok=True)
def chat_with_step(
    messages_json: str,
    reasoning_effort: str = "medium",
    max_tokens: int = 2048,
    temperature: float = 0.7,
) -> str:
    """Call the Step 3.7 Flash model through its OpenAI-compatible API.

    Args:
        messages_json: JSON-serialized, non-empty array of chat message
            objects (the conversation so far).
        reasoning_effort: Forwarded to the API only when it is one of
            "low", "medium" or "high"; any other value is silently omitted.
        max_tokens: Completion token limit; coerced to ``int`` before sending.
        temperature: Sampling temperature; coerced to ``float`` before sending.

    Returns:
        A JSON string. On success:
        ``{"status": "success", "content": str, "reasoning_content": str}``.
        On any failure: ``{"status": "error", "message": str}``.
        Errors are reported in the payload rather than raised.
    """
    # NOTE(review): no endpoint-registration decorator is visible on this
    # function in this view -- confirm it is exposed through `app`.

    # Parse and validate the conversation up front so the caller gets a
    # precise message instead of an opaque remote API error.
    try:
        messages = json.loads(messages_json)
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        return _error_payload(f"messages_json is not valid JSON: {exc}")

    if (
        not isinstance(messages, list)
        or not messages
        or not all(isinstance(item, dict) for item in messages)
    ):
        return _error_payload(
            "messages_json must be a non-empty JSON array of message objects."
        )

    # The key is read from the server-side environment only; it is never
    # accepted from the caller.
    key = os.environ.get("STEP_API_KEY", "").strip()
    if not key:
        return _error_payload(
            "STEP_API_KEY environment variable is not configured on the server."
        )

    try:
        params = {
            "model": "step-3.7-flash",
            "messages": messages,
            # Coerce in case the caller supplied numbers as floats/strings.
            "max_tokens": int(max_tokens),
            "temperature": float(temperature),
        }
        if reasoning_effort in ("low", "medium", "high"):
            params["reasoning_effort"] = reasoning_effort

        client = OpenAI(api_key=key, base_url="https://api.stepfun.ai/v1")
        response = client.chat.completions.create(**params)

        if not response.choices:
            return _error_payload("The model returned no choices.")

        message = response.choices[0].message
        # content may be None; always hand a string back to the caller.
        content = message.content or ""
        # Prefer a dedicated reasoning field when the API provides one ...
        reasoning_content = getattr(message, "reasoning_content", None) or ""
        # ... otherwise fall back to a leading <think>...</think> block.
        if not reasoning_content:
            reasoning_content, content = _split_think_block(content)
    except Exception as exc:
        # Endpoint boundary: report every failure (network, auth, bad
        # parameters) to the caller as an error payload instead of raising.
        return _error_payload(str(exc))

    return json.dumps({
        "status": "success",
        "content": content,
        "reasoning_content": reasoning_content,
    })


def _error_payload(message):
    """Build the JSON error envelope returned by chat_with_step."""
    return json.dumps({"status": "error", "message": message})


def _split_think_block(content):
    """Split a leading ``<think>...</think>`` block off *content*.

    Returns ``(reasoning, answer)``. When *content* does not start with a
    complete think block it is returned unchanged with empty reasoning, so an
    answer that merely mentions the tags is left intact.
    """
    stripped = content.lstrip()
    if not stripped.startswith("<think>"):
        return "", content
    reasoning, closing_tag, answer = stripped[len("<think>"):].partition("</think>")
    if not closing_tag:
        return "", content
    return reasoning.strip(), answer.strip()
async def homepage():
    """Serve the main application landing page.

    Returns the contents of ``static/index.html`` (located next to this file)
    as an HTML response. If that file does not exist, a placeholder page is
    returned instead. Both responses use status code 200.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view -- confirm it is registered with the app.
    html_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "static", "index.html"
    )
    # Open directly instead of checking os.path.exists() first: the file
    # could disappear between the check and the open.
    try:
        with open(html_path, "r", encoding="utf-8") as f:
            page = f.read()
    except FileNotFoundError:
        return HTMLResponse(
            content="<h1>Frontend is building. Please refresh in a few seconds...</h1>",
            status_code=200,
        )
    return HTMLResponse(content=page, status_code=200)
# Mount static folder for CSS, JS, and image assets.
# The directory is resolved relative to this file (the same location the
# homepage handler reads index.html from) rather than the current working
# directory, so both agree regardless of where the process is started.
_STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
# Make sure the folder exists before mounting it.
os.makedirs(_STATIC_DIR, exist_ok=True)
app.mount("/static", StaticFiles(directory=_STATIC_DIR), name="static")

if __name__ == "__main__":
    # Launch Gradio Server (default port is 7860)
    app.launch(show_error=True)