"""Vedika AI Gateway.

A small FastAPI service that authenticates callers with a shared token,
prepends a fixed system prompt to the caller's chat messages, and forwards
the conversation to the NVIDIA chat-completions endpoint.
"""

import os
import secrets

import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
from typing import List, Optional, Any

app = FastAPI(title="Vedika AI Gateway")

# Upstream credentials and target; the key is read from the environment once
# at import time and checked for presence on every request.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
MODEL_NAME = "stepfun-ai/step-3.7-flash"

# (connect, read) timeouts in seconds for the upstream call. Without a
# timeout `requests` waits forever on a stalled connection.
UPSTREAM_TIMEOUT = (10, 300)

# NOTE(review): when AUTH_TOKEN is unset this falls back to a hard-coded,
# publicly visible token. The default is kept for backward compatibility;
# set AUTH_TOKEN explicitly in any real deployment.
CLIENT_AUTH_TOKEN = os.getenv("AUTH_TOKEN", "my-super-secret-token")

# auto_error=False so a missing header reaches verify_token, which answers
# with its own 401 message.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

_BEARER_PREFIX = "Bearer "

# System message placed in front of every forwarded conversation.
VEDIKA_SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are Vedika AI, an exceptionally advanced multimedia large language "
        "model designed for maximum speed and real-time processing. Your primary "
        "purpose is to deliver highly accurate data insights and technical "
        "automation swiftly. You are operating via the NVIDIA L40S infrastructure "
        "at ultra-high performance levels, capable of reading and processing up "
        "to 13,000 tokens per second. Maintain absolute technical precision, "
        "efficiency, and a professional persona in every execution."
    ),
}


class MessageModel(BaseModel):
    """A single chat message as supplied by the caller."""

    role: str
    content: Any


class RunPodInput(BaseModel):
    """Chat messages plus the sampling parameters forwarded upstream."""

    messages: List[MessageModel]
    max_tokens: Optional[int] = 16384
    temperature: Optional[float] = 1.00
    top_p: Optional[float] = 0.95


class RunPodPayload(BaseModel):
    """Request body envelope: the actual request lives under ``input``."""

    input: RunPodInput


def verify_token(api_key: str = Security(api_key_header)):
    """Validate the ``Authorization`` header against ``CLIENT_AUTH_TOKEN``.

    The token may be sent bare or with a leading ``"Bearer "`` scheme.

    Args:
        api_key: Raw ``Authorization`` header value, or ``None`` when absent.

    Returns:
        The raw header value, unchanged, when the token matches.

    Raises:
        HTTPException: 401 when the header is missing or the token is wrong.
    """
    if not api_key:
        raise HTTPException(
            status_code=401, detail="Invalid or missing authentication token"
        )

    # Strip the scheme only as a prefix; str.replace would also delete
    # "Bearer " occurring inside the token itself.
    if api_key.startswith(_BEARER_PREFIX):
        presented = api_key[len(_BEARER_PREFIX):]
    else:
        presented = api_key

    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    token_matches = secrets.compare_digest(
        presented.encode("utf-8"), CLIENT_AUTH_TOKEN.encode("utf-8")
    )
    if not token_matches:
        raise HTTPException(
            status_code=401, detail="Invalid or missing authentication token"
        )
    return api_key


@app.get("/")
def home():
    """Report that the gateway is up and which model it fronts."""
    return {"status": "online", "model": f"Vedika AI ({MODEL_NAME})"}


@app.post("/run")
@app.post("/v1/chat/completions")
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the upstream model and return its JSON reply.

    The system prompt is prepended to the caller's messages and the request
    is sent non-streaming.

    Args:
        payload: Request body; messages and sampling settings are read from
            ``payload.input``.
        token: Result of ``verify_token``; present only to enforce auth.

    Returns:
        The upstream response body parsed as JSON.

    Raises:
        HTTPException: 500 when ``NVIDIA_API_KEY`` is not configured, when the
            upstream call fails at the transport level (including timeouts),
            or when a 200 response is not valid JSON; otherwise the upstream
            status code with the upstream body as detail for any non-200
            response.
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(
            status_code=500, detail="Server Error: NVIDIA_API_KEY is missing"
        )

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json",
    }

    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [
        msg.model_dump() for msg in payload.input.messages
    ]

    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False,
    }

    # Only the network call sits in this try block. Previously the non-200
    # HTTPException was raised inside a blanket `except Exception`, which
    # rewrote every upstream status (401, 429, ...) into a generic 500.
    try:
        response = requests.post(
            INVOKE_URL,
            headers=headers,
            json=nvidia_payload,
            timeout=UPSTREAM_TIMEOUT,
        )
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response whose body is not JSON; reported as 500 as before.
        raise HTTPException(status_code=500, detail=str(exc)) from exc


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)