# Spaces: Running
import hmac
import os
import warnings
from typing import List, Optional, Any

import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
# FastAPI application instance; this is the object handed to uvicorn.run()
# in the __main__ guard of this file.
app = FastAPI(
    title="Vedika AI Gateway",
)
# Upstream settings.
# The API key is read from the environment once, at import time, and is None
# when the variable is unset (process_chat checks for that on every request).
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY")
# Chat-completions endpoint that requests are forwarded to.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Model identifier sent in the "model" field of every upstream request.
MODEL_NAME = "stepfun-ai/step-3.7-flash"
# Token that clients must present to use the gateway (compared in
# verify_token). It is taken from the AUTH_TOKEN environment variable.
#
# The fallback below is kept only so existing deployments keep working: it is
# readable by anyone who can see this source, so it provides no real
# protection. A warning is emitted at import time whenever it is in effect.
_DEFAULT_AUTH_TOKEN = "my-super-secret-token"
CLIENT_AUTH_TOKEN = os.getenv("AUTH_TOKEN", _DEFAULT_AUTH_TOKEN)
if CLIENT_AUTH_TOKEN == _DEFAULT_AUTH_TOKEN:
    warnings.warn(
        "AUTH_TOKEN is unset or equals the built-in default; the gateway is "
        "guarded only by a token that is visible in the source code. "
        "Set AUTH_TOKEN to a private value.",
        RuntimeWarning,
    )
# Security scheme that pulls the client token out of the "Authorization"
# request header. auto_error is disabled; verify_token performs its own
# missing-header check (`not api_key`) and raises the 401 itself.
api_key_header = APIKeyHeader(
    auto_error=False,
    name="Authorization",
)
# System message that process_chat places in front of every client
# conversation before forwarding it upstream. The text is split across
# adjacent literals purely for line length; the resulting string is unchanged.
VEDIKA_SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are Vedika AI, an exceptionally advanced multimedia large language "
        "model designed for maximum speed and real-time processing. "
        "Your primary purpose is to deliver highly accurate data insights and "
        "technical automation swiftly. "
        "You are operating via the NVIDIA L40S infrastructure at ultra-high "
        "performance levels, capable of reading and processing up to 13,000 "
        "tokens per second. "
        "Maintain absolute technical precision, efficiency, and a professional "
        "persona in every execution."
    ),
}
class MessageModel(BaseModel):
    """One chat message as supplied by the client.

    Instances are dumped to plain dicts and forwarded upstream by
    ``process_chat`` without further transformation.
    """

    # Speaker label; any string is accepted here.
    role: str
    # Message body; typed Any, so no structure is enforced at this layer.
    content: Any
class RunPodInput(BaseModel):
    """Generation request: the conversation plus optional sampling settings.

    The three settings are copied verbatim into the upstream request by
    ``process_chat``; each has a default and is declared ``Optional``.
    """

    # Conversation history; the system prompt is prepended by process_chat.
    messages: List[MessageModel]
    # Upper bound on generated tokens.
    max_tokens: Optional[int] = 16384
    # Sampling temperature.
    temperature: Optional[float] = 1.0
    # Nucleus-sampling probability mass.
    top_p: Optional[float] = 0.95
class RunPodPayload(BaseModel):
    """Request body envelope: the actual request lives under the ``input`` key."""

    # Field name is part of the wire format, hence the builtin-shadowing name.
    input: RunPodInput
def verify_token(api_key: str = Security(api_key_header)):
    """Check the client's Authorization header against ``CLIENT_AUTH_TOKEN``.

    The header may carry the token bare or behind a leading ``"Bearer "``
    scheme prefix.

    Args:
        api_key: Raw ``Authorization`` header value resolved through
            ``api_key_header``; falsy when the client sent no header.

    Returns:
        The raw header value, unchanged, when the token is accepted.

    Raises:
        HTTPException: 401 when the header is missing or empty, when the
            token does not match, or when no server-side token is configured.
    """
    scheme_prefix = "Bearer "

    presented = api_key or ""
    # Strip the scheme only where it belongs, at the very start. str.replace
    # would also delete any "Bearer " occurring inside the token itself.
    if presented.startswith(scheme_prefix):
        presented = presented[len(scheme_prefix):]

    expected = CLIENT_AUTH_TOKEN or ""

    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compare bytes: compare_digest rejects non-ASCII str.
    matches = hmac.compare_digest(
        presented.encode("utf-8"), expected.encode("utf-8")
    )

    # Fail closed: an empty presented or configured token never authenticates.
    if not presented or not expected or not matches:
        raise HTTPException(status_code=401, detail="Invalid or missing authentication token")
    return api_key
# NOTE(review): no route decorator is visible on this function in the reviewed
# text; confirm it is registered on `app`.
def home():
    """Report that the gateway is up and which model it fronts.

    Returns:
        dict: ``{"status": "online", "model": "Vedika AI (<MODEL_NAME>)"}``.
    """
    # Built from MODEL_NAME so the status can never drift from the model that
    # process_chat actually requests upstream.
    return {"status": "online", "model": f"Vedika AI ({MODEL_NAME})"}
# (connect, read) timeouts in seconds for the upstream call. The read timeout
# is generous because the request is non-streaming, so nothing is received
# until generation has finished.
_UPSTREAM_TIMEOUT = (10, 300)


# NOTE(review): no route decorator is visible on this function in the reviewed
# text; confirm it is registered on `app`.
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the upstream chat-completions endpoint.

    The client's messages are prefixed with ``VEDIKA_SYSTEM_PROMPT`` and sent,
    together with the sampling settings, as a non-streaming request to
    ``INVOKE_URL`` for model ``MODEL_NAME``.

    Args:
        payload: Request envelope; ``payload.input`` holds the messages and
            sampling settings.
        token: Result of the ``verify_token`` dependency. Unused in the body;
            declaring it is what enforces authentication.

    Returns:
        The upstream JSON response body, decoded, when upstream replies 200.

    Raises:
        HTTPException: 500 when ``NVIDIA_API_KEY`` is not configured, when the
            upstream request fails at the transport level or times out, or
            when a 200 reply is not valid JSON. For any non-200 upstream
            reply, the upstream status code with the upstream body as detail.
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(status_code=500, detail="Server Error: NVIDIA_API_KEY is missing")

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json",
    }
    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [
        msg.model_dump() for msg in payload.input.messages
    ]
    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False,
    }

    # Only the network call sits inside the try, and only transport errors are
    # caught. Previously a blanket `except Exception` also swallowed the
    # HTTPException raised for non-200 replies and turned it into a 500.
    try:
        response = requests.post(
            INVOKE_URL,
            headers=headers,
            json=nvidia_payload,
            timeout=_UPSTREAM_TIMEOUT,  # without this a stalled upstream hangs the request forever
        )
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    if response.status_code != 200:
        # Pass the upstream status and body through to the client.
        raise HTTPException(status_code=response.status_code, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        # A 200 reply whose body is not JSON.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn

    # Listen on every interface, port 7860.
    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)