Spaces:
Running
Running
File size: 2,917 Bytes
fc18c47 cbc80cd fc18c47 cbc80cd fc18c47 cbc80cd fc18c47 cbc80cd fc18c47 856fe24 cbc80cd dbc0935 fc18c47 cbc80cd fc18c47 dbc0935 fc18c47 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 | import os
import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
from typing import List, Optional, Any
# ASGI application object; the route decorators further down register on it.
app = FastAPI(title="Vedika AI Gateway")

# Upstream (NVIDIA) settings. The key is read from the environment once, at
# import time; it is None when the variable is unset, which process_chat
# reports as a 500.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY")
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
MODEL_NAME = "stepfun-ai/step-3.7-flash"

# Shared secret that clients must present in the Authorization header.
# NOTE(review): when AUTH_TOKEN is unset this falls back to a fixed literal
# that is visible in the source, so anyone who can read this file can
# authenticate. Consider requiring AUTH_TOKEN to be set in deployment.
CLIENT_AUTH_TOKEN = os.environ.get("AUTH_TOKEN", "my-super-secret-token")

# auto_error=False: a missing header is handed to verify_token instead of
# being rejected by FastAPI, so verify_token controls the 401 response.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
# System message that process_chat places ahead of the client's messages on
# every upstream request. The text is split per sentence for readability only;
# adjacent literals concatenate into one string at compile time.
VEDIKA_SYSTEM_PROMPT = dict(
    role="system",
    content=(
        "You are Vedika AI, an exceptionally advanced multimedia large language model designed for maximum speed and real-time processing. "
        "Your primary purpose is to deliver highly accurate data insights and technical automation swiftly. "
        "You are operating via the NVIDIA L40S infrastructure at ultra-high performance levels, capable of reading and processing up to 13,000 tokens per second. "
        "Maintain absolute technical precision, efficiency, and a professional persona in every execution."
    ),
)
class MessageModel(BaseModel):
    # One chat message as supplied by the client. process_chat dumps each
    # instance to a plain dict and forwards it upstream after the system prompt.

    # Speaker label; only checked to be a string.
    role: str

    # Message body; typed Any, so no shape is enforced here.
    content: Any
class RunPodInput(BaseModel):
    # Conversation plus sampling settings. process_chat copies the three
    # sampling fields into the upstream request unchanged; each is Optional,
    # so an explicit null from the client is forwarded as None.

    # Client-supplied conversation, in order.
    messages: List[MessageModel]

    # Sampling settings with their defaults when the client omits them.
    max_tokens: Optional[int] = 16_384
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 0.95
class RunPodPayload(BaseModel):
    # Request envelope accepted by process_chat: the conversation and sampling
    # settings must be nested under a top-level "input" key.
    input: RunPodInput
def verify_token(api_key: str = Security(api_key_header)):
    """Validate the client's ``Authorization`` header against the shared token.

    The header may carry the token either bare or behind a leading
    ``Bearer `` scheme; both forms are accepted.

    Args:
        api_key: Raw ``Authorization`` header value, or a falsy value when the
            header was not sent.

    Returns:
        The header value exactly as received.

    Raises:
        HTTPException: 401 when the header is missing/empty or the presented
            token does not equal ``CLIENT_AUTH_TOKEN``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-of-file import list.
    import hmac

    bearer_prefix = "Bearer "
    presented = api_key or ""

    # Strip the scheme only when it is a genuine prefix. A blanket
    # str.replace() would also delete "Bearer " from the middle of a token
    # and so reject a perfectly valid credential.
    if presented.startswith(bearer_prefix):
        presented = presented[len(bearer_prefix):]

    # Compare as bytes in constant time: compare_digest rejects non-ASCII str
    # arguments, and a plain != leaks how many leading characters matched.
    token_matches = hmac.compare_digest(
        presented.encode("utf-8"),
        CLIENT_AUTH_TOKEN.encode("utf-8"),
    )

    if not api_key or not token_matches:
        raise HTTPException(status_code=401, detail="Invalid or missing authentication token")
    return api_key
@app.get("/")
def home():
    """Return a small status document naming the model this gateway fronts."""
    status_report = dict(
        status="online",
        model="Vedika AI (stepfun-ai/step-3.7-flash)",
    )
    return status_report
@app.post("/run")
@app.post("/v1/chat/completions")
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the upstream completions API and relay the reply.

    The system prompt is placed ahead of the client's messages, and the
    sampling settings from ``payload.input`` are passed through unchanged.
    Streaming is always disabled.

    Args:
        payload: Request envelope; the conversation lives under ``input``.
        token: Result of the ``verify_token`` dependency. Unused in the body;
            declaring it is what enforces authentication on these routes.

    Returns:
        The upstream JSON response body, decoded.

    Raises:
        HTTPException: 500 when ``NVIDIA_API_KEY`` is not configured, when the
            upstream call fails at the transport level (including timeouts),
            or when a 200 reply is not valid JSON; otherwise the upstream's
            own status code and body text for any non-200 reply.
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(status_code=500, detail="Server Error: NVIDIA_API_KEY is missing")

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json",
    }
    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [msg.model_dump() for msg in payload.input.messages]
    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False,
    }

    # (connect, read) seconds. requests waits forever without a timeout, which
    # would pin a worker on a stalled upstream. The read limit is generous
    # because a non-streamed completion only arrives once generation is done.
    upstream_timeout = (10, 300)

    # Only the network call is guarded, and only for transport errors, so the
    # HTTPException raised below for a non-200 reply is not swallowed and
    # rewritten into a generic 500.
    try:
        response = requests.post(
            INVOKE_URL,
            headers=headers,
            json=nvidia_payload,
            timeout=upstream_timeout,
        )
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    if response.status_code != 200:
        # Relay the upstream's status and body so the client sees the real cause.
        raise HTTPException(status_code=response.status_code, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        # A 200 reply whose body is not JSON; requests' JSON decode errors
        # derive from ValueError.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the file is run directly.
    import uvicorn

    # 0.0.0.0 binds every network interface.
    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)
|