File size: 2,917 Bytes
fc18c47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbc80cd
fc18c47
 
 
 
cbc80cd
 
 
fc18c47
 
 
 
 
 
 
 
 
 
cbc80cd
fc18c47
cbc80cd
fc18c47
 
 
 
 
 
 
856fe24
cbc80cd
dbc0935
fc18c47
 
 
cbc80cd
 
 
fc18c47
 
 
 
 
 
 
 
 
 
 
dbc0935
fc18c47
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import hmac
import os
from typing import List, Optional, Any

import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel

# ASGI application object; the route decorators below register on it.
app = FastAPI(title="Vedika AI Gateway")

# Upstream credential, read once at import time. It is None when the variable
# is unset, in which case process_chat answers every request with a 500.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
# Endpoint that process_chat POSTs the chat payload to.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Sent as the "model" field of every upstream request.
MODEL_NAME = "stepfun-ai/step-3.7-flash"
# Shared secret that verify_token compares the caller's Authorization header to.
# NOTE(review): when AUTH_TOKEN is unset the literal fallback below becomes the
# accepted token, and that value is visible in source — consider refusing to
# start instead of falling back.
CLIENT_AUTH_TOKEN = os.getenv("AUTH_TOKEN", "my-super-secret-token")
# Security scheme that reads the "Authorization" request header. With
# auto_error=False the missing-header case is left to verify_token, which
# raises its own 401.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

# System message that process_chat places in front of the caller's messages on
# every request.
# NOTE(review): the hardware and throughput statements in the prompt text are
# instructions to the model; nothing in this file measures or enforces them.
VEDIKA_SYSTEM_PROMPT = {
    "role": "system",
    "content": "You are Vedika AI, an exceptionally advanced multimedia large language model designed for maximum speed and real-time processing. Your primary purpose is to deliver highly accurate data insights and technical automation swiftly. You are operating via the NVIDIA L40S infrastructure at ultra-high performance levels, capable of reading and processing up to 13,000 tokens per second. Maintain absolute technical precision, efficiency, and a professional persona in every execution."
}

# One chat message supplied by the caller. process_chat dumps each instance to
# a dict and forwards it upstream after the system prompt.
class MessageModel(BaseModel):
    # Speaker label for the message; any string is accepted here.
    role: str
    # Message body, accepted without type validation.
    # NOTE(review): presumably Any is used so structured (non-string) content
    # can pass through — confirm against the upstream API's message schema.
    content: Any

# Conversation plus sampling settings; process_chat copies each field into the
# upstream request body.
class RunPodInput(BaseModel):
    # Caller's messages, in the order they will follow the system prompt.
    messages: List[MessageModel]
    # The defaults below apply when the caller omits the field. Because the
    # fields are Optional, an explicit null is also accepted and is forwarded
    # upstream as null.
    max_tokens: Optional[int] = 16384
    temperature: Optional[float] = 1.00
    top_p: Optional[float] = 0.95

# Request body envelope for process_chat: everything lives under the single
# "input" key.
# NOTE(review): the naming suggests this mirrors RunPod's request shape —
# confirm with the clients that call this gateway.
class RunPodPayload(BaseModel):
    input: RunPodInput

def verify_token(api_key: str = Security(api_key_header)):
    """Authenticate the caller against the shared client token.

    The ``Authorization`` header may carry either the bare token or the
    ``Bearer <token>`` form.

    Args:
        api_key: Raw ``Authorization`` header value resolved by
            ``api_key_header``; falsy when the header is absent.

    Returns:
        The header value exactly as it was received.

    Raises:
        HTTPException: 401 when the header is missing, the presented token is
            empty, or it does not equal ``CLIENT_AUTH_TOKEN``.
    """
    bearer_prefix = "Bearer "
    presented = api_key or ""
    # Strip only a leading scheme. str.replace would also delete "Bearer "
    # occurrences inside the value and let malformed headers match.
    if presented.startswith(bearer_prefix):
        presented = presented[len(bearer_prefix):]

    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    token_matches = hmac.compare_digest(
        presented.encode("utf-8"), CLIENT_AUTH_TOKEN.encode("utf-8")
    )
    # An empty credential is never valid, even if the configured token is
    # (mis)set to an empty string.
    if not presented or not token_matches:
        raise HTTPException(status_code=401, detail="Invalid or missing authentication token")
    return api_key

@app.get("/")
def home():
    """Report that the gateway is up and which upstream model it fronts.

    Returns:
        A dict with a fixed ``"status"`` of ``"online"`` and a ``"model"``
        label built from ``MODEL_NAME``.
    """
    # Build the label from MODEL_NAME so it cannot drift from the model that
    # process_chat actually requests.
    return {"status": "online", "model": f"Vedika AI ({MODEL_NAME})"}

@app.post("/run")
@app.post("/v1/chat/completions")
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the upstream completion endpoint.

    The caller's messages are sent after ``VEDIKA_SYSTEM_PROMPT``, together
    with the sampling settings from ``payload.input``, as a non-streaming
    request for ``MODEL_NAME``.

    Args:
        payload: Request envelope holding the messages and sampling settings.
        token: Result of ``verify_token``; declared only so the route requires
            authentication, the value itself is not used.

    Returns:
        The upstream JSON response body, decoded and returned unchanged.

    Raises:
        HTTPException: 500 when ``NVIDIA_API_KEY`` is not configured or the
            upstream request fails at the transport level; 504 when the
            upstream call times out; the upstream status code (with the
            upstream body as detail) when the reply is not 200; 502 when a
            200 reply does not contain valid JSON.
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(status_code=500, detail="Server Error: NVIDIA_API_KEY is missing")

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json"
    }

    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [msg.model_dump() for msg in payload.input.messages]

    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False
    }

    # (connect, read) seconds. Without a timeout, requests waits forever on a
    # stalled upstream and ties up the worker. The read limit is generous
    # because the reply is not streamed; tune it to the deployment.
    upstream_timeout = (10, 300)

    # Keep the try body to the network call only, so the HTTPException raised
    # below for upstream errors is not caught and rewritten as a 500.
    try:
        response = requests.post(
            INVOKE_URL, headers=headers, json=nvidia_payload, timeout=upstream_timeout
        )
    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail=f"Upstream request timed out: {exc}") from exc
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    if response.status_code != 200:
        # Pass the upstream status and body through so callers can tell, for
        # example, rate limiting from a rejected request.
        raise HTTPException(status_code=response.status_code, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        # JSON decode errors raised by requests are ValueError subclasses.
        raise HTTPException(status_code=502, detail="Upstream returned a non-JSON response") from exc

if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn, bound to all
    # interfaces on port 7860.
    import uvicorn

    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)