# Hugging Face Space: Vedika-advanced-AI / Modal (status when captured: Running)
# File size: 2,917 bytes
# Revisions shown in the file viewer's per-line history: fc18c47, cbc80cd, 856fe24, dbc0935

import hmac
import os
from typing import List, Optional, Any

import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel

# ASGI application object; routes below are registered on it.
app = FastAPI(title="Vedika AI Gateway")

# Upstream credential, read once at import time. It is None when the variable
# is unset, in which case process_chat answers 500 instead of calling upstream.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
# Upstream chat-completions endpoint that process_chat POSTs to.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Model identifier sent as the "model" field of every upstream request.
MODEL_NAME = "stepfun-ai/step-3.7-flash"
# Shared secret that clients must present; compared in verify_token.
# NOTE(review): when AUTH_TOKEN is unset this falls back to a literal that is
# visible in the source, so anyone who can read this file can authenticate.
# Set AUTH_TOKEN in every deployment; consider failing fast when it is absent.
CLIENT_AUTH_TOKEN = os.getenv("AUTH_TOKEN", "my-super-secret-token")
# Extracts the raw "Authorization" header. auto_error=False leaves the
# missing-header case to verify_token, which raises its own 401.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

# System message that process_chat places ahead of the client's messages on
# every upstream request.
VEDIKA_SYSTEM_PROMPT = {
    "role": "system",
    "content": "You are Vedika AI, an exceptionally advanced multimedia large language model designed for maximum speed and real-time processing. Your primary purpose is to deliver highly accurate data insights and technical automation swiftly. You are operating via the NVIDIA L40S infrastructure at ultra-high performance levels, capable of reading and processing up to 13,000 tokens per second. Maintain absolute technical precision, efficiency, and a professional persona in every execution."
}

class MessageModel(BaseModel):
    """One chat message supplied by the client.

    Instances are converted with ``model_dump()`` and forwarded upstream as-is,
    so nothing beyond the declared field types is checked here.
    """
    # Speaker role; any string is accepted (not restricted to a fixed set).
    role: str
    # Message body; typed ``Any``, so no particular structure is enforced.
    content: Any

class RunPodInput(BaseModel):
    """Chat request parameters carried under the ``input`` key of the request body."""
    # Conversation so far; process_chat prepends the service's system prompt.
    messages: List[MessageModel]
    # Generation settings, forwarded upstream under the same names. Being
    # ``Optional``, an explicit null is accepted and forwarded as null.
    max_tokens: Optional[int] = 16384
    temperature: Optional[float] = 1.00
    top_p: Optional[float] = 0.95

class RunPodPayload(BaseModel):
    """Top-level request body, shaped ``{"input": {...}}``.

    NOTE(review): this wrapper is required on both routes that use it,
    including ``/v1/chat/completions``; a body with top-level ``messages`` and
    no ``input`` key fails validation. Confirm that is intended.
    """
    # The actual chat parameters.
    input: RunPodInput

def verify_token(api_key: str = Security(api_key_header)):
    """Authenticate a request against ``CLIENT_AUTH_TOKEN``.

    The ``Authorization`` header may carry the token either bare or as
    ``Bearer <token>``.

    Args:
        api_key: Raw ``Authorization`` header value, or ``None`` when the
            header is absent.

    Returns:
        The raw header value, unchanged, when the token matches.

    Raises:
        HTTPException: 401 when the header is missing, empty, or does not
            carry the expected token.
    """
    if not api_key:
        raise HTTPException(status_code=401, detail="Invalid or missing authentication token")

    # Strip only a leading scheme. str.replace() would also delete "Bearer "
    # occurrences inside the token itself, so a configured token containing
    # that substring could never match and malformed headers would be
    # silently normalised into valid ones.
    scheme = "Bearer "
    presented = api_key[len(scheme):] if api_key.startswith(scheme) else api_key

    # Constant-time comparison so response timing does not reveal how much of
    # the secret matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(presented.encode("utf-8"), CLIENT_AUTH_TOKEN.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid or missing authentication token")
    return api_key

@app.get("/")
def home():
    """Report that the service is up and which upstream model it fronts."""
    # Built from MODEL_NAME so this banner cannot drift from the model that is
    # actually requested upstream.
    return {"status": "online", "model": f"Vedika AI ({MODEL_NAME})"}

# (connect, read) timeouts in seconds for the upstream call. The read timeout
# is generous because the request is non-streaming and may generate many
# tokens before the first byte of the response arrives.
_UPSTREAM_TIMEOUT = (10, 300)

@app.post("/run")
@app.post("/v1/chat/completions")
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the upstream completions API.

    The service's system prompt is placed ahead of the client's messages and
    the request is sent non-streaming with the client's sampling settings.

    Args:
        payload: Request body of the form ``{"input": {...}}``.
        token: Result of ``verify_token``; present only to enforce
            authentication.

    Returns:
        The upstream JSON response, decoded, when upstream answers 200.

    Raises:
        HTTPException: 500 when ``NVIDIA_API_KEY`` is not configured; 504 when
            the upstream call times out; 502 when the upstream cannot be
            reached or answers 200 with a non-JSON body; otherwise the
            upstream's own error status with its response text as detail.
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(status_code=500, detail="Server Error: NVIDIA_API_KEY is missing")

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json"
    }

    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [msg.model_dump() for msg in payload.input.messages]

    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False
    }

    # Only the network call sits inside the try. Previously a blanket
    # `except Exception` also caught the HTTPException raised for non-200
    # replies, so every upstream status (401, 429, ...) surfaced as a 500.
    try:
        response = requests.post(
            INVOKE_URL,
            headers=headers,
            json=nvidia_payload,
            timeout=_UPSTREAM_TIMEOUT,  # without this a stalled upstream hangs the worker forever
        )
    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail=f"Upstream request timed out: {exc}") from exc
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    if response.status_code != 200:
        # Pass upstream error statuses through; an unexpected non-error status
        # (e.g. 204) is not a valid error code, so report it as a bad gateway.
        status = response.status_code if response.status_code >= 400 else 502
        raise HTTPException(status_code=status, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="Upstream returned a non-JSON response") from exc

if __name__ == "__main__":
    # Direct-execution entry point: serve the app on all interfaces, port 7860.
    # uvicorn is imported here so that importing this module does not need it.
    from uvicorn import run as _serve

    _serve(app, port=7860, host="0.0.0.0")