Modal / app.py
Vedika-advanced-AI's picture
Update app.py
cbc80cd verified
import hmac
import os
from typing import List, Optional, Any

import requests
from fastapi import FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
# FastAPI application object; the route decorators further down register
# their handlers on it.
app = FastAPI(title="Vedika AI Gateway")

# --- Upstream (NVIDIA) settings -------------------------------------------
# Read once at import time; None when the variable is not set.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
MODEL_NAME = "stepfun-ai/step-3.7-flash"

# --- Client authentication ------------------------------------------------
# NOTE(review): when AUTH_TOKEN is unset this falls back to a fixed token that
# is visible in the source. Always set AUTH_TOKEN in real deployments.
CLIENT_AUTH_TOKEN = os.getenv("AUTH_TOKEN", "my-super-secret-token")
# auto_error=False: presumably a missing header arrives at the dependency as
# None instead of FastAPI rejecting the request itself — confirm against the
# FastAPI docs.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

# System message placed ahead of every client conversation.
VEDIKA_SYSTEM_PROMPT = dict(
    role="system",
    content="You are Vedika AI, an exceptionally advanced multimedia large language model designed for maximum speed and real-time processing. Your primary purpose is to deliver highly accurate data insights and technical automation swiftly. You are operating via the NVIDIA L40S infrastructure at ultra-high performance levels, capable of reading and processing up to 13,000 tokens per second. Maintain absolute technical precision, efficiency, and a professional persona in every execution.",
)
# One chat message as supplied by the client.
class MessageModel(BaseModel):
    # Speaker role string; no restriction on its value is applied here.
    role: str
    # Deliberately untyped: any JSON value is accepted.
    content: Any
# Contents of the request's "input" object: the conversation plus the
# generation settings.
class RunPodInput(BaseModel):
    # Conversation so far, in the order the client sent it.
    messages: List[MessageModel]

    # Generation settings. Each may be omitted (the default below applies) or
    # sent explicitly as null.
    max_tokens: Optional[int] = 16384
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 0.95
# Top-level request body. Everything is nested under a single "input" key,
# i.e. the expected JSON shape is {"input": {...}}.
class RunPodPayload(BaseModel):
    input: RunPodInput
def verify_token(api_key: str = Security(api_key_header)):
    """Validate the client's ``Authorization`` header against the shared token.

    The header may carry the token either bare or with a leading
    ``"Bearer "`` scheme.

    Args:
        api_key: Raw ``Authorization`` header value, or a falsy value when
            the header is absent.

    Returns:
        The header value exactly as received.

    Raises:
        HTTPException: 401 when the header is missing or the token does not
            match ``CLIENT_AUTH_TOKEN``.
    """
    rejection = HTTPException(
        status_code=401, detail="Invalid or missing authentication token"
    )
    if not api_key:
        raise rejection

    # Strip the scheme only when it is a genuine prefix. Removing "Bearer "
    # wherever it occurs would accept malformed headers such as
    # "Bearer Bearer <token>" and would make a token that itself contains
    # "Bearer " impossible to present.
    scheme = "Bearer "
    presented = api_key[len(scheme):] if api_key.startswith(scheme) else api_key

    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token was correct. Compared as bytes because compare_digest
    # rejects str arguments containing non-ASCII characters.
    if not hmac.compare_digest(
        presented.encode("utf-8"), CLIENT_AUTH_TOKEN.encode("utf-8")
    ):
        raise rejection
    return api_key
@app.get("/")
def home():
    """Status endpoint: report that the gateway is up and which model it fronts.

    Returns:
        A dict with ``"status"`` set to ``"online"`` and ``"model"`` set to a
        display string containing the configured model id.
    """
    # Derived from MODEL_NAME so the reported model cannot drift from the one
    # actually requested upstream.
    return {"status": "online", "model": f"Vedika AI ({MODEL_NAME})"}
# (connect, read) timeouts in seconds for the upstream call. The read budget
# is generous because the reply is requested non-streamed, so nothing arrives
# until generation has finished.
_UPSTREAM_TIMEOUT = (10, 300)


@app.post("/run")
@app.post("/v1/chat/completions")
def process_chat(payload: RunPodPayload, token: str = Security(verify_token)):
    """Forward a chat request to the NVIDIA endpoint and return its JSON reply.

    The fixed system prompt is prepended to the client's messages, the model
    name is set server-side, and streaming is disabled.

    Args:
        payload: Request body of the form ``{"input": {...}}``.
        token: Result of the ``verify_token`` dependency. Not used in the
            body; declaring it is what makes the auth check run.

    Returns:
        The upstream response body decoded from JSON, when the upstream
        answers with HTTP 200.

    Raises:
        HTTPException:
            * 500 when ``NVIDIA_API_KEY`` is not configured.
            * 504 when the upstream call times out.
            * 502 when the upstream cannot be reached or its 200 reply is
              not valid JSON.
            * the upstream's own status code and body text for any other
              non-200 reply (non-error codes are reported as 502).
    """
    if not NVIDIA_API_KEY:
        raise HTTPException(status_code=500, detail="Server Error: NVIDIA_API_KEY is missing")

    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Accept": "application/json",
    }
    formatted_messages = [VEDIKA_SYSTEM_PROMPT] + [
        msg.model_dump() for msg in payload.input.messages
    ]
    nvidia_payload = {
        "model": MODEL_NAME,
        "messages": formatted_messages,
        "max_tokens": payload.input.max_tokens,
        "temperature": payload.input.temperature,
        "top_p": payload.input.top_p,
        "stream": False,
    }

    # Only the network call is guarded, and only for transport errors, so the
    # HTTPExceptions raised below can never be caught and rewritten as 500.
    try:
        response = requests.post(
            INVOKE_URL,
            headers=headers,
            json=nvidia_payload,
            timeout=_UPSTREAM_TIMEOUT,
        )
    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail="Upstream request timed out") from exc
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    if response.status_code != 200:
        # Pass the upstream error through with its real status (401, 429, ...).
        # An unexpected non-error status is not a usable completion either,
        # so it is reported as a bad gateway rather than echoed as success.
        status = response.status_code if response.status_code >= 400 else 502
        raise HTTPException(status_code=status, detail=response.text)

    try:
        return response.json()
    except ValueError as exc:
        # requests' JSON decoding errors are ValueError subclasses.
        raise HTTPException(
            status_code=502, detail="Upstream returned a non-JSON response"
        ) from exc
if __name__ == "__main__":
    # Direct execution (e.g. `python app.py`): serve the app with uvicorn on
    # all interfaces, port 7860. uvicorn is imported here rather than at the
    # top of the file, so importing this module does not require it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)