ai-api / app.py
errordrive's picture
Update app.py
0d16573 verified
Raw
History Blame Contribute Delete
4.14 kB
import asyncio
import hmac
import os
import re

import requests
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
# FastAPI application instance; the route decorators below register handlers on it.
app = FastAPI()
# ============================================
# API KEYS
# ============================================
# Both keys are read from the environment once, at import time; os.getenv
# returns None when the variable is not set.
# MY_API_KEY is the credential the chat endpoint compares client tokens against.
MY_API_KEY = os.getenv("MY_API_KEY")
# NVIDIA_API_KEY is sent as the bearer token on requests to NVIDIA_URL.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
# ============================================
# NVIDIA CONFIG
# ============================================
# Upstream chat-completions endpoint that the chat handler POSTs to.
NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Model identifier placed in every upstream request body, reported by the home
# route, and used as the default for ChatRequest.model.
MODEL_NAME = "moonshotai/kimi-k2.6"
# ============================================
# REQUEST MODELS
# ============================================
class Message(BaseModel):
    """One entry of the conversation supplied by the client."""

    # Speaker label; the chat handler forwards it upstream unchanged.
    role: str
    # Message text; the chat handler strips surrounding whitespace before forwarding.
    content: str
class ChatRequest(BaseModel):
    """JSON body accepted by the chat completions endpoint."""

    # Requested model name; falls back to MODEL_NAME when the client omits it.
    # NOTE(review): the chat handler in this file never reads this field and
    # always sends MODEL_NAME upstream - confirm that is intentional.
    model: str = MODEL_NAME
    # Conversation history, in the order it should be sent to the model.
    messages: list[Message]
# ============================================
# HOME ROUTE
# ============================================
@app.get("/")
def home():
    """Return a static status payload describing this service.

    Returns:
        dict: ``status``, ``provider``, ``model`` (the configured MODEL_NAME)
        and a human-readable ``message``.
    """
    status_payload = dict(
        status="online",
        provider="NVIDIA",
        model=MODEL_NAME,
        message="Advanced AI API is running",
    )
    return status_payload
# ============================================
# CLEAN RESPONSE
# ============================================
def clean_response(text):
    """Normalize raw model output into a single clean line of text.

    Removes ``<think>...</think>`` reasoning blocks, collapses every run of
    whitespace (including newlines) into one space, and trims the ends.

    Args:
        text: Raw model output; may be ``None`` or empty.

    Returns:
        str: The cleaned text, or a friendly fallback sentence when the
        input is empty or nothing remains after cleaning.
    """
    fallback = "I'm here. How can I help you?"
    if not text:
        return fallback
    # Remove thinking tags. The ``\Z`` alternative also drops a reasoning block
    # that was never closed (e.g. the output was cut off by the token limit),
    # which would otherwise leak raw reasoning into the reply.
    text = re.sub(r"<think>.*?(?:</think>|\Z)", "", text, flags=re.DOTALL)
    # Remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()
    # Stay consistent with the empty-input case: never hand back an empty reply.
    return text or fallback
# ============================================
# CHAT ENDPOINT
# ============================================
def _extract_token(authorization):
    """Return the credential carried by an Authorization header value.

    Both ``Bearer <token>`` and a bare ``<token>`` are accepted. Only a
    leading ``Bearer `` scheme is removed, so the same text occurring inside
    the token itself is left alone.
    """
    return authorization.strip().removeprefix("Bearer ").strip()


def _is_authorized(token):
    """Return True only when *token* matches the configured MY_API_KEY."""
    # Fail closed: an unset or empty server key must never match anything,
    # otherwise a header of just "Bearer " would authenticate.
    if not MY_API_KEY or not token:
        return False
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(token.encode("utf-8"), MY_API_KEY.encode("utf-8"))


@app.post("/v1/chat/completions")
async def chat(
    request: ChatRequest,
    authorization: str = Header(None)
):
    """Proxy a chat request to the NVIDIA endpoint and return a cleaned reply.

    Args:
        request: Parsed request body holding the conversation messages.
        authorization: Value of the ``Authorization`` header, either
            ``Bearer <token>`` or the bare token.

    Returns:
        dict: A ``chat.completion`` object with a single assistant choice.

    Raises:
        HTTPException: 401 when the header is missing, 403 when the token is
            wrong, 500 when the upstream key is not configured, 504 when the
            upstream call times out, and 502 when the upstream call fails or
            returns an unusable response.
    """
    # Check auth
    if not authorization:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header"
        )
    if not _is_authorized(_extract_token(authorization)):
        raise HTTPException(
            status_code=403,
            detail="Invalid API key"
        )
    # Without this guard the upstream call would be sent with "Bearer None".
    if not NVIDIA_API_KEY:
        raise HTTPException(
            status_code=500,
            detail="NVIDIA_API_KEY is not configured"
        )

    # Strong system prompt (leading and trailing newlines are kept deliberately
    # so the text sent upstream is unchanged).
    system_prompt = (
        "\n"
        "You are a highly accurate conversational AI assistant.\n"
        "Rules:\n"
        "- Reply naturally like a real human.\n"
        "- Keep responses concise and clean.\n"
        "- Use excellent grammar.\n"
        "- Never hallucinate facts.\n"
        "- Never invent information.\n"
        "- If unsure, say you are unsure.\n"
        "- Stay relevant to the user's message.\n"
        "- Never generate random languages.\n"
        "- Reply in English unless the user speaks Bangla.\n"
        "- If the user speaks Bangla, reply naturally in Bangla.\n"
        "- Avoid robotic wording.\n"
        "- Do not generate code unless requested.\n"
    )

    # Build messages: the system prompt first, then the client's conversation.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(
        {"role": m.role, "content": m.content.strip()}
        for m in request.messages
    )

    # NVIDIA request headers
    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json"
    }
    # NVIDIA request body. The configured MODEL_NAME is always used; the
    # client-supplied ``request.model`` is not forwarded.
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.2,
        "top_p": 0.7,
        "stream": False
    }

    try:
        # requests is blocking; running it in a worker thread keeps the event
        # loop free to serve other requests during the (up to 120 s) call.
        response = await asyncio.to_thread(
            requests.post,
            NVIDIA_URL,
            headers=headers,
            json=payload,
            timeout=120
        )
    except requests.Timeout as exc:
        raise HTTPException(
            status_code=504,
            detail="Upstream request timed out"
        ) from exc
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Upstream request failed: {exc}"
        ) from exc

    # Surface upstream errors explicitly instead of failing later on a
    # missing "choices" key.
    if not response.ok:
        raise HTTPException(
            status_code=502,
            detail=(
                f"Upstream returned HTTP {response.status_code}: "
                f"{response.text[:200]}"
            )
        )

    try:
        data = response.json()
        output = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Upstream returned an unexpected response"
        ) from exc

    output = clean_response(output)
    if not output:
        output = "I'm here. How can I help you?"

    return {
        "object": "chat.completion",
        "model": MODEL_NAME,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": output
                },
                "finish_reason": "stop"
            }
        ]
    }