Spaces:
Sleeping
Sleeping
File size: 4,136 Bytes
5586612 0d16573 5586612 0d16573 5586612 0d16573 5586612 0d16573 5586612 aa1d401 5586612 0d16573 5586612 aa1d401 0d16573 aa1d401 5586612 0d16573 5586612 aa1d401 030316b 0d16573 030316b cab4da4 030316b aa1d401 030316b cab4da4 030316b 0d16573 aa1d401 0d16573 5586612 0d16573 5586612 0d16573 5586612 aa1d401 0d16573 aa1d401 0d16573 5586612 030316b 5586612 0d16573 5586612 aa1d401 0d16573 5586612 aa1d401 5586612 030316b 5586612 cab4da4 5586612 aa1d401 5586612 030316b 5586612 aa1d401 5586612 aa1d401 5586612 aa1d401 5586612 030316b 5586612 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
import requests
import os
import re
# ASGI application object; the route decorators further down register their
# handlers on it.
app = FastAPI()
# ============================================
# API KEYS
# ============================================
# Both keys are read from the environment once, when this module is imported.
# os.getenv returns None for a variable that is not set.
# MY_API_KEY: the token callers must present in the Authorization header of
# POST /v1/chat/completions.
MY_API_KEY = os.getenv("MY_API_KEY")
# NVIDIA_API_KEY: sent as the bearer token on requests to NVIDIA_URL.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
# ============================================
# NVIDIA CONFIG
# ============================================
# Chat-completions URL that the chat endpoint POSTs to.
NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Model identifier sent upstream and echoed in this service's responses; it is
# also the default value of ChatRequest.model.
MODEL_NAME = "moonshotai/kimi-k2.6"
# ============================================
# REQUEST MODELS
# ============================================
# One chat turn in an incoming request: who is speaking and what was said.
class Message(BaseModel):
    # Speaker role; forwarded upstream unchanged. No restriction on the value
    # is declared here.
    role: str
    # Message text; the chat endpoint strips surrounding whitespace before
    # forwarding it.
    content: str
# Request body accepted by POST /v1/chat/completions.
class ChatRequest(BaseModel):
    # Requested model name; defaults to MODEL_NAME.
    # NOTE(review): the chat handler always sends MODEL_NAME upstream and never
    # reads this field -- confirm that ignoring it is intended.
    model: str = MODEL_NAME
    # Conversation turns; forwarded in order after the server's system prompt.
    messages: list[Message]
# ============================================
# HOME ROUTE
# ============================================
@app.get("/")
def home():
    # Status probe: reports that the service is up, which provider it fronts
    # and which model it is configured to use. Takes no input and performs no
    # upstream call.
    status_report = dict(
        status="online",
        provider="NVIDIA",
        model=MODEL_NAME,
        message="Advanced AI API is running",
    )
    return status_report
# ============================================
# CLEAN RESPONSE
# ============================================
def clean_response(text):
    """Return model output with reasoning markup removed and whitespace collapsed.

    Args:
        text: Raw assistant content from the upstream model; may be ``None``
            or empty.

    Returns:
        The cleaned reply on a single line (every whitespace run, including
        newlines, becomes one space). A fixed greeting is returned when there
        is no input or when nothing is left after cleaning.
    """
    fallback = "I'm here. How can I help you?"
    if not text:
        return fallback

    # Remove complete <think>...</think> reasoning blocks.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)

    # A closing tag with no opener left: some models omit the opening tag, so
    # everything up to the last </think> is reasoning, not the answer.
    _, closing_tag, after_reasoning = text.rpartition("</think>")
    if closing_tag:
        text = after_reasoning

    # An opener with no closer means the reasoning was cut off (for example by
    # the token limit); drop it so it is never shown to the user.
    text = text.partition("<think>")[0]

    # Collapse whitespace runs to single spaces and trim the ends.
    text = re.sub(r"\s+", " ", text).strip()

    # Stay consistent with the empty-input case above.
    return text or fallback
# ============================================
# CHAT ENDPOINT
# ============================================
# Proxies a chat request to the NVIDIA chat-completions API.
#
# Parameters:
#   request       - validated request body (conversation messages).
#   authorization - raw Authorization header; expected form "Bearer <key>".
#
# Returns a dict shaped like a chat-completion response with one assistant
# message whose content has been passed through clean_response.
#
# Raises HTTPException:
#   401 - Authorization header missing.
#   403 - presented key does not match MY_API_KEY (or MY_API_KEY is unset).
#   500 - NVIDIA_API_KEY is not configured on the server.
#   502 - the upstream call failed, returned an error status, or returned a
#         body that does not contain choices[0].message.content.
@app.post("/v1/chat/completions")
async def chat(
    request: ChatRequest,
    authorization: str = Header(None)
):
    # Function-scope imports keep this handler self-contained; both modules
    # are standard library.
    import asyncio
    import hmac

    # Check auth
    if not authorization:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header"
        )

    # Strip the scheme only when it is a prefix, not wherever it appears.
    token = authorization.removeprefix("Bearer ").strip()
    expected_key = MY_API_KEY or ""
    # Fail closed when no key is configured (an empty key would otherwise
    # match an empty token), and compare in constant time.
    key_matches = bool(expected_key) and hmac.compare_digest(
        token.encode("utf-8"),
        expected_key.encode("utf-8"),
    )
    if not key_matches:
        raise HTTPException(
            status_code=403,
            detail="Invalid API key"
        )

    if not NVIDIA_API_KEY:
        # Without this check the request would go out as "Bearer None".
        raise HTTPException(
            status_code=500,
            detail="Upstream API key is not configured"
        )

    # Strong system prompt
    system_prompt = """
You are a highly accurate conversational AI assistant.
Rules:
- Reply naturally like a real human.
- Keep responses concise and clean.
- Use excellent grammar.
- Never hallucinate facts.
- Never invent information.
- If unsure, say you are unsure.
- Stay relevant to the user's message.
- Never generate random languages.
- Reply in English unless the user speaks Bangla.
- If the user speaks Bangla, reply naturally in Bangla.
- Avoid robotic wording.
- Do not generate code unless requested.
"""

    # Build messages: the system prompt first, then the caller's turns in order.
    messages = [
        {
            "role": "system",
            "content": system_prompt
        }
    ]
    messages.extend(
        {"role": m.role, "content": m.content.strip()}
        for m in request.messages
    )

    # NVIDIA request headers
    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json"
    }

    # NVIDIA request body
    # NOTE(review): request.model is not used; MODEL_NAME is always sent.
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.2,
        "top_p": 0.7,
        "stream": False
    }

    # requests is blocking; run it in a worker thread so the event loop keeps
    # serving other requests while this one waits (up to the 120 s timeout).
    try:
        response = await asyncio.to_thread(
            requests.post,
            NVIDIA_URL,
            headers=headers,
            json=payload,
            timeout=120,
        )
    except requests.RequestException as exc:
        # Report only the failure class; raw exception text stays server-side.
        raise HTTPException(
            status_code=502,
            detail=f"Upstream request failed: {type(exc).__name__}"
        ) from exc

    if not response.ok:
        raise HTTPException(
            status_code=502,
            detail=f"Upstream returned HTTP {response.status_code}"
        )

    try:
        data = response.json()
        output = data["choices"][0]["message"]["content"]
        if output is not None and not isinstance(output, str):
            raise TypeError("assistant content is not text")
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Upstream returned an unexpected response"
        ) from exc

    output = clean_response(output)
    if not output.strip():
        output = "I'm here. How can I help you?"

    return {
        "object": "chat.completion",
        "model": MODEL_NAME,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": output
                },
                "finish_reason": "stop"
            }
        ]
    }