"""OpenAI-style chat-completions proxy in front of the NVIDIA inference API.

Clients authenticate with ``MY_API_KEY``; requests are forwarded to NVIDIA
using ``NVIDIA_API_KEY`` with a fixed system prompt and a fixed model.
"""

import asyncio
import os
import re
import secrets

import requests
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel

app = FastAPI()

# ============================================
# API KEYS
# ============================================

MY_API_KEY = os.getenv("MY_API_KEY")
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# ============================================
# NVIDIA CONFIG
# ============================================

NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
MODEL_NAME = "moonshotai/kimi-k2.6"

# Seconds to wait for the upstream completion before giving up.
_NVIDIA_TIMEOUT = 120

# Reply used whenever the upstream model produces no usable text.
_FALLBACK_REPLY = "I'm here. How can I help you?"

# System prompt prepended to every conversation sent upstream.
_SYSTEM_PROMPT = """
You are a highly accurate conversational AI assistant.

Rules:
- Reply naturally like a real human.
- Keep responses concise and clean.
- Use excellent grammar.
- Never hallucinate facts.
- Never invent information.
- If unsure, say you are unsure.
- Stay relevant to the user's message.
- Never generate random languages.
- Reply in English unless the user speaks Bangla.
- If the user speaks Bangla, reply naturally in Bangla.
- Avoid robotic wording.
- Do not generate code unless requested.
"""

# Compiled once at import time; both are applied to every model reply.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
_WHITESPACE_RE = re.compile(r"\s+")


# ============================================
# REQUEST MODELS
# ============================================

class Message(BaseModel):
    """A single chat turn supplied by the client."""

    role: str
    content: str


class ChatRequest(BaseModel):
    """Request body for ``POST /v1/chat/completions``."""

    # NOTE(review): ``model`` is accepted but never forwarded upstream; the
    # endpoint always uses MODEL_NAME. Confirm this is intended.
    model: str = MODEL_NAME
    messages: list[Message]


# ============================================
# HOME ROUTE
# ============================================

@app.get("/")
def home():
    """Return a static status document describing this service."""
    return {
        "status": "online",
        "provider": "NVIDIA",
        "model": MODEL_NAME,
        "message": "Advanced AI API is running",
    }


# ============================================
# CLEAN RESPONSE
# ============================================

def clean_response(text):
    """Normalize raw model output for the client.

    Removes ``<think>...</think>`` blocks, collapses every run of whitespace
    (including newlines) into a single space, and trims the result.

    Args:
        text: Raw content string from the model; may be ``None`` or empty.

    Returns:
        The cleaned text. A falsy ``text`` yields the fallback greeting; a
        reply consisting only of think blocks yields an empty string.
    """
    if not text:
        return _FALLBACK_REPLY

    # Remove thinking tags
    text = _THINK_BLOCK_RE.sub("", text)

    # Remove extra spaces
    text = _WHITESPACE_RE.sub(" ", text)

    return text.strip()


# ============================================
# CHAT ENDPOINT
# ============================================

def _require_valid_api_key(authorization):
    """Raise ``HTTPException`` unless the header carries the configured key.

    Raises:
        HTTPException: 401 when the header is missing or empty; 403 when the
            token does not match ``MY_API_KEY`` or no key is configured.
    """
    if not authorization:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header",
        )

    # Strip only a leading "Bearer " scheme; a bare token is still accepted.
    token = authorization.strip().removeprefix("Bearer ").strip()

    # Fail closed when the server key is unset or empty, and compare in
    # constant time so the check does not leak key contents via timing.
    if not MY_API_KEY or not secrets.compare_digest(
        token.encode("utf-8"), MY_API_KEY.encode("utf-8")
    ):
        raise HTTPException(
            status_code=403,
            detail="Invalid API key",
        )


@app.post("/v1/chat/completions")
async def chat(
    request: ChatRequest,
    authorization: str = Header(None),
):
    """Forward a chat conversation to NVIDIA and return the cleaned reply.

    Args:
        request: Conversation to send; the system prompt is prepended to it.
        authorization: ``Authorization`` header, expected as ``Bearer <key>``.

    Returns:
        A ``chat.completion`` dict containing one assistant message.

    Raises:
        HTTPException: 401/403 on authentication failure; 500 when
            ``NVIDIA_API_KEY`` is not configured; 502 when the upstream
            request fails, returns a non-2xx status, or returns a body that
            lacks ``choices[0].message.content``.
    """
    # Check auth
    _require_valid_api_key(authorization)

    if not NVIDIA_API_KEY:
        raise HTTPException(
            status_code=500,
            detail="NVIDIA_API_KEY is not configured",
        )

    # Build messages
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
    messages.extend(
        {"role": m.role, "content": m.content.strip()}
        for m in request.messages
    )

    # NVIDIA request headers
    headers = {
        "Authorization": f"Bearer {NVIDIA_API_KEY}",
        "Content-Type": "application/json",
    }

    # NVIDIA request body
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.2,
        "top_p": 0.7,
        "stream": False,
    }

    try:
        # requests is blocking; run it in a worker thread so this coroutine
        # does not stall the event loop while waiting on the upstream.
        response = await asyncio.to_thread(
            requests.post,
            NVIDIA_URL,
            headers=headers,
            json=payload,
            timeout=_NVIDIA_TIMEOUT,
        )
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Upstream request failed: {exc}",
        ) from exc

    if not response.ok:
        # Truncate the upstream body so an error page cannot bloat our reply.
        raise HTTPException(
            status_code=502,
            detail=(
                f"Upstream returned HTTP {response.status_code}: "
                f"{response.text[:500]}"
            ),
        )

    try:
        data = response.json()
        output = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail="Upstream returned an unexpected response body",
        ) from exc

    # clean_response already strips; an empty result means the reply held
    # nothing but removed content, so fall back to the greeting.
    output = clean_response(output) or _FALLBACK_REPLY

    return {
        "object": "chat.completion",
        "model": MODEL_NAME,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": output,
                },
                "finish_reason": "stop",
            }
        ],
    }