"""Pokémon analysis API: FastAPI routes backed by a local Llama model and PokeAPI data."""

import json
import logging
import os
import threading
from contextlib import asynccontextmanager
from urllib.parse import quote

import requests
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel

logger = logging.getLogger(__name__)

# =========================
# CONFIG
# =========================
HF_TOKEN = os.getenv("HF_TOKEN")
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Coerce to a real bool so the flag never carries the key string around.
USE_DB = bool(SUPABASE_URL and SUPABASE_KEY)

if USE_DB:
    # Imported lazily so dev mode does not require the supabase package.
    from supabase import create_client

    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
else:
    supabase = None
    print("⚠️ Running WITHOUT Supabase (dev mode)")

models = {}        # model name -> loaded Llama instance
stop_flags = {}    # not referenced by any code visible in this file
memory_store = {}  # conversation id -> list of {"role", "content"} dicts (dev mode)

DEFAULT_USER_ID = "test_user"
DEFAULT_CONV_ID = "test_convo"

# Base endpoint and timeout (seconds) for PokeAPI lookups.
POKEAPI_URL = "https://pokeapi.co/api/v2/pokemon"
POKEAPI_TIMEOUT = 10


# =========================
# REQUEST MODELS
# =========================
class ChatRequest(BaseModel):
    """Body of POST /v1/chat."""

    user_id: str = DEFAULT_USER_ID
    conversation_id: str = DEFAULT_CONV_ID
    messages: list
    temperature: float = 0.5
    stream: bool = False


class TeamRequest(BaseModel):
    """Body of POST /v1/team-builder: a list of Pokémon names."""

    team: list


class PokemonRequest(BaseModel):
    """Body of POST /v1/pokemon-info."""

    name: str


class CompareRequest(BaseModel):
    """Body of POST /v1/compare."""

    pokemon1: str
    pokemon2: str


class BattleRequest(BaseModel):
    """Body of POST /v1/battle."""

    pokemon1: str
    pokemon2: str


# =========================
# 🔥 ULTRA STRONG SYSTEM PROMPT
# =========================
POKEDEX_PROMPT = """You are an expert Pokémon battle analyst AI.

CRITICAL RULES:
- Use ONLY the provided Pokémon data
- DO NOT hallucinate or invent any stats, moves, or abilities
- If data is missing, explicitly say so
- Be logically correct based on Pokémon mechanics

YOUR TASK:
- Analyze Pokémon using:
  • Types
  • Base stats
  • Abilities
  • Physical attributes
- Apply real battle reasoning

RESPONSE FORMAT:
Always structure answers with:
• Overview
• Strengths
• Weaknesses
• Battle Analysis
• Final Verdict (if comparison/battle)

BATTLE LOGIC:
- Consider type effectiveness strictly
- Consider stat advantages (attack, speed, defense)
- Mention why one Pokémon wins logically

STYLE:
- Clear, structured, no fluff
- No repetition
"""

MODEL_PROMPTS = {"llama": POKEDEX_PROMPT}


# =========================
# CLEAN OUTPUT
# =========================
def clean_output(text):
    """Return *text* cut at the first end-of-turn marker, with outer whitespace stripped."""
    for marker in ("<|eot_id|>", "<|end_of_text|>", "<|eof|>"):
        # partition() keeps the whole string when the marker is absent.
        text = text.partition(marker)[0]
    return text.strip()


# =========================
# STORAGE
# =========================
def get_messages(cid):
    """Return the stored messages for conversation *cid*.

    Reads the Supabase ``messages`` table when a database is configured,
    otherwise the in-process ``memory_store``. Returns an empty list when
    nothing is stored.
    """
    if USE_DB:
        res = (
            supabase.table("messages")
            .select("role,content")
            .eq("conversation_id", cid)
            .execute()
        )
        return res.data or []
    return memory_store.get(cid, [])


def save_message(cid, role, content):
    """Append one message to conversation *cid* in Supabase or the in-memory store."""
    if USE_DB:
        supabase.table("messages").insert({
            "conversation_id": cid,
            "role": role,
            "content": content,
        }).execute()
    else:
        memory_store.setdefault(cid, []).append({
            "role": role,
            "content": content,
        })


# =========================
# 🔥 ADVANCED POKEAPI
# =========================
def get_pokemon(name):
    """Fetch a Pokémon summary from PokeAPI.

    Args:
        name: Pokémon name; matched case-insensitively, surrounding
            whitespace ignored.

    Returns:
        A dict with ``name``, ``types``, ``abilities``, ``stats``, ``height``,
        ``weight`` and ``base_experience``, or ``None`` when the name is
        blank or not a string, the API does not answer with HTTP 200, or the
        response cannot be parsed.
    """
    if not isinstance(name, str) or not name.strip():
        # A blank name would otherwise query the listing endpoint.
        return None

    # Escape the name so characters like "/" or "?" cannot alter the URL.
    slug = quote(name.strip().lower(), safe="")
    url = f"{POKEAPI_URL}/{slug}"

    try:
        # Without a timeout a stalled connection would hang the request forever.
        res = requests.get(url, timeout=POKEAPI_TIMEOUT)
        if res.status_code != 200:
            return None
        data = res.json()
        return {
            "name": data["name"],
            "types": [t["type"]["name"] for t in data["types"]],
            "abilities": [a["ability"]["name"] for a in data["abilities"]],
            "stats": {s["stat"]["name"]: s["base_stat"] for s in data["stats"]},
            "height": data["height"],
            "weight": data["weight"],
            "base_experience": data["base_experience"],
        }
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        # Best-effort lookup: network or parsing problems mean "not found".
        logger.warning("PokeAPI lookup failed for %r: %s", name, exc)
        return None
# =========================
# PROMPT BUILDER
# =========================
def build_prompt(messages, cid):
    """Assemble the Llama chat prompt for one completion.

    The prompt is the system prompt, then the last six stored messages of
    conversation *cid*, then *messages*, then an open assistant header for
    the model to complete.

    Args:
        messages: list of dicts with ``role`` and ``content`` keys.
        cid: conversation id used to look up stored history.

    Returns:
        The full prompt string.
    """
    # NOTE(review): nothing visible in this file calls save_message, so the
    # stored history is empty unless it is populated elsewhere.
    history = get_messages(cid)[-6:]

    parts = [
        "<|begin_of_text|>\n",
        "<|start_header_id|>system<|end_header_id|>\n",
        MODEL_PROMPTS["llama"] + "\n<|eot_id|>\n",
    ]
    parts.extend(
        f"<|start_header_id|>{msg['role']}<|end_header_id|>\n{msg['content']}\n<|eot_id|>\n"
        for msg in [*history, *messages]
    )
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n")
    return "".join(parts)


# =========================
# MODEL
# =========================
def load_model(repo, file):
    """Download *file* from Hugging Face repo *repo* (cached under /data) and load it."""
    return Llama(
        model_path=hf_hub_download(
            repo_id=repo,
            filename=file,
            token=HF_TOKEN,
            cache_dir="/data",
        ),
        n_ctx=1536,
        n_threads=4,
        n_batch=512,
        use_mmap=True,
        use_mlock=True,
        f16_kv=True,
        verbose=False,
    )


def load_models():
    """Load the model set and register it in the global ``models`` dict."""
    models["llama"] = load_model(
        "Valtry/llama3.2-3b-q4-gguf",
        "llama3.2-3b-q4.gguf",
    )


# =========================
# APP INIT
# =========================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the models once at startup, before any request is served."""
    load_models()
    yield


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# =========================
# LLM EXECUTION
# =========================
def run_llm(prompt, temperature=0.5):
    """Run one completion on the "llama" model and return the cleaned text.

    Args:
        prompt: full prompt string passed to the model.
        temperature: sampling temperature; defaults to 0.5, the value that
            was previously hard-coded.

    Returns:
        The first choice's text after ``clean_output``.
    """
    llm = models["llama"]
    output = llm(
        prompt,
        max_tokens=1024,
        temperature=temperature,
        top_p=0.9,
        repeat_penalty=1.2,
    )
    return clean_output(output["choices"][0]["text"])


# =========================
# ROUTES
# =========================
@app.post("/v1/chat")
async def chat(req: ChatRequest):
    """Answer a chat request using the stored history plus the new messages."""
    # NOTE(review): req.stream is accepted but not acted on here; the
    # response is always returned in one piece.
    prompt = build_prompt(req.messages, req.conversation_id)
    # Honour the caller's temperature instead of silently ignoring it.
    return {"result": run_llm(prompt, temperature=req.temperature)}


# 🔹 Pokémon Info
@app.post("/v1/pokemon-info")
async def pokemon_info(req: PokemonRequest):
    """Fetch one Pokémon and return the model's analysis, or an error payload."""
    poke = get_pokemon(req.name)
    if not poke:
        return {"error": "Pokémon not found"}

    # NOTE(review): this prompt goes to the model without the chat template
    # or system prompt that build_prompt adds — confirm that is intended.
    prompt = f"""
Analyze this Pokémon in detail:

DATA:
{json.dumps(poke, indent=2)}

Explain:
- Overview
- Strengths
- Weaknesses
- Battle usage
"""
    return {"result": run_llm(prompt)}


# 🔹 Team Builder
@app.post("/v1/team-builder")
async def team_builder(req: TeamRequest):
    """Analyze a team of Pokémon.

    Names that cannot be resolved are dropped. If none resolve, an error
    payload is returned instead of asking the model about an empty team.
    """
    team_data = [poke for poke in map(get_pokemon, req.team) if poke]
    if not team_data:
        return {"error": "Pokémon not found"}

    prompt = f"""
TEAM DATA:
{json.dumps(team_data, indent=2)}

Analyze:
- Team strengths
- Weaknesses
- Missing coverage
- Improvements
"""
    return {"result": run_llm(prompt)}


# 🔹 Compare
@app.post("/v1/compare")
async def compare(req: CompareRequest):
    """Compare two Pokémon; returns an error payload if either is unknown."""
    p1 = get_pokemon(req.pokemon1)
    p2 = get_pokemon(req.pokemon2)
    # A failed lookup would otherwise reach the model as the literal "null".
    if not p1 or not p2:
        return {"error": "Pokémon not found"}

    prompt = f"""
Compare these Pokémon:

POKEMON 1:
{json.dumps(p1, indent=2)}

POKEMON 2:
{json.dumps(p2, indent=2)}

Analyze:
- Stat differences
- Type advantages
- Which is better and why
"""
    return {"result": run_llm(prompt)}


# 🔹 Battle
@app.post("/v1/battle")
async def battle(req: BattleRequest):
    """Predict a battle between two Pokémon; returns an error payload if either is unknown."""
    p1 = get_pokemon(req.pokemon1)
    p2 = get_pokemon(req.pokemon2)
    # A failed lookup would otherwise reach the model as the literal "null".
    if not p1 or not p2:
        return {"error": "Pokémon not found"}

    prompt = f"""
Simulate a battle:

POKEMON 1:
{json.dumps(p1, indent=2)}

POKEMON 2:
{json.dumps(p2, indent=2)}

Predict:
- Winner
- Why (type + stats reasoning)
- Key deciding factors
"""
    return {"result": run_llm(prompt)}


# =========================
# RUN
# =========================
if __name__ == "__main__":
    # Pass the app object rather than an "app:app" import string: it works
    # whatever this file is named and avoids importing the module twice.
    uvicorn.run(app, host="0.0.0.0", port=7860)