"""Apex Free Engine: a minimal FastAPI service around TinyLlama via llama.cpp.

At import time this module downloads a quantized TinyLlama GGUF model from
the Hugging Face Hub and loads it with llama-cpp-python, tuned for a
free-tier CPU host (2 threads, 512-token context). It then exposes:

  * GET  /         — liveness/metadata probe
  * POST /analyze  — short context-grounded Q&A (Russian-language answers)
"""

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# =========================================================
# MODEL (ULTRA FAST FREE-TIER)
# =========================================================
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# BUG FIX: the file in this repo carries the "-v1.0" infix. The previous
# value "tinyllama-1.1b-chat.Q4_K_M.gguf" does not exist in the repo and
# made hf_hub_download raise EntryNotFoundError at startup.
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

print("[SYSTEM] Downloading TinyLlama...")
MODEL_PATH = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

print("[SYSTEM] Initializing model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,       # small context window — critical for speed on free tier
    n_batch=1024,
    n_threads=2,     # matches the HF CPU Basic core count exactly
    use_mmap=True,   # memory-map weights instead of reading them fully
    use_mlock=False,
    verbose=False,
)
print("[SYSTEM] TinyLlama READY")

# =========================================================
# FASTAPI
# =========================================================
app = FastAPI(title="Apex Free Engine")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # open CORS: any origin may call this demo API
    allow_methods=["*"],
    allow_headers=["*"],
)

# =========================================================
# REQUEST MODEL
# =========================================================
class AnalysisRequest(BaseModel):
    # Free-form context the model must ground its answer in.
    context: str
    # The user's question about that context.
    query: str

# =========================================================
# ROUTES
# =========================================================
@app.get("/")
def health():
    """Liveness probe reporting engine/model/tier metadata."""
    return {
        "status": "online",
        "engine": "Apex",
        "model": "TinyLlama-1.1B",
        "tier": "HF Free",
    }

@app.post("/analyze")
def analyze(req: AnalysisRequest):
    """Answer ``req.query`` using only ``req.context`` via TinyLlama.

    Returns ``{"result": <answer>, "model": "TinyLlama-1.1B"}``.
    Raises HTTP 500 with the underlying error message on any inference
    failure.
    """
    try:
        # Super short but effective prompt, in TinyLlama's chat-template
        # markup (<|system|> / <|user|> / <|assistant|> turn markers).
        prompt = f"""<|system|>
Ты — Apex. Отвечай кратко, логично и по делу. Используй только данный контекст. Если данных недостаточно — скажи об этом. Язык: русский. 
<|user|>
Контекст: {req.context}
Вопрос: {req.query}
<|assistant|>
"""
        output = llm(
            prompt,
            max_tokens=60,    # hard cap — more is too slow on the free tier
            temperature=0.1,  # near-deterministic, minimal "creativity"
            top_p=0.8,
            # BUG FIX: stop=[""] either matches immediately or is ignored,
            # depending on llama-cpp-python version. Stop on the EOS token
            # and on the next turn marker of the chat template instead.
            stop=["</s>", "<|user|>"],
            echo=False,
        )
        answer = output["choices"][0]["text"].strip()
        return {
            "result": answer,
            "model": "TinyLlama-1.1B",
        }
    except Exception as e:  # boundary handler: surface any failure as HTTP 500
        raise HTTPException(status_code=500, detail=str(e))