# apex-engine / app.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# =========================================================
# MODEL (ULTRA FAST FREE-TIER)
# =========================================================
# Small 1.1B chat model in 4-bit GGUF quantization — chosen to fit the
# HF free CPU tier both in RAM and in tokens/second.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# FIX: the repo publishes "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf";
# the previous name "tinyllama-1.1b-chat.Q4_K_M.gguf" is not in the repo,
# so hf_hub_download raised EntryNotFoundError and the app never started.
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

print("[SYSTEM] Downloading TinyLlama...")
MODEL_PATH = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME,
)

print("[SYSTEM] Initializing model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,        # small context window — critical for prompt-eval speed on CPU
    n_batch=1024,
    n_threads=2,      # matches the 2 vCPUs of the HF CPU Basic tier
    use_mmap=True,    # memory-map weights instead of loading them fully
    use_mlock=False,  # don't pin pages; the free tier has limited RAM
    verbose=False,
)
print("[SYSTEM] TinyLlama READY")
# =========================================================
# FASTAPI
# =========================================================
app = FastAPI(title="Apex Free Engine")
# Allow browser clients from any origin to call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): wildcard CORS — fine for a demo, lock down for production
    allow_methods=["*"],
    allow_headers=["*"],
)
# =========================================================
# REQUEST MODEL
# =========================================================
class AnalysisRequest(BaseModel):
    """Payload for POST /analyze: a context passage plus a question about it."""

    # Free-form text the model must ground its answer in.
    context: str
    # The user's question about `context`.
    query: str
# =========================================================
# ROUTES
# =========================================================
@app.get("/")
def health():
    """Liveness probe: report engine identity, model and hosting tier."""
    status_payload = {
        "status": "online",
        "engine": "Apex",
        "model": "TinyLlama-1.1B",
        "tier": "HF Free",
    }
    return status_payload
@app.post("/analyze")
def analyze(req: AnalysisRequest):
    """Answer ``req.query`` using only ``req.context`` via TinyLlama.

    Returns a dict with the stripped model answer and the model name.
    Any failure during generation is surfaced as an HTTP 500 whose
    detail is the exception text.
    """
    try:
        # Very short system prompt in the TinyLlama chat-template format
        # (system / user / assistant turns separated by </s>).
        prompt = f"""<|system|>
Ты — Apex.
Отвечай кратко, логично и по делу.
Используй только данный контекст.
Если данных недостаточно — скажи об этом.
Язык: русский.
</s>
<|user|>
Контекст:
{req.context}
Вопрос:
{req.query}
</s>
<|assistant|>
"""
        completion = llm(
            prompt,
            max_tokens=60,    # hard cap — longer generations are too slow on the free tier
            temperature=0.1,  # near-deterministic output, minimal "creativity"
            top_p=0.8,
            stop=["</s>"],    # end of the assistant turn
            echo=False,       # return only the generated text, not the prompt
        )
        text = completion["choices"][0]["text"]
        return {
            "result": text.strip(),
            "model": "TinyLlama-1.1B",
        }
    except Exception as exc:
        # Surface any model/runtime failure as a standard HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc))