from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
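# Runtime dependencies (assumed contents of this Space's requirements.txt):
#   fastapi, uvicorn, pydantic, llama-cpp-python, huggingface_hub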
# =========================================================
# MODEL (ULTRA FAST FREE-TIER)
# =========================================================
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"  # exact file name in the GGUF repo
print("[SYSTEM] Downloading TinyLlama...")
MODEL_PATH = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME
)
print("[SYSTEM] Initializing model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,       # small context window is critical for CPU speed
    n_batch=512,     # llama-cpp caps n_batch at n_ctx, so match it
    n_threads=2,     # exactly the 2 vCPUs of HF CPU Basic
    use_mmap=True,
    use_mlock=False,
    verbose=False
)
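# Optional warm-up (an assumption, not required): a one-token generation
# pages the mmap'd weights in so the first real request skips cold-start latency.
_ = llm("warmup", max_tokens=1)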
print("[SYSTEM] TinyLlama READY")
# =========================================================
# FASTAPI
# =========================================================
app = FastAPI(title="Apex Free Engine")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# =========================================================
# REQUEST MODEL
# =========================================================
class AnalysisRequest(BaseModel):
    context: str
    query: str
# =========================================================
# ROUTES
# =========================================================
@app.get("/")
def health():
    return {
        "status": "online",
        "engine": "Apex",
        "model": "TinyLlama-1.1B",
        "tier": "HF Free"
    }
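# Quick health check (hypothetical local run, default uvicorn port):
#   curl http://localhost:8000/
#   -> {"status": "online", "engine": "Apex", "model": "TinyLlama-1.1B", "tier": "HF Free"}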
@app.post("/analyze")
def analyze(req: AnalysisRequest):
    try:
        # Deliberately short but focused prompt, in TinyLlama's chat format
        prompt = f"""<|system|>
You are Apex.
Answer briefly, logically, and to the point.
Use only the provided context.
If the context is insufficient, say so.
Respond in Russian.
</s>
<|user|>
Context:
{req.context}
Question:
{req.query}
</s>
<|assistant|>
"""
        output = llm(
            prompt,
            max_tokens=60,    # anything longer is too slow on the free tier
            temperature=0.1,  # keep generation nearly deterministic
            top_p=0.8,
            stop=["</s>"],
            echo=False
        )
        answer = output["choices"][0]["text"].strip()
        return {
            "result": answer,
            "model": "TinyLlama-1.1B"
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=str(e)
        )
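# Example request against /analyze (localhost and port are placeholders;
# HF Spaces typically serve FastAPI via `uvicorn app:app --host 0.0.0.0 --port 7860`):
#   curl -X POST http://localhost:7860/analyze \
#     -H "Content-Type: application/json" \
#     -d '{"context": "FastAPI is a Python web framework.", "query": "What is FastAPI?"}'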