from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# =========================================================
# MODEL (ULTRA FAST FREE-TIER)
# =========================================================
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
| print("[SYSTEM] Downloading TinyLlama...") | |
| MODEL_PATH = hf_hub_download( | |
| repo_id=REPO_ID, | |
| filename=FILENAME | |
| ) | |
| print("[SYSTEM] Initializing model...") | |
| llm = Llama( | |
| model_path=MODEL_PATH, | |
    n_ctx=512,      # CRITICAL for speed
    n_batch=1024,
    n_threads=2,    # matches HF CPU Basic exactly
    use_mmap=True,
    use_mlock=False,
    verbose=False
)
print("[SYSTEM] TinyLlama READY")
# =========================================================
# FASTAPI
# =========================================================
app = FastAPI(title="Apex Free Engine")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# =========================================================
# REQUEST MODEL
# =========================================================
class AnalysisRequest(BaseModel):
    context: str
    query: str
# =========================================================
# ROUTES
# =========================================================
@app.get("/")  # health-check route; path assumed
def health():
    return {
        "status": "online",
        "engine": "Apex",
        "model": "TinyLlama-1.1B",
        "tier": "HF Free"
    }
@app.post("/analyze")  # route path assumed from the function name
def analyze(req: AnalysisRequest):
    try:
        # SUPER SHORT BUT SMART PROMPT
        prompt = f"""<|system|>
You are Apex.
Answer briefly, logically, and to the point.
Use only the given context.
If the context is insufficient, say so.
Language: Russian.
</s>
<|user|>
Context:
{req.context}
Question:
{req.query}
</s>
<|assistant|>
"""
        output = llm(
            prompt,
            max_tokens=60,     # can't afford more on the free tier
            temperature=0.1,   # minimal creativity
            top_p=0.8,
            stop=["</s>"],
            echo=False
        )
| answer = output["choices"][0]["text"].strip() | |
| return { | |
| "result": answer, | |
| "model": "TinyLlama-1.1B" | |
| } | |
| except Exception as e: | |
| raise HTTPException( | |
| status_code=500, | |
| detail=str(e) | |
| ) | |