| import os |
| import time |
| import uuid |
| import logging |
| import threading |
| import requests |
| from contextlib import asynccontextmanager |
| from typing import List, Optional |
|
|
| from fastapi import FastAPI, HTTPException, Depends |
| from fastapi.responses import HTMLResponse, JSONResponse |
| from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
| import uvicorn |
|
|
| |
# Root logging configuration: timestamp, level and message on every record.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-scoped logger used by everything in this file.
logger = logging.getLogger(__name__)
|
|
# --- Static configuration -------------------------------------------------
# NOTE(review): the fallback token below is committed in source; set the
# BEARER_TOKEN environment variable for any real deployment.
BEARER_TOKEN = os.getenv("BEARER_TOKEN", "quan11082012")

# Location of the GGUF weights on disk and the name reported to clients.
MODEL_DIR = "/app/models"
MODEL_PATH = os.path.join(MODEL_DIR, "qwen2.5-coder-7b-instruct-q4_k_m.gguf")
MODEL_NAME = "qwen2.5-coder-7b-instruct"

# Values passed to the Llama constructor as n_ctx / n_threads,
# overridable from the environment.
N_CTX = int(os.getenv("N_CTX", "2048"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
|
|
| |
# --- Shared runtime state ---------------------------------------------------
# Written by download_and_load_model() and read by the HTTP handlers.
llm = None                         # loaded model instance, or None until ready
STATUS = "Chưa khởi động"          # human-readable phase; "Ready" / "Lỗi" are terminal
ERROR_MSG = ""                     # text of the exception that caused STATUS == "Lỗi"
DOWNLOAD_PERCENT = 0               # integer 0..100
DOWNLOADED_GB = TOTAL_GB = "0.00"  # pre-formatted sizes shown by the dashboard
|
|
def download_and_load_model():
    """Background job: fetch the GGUF model if needed, then load it.

    Progress is published through the module-level globals ``STATUS``,
    ``DOWNLOAD_PERCENT``, ``DOWNLOADED_GB`` and ``TOTAL_GB``. On success the
    global ``llm`` is bound to the loaded ``Llama`` instance and ``STATUS``
    becomes ``"Ready"``.

    The function never raises: any exception is caught, ``STATUS`` is set to
    ``"Lỗi"`` and the exception text is stored in ``ERROR_MSG``.

    The download is streamed into ``MODEL_PATH + ".part"`` and renamed onto
    ``MODEL_PATH`` only after the transfer is complete, so an interrupted
    download can never be mistaken for a usable model on the next start.
    """
    global llm, STATUS, DOWNLOAD_PERCENT, DOWNLOADED_GB, TOTAL_GB, ERROR_MSG

    url = "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf"

    # An existing file smaller than this is re-downloaded.
    min_model_bytes = 4000000000
    # Used only for the progress display when the server does not announce
    # a usable Content-Length.
    fallback_total_bytes = 4683132032

    try:
        os.makedirs(MODEL_DIR, exist_ok=True)

        if not os.path.exists(MODEL_PATH) or os.path.getsize(MODEL_PATH) < min_model_bytes:
            STATUS = "Đang tiến hành tải xuống model từ Hugging Face Hub..."
            logger.info(STATUS)

            partial_path = MODEL_PATH + ".part"

            # The context manager releases the streamed connection even if
            # the download fails midway.
            with requests.get(url, stream=True, timeout=60) as response:
                if response.status_code != 200:
                    raise Exception(f"Không thể kết nối đến link tải model. HTTP Code: {response.status_code}")

                try:
                    announced_bytes = int(response.headers.get("content-length", 0))
                except ValueError:
                    announced_bytes = 0
                # Guard against a missing/zero header: it would otherwise
                # divide by zero in the percentage computation below.
                total_bytes = announced_bytes if announced_bytes > 0 else fallback_total_bytes
                TOTAL_GB = f"{total_bytes / (1024**3):.2f}"

                downloaded_bytes = 0

                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=4 * 1024 * 1024):
                        if chunk:
                            f.write(chunk)
                            downloaded_bytes += len(chunk)

                            DOWNLOAD_PERCENT = min(int((downloaded_bytes / total_bytes) * 100), 100)
                            DOWNLOADED_GB = f"{downloaded_bytes / (1024**3):.2f}"

            # A short read must not be promoted to the final path.
            if announced_bytes > 0 and downloaded_bytes < announced_bytes:
                raise Exception(
                    f"Tải xuống chưa hoàn tất: nhận được {downloaded_bytes} / {announced_bytes} byte."
                )

            # Atomic publish: MODEL_PATH either holds a complete file or
            # is left untouched.
            os.replace(partial_path, MODEL_PATH)
            logger.info("✅ Tải xuống file GGUF hoàn tất.")
        else:
            logger.info("ℹ️ Tìm thấy model có sẵn tại local. Bỏ qua bước tải xuống.")
            DOWNLOAD_PERCENT = 100
            size_bytes = os.path.getsize(MODEL_PATH)
            DOWNLOADED_GB = f"{size_bytes / (1024**3):.2f}"
            TOTAL_GB = DOWNLOADED_GB

        STATUS = "Đang nạp cấu hình model vào bộ nhớ RAM (Mất khoảng 1 phút)..."
        logger.info(STATUS)

        # Imported lazily so the web app can start (and report progress)
        # before llama_cpp is needed.
        from llama_cpp import Llama
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=N_CTX,
            n_threads=N_THREADS,
            n_gpu_layers=0,
            verbose=False,
            chat_format="chatml"
        )

        STATUS = "Ready"
        logger.info("✅ [HỆ THỐNG] Toàn bộ quy trình hoàn tất. Model đã SẴN SÀNG!")

    except Exception as e:
        # Top-level boundary of the background thread: record the failure
        # for the dashboard and keep the traceback in the log.
        STATUS = "Lỗi"
        ERROR_MSG = str(e)
        logger.exception("❌ Quy trình khởi động thất bại: %s", e)
|
|
| |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    On startup, launches ``download_and_load_model`` in a daemon thread so
    the server can begin answering requests while the model is prepared.
    On shutdown, only logs a message.
    """
    loader_thread = threading.Thread(
        target=download_and_load_model,
        daemon=True,
    )
    loader_thread.start()

    yield

    logger.info("🛑 Đang đóng ứng dụng.")
|
|
| |
# The ASGI application; `lifespan` starts the background model loader.
app = FastAPI(lifespan=lifespan, title="Minecraft Bot LLM Dashboard")

# NOTE(review): CORS is fully open (any origin, method and header).
_cors_settings = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
|
|
# Extracts the "Authorization: Bearer <token>" credentials from a request.
security = HTTPBearer()


def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency that validates the request's bearer token.

    Args:
        credentials: Credentials parsed by the ``security`` scheme.

    Returns:
        The presented token string when it matches ``BEARER_TOKEN``.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    import hmac

    # Constant-time comparison, so response timing does not reveal how much
    # of the token matched. Compare bytes: compare_digest rejects non-ASCII
    # str arguments.
    presented = credentials.credentials.encode("utf-8")
    expected = BEARER_TOKEN.encode("utf-8")
    if not hmac.compare_digest(presented, expected):
        raise HTTPException(
            status_code=401,
            detail="Sai Bearer Token.",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return credentials.credentials
|
|
| |
# One chat turn as sent by the client.
# (Documented with comments rather than a class docstring so the generated
# schema description stays unchanged.)
class ChatMessage(BaseModel):
    # Speaker of the turn; forwarded to the model unchanged.
    role: str
    # Text of the turn.
    content: str
|
|
# Request body accepted by POST /v1/chat/completions.
# (Documented with comments rather than a class docstring so the generated
# schema description stays unchanged.)
class ChatCompletionRequest(BaseModel):
    # Accepted but not used by the handler; responses report MODEL_NAME.
    model: Optional[str] = None
    # Conversation so far, in order.
    messages: List[ChatMessage]
    # Generation cap; the handler substitutes 512 when this is unset.
    max_tokens: Optional[int] = None
    # Sampling temperature.
    temperature: Optional[float] = 0.5
|
|
| |
@app.get("/api/status")
async def get_status():
    """Return the live startup/download progress for the front-end."""
    snapshot = {
        "status": STATUS,
        "percent": DOWNLOAD_PERCENT,
        "downloaded": DOWNLOADED_GB,
        "total": TOTAL_GB,
        "error": ERROR_MSG,
    }
    return JSONResponse(snapshot)
|
|
# Inference used to run directly on the event loop, which implicitly
# serialized calls to the model. It now runs in worker threads, so this
# lock preserves the one-call-at-a-time guarantee.
_LLM_LOCK = threading.Lock()


def _generate_chat_completion(messages, max_tokens, temperature):
    """Run one blocking chat completion on the global model, serialized."""
    with _LLM_LOCK:
        return llm.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )


@app.post("/v1/chat/completions", dependencies=[Depends(verify_token)])
async def chat_completions(request: ChatCompletionRequest):
    """Chat completion endpoint backed by the locally loaded model.

    Args:
        request: Messages plus optional ``max_tokens`` (512 when unset) and
            ``temperature`` (0.5 when unset or null).

    Returns:
        A dict with ``id``, ``object``, ``created``, ``model`` and a single
        entry in ``choices``.

    Raises:
        HTTPException: 503 while the model is not ready; 500 when
            generation fails.
    """
    from fastapi.concurrency import run_in_threadpool

    if STATUS != "Ready" or llm is None:
        raise HTTPException(status_code=503, detail=f"Hệ thống chưa sẵn sàng. Trạng thái hiện tại: {STATUS}")

    messages = [{"role": m.role, "content": m.content} for m in request.messages]
    # The field is Optional, so a client may send an explicit null; fall back
    # to the declared default instead of forwarding None to the model.
    temperature = 0.5 if request.temperature is None else request.temperature

    try:
        # Generation is long-running and blocking; off-loading it keeps the
        # event loop free to serve /api/status and other requests meanwhile.
        result = await run_in_threadpool(
            _generate_chat_completion,
            messages,
            request.max_tokens or 512,
            temperature,
        )
        choice = result["choices"][0]
        return {
            "id": f"chatcmpl-{uuid.uuid4().hex}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": MODEL_NAME,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": choice["message"]["content"]},
                # Report the model's own stop reason (e.g. truncation by
                # max_tokens) when it provides one.
                "finish_reason": choice.get("finish_reason") or "stop"
            }]
        }
    except Exception as e:
        logger.exception("chat completion failed")
        raise HTTPException(status_code=500, detail=f"Lỗi xử lý ngôn ngữ: {str(e)}") from e
|
|
| |
@app.get("/", response_class=HTMLResponse)
async def serve_index():
    """Serve the single-page dashboard (status panel + chat playground).

    The page polls ``/api/status`` and posts test messages to
    ``/v1/chat/completions`` with the bearer token typed into the form.

    Returns:
        HTMLResponse containing the full page.
    """
    # Notes on the embedded page:
    # - checkStatus must be declared with `async function`; `async def` is a
    #   JavaScript syntax error that prevents the whole <script> from running.
    # - Message ids combine Date.now() with a counter: two bubbles appended
    #   in the same millisecond would otherwise share an id, and removing
    #   the "loading" bubble would delete the user's message instead.
    # - The token input is not pre-filled: this page is served without
    #   authentication, so embedding the token would hand it to every visitor.
    html_content = """
<!DOCTYPE html>
<html lang="vi">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LLM Space Console & Chat Playground</title>
    <style>
        :root {
            --bg-main: #121824;
            --bg-card: #1c2333;
            --accent: #3b82f6;
            --accent-success: #10b981;
            --accent-error: #ef4444;
            --text-main: #f3f4f6;
            --text-muted: #9ca3af;
        }
        * { box-sizing: border-box; margin: 0; padding: 0; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
        body { background-color: var(--bg-main); color: var(--text-main); padding: 20px; display: flex; flex-direction: column; height: 100vh; }

        .container { max-width: 900px; width: 100%; margin: 0 auto; display: flex; flex-direction: column; flex: 1; }
        header { margin-bottom: 20px; text-align: center; }
        header h1 { font-size: 24px; color: #fff; margin-bottom: 5px; }
        header p { color: var(--text-muted); font-size: 14px; }

        .card { background-color: var(--bg-card); border-radius: 12px; padding: 20px; margin-bottom: 20px; border: 1px solid #2d3748; }

        .status-row { display: flex; justify-content: space-between; align-items: center; margin-bottom: 12px; }
        .status-badge { padding: 6px 12px; border-radius: 20px; font-weight: bold; font-size: 14px; }
        .status-waiting { background-color: #b45309; color: #fff; }
        .status-ready { background-color: #047857; color: #fff; }
        .status-error { background-color: #b91c1c; color: #fff; }

        .progress-container { width: 100%; background-color: #2d3748; border-radius: 8px; height: 20px; overflow: hidden; position: relative; margin-top: 10px; }
        .progress-bar { height: 100%; background: linear-gradient(90deg, #2563eb, #3b82f6); width: 0%; transition: width 0.3s ease; }
        .progress-text { position: absolute; width: 100%; text-align: center; top: 0; left: 0; line-height: 20px; font-size: 12px; font-weight: bold; color: #fff; }

        .chat-section { display: flex; flex-direction: column; flex: 1; min-height: 300px; opacity: 0.4; pointer-events: none; transition: all 0.3s ease; }
        .chat-section.active { opacity: 1; pointer-events: auto; }

        .token-box { display: flex; gap: 10px; margin-bottom: 15px; }
        .token-box input { flex: 1; background-color: var(--bg-main); border: 1px solid #2d3748; border-radius: 6px; padding: 8px 12px; color: #fff; font-size: 14px; }

        .chat-window { flex: 1; background-color: var(--bg-main); border-radius: 8px; border: 1px solid #2d3748; padding: 15px; overflow-y: auto; margin-bottom: 15px; display: flex; flex-direction: column; gap: 12px; }
        .msg { max-width: 80%; padding: 10px 14px; border-radius: 8px; font-size: 14px; line-height: 1.5; word-break: break-word; }
        .msg.user { background-color: var(--accent); color: #fff; align-self: flex-end; border-bottom-right-radius: 2px; }
        .msg.bot { background-color: #2d3748; color: #fff; align-self: flex-start; border-bottom-left-radius: 2px; }

        .input-box { display: flex; gap: 10px; }
        .input-box textarea { flex: 1; background-color: var(--bg-main); border: 1px solid #2d3748; border-radius: 6px; padding: 10px; color: #fff; font-size: 14px; resize: none; height: 42px; }
        .input-box button { background-color: var(--accent-success); border: none; color: white; padding: 0 20px; border-radius: 6px; font-weight: bold; cursor: pointer; transition: background 0.2s; }
        .input-box button:hover { background-color: #059669; }
        .input-box button:disabled { background-color: #4b5563; cursor: not-allowed; }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>LLM Space Control Console</h1>
            <p>Theo dõi trạng thái tải hệ thống và thử nghiệm kết nối trực quan</p>
        </header>

        <div class="card">
            <div class="status-row">
                <div>
                    <span style="color: var(--text-muted); font-size: 14px;">Trạng thái hệ thống:</span>
                    <div id="status-txt" style="font-weight: bold; margin-top: 4px;">Đang kiểm tra...</div>
                </div>
                <div id="status-badge" class="status-badge status-waiting">Đang xử lý</div>
            </div>

            <div id="download-zone">
                <div style="display: flex; justify-content: space-between; font-size: 13px; color: var(--text-muted);">
                    <span id="download-detail">Đang tính toán dung lượng...</span>
                    <span id="percent-txt">0%</span>
                </div>
                <div class="progress-container">
                    <div id="p-bar" class="progress-bar"></div>
                    <div id="p-bar-txt" class="progress-text">0%</div>
                </div>
            </div>
        </div>

        <div id="chat-zone" class="card chat-section">
            <div class="token-box">
                <input type="text" id="token-input" placeholder="Nhập Bearer Token bảo mật...">
            </div>
            <div id="chat-win" class="chat-window">
                <div class="msg bot">Hệ thống đã kết nối thông suốt! Bạn có thể nhập tin nhắn bên dưới để kiểm tra phản hồi từ mô hình AI.</div>
            </div>
            <div class="input-box">
                <textarea id="msg-input" placeholder="Nhập nội dung trò chuyện test tại đây... (Bấm Enter hoặc Gửi)"></textarea>
                <button id="send-btn" onclick="sendMessage()">Gửi</button>
            </div>
        </div>
    </div>

    <script>
        const statusTxt = document.getElementById("status-txt");
        const statusBadge = document.getElementById("status-badge");
        const downloadZone = document.getElementById("download-zone");
        const downloadDetail = document.getElementById("download-detail");
        const percentTxt = document.getElementById("percent-txt");
        const pBar = document.getElementById("p-bar");
        const pBarTxt = document.getElementById("p-bar-txt");
        const chatZone = document.getElementById("chat-zone");
        const chatWin = document.getElementById("chat-win");
        const msgInput = document.getElementById("msg-input");
        const sendBtn = document.getElementById("send-btn");

        // Hàm pooling cập nhật liên tục trạng thái mỗi giây
        async function checkStatus() {
            try {
                const res = await fetch("/api/status");
                const data = await res.json();

                statusTxt.innerText = data.status;

                if(data.status === "Ready") {
                    statusBadge.innerText = "Sẵn Sàng";
                    statusBadge.className = "status-badge status-ready";
                    downloadZone.style.display = "none";
                    chatZone.classList.add("active");
                } else if(data.status === "Lỗi") {
                    statusBadge.innerText = "Lỗi Hệ Thống";
                    statusBadge.className = "status-badge status-error";
                    statusTxt.innerText = "Lỗi: " + data.error;
                } else {
                    statusBadge.innerText = "Đang Khởi Tạo";
                    statusBadge.className = "status-badge status-waiting";

                    // Cập nhật thông số tiến trình tải xuống
                    percentTxt.innerText = data.percent + "%";
                    pBar.style.width = data.percent + "%";
                    pBarTxt.innerText = data.percent + "%";
                    downloadDetail.innerText = `Đã tải: ${data.downloaded} GB / ${data.total} GB`;
                }
            } catch (e) {
                statusTxt.innerText = "Mất kết nối tới Space Gateway...";
            }
        }

        setInterval(checkStatus, 1500);
        checkStatus();

        // Hàm xử lý gửi tin nhắn Test Chat
        async function sendMessage() {
            const text = msgInput.value.trim();
            const token = document.getElementById("token-input").value.trim();
            if(!text) return;

            // Thêm tin nhắn của User vào khung chat
            appendMessage("user", text);
            msgInput.value = "";
            sendBtn.disabled = true;

            // Tạo hiệu ứng chờ đợi cho Bot
            const loadingId = appendMessage("bot", "⏳ Đang suy nghĩ...");

            try {
                const response = await fetch("/v1/chat/completions", {
                    method: "POST",
                    headers: {
                        "Content-Type": "application/json",
                        "Authorization": `Bearer ${token}`
                    },
                    body: JSON.stringify({
                        model: "qwen2.5-coder-7b-instruct",
                        messages: [{ role: "user", content: text }],
                        max_tokens: 300
                    })
                });

                const data = await response.json();
                removeMessage(loadingId);

                if(response.status === 200) {
                    appendMessage("bot", data.choices[0].message.content);
                } else {
                    appendMessage("bot", `❌ Lỗi hệ thống (${response.status}): ${data.detail || "Không rõ nguyên nhân"}`);
                }
            } catch(err) {
                removeMessage(loadingId);
                appendMessage("bot", "❌ Lỗi mạng: Không thể gửi request đến Endpoint.");
            } finally {
                sendBtn.disabled = false;
            }
        }

        // Counter keeps ids unique even for messages added in the same millisecond.
        let msgCounter = 0;

        function appendMessage(sender, text) {
            msgCounter += 1;
            const id = "msg-" + Date.now() + "-" + msgCounter;
            const div = document.createElement("div");
            div.id = id;
            div.className = `msg ${sender}`;
            div.innerText = text;
            chatWin.appendChild(div);
            chatWin.scrollTop = chatWin.scrollHeight;
            return id;
        }

        // Safe to call more than once for the same id.
        function removeMessage(id) {
            const el = document.getElementById(id);
            if(el) el.remove();
        }

        msgInput.addEventListener("keydown", (e) => {
            if(e.key === "Enter" && !e.shiftKey) {
                e.preventDefault();
                sendMessage();
            }
        });
    </script>
</body>
</html>
"""
    return HTMLResponse(content=html_content)
|
|
if __name__ == "__main__":
    # Script entry point: serve on all interfaces, port 7860, one worker.
    # The "app:app" import string means this file must be importable as
    # the module `app`.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
        workers=1,
    )