VietCat committed on
Commit 0afcb9c · 1 Parent(s): 1becc27

init project

Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +27 -0
  3. app/main.py +162 -0
  4. app/model_loader.py +13 -0
  5. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ # files
+ *.DS_Store
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ FROM python:3.10-slim
+
+ # Install the tools needed to build llama-cpp-python
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     cmake \
+     git \
+     wget \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Upgrade pip
+ RUN pip install --upgrade pip
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Download the GGUF model from the Hugging Face Hub (example: TheBloke's repos)
+ # Change the URL and file name if you use a different repo
+ RUN mkdir -p models && \
+     wget -O models/gemma34b.gguf https://huggingface.co/Mungert/gemma-3-4b-it-gguf/resolve/main/google_gemma-3-4b-it-q4_k_l.gguf
+
+ COPY ./app ./app
+
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,162 @@
+ from fastapi import FastAPI, Request
+ from pydantic import BaseModel
+ import logging
+ import time
+ import asyncio
+ import os
+
+ from app.model_loader import load_model
+
+ app = FastAPI()
+ llm = None  # Initialized at startup
+
+ class PromptRequest(BaseModel):
+     prompt: str
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+
+ def format_prompt_as_chat(user_prompt: str) -> str:
+     messages = [
+         {
+             "role": "system",
+             "content": "Bạn là trợ lý đáng tin cậy, luôn trả lời ngắn gọn, và chính xác.",
+         },
+         {"role": "user", "content": user_prompt.strip()},
+     ]
+     formatted = (
+         "<|system|>\n" + messages[0]["content"] + "</s>\n"
+         "<|user|>\n" + messages[1]["content"] + "</s>\n"
+         "<|assistant|>\n"
+     )
+     return formatted
+
+ def format_prompt_as_user_prompt(user_prompt: str) -> str:
+     messages = [
+         {"role": "user", "content": user_prompt.strip()},
+     ]
+     formatted = (
+         "<|user|>\n" + messages[0]["content"] + "</s>\n"
+     )
+     return formatted
+
+ def format_prompt_as_pure_prompt(user_prompt: str) -> str:
+     messages = [
+         {"role": "user", "content": user_prompt.strip()},
+     ]
+     formatted = (
+         "" + messages[0]["content"] + "\n"
+     )
+     return formatted
+
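A caveat on the formatters above: the <|system|>/<|user|>/</s> markers are a Zephyr/ChatML-style template, while the file baked into the image is a Gemma 3 instruct build, which was trained with <start_of_turn>/<end_of_turn> turn markers (the Vietnamese system prompt translates to "You are a trustworthy assistant; always answer concisely and accurately."). A minimal sketch of a Gemma-style formatter, to be verified against the model card before relying on it:

    def format_prompt_as_gemma_chat(user_prompt: str) -> str:
        # Gemma-style turn markers; confirm the exact template on the model card.
        return (
            "<start_of_turn>user\n" + user_prompt.strip() + "<end_of_turn>\n"
            "<start_of_turn>model\n"
        )

With this template the natural stop sequence is "<end_of_turn>" rather than "</s>".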
+ @app.on_event("startup")
+ async def startup_event():
+     global llm
+     model_path = "models/gemma34b.gguf"
+
+     # Wait for the model file if it is not there yet (up to 60 seconds)
+     timeout = 60
+     waited = 0
+     while not os.path.exists(model_path) and waited < timeout:
+         logging.info(f"Waiting for the model to appear at {model_path}...")
+         await asyncio.sleep(2)
+         waited += 2
+
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"Model not found after {timeout} seconds: {model_path}")
+
+     # Load the model in a separate thread so the event loop is not blocked
+     llm = await asyncio.to_thread(load_model)
+     logging.info("✅ Model loaded successfully.")
+
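@app.on_event("startup") still works but is deprecated in recent FastAPI releases, and requirements.txt leaves fastapi unpinned, so a fresh build will pull a version that warns about it. A minimal sketch of the equivalent lifespan handler, assuming the same load_model helper:

    import asyncio
    from contextlib import asynccontextmanager

    from fastapi import FastAPI

    from app.model_loader import load_model

    llm = None

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # Runs once before the app starts serving; replaces the startup hook.
        global llm
        llm = await asyncio.to_thread(load_model)
        yield
        # Shutdown cleanup would go here.

    app = FastAPI(lifespan=lifespan)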
+ @app.post("/chat")
+ async def chat(request: Request, prompt: PromptRequest):
+     start_time = time.time()
+     logging.info(f"📩 Received request from {request.client.host} at {time.strftime('%Y-%m-%d %H:%M:%S')}")
+
+     formatted_prompt = format_prompt_as_chat(prompt.prompt)
+
+     output = await asyncio.to_thread(
+         llm,
+         formatted_prompt,
+         max_tokens=256,
+         temperature=0.7,
+         top_k=50,
+         top_p=0.95,
+         stop=["</s>"]
+     )
+
+     end_time = time.time()
+     duration = end_time - start_time
+     logging.info(f"✅ Finished in {duration:.2f} seconds.")
+
+     return {"response": output["choices"][0]["text"].strip()}
+
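One caution that applies to this handler and the three below: each request runs the shared llm callable on a worker thread via asyncio.to_thread, so overlapping requests can call into llama.cpp concurrently, and a single Llama instance is generally not safe for that. A minimal sketch of serializing inference behind an asyncio.Lock (run_inference is a hypothetical helper, not part of this commit):

    inference_lock = asyncio.Lock()

    async def run_inference(formatted_prompt: str) -> str:
        # Only one request at a time reaches the model.
        async with inference_lock:
            output = await asyncio.to_thread(
                llm,
                formatted_prompt,
                max_tokens=256,
                temperature=0.7,
                top_k=50,
                top_p=0.95,
                stop=["</s>"],
            )
        return output["choices"][0]["text"].strip()

This would also deduplicate the identical sampling block repeated in the handlers below.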
+ @app.post("/userchat")
+ async def userchat(request: Request, prompt: PromptRequest):
+     start_time = time.time()
+     logging.info(f"📩 Received request from {request.client.host} at {time.strftime('%Y-%m-%d %H:%M:%S')}")
+
+     formatted_prompt = format_prompt_as_user_prompt(prompt.prompt)
+
+     output = await asyncio.to_thread(
+         llm,
+         formatted_prompt,
+         max_tokens=256,
+         temperature=0.7,
+         top_k=50,
+         top_p=0.95,
+         stop=["</s>"]
+     )
+
+     end_time = time.time()
+     duration = end_time - start_time
+     logging.info(f"✅ Finished in {duration:.2f} seconds.")
+
+     return {"response": output["choices"][0]["text"].strip()}
+
+ @app.post("/purechat")
+ async def purechat(request: Request, prompt: PromptRequest):
+     start_time = time.time()
+     logging.info(f"📩 Received request from {request.client.host} at {time.strftime('%Y-%m-%d %H:%M:%S')}")
+
+     formatted_prompt = format_prompt_as_pure_prompt(prompt.prompt)
+
+     output = await asyncio.to_thread(
+         llm,
+         formatted_prompt,
+         max_tokens=256,
+         temperature=0.7,
+         top_k=50,
+         top_p=0.95,
+         stop=["</s>"]
+     )
+
+     end_time = time.time()
+     duration = end_time - start_time
+     logging.info(f"✅ Finished in {duration:.2f} seconds.")
+
+     return {"response": output["choices"][0]["text"].strip()}
+
+ @app.get("/")
+ async def get():
+     start_time = time.time()
+     logging.info(f"📩 Received GET request at {time.strftime('%Y-%m-%d %H:%M:%S')}")
+
+     formatted_prompt = format_prompt_as_user_prompt("Bạn tên là gì?")
+
+     output = await asyncio.to_thread(
+         llm,
+         formatted_prompt,
+         max_tokens=256,
+         temperature=0.7,
+         top_k=50,
+         top_p=0.95,
+         stop=["</s>"]
+     )
+
+     end_time = time.time()
+     duration = end_time - start_time
+     logging.info(f"✅ Finished in {duration:.2f} seconds.")
+
+     return {"response": output["choices"][0]["text"].strip()}
app/model_loader.py ADDED
@@ -0,0 +1,13 @@
+ from llama_cpp import Llama
+
+ llm = None  # Will be initialized on first use
+
+ def load_model():
+     global llm
+     if llm is None:
+         llm = Llama(
+             model_path="models/gemma34b.gguf",
+             n_ctx=2048,
+             n_threads=4,
+         )
+     return llm
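For debugging outside the API, the loader can be exercised directly. A minimal sketch, assuming the GGUF file has already been downloaded to models/gemma34b.gguf as in the Dockerfile:

    from app.model_loader import load_model

    # First call loads the model; later calls return the cached instance.
    llm = load_model()
    output = llm("Xin chào!", max_tokens=64, stop=["</s>"])  # "Hello!"
    print(output["choices"][0]["text"].strip())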
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ fastapi
+ uvicorn
+ llama-cpp-python==0.2.24