VietCat committed on
Commit
43cc2bd
·
1 Parent(s): b987767

init project

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. .gitignore +2 -0
  3. Dockerfile +16 -0
  4. app.py +21 -0
  5. requirements.txt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gguf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # files
2
+ *.DS_Store
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Set the cache environment variable (use HF_HOME instead of TRANSFORMERS_CACHE)
4
+ ENV HF_HOME=/tmp/.cache
5
+
6
+ WORKDIR /app
7
+
8
+ COPY requirements.txt .
9
+ RUN pip install --no-cache-dir -r requirements.txt
10
+
11
+ COPY . .
12
+
13
+ # Expose the default Hugging Face Spaces port (7860)
14
+ EXPOSE 7860
15
+
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from pydantic import BaseModel
3
+ from llama_cpp import Llama
4
+
5
+ app = FastAPI()
6
+
7
+ # Load the GGUF model once at module import; the same instance serves every request
8
+ llm = Llama(
9
+ model_path="models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
10
+ n_ctx=1024,
11
+ n_threads=2 # <= fits the HF Spaces free tier
12
+ )
13
+
14
+ # Request body schema for /generate: a single free-form prompt string
15
+ class PromptRequest(BaseModel):
16
+ prompt: str
17
+
18
+ @app.post("/generate")
19
+ async def generate_text(request: PromptRequest):
20
+ prompt = request.prompt
21
+ output = llm(prompt) # NOTE(review): relies on llama-cpp's default max_tokens — confirm output length is intended
22
+ return {"response": output["choices"][0]["text"]}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ llama-cpp-python==0.2.24