Spaces:
Build error
Build error
Commit ·
02a6500
1
Parent(s): 3742f73
feat: minimal FastAPI app for Llama via HF Inference Endpoint; Dockerfile + requirements
Browse files- Dockerfile +14 -0
- app.py +33 -0
- requirements.txt +3 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
FROM python:3.10-slim

# Run as a non-root user (uid 1000), as expected by the Spaces Docker runtime.
RUN useradd -m -u 1000 user
USER user
# BUG FIX: the image previously baked in the author's entire Windows host PATH
# (leaked "HYPE R Series" user dirs, SQL Server, Chocolatey, VS Code paths...).
# Those paths do not exist in the container and leak local machine details.
# Only the user's pip bin dir needs prepending; keep the base image's PATH.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so the layer cache survives app-code edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# Hugging Face Spaces routes traffic to port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
import os, requests
|
| 4 |
+
|
| 5 |
+
app = FastAPI()
|
| 6 |
+
|
| 7 |
+
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # The user's chat message; forwarded verbatim as the "user" turn upstream.
    message: str
|
| 9 |
+
|
| 10 |
+
@app.get("/")
def root():
    """Service banner: the app name and the model /chat will target."""
    configured_model = os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
    return {
        "name": "Textilindo AI Power",
        "model": configured_model,
    }
|
| 13 |
+
|
| 14 |
+
@app.get("/health")
def health():
    """Liveness probe; reports healthy whenever the process is serving."""
    status_payload = {"status": "healthy"}
    return status_payload
|
| 17 |
+
|
| 18 |
+
@app.post("/chat")
def chat(body: ChatRequest):
    """Proxy one chat message to the configured HF Inference Endpoint.

    Reads HF_ENDPOINT_URL and an API token from the environment, forwards the
    message with a fixed Indonesian system prompt, and returns the assistant
    reply as {"response": <text or None>}.

    Raises:
        HTTPException(500): endpoint URL or token missing from the environment.
        HTTPException(502): upstream request failed, returned an error status,
            or returned a non-JSON body.
    """
    endpoint = (os.getenv("HF_ENDPOINT_URL") or "").rstrip("/")
    # Accept several token variable names. "HUGGINGFAC_API_KEY_2" (missing "E")
    # is kept for backward compatibility with deployments that set the typo'd
    # name; the correctly spelled variant is now accepted as well.
    token = (
        os.getenv("HUGGINGFACE_API_KEY")
        or os.getenv("HF_TOKEN")
        or os.getenv("HUGGINGFACE_API_KEY_2")
        or os.getenv("HUGGINGFAC_API_KEY_2")
    )
    model = os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
    if not endpoint or not token:
        raise HTTPException(status_code=500, detail="Endpoint or token not configured")

    url = f"{endpoint}/v1/chat/completions"
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "Jawablah singkat dalam Bahasa Indonesia."},
            {"role": "user", "content": body.message},
        ],
        "temperature": 0.5,
        "top_p": 0.9,
        "max_tokens": 180,
    }

    try:
        r = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        # A network failure or timeout previously escaped as an unhandled
        # exception (raw 500 + traceback); surface it as a bad gateway instead.
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {exc}") from exc
    if r.status_code >= 400:
        raise HTTPException(status_code=502, detail=r.text)

    try:
        data = r.json()
    except ValueError as exc:
        # Upstream replied 2xx/3xx but with a non-JSON body.
        raise HTTPException(status_code=502, detail="Upstream returned invalid JSON") from exc
    # Defensive extraction: missing/empty "choices" yields content=None rather
    # than an IndexError/KeyError.
    content = (data.get("choices") or [{}])[0].get("message", {}).get("content")
    return {"response": content}
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
requests
|