harismlnaslm commited on
Commit
02a6500
·
1 Parent(s): 3742f73

feat: minimal FastAPI app for Llama via HF Inference Endpoint; Dockerfile + requirements

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -0
  2. app.py +33 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
FROM python:3.10-slim

# Run as a non-root user, as required/recommended for HF Docker Spaces.
RUN useradd -m -u 1000 user
USER user
# BUG FIX: the original ENV baked the author's entire Windows host PATH into
# the image (leaking local usernames and machine paths, and meaningless inside
# a Linux container). Only the user's pip script dir needs to be prepended.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so Docker layer caching skips reinstalls
# when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# Port 7860 is the default port HF Spaces expects the app to listen on.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# PEP 8: one import per statement, stdlib group before third-party.
import os

import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Single FastAPI application instance served by uvicorn (see Dockerfile CMD).
app = FastAPI()
6
+
7
class ChatRequest(BaseModel):
    """Request body for POST /chat: a single user message to forward upstream."""
    # The raw user prompt; relayed verbatim as the "user" chat message.
    message: str
9
+
10
+ @app.get("/")
11
+ def root():
12
+ return {"name": "Textilindo AI Power", "model": os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")}
13
+
14
+ @app.get("/health")
15
+ def health():
16
+ return {"status": "healthy"}
17
+
18
+ @app.post("/chat")
19
+ def chat(body: ChatRequest):
20
+ endpoint = (os.getenv("HF_ENDPOINT_URL") or "").rstrip("/")
21
+ token = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFAC_API_KEY_2")
22
+ model = os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
23
+ if not endpoint or not token:
24
+ raise HTTPException(status_code=500, detail="Endpoint or token not configured")
25
+ url = f"{endpoint}/v1/chat/completions"
26
+ headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
27
+ payload = {"model": model, "messages": [{"role": "system", "content": "Jawablah singkat dalam Bahasa Indonesia."}, {"role": "user", "content": body.message}], "temperature": 0.5, "top_p": 0.9, "max_tokens": 180}
28
+ r = requests.post(url, headers=headers, json=payload, timeout=60)
29
+ if r.status_code >= 400:
30
+ raise HTTPException(status_code=502, detail=r.text)
31
+ data = r.json()
32
+ content = (data.get("choices") or [{}])[0].get("message", {}).get("content")
33
+ return {"response": content}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ requests