# harismlnaslm's picture
# feat: minimal FastAPI app for Llama via HF Inference Endpoint; Dockerfile + requirements
# 02a6500
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os, requests
app = FastAPI()
class ChatRequest(BaseModel):
message: str
@app.get("/")
def root():
    """Service banner: the app name and the configured default model id."""
    model_id = os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
    return {"name": "Textilindo AI Power", "model": model_id}
@app.get("/health")
def health():
    """Liveness probe; always reports healthy when the process is up."""
    status_report = {"status": "healthy"}
    return status_report
@app.post("/chat")
def chat(body: ChatRequest):
    """Proxy one chat message to a Hugging Face Inference Endpoint.

    Reads the endpoint URL and API token from the environment, sends the
    user's message with a fixed Indonesian-language system prompt, and
    returns the model reply as ``{"response": <text-or-None>}``.

    Raises:
        HTTPException 500: endpoint URL or token is not configured.
        HTTPException 502: the upstream request fails, returns an error
            status, or yields a non-JSON body.
    """
    endpoint = (os.getenv("HF_ENDPOINT_URL") or "").rstrip("/")
    # NOTE(review): "HUGGINGFAC_API_KEY_2" looks like a typo'd secret name,
    # but it may match a deliberately-named deployment secret — kept as-is.
    token = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFAC_API_KEY_2")
    model = os.getenv("DEFAULT_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
    if not endpoint or not token:
        raise HTTPException(status_code=500, detail="Endpoint or token not configured")

    url = f"{endpoint}/v1/chat/completions"
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "Jawablah singkat dalam Bahasa Indonesia."},
            {"role": "user", "content": body.message},
        ],
        "temperature": 0.5,
        "top_p": 0.9,
        "max_tokens": 180,
    }

    # Fix: network failures (timeout, DNS error, connection refused) previously
    # escaped as unhandled exceptions — a raw 500 with a stack trace. Surface
    # them as a clean 502, consistent with the upstream-error path below.
    try:
        r = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {exc}") from exc
    if r.status_code >= 400:
        raise HTTPException(status_code=502, detail=r.text)

    # Fix: a non-JSON upstream body previously crashed with an unhandled
    # ValueError from r.json(); report it as an upstream (502) error instead.
    try:
        data = r.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="Upstream returned non-JSON response") from exc

    # Defensive extraction: an empty/missing "choices" list yields None
    # rather than an IndexError.
    content = (data.get("choices") or [{}])[0].get("message", {}).get("content")
    return {"response": content}