Spaces:

Rid3
/

xtime-api

Sleeping

App Files Files Community

xtime-api / app.py

Rid3

Update app.py

206ca90 verified 2 months ago

raw

history blame contribute delete

2.21 kB

	import asyncio
	import gc
	import os
	import threading

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	from pydantic import BaseModel

	app = FastAPI()

	# Open CORS policy: any origin, method and header may call this API.
	# Credentials are disabled on purpose: wildcard ("*") origins, methods and
	# headers must not be combined with allow_credentials=True, and none of the
	# handlers in this file read cookies or session state.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Process-wide cache of the single loaded model, managed by the /chat handler.
	# `model` is None until a model has been loaded; `current_id` is the
	# "<repo_id>/<filename>" key of the loaded model ("" when nothing is loaded).
	model = None
	current_id = ""

	# Request body accepted by POST /chat.
	class ChatRequest(BaseModel):
	    # Model selection: both values are passed to hf_hub_download, and together
	    # they form the "<repo_id>/<filename>" key used to cache the loaded model.
	    repo_id: str
	    filename: str

	    # Conversation text: `prompt` is the user turn, `system_prompt` is placed
	    # in front of it when the completion prompt is assembled.
	    prompt: str
	    system_prompt: str = "You are a helpful assistant."

	    # Generation settings forwarded unchanged to the model's
	    # create_completion call.
	    max_tokens: int = 512
	    temperature: float = 0.7

	# Root page / liveness check. Opening the service URL in a browser should
	# display this JSON payload.
	@app.get("/")
	async def health():
	    status_payload = {
	        "status": "online",
	        "message": "API is running. Use POST /chat to interact.",
	    }
	    return status_payload

	# Guards the shared `model` / `current_id` globals. Loading and inference run
	# in worker threads, and the cached model may be swapped between requests, so
	# only one thread at a time may load or use it.
	_model_lock = threading.Lock()


	def _ensure_model(repo_id: str, filename: str) -> None:
	    """Make the cached global model match ``repo_id``/``filename``.

	    Does nothing when the requested model is already loaded. Must be called
	    with ``_model_lock`` held.
	    """
	    global model, current_id
	    wanted_id = f"{repo_id}/{filename}"
	    if model is not None and current_id == wanted_id:
	        return

	    # Resolve the file before touching the cached model: if the repo or
	    # filename is wrong this raises while the previous model is still usable.
	    path = hf_hub_download(repo_id=repo_id, filename=filename)

	    # Drop the previous model before constructing the next one. Rebind to None
	    # rather than `del model`: deleting a global unbinds the name, so a failed
	    # load would make every later `model is None` check raise NameError.
	    model = None
	    current_id = ""
	    gc.collect()

	    model = Llama(
	        model_path=path,
	        n_ctx=2048,
	        n_threads=os.cpu_count() or 4,
	        n_gpu_layers=0,
	        verbose=False,
	    )
	    current_id = wanted_id


	def _run_chat(request: ChatRequest) -> dict:
	    """Load the requested model if needed and run one completion (blocking)."""
	    with _model_lock:
	        _ensure_model(request.repo_id, request.filename)

	        full_prompt = f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
	        output = model.create_completion(
	            prompt=full_prompt,
	            max_tokens=request.max_tokens,
	            temperature=request.temperature,
	            # Stop when the model starts a new turn on its own.
	            stop=["User:", "System:", "</s>"],
	        )

	        return {
	            "response": output["choices"][0]["text"].strip(),
	            "model": current_id,
	        }


	@app.post("/chat")
	async def chat(request: ChatRequest):
	    """Generate a completion for ``prompt`` with the requested model.

	    The model identified by ``repo_id`` and ``filename`` is loaded on first
	    use and kept in memory until a request names a different model.

	    Returns a JSON object with ``response`` (the generated text, stripped of
	    surrounding whitespace) and ``model`` (``"<repo_id>/<filename>"``).

	    Any failure while fetching, loading or running the model is reported as
	    HTTP 500 with the error message as ``detail``.
	    """
	    # The download, model load and generation are blocking calls; run them in
	    # a worker thread so the event loop can keep serving other routes.
	    loop = asyncio.get_running_loop()
	    try:
	        return await loop.run_in_executor(None, _run_chat, request)
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	if __name__ == "__main__":
	    # Imported here because it is only needed when running as a script.
	    import uvicorn

	    # Hugging Face always uses port 7860.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	    )