Spaces:

npmaiecosystem
/

loadbalancer

Running

App Files Files Community

loadbalancer / app.py

npmaiecosystem

Update app.py

2cb98b5 verified 4 days ago

raw

history blame contribute delete

5.74 kB

	from fastapi import FastAPI, Request, HTTPException
	from pydantic import BaseModel
	from typing import Annotated, Any, Optional
	import asyncio
	from redis.asyncio import Redis
	import httpx
	import os

	# FastAPI application object; the route decorators in this file register handlers on it.
	app = FastAPI()

	@app.post("/")
	def health_check():
	    """Answer a POST to the root path with the fixed string "Healthy"."""
	    reply = "Healthy"
	    return reply

	# Redis connection used for the per-endpoint "status" hashes.
	# Only the password is read from the environment (PASSWORD); it is None when unset.
	password = os.getenv("PASSWORD")
	r = Redis(
	    host="redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com",
	    port=15562,
	    username="default",
	    password=password,
	    # Return str instead of bytes from Redis replies.
	    decode_responses=True,
	)



	# Primary endpoint URL for each public model name.
	_PRIMARY_LINKS = {
	    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
	    "qwen2.5-coder:7b": "https://sonuramashishnpm-npmai.hf.space/qwen",
	    "vicuna:7b": "https://sonuramashish22028704-vicuna7b.hf.space/vicuna",
	    "gemma3:12b": "https://npmaiecosystem-gemma312b.hf.space/gemma312b",
	    "internlm2:7b": "https://sonuramashish22028704-internlm27b.hf.space/internlm",
	    "falcon:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/falcon",
	    "codellama:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
	    "mistral:7b": "https://sonuramashish22028704-mistral7b.hf.space/mistral",
	    "phi3:medium": "https://sonuramashish22028704-phi3medium.hf.space/phi3medium",
	    "qwen3.5:9b": "https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
	    "gemma2:9b": "https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
	}

	# Fallback endpoint URL for each model, keyed as "<model name>_fall".
	# NOTE(review): the "gemma3:12b_fall" hostname contains an underscore, unlike every
	# other host in this table -- confirm that URL is reachable.
	_FALLBACK_LINKS = {
	    "llama3.2_fall": "https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama",
	    "qwen2.5-coder:7b_fall": "https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2",
	    "vicuna:7b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
	    "gemma3:12b_fall": "https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b",
	    "internlm2:7b_fall": "https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
	    "falcon:7b-instruct_fall": "https://sonuramashishnpm-model1.hf.space/llm_fall_falcon",
	    "codellama:7b-instruct_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_codellama",
	    "mistral:7b_fall": "https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
	    "phi3:medium_fall": "https://sonuramashishnpm-model1.hf.space/llama_fall_phi",
	    "qwen3.5:9b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb",
	    "gemma2:9b_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b",
	}

	# Single lookup table: primaries first, then fallbacks (iteration order is preserved).
	Model_links = {**_PRIMARY_LINKS, **_FALLBACK_LINKS}

	# Lua script executed through Redis EVAL with one key (KEYS[1]).
	# It reads the hash field 'status' of that key, treating a missing field as 0.
	# If the value is below 1 it stores status + 1 and returns the previous value;
	# otherwise it writes nothing and returns -1.
	LUA_CHECK_AND_INC = """
	local key = KEYS[1]

	local status = tonumber(redis.call('HGET', key, 'status') or '0')
	if status < 1 then
	redis.call('HSET', key, 'status', status + 1)
	return status
	end

	return -1
	"""

	# Lua script executed through Redis EVAL with one key (KEYS[1]).
	# It reads the hash field 'status' of that key, treating a missing field as 0.
	# If the value is above 0 it stores status - 1 and returns the new value;
	# otherwise it writes nothing and returns 0, so the counter never goes negative.
	LUA_REMOVAL_STATUS = """
	local key = KEYS[1]

	local status = tonumber(redis.call('HGET', key, 'status') or '0')
	if status > 0 then
	redis.call('HSET', key, 'status', status -1)
	return status -1
	end

	return 0
	"""

	async def check_cond(model_link: str, fall_model: Optional[list] = None):
	    """Find the first endpoint whose Redis status can be claimed.

	    The primary ``model_link`` is tried first. If it is busy, the candidates
	    are ``fall_model`` when that list is non-empty, otherwise every URL in
	    ``Model_links`` in table order.

	    Args:
	        model_link: URL of the preferred endpoint (also used as the Redis key).
	        fall_model: Optional list of fallback endpoint URLs.

	    Returns:
	        ``{"link": <url>, "statusno": <value returned by the script>}`` for the
	        first endpoint claimed, or ``None`` when every candidate returned -1.
	    """
	    primary_slot = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
	    if primary_slot != -1:
	        return {"link": model_link, "statusno": primary_slot}

	    # An empty or missing fallback list means "try the whole table".
	    candidates = fall_model or Model_links.values()
	    for endpoint in candidates:
	        slot = await r.eval(LUA_CHECK_AND_INC, 1, endpoint)
	        if slot != -1:
	            return {"link": endpoint, "statusno": slot}

	    return None


	class Input(BaseModel):
	    """Request body accepted by the POST /load_balancer route."""

	    # Name looked up in Model_links to find the primary endpoint.
	    model: str
	    # Forwarded to the backend as the "temperature" field of the JSON payload.
	    temperature: float = 0.5
	    # Forwarded to the backend as the "prompt" field of the JSON payload.
	    prompt: str
	    # When true and Models is non-empty, Models are resolved to "<name>_fall" endpoints.
	    change: bool = True
	    # Optional list of model names to use as fallbacks.
	    Models: Optional[list] = None

	@app.post("/load_balancer")
	async def llm_router(inputs: Input):
	    """Pick a free endpoint for the requested model and forward the prompt to it.

	    Args:
	        inputs: Parsed request body (model name, prompt, temperature, and
	            optional fallback settings).

	    Returns:
	        Whatever ``router`` returns for the chosen endpoint.

	    Raises:
	        HTTPException: 400 when the model name or prompt is empty, 444 when the
	            model is not in ``Model_links``, 402 when a requested fallback has
	            no ``"<name>_fall"`` entry, 503 when ``check_cond`` finds no free
	            endpoint.
	    """
	    if not (inputs.model and inputs.prompt):
	        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

	    if inputs.model not in Model_links:
	        raise HTTPException(status_code=444, detail="Model not found.")
	    primary_url = Model_links[inputs.model]

	    # Resolve requested fallbacks to their "<name>_fall" URLs; stays empty when
	    # fallbacks are disabled or none were supplied.
	    fallback_urls = []
	    requested = inputs.Models
	    if inputs.change and requested:
	        for name in requested:
	            fall_key = f"{name}_fall"
	            if fall_key not in Model_links:
	                raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
	            fallback_urls.append(Model_links[fall_key])

	    claimed = await check_cond(model_link=primary_url, fall_model=fallback_urls)

	    usable = claimed and claimed.get("link") and claimed.get("statusno") is not None
	    if not usable:
	        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

	    return await router(
	        model_url=claimed["link"],
	        prompt=inputs.prompt,
	        temp=inputs.temperature,
	    )

	async def router(model_url, prompt, temp):
	    """POST the prompt to one backend endpoint and return its answer.

	    The endpoint's Redis status is always decremented afterwards, whether
	    the call succeeded or failed.

	    Args:
	        model_url: Backend URL to call; also the Redis key whose status is released.
	        prompt: Prompt text sent as the "prompt" field of the JSON payload.
	        temp: Value sent as the "temperature" field of the JSON payload.

	    Returns:
	        ``{"response": <text>}`` with the backend's "response" value as a string.

	    Raises:
	        HTTPException: 502 when the request fails, the backend answers with an
	            error status, the reply cannot be parsed, or the "response" value
	            is None or blank.
	    """
	    payload = {"prompt": prompt, "temperature": temp}
	    # Long read timeout: presumably generation on the backend can take minutes.
	    timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
	    error_log = ""
	    process = ""

	    try:
	        async with httpx.AsyncClient(timeout=timeout) as client:
	            response = await client.post(model_url, json=payload)
	            response.raise_for_status()
	            f_response = response.json()["response"]
	            # Coerce once and use the same text for both the emptiness check and
	            # the result, so a non-string payload (e.g. a number) is returned
	            # instead of failing on str + non-str concatenation.
	            text = "" if f_response is None else str(f_response)
	            if not text.strip():
	                raise ValueError("Empty string or None returned in response from LLM")
	            process = text
	    except Exception as e:
	        # Boundary handler: any backend failure is reported to the caller as a 502.
	        error_log = f"LLM backend error: {str(e)}"
	    finally:
	        # Release the endpoint's status on every path.
	        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)

	    if error_log:
	        raise HTTPException(status_code=502, detail=error_log)
	    return {"response": process}