File size: 18,403 Bytes
fc69895
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
image:
  repository: huggingface
  name: chat-ui

#nodeSelector:
#  role-huggingchat: "true"
#
#tolerations:
#  - key: "huggingface.co/huggingchat"
#    operator: "Equal"
#    value: "true"
#    effect: "NoSchedule"

serviceAccount:
  enabled: true
  create: true
  name: huggingchat-ephemeral

ingress:
  enabled: false

ingressInternal:
  enabled: true
  path: "/chat"
  annotations:
    external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech"
    alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
    alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
    alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public"
    alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public"
    alb.ingress.kubernetes.io/ssl-redirect: "443"
    alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
    alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
    alb.ingress.kubernetes.io/target-type: "ip"
    alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da"
    kubernetes.io/ingress.class: "alb"

envVars:
  TEST: "test"
  COUPLE_SESSION_WITH_COOKIE_NAME: "token"
  OPENID_SCOPES: "openid profile inference-api"
  USE_USER_TOKEN: "true"
  AUTOMATIC_LOGIN: "false"

  ADDRESS_HEADER: "X-Forwarded-For"
  APP_BASE: "/chat"
  ALLOW_IFRAME: "false"
  COOKIE_SAMESITE: "lax"
  COOKIE_SECURE: "true"
  EXPOSE_API: "true"
  METRICS_ENABLED: "true"
  LOG_LEVEL: "debug"
  NODE_LOG_STRUCTURED_DATA: "true"

  OPENAI_BASE_URL: "https://router.huggingface.co/v1"
  PUBLIC_APP_ASSETS: "huggingchat"
  PUBLIC_APP_NAME: "HuggingChat"
  PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
  PUBLIC_ORIGIN: "https://huggingface.co"
  PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"

  TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
  LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
  LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
  LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
  LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
  LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
  LLM_ROUTER_ENABLE_MULTIMODAL: "true"
  LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Thinking"
  PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
  PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
  PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
  MODELS: > 
    [
      { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
      { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
      { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
      { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
      { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
      { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
      { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
      { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
      { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
      { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
      { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
      { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
      { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
      { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
      { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
      { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
      { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
      { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
      { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
      { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
      { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
      { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
      { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
      { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
      { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
      { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
      { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
      { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
      { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
      { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
      { "id": "zai-org/GLM-4.6", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
      { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
      { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
      { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
      { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
      { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
      { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
      { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
      { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
      { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
      { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
      { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
      { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
      { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
      { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
      { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
      { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
      { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
      { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
      { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
      { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
      { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
      { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
      { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
      { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
      { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
      { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
      { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
      { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
      { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
      { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
      { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
      { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
      { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
      { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
      { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
      { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
      { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
      { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
      { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
      { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
      { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
      { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
      { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
      { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
      { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
      { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
      { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
      { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
      { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
      { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
      { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
      { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
      { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
      { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
      { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
      { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
      { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
      { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
      { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
      { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
      { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
      { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
      { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
      { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
      { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
      { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
      { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
      { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
      { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
      { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
      { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
      { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
      { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
      { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
      { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
      { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
      { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
      { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
    ]

infisical:
  enabled: true
  env: "ephemeral-us-east-1"

replicas: 1
autoscaling:
  enabled: false

resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 4
    memory: 8Gi