# File: chat-ui-energy/chart/env/prod.yaml
# Last commit: "Reduce temp on gemma 2 a bit" (71ca905, unverified) by nsarrazin
# Size: 14.9 kB
# Container image coordinates: registry repository path and image name.
image:
  repository: 'registry.internal.huggingface.tech/chat-ui'
  name: 'chat-ui'
# Node label the workload selects on. The value is the string "true"
# (label values are strings), so it stays quoted.
nodeSelector:
  role-hub-utils: 'true'
# Single toleration entry: taint key CriticalAddonsOnly matched with the
# Equal operator. No value or effect is specified here.
tolerations:
  - key: 'CriticalAddonsOnly'
    operator: 'Equal'
# Service account settings: enabled, created by the chart, and named
# huggingchat-prod.
serviceAccount:
  enabled: true
  create: true
  name: 'huggingchat-prod'
# Ingress for the app, served under the /chat path (same prefix as
# envVars.APP_BASE) through the "alb" ingress class.
ingress:
  path: '/chat'
  annotations:
    alb.ingress.kubernetes.io/healthcheck-path: '/healthcheck'
    # JSON list of listener ports; single-quoted so the inner double quotes
    # need no escaping.
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    alb.ingress.kubernetes.io/group.name: 'hub-prod'
    alb.ingress.kubernetes.io/scheme: 'internet-facing'
    # Quoted: annotation values must be strings, not integers.
    alb.ingress.kubernetes.io/ssl-redirect: '443'
    alb.ingress.kubernetes.io/tags: 'Env=prod,Project=hub,Terraform=true'
    # Same label as the nodeSelector block uses.
    alb.ingress.kubernetes.io/target-node-labels: 'role-hub-utils=true'
    kubernetes.io/ingress.class: 'alb'
# Application configuration, keyed by variable name.
# Every value is written as a quoted string (or a block scalar) so YAML never
# retypes a number- or boolean-looking value before it reaches the app.
# NOTE(review): presumably rendered into container environment variables by
# the chart templates — confirm against the template that consumes envVars.
envVars:
  ADDRESS_HEADER: 'X-Forwarded-For'
  ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]'
  # Same path prefix as ingress.path.
  APP_BASE: "/chat"
  ENABLE_ASSISTANTS: "true"
  ENABLE_ASSISTANTS_RAG: "true"
  EXPOSE_API: "true"
  # Quoted for consistency with the other values (was a bare integer).
  METRICS_PORT: "5565"
  # NOTE(review): "debug" verbosity in a prod values file — confirm intended.
  LOG_LEVEL: "debug"
  # JSON array of model definitions, carried as one folded string.
  # Kept as strict JSON: the trailing commas that followed "top_p" in the
  # Yi-1.5 parameters and "stop" in the Llama-3-8B parameters were removed so
  # the payload does not depend on a lenient (JSON5-style) parser.
  # Do not put YAML comments inside the block: they would become content.
  MODELS: >
    [
      {
        "name" : "CohereForAI/c4ai-command-r-plus",
        "tokenizer": "nsarrazin/c4ai-command-r-v01-tokenizer",
        "description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!",
        "modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
        "websiteUrl": "https://docs.cohere.com/docs/command-r-plus",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png",
        "tools": true,
        "parameters": {
          "stop": ["<|END_OF_TURN_TOKEN|>"],
          "truncate" : 28672,
          "max_new_tokens" : 2048,
          "temperature" : 0.3
        },
        "promptExamples" : [
          {
            "title": "Generate a mouse portrait",
            "prompt": "Generate the portrait of a scientific mouse in its laboratory."
          }, {
            "title": "Review a pull request",
            "prompt": "Review this pull request: https://github.com/huggingface/chat-ui/pull/1131/files"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }
        ]
      },
      {
        "name" : "meta-llama/Meta-Llama-3-70B-Instruct",
        "description": "Meta Llama 3 delivers top performance on various benchmarks and introduces new features like better reasoning.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
        "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
        "websiteUrl": "https://llama.meta.com/llama3/",
        "tokenizer" : "philschmid/meta-llama-3-tokenizer",
        "promptExamples" : [
          {
            "title": "Write an email from bullet list",
            "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ],
        "parameters": {
          "stop": ["<|eot_id|>"],
          "truncate": 6144,
          "max_new_tokens": 2047,
          "temperature": 0.6,
          "top_p" : 0.9
        }
      },
      {
        "name" : "google/gemma-2-27b-it",
        "tokenizer": "google/gemma-2-27b-it",
        "description": "Gemma 2 27B is the latest release in the Gemma family of models built by Google, with performance on par with Llama 3 70B. ",
        "modelUrl": "https://huggingface.co/google/gemma-2-27b-it",
        "websiteUrl": "https://huggingface.co/google/gemma-2-27b-it",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png",
        "parameters": {
          "truncate" : 6146,
          "max_new_tokens" : 2044,
          "temperature": 0.1,
          "stop" : ["<end_of_turn>"]
        },
        "preprompt" : "",
        "promptExamples" : [
          {
            "title": "Write a poem",
            "prompt": "Write a poem to help me remember the first 10 elements on the periodic table, giving each element its own line."
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ]
      },
      {
        "name" : "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
        "websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/",
        "modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
        "tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "preprompt" : "",
        "chatPromptTemplate": "<s> {{#each messages}}{{#ifUser}}[INST]{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}} {{content}} [/INST]{{/ifUser}}{{#ifAssistant}} {{content}}</s> {{/ifAssistant}}{{/each}}",
        "parameters" : {
          "temperature" : 0.6,
          "top_p" : 0.95,
          "repetition_penalty" : 1.2,
          "top_k" : 50,
          "truncate" : 24576,
          "max_new_tokens" : 8192,
          "stop" : ["</s>"]
        },
        "promptExamples" : [
          {
            "title": "Write an email from bullet list",
            "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ]
      },
      {
        "name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
        "websiteUrl" : "https://nousresearch.com/",
        "modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "tokenizer": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "chatPromptTemplate" : "{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}",
        "promptExamples": [
          {
            "title": "Write an email from bullet list",
            "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ],
        "parameters": {
          "temperature": 0.7,
          "top_p": 0.95,
          "repetition_penalty": 1,
          "top_k": 50,
          "truncate": 24576,
          "max_new_tokens": 2048,
          "stop": ["<|im_end|>"]
        }
      },
      {
        "name": "01-ai/Yi-1.5-34B-Chat",
        "tokenizer": "01-ai/Yi-1.5-34B-Chat",
        "description" : "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/01-ai-logo.png",
        "modelUrl": "https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
        "websiteUrl": "https://www.01.ai",
        "preprompt": "",
        "parameters": {
          "stop": ["<|im_end|>"],
          "temperature": 0.3,
          "max_new_tokens": 1024,
          "truncate": 1000,
          "top_p": 0.8
        },
        "promptExamples": [
          {
            "title": "我的名字用中文怎么写?",
            "prompt": "请扮演一个起名大师,我将会给你一个我的英文名字,教我如何用中文写我的名字。"
          }, {
            "title": "写一首诗",
            "prompt": "请写一首讲 AI 的诗"
          }, {
            "title": "工作汇报",
            "prompt": "写一份工作汇报"
          }
        ]
      },
      {
        "name": "mistralai/Mistral-7B-Instruct-v0.2",
        "displayName": "mistralai/Mistral-7B-Instruct-v0.2",
        "description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
        "websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/",
        "modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
        "tokenizer": "mistralai/Mistral-7B-Instruct-v0.2",
        "preprompt": "",
        "chatPromptTemplate" : "<s>{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}</s>{{/ifAssistant}}{{/each}}",
        "parameters": {
          "temperature": 0.3,
          "top_p": 0.95,
          "repetition_penalty": 1.2,
          "top_k": 50,
          "truncate": 3072,
          "max_new_tokens": 1024,
          "stop": ["</s>"]
        },
        "promptExamples": [
          {
            "title": "Write an email from bullet list",
            "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ]
      },
      {
        "name": "microsoft/Phi-3-mini-4k-instruct",
        "tokenizer": "microsoft/Phi-3-mini-4k-instruct",
        "description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.",
        "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
        "modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
        "websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/",
        "preprompt": "",
        "chatPromptTemplate": "<s>{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}",
        "parameters": {
          "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"],
          "temperature": 0.7,
          "max_new_tokens": 1024,
          "truncate": 3071
        },
        "promptExamples": [
          {
            "title": "Write an email from bullet list",
            "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
          }, {
            "title": "Code a snake game",
            "prompt": "Code a basic snake game in python, give explanations for each step."
          }, {
            "title": "Assist in a task",
            "prompt": "How do I make a delicious lemon cheesecake?"
          }
        ]
      },
      {
        "name": "meta-llama/Meta-Llama-3-8B-Instruct",
        "tokenizer" : "philschmid/meta-llama-3-tokenizer",
        "parameters": {
          "temperature": 0.1,
          "stop": ["<|eot_id|>"]
        },
        "unlisted": true
      }
    ]
  NODE_ENV: "prod"
  # Quoted for consistency with the other values (was a bare boolean).
  NODE_LOG_STRUCTURED_DATA: "true"
  # JSON array of model names that are no longer in MODELS.
  OLD_MODELS: >
    [
      { "name": "bigcode/starcoder" },
      { "name": "OpenAssistant/oasst-sft-6-llama-30b-xor" },
      { "name": "HuggingFaceH4/zephyr-7b-alpha" },
      { "name": "openchat/openchat_3.5" },
      { "name": "openchat/openchat-3.5-1210" },
      { "name": "tiiuae/falcon-180B-chat" },
      { "name": "codellama/CodeLlama-34b-Instruct-hf" },
      { "name": "google/gemma-7b-it" },
      { "name": "meta-llama/Llama-2-70b-chat-hf" },
      { "name": "codellama/CodeLlama-70b-Instruct-hf" },
      { "name": "openchat/openchat-3.5-0106" }
    ]
  PUBLIC_ORIGIN: "https://huggingface.co"
  PUBLIC_SHARE_PREFIX: "https://hf.co/chat"
  PUBLIC_ANNOUNCEMENT_BANNERS: "[]"
  PUBLIC_APP_NAME: "HuggingChat"
  PUBLIC_APP_ASSETS: "huggingchat"
  PUBLIC_APP_COLOR: "yellow"
  PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone."
  PUBLIC_APP_DISCLAIMER_MESSAGE: "Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice."
  # Quoted for consistency with the other values (were bare integers).
  PUBLIC_APP_DATA_SHARING: "0"
  PUBLIC_APP_DISCLAIMER: "1"
  PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
  # All-digit identifier: must stay quoted so it is not read as an integer.
  PUBLIC_APPLE_APP_ID: "6476778843"
  REQUIRE_FEATURED_ASSISTANTS: "true"
  # Names the unlisted Llama-3-8B entry defined in MODELS.
  TASK_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct"
  # JSON array of embedding model definitions, carried as one folded string.
  TEXT_EMBEDDING_MODELS: >
    [{
      "name": "bge-base-en-v1-5-sxa",
      "displayName": "bge-base-en-v1-5-sxa",
      "chunkCharLength": 512,
      "endpoints": [{
        "type": "tei",
        "url": "https://huggingchat-tei.hf.space/"
      }]
    }]
  WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]'
  XFF_DEPTH: '2'
# Infisical integration: enabled, using the "prod-us-east-1" environment.
infisical:
  enabled: true
  env: 'prod-us-east-1'
# Autoscaling bounds (6 to 30 replicas) and utilization targets.
autoscaling:
  enabled: true
  minReplicas: 6
  maxReplicas: 30
  # NOTE(review): both targets are strings, not integers. Kept as strings
  # because the template that consumes them is not visible here — confirm
  # which type the chart expects.
  targetMemoryUtilizationPercentage: '50'
  targetCPUUtilizationPercentage: '50'
# Compute resources. Requests and limits are deliberately identical
# (4 CPU / 8Gi memory), so the limit never exceeds the request.
resources:
  requests:
    cpu: 4
    memory: '8Gi'
  limits:
    cpu: 4
    memory: '8Gi'
# Monitoring toggle for this deployment.
monitoring:
  enabled: true