chat-ui-energy

Paused

App Files Files Community

chat-ui-energy / chart / env / prod.yaml

nsarrazin

Reduce temp on gemma 2 a bit

71ca905 unverified almost 2 years ago

raw

history blame

14.9 kB

	image:
	repository: registry.internal.huggingface.tech/chat-ui
	name: chat-ui

	nodeSelector:
	role-hub-utils: "true"

	tolerations:
	- key: CriticalAddonsOnly
	operator: Equal

	serviceAccount:
	enabled: true
	create: true
	name: huggingchat-prod

	ingress:
	path: "/chat"
	annotations:
	alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck"
	alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
	alb.ingress.kubernetes.io/group.name: "hub-prod"
	alb.ingress.kubernetes.io/scheme: "internet-facing"
	alb.ingress.kubernetes.io/ssl-redirect: "443"
	alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
	alb.ingress.kubernetes.io/target-node-labels: "role-hub-utils=true"
	kubernetes.io/ingress.class: "alb"

	envVars:
	ADDRESS_HEADER: 'X-Forwarded-For'
	ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]'
	APP_BASE: "/chat"
	ENABLE_ASSISTANTS: "true"
	ENABLE_ASSISTANTS_RAG: "true"
	EXPOSE_API: "true"
	METRICS_PORT: 5565
	LOG_LEVEL: "debug"
	MODELS: >
	[
	{
	"name" : "CohereForAI/c4ai-command-r-plus",
	"tokenizer": "nsarrazin/c4ai-command-r-v01-tokenizer",
	"description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!",
	"modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
	"websiteUrl": "https://docs.cohere.com/docs/command-r-plus",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png",
	"tools": true,
	"parameters": {
	"stop": ["<\|END_OF_TURN_TOKEN\|>"],
	"truncate" : 28672,
	"max_new_tokens" : 2048,
	"temperature" : 0.3
	},
	"promptExamples" : [
	{
	"title": "Generate a mouse portrait",
	"prompt": "Generate the portrait of a scientific mouse in its laboratory."
	}, {
	"title": "Review a pull request",
	"prompt": "Review this pull request: https://github.com/huggingface/chat-ui/pull/1131/files"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}
	]
	},
	{
	"name" : "meta-llama/Meta-Llama-3-70B-Instruct",
	"description": "Meta Llama 3 delivers top performance on various benchmarks and introduces new features like better reasoning.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
	"modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
	"websiteUrl": "https://llama.meta.com/llama3/",
	"tokenizer" : "philschmid/meta-llama-3-tokenizer",
	"promptExamples" : [
	{
	"title": "Write an email from bullet list",
	"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	],
	"parameters": {
	"stop": ["<\|eot_id\|>"],
	"truncate": 6144,
	"max_new_tokens": 2047,
	"temperature": 0.6,
	"top_p" : 0.9
	}
	},
	{
	"name" : "google/gemma-2-27b-it",
	"tokenizer": "google/gemma-2-27b-it",
	"description": "Gemma 2 27B is the latest release in the Gemma family of models built by Google, with performance on par with Llama 3 70B. ",
	"modelUrl": "https://huggingface.co/google/gemma-2-27b-it",
	"websiteUrl": "https://huggingface.co/google/gemma-2-27b-it",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png",
	"parameters": {
	"truncate" : 6146,
	"max_new_tokens" : 2044,
	"temperature": 0.1,
	"stop" : ["<end_of_turn>"]
	},
	"preprompt" : "",
	"promptExamples" : [
	{
	"title": "Write a poem",
	"prompt": "Write a poem to help me remember the first 10 elements on the periodic table, giving each element its own line."
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	]
	},
	{
	"name" : "mistralai/Mixtral-8x7B-Instruct-v0.1",
	"description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
	"websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/",
	"modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
	"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1",
	"preprompt" : "",
	"chatPromptTemplate": "<s> {{#each messages}}{{#ifUser}}[INST]{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}} {{content}} [/INST]{{/ifUser}}{{#ifAssistant}} {{content}}</s> {{/ifAssistant}}{{/each}}",
	"parameters" : {
	"temperature" : 0.6,
	"top_p" : 0.95,
	"repetition_penalty" : 1.2,
	"top_k" : 50,
	"truncate" : 24576,
	"max_new_tokens" : 8192,
	"stop" : ["</s>"]
	},
	"promptExamples" : [
	{
	"title": "Write an email from bullet list",
	"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	]
	},
	{
	"name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
	"description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
	"websiteUrl" : "https://nousresearch.com/",
	"modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
	"tokenizer": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
	"chatPromptTemplate" : "{{#if @root.preprompt}}<\|im_start\|>system\n{{@root.preprompt}}<\|im_end\|>\n{{/if}}{{#each messages}}{{#ifUser}}<\|im_start\|>user\n{{content}}<\|im_end\|>\n<\|im_start\|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<\|im_end\|>\n{{/ifAssistant}}{{/each}}",
	"promptExamples": [
	{
	"title": "Write an email from bullet list",
	"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	],
	"parameters": {
	"temperature": 0.7,
	"top_p": 0.95,
	"repetition_penalty": 1,
	"top_k": 50,
	"truncate": 24576,
	"max_new_tokens": 2048,
	"stop": ["<\|im_end\|>"]
	}
	},
	{
	"name": "01-ai/Yi-1.5-34B-Chat",
	"tokenizer": "01-ai/Yi-1.5-34B-Chat",
	"description" : "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/01-ai-logo.png",
	"modelUrl": "https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
	"websiteUrl": "https://www.01.ai",
	"preprompt": "",
	"parameters": {
	"stop": ["<\|im_end\|>"],
	"temperature": 0.3,
	"max_new_tokens": 1024,
	"truncate": 1000,
	"top_p": 0.8,
	},
	"promptExamples": [
	{
	"title": "我的名字用中文怎么写？",
	"prompt": "请扮演一个起名大师，我将会给你一个我的英文名字，教我如何用中文写我的名字。"
	}, {
	"title": "写一首诗",
	"prompt": "请写一首讲 AI 的诗"
	}, {
	"title": "工作汇报",
	"prompt": "写一份工作汇报"
	}
	]
	},
	{
	"name": "mistralai/Mistral-7B-Instruct-v0.2",
	"displayName": "mistralai/Mistral-7B-Instruct-v0.2",
	"description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
	"websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/",
	"modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
	"tokenizer": "mistralai/Mistral-7B-Instruct-v0.2",
	"preprompt": "",
	"chatPromptTemplate" : "<s>{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}</s>{{/ifAssistant}}{{/each}}",
	"parameters": {
	"temperature": 0.3,
	"top_p": 0.95,
	"repetition_penalty": 1.2,
	"top_k": 50,
	"truncate": 3072,
	"max_new_tokens": 1024,
	"stop": ["</s>"]
	},
	"promptExamples": [
	{
	"title": "Write an email from bullet list",
	"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	]
	},
	{
	"name": "microsoft/Phi-3-mini-4k-instruct",
	"tokenizer": "microsoft/Phi-3-mini-4k-instruct",
	"description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.",
	"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
	"modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
	"websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/",
	"preprompt": "",
	"chatPromptTemplate": "<s>{{preprompt}}{{#each messages}}{{#ifUser}}<\|user\|>\n{{content}}<\|end\|>\n<\|assistant\|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<\|end\|>\n{{/ifAssistant}}{{/each}}",
	"parameters": {
	"stop": ["<\|end\|>", "<\|endoftext\|>", "<\|assistant\|>"],
	"temperature": 0.7,
	"max_new_tokens": 1024,
	"truncate": 3071
	},
	"promptExamples": [
	{
	"title": "Write an email from bullet list",
	"prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
	}, {
	"title": "Code a snake game",
	"prompt": "Code a basic snake game in python, give explanations for each step."
	}, {
	"title": "Assist in a task",
	"prompt": "How do I make a delicious lemon cheesecake?"
	}
	]
	},
	{
	"name": "meta-llama/Meta-Llama-3-8B-Instruct",
	"tokenizer" : "philschmid/meta-llama-3-tokenizer",
	"parameters": {
	"temperature": 0.1,
	"stop": ["<\|eot_id\|>"],
	},
	"unlisted": true
	}
	]
	NODE_ENV: "prod"
	NODE_LOG_STRUCTURED_DATA: true
	OLD_MODELS: >
	[
	{ "name": "bigcode/starcoder" },
	{ "name": "OpenAssistant/oasst-sft-6-llama-30b-xor" },
	{ "name": "HuggingFaceH4/zephyr-7b-alpha" },
	{ "name": "openchat/openchat_3.5" },
	{ "name": "openchat/openchat-3.5-1210" },
	{ "name": "tiiuae/falcon-180B-chat" },
	{ "name": "codellama/CodeLlama-34b-Instruct-hf" },
	{ "name": "google/gemma-7b-it" },
	{ "name": "meta-llama/Llama-2-70b-chat-hf" },
	{ "name": "codellama/CodeLlama-70b-Instruct-hf" },
	{ "name": "openchat/openchat-3.5-0106" }
	]
	PUBLIC_ORIGIN: "https://huggingface.co"
	PUBLIC_SHARE_PREFIX: "https://hf.co/chat"
	PUBLIC_ANNOUNCEMENT_BANNERS: "[]"
	PUBLIC_APP_NAME: "HuggingChat"
	PUBLIC_APP_ASSETS: "huggingchat"
	PUBLIC_APP_COLOR: "yellow"
	PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone."
	PUBLIC_APP_DISCLAIMER_MESSAGE: "Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice."
	PUBLIC_APP_DATA_SHARING: 0
	PUBLIC_APP_DISCLAIMER: 1
	PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
	PUBLIC_APPLE_APP_ID: "6476778843"
	REQUIRE_FEATURED_ASSISTANTS: "true"
	TASK_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct"
	TEXT_EMBEDDING_MODELS: >
	[{
	"name": "bge-base-en-v1-5-sxa",
	"displayName": "bge-base-en-v1-5-sxa",
	"chunkCharLength": 512,
	"endpoints": [{
	"type": "tei",
	"url": "https://huggingchat-tei.hf.space/"
	}]
	}]
	WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]'
	XFF_DEPTH: '2'

	infisical:
	enabled: true
	env: "prod-us-east-1"

	autoscaling:
	enabled: true
	minReplicas: 6
	maxReplicas: 30
	targetMemoryUtilizationPercentage: "50"
	targetCPUUtilizationPercentage: "50"

	resources:
	requests:
	cpu: 4
	memory: 8Gi
	limits:
	cpu: 4
	memory: 8Gi

	monitoring:
	enabled: true