import gradio as gr
import os
from huggingface_hub import InferenceClient
# Model ids offered in the UI checkbox list. Each is queried through the
# HF serverless Inference API in chat(); the UI enforces a max of 5 per turn.
AVAILABLE_MODELS = [
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "deepseek-ai/DeepSeek-V3",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "microsoft/Phi-3.5-mini-instruct",
    "google/gemma-2-27b-it",
    "google/gemma-2-9b-it",
    "HuggingFaceH4/zephyr-7b-beta",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]
def chat(message, history, selected_models):
    """Send *message* to every selected model and append the combined replies.

    Args:
        message: The user's prompt text.
        history: Existing chat history in Gradio "messages" format
            (list of {"role", "content"} dicts); may be None on the first turn.
        selected_models: Model ids chosen in the CheckboxGroup (1-5 allowed).

    Returns:
        The updated history: the user turn plus a single assistant turn that
        joins every model's answer (or a per-model error note) with dividers.
    """
    history = history or []  # Gradio may pass None before any turn exists
    user_turn = {"role": "user", "content": message}

    # Guard clauses: enforce the 1-5 model selection contract up front.
    if not selected_models:
        return history + [user_turn, {"role": "assistant", "content": "⚠️ Select at least one model!"}]
    if len(selected_models) > 5:
        return history + [user_turn, {"role": "assistant", "content": "⚠️ Maximum 5 models!"}]

    responses = []
    for model in selected_models:
        try:
            client = InferenceClient(model=model, token=os.getenv("HF_TOKEN"))
            try:
                result = client.chat_completion(
                    messages=[{"role": "user", "content": message}],
                    max_tokens=500,
                )
                resp = result.choices[0].message.content
            except Exception:
                # Not every model serves the chat-completion task; fall back
                # to plain text generation before giving up on this model.
                # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
                resp = client.text_generation(message, max_new_tokens=300)
            responses.append(f"**{model}:**\n{resp}\n")
        except Exception as e:
            error = str(e)
            if "loading" in error.lower():
                # Serverless endpoints report "loading" while cold-starting.
                responses.append(f"**{model}:**\n⏳ Loading...\n")
            else:
                responses.append(f"**{model}:**\n❌ {error[:100]}\n")
    return history + [user_turn, {"role": "assistant", "content": "\n---\n\n".join(responses)}]
# --- UI layout: model picker on the left, chat panel on the right ---
with gr.Blocks(title="Anki-Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Anki-Chat\nChat with top models: Llama, Qwen, DeepSeek, Mistral, Gemma & more!")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Select Models (Max 5)")
            checks = gr.CheckboxGroup(
                choices=AVAILABLE_MODELS,
                label="Available Models",
                value=[AVAILABLE_MODELS[1]],  # default to the lighter 8B Llama
            )
            gr.Markdown(
                "**Providers:**\n"
                "- 🦙 Meta Llama\n"
                "- 💉 Qwen (Alibaba)\n"
                "- 🦉 DeepSeek\n"
                "- ✨ Mistral\n"
                "- 🌐 Gemma\n"
                "- 🔥 Phi"
            )
        with gr.Column(scale=2):
            # chat() produces {"role", "content"} dicts, so the Chatbot must be
            # in "messages" mode (the default in Gradio 5; explicit here so the
            # app also works on Gradio 4.x, whose default is tuple pairs).
            chatbot = gr.Chatbot(label="Chat", height=500, type="messages")
            msg = gr.Textbox(label="Message", placeholder="Type here...")
            with gr.Row():
                send = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
    # Enter key and Send button share the same handler; both clear the textbox
    # after the response lands.
    msg.submit(chat, [msg, chatbot, checks], [chatbot]).then(lambda: "", None, [msg])
    send.click(chat, [msg, chatbot, checks], [chatbot]).then(lambda: "", None, [msg])
    clear.click(lambda: [], None, [chatbot])
    gr.Markdown("**Note:** Using HF Serverless API. Large models may take time to load.")
demo.launch()