File size: 3,327 Bytes
bd1ec04
4d691ea
bd1ec04
 
 
a8a81a0
 
 
 
bd1ec04
a8a81a0
 
 
bd1ec04
 
a8a81a0
 
bd1ec04
 
 
 
1b5517b
bd1ec04
0d9116e
bd1ec04
0d9116e
bd1ec04
 
1b5517b
bd1ec04
4d691ea
a8a81a0
 
 
 
 
1b5517b
a8a81a0
1b5517b
 
bd1ec04
a8a81a0
 
1b5517b
a8a81a0
1b5517b
 
0d9116e
bd1ec04
a8a81a0
1b5517b
 
bd1ec04
 
 
1b5517b
bd1ec04
 
a8a81a0
bd1ec04
a8a81a0
 
1b5517b
 
 
a8a81a0
1b5517b
 
bd1ec04
1b5517b
a8a81a0
aae51bf
1b5517b
bd1ec04
1b5517b
 
bd1ec04
1b5517b
 
a8a81a0
bd1ec04
1b5517b
bd1ec04
a8a81a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import gradio as gr
import os
from huggingface_hub import InferenceClient

# Hugging Face model IDs offered in the UI checkbox group.
# All are queried through the HF Serverless Inference API in chat();
# the UI caps a single request at 5 models.
AVAILABLE_MODELS = [
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "deepseek-ai/DeepSeek-V3",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "microsoft/Phi-3.5-mini-instruct",
    "google/gemma-2-27b-it",
    "google/gemma-2-9b-it",
    "HuggingFaceH4/zephyr-7b-beta",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]

def chat(message, history, selected_models):
    """Query every selected model with *message* and append the replies.

    Args:
        message: The user's input text.
        history: Existing chat history in Gradio "messages" format
            (a list of ``{"role": ..., "content": ...}`` dicts).
        selected_models: Model IDs picked in the CheckboxGroup.

    Returns:
        A new history list: the original history plus the user turn and
        one assistant turn containing either a warning (empty selection,
        more than 5 models) or every model's reply joined by separators.
        Per-model failures are reported inline rather than raised.
    """

    def _with_turn(assistant_text):
        # Build the returned history; the user turn is always appended,
        # so the chat window shows what was asked even on a warning.
        return history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": assistant_text},
        ]

    if not selected_models:
        return _with_turn("⚠️ Select at least one model!")
    if len(selected_models) > 5:
        return _with_turn("⚠️ Maximum 5 models!")

    # Hoisted out of the loop: the same token is used for every model.
    token = os.getenv("HF_TOKEN")
    responses = []
    for model in selected_models:
        try:
            client = InferenceClient(model=model, token=token)
            try:
                result = client.chat_completion(
                    messages=[{"role": "user", "content": message}],
                    max_tokens=500,
                )
                resp = result.choices[0].message.content
            except Exception:
                # Some models don't expose the chat endpoint; fall back to
                # plain text generation. (Previously a bare ``except:``,
                # which also swallowed KeyboardInterrupt/SystemExit.)
                resp = client.text_generation(message, max_new_tokens=300)
            responses.append(f"**{model}:**\n{resp}\n")
        except Exception as e:
            error = str(e)
            if "loading" in error.lower():
                # Serverless models report a "loading" error while warming up.
                responses.append(f"**{model}:**\n⏳ Loading...\n")
            else:
                responses.append(f"**{model}:**\n❌ {error[:100]}\n")

    return _with_turn("\n---\n\n".join(responses))

# UI layout: left column = model picker, right column = chat window.
# Wiring depends on Gradio's event-chaining API; code left untouched.
with gr.Blocks(title="Anki-Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Anki-Chat\nChat with top models: Llama, Qwen, DeepSeek, Mistral, Gemma & more!")
    
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Select Models (Max 5)")
            # Defaults to the second entry (the small 8B Llama) — presumably
            # chosen because it loads fastest on the serverless API; confirm.
            checks = gr.CheckboxGroup(
                choices=AVAILABLE_MODELS,
                label="Available Models",
                value=[AVAILABLE_MODELS[1]]
            )
            gr.Markdown(
                "**Providers:**\n"
                "- 🦙 Meta Llama\n"
                "- 💉 Qwen (Alibaba)\n"
                "- 🦉 DeepSeek\n"
                "- ✨ Mistral\n"
                "- 🌐 Gemma\n"
                "- 🔥 Phi"
            )
        
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat", height=500)
            msg = gr.Textbox(label="Message", placeholder="Type here...")
            with gr.Row():
                send = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
    
    # Both Enter-in-textbox and the Send button route through chat(),
    # then clear the textbox via the chained .then() lambda.
    msg.submit(chat, [msg, chatbot, checks], [chatbot]).then(lambda: "", None, [msg])
    send.click(chat, [msg, chatbot, checks], [chatbot]).then(lambda: "", None, [msg])
    clear.click(lambda: [], None, [chatbot])
    
    gr.Markdown("**Note:** Using HF Serverless API. Large models may take time to load.")

# Script entry point: start the Gradio server (blocking call).
demo.launch()