File size: 5,058 Bytes
9dc500f
 
 
 
 
f3002dd
9dc500f
 
f565959
9dc500f
 
 
 
 
 
 
 
 
 
 
00db8f8
995895d
9dc500f
 
 
 
00db8f8
995895d
9dc500f
 
6eefd94
9dc500f
00db8f8
6eefd94
9dc500f
 
 
 
 
12e179c
 
9dc500f
 
 
 
 
 
 
 
 
 
 
 
c2f95db
 
 
4388224
 
461c47d
 
 
 
 
c2f95db
9dc500f
c2f95db
 
 
 
 
461c47d
 
 
 
 
 
 
c2f95db
9dc500f
f3002dd
 
68f86cd
f3002dd
 
 
9dc500f
 
 
 
 
6eefd94
995895d
6eefd94
9dc500f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3002dd
9dc500f
 
 
 
 
 
 
 
 
12e179c
9dc500f
 
 
973c2a9
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import streamlit as st
from huggingface_hub import InferenceClient
import os
import sys

# --- App header and model-endpoint configuration ---
st.title("SmallZOO ChatBot 3B")

base_url = "https://api-inference.huggingface.co/models/"
# NOTE(review): the env-var name spells "Interference" (sic) — it must match
# the secret configured on the deployment, so it is left as-is.
API_KEY = os.environ.get('HG_Interference_API_TOKEN')

# Display name -> full inference-endpoint URL for each selectable model.
model_links = {
    "Llama-3.2 [3B]": base_url + "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]": base_url + "Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]": base_url + "microsoft/Phi-3.5-mini-instruct",
}

# Per-model UI metadata: markdown description, local logo path, model-card URL.
# Fixes: "SLM" was expanded as "Large Language Model" (it means *Small*
# Language Model), and stray `\\n` / `\ ` line-continuation artifacts rendered
# literal backslashes in the sidebar markdown.
model_info = {
    "Llama-3.2 [3B]": {
        'description': (
            "The Llama-3.2 3B Instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/Meta.png',
        'url': 'https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct',
    },

    "Qwen2.5 [3B]": {
        'description': (
            "The Qwen2.5 3B Instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/Qwen.png',
        'url': 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct',
    },

    "Phi-3.5 [3.82B]": {
        'description': (
            "The Phi-3.5 mini instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/ms.png',
        'url': 'https://huggingface.co/microsoft/Phi-3.5-mini-instruct',
    },
}

def format_promt(message, custom_instructions=None):
    """Wrap *message* in ChatML turn tags, with an optional system turn.

    Returns the prompt string the instruct endpoints expect: an optional
    ``system`` block (only when *custom_instructions* is truthy) followed
    by a single ``user`` block.
    """
    system_part = (
        f"<|im_start|>system\n{custom_instructions}<|im_end|>\n"
        if custom_instructions
        else ""
    )
    return f"{system_part}<|im_start|>user\n{message}<|im_end|>\n"

def reset_conversation():
    """Clear the stored chat transcript so the next rerun starts fresh."""
    # Both session keys mirror the chat history; blank them together.
    st.session_state.conversation = []
    st.session_state.messages = []

# --- Sidebar: model picker, sampling controls, system prompt, reset ---

# list(model_links) is the idiomatic form of [key for key in d.keys()].
models = list(model_links)

selected_model = st.sidebar.selectbox(
    "Select Model",
    models,
    help="""Choose your AI model:

 • Llama-3.2: Balanced & versatile

 • Qwen2.5: Strong in reasoning

 • Phi-3.5: Good at technical tasks"""
)

# Sampling temperature, passed straight through to text_generation().
temp_values = st.sidebar.slider(
    'Select a temperature value', 
    0.0, 
    1.0, 
    (0.5),
    help="""Controls randomness in responses:

  0 = focused/deterministic

 0.5 = balanced

  1 = more creative/random"""
)

# System prompt injected into every request. Typos in the previous default
# text fixed ("You are helpful assistant", "asnwers").
custom_instructions = st.sidebar.text_area(
    "Custom Instructions",
    value="You are a helpful assistant, act like a Human in conversation. Keep answers short and in English only!",
    help="Customize how the AI should behave"
)

st.sidebar.button('Reset Chat', on_click=reset_conversation)


st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown(f"[View model on 🤗 Hugging Face]({model_info[selected_model]['url']})")
st.sidebar.markdown("*Generated content can be outdated, inaccurate, offensive or non-factual!!!*")

# Track the previously selected model; when the user switches models, wipe
# the transcript so turns from different models are never mixed together.
if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model

if st.session_state.prev_option != selected_model:
    st.session_state.prev_option = selected_model
    # reset_conversation() clears st.session_state.messages (and .conversation),
    # so the previous explicit `st.session_state.messages = []` was redundant.
    reset_conversation()


# Endpoint URL for the chosen model.
# NOTE(review): repo_id is never referenced below — the chat handler looks up
# model_links[selected_model] again; kept so the module namespace is unchanged.
repo_id = model_links[selected_model]

st.subheader(f'{selected_model}')
# st.title(f'ChatBot Using {selected_model}')


# First run: start with an empty transcript.
if "messages" not in st.session_state:
    st.session_state.messages = []


# Streamlit reruns the whole script on every interaction, so replay the
# stored conversation each time.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# --- Chat input & streaming response generation ---
if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):

    # Echo the user's turn immediately and persist it for later reruns.
    with st.chat_message("user"):
        st.markdown(prompt)

    st.session_state.messages.append({"role": "user", "content": prompt})

    # Build the ChatML prompt (system instructions + user message).
    formatted_text = format_promt(prompt, custom_instructions)


    with st.chat_message("assistant"):
        # Bug fix: API_KEY was read from the environment but never passed to
        # the client, so every request went out unauthenticated. Passing
        # token=API_KEY (None is accepted and means anonymous) fixes that.
        client = InferenceClient(
            model=model_links[selected_model],
            token=API_KEY,
        )

        output = client.text_generation(
            formatted_text,
            temperature=temp_values,  # from the sidebar slider
            max_new_tokens=1000,
            stream=True
        )

        # Create a placeholder for the streaming response
        message_placeholder = st.empty()
        full_response = ""

        # Stream the response, showing a cursor glyph while tokens arrive.
        for chunk in output:
            full_response += chunk
            message_placeholder.markdown(full_response + "▌")

        # Replace the cursor with the final text and persist the turn.
        message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})