File size: 5,297 Bytes
9dc500f
 
 
 
 
f3002dd
9dc500f
 
f565959
9dc500f
 
 
 
 
 
 
 
 
 
 
00db8f8
995895d
9dc500f
 
 
 
00db8f8
995895d
9dc500f
 
6eefd94
9dc500f
00db8f8
6eefd94
9dc500f
 
 
 
 
 
 
 
 
 
 
 
c2f95db
 
 
4388224
 
461c47d
 
 
 
 
c2f95db
9dc500f
f884299
c2f95db
 
 
 
461c47d
 
 
 
 
 
 
c2f95db
9dc500f
f3002dd
42e86b1
a4fc69a
f3002dd
 
 
9dc500f
 
 
 
 
6eefd94
995895d
6eefd94
9dc500f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff9d893
 
9dc500f
 
 
 
 
 
42e86b1
ff9d893
 
1a206bd
ff9d893
 
 
17c6df5
3685631
edd0a86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
from huggingface_hub import InferenceClient
import os
import sys

# Page heading shown at the top of the app.
st.title("SmallZOO ChatBot 3B")

# Hugging Face Inference API prefix; per-model repo paths are appended below.
base_url="https://api-inference.huggingface.co/models/"
# API token read from the environment (note the unusual env-var spelling).
API_KEY = os.environ.get('HG_Interference_API_TOKEN')

# Repo path on the Hub for each selectable display name.
_model_repos = {
    "Llama-3.2 [3B]": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]": "Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]": "microsoft/Phi-3.5-mini-instruct",
}
# Display name -> full inference-endpoint URL.
model_links = {name: base_url + repo for name, repo in _model_repos.items()}

# Per-model sidebar metadata: markdown description, local logo path, and the
# model-card URL on Hugging Face.
# Fixes vs. the original literals: "SLM" now correctly expands to
# "Small Language Model" (it read "Large Language Model"), and two stray
# backslash sequences (a literal "\\n" and a "\ " before a newline) that
# leaked into the rendered markdown are removed.
_SLM_BLURB = (
    "\nA SLM (Small Language Model) is best for applications requiring "
    "fast response times, low resource consumption, and specific, "
    "narrow tasks. \n"
)

model_info = {
    "Llama-3.2 [3B]": {
        'description': (
            "The Llama-3.2 3B Instruct model is a **Large Language Model "
            "(LLM)** that's able to have question and answer interactions.\n"
            + _SLM_BLURB
        ),
        'logo': 'logos/Meta.png',
        'url': 'https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct',
    },

    "Qwen2.5 [3B]": {
        'description': (
            "The Qwen2.5 3B Instruct model is a **Large Language Model "
            "(LLM)** that's able to have question and answer interactions.\n"
            + _SLM_BLURB
        ),
        'logo': 'logos/Qwen.png',
        'url': 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct',
    },

    "Phi-3.5 [3.82B]": {
        'description': (
            "The Phi-3.5 mini instruct model is a **Large Language Model "
            "(LLM)** that's able to have question and answer interactions.\n"
            + _SLM_BLURB
        ),
        'logo': 'logos/ms.png',
        'url': 'https://huggingface.co/microsoft/Phi-3.5-mini-instruct',
    },
}

def reset_conversation():
    """Clear the stored chat history in Streamlit session state.

    Empties both `conversation` and `messages`; used by the sidebar
    Reset button and when the user switches models.
    """
    st.session_state.conversation = []
    st.session_state.messages = []

# Names of the selectable models (dict insertion order preserved).
# Idiom fix: `list(d)` instead of a comprehension over `d.keys()`.
models = list(model_links)

# Model picker for the sidebar.
selected_model = st.sidebar.selectbox(
    "Select Model",
    models,
    help="""Choose your AI model:

 • Llama-3.2: Balanced & versatile

 • Qwen2.5: Strong in reasoning

 • Phi-3.5: Good at technical tasks"""
)

# Sampling temperature, 0.0 (deterministic) .. 1.0 (creative); default 0.5.
temp_value = st.sidebar.slider(
    'Select a temperature value', 
    0.0, 
    1.0, 
    (0.5),
    help="""Controls randomness in responses:

  0 = focused/deterministic

 0.5 = balanced

  1 = more creative/random"""
)

# Free-form system prompt sent with each conversation.
custom_instructions = st.sidebar.text_area(
    "Custom System Instructions",
    value="You are helpful assistant, act like a Human in conversation. Keep answers very short and in English only!",
    help="Customize how the AI should behave"
)

# Clears the stored history via reset_conversation().
st.sidebar.button('Reset Chat', on_click=reset_conversation)


# Sidebar info panel for the currently selected model.
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown(f"[View model on 🤗 Hugging Face]({model_info[selected_model]['url']})")
st.sidebar.markdown("*Generated content can be outdated, inaccurate, offensive or non-factual!!!*")

# Remember which model was active on the previous rerun so a switch
# can be detected.
if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model

# On model change, start a fresh conversation.  The original also set
# `st.session_state.messages = []` here, which was redundant:
# reset_conversation() already clears both history lists.
if st.session_state.prev_option != selected_model:
    st.session_state.prev_option = selected_model
    reset_conversation()


# Inference-endpoint URL for the chosen model.
repo_id = model_links[selected_model]

st.subheader(f'{selected_model}')


# Make sure the chat history container exists on the first run.
if "messages" not in st.session_state:
    st.session_state.messages = []


# Re-render the stored conversation so it survives Streamlit reruns.
for msg in st.session_state.messages:
    role, content = msg["role"], msg["content"]
    with st.chat_message(role):
        st.markdown(content)

# Main chat turn: runs once per prompt submitted through the chat input.
if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):

    # Echo the user's message immediately.
    with st.chat_message("user"):
        st.markdown(prompt)

    # Seed the system instructions exactly once per conversation.  The
    # original appended a fresh system message on every turn, so the
    # stored history grew with duplicate system entries.
    if not any(m["role"] == "system" for m in st.session_state.messages):
        st.session_state.messages.append(
            {"role": "system", "content": custom_instructions})

    st.session_state.messages.append({"role": "user", "content": prompt})

    with st.chat_message("assistant"):
        # Pass the token read at startup — the original constructed the
        # client without it, leaving API_KEY read but never used.
        client = InferenceClient(
            model=model_links[selected_model],
            token=API_KEY,
        )

        try:
            # Stream the completion token-by-token.
            # NOTE(review): some endpoints reject temperature == 0.0, which
            # the slider allows — confirm against the deployed models.
            output = client.text_generation(
                prompt,
                temperature=temp_value,
                max_new_tokens=3000,
                stream=True,
            )

            # Placeholder rewritten in place as chunks arrive.
            message_placeholder = st.empty()
            full_response = ""

            # Accumulate streamed chunks; depending on the client version a
            # chunk may be a plain string or a dict-like payload.
            for chunk in output:
                if isinstance(chunk, dict) and "generated_text" in chunk:
                    text_chunk = chunk["generated_text"]
                elif isinstance(chunk, str):
                    text_chunk = chunk
                else:
                    continue

                full_response += text_chunk
                message_placeholder.markdown(full_response + "▌")

            # Final render without the typing cursor, then persist the reply.
            message_placeholder.markdown(full_response)
            st.session_state.messages.append({"role": "assistant", "content": full_response})
        except Exception as e:
            # Surface API/network failures in the UI instead of crashing.
            st.error(f"Error: {str(e)}")