File size: 4,243 Bytes
9dc500f
 
 
 
 
 
 
 
f565959
9dc500f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973c2a9
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
from huggingface_hub import InferenceClient
import os
import sys

st.title("SmallZOO-ChatBot-3B")

base_url="https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HG_Interference_API_TOKEN')

model_links ={ 
    "Llama-3.2 [3B]":base_url+"meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]":base_url+"Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]":base_url+"microsoft/Phi-3.5-mini-instruct"
}

model_info ={
    "Llama-3.2 [3B]":
        {'description':"""The Llama-3.2 3B Instruct model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
            \nA SLM (Large Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks. \n""",
        'logo':'./Meta.png'},

    "Qwen2.5 [3B]":
        {'description':"""The Qwen2.5 3B Instruct model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
            \nA SLM (Large Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks. \\n""",
        'logo':'./Qwen.png'},


      "Phi-3.5 [3.82B]":
      {'description':"""The Phi-3.5 mini instruct model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
          \nA SLM (Large Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks. \ \n""",
      'logo':'./ms.png'},

}

def format_promt(message, custom_instructions=None):
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]"
    prompt += f"[INST] {message} [/INST]"
    return prompt

def reset_conversation():
    '''
    Resets Conversation
    '''
    st.session_state.conversation = []
    st.session_state.messages = []
    return None

models =[key for key in model_links.keys()]

selected_model = st.sidebar.selectbox("Select Model", models)

temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, (0.5))

st.sidebar.button('Reset Chat', on_click=reset_conversation)


st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown("*Generated content can be inaccurate, offensive or non-factual!!!*")

if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model

if st.session_state.prev_option != selected_model:
    st.session_state.messages = []
    # st.write(f"Changed to {selected_model}")
    st.session_state.prev_option = selected_model
    reset_conversation()


repo_id = model_links[selected_model]

st.subheader(f'{selected_model}')
# st.title(f'ChatBot Using {selected_model}')


if "messages" not in st.session_state:
    st.session_state.messages = []


for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):

    custom_instruction = "Act like a Human in conversation, you are helpfull assistant"

    with st.chat_message("user"):
        st.markdown(prompt)

    st.session_state.messages.append({"role": "user", "content": prompt})

    formated_text = format_promt(prompt, custom_instruction)


    with st.chat_message("assistant"):
        client = InferenceClient(
            model=model_links[selected_model],)

        output = client.text_generation(
            formated_text,
            temperature=temp_values,#0.5
            max_new_tokens=3000,
            stream=True
        )

        # Create a placeholder for the streaming response
        message_placeholder = st.empty()
        full_response = ""

        # Stream the response and accumulate it
        for chunk in output:
            full_response += chunk
            message_placeholder.markdown(full_response + "▌")
        
        # Display final response and store it
        message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})