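# SmallZOO ChatBot 3B
# A Streamlit chat app that lets the user pick one of three small (~3B)
# instruct models and chat with it over the Hugging Face Inference API,
# streaming the reply as it is generated.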
import os

import streamlit as st
from huggingface_hub import InferenceClient
st.title("SmallZOO ChatBot 3B")
base_url="https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HG_Interference_API_TOKEN')
model_links = {
    "Llama-3.2 [3B]": base_url + "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]": base_url + "Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]": base_url + "microsoft/Phi-3.5-mini-instruct",
}
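# Per-model sidebar blurb and logo, keyed by the same display names as model_links.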
model_info = {
    "Llama-3.2 [3B]": {
        'description': """The Llama-3.2 3B Instruct model is a **Small Language Model (SLM)** that's able to have question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': './Meta.png'},
    "Qwen2.5 [3B]": {
        'description': """The Qwen2.5 3B Instruct model is a **Small Language Model (SLM)** that's able to have question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': './Qwen.png'},
    "Phi-3.5 [3.82B]": {
        'description': """The Phi-3.5 mini instruct model is a **Small Language Model (SLM)** that's able to have question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': './ms.png'},
}
def format_prompt(message, custom_instructions=None):
    """Wrap the user message (and optional custom instructions) in [INST] tags."""
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]"
    prompt += f"[INST] {message} [/INST]"
    return prompt
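# e.g. format_prompt("Hi", "Be brief") -> "[INST] Be brief [/INST][INST] Hi [/INST]"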
def reset_conversation():
    """Reset the conversation history kept in session state."""
    st.session_state.conversation = []
    st.session_state.messages = []
models = list(model_links.keys())

selected_model = st.sidebar.selectbox(
    "Select Model",
    models,
    help="Choose your AI model:\n• Llama-3.2: Balanced & versatile\n• Qwen2.5: Strong in reasoning\n• Phi-3.5: Good at technical tasks",
)
temp_values = st.sidebar.slider(
    'Select a temperature value',
    0.0,
    1.0,
    0.5,
    help="Controls randomness in responses: 0 = focused/deterministic, 1 = more creative/random",
)
custom_instructions = st.sidebar.text_area(
    "Custom Instructions",
    value="Act like a human in conversation; you are a helpful assistant. Keep answers short!",
    help="Customize how the AI should behave",
)
st.sidebar.button('Reset Chat', on_click=reset_conversation)
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown("*Generated content can be inaccurate, offensive or non-factual!!!*")
if "prev_option" not in st.session_state:
st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
st.session_state.messages = []
# st.write(f"Changed to {selected_model}")
st.session_state.prev_option = selected_model
reset_conversation()
repo_id = model_links[selected_model]

st.subheader(selected_model)
if "messages" not in st.session_state:
st.session_state.messages = []
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
if prompt := st.chat_input(f"Hi, I'm {selected_model}. How can I help you today?"):
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    formatted_text = format_prompt(prompt, custom_instructions)
with st.chat_message("assistant"):
client = InferenceClient(
model=model_links[selected_model],)
output = client.text_generation(
formated_text,
temperature=temp_values,#0.5
max_new_tokens=3000,
stream=True
)
        # Placeholder that is updated in place as chunks stream in.
        message_placeholder = st.empty()
        full_response = ""

        # Stream the response and accumulate it, showing a cursor while typing.
        for chunk in output:
            full_response += chunk
            message_placeholder.markdown(full_response + "▌")

        # Display the final response and store it in the history.
        message_placeholder.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})