File size: 5,058 Bytes
9dc500f
 
 
 
 
f3002dd
9dc500f
 
f565959
9dc500f
 
 
 
 
 
 
 
 
 
 
00db8f8
995895d
9dc500f
 
 
 
00db8f8
995895d
9dc500f
 
6eefd94
9dc500f
00db8f8
6eefd94
9dc500f
 
 
 
 
12e179c
 
9dc500f
 
 
 
 
 
 
 
 
 
 
 
c2f95db
 
 
4388224
 
461c47d
 
 
 
 
c2f95db
9dc500f
c2f95db
 
 
 
 
461c47d
 
 
 
 
 
 
c2f95db
9dc500f
f3002dd
 
68f86cd
f3002dd
 
 
9dc500f
 
 
 
 
6eefd94
995895d
6eefd94
9dc500f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3002dd
9dc500f
 
 
 
 
 
 
 
 
12e179c
9dc500f
 
 
973c2a9
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import streamlit as st
from huggingface_hub import InferenceClient
import os
import sys

# --- App header and model-endpoint configuration ---
st.title("SmallZOO ChatBot 3B")

base_url = "https://api-inference.huggingface.co/models/"
# NOTE(review): the env-var name spells "Interference" (sic) — it must match
# the secret configured on the deployment, so it is left as-is.
API_KEY = os.environ.get('HG_Interference_API_TOKEN')

# Display name -> full inference-endpoint URL for each selectable model.
model_links = {
    "Llama-3.2 [3B]": base_url + "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]": base_url + "Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]": base_url + "microsoft/Phi-3.5-mini-instruct",
}

# Per-model UI metadata: markdown description, local logo path, model-card URL.
# Fixes: "SLM" was expanded as "Large Language Model" (it means *Small*
# Language Model), and stray `\\n` / `\ ` line-continuation artifacts rendered
# literal backslashes in the sidebar markdown.
model_info = {
    "Llama-3.2 [3B]": {
        'description': (
            "The Llama-3.2 3B Instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/Meta.png',
        'url': 'https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct',
    },

    "Qwen2.5 [3B]": {
        'description': (
            "The Qwen2.5 3B Instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/Qwen.png',
        'url': 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct',
    },

    "Phi-3.5 [3.82B]": {
        'description': (
            "The Phi-3.5 mini instruct model is a **Large Language Model (LLM)** "
            "that's able to have question and answer interactions.\n"
            "\nAn SLM (Small Language Model) is best for applications requiring "
            "fast response times, low resource consumption, and specific, narrow tasks.\n"
        ),
        'logo': 'logos/ms.png',
        'url': 'https://huggingface.co/microsoft/Phi-3.5-mini-instruct',
    },
}

def format_promt(message, custom_instructions=None):
    """Wrap *message* in ChatML turn tags, with an optional system turn.

    Returns the prompt string the instruct endpoints expect: an optional
    ``system`` block (only when *custom_instructions* is truthy) followed
    by a single ``user`` block.
    """
    system_part = (
        f"<|im_start|>system\n{custom_instructions}<|im_end|>\n"
        if custom_instructions
        else ""
    )
    return f"{system_part}<|im_start|>user\n{message}<|im_end|>\n"

def reset_conversation():
    """Clear the stored chat transcript so the next rerun starts fresh."""
    # Both session keys mirror the chat history; blank them together.
    st.session_state.conversation = []
    st.session_state.messages = []

# --- Sidebar: model picker, sampling controls, system prompt, reset ---

# list(model_links) is the idiomatic form of [key for key in d.keys()].
models = list(model_links)

selected_model = st.sidebar.selectbox(
    "Select Model",
    models,
    help="""Choose your AI model:

 • Llama-3.2: Balanced & versatile

 • Qwen2.5: Strong in reasoning

 • Phi-3.5: Good at technical tasks"""
)

# Sampling temperature, passed straight through to text_generation().
temp_values = st.sidebar.slider(
    'Select a temperature value', 
    0.0, 
    1.0, 
    (0.5),
    help="""Controls randomness in responses:

  0 = focused/deterministic

 0.5 = balanced

  1 = more creative/random"""
)

# System prompt injected into every request. Typos in the previous default
# text fixed ("You are helpful assistant", "asnwers").
custom_instructions = st.sidebar.text_area(
    "Custom Instructions",
    value="You are a helpful assistant, act like a Human in conversation. Keep answers short and in English only!",
    help="Customize how the AI should behave"
)

st.sidebar.button('Reset Chat', on_click=reset_conversation)


st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown(f"[View model on 🤗 Hugging Face]({model_info[selected_model]['url']})")
st.sidebar.markdown("*Generated content can be outdated, inaccurate, offensive or non-factual!!!*")

# Track the previously selected model; when the user switches models, wipe
# the transcript so turns from different models are never mixed together.
if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model

if st.session_state.prev_option != selected_model:
    st.session_state.prev_option = selected_model
    # reset_conversation() clears st.session_state.messages (and .conversation),
    # so the previous explicit `st.session_state.messages = []` was redundant.
    reset_conversation()


# Endpoint URL for the chosen model.
# NOTE(review): repo_id is never referenced below — the chat handler looks up
# model_links[selected_model] again; kept so the module namespace is unchanged.
repo_id = model_links[selected_model]

st.subheader(f'{selected_model}')
# st.title(f'ChatBot Using {selected_model}')


# First run: start with an empty transcript.
if "messages" not in st.session_state:
    st.session_state.messages = []


# Streamlit reruns the whole script on every interaction, so replay the
# stored conversation each time.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# --- Chat input & streaming response generation ---
if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):

    # Echo the user's turn immediately and persist it for later reruns.
    with st.chat_message("user"):
        st.markdown(prompt)

    st.session_state.messages.append({"role": "user", "content": prompt})

    # Build the ChatML prompt (system instructions + user message).
    formatted_text = format_promt(prompt, custom_instructions)


    with st.chat_message("assistant"):
        # Bug fix: API_KEY was read from the environment but never passed to
        # the client, so every request went out unauthenticated. Passing
        # token=API_KEY (None is accepted and means anonymous) fixes that.
        client = InferenceClient(
            model=model_links[selected_model],
            token=API_KEY,
        )

        output = client.text_generation(
            formatted_text,
            temperature=temp_values,  # from the sidebar slider
            max_new_tokens=1000,
            stream=True
        )

        # Create a placeholder for the streaming response
        message_placeholder = st.empty()
        full_response = ""

        # Stream the response, showing a cursor glyph while tokens arrive.
        for chunk in output:
            full_response += chunk
            message_placeholder.markdown(full_response + "▌")

        # Replace the cursor with the final text and persist the turn.
        message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})