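"""SmallZOO ChatBot 3B: a Streamlit chat app that lets the user switch
between three ~3B instruct models served by the Hugging Face serverless
Inference API."""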
import os

import streamlit as st
from huggingface_hub import InferenceClient
st.title("SmallZOO ChatBot 3B")
base_url="https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HG_Interference_API_TOKEN')
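# Map the sidebar labels to full Inference API URLs (base_url + model repo id).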
model_links = {
    "Llama-3.2 [3B]": base_url + "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen2.5 [3B]": base_url + "Qwen/Qwen2.5-3B-Instruct",
    "Phi-3.5 [3.82B]": base_url + "microsoft/Phi-3.5-mini-instruct"
}
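# Per-model sidebar metadata: description, local logo path, and model card URL.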
model_info = {
    "Llama-3.2 [3B]": {
        'description': """The Llama-3.2 3B Instruct model is a **Large Language Model (LLM)** that can hold question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': 'logos/Meta.png',
        'url': 'https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct'},
    "Qwen2.5 [3B]": {
        'description': """The Qwen2.5 3B Instruct model is a **Large Language Model (LLM)** that can hold question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': 'logos/Qwen.png',
        'url': 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct'},
    "Phi-3.5 [3.82B]": {
        'description': """The Phi-3.5 mini instruct model is a **Large Language Model (LLM)** that can hold question-and-answer interactions.

An SLM (Small Language Model) is best for applications requiring fast response times, low resource consumption, and specific, narrow tasks.""",
        'logo': 'logos/ms.png',
        'url': 'https://huggingface.co/microsoft/Phi-3.5-mini-instruct'},
}
def reset_conversation():
    '''Resets the conversation history.'''
    st.session_state.conversation = []
    st.session_state.messages = []
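# Sidebar: model picker, generation settings, custom instructions, and model info.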
models = list(model_links.keys())
selected_model = st.sidebar.selectbox(
    "Select Model",
    models,
    help="""Choose your AI model:
    • Llama-3.2: Balanced & versatile
    • Qwen2.5: Strong in reasoning
    • Phi-3.5: Good at technical tasks"""
)
temp_value = st.sidebar.slider(
    'Select a temperature value',
    0.0,
    1.0,
    0.5,
    help="""Controls randomness in responses:
    0 = focused/deterministic
    0.5 = balanced
    1 = more creative/random"""
)
custom_instructions = st.sidebar.text_area(
    "Custom System Instructions",
    value="You are a helpful assistant; act like a human in conversation. Keep answers very short and in English only!",
    help="Customize how the AI should behave"
)
st.sidebar.button('Reset Chat', on_click=reset_conversation)
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown(f"[View model on 🤗 Hugging Face]({model_info[selected_model]['url']})")
st.sidebar.markdown("*Generated content can be outdated, inaccurate, offensive or non-factual!!!*")
if "prev_option" not in st.session_state:
st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
st.session_state.messages = []
# st.write(f"Changed to {selected_model}")
st.session_state.prev_option = selected_model
reset_conversation()
st.subheader(selected_model)
if "messages" not in st.session_state:
st.session_state.messages = []
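# Replay the stored conversation so it survives Streamlit's rerun on each interaction.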
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
if prompt := st.chat_input(f"Hi, I'm {selected_model}. How can I help you today?"):
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    with st.chat_message("assistant"):
        client = InferenceClient(
            model=model_links[selected_model],
            token=API_KEY,  # gated models such as Llama-3.2 reject anonymous requests
        )
        try:
            # text_generation takes a plain prompt string, so the custom system
            # instructions are prepended to each request instead of being stored
            # as extra entries in the visible chat history.
            output = client.text_generation(
                f"{custom_instructions}\n\n{prompt}",
                temperature=temp_value,
                max_new_tokens=3000,
                stream=True,
            )
            # Create a placeholder for the streaming response
            message_placeholder = st.empty()
            full_response = ""

            # Stream the response chunks and accumulate them as they arrive
            for chunk in output:
                if isinstance(chunk, dict) and "generated_text" in chunk:
                    text_chunk = chunk["generated_text"]
                elif isinstance(chunk, str):
                    text_chunk = chunk
                else:
                    continue
                full_response += text_chunk
                message_placeholder.markdown(full_response + "▌")

            # Display the final response and store it in the history
            message_placeholder.markdown(full_response)
            st.session_state.messages.append({"role": "assistant", "content": full_response})
        except Exception as e:
            st.error(f"Error: {str(e)}")
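# To run locally (assuming this file is saved as app.py and the logos/
# directory is present alongside it):
#   export HG_Interference_API_TOKEN=hf_your_token_here
#   streamlit run app.py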