Spaces:

gdk0007
/

DemoSpace

Sleeping

App Files Files Community

DemoSpace / src /streamlit_app.py

gdk0007

Update src/streamlit_app.py

a90de82 verified 17 days ago

raw

history blame contribute delete

3.11 kB

	# Exercise : HuggingFace LLM Playground

	# Application flow:
	# 1. User selects the LLM
	# 2. User adjusts the model parameters (optional)
	# 3. User provides a query
	# 4. Selected model is invoked
	# 5. Result is shown to the user

	import streamlit as st
	from dotenv import load_dotenv
	import os

	# from langchain_community.llms import HuggingFaceHub
	from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
	from langchain_core.messages import HumanMessage


	# Load the API keys from a local env file when running on a developer machine.
	# CHANGE the path to point at your own env file.
	#
	# If a HF space is used then set the env var HUGGINGFACEHUB_API_TOKEN in the
	# space settings instead.
	#
	# load_dotenv() does not raise for a missing file; it returns False when no
	# variables were loaded. Test the return value rather than wrapping the call
	# in a bare except that would never fire for that case.
	if not load_dotenv('C:\\Users\\raj\\.jupyter\\.env'):
	    print("Environment file not found !! MUST find the env var HUGGINGFACEHUB_API_TOKEN to work.")



	# Page heading
	st.title('HuggingFace LLM playground')

	# Catalogue of selectable models; edit this list to add or remove choices.
	models = [
	    'mistralai/Mistral-7B-Instruct-v0.2',
	    'google/flan-t5-xxl',
	    # 'meta-llama/Meta-Llama-3-8B',
	    'tiiuae/falcon-40b-instruct',
	]

	# Sidebar dropdown; the repo id picked by the user is kept in model_id
	model_id = st.sidebar.selectbox(label='Select model', options=tuple(models))

	# Read the API key from environment - switch key for different providers
	# api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

	# Seed the response slot with a hint the first time the session runs;
	# an existing value is left untouched.
	st.session_state.setdefault('model-response', '<provide query & click on invoke>')

	# Box showing whatever is currently stored as the model response
	st.text_area(
	    label='Response',
	    value=st.session_state['model-response'],
	    height=400,
	)

	# Box where the user types the query; pre-filled with a sample question
	query = st.text_area(
	    label='Query',
	    value='who was the president of the USA in 2023?',
	    placeholder='provide query & invoke',
	)

	# Model parameter controls (all rendered in the sidebar)
	# https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html

	# Temperature
	temperature = st.sidebar.slider(
	    label='Temperature',
	    min_value=0.01,
	    max_value=1.0
	)

	# Top p
	top_p = st.sidebar.slider(
	    label='Top p',
	    min_value=0.01,
	    max_value=1.0,
	    value=0.01
	)

	# Top k
	top_k = st.sidebar.slider(
	    label='Top k',
	    min_value=1,
	    max_value=50,
	    value=10
	)

	# Repetition penalty: the lower bound is kept strictly above zero because
	# a penalty of 0 is not a valid generation setting (1.0 means "no penalty").
	repetition_penalty = st.sidebar.slider(
	    label='Repetition penalty',
	    min_value=0.01,
	    max_value=5.0,
	    value=1.0
	)

	# Maximum number of new tokens: must be a positive integer, so the input
	# is bounded below to stop zero or negative values reaching the endpoint.
	max_tokens = st.sidebar.number_input(
	    label='Max tokens',
	    min_value=1,
	    value=50
	)

	# Function to create the LLM
	def get_llm(model_id):
	    """Build a HuggingFaceEndpoint for the given model repo id.

	    The generation settings (temperature, top_k, top_p, repetition_penalty,
	    max_tokens) are taken from the module-level values set by the sidebar
	    widgets; only the repo id is passed in by the caller.
	    """
	    endpoint_settings = {
	        'repo_id': model_id,
	        'temperature': temperature,
	        'top_k': top_k,
	        'top_p': top_p,
	        'repetition_penalty': repetition_penalty,
	        'max_new_tokens': max_tokens,
	        'task': "conversational",
	    }
	    return HuggingFaceEndpoint(**endpoint_settings)

	# Function for invoking the LLM
	def invoke():
	    """Send the current query to the selected model and store the reply.

	    Used as the on_click callback of the "Invoke" button. The outcome
	    (model answer, or a short message when the query is blank or the call
	    fails) is written to st.session_state['model-response'].
	    """
	    # NOTE(review): query and model_id are read from module-level names,
	    # not from st.session_state; confirm they reflect the widget values at
	    # click time, or give the widgets keys and read them from session state.

	    # A blank prompt cannot produce a useful answer; report it instead of
	    # spending an API call on it.
	    if not query or not query.strip():
	        st.session_state['model-response'] = '<query is empty - provide query & click on invoke>'
	        return

	    # Show spinner, while we are waiting for the response
	    with st.spinner('Invoking LLM ... '):
	        try:
	            # Wrap the raw endpoint in ChatHuggingFace so it accepts chat messages
	            chat_model = ChatHuggingFace(llm=get_llm(model_id))
	            reply = chat_model.invoke([HumanMessage(content=query)])
	        except Exception as err:
	            # UI boundary: surface the failure (missing token, unsupported
	            # model, network error, ...) in the response box rather than
	            # letting the callback die with a raw traceback.
	            st.session_state['model-response'] = f'Model invocation failed: {err}'
	            return

	    st.session_state['model-response'] = reply.content


	# Button that triggers the model call through the invoke() callback
	st.button(label="Invoke", on_click=invoke)