|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
|
|
|
|
|
|
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace |
|
|
from langchain_core.messages import HumanMessage |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the HF token from a local .env file. load_dotenv() returns False when
# the file is missing (it does NOT raise), so the original bare try/except
# could never trigger its warning; check the return value instead, and only
# warn when the token is also absent from the process environment.
if not load_dotenv('C:\\Users\\raj\\.jupyter\\.env') and 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
    print("Environment file not found !! MUST find the env var HUGGINGFACEHUB_API_TOKEN to work.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page header.
st.title('HuggingFace LLM playground')

# Hugging Face Hub repo ids offered in the sidebar; the first is the default.
models = [
    'mistralai/Mistral-7B-Instruct-v0.2',
    'google/flan-t5-xxl',
    'tiiuae/falcon-40b-instruct',
]

# Repo id of the model the user picked.
model_id = st.sidebar.selectbox('Select model', options=tuple(models))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Seed the response slot on the first run so the output area shows a hint.
if 'model-response' not in st.session_state:
    st.session_state['model-response'] = '<provide query & click on invoke>'

# Output area: shows the last model reply (written by the Invoke callback).
st.text_area('Response', value=st.session_state['model-response'], height=400)

# Prompt input, pre-filled with a sample question.
query = st.text_area('Query', placeholder='provide query & invoke', value='who was the president of the USA in 2023?')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Sidebar generation controls -------------------------------------------

# Sampling temperature. No explicit `value=`, so Streamlit defaults the
# slider to min_value (0.01) — near-deterministic output by default.
temperature = st.sidebar.slider(
    label='Temperature',
    min_value=0.01,
    max_value=1.0
)

# Nucleus-sampling cutoff (cumulative probability mass considered).
top_p = st.sidebar.slider(
    label='Top p',
    min_value=0.01,
    max_value=1.0,
    value=0.01
)

# Restrict sampling to the k most likely next tokens.
top_k = st.sidebar.slider(
    label='Top k',
    min_value=1,
    max_value=50,
    value=10
)

# Values > 1.0 discourage token repetition.
# Fix: label typo "Repeatition penalty" -> "Repetition penalty".
repetition_penalty = st.sidebar.slider(
    label='Repetition penalty',
    min_value=0.0,
    max_value=5.0,
    value=1.0
)

# Upper bound on newly generated tokens per response.
max_tokens = st.sidebar.number_input(
    label='Max tokens',
    value=50
)
|
|
|
|
|
|
|
|
def get_llm(model_id):
    """Build a HuggingFaceEndpoint for *model_id*.

    Reads the module-level sidebar widget values (temperature, top_p,
    top_k, repetition_penalty, max_tokens) at call time, so the endpoint
    always reflects the current UI state.
    """
    generation_kwargs = {
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repetition_penalty,
        'max_new_tokens': max_tokens,
    }
    # NOTE(review): task="conversational" is carried over from the original;
    # confirm the selected models' inference endpoints accept this task.
    return HuggingFaceEndpoint(
        repo_id=model_id,
        task="conversational",
        **generation_kwargs,
    )
|
|
|
|
|
|
|
|
def invoke():
    """Button callback: run the selected chat model on the current query.

    Writes the model's reply into st.session_state['model-response'] so the
    Response text area displays it on the next script rerun.
    """
    chat_model = ChatHuggingFace(llm=get_llm(model_id))
    with st.spinner('Invoking LLM ... '):
        reply = chat_model.invoke([HumanMessage(content=query)])
        st.session_state['model-response'] = reply.content


# Clicking the button runs invoke() before Streamlit reruns the script.
st.button("Invoke", on_click=invoke)