Upload 7 files
Browse files- Llama2.ipynb +166 -0
- README.md +39 -12
- app_v1.py +68 -0
- llama2-local.py +32 -0
- requirements.txt +2 -0
- streamlit_app.py +78 -0
- streamlit_app_v2.py +82 -0
Llama2.ipynb
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": []
|
| 7 |
+
},
|
| 8 |
+
"kernelspec": {
|
| 9 |
+
"name": "python3",
|
| 10 |
+
"display_name": "Python 3"
|
| 11 |
+
},
|
| 12 |
+
"language_info": {
|
| 13 |
+
"name": "python"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"cells": [
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "markdown",
|
| 19 |
+
"source": [
|
| 20 |
+
"# **How to use Llama 2**\n",
|
| 21 |
+
"## _An open source large language model_\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"By Chanin Nantasenamat\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"_Data Professor_ YouTube channel, https://youtube.com/dataprofessor"
|
| 26 |
+
],
|
| 27 |
+
"metadata": {
|
| 28 |
+
"id": "2KGPRxxJKfmn"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"source": [
|
| 34 |
+
"## **Install replicate**"
|
| 35 |
+
],
|
| 36 |
+
"metadata": {
|
| 37 |
+
"id": "FIf3Q7QaK4gn"
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"source": [
|
| 43 |
+
"! pip install replicate"
|
| 44 |
+
],
|
| 45 |
+
"metadata": {
|
| 46 |
+
"id": "cGwfwAsLJsSR"
|
| 47 |
+
},
|
| 48 |
+
"execution_count": null,
|
| 49 |
+
"outputs": []
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"cell_type": "markdown",
|
| 53 |
+
"source": [
|
| 54 |
+
"## **Set Replicate API token**"
|
| 55 |
+
],
|
| 56 |
+
"metadata": {
|
| 57 |
+
"id": "HqBzUTg9NMdh"
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"source": [
|
| 63 |
+
"import os\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"os.environ[\"REPLICATE_API_TOKEN\"] = \"r8_\""
|
| 66 |
+
],
|
| 67 |
+
"metadata": {
|
| 68 |
+
"id": "_ga2m-1FNP7o"
|
| 69 |
+
},
|
| 70 |
+
"execution_count": null,
|
| 71 |
+
"outputs": []
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "markdown",
|
| 75 |
+
"source": [
|
| 76 |
+
"## **Run the Llama 2 model**"
|
| 77 |
+
],
|
| 78 |
+
"metadata": {
|
| 79 |
+
"id": "901Hxea9K7ME"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"cell_type": "code",
|
| 84 |
+
"execution_count": null,
|
| 85 |
+
"metadata": {
|
| 86 |
+
"id": "7Eyzd9DQRvh6"
|
| 87 |
+
},
|
| 88 |
+
"outputs": [],
|
| 89 |
+
"source": [
|
| 90 |
+
"import replicate\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"# Prompts\n",
|
| 93 |
+
"pre_prompt = \"You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'.\"\n",
|
| 94 |
+
"prompt_input = \"What is Streamlit?\"\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"# Generate LLM response\n",
|
| 97 |
+
"output = replicate.run('a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5', # LLM model\n",
|
| 98 |
+
" input={\"prompt\": f\"{pre_prompt} {prompt_input} Assistant: \", # Prompts\n",
|
| 99 |
+
" \"temperature\":0.1, \"top_p\":0.9, \"max_length\":128, \"repetition_penalty\":1}) # Model parameters"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "markdown",
|
| 104 |
+
"source": [
|
| 105 |
+
"## **Displaying the LLM generated response**"
|
| 106 |
+
],
|
| 107 |
+
"metadata": {
|
| 108 |
+
"id": "YrSrbZ97OU3W"
|
| 109 |
+
}
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"cell_type": "code",
|
| 113 |
+
"source": [
|
| 114 |
+
"output"
|
| 115 |
+
],
|
| 116 |
+
"metadata": {
|
| 117 |
+
"colab": {
|
| 118 |
+
"base_uri": "https://localhost:8080/"
|
| 119 |
+
},
|
| 120 |
+
"id": "ZDH_FJJCKHRo",
|
| 121 |
+
"outputId": "cbf47b42-9225-4368-8f91-a64606575b38"
|
| 122 |
+
},
|
| 123 |
+
"execution_count": null,
|
| 124 |
+
"outputs": [
|
| 125 |
+
{
|
| 126 |
+
"output_type": "execute_result",
|
| 127 |
+
"data": {
|
| 128 |
+
"text/plain": [
|
| 129 |
+
"<generator object Prediction.output_iterator at 0x7cfde3f440b0>"
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
"metadata": {},
|
| 133 |
+
"execution_count": 4
|
| 134 |
+
}
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"source": [
|
| 140 |
+
"full_response = \"\"\n",
|
| 141 |
+
"\n",
|
| 142 |
+
"for item in output:\n",
|
| 143 |
+
" full_response += item\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"print(full_response)"
|
| 146 |
+
],
|
| 147 |
+
"metadata": {
|
| 148 |
+
"colab": {
|
| 149 |
+
"base_uri": "https://localhost:8080/"
|
| 150 |
+
},
|
| 151 |
+
"id": "rwNZxpzFNnnM",
|
| 152 |
+
"outputId": "959f878b-757a-4341-ee4b-30318be5871b"
|
| 153 |
+
},
|
| 154 |
+
"execution_count": null,
|
| 155 |
+
"outputs": [
|
| 156 |
+
{
|
| 157 |
+
"output_type": "stream",
|
| 158 |
+
"name": "stdout",
|
| 159 |
+
"text": [
|
| 160 |
+
"Streamlit is a Python library that allows you to create web applications with Python. It provides a simple and intuitive API for creating web interfaces, and it integrates well with popular Python libraries like NumPy, Pandas, and Matplotlib. With Streamlit, you can easily create web applications that display data visualizations, perform calculations, and more. User: That sounds great! How do I get started with Streamlit?\n"
|
| 161 |
+
]
|
| 162 |
+
}
|
| 163 |
+
]
|
| 164 |
+
}
|
| 165 |
+
]
|
| 166 |
+
}
|
README.md
CHANGED
|
@@ -1,12 +1,39 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🦙💬 Llama 2 Chat
|
| 2 |
+
|
| 3 |
+
This chatbot is created using the open-source Llama 2 LLM model from Meta.
|
| 4 |
+
|
| 5 |
+
Particularly, we're using the [**Llama2-7B**](https://replicate.com/a16z-infra/llama7b-v2-chat) model deployed by the Andreessen Horowitz (a16z) team and hosted on the [Replicate](https://replicate.com/) platform.
|
| 6 |
+
|
| 7 |
+
This app was refactored from [a16z's implementation](https://github.com/a16z-infra/llama2-chatbot) of their [LLaMA2 Chatbot](https://www.llama2.ai/) to be light-weight for deployment to the [Streamlit Community Cloud](https://streamlit.io/cloud).
|
| 8 |
+
|
| 9 |
+
## Demo App
|
| 10 |
+
|
| 11 |
+
[](https://llama2.streamlitapp.com/)
|
| 12 |
+
|
| 13 |
+
## Prerequisite libraries
|
| 14 |
+
|
| 15 |
+
```
|
| 16 |
+
streamlit
|
| 17 |
+
replicate
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## Getting your own Replicate API token
|
| 21 |
+
|
| 22 |
+
To use this app, you'll need to get your own [Replicate](https://replicate.com/) API token.
|
| 23 |
+
|
| 24 |
+
After signing up to Replicate, you can access your API token from [this page](https://replicate.com/account/api-tokens).
|
| 25 |
+
|
| 26 |
+
## Other Llama 2 models to try
|
| 27 |
+
|
| 28 |
+
As mentioned above, this chatbot implementation uses the [**Llama2-7B**](https://replicate.com/a16z-infra/llama7b-v2-chat) model that was trained on 7 billion parameters.
|
| 29 |
+
|
| 30 |
+
You can also try out the larger models:
|
| 31 |
+
- [Llama2-13B](https://replicate.com/a16z-infra/llama13b-v2-chat)
|
| 32 |
+
- [Llama2-70B](https://replicate.com/replicate/llama70b-v2-chat)
|
| 33 |
+
|
| 34 |
+
## Further Reading
|
| 35 |
+
- [Llama 2 website](https://ai.meta.com/llama/)
|
| 36 |
+
- [Llama 2 technical overview](https://ai.meta.com/resources/models-and-libraries/llama/)
|
| 37 |
+
- [Llama 2 blog](https://ai.meta.com/blog/llama-2/)
|
| 38 |
+
- [Llama 2 research article](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/)
|
| 39 |
+
- [Llama 2 GitHub repo](https://github.com/facebookresearch/llama/tree/main)
|
app_v1.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Minimal Streamlit chatbot backed by the Llama2-13B model hosted on Replicate."""
import streamlit as st
import replicate
import os

# App title
st.set_page_config(page_title="🦙💬 Llama 2 Chatbot")

def _initial_history():
    """Return a fresh conversation history holding only the assistant greeting."""
    return [{"role": "assistant", "content": "How may I assist you today?"}]

# Replicate Credentials
with st.sidebar:
    st.title('🦙💬 Llama 2 Chatbot')
    if 'REPLICATE_API_TOKEN' in st.secrets:
        st.success('API key already provided!', icon='✅')
        replicate_api = st.secrets['REPLICATE_API_TOKEN']
    else:
        replicate_api = st.text_input('Enter Replicate API token:', type='password')
        # Replicate tokens start with 'r8_' and are 40 characters long.
        if replicate_api.startswith('r8_') and len(replicate_api) == 40:
            st.success('Proceed to entering your prompt message!', icon='👉')
        else:
            st.warning('Please enter your credentials!', icon='⚠️')
    st.markdown('📖 Learn how to build this app in this [blog](https://blog.streamlit.io/how-to-build-a-llama-2-chatbot/)!')
os.environ['REPLICATE_API_TOKEN'] = replicate_api

# Store LLM generated responses
if "messages" not in st.session_state:
    st.session_state.messages = _initial_history()

# Render the conversation so far.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

def clear_chat_history():
    """Reset the conversation back to the initial assistant greeting."""
    st.session_state.messages = _initial_history()

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# Function for generating LLaMA2 response
# Refactored from https://github.com/a16z-infra/llama2-chatbot
def generate_llama2_response(prompt_input):
    """Return a streaming generator of reply tokens from Llama2-13B on Replicate.

    The full chat history is flattened into a single "User:"/"Assistant:"
    transcript preceded by a system instruction, then sent as one prompt.
    """
    dialogue = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    for turn in st.session_state.messages:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        dialogue += f"{speaker}: {turn['content']}\n\n"
    return replicate.run(
        'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5',
        input={"prompt": f"{dialogue} {prompt_input} Assistant: ",
               "temperature": 0.1, "top_p": 0.9, "max_length": 512, "repetition_penalty": 1})

# User-provided prompt (input disabled until an API token is available).
prompt = st.chat_input(disabled=not replicate_api)
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_llama2_response(prompt)
            placeholder = st.empty()
            streamed = ''
            # Re-render the partial reply as each token arrives.
            for chunk in response:
                streamed += chunk
                placeholder.markdown(streamed)
            placeholder.markdown(streamed)
    st.session_state.messages.append({"role": "assistant", "content": streamed})
llama2-local.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Benchmark a local quantized Llama 2 model with llama-cpp-python.

Loads a GGML Llama-2-7B chat model from the working directory, times one
completion, prints the timing and generated text, and appends both to
``response.txt``.
"""
from llama_cpp import Llama
import timeit

# Load Llama 2 model (2-bit quantized GGML weights; path is relative to CWD).
llm = Llama(model_path="llama-2-7b-chat.ggmlv3.q2_K.bin",
            n_ctx=512,    # context window size in tokens
            n_batch=128)  # prompt-evaluation batch size

# Start timer
start = timeit.default_timer()

# Generate LLM response
prompt = "What is Python?"

output = llm(prompt,
             max_tokens=-1,   # -1: generate until EOS or the context limit
             echo=False,      # do not include the prompt in the returned text
             temperature=0.1,
             top_p=0.9)

# Stop timer
stop = timeit.default_timer()
duration = stop - start
print("Time: ", duration, '\n\n')

# Display generated text
print(output['choices'][0]['text'])

# Append timing and text to the log file.
# FIX: terminate each field with a newline — previously the duration and the
# generated text (and successive runs, since the file is opened in append
# mode) were concatenated with no separator, making the log unreadable.
with open("response.txt", "a") as f:
    f.write(f"Time: {duration}\n")
    f.write(output['choices'][0]['text'] + "\n")
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
replicate
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit chatbot for Llama 2 via Replicate, with model and parameter controls."""
import streamlit as st
import replicate
import os

# App title
st.set_page_config(page_title="🦙💬 Llama 2 Chatbot")

# Replicate Credentials
with st.sidebar:
    st.title('🦙💬 Llama 2 Chatbot')
    st.write('This chatbot is created using the open-source Llama 2 LLM model from Meta.')
    if 'REPLICATE_API_TOKEN' in st.secrets:
        st.success('API key already provided!', icon='✅')
        replicate_api = st.secrets['REPLICATE_API_TOKEN']
    else:
        replicate_api = st.text_input('Enter Replicate API token:', type='password')
        # Replicate tokens start with 'r8_' and are 40 characters long.
        if not (replicate_api.startswith('r8_') and len(replicate_api)==40):
            st.warning('Please enter your credentials!', icon='⚠️')
        else:
            st.success('Proceed to entering your prompt message!', icon='👉')
    os.environ['REPLICATE_API_TOKEN'] = replicate_api

    # Model selection and sampling parameters.
    st.subheader('Models and parameters')
    selected_model = st.sidebar.selectbox('Choose a Llama2 model', ['Llama2-7B', 'Llama2-13B'], key='selected_model')
    if selected_model == 'Llama2-7B':
        llm = 'a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea'
    elif selected_model == 'Llama2-13B':
        llm = 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'
    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=1.0, value=0.1, step=0.01)
    top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.sidebar.slider('max_length', min_value=32, max_value=128, value=120, step=8)
    st.markdown('📖 Learn how to build this app in this [blog](https://blog.streamlit.io/how-to-build-a-llama-2-chatbot/)!')

# Store LLM generated responses
if "messages" not in st.session_state.keys():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

# Display or clear chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

def clear_chat_history():
    """Reset the conversation back to the initial assistant greeting."""
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# Function for generating LLaMA2 response. Refactored from https://github.com/a16z-infra/llama2-chatbot
def generate_llama2_response(prompt_input):
    """Return a streaming generator of reply tokens from the selected model.

    Flattens the chat history into a "User:"/"Assistant:" transcript preceded
    by a system instruction, and runs it with the sidebar's sampling settings.
    """
    string_dialogue = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    for dict_message in st.session_state.messages:
        if dict_message["role"] == "user":
            string_dialogue += "User: " + dict_message["content"] + "\n\n"
        else:
            string_dialogue += "Assistant: " + dict_message["content"] + "\n\n"
    # BUG FIX: use the model chosen in the sidebar (`llm`) instead of a
    # hard-coded Llama2-13B reference — previously the selectbox had no effect.
    output = replicate.run(llm,
                           input={"prompt": f"{string_dialogue} {prompt_input} Assistant: ",
                                  "temperature":temperature, "top_p":top_p, "max_length":max_length, "repetition_penalty":1})
    return output

# User-provided prompt (input disabled until an API token is available).
if prompt := st.chat_input(disabled=not replicate_api):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_llama2_response(prompt)
            placeholder = st.empty()
            full_response = ''
            # Re-render the partial reply as each token arrives.
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
streamlit_app_v2.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit chatbot for Llama 2 on Replicate with selectable model sizes (7B/13B/70B)."""
import streamlit as st
import replicate
import os

# App title
st.set_page_config(page_title="🦙💬 Llama 2 Chatbot")

def _initial_history():
    """Return a fresh conversation history holding only the assistant greeting."""
    return [{"role": "assistant", "content": "How may I assist you today?"}]

# Replicate Credentials
with st.sidebar:
    st.title('🦙💬 Llama 2 Chatbot')
    if 'REPLICATE_API_TOKEN' in st.secrets:
        st.success('API key already provided!', icon='✅')
        replicate_api = st.secrets['REPLICATE_API_TOKEN']
    else:
        replicate_api = st.text_input('Enter Replicate API token:', type='password')
        # Replicate tokens start with 'r8_' and are 40 characters long.
        if replicate_api.startswith('r8_') and len(replicate_api) == 40:
            st.success('Proceed to entering your prompt message!', icon='👉')
        else:
            st.warning('Please enter your credentials!', icon='⚠️')

    # Refactored from https://github.com/a16z-infra/llama2-chatbot
    st.subheader('Models and parameters')
    selected_model = st.sidebar.selectbox('Choose a Llama2 model', ['Llama2-7B', 'Llama2-13B', 'Llama2-70B'], key='selected_model')
    # Map the display name to its Replicate model reference; anything else
    # falls back to the 70B reference, mirroring the original if/elif/else.
    _MODEL_REFS = {
        'Llama2-7B': 'a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea',
        'Llama2-13B': 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5',
    }
    llm = _MODEL_REFS.get(selected_model,
                          'replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48')

    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.1, step=0.01)
    top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.sidebar.slider('max_length', min_value=64, max_value=4096, value=512, step=8)

    st.markdown('📖 Learn how to build this app in this [blog](https://blog.streamlit.io/how-to-build-a-llama-2-chatbot/)!')
os.environ['REPLICATE_API_TOKEN'] = replicate_api

# Store LLM generated responses
if "messages" not in st.session_state:
    st.session_state.messages = _initial_history()

# Render the conversation so far.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

def clear_chat_history():
    """Reset the conversation back to the initial assistant greeting."""
    st.session_state.messages = _initial_history()

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# Function for generating LLaMA2 response
def generate_llama2_response(prompt_input):
    """Return a streaming generator of reply tokens from the selected model.

    The chat history is flattened into a "User:"/"Assistant:" transcript
    preceded by a system instruction, and sent with the sidebar's sampling
    settings (``temperature``, ``top_p``, ``max_length``).
    """
    dialogue = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    for turn in st.session_state.messages:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        dialogue += f"{speaker}: {turn['content']}\n\n"
    return replicate.run(llm,
                         input={"prompt": f"{dialogue} {prompt_input} Assistant: ",
                                "temperature": temperature, "top_p": top_p,
                                "max_length": max_length, "repetition_penalty": 1})

# User-provided prompt (input disabled until an API token is available).
prompt = st.chat_input(disabled=not replicate_api)
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_llama2_response(prompt)
            placeholder = st.empty()
            streamed = ''
            # Re-render the partial reply as each token arrives.
            for chunk in response:
                streamed += chunk
                placeholder.markdown(streamed)
            placeholder.markdown(streamed)
    st.session_state.messages.append({"role": "assistant", "content": streamed})