Spaces:

bestofaiml
/

snowflake-arctic-demo

Runtime error

App Files Files Community

carolinefrascasnowflake committed on Apr 24, 2024

Commit

3ae0fe0

verified ·

1 Parent(s): 3c2cf7c

count tokens (#1)

Browse files

- count tokens (5a25c51bf7eb8c22ac3215d9a16a43ca41873447)

Files changed (1) hide show

app.py +32 -3

app.py CHANGED Viewed

@@ -1,6 +1,16 @@
 import streamlit as st
 import replicate
 import os
 # App title
 st.set_page_config(page_title="Snowflake Arctic")
@@ -21,7 +31,7 @@ with st.sidebar:
     os.environ['REPLICATE_API_TOKEN'] = replicate_api
     st.subheader("Adjust model parameters")
-    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.6, step=0.01)
     top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
 # Store LLM-generated responses
@@ -37,7 +47,20 @@ def clear_chat_history():
     st.session_state.messages = [{"role": "assistant", "content": "Hi. I'm Arctic, a new, efficient, intelligent, and truly open language model created by Snowflake AI Research. Ask me anything."}]
 st.sidebar.button('Clear chat history', on_click=clear_chat_history)
-st.sidebar.caption('Built by [Snowflake](https://snowflake.com/) to demonstrate [Snowflake Arctic](https://www.snowflake.com/blog/arctic-open-and-efficient-foundation-language-models-snowflake).')
 # Function for generating Snowflake Arctic response
 def generate_arctic_response():
@@ -50,9 +73,15 @@ def generate_arctic_response():
     prompt.append("<|im_start|>assistant")
     prompt.append("")
     for event in replicate.stream("snowflake/snowflake-arctic-instruct",
-                           input={"prompt": "\n".join(prompt),
                                   "prompt_template": r"{prompt}",
                                   "temperature": temperature,
                                   "top_p": top_p,

 import streamlit as st
 import replicate
 import os
+from transformers import AutoTokenizer
+# # Assuming you have a specific tokenizer for Llama; if not, use an appropriate one like this
+# tokenizer = AutoTokenizer.from_pretrained("allenai/llama")
+# text = "Example text to tokenize."
+# tokens = tokenizer.tokenize(text)
+# num_tokens = len(tokens)
+# print("Number of tokens:", num_tokens)
 # App title
 st.set_page_config(page_title="Snowflake Arctic")
     os.environ['REPLICATE_API_TOKEN'] = replicate_api
     st.subheader("Adjust model parameters")
+    temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.3, step=0.01)
     top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
 # Store LLM-generated responses
     st.session_state.messages = [{"role": "assistant", "content": "Hi. I'm Arctic, a new, efficient, intelligent, and truly open language model created by Snowflake AI Research. Ask me anything."}]
 st.sidebar.button('Clear chat history', on_click=clear_chat_history)
+st.sidebar.caption('Built by [Snowflake](https://snowflake.com/) to demonstrate [Snowflake Arctic](https://www.snowflake.com/blog/arctic-open-and-efficient-foundation-language-models-snowflake). App hosted on [Streamlit Community Cloud](https://streamlit.io/cloud). Model hosted by [Replicate](https://replicate.com/snowflake/snowflake-arctic-instruct).')
+@st.cache_resource
+def get_tokenizer():
+    """Get a tokenizer to make sure we're not sending too much
+    text to the model. Eventually we will replace this with ArcticTokenizer.
+    """
+    return AutoTokenizer.from_pretrained("huggyllama/llama-7b")
+def get_num_tokens(prompt):
+    """Return the number of tokens in the given prompt string, as counted by get_tokenizer()."""
+    tokenizer = get_tokenizer()
+    tokens = tokenizer.tokenize(prompt)
+    return len(tokens)
 # Function for generating Snowflake Arctic response
 def generate_arctic_response():
     prompt.append("<|im_start|>assistant")
     prompt.append("")
+    prompt_str = "\n".join(prompt)
+    if get_num_tokens(prompt_str) >= 4096:
+        st.error("Conversation length too long. Please keep it under 4096 tokens.")
+        st.button('Clear chat history', on_click=clear_chat_history, key="clear_chat_history")
+        st.stop()
     for event in replicate.stream("snowflake/snowflake-arctic-instruct",
+                           input={"prompt": prompt_str,
                                   "prompt_template": r"{prompt}",
                                   "temperature": temperature,
                                   "top_p": top_p,