Spaces:
Sleeping
Sleeping
Wilame Lima committed on
Commit ·
45e0afe
1
Parent(s): 90263a4
Trim content of the url if it's too long
Browse files- config.py +1 -0
- functions.py +21 -0
- requirements.txt +2 -1
config.py
CHANGED
|
@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
|
|
| 6 |
import requests
|
| 7 |
import re
|
| 8 |
import time
|
|
|
|
| 9 |
|
| 10 |
# load variables from the env file
|
| 11 |
load_dotenv()
|
|
|
|
| 6 |
import requests
|
| 7 |
import re
|
| 8 |
import time
|
| 9 |
+
import tiktoken
|
| 10 |
|
| 11 |
# load variables from the env file
|
| 12 |
load_dotenv()
|
functions.py
CHANGED
|
@@ -116,11 +116,32 @@ def look_for_actions(user_input:str, message:str):
|
|
| 116 |
|
| 117 |
# add the content of the website to the message
|
| 118 |
url_content = f'{user_input}. Content of the site {url}:\n{get_site_content(url)}'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
return (True, url_content)
|
| 121 |
|
| 122 |
# if there is no action to perform, return None
|
| 123 |
return (False, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
|
|
|
|
|
|
|
|
|
| 125 |
|
|
|
|
|
|
|
|
|
|
| 126 |
|
|
|
|
| 116 |
|
| 117 |
# add the content of the website to the message
|
| 118 |
url_content = f'{user_input}. Content of the site {url}:\n{get_site_content(url)}'
|
| 119 |
+
|
| 120 |
+
# check if the url_content is too long. If so, keep trimming the text until it is not too long
|
| 121 |
+
while get_token_amount(url_content) > 5000:
|
| 122 |
+
url_content = url_content[:-100]
|
| 123 |
|
| 124 |
return (True, url_content)
|
| 125 |
|
| 126 |
# if there is no action to perform, return None
|
| 127 |
return (False, None)
|
| 128 |
+
|
| 129 |
+
@st.cache_data(ttl=3600)
|
| 130 |
+
def get_token_amount(text,
|
| 131 |
+
model_name="gpt-4") -> int:
|
| 132 |
+
|
| 133 |
+
"""Uses the tiktoken library to check if a text is too long for a given model.
|
| 134 |
+
Even though we are using a Llama model, we are using the GPT-4 model as an approximation.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
text (str): The text to check.
|
| 138 |
+
model_name (str): The name of the model to check. Defaults to "gpt-4".
|
| 139 |
|
| 140 |
+
Returns:
|
| 141 |
+
int: The number of tokens in the text.
|
| 142 |
+
"""
|
| 143 |
|
| 144 |
+
encoding = tiktoken.encoding_for_model(model_name)
|
| 145 |
+
tokens = encoding.encode(text)
|
| 146 |
+
return len(tokens)
|
| 147 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
streamlit
|
| 2 |
python-dotenv
|
| 3 |
huggingface_hub
|
| 4 |
-
beautifulsoup4
|
|
|
|
|
|
| 1 |
streamlit
|
| 2 |
python-dotenv
|
| 3 |
huggingface_hub
|
| 4 |
+
beautifulsoup4
|
| 5 |
+
tiktoken
|