Wilame Lima committed on
Commit
90263a4
·
1 Parent(s): b48b867

Add intent to get url content

Browse files
Files changed (4) hide show
  1. app.py +11 -29
  2. config.py +37 -1
  3. functions.py +125 -0
  4. requirements.txt +2 -1
app.py CHANGED
@@ -12,7 +12,7 @@ st.sidebar.markdown(
12
  This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
13
 
14
  Ask the model marketing-related questions and see how it responds. Have fun!
15
-
16
  Model used: [{MODEL_PATH}]({MODEL_LINK})
17
  """
18
  )
@@ -37,11 +37,11 @@ for message in chat_history:
37
  with st.chat_message(message['role']):
38
  st.write(message['content'])
39
 
40
- # keep only last 10 messages
41
- shorter_history = [message for message in chat_history[-10:] if 'content' in message]
42
 
43
  # include a system prompt to explain the bot what to do
44
- shorter_history = [{'role': 'system', 'content': SYSTEM_PROMPT}] + shorter_history
45
 
46
  # get the input from user
47
  user_input = st.chat_input("Write something...")
@@ -53,32 +53,14 @@ if user_input:
53
 
54
  # make the request
55
  with st.spinner("Generating the response..."):
56
-
57
- client = InferenceClient(
58
- "meta-llama/Meta-Llama-3-8B-Instruct",
59
- token=HUGGING_FACE_API_KEY,
60
- )
61
 
62
- messages = shorter_history + [{'role': 'user', 'content': user_input}]
63
-
64
- # query the model
65
- try:
66
- response = client.chat_completion(
67
- messages=messages,
68
- max_tokens = 500,
69
- stream = False,
70
- )
71
-
72
- # get the response
73
- message = response.choices[0].message['content']
74
-
75
- # append to the history
76
- chat_history.append({'content':user_input, 'role':'user'})
77
- chat_history.append(response.choices[0].message) # append the response
78
-
79
- except Exception as e:
80
- st.error(f"An error occurred: {e}")
81
- st.stop()
82
 
83
  st.session_state['chat_history'] = chat_history
84
  st.rerun()
 
12
  This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
13
 
14
  Ask the model marketing-related questions and see how it responds. Have fun!
15
+
16
  Model used: [{MODEL_PATH}]({MODEL_LINK})
17
  """
18
  )
 
37
  with st.chat_message(message['role']):
38
  st.write(message['content'])
39
 
40
+ # keep only last 50 messages
41
+ short_history = [message for message in chat_history[-50:] if 'content' in message]
42
 
43
  # include a system prompt to explain the bot what to do
44
+ short_history = [{'role': 'system', 'content': SYSTEM_PROMPT}] + short_history
45
 
46
  # get the input from user
47
  user_input = st.chat_input("Write something...")
 
53
 
54
  # make the request
55
  with st.spinner("Generating the response..."):
 
 
 
 
 
56
 
57
+ # keep the history we send short, for fair usage of the API
58
+ short_history = short_history + [{'role': 'user', 'content': user_input}]
59
+
60
+ # get the full history for the next iteration
61
+ chat_history = make_request(user_input,
62
+ short_history,
63
+ chat_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  st.session_state['chat_history'] = chat_history
66
  st.rerun()
config.py CHANGED
@@ -2,6 +2,10 @@ import streamlit as st
2
  from dotenv import load_dotenv
3
  from huggingface_hub import InferenceClient
4
  import os
 
 
 
 
5
 
6
  # load variables from the env file
7
  load_dotenv()
@@ -12,4 +16,36 @@ DASHBOARD_TITLE = "The Marketer Chatbot"
12
  MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
13
  MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
14
 
15
- SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce. Your goal is to provide clear, concise, and accurate responses within 3-4 sentences. You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics. Recognize when a more complex, detailed response is required and provide it with clarity. Always prioritize delivering actionable insights and practical advice. Never engage in converations that are not marketing-related."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
  from huggingface_hub import InferenceClient
4
  import os
5
+ from bs4 import BeautifulSoup
6
+ import requests
7
+ import re
8
+ import time
9
 
10
  # load variables from the env file
11
  load_dotenv()
 
16
  MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
17
  MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
18
 
19
# System prompt for the first model pass.  It teaches the model to emit the
# ###ACTION###getSiteContent###URL### sentinel when it needs a page's content;
# that exact sentinel format is parsed by look_for_actions, so do not change it.
# (Fixed typo: "converations" -> "conversations".)
SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.

You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.

Recognize when a more complex, detailed response is required and provide it with clarity.

Always prioritize delivering actionable insights and practical advice.

Never engage in conversations that are not marketing-related.

After THE LAST user response, ask yourself "do I need to visit an url to provide the answer?". If the answer is yes, return ONLY:

###ACTION###getSiteContent###URL###

The URL MUST BE THE ONE THE USER PROVIDED. Just change it if you need to add the 'https://' prefix.

If you DON'T find an URL, just provide the answer as usual.

REMEMBER: Just look for the URL in the LAST user's response. Ignore other URLs in the conversation.

"""
40
+
41
+
42
# System prompt for the second model pass, after a page has been fetched:
# identical persona/rules but WITHOUT the URL-sentinel instructions, so the
# model answers normally instead of re-emitting an action.
# (Fixed typo: "converations" -> "conversations".)
SYSTEM_PROMPT_NO_URL = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.

You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.

Recognize when a more complex, detailed response is required and provide it with clarity.

Always prioritize delivering actionable insights and practical advice.

Never engage in conversations that are not marketing-related.
"""
functions.py CHANGED
@@ -1 +1,126 @@
1
  from config import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from config import *
2
+
3
@st.cache_data()
def make_request(user_input: str,
                 short_history: list,
                 chat_history: list):
    """Send the trimmed chat to the Hugging Face API and return the full history.

    Parameters
    ----------
    user_input : str
        The text the user just typed.
    short_history : list
        Trimmed message list (system prompt + recent turns + the new user
        turn) that is actually sent to the model.
    chat_history : list
        The full conversation history; the new user message and the model's
        reply are appended to it.

    Returns
    -------
    list
        ``chat_history`` with the new user/assistant turns appended.

    NOTE(review): ``st.cache_data`` memoizes on the argument values, so an
    identical (input, history) pair returns the cached reply without hitting
    the API, and ``st.info``/``st.error`` side effects are skipped on cache
    hits — presumably intended for fair API usage; confirm this is desired.
    """

    client = InferenceClient(
        MODEL_PATH,
        token=HUGGING_FACE_API_KEY,
    )

    def _chat(messages: list):
        # One non-streaming chat-completion call; shared by both passes.
        return client.chat_completion(
            messages=messages,
            max_tokens=5000,
            stream=False,
        )

    try:
        response = _chat(short_history)

        # text of the model's first reply, inspected for an action sentinel
        message = response.choices[0].message['content']

        # analyse the content to see if there is an action to perform
        try:
            perform_actions = look_for_actions(user_input, message)
        except Exception as e:
            # best-effort: a failed fetch/parse degrades to a normal answer
            st.info(f"An error occurred while looking for actions: {e}")
            perform_actions = (False, None)

        # if there was an action to perform, resubmit the question:
        if perform_actions[0]:
            # replace the last user message with the URL-augmented prompt
            short_history[-1] = {'role': 'user', 'content': perform_actions[1]}

            # swap in the system prompt WITHOUT url analysis so the model
            # answers instead of emitting another action
            short_history[0] = {'role': 'system', 'content': SYSTEM_PROMPT_NO_URL}

            # wait a little bit to avoid the API rate limit
            time.sleep(1)

            response = _chat(short_history)

        # append the exchange to the full history
        chat_history.append({'content': user_input, 'role': 'user'})
        chat_history.append(response.choices[0].message)

        return chat_history

    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.stop()
61
+
62
@st.cache_data()
def get_site_content(url: str):
    """Fetch *url* and return its head <meta> tags plus visible body text.

    Returns a single string: one line per <meta> tag from the page head,
    then the body's text with scripts, styles, and blank lines removed.
    Raises ``requests.HTTPError`` on a non-2xx response (caught by the
    caller's action handler).
    """

    # impersonate a desktop browser so simple bot filters let us through
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # bounded timeout so a hung site cannot freeze the Streamlit app;
    # fail loudly on error status codes instead of parsing an error page
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # remove non-visible content (styles and scripts)
    for element in soup(["script", "style"]):
        element.extract()

    # keep the head's meta descriptions: the tag name and its attributes
    # (guard: some pages have no <head>)
    meta_tags_text = ''
    if soup.head is not None:
        for tag in soup.head.find_all('meta'):
            meta_tags_text += f'<{tag.name} {tag.attrs}>\n'

    # get the body text (guard: fall back to the whole document if no <body>)
    body_text = soup.body.get_text() if soup.body is not None else soup.get_text()

    # join the meta tags and the body text
    text = f'{meta_tags_text}\n{body_text}'

    # remove empty lines
    text = os.linesep.join([s for s in text.splitlines() if s])

    return text
97
+
98
def look_for_actions(user_input: str, message: str):
    """Receives a model reply and looks for the ###ACTION###function###URL### pattern.

    Parameters
    ----------
    user_input : str
        The user's last message; it is prepended to the fetched page content.
    message : str
        The model's reply, possibly containing the action sentinel.

    Returns
    -------
    tuple
        ``(True, augmented_prompt)`` when a getSiteContent action was found
        and performed — the prompt is the user input plus the site's content;
        ``(False, None)`` when there is no action to perform.
    """

    # fast path: no sentinel characters at all
    if '###' not in message:
        return (False, None)

    # extract the URL from the full sentinel; matching the complete pattern
    # (rather than splitting on '###' and indexing) stays correct even when
    # the model wraps the sentinel in extra prose
    match = re.search(r'###ACTION###getSiteContent###(.*?)###', message)
    if match is None:
        # '###' present but no well-formed getSiteContent action
        return (False, None)

    st.info("I need to visit the site to provide the answer. Please wait...")

    url = match.group(1).strip()

    # strip any sentinel text the user may have pasted into their own input
    user_input = re.sub(r'###.*?###', '', user_input)

    # add the content of the website to the message
    url_content = f'{user_input}. Content of the site {url}:\n{get_site_content(url)}'

    return (True, url_content)
124
+
125
+
126
+
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  streamlit
2
  python-dotenv
3
- huggingface_hub
 
 
1
  streamlit
2
  python-dotenv
3
+ huggingface_hub
4
+ beautifulsoup4