Spaces:
Sleeping
Sleeping
Wilame Lima
committed on
Commit
·
90263a4
1
Parent(s):
b48b867
Add intent to get url content
Browse files- app.py +11 -29
- config.py +37 -1
- functions.py +125 -0
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -12,7 +12,7 @@ st.sidebar.markdown(
|
|
| 12 |
This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
|
| 13 |
|
| 14 |
Ask the model marketing-related questions and see how it responds. Have fun!
|
| 15 |
-
|
| 16 |
Model used: [{MODEL_PATH}]({MODEL_LINK})
|
| 17 |
"""
|
| 18 |
)
|
|
@@ -37,11 +37,11 @@ for message in chat_history:
|
|
| 37 |
with st.chat_message(message['role']):
|
| 38 |
st.write(message['content'])
|
| 39 |
|
| 40 |
-
# keep only last
|
| 41 |
-
|
| 42 |
|
| 43 |
# include a system prompt to explain the bot what to do
|
| 44 |
-
|
| 45 |
|
| 46 |
# get the input from user
|
| 47 |
user_input = st.chat_input("Write something...")
|
|
@@ -53,32 +53,14 @@ if user_input:
|
|
| 53 |
|
| 54 |
# make the request
|
| 55 |
with st.spinner("Generating the response..."):
|
| 56 |
-
|
| 57 |
-
client = InferenceClient(
|
| 58 |
-
"meta-llama/Meta-Llama-3-8B-Instruct",
|
| 59 |
-
token=HUGGING_FACE_API_KEY,
|
| 60 |
-
)
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
stream = False,
|
| 70 |
-
)
|
| 71 |
-
|
| 72 |
-
# get the response
|
| 73 |
-
message = response.choices[0].message['content']
|
| 74 |
-
|
| 75 |
-
# append to the history
|
| 76 |
-
chat_history.append({'content':user_input, 'role':'user'})
|
| 77 |
-
chat_history.append(response.choices[0].message) # append the response
|
| 78 |
-
|
| 79 |
-
except Exception as e:
|
| 80 |
-
st.error(f"An error occurred: {e}")
|
| 81 |
-
st.stop()
|
| 82 |
|
| 83 |
st.session_state['chat_history'] = chat_history
|
| 84 |
st.rerun()
|
|
|
|
| 12 |
This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
|
| 13 |
|
| 14 |
Ask the model marketing-related questions and see how it responds. Have fun!
|
| 15 |
+
|
| 16 |
Model used: [{MODEL_PATH}]({MODEL_LINK})
|
| 17 |
"""
|
| 18 |
)
|
|
|
|
| 37 |
with st.chat_message(message['role']):
|
| 38 |
st.write(message['content'])
|
| 39 |
|
| 40 |
+
# keep only last 50 messages
|
| 41 |
+
short_history = [message for message in chat_history[-50:] if 'content' in message]
|
| 42 |
|
| 43 |
# include a system prompt to explain the bot what to do
|
| 44 |
+
short_history = [{'role': 'system', 'content': SYSTEM_PROMPT}] + short_history
|
| 45 |
|
| 46 |
# get the input from user
|
| 47 |
user_input = st.chat_input("Write something...")
|
|
|
|
| 53 |
|
| 54 |
# make the request
|
| 55 |
with st.spinner("Generating the response..."):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
# create a shorter_history to maintain fair usage of the API
|
| 58 |
+
short_history = short_history + [{'role': 'user', 'content': user_input}]
|
| 59 |
+
|
| 60 |
+
# get the full history for the next iteration
|
| 61 |
+
chat_history = make_request(user_input,
|
| 62 |
+
short_history,
|
| 63 |
+
chat_history)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
st.session_state['chat_history'] = chat_history
|
| 66 |
st.rerun()
|
config.py
CHANGED
|
@@ -2,6 +2,10 @@ import streamlit as st
|
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# load variables from the env file
|
| 7 |
load_dotenv()
|
|
@@ -12,4 +16,36 @@ DASHBOARD_TITLE = "The Marketer Chatbot"
|
|
| 12 |
MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 13 |
MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
|
| 14 |
|
| 15 |
-
SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
import os
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
+
import requests
|
| 7 |
+
import re
|
| 8 |
+
import time
|
| 9 |
|
| 10 |
# load variables from the env file
|
| 11 |
load_dotenv()
|
|
|
|
| 16 |
MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 17 |
MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
|
| 18 |
|
| 19 |
+
SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.
|
| 20 |
+
|
| 21 |
+
You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.
|
| 22 |
+
|
| 23 |
+
Recognize when a more complex, detailed response is required and provide it with clarity.
|
| 24 |
+
|
| 25 |
+
Always prioritize delivering actionable insights and practical advice.
|
| 26 |
+
|
| 27 |
+
Never engage in conversations that are not marketing-related.
|
| 28 |
+
|
| 29 |
+
After THE LAST user response, ask yourself "do I need to visit a URL to provide the answer?". If the answer is yes, return ONLY:
|
| 30 |
+
|
| 31 |
+
###ACTION###getSiteContent###URL###
|
| 32 |
+
|
| 33 |
+
The URL MUST BE THE ONE THE USER PROVIDED. Just change it if you need to add the 'https://' prefix.
|
| 34 |
+
|
| 35 |
+
If you DON'T find a URL, just provide the answer as usual.
|
| 36 |
+
|
| 37 |
+
REMEMBER: Just look for the URL in the LAST user's response. Ignore other URLs in the conversation.
|
| 38 |
+
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
SYSTEM_PROMPT_NO_URL = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.
|
| 43 |
+
|
| 44 |
+
You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.
|
| 45 |
+
|
| 46 |
+
Recognize when a more complex, detailed response is required and provide it with clarity.
|
| 47 |
+
|
| 48 |
+
Always prioritize delivering actionable insights and practical advice.
|
| 49 |
+
|
| 50 |
+
Never engage in conversations that are not marketing-related.
|
| 51 |
+
"""
|
functions.py
CHANGED
|
@@ -1 +1,126 @@
|
|
| 1 |
from config import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from config import *
|
| 2 |
+
|
| 3 |
+
@st.cache_data()
|
| 4 |
+
def make_request(user_input:str,
|
| 5 |
+
short_history:list,
|
| 6 |
+
chat_history:list):
|
| 7 |
+
|
| 8 |
+
"""Makes a request to the Hugging Face API"""
|
| 9 |
+
|
| 10 |
+
client = InferenceClient(
|
| 11 |
+
MODEL_PATH,
|
| 12 |
+
token=HUGGING_FACE_API_KEY,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
response = client.chat_completion(
|
| 17 |
+
messages=short_history,
|
| 18 |
+
max_tokens = 5000,
|
| 19 |
+
stream = False,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
# get the response
|
| 23 |
+
message = response.choices[0].message['content']
|
| 24 |
+
|
| 25 |
+
# analyse the content to see if there is an action to perform
|
| 26 |
+
try:
|
| 27 |
+
perform_actions = look_for_actions(user_input, message)
|
| 28 |
+
|
| 29 |
+
except Exception as e:
|
| 30 |
+
st.info(f"An error occurred while looking for actions: {e}")
|
| 31 |
+
perform_actions = (False, None)
|
| 32 |
+
|
| 33 |
+
# if there was an action to perform, resubmit the question to the chatbot:
|
| 34 |
+
if perform_actions[0]:
|
| 35 |
+
|
| 36 |
+
# replace the last message in the short history with the new message
|
| 37 |
+
short_history[-1] = {'role':'user', 'content':perform_actions[1]}
|
| 38 |
+
|
| 39 |
+
# replace the first message with the system prompt without url analysis
|
| 40 |
+
short_history[0] = {'role':'system', 'content':SYSTEM_PROMPT_NO_URL}
|
| 41 |
+
|
| 42 |
+
# wait a little bit to avoid the API limit
|
| 43 |
+
time.sleep(1)
|
| 44 |
+
|
| 45 |
+
# make the request again
|
| 46 |
+
response = client.chat_completion(
|
| 47 |
+
messages=short_history,
|
| 48 |
+
max_tokens = 5000,
|
| 49 |
+
stream = False,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# append to the history
|
| 53 |
+
chat_history.append({'content':user_input, 'role':'user'})
|
| 54 |
+
chat_history.append(response.choices[0].message) # append the response
|
| 55 |
+
|
| 56 |
+
return chat_history
|
| 57 |
+
|
| 58 |
+
except Exception as e:
|
| 59 |
+
st.error(f"An error occurred: {e}")
|
| 60 |
+
st.stop()
|
| 61 |
+
|
| 62 |
+
@st.cache_data()
|
| 63 |
+
def get_site_content(url:str):
|
| 64 |
+
|
| 65 |
+
"""Receives a URL and returns the content of the site"""
|
| 66 |
+
|
| 67 |
+
# create a user agent
|
| 68 |
+
headers = {
|
| 69 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
# get the site content
|
| 73 |
+
response = requests.get(url, headers=headers)
|
| 74 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
| 75 |
+
|
| 76 |
+
# remove styles and scripts
|
| 77 |
+
for script in soup(["script", "style"]):
|
| 78 |
+
script.extract()
|
| 79 |
+
|
| 80 |
+
# keep the meta descriptions of the header and all the content inside the body
|
| 81 |
+
# for the meta tags, get the tag itself and its content
|
| 82 |
+
meta_tags = soup.head.find_all('meta')
|
| 83 |
+
meta_tags_text = ''
|
| 84 |
+
for tag in meta_tags:
|
| 85 |
+
meta_tags_text += f'<{tag.name} {tag.attrs}>\n'
|
| 86 |
+
|
| 87 |
+
# get the body text
|
| 88 |
+
body_text = soup.body.get_text()
|
| 89 |
+
|
| 90 |
+
# join the meta tags and the body text
|
| 91 |
+
text = f'{meta_tags_text}\n{body_text}'
|
| 92 |
+
|
| 93 |
+
# remove empty lines
|
| 94 |
+
text = os.linesep.join([s for s in text.splitlines() if s])
|
| 95 |
+
|
| 96 |
+
return text
|
| 97 |
+
|
| 98 |
+
def look_for_actions(user_input:str, message:str):
|
| 99 |
+
|
| 100 |
+
"""Reveives a message and look for the pattern ###ACTION###function###URL###"""
|
| 101 |
+
|
| 102 |
+
# check if the pattern is in the message.
|
| 103 |
+
if '###' in message:
|
| 104 |
+
|
| 105 |
+
# split the message by the pattern ###ACTION###function###URL### to get the URL and the action
|
| 106 |
+
split_string = message.split('###')
|
| 107 |
+
|
| 108 |
+
if 'getSiteContent' in message:
|
| 109 |
+
|
| 110 |
+
st.info("I need to visit the site to provide the answer. Please wait...")
|
| 111 |
+
|
| 112 |
+
url = split_string[3].strip()
|
| 113 |
+
|
| 114 |
+
# remove everything inside ### and ### (including the ###) from the user_input
|
| 115 |
+
user_input = re.sub(r'###.*?###', '', user_input)
|
| 116 |
+
|
| 117 |
+
# add the content of the website to the message
|
| 118 |
+
url_content = f'{user_input}. Content of the site {url}:\n{get_site_content(url)}'
|
| 119 |
+
|
| 120 |
+
return (True, url_content)
|
| 121 |
+
|
| 122 |
+
# if there is no action to perform, return None
|
| 123 |
+
return (False, None)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
streamlit
|
| 2 |
python-dotenv
|
| 3 |
-
huggingface_hub
|
|
|
|
|
|
| 1 |
streamlit
|
| 2 |
python-dotenv
|
| 3 |
+
huggingface_hub
|
| 4 |
+
beautifulsoup4
|