Wilame Lima committed on
Commit
90263a4
·
1 Parent(s): b48b867

Add intent to get url content

Browse files
Files changed (4) hide show
  1. app.py +11 -29
  2. config.py +37 -1
  3. functions.py +125 -0
  4. requirements.txt +2 -1
app.py CHANGED
@@ -12,7 +12,7 @@ st.sidebar.markdown(
12
  This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
13
 
14
  Ask the model marketing-related questions and see how it responds. Have fun!
15
-
16
  Model used: [{MODEL_PATH}]({MODEL_LINK})
17
  """
18
  )
@@ -37,11 +37,11 @@ for message in chat_history:
37
  with st.chat_message(message['role']):
38
  st.write(message['content'])
39
 
40
- # keep only last 10 messages
41
- shorter_history = [message for message in chat_history[-10:] if 'content' in message]
42
 
43
  # include a system prompt to explain the bot what to do
44
- shorter_history = [{'role': 'system', 'content': SYSTEM_PROMPT}] + shorter_history
45
 
46
  # get the input from user
47
  user_input = st.chat_input("Write something...")
@@ -53,32 +53,14 @@ if user_input:
53
 
54
  # make the request
55
  with st.spinner("Generating the response..."):
56
-
57
- client = InferenceClient(
58
- "meta-llama/Meta-Llama-3-8B-Instruct",
59
- token=HUGGING_FACE_API_KEY,
60
- )
61
 
62
- messages = shorter_history + [{'role': 'user', 'content': user_input}]
63
-
64
- # query the model
65
- try:
66
- response = client.chat_completion(
67
- messages=messages,
68
- max_tokens = 500,
69
- stream = False,
70
- )
71
-
72
- # get the response
73
- message = response.choices[0].message['content']
74
-
75
- # append to the history
76
- chat_history.append({'content':user_input, 'role':'user'})
77
- chat_history.append(response.choices[0].message) # append the response
78
-
79
- except Exception as e:
80
- st.error(f"An error occurred: {e}")
81
- st.stop()
82
 
83
  st.session_state['chat_history'] = chat_history
84
  st.rerun()
 
12
  This project uses Streamlit to create a simple chatbot interface that allows you to chat with the model using the Hugging Face Inference API.
13
 
14
  Ask the model marketing-related questions and see how it responds. Have fun!
15
+
16
  Model used: [{MODEL_PATH}]({MODEL_LINK})
17
  """
18
  )
 
37
  with st.chat_message(message['role']):
38
  st.write(message['content'])
39
 
40
+ # keep only last 50 messages
41
+ short_history = [message for message in chat_history[-50:] if 'content' in message]
42
 
43
  # include a system prompt to explain the bot what to do
44
+ short_history = [{'role': 'system', 'content': SYSTEM_PROMPT}] + short_history
45
 
46
  # get the input from user
47
  user_input = st.chat_input("Write something...")
 
53
 
54
  # make the request
55
  with st.spinner("Generating the response..."):
 
 
 
 
 
56
 
57
+ # keep the history we send short, for fair usage of the API
58
+ short_history = short_history + [{'role': 'user', 'content': user_input}]
59
+
60
+ # get the full history for the next iteration
61
+ chat_history = make_request(user_input,
62
+ short_history,
63
+ chat_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  st.session_state['chat_history'] = chat_history
66
  st.rerun()
config.py CHANGED
@@ -2,6 +2,10 @@ import streamlit as st
2
  from dotenv import load_dotenv
3
  from huggingface_hub import InferenceClient
4
  import os
 
 
 
 
5
 
6
  # load variables from the env file
7
  load_dotenv()
@@ -12,4 +16,36 @@ DASHBOARD_TITLE = "The Marketer Chatbot"
12
  MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
13
  MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
14
 
15
- SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce. Your goal is to provide clear, concise, and accurate responses within 3-4 sentences. You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics. Recognize when a more complex, detailed response is required and provide it with clarity. Always prioritize delivering actionable insights and practical advice. Never engage in converations that are not marketing-related."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
  from huggingface_hub import InferenceClient
4
  import os
5
+ from bs4 import BeautifulSoup
6
+ import requests
7
+ import re
8
+ import time
9
 
10
  # load variables from the env file
11
  load_dotenv()
 
16
  MODEL_PATH = "meta-llama/Meta-Llama-3-8B-Instruct"
17
  MODEL_LINK = f"https://huggingface.co/{MODEL_PATH}"
18
 
19
# System prompt for the first model pass.  It teaches the model to emit the
# ###ACTION###getSiteContent###URL### sentinel when it needs a page's content;
# that exact sentinel format is parsed by look_for_actions, so do not change it.
# (Fixed typo: "converations" -> "conversations".)
SYSTEM_PROMPT = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.

You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.

Recognize when a more complex, detailed response is required and provide it with clarity.

Always prioritize delivering actionable insights and practical advice.

Never engage in conversations that are not marketing-related.

After THE LAST user response, ask yourself "do I need to visit an url to provide the answer?". If the answer is yes, return ONLY:

###ACTION###getSiteContent###URL###

The URL MUST BE THE ONE THE USER PROVIDED. Just change it if you need to add the 'https://' prefix.

If you DON'T find an URL, just provide the answer as usual.

REMEMBER: Just look for the URL in the LAST user's response. Ignore other URLs in the conversation.

"""
40
+
41
+
42
# System prompt for the second model pass, after a page has been fetched:
# identical persona/rules but WITHOUT the URL-sentinel instructions, so the
# model answers normally instead of re-emitting an action.
# (Fixed typo: "converations" -> "conversations".)
SYSTEM_PROMPT_NO_URL = """You are a specialized AI in marketing and e-commerce and your goal is to provide clear, concise, and accurate responses within 3-4 sentences.

You must demonstrate deep expertise in all aspects of marketing, including digital strategies, customer behavior, e-commerce trends, SEO, content marketing, and data analytics.

Recognize when a more complex, detailed response is required and provide it with clarity.

Always prioritize delivering actionable insights and practical advice.

Never engage in conversations that are not marketing-related.
"""
functions.py CHANGED
@@ -1 +1,126 @@
1
  from config import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from config import *
2
+
3
@st.cache_data()
def make_request(user_input: str,
                 short_history: list,
                 chat_history: list):
    """Send the trimmed chat to the Hugging Face API and return the full history.

    Parameters
    ----------
    user_input : str
        The text the user just typed.
    short_history : list
        Trimmed message list (system prompt + recent turns + the new user
        turn) that is actually sent to the model.
    chat_history : list
        The full conversation history; the new user message and the model's
        reply are appended to it.

    Returns
    -------
    list
        ``chat_history`` with the new user/assistant turns appended.

    NOTE(review): ``st.cache_data`` memoizes on the argument values, so an
    identical (input, history) pair returns the cached reply without hitting
    the API, and ``st.info``/``st.error`` side effects are skipped on cache
    hits — presumably intended for fair API usage; confirm this is desired.
    """

    client = InferenceClient(
        MODEL_PATH,
        token=HUGGING_FACE_API_KEY,
    )

    def _chat(messages: list):
        # One non-streaming chat-completion call; shared by both passes.
        return client.chat_completion(
            messages=messages,
            max_tokens=5000,
            stream=False,
        )

    try:
        response = _chat(short_history)

        # text of the model's first reply, inspected for an action sentinel
        message = response.choices[0].message['content']

        # analyse the content to see if there is an action to perform
        try:
            perform_actions = look_for_actions(user_input, message)
        except Exception as e:
            # best-effort: a failed fetch/parse degrades to a normal answer
            st.info(f"An error occurred while looking for actions: {e}")
            perform_actions = (False, None)

        # if there was an action to perform, resubmit the question:
        if perform_actions[0]:
            # replace the last user message with the URL-augmented prompt
            short_history[-1] = {'role': 'user', 'content': perform_actions[1]}

            # swap in the system prompt WITHOUT url analysis so the model
            # answers instead of emitting another action
            short_history[0] = {'role': 'system', 'content': SYSTEM_PROMPT_NO_URL}

            # wait a little bit to avoid the API rate limit
            time.sleep(1)

            response = _chat(short_history)

        # append the exchange to the full history
        chat_history.append({'content': user_input, 'role': 'user'})
        chat_history.append(response.choices[0].message)

        return chat_history

    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.stop()
61
+
62
@st.cache_data()
def get_site_content(url: str):
    """Fetch *url* and return its head <meta> tags plus visible body text.

    Returns a single string: one line per <meta> tag from the page head,
    then the body's text with scripts, styles, and blank lines removed.
    Raises ``requests.HTTPError`` on a non-2xx response (caught by the
    caller's action handler).
    """

    # impersonate a desktop browser so simple bot filters let us through
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # bounded timeout so a hung site cannot freeze the Streamlit app;
    # fail loudly on error status codes instead of parsing an error page
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # remove non-visible content (styles and scripts)
    for element in soup(["script", "style"]):
        element.extract()

    # keep the head's meta descriptions: the tag name and its attributes
    # (guard: some pages have no <head>)
    meta_tags_text = ''
    if soup.head is not None:
        for tag in soup.head.find_all('meta'):
            meta_tags_text += f'<{tag.name} {tag.attrs}>\n'

    # get the body text (guard: fall back to the whole document if no <body>)
    body_text = soup.body.get_text() if soup.body is not None else soup.get_text()

    # join the meta tags and the body text
    text = f'{meta_tags_text}\n{body_text}'

    # remove empty lines
    text = os.linesep.join([s for s in text.splitlines() if s])

    return text
97
+
98
def look_for_actions(user_input: str, message: str):
    """Receives a model reply and looks for the ###ACTION###function###URL### pattern.

    Parameters
    ----------
    user_input : str
        The user's last message; it is prepended to the fetched page content.
    message : str
        The model's reply, possibly containing the action sentinel.

    Returns
    -------
    tuple
        ``(True, augmented_prompt)`` when a getSiteContent action was found
        and performed — the prompt is the user input plus the site's content;
        ``(False, None)`` when there is no action to perform.
    """

    # fast path: no sentinel characters at all
    if '###' not in message:
        return (False, None)

    # extract the URL from the full sentinel; matching the complete pattern
    # (rather than splitting on '###' and indexing) stays correct even when
    # the model wraps the sentinel in extra prose
    match = re.search(r'###ACTION###getSiteContent###(.*?)###', message)
    if match is None:
        # '###' present but no well-formed getSiteContent action
        return (False, None)

    st.info("I need to visit the site to provide the answer. Please wait...")

    url = match.group(1).strip()

    # strip any sentinel text the user may have pasted into their own input
    user_input = re.sub(r'###.*?###', '', user_input)

    # add the content of the website to the message
    url_content = f'{user_input}. Content of the site {url}:\n{get_site_content(url)}'

    return (True, url_content)
124
+
125
+
126
+
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  streamlit
2
  python-dotenv
3
- huggingface_hub
 
 
1
  streamlit
2
  python-dotenv
3
+ huggingface_hub
4
+ beautifulsoup4