fmr-aeg committed on
Commit
1845748
·
verified ·
1 Parent(s): 2b79cf9

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .idea/workspace.xml +1 -1
  2. README.md +3 -1
  3. src/aiagent/utils/ecom_tools.py +17 -106
.idea/workspace.xml CHANGED
@@ -267,7 +267,7 @@
267
  <workItem from="1747779058058" duration="9506000" />
268
  <workItem from="1747841281902" duration="245000" />
269
  <workItem from="1749511880222" duration="3463000" />
270
- <workItem from="1749582965687" duration="7880000" />
271
  </task>
272
  <task id="LOCAL-00001" summary="Last commit before refacto">
273
  <option name="closed" value="true" />
 
267
  <workItem from="1747779058058" duration="9506000" />
268
  <workItem from="1747841281902" duration="245000" />
269
  <workItem from="1749511880222" duration="3463000" />
270
+ <workItem from="1749582965687" duration="8487000" />
271
  </task>
272
  <task id="LOCAL-00001" summary="Last commit before refacto">
273
  <option name="closed" value="true" />
README.md CHANGED
@@ -37,7 +37,9 @@ It simulates an interactive personal shopper, capable of:
37
  Your browser does not support the video tag.
38
  </video>
39
 
40
- 🎥 [demo video](assets/pres_video.mp4)
 
 
41
 
42
  ---
43
 
 
37
  Your browser does not support the video tag.
38
  </video>
39
 
40
+ 🎥 [demo video here](assets/pres_video.mp4)
41
+ or
42
+ [here](https://drive.google.com/file/d/18cZo3iLbtoua6VG7AxpmOn2UIWLv-KIA/view)
43
 
44
  ---
45
 
src/aiagent/utils/ecom_tools.py CHANGED
@@ -89,68 +89,13 @@ class GetProductDescriptionTool(Tool):
89
 
90
  def __init__(self):
91
  super().__init__()
92
- self.headers_list = headers_list = [
93
- {
94
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
95
- "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
96
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
97
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
98
- "Accept-Encoding": "gzip, deflate, br",
99
- "Connection": "keep-alive",
100
- "Referer": "https://www.google.com/",
101
- "Cache-Control": "max-age=0",
102
- "Upgrade-Insecure-Requests": "1",
103
- "Sec-Fetch-Dest": "document",
104
- "Sec-Fetch-Mode": "navigate",
105
- "Sec-Fetch-Site": "same-origin",
106
- "Sec-Fetch-User": "?1"
107
- },
108
- {
109
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
110
- "(KHTML, like Gecko) Version/14.0 Safari/605.1.15",
111
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
112
- "Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.5,en;q=0.3",
113
- "Accept-Encoding": "gzip, deflate",
114
- "Connection": "keep-alive",
115
- "Referer": "https://www.bing.com/",
116
- "Cache-Control": "no-cache",
117
- "Upgrade-Insecure-Requests": "1",
118
- "Sec-Fetch-Dest": "document",
119
- "Sec-Fetch-Mode": "navigate",
120
- "Sec-Fetch-Site": "none",
121
- "Sec-Fetch-User": "?1"
122
- },
123
- {
124
- "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
125
- "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
126
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
127
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
128
- "Accept-Encoding": "gzip, deflate",
129
- "Connection": "keep-alive",
130
- "Referer": "https://duckduckgo.com/",
131
- "Cache-Control": "max-age=0",
132
- "Upgrade-Insecure-Requests": "1",
133
- "Sec-Fetch-Dest": "document",
134
- "Sec-Fetch-Mode": "navigate",
135
- "Sec-Fetch-Site": "same-origin",
136
- "Sec-Fetch-User": "?1"
137
- },
138
- {
139
- "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) "
140
- "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
141
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
142
- "Accept-Language": "fr-FR",
143
- "Accept-Encoding": "gzip, deflate, br",
144
- "Connection": "keep-alive",
145
- "Referer": "https://www.apple.com/",
146
- "Cache-Control": "max-age=0",
147
- "Upgrade-Insecure-Requests": "1",
148
- "Sec-Fetch-Dest": "document",
149
- "Sec-Fetch-Mode": "navigate",
150
- "Sec-Fetch-Site": "same-origin",
151
- "Sec-Fetch-User": "?1"
152
- }
153
- ]
154
 
155
  @staticmethod
156
  def _clean_product_url(product_url: str) -> str:
@@ -163,7 +108,7 @@ class GetProductDescriptionTool(Tool):
163
 
164
  product_url = self._clean_product_url(product_url)
165
  try:
166
- response = requests.get(product_url, headers=random.choice(self.headers_list))
167
  response.raise_for_status()
168
 
169
  soup = BeautifulSoup(response.text, 'html.parser')
@@ -226,51 +171,17 @@ def search_on_amazon(keyword: str) -> list[dict]:
226
  - delivery_date : information on delivery date
227
  """
228
 
229
- headers_list = [
230
- {
231
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
232
- 'Chrome/92.0.4515.159 Safari/537.36',
233
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
234
- 'Accept-Encoding': 'gzip, deflate, br',
235
- 'Connection': 'keep-alive',
236
- 'Upgrade-Insecure-Requests': '1',
237
- },
238
- {
239
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) '
240
- 'Version/14.0.3 Safari/605.1.15',
241
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
242
- 'Accept-Encoding': 'gzip, deflate',
243
- 'Connection': 'keep-alive',
244
- 'Upgrade-Insecure-Requests': '1',
245
- },
246
- {
247
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
248
- 'Chrome/88.0.4324.182 Safari/537.36',
249
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
250
- 'Accept-Encoding': 'gzip, deflate',
251
- 'Connection': 'keep-alive',
252
- 'Upgrade-Insecure-Requests': '1',
253
- },
254
- {
255
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 '
256
- '(KHTML, like Gecko) Version/14.0 Mobile/15A372 Safari/604.1',
257
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
258
- 'Accept-Encoding': 'gzip, deflate, br',
259
- 'Connection': 'keep-alive',
260
- 'Upgrade-Insecure-Requests': '1',
261
- },
262
- {
263
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
264
- 'Chrome/90.0.4430.212 Safari/537.36',
265
- 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
266
- 'Accept-Encoding': 'gzip, deflate, br',
267
- 'Connection': 'keep-alive',
268
- 'Upgrade-Insecure-Requests': '1',
269
- }
270
- ]
271
  url = f"https://www.amazon.fr/s?k={keyword.replace(' ', '+')}" # Could be adapted for other countries
272
 
273
- response = requests.get(url, headers=random.choice(headers_list))
274
 
275
  if response.status_code != 200:
276
  print("Error during page loading")
 
89
 
90
  def __init__(self):
91
  super().__init__()
92
+ self.headers = {
93
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15',
94
+ 'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
95
+ 'Accept-Encoding': 'gzip, deflate, br',
96
+ 'Connection': 'keep-alive',
97
+ 'Upgrade-Insecure-Requests': '1',
98
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  @staticmethod
101
  def _clean_product_url(product_url: str) -> str:
 
108
 
109
  product_url = self._clean_product_url(product_url)
110
  try:
111
+ response = requests.get(product_url, headers=self.headers)
112
  response.raise_for_status()
113
 
114
  soup = BeautifulSoup(response.text, 'html.parser')
 
171
  - delivery_date : information on delivery date
172
  """
173
 
174
+ headers = {
175
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
176
+ 'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
177
+ 'Accept-Encoding': 'gzip, deflate, br',
178
+ 'Connection': 'keep-alive',
179
+ 'Upgrade-Insecure-Requests': '1',
180
+ }
181
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  url = f"https://www.amazon.fr/s?k={keyword.replace(' ', '+')}" # Could be adapted for other countries
183
 
184
+ response = requests.get(url, headers=headers)
185
 
186
  if response.status_code != 200:
187
  print("Error during page loading")