|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
from openai import OpenAI |
|
|
import base64 |
|
|
|
|
|
from config import openai_api |
|
|
|
|
|
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8 so
    the result is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
|
|
|
|
|
|
|
|
def _fetch_soup(url, timeout):
    """Download ``url`` and parse the response body with ``html.parser``."""
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, "html.parser")


def get_page_content(url, timeout=30):
    """Scrape a listing page and return its text description and image URLs.

    The page at ``url`` is fetched first. If it contains a link marked
    ``data-testid="ux-call-to-action"``, the linked page is fetched as well
    and used for every later lookup, provided it downloads successfully and
    contains a ``div`` with class ``tabs__cell``. In every other case the
    already-parsed page at ``url`` is used (it is not downloaded again).

    Args:
        url: Address of the listing page.
        timeout: Seconds to wait for each HTTP request before giving up, so
            an unresponsive server cannot block the caller forever.

    Returns:
        A ``(description, imgs)`` tuple. ``description`` starts with a
        ``### TITLE : ...`` line followed by the text of every ``div`` nested
        in the details container. ``imgs`` is a list of image ``src`` values:
        all images from the ``ux-image-grid no-scrollbar`` container, or the
        first image of the first ``ux-image-carousel-item`` when the grid
        yields nothing. The list is empty when neither is present.

    Raises:
        requests.RequestException: If the page at ``url`` cannot be fetched.
        AttributeError: If the page in use has no title heading or no details
            container.
    """
    listing_soup = _fetch_soup(url, timeout)

    # Default to the listing page; switch to the linked page only when it is
    # reachable and actually holds the details container.
    soup = listing_soup
    product_details_div = None

    detail_link = listing_soup.find("a", {"data-testid": "ux-call-to-action"})
    url_detailed = detail_link.get("href") if detail_link is not None else None
    if url_detailed:
        try:
            detail_soup = _fetch_soup(url_detailed, timeout)
        except requests.RequestException:
            # Covers relative/invalid hrefs as well as network failures.
            detail_soup = None
        if detail_soup is not None:
            candidate = detail_soup.find("div", {"class": "tabs__cell"})
            if candidate is not None:
                soup = detail_soup
                product_details_div = candidate

    if product_details_div is None:
        product_details_div = listing_soup.find("div", {"class": "tabs__cell"})

    title = soup.find("h1", {"class": "x-item-title__mainTitle"})

    # AttributeError is raised on purpose: it is the type the former implicit
    # None-dereference produced, so existing handlers keep working.
    if title is None:
        raise AttributeError(
            f"No 'x-item-title__mainTitle' heading found while scraping {url}"
        )
    if product_details_div is None:
        raise AttributeError(
            f"No 'tabs__cell' details container found while scraping {url}"
        )

    description = f"### TITLE : {title.get_text()} \n" + "".join(
        div.get_text("\n") for div in product_details_div.find_all("div")
    )

    imgs = []
    image_grid = soup.find("div", {"class": "ux-image-grid no-scrollbar"})
    if image_grid is not None:
        imgs = [
            img["src"] for img in image_grid.find_all("img") if img.has_attr("src")
        ]

    if not imgs:
        # Fallback layout: take the first image of the first carousel item.
        carousel_item = soup.find("div", {"class": "ux-image-carousel-item"})
        first_img = carousel_item.find("img") if carousel_item is not None else None
        if first_img is not None and first_img.has_attr("src"):
            imgs.append(first_img["src"])

    return description, imgs
|
|
|
|
|
|
|
|
|
|
|
# Module-level OpenAI client, built once when this module is imported, using
# the `openai_api` key imported from `config`.
client = OpenAI(api_key=openai_api)
|
|
def get_ai_response(prompt_content, additional_information=""):
    """Ask the chat model for a compliance assessment of a listing.

    Args:
        prompt_content: Value sent as the ``content`` of the user message.
        additional_information: Extra text interpolated into the system
            prompt, just before the answer-format instructions.

    Returns:
        The object returned by ``client.chat.completions.create`` when called
        with ``stream=True``.
    """
    system_text = f"You will be given ebay listings. Your task is to say if there is anything forbidden by their policies or illegal within the legal framework (former court decisions etc). You specify the legal basis, justify thoroughly, and cite all necessary regulations that are relevant and in the scope of application for this type of product in the corresponding region. Your focus should be on preventing any illegal listing or doing. {additional_information}\n\nYou format your answers like this:\n\n# Compliance: Yes/No\n\n# Justification:\n"

    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": system_text}],
    }
    user_message = {"role": "user", "content": prompt_content}

    sampling_options = {
        "temperature": 1,
        "max_tokens": 1439,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }

    return client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        stream=True,
        **sampling_options,
    )
|
|
|
|
|
|
|
|
def _fetch_image_data_url(image_url, timeout):
    """Download one image and return it as a base64 ``data:`` URL, or ``None`` on failure."""
    try:
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException:
        # Includes non-HTTP ``src`` values (relative paths, ``data:`` URIs).
        return None
    if response.status_code != 200:
        return None

    # Use the server-declared type when it is a common image format;
    # otherwise keep the previous ``image/jpeg`` label.
    declared_type = response.headers.get("Content-Type", "")
    mime_type = declared_type.split(";")[0].strip().lower()
    if mime_type not in ("image/jpeg", "image/png", "image/gif", "image/webp"):
        mime_type = "image/jpeg"

    encoded = base64.b64encode(response.content).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def get_answer(url, additional_information="", timeout=30):
    """Scrape a listing, attach its images, and request a compliance answer.

    Images are downloaded and base64-encoded in memory; nothing is written to
    disk. An image that cannot be downloaded is reported with ``print`` and
    skipped, so one bad image does not abort the whole request.

    Args:
        url: Address of the listing page, passed to ``get_page_content``.
        additional_information: Extra instructions forwarded to
            ``get_ai_response``.
        timeout: Seconds to wait for each image download.

    Returns:
        A ``(answer, description, imgs)`` tuple: the value returned by
        ``get_ai_response``, the scraped description text, and the list of
        image URLs found on the page (including any that failed to download).
    """
    description, imgs = get_page_content(url)

    prompt_content = []
    for image_url in imgs:
        data_url = _fetch_image_data_url(image_url, timeout)
        if data_url is None:
            print(f"Failed to download {image_url}")
            continue
        prompt_content.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": data_url,
                },
            }
        )

    # The text part goes last, after all image parts.
    prompt_content.append(
        {
            "type": "text",
            "text": description,
        }
    )

    answer = get_ai_response(prompt_content, additional_information)
    return answer, description, imgs