Upload 4 files
Browse files- app.py +49 -0
- get_answer.py +130 -0
- logs.py +50 -0
- requirements.txt +18 -0
app.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from functools import partial
|
| 4 |
+
from get_answer import get_answer
|
| 5 |
+
from logs import save_logs
|
| 6 |
+
import gdown
|
| 7 |
+
from config import folder_id, json_url_id
|
| 8 |
+
# Fetch the Google service-account key file from Drive at start-up so that
# logs.py can authenticate when uploading conversation logs.
# NOTE(review): runs at import time — presumably acceptable for a Spaces app;
# confirm the file is not committed anywhere public.
download_url = f'https://drive.google.com/uc?id={json_url_id}'
output = 'secret_google_service_account.json'
gdown.download(download_url, output, quiet=False)
|
| 11 |
+
def stream(query, additional_information=""):
    """Stream a compliance review of an eBay listing to the Gradio UI.

    Yields ``(answer, listing)`` pairs: first the listing preview
    (images + description) with an empty answer, then the answer text
    growing chunk by chunk as the OpenAI stream arrives.

    Args:
        query: URL of the eBay listing to analyse.
        additional_information: Optional extra context forwarded to the
            model's system prompt (e.g. user corrections).
    """
    resp, description, images = get_answer(query, additional_information)
    answer = ""

    # Build the listing preview: render every scraped image inline
    # (the previous loop discarded `img` and appended only a space),
    # then append the scraped description below a divider.
    listing = ""
    for img in images:
        listing += f"![listing image]({img}) "
    listing += f"\n---\n{description}"
    yield "", listing

    # Relay the OpenAI streaming response incrementally to the UI.
    for chunk in resp:
        if chunk.choices[0].delta.content is not None:
            answer = answer + chunk.choices[0].delta.content
            yield answer, listing

    # Persist the full exchange to Google Drive once streaming finishes.
    save_logs(query, answer, folder_id=folder_id)
|
| 24 |
+
|
| 25 |
+
# ---------------------------------------------------------------------------
# Gradio UI: a listing-URL box plus an optional context box on top; the
# scraped listing preview on the left, the streamed verdict on the right.
# ---------------------------------------------------------------------------
title = ""

with gr.Blocks(title=title, theme='nota-ai/theme', css="footer {visibility: hidden}") as demo:
    gr.Markdown(f"## {title}")

    with gr.Row():
        with gr.Column(scale=6):
            with gr.Row():
                with gr.Column(scale=8):
                    url_input = gr.Textbox(placeholder="https://www.ebay.fr/itm/123", lines=1, label="Link to listing")
                    # Fixed local-name typo: was "additional_infomration".
                    additional_information = gr.Textbox(placeholder="Perfume samples are forbidden", lines=1, label="Missed interpretation? Add any additional information")
                with gr.Column(scale=1):
                    chat_submit_button = gr.Button(value="Submit ▶")
            with gr.Row():
                with gr.Column(scale=1):
                    listing = gr.Markdown("Waiting for link...")
                with gr.Column(scale=3):
                    compliance_output = gr.Markdown("Waiting for link...")

    fn_chat = stream

    # `stream` is a generator, so both output components update live.
    chat_submit_button.click(fn=fn_chat, inputs=[url_input, additional_information], outputs=[compliance_output, listing])

demo.launch(max_threads=40, share=True)
|
get_answer.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
import base64
|
| 5 |
+
|
| 6 |
+
from config import openai_api
|
| 7 |
+
|
| 8 |
+
def encode_image(image_path):
    """Return the contents of *image_path* as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_page_content(url):
    """Scrape an eBay listing and return ``(description, image_urls)``.

    Fetches *url*, follows the "see full description" call-to-action link
    when present, and extracts the item title, the description text and
    the gallery image URLs.

    Args:
        url: URL of the eBay listing page.

    Returns:
        Tuple of (markdown description string, list of image URL strings).

    Raises:
        TypeError/KeyError: if the call-to-action anchor is absent.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    # Link to the detailed-description page.
    url_detailed = soup.find("a", {"data-testid": "ux-call-to-action"})["href"]

    try:
        # Prefer the detailed page — it usually carries the full description.
        response = requests.get(url_detailed)
        soup = BeautifulSoup(response.content, "html.parser")
        product_details_div = soup.find("div", {"class": "tabs__cell"})
        # Probe: raises AttributeError when the div is missing, which
        # triggers the fallback to the original listing page below.
        product_details_div.get_text()
    except Exception:
        # Fall back to the original listing page.
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        product_details_div = soup.find("div", {"class": "tabs__cell"})

    # Title is taken from whichever page we ended up parsing, so it is
    # defined on both paths (previously it could be undefined after the try).
    title = soup.find("h1", {"class": "x-item-title__mainTitle"})

    description = f"### TITLE : {title.get_text()} \n"
    for div in product_details_div.find_all("div"):
        description += div.get_text("\n")

    # Initialised up front so the carousel fallback can append safely even
    # when the grid lookup raised before `imgs` was created.
    imgs = []
    try:
        img_part = soup.find("div", {"class": "ux-image-grid no-scrollbar"})
        for img in img_part.find_all("img"):
            imgs.append(img["src"])
    except Exception:
        # Alternate page layout: take the first carousel image instead.
        img_part = soup.find("div", {"class": "ux-image-carousel-item"})
        imgs.append(img_part.find_all("img")[0]["src"])
    return description, imgs
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# Module-level OpenAI client, authenticated with the key from config.py.
client = OpenAI(api_key=openai_api)
|
| 68 |
+
def get_ai_response(prompt_content, additional_information=""):
    """Ask GPT-4o for a compliance review of an eBay listing.

    Args:
        prompt_content: Content list for the user message (base64 image
            parts plus the listing description text).
        additional_information: Extra guidance inserted into the system
            prompt.

    Returns:
        A streaming chat-completions response (iterable of chunks).
    """
    system_text = f"You will be given ebay listings. Your task is to say if there is anything forbidden by their policies or illegal within the legal framework (former court decisions etc). You specify the legal basis, justify thoroughly, and cite all necessary regulations that are relevant and in the scope of application for this type of product in the corresponding region. Your focus should be on preventing any illegal listing or doing. {additional_information}\n\nYou format your answers like this:\n\n# Compliance: Yes/No\n\n# Justification:\n"
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_text}]},
        {"role": "user", "content": prompt_content},
    ]
    return client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        temperature=1,
        max_tokens=1439,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stream=True,
    )
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def get_answer(url, additional_information=""):
    """Scrape a listing, download its images and start the AI review.

    Args:
        url: eBay listing URL.
        additional_information: Extra guidance forwarded to the model's
            system prompt.

    Returns:
        Tuple of (streaming AI response, description string, image URLs).
    """
    description, imgs = get_page_content(url)
    img_paths = []
    for image_url in imgs:
        # Derive a local filename from the URL. Fall back to the last path
        # segment when "images/" is absent — the unconditional
        # split("images/")[1] raised IndexError for such URLs.
        if "images/" in image_url:
            filename = image_url.split("images/")[1].replace("/", "_")
        else:
            filename = image_url.rstrip("/").split("/")[-1]
        response = requests.get(image_url, stream=True)
        if response.status_code == 200:
            with open(filename, "wb") as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            img_paths.append(filename)
        else:
            print(f"Failed to download {image_url}")

    # Build the multimodal user message: every downloaded image as a
    # base64 data URL, followed by the scraped description as text.
    prompt_content = []
    for img_path in img_paths:
        b64_img = encode_image(img_path)
        prompt_content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{b64_img}",
            }
        })
    prompt_content.append({
        "type": "text",
        "text": description
    })
    answer = get_ai_response(prompt_content, additional_information)
    return answer, description, imgs
|
logs.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from google.oauth2 import service_account
|
| 3 |
+
from googleapiclient.discovery import build
|
| 4 |
+
from googleapiclient.http import MediaFileUpload
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
def save_logs(query, response, folder_id=""):
    """Write a query/response log file and upload it to Google Drive.

    Args:
        query: The user's query (listing URL).
        response: The full model answer.
        folder_id: Google Drive folder ID the log is uploaded into.
    """
    to_save = f"LOG ENTRY\nQUERY\n{query}\n=================================\nRESPONSE\n{response}\n****************************************\n"

    # Timestamp-only filename, digits only (e.g. 20240102030405123456.txt).
    # Equivalent to str(now) with separators stripped, but explicit.
    now = datetime.now()
    filename = now.strftime("%Y%m%d%H%M%S%f") + ".txt"
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(to_save)

    # Service-account key file (downloaded by app.py at start-up).
    SERVICE_ACCOUNT_FILE = 'secret_google_service_account.json'
    # Scope restricted to files created by this app.
    SCOPES = ['https://www.googleapis.com/auth/drive.file']

    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    service = build('drive', 'v3', credentials=credentials)

    file_metadata = {
        'name': filename,        # Name of the file in Drive
        'parents': [folder_id]   # Destination folder
    }
    media = MediaFileUpload(filename, mimetype='text/plain')
    file = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id'
    ).execute()

    # Remove the local copy once uploaded so log files do not accumulate
    # in the working directory.
    os.remove(filename)
    print('Saved in Google Drive - File ID: %s' % file.get('id'))
|
requirements.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
requests
|
| 2 |
+
bs4
|
| 3 |
+
openai
|
| 4 |
+
pandas
|
| 5 |
+
numpy
|
| 6 |
+
tabula-py
|
| 7 |
+
# openai  (duplicate — already listed above)
|
| 8 |
+
gradio
|
| 9 |
+
pyPDF2
|
| 10 |
+
# bs4  (duplicate — already listed above)
|
| 11 |
+
nltk
|
| 12 |
+
tiktoken
|
| 13 |
+
pdf2image
|
| 14 |
+
gdown
|
| 15 |
+
google-auth
|
| 16 |
+
google-auth-oauthlib
|
| 17 |
+
google-auth-httplib2
|
| 18 |
+
google-api-python-client
|