Adr740 committed on
Commit
edaef93
·
verified ·
1 Parent(s): 733eb59

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +49 -0
  2. get_answer.py +130 -0
  3. logs.py +50 -0
  4. requirements.txt +18 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from functools import partial
4
+ from get_answer import get_answer
5
+ from logs import save_logs
6
+ import gdown
7
+ from config import folder_id, json_url_id
8
# Download the Google service-account key file from Google Drive at start-up
# and store it under the file name that logs.save_logs loads its credentials
# from ('secret_google_service_account.json').
# NOTE(review): this fetches a credential through a Drive link identified only
# by `json_url_id` — confirm the link's sharing settings are acceptable.
download_url = f'https://drive.google.com/uc?id={json_url_id}'
output = 'secret_google_service_account.json'
gdown.download(download_url, output, quiet=False)
11
def stream(query, additional_information=""):
    """Stream the compliance answer for a listing to the Gradio UI.

    Generator used as the submit-button handler. Every yielded pair is
    ``(answer_markdown, listing_markdown)``, matching the
    ``[compliance_output, listing]`` outputs wired to the button.

    Args:
        query: URL of the listing to analyse.
        additional_information: Optional extra guidance forwarded to
            ``get_answer``.

    Yields:
        Tuples of the answer accumulated so far and the listing preview
        (inline images, a horizontal rule, then the scraped description).
    """
    resp, description, images = get_answer(query, additional_information)

    # Listing preview: every image as inline Markdown, then the description.
    listing = "".join(f"![]({img}) " for img in images)
    listing += f"\n---\n{description}"
    # Show the listing right away, before the first model token arrives.
    yield "", listing

    answer = ""
    for chunk in resp:
        # Guard against chunks that carry no choices at all.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            answer += piece
            yield answer, listing

    # Logging is auxiliary: the full answer has already been delivered, so a
    # failed upload is reported on stdout instead of erroring in the UI.
    try:
        save_logs(query, answer, folder_id=folder_id)
    except Exception as exc:
        print(f"Failed to save logs: {exc}")
24
+
25
# Page title; it is empty here, so both the Blocks title and the heading
# rendered below are blank.
title = ""
# The custom CSS hides the page footer.
with gr.Blocks(title=title,theme='nota-ai/theme',css="footer {visibility: hidden}") as demo:
    gr.Markdown(f"## {title}")

    # NOTE(review): the nesting of the Row/Column contexts below was
    # reconstructed from a flattened diff view — confirm it against the
    # original app.py before relying on the exact layout.
    with gr.Row():
        with gr.Column(scale=6):
            # Input row: listing URL and optional hint, next to the submit button.
            with gr.Row():
                with gr.Column(scale=8):
                    url_input = gr.Textbox(placeholder="https://www.ebay.fr/itm/123", lines=1, label="Link to listing")
                    additional_infomration = gr.Textbox(placeholder="Perfume samples are forbidden", lines=1, label="Missed interpretation? Add any additional information")
                with gr.Column(scale=1):
                    chat_submit_button = gr.Button(value="Submit ▶")
            # Output row: listing preview and the streamed compliance answer.
            with gr.Row():
                with gr.Column(scale=1):
                    listing = gr.Markdown("Waiting for link...")
                with gr.Column(scale=3):
                    compliance_output = gr.Markdown("Waiting for link...")


    fn_chat = stream

    # stream yields (answer, listing) pairs, hence this order of outputs.
    chat_submit_button.click(fn=fn_chat, inputs=[url_input, additional_infomration], outputs=[compliance_output, listing])


# share=True additionally requests a public Gradio share link.
demo.launch(max_threads=40, share=True)
get_answer.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from openai import OpenAI
4
+ import base64
5
+
6
+ from config import openai_api
7
+
8
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded to
    ``str`` so the result can be embedded in text (e.g. a data URL).
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
11
+
12
+
13
# Seconds to wait for each HTTP request before giving up.
_REQUEST_TIMEOUT = 30


def _extract_image_urls(soup):
    """Return image URLs from the image grid, else the first carousel image."""
    grid = soup.find("div", {"class": "ux-image-grid no-scrollbar"})
    if grid is not None:
        urls = [img["src"] for img in grid.find_all("img") if img.get("src")]
        if urls:
            return urls

    # Fallback: first image of the first carousel item.
    carousel_item = soup.find("div", {"class": "ux-image-carousel-item"})
    if carousel_item is not None:
        first_img = carousel_item.find("img")
        if first_img is not None and first_img.get("src"):
            return [first_img["src"]]
    return []


def get_page_content(url):
    """Scrape the title, description text and image URLs of a listing page.

    The page at *url* is fetched first. If it contains a link marked
    ``data-testid="ux-call-to-action"`` (presumably the detailed listing
    page — TODO confirm), that page is fetched too and preferred whenever
    it contains the ``tabs__cell`` description container. Otherwise the
    already-downloaded listing page is used.

    Args:
        url: URL of the listing page.

    Returns:
        A ``(description, imgs)`` tuple: ``description`` is a string that
        starts with ``"### TITLE : ..."`` followed by the text of the
        description container's ``div`` elements; ``imgs`` is a list of
        image URLs (possibly empty).

    Raises:
        ValueError: If no listing title can be found on the page.
        requests.RequestException: If the listing page cannot be fetched.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    listing_soup = BeautifulSoup(response.content, "html.parser")

    # `soup` is the page the title and images are read from; it only switches
    # to the detailed page when that page really holds the description.
    soup = listing_soup
    product_details_div = None

    detail_link = listing_soup.find("a", {"data-testid": "ux-call-to-action"})
    detail_url = detail_link.get("href") if detail_link is not None else None
    if detail_url:
        try:
            detail_response = requests.get(detail_url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Best effort: fall back to the listing page fetched above.
            print(f"Failed to fetch detailed page {detail_url}: {exc}")
        else:
            detail_soup = BeautifulSoup(detail_response.content, "html.parser")
            product_details_div = detail_soup.find("div", {"class": "tabs__cell"})
            if product_details_div is not None:
                soup = detail_soup

    if product_details_div is None:
        product_details_div = listing_soup.find("div", {"class": "tabs__cell"})

    title_attrs = {"class": "x-item-title__mainTitle"}
    title = soup.find("h1", title_attrs)
    if title is None and soup is not listing_soup:
        title = listing_soup.find("h1", title_attrs)
    if title is None:
        raise ValueError(f"No listing title found at {url}")

    parts = [f"### TITLE : {title.get_text()} \n"]
    if product_details_div is not None:
        parts.extend(
            div.get_text("\n") for div in product_details_div.find_all("div")
        )
    description = "".join(parts)

    imgs = _extract_image_urls(soup)
    if not imgs and soup is not listing_soup:
        imgs = _extract_image_urls(listing_soup)
    return description, imgs
64
+
65
+
66
+
67
client = OpenAI(api_key=openai_api)


def get_ai_response(prompt_content, additional_information=""):
    """Request a streamed compliance assessment from the chat model.

    Args:
        prompt_content: Content of the user message (the listing's images
            and text, as built by the caller).
        additional_information: Optional text inserted into the system
            prompt right after the task description.

    Returns:
        The streaming response returned by ``client.chat.completions.create``
        (called with ``stream=True``).
    """
    system_prompt = f"You will be given ebay listings. Your task is to say if there is anything forbidden by their policies or illegal within the legal framework (former court decisions etc). You specify the legal basis, justify thoroughly, and cite all necessary regulations that are relevant and in the scope of application for this type of product in the corresponding region. Your focus should be on preventing any illegal listing or doing. {additional_information}\n\nYou format your answers like this:\n\n# Compliance: Yes/No\n\n# Justification:\n"

    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}],
    }
    user_message = {"role": "user", "content": prompt_content}

    return client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        temperature=1,
        max_tokens=1439,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stream=True,
    )
94
+
95
+
96
def get_answer(url, additional_information=""):
    """Scrape a listing and start the streamed compliance analysis for it.

    Each listing image is downloaded and embedded in the prompt as a base64
    data URL, held in memory only. Nothing is written to the working
    directory, so concurrent requests cannot collide on file names and no
    image files are left behind. Images that cannot be downloaded are
    reported on stdout and skipped.

    Args:
        url: URL of the listing page.
        additional_information: Optional extra guidance forwarded to
            ``get_ai_response``.

    Returns:
        A ``(answer, description, imgs)`` tuple: the streaming model
        response, the scraped description and the list of image URLs.
    """
    description, imgs = get_page_content(url)

    prompt_content = []
    for image_url in imgs:
        try:
            response = requests.get(image_url, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to download {image_url}: {exc}")
            continue
        if response.status_code != 200:
            print(f"Failed to download {image_url}")
            continue

        # Use the served image type when available; default to JPEG, which is
        # what the data URL was previously always labelled as.
        content_type = response.headers.get("Content-Type", "")
        mime_type = content_type.split(";")[0].strip()
        if not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        b64_img = base64.b64encode(response.content).decode("utf-8")
        prompt_content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{b64_img}",
            },
        })

    # The text description goes last, after all the images.
    prompt_content.append({
        "type": "text",
        "text": description,
    })
    answer = get_ai_response(prompt_content, additional_information)
    return answer, description, imgs
logs.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from google.oauth2 import service_account
3
+ from googleapiclient.discovery import build
4
+ from googleapiclient.http import MediaFileUpload
5
+ from datetime import datetime
6
+
7
def save_logs(query, response, folder_id=""):
    """Write a query/response log entry to a local file and upload it to Drive.

    The entry is written to a timestamp-named ``.txt`` file in the working
    directory (UTF-8 encoded, so non-ASCII model output cannot fail on the
    platform's default encoding) and then uploaded through the Google Drive
    API, authenticating with ``secret_google_service_account.json``.

    Args:
        query: The user query to record.
        response: The answer to record.
        folder_id: ID of the Drive folder to upload into. When empty, no
            parent folder is sent, instead of an invalid empty folder ID.
    """
    to_save = f"LOG ENTRY\nQUERY\n{query}\n=================================\nRESPONSE\n{response}\n****************************************\n"

    # Fixed-width timestamp (microseconds always included) as the file name.
    filename = datetime.now().strftime("%Y%m%d%H%M%S%f") + ".txt"
    with open(filename, "w", encoding="utf-8") as log_file:
        log_file.write(to_save)

    # Path to the service account key file
    SERVICE_ACCOUNT_FILE = 'secret_google_service_account.json'

    # Define the required scopes
    SCOPES = ['https://www.googleapis.com/auth/drive.file']

    # Authenticate using the service account key file
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)

    # Build the Google Drive API client
    service = build('drive', 'v3', credentials=credentials)

    # Metadata of the file to be uploaded; the parent folder is optional.
    file_metadata = {'name': filename}
    if folder_id:
        file_metadata['parents'] = [folder_id]

    # Upload the local log file as plain text.
    media = MediaFileUpload(filename, mimetype='text/plain')
    uploaded = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id'
    ).execute()

    # Print the file ID of the uploaded file
    print('Saved in Google Drive - File ID: %s' % uploaded.get('id'))
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ requests
2
+ bs4
3
+ openai
4
+ pandas
5
+ numpy
6
+ tabula-py
7
+ openai
8
+ gradio
9
+ pyPDF2
10
+ bs4
11
+ nltk
12
+ tiktoken
13
+ pdf2image
14
+ gdown
15
+ google-auth
16
+ google-auth-oauthlib
17
+ google-auth-httplib2
18
+ google-api-python-client