sam0303 committed on
Commit
e5b9101
·
verified ·
1 Parent(s): 1348eb0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from selenium import webdriver
4
+ import base64
5
+ import requests
6
+ import json
7
+ import csv
8
+ import gradio as gr
9
+ from openai import OpenAI
10
+ import uuid
11
+
12
+
13
def capture_full_page_screenshots(url, output_folder, scroll_size=400):
    """Scroll through ``url`` in Chrome, saving a screenshot at every step.

    The page is opened in a fresh Chrome WebDriver session, the window is
    maximized, and the viewport is captured repeatedly while scrolling down
    ``scroll_size`` pixels at a time. Each image is written to
    ``output_folder`` as ``screenshot_<uuid4>.png``.

    Args:
        url: Address of the page to capture.
        output_folder: Directory that receives the PNG files; created if it
            does not exist yet.
        scroll_size: Vertical scroll step in pixels between two screenshots.
            Must be positive.

    Returns:
        A status string naming the output folder.

    Raises:
        ValueError: If ``scroll_size`` is not positive.
    """
    if scroll_size <= 0:
        # A non-positive step never moves past the page height, so the
        # capture loop below would never terminate.
        raise ValueError("scroll_size must be a positive number of pixels")

    # exist_ok avoids the race between a separate exists() check and the
    # directory creation.
    os.makedirs(output_folder, exist_ok=True)

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()

        # The height is read once, before scrolling starts; content that is
        # appended to the page while scrolling is not accounted for.
        total_height = driver.execute_script("return document.body.scrollHeight")
        scroll_position = 0

        while scroll_position < total_height:
            # A random UUID keeps file names unique across repeated calls
            # that share the same output folder.
            random_string = str(uuid.uuid4())
            screenshot_path = os.path.join(output_folder, f"screenshot_{random_string}.png")
            driver.save_screenshot(screenshot_path)
            print(f"Saved {screenshot_path}")

            scroll_position += scroll_size
            driver.execute_script(f"window.scrollTo(0, {scroll_position});")
            # Pause so the page can settle before the next capture.
            time.sleep(1)
    finally:
        # Always shut the browser down, even when navigation or a
        # screenshot fails, so no Chrome process is left behind.
        driver.quit()

    return f"Screenshots saved to {output_folder}"
37
+
38
+
39
+
40
def encode_image(image_path):
    """Return the file at ``image_path`` as a base64-encoded text string.

    The file is read in binary mode in one go, base64-encoded, and the
    resulting bytes are decoded as UTF-8 to produce a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
43
+
44
def _build_vision_payload(base64_image):
    """Build the chat-completions request body for one base64-encoded PNG."""
    return {
        "model": "gpt-4-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """The image might contain name of some products and their respective pricing.
                        Identify them. Ignore the partially visible names. Return me the details in json format.
                        The json output should have two variables: 1. Product Name 2. Product Price
                        You should only pass the json output and say nothing else. Just the json output in needed
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The screenshots are PNG files, so the data URL
                            # must declare image/png rather than image/jpeg.
                            "url": f"data:image/png;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }


def _product_rows(parsed):
    """Yield ``[name, price]`` CSV rows from the decoded model output."""
    # A reply holding a single JSON object decodes to a dict. Iterating a
    # dict walks its string keys, and indexing a string with
    # 'Product Name' raises TypeError, so wrap it in a list first.
    if isinstance(parsed, dict):
        parsed = [parsed]
    if not isinstance(parsed, list):
        return
    for product in parsed:
        if isinstance(product, dict) and 'Product Name' in product and 'Product Price' in product:
            yield [product['Product Name'], product['Product Price']]


def vision(api_key, folder_path):
    """Extract product names and prices from screenshots into a CSV file.

    Every ``.png`` file in ``folder_path`` is base64-encoded and sent to the
    OpenAI chat-completions endpoint together with a prompt asking for the
    visible products as JSON. Each product found is appended as a row to
    ``product_details.csv`` (relative to the current working directory),
    which is rewritten from scratch on every call.

    Images whose request fails, returns a non-200 status, or yields a reply
    that cannot be parsed as JSON are reported on stdout and skipped.

    Args:
        api_key: OpenAI API key sent as a bearer token.
        folder_path: Directory that holds the PNG screenshots.

    Returns:
        The string ``"Successfully Updated the File"``.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    csv_file_path = 'product_details.csv'
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(["Product Name", "Product Price"])

        # os.listdir returns entries in arbitrary order; sorting makes the
        # processing order (and therefore the CSV) reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith(".png"):
                continue

            image_path = os.path.join(folder_path, filename)
            payload = _build_vision_payload(encode_image(image_path))

            try:
                # A timeout keeps one stalled connection from hanging the
                # whole run.
                response = requests.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=120,
                )
            except requests.RequestException as e:
                print(f"Request for {filename} failed: {e}")
                continue

            if response.status_code != 200:
                print(f"API returned status {response.status_code} for {filename}; skipping.")
                continue

            data = response.json()
            choices = data.get('choices') if isinstance(data, dict) else None
            if not choices or 'message' not in choices[0]:
                continue

            content = choices[0]['message'].get('content')
            if not content:
                # The content can be missing or empty; nothing to parse.
                continue

            # Strip Markdown code fences and newlines the model may wrap
            # around the JSON.
            clean_content = content.strip().replace('```json', '').replace('```', '').replace('\n', '')
            try:
                products = json.loads(clean_content)
            except json.JSONDecodeError as e:
                print("Failed to parse JSON:", e)
                print("Cleaned JSON content that failed:", repr(clean_content))
                continue

            csv_writer.writerows(_product_rows(products))

    return "Successfully Updated the File"
105
+
106
def update_url(url_input, output_folder, api_key_input, max_pages=None):
    """Follow a paginated listing and screenshot every subsequent page.

    Starting from ``url_input``, a chat model is asked to rewrite the
    current URL so that it points at the following page. Each URL it
    returns is fetched with ``requests``; when the response status is 200
    the page is captured with ``capture_full_page_screenshots`` and becomes
    the new current URL.

    The loop stops when the fetched page does not return status 200, when
    the model returns an empty or already-visited URL, when ``max_pages``
    pages have been captured, or when any exception is raised.

    Args:
        url_input: URL of the first page; it is not captured here.
        output_folder: Directory passed on to the screenshot function.
        api_key_input: OpenAI API key used to create the client.
        max_pages: Optional upper bound on the number of follow-up pages to
            capture. ``None`` (the default) means no explicit limit.

    Returns:
        A status string naming the output folder.
    """
    client = OpenAI(api_key=api_key_input)
    current_url = url_input
    # Remember every URL already handled so a repeated answer from the
    # model cannot keep the loop running forever.
    visited = {url_input}
    pages_captured = 0

    while max_pages is None or pages_captured < max_pages:
        try:
            completion = client.chat.completions.create(
                # NOTE(review): the dated "gpt-3.5-turbo-0301" snapshot is
                # believed to be retired; the unversioned alias is used
                # instead. Confirm against the OpenAI deprecations page.
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a URL modifier. Given an url, you will modify it accordingly. You will not access the website"},
                    {"role": "user", "content": f'You need to modify the given url {current_url} in a way where I can access the following page. Try to identify at which part of the url, the pagination is defined and modify that part. Just provide the modified URL. You are not supposed to talk anything else with the user.'}
                ]
            )

            # The reply is free text; trim surrounding whitespace before
            # treating it as a URL.
            updated_url = (completion.choices[0].message.content or "").strip()
            if not updated_url or updated_url in visited:
                print(f"No new URL returned after {current_url}. Stopping the loop.")
                break

            # Check that the proposed URL is reachable before opening a
            # browser; the timeout prevents an indefinite hang.
            response = requests.get(updated_url, timeout=30)
            if response.status_code != 200:
                print(f"Failed to access {updated_url}. Stopping the loop.")
                break

            capture_full_page_screenshots(updated_url, output_folder)
            visited.add(updated_url)
            current_url = updated_url
            pages_captured += 1

        except Exception as e:
            # Any failure (API error, network error, browser error) ends
            # the pagination walk instead of propagating to the caller.
            print(f"An error occurred: {e}. Stopping the loop.")
            break

    return f"Processing Completed. Screenshots saved in {output_folder}"
137
+
138
def process(url, output_folder, api_key,web_type_input):
    """Run the screenshot and extraction pipeline for one URL.

    The starting page is always captured first. For every mode other than
    ``"Dynamic"`` the follow-up pages are captured as well via
    ``update_url``. Finally ``vision`` is run over the output folder.

    Returns the string ``"Processing Completed"``.
    """
    capture_full_page_screenshots(url, output_folder)

    needs_pagination = web_type_input != "Dynamic"
    if needs_pagination:
        update_url(url, output_folder, api_key)

    vision(api_key, output_folder)
    return "Processing Completed"
148
+
149
# Gradio UI: four input widgets wired to process(), with a plain-text result.
url_input = gr.Textbox(label="URL")
output_folder_input = gr.Textbox(label="Output Folder Path")
api_key_input = gr.Textbox(label="API Key", type="password")
mode_input = gr.Dropdown(label="Mode", choices=["Dynamic", "Paginated"])
# Both names refer to the same dropdown component.
web_type_input = mode_input

# The order of the inputs matches the positional parameters of process().
interface = gr.Interface(
    fn=process,
    inputs=[url_input, output_folder_input, api_key_input, web_type_input],
    outputs="text",
    title="Full Page Screenshot and OCR",
)
interface.launch()