rfonseca85 committed on
Commit
ed88065
·
1 Parent(s): c0f5855

Creating app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -0
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import asyncio
import base64
import json
import os
from multiprocessing import Pool

import streamlit as st
from openai import OpenAI
from pyppeteer import launch

#################### OpenAI ####################
# Resolve the API key first: the environment variable wins, otherwise ask
# the user in the sidebar.
# NOTE(review): OpenAI() raises at construction time when no key is
# available at all, so the key must be resolved BEFORE building the client
# (the original constructed the client first, which crashes on machines
# without OPENAI_API_KEY set).
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    with st.sidebar:
        api_key = st.text_input("Please enter your OpenAI API key")

# Client shared by every model call in this module.  The placeholder key is
# never used for a real request: app() stops with an error while api_key is
# empty, before any completion is attempted.
model = OpenAI(api_key=api_key or "MISSING-KEY")
model.timeout = 15  # seconds per request
22
+
23
+
24
+
25
+
26
+ #################### Functions ####################
27
+
28
# Runs the async screenshot capture on its own event loop.  pyppeteer is
# asyncio-based and this function is handed to a multiprocessing Pool, so
# the worker process needs a private loop.
def run_pyppeteer(url):
    """Capture screenshots of *url* in this process.

    Returns the dict produced by capture_screenshots().  Creates and owns
    a fresh event loop; the loop is closed even when capture fails, so the
    worker does not leak loop resources (the original leaked it on error).
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(capture_screenshots(url))
    finally:
        loop.close()
36
+
37
# Get the url from prompt using GPT3.5
def get_url_from_prompt(prompt):
    """Return a URL (str) that GPT-3.5 suggests for answering *prompt*.

    The model is forced into JSON mode and instructed to reply as
    {"url": "..."}.  Raises json.JSONDecodeError or KeyError if the model
    does not comply.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a web crawler. Your job is to give the user a URL to go to in order to find the answer to the question. Go to a direct URL that will likely have the answer to the user's question. Respond in the following JSON fromat: {\"url\": \"<put url here>\"}",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]

    response = model.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages,
        max_tokens=1024,
        response_format={"type": "json_object"},  # guarantees JSON output
        seed=2232,  # fixed seed for more reproducible URL choices
    )

    # (Dead code removed: the original appended the assistant reply to the
    # local `messages` list, which was then discarded on return.)
    message_json = json.loads(response.choices[0].message.content)
    return message_json["url"]
69
+
70
# Capture screenshots from url using pyppeteer
async def capture_screenshots(url):
    """Return {"screenshot_1": <base64 PNG str>, ...} covering the page.

    Scrolls one viewport (1080 px) per iteration and screenshots each
    position until the bottom of the page is reached.  The browser is
    closed even when navigation or screenshotting raises, so headless
    Chromium processes are not leaked (the original leaked them on error).
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setViewport({'width': 1920, 'height': 1080})
        await page.goto(url)

        screenshots_dict = {}
        screenshot_counter = 1
        while True:
            screenshot = await page.screenshot()
            key = f"screenshot_{screenshot_counter}"
            screenshots_dict[key] = base64.b64encode(screenshot).decode('utf-8')

            # Advance one viewport, then check whether we've hit the bottom.
            await page.evaluate("window.scrollBy(0, window.innerHeight)")
            scroll_top = await page.evaluate("window.scrollY")
            scroll_height = await page.evaluate("document.body.scrollHeight")
            inner_height = await page.evaluate("window.innerHeight")
            if scroll_top + inner_height >= scroll_height:
                break

            screenshot_counter += 1
    finally:
        await browser.close()

    return screenshots_dict
99
+
100
def scrape_images_using_gtpVision(prompt, screenshot):
    """Ask GPT-4 Vision whether *screenshot* (base64 PNG str) answers *prompt*.

    Returns the model's answer text.  The sentinel "ANSWER_NOT_FOUND" in the
    reply means the screenshot did not contain the answer.
    """
    # (Dead code removed: the original built a local `messages` list and
    # spliced in `messages[1:]`, which for a one-element list is always
    # empty — the API call below is unchanged.)
    response = model.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": "Your job is to answer the user's question based on the given screenshot only with more than 95% certainty. Answer the user as an assistant, but don't tell that the information is from a screenshot or an image. Pretend it is information that you know. If you can't answer the question, dont look for another answer, simply respond with the code `ANSWER_NOT_FOUND` and nothing else. Thats extreame important that you dont try to find the answer from another source",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        # NOTE(review): newer OpenAI SDKs expect
                        # {"url": "..."} here; this bare-string form matches
                        # the vision-preview-era API — confirm against the
                        # pinned openai version.
                        "image_url": f"data:image/png;base64,{screenshot}",
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            },
        ],
        max_tokens=1024,
    )
    return response.choices[0].message.content
135
+
136
#################### Streamlit app ####################
def app():
    """Streamlit entry point: pick a URL with GPT-3.5, screenshot it with
    pyppeteer, then answer the question from the screenshots with GPT-4 Vision.
    """
    st.title("💬 Virtu.web")
    st.caption("🚀 A web scraper using AI (GPT4Vision + Pyppeteer) by Virtu.ai")

    prompt = st.text_input("Your question?", key="search_q")
    if prompt:

        # Check if API key is provided
        if not api_key:
            st.error("Please enter your OpenAI API key or set it as an environment variable")
            st.code("Add the line below to ~/.bashrc or ~/.zshrc" + "\n" + "export OPENAI_API_KEY=your_openai_key" + "\n" + "source ~/.bashrc or source ~/.zshrc")
            st.stop()

        with st.status("🤖 I'm looking for the answer... Follow the process in the meantime", expanded=True) as status:

            start_time = time.time()

            st.write("Searching for a url using gpt3.5...")
            url = get_url_from_prompt(prompt)
            st.write("Found URL " + url)

            st.write("Retrieving screenshots from url using pyppeteer...")
            # pyppeteer needs its own asyncio loop, hence the subprocess.
            # Context-managed Pool guarantees the worker is terminated and
            # reaped even if screenshotting raises (the original never
            # closed the pool); .get() blocks until done, replacing the
            # original 1-second busy-wait — no other work happened anyway.
            with Pool(processes=1) as pool:
                screenshots_dict = pool.apply_async(run_pyppeteer, (url,)).get()
            st.write("Screenshots retrieved")

            st.write("Processing images using GPT4Vision...")
            # Default for the final st.write below — the original raised
            # NameError when the page yielded no screenshots.
            gptVision_return_message = "ANSWER_NOT_FOUND"
            for i, screenshot in enumerate(screenshots_dict.values(), start=1):

                gptVision_return_message = scrape_images_using_gtpVision(prompt, screenshot)
                st.write("Scraping image using GPT4Vision... Image " + str(i) + " of " + str(len(screenshots_dict)) + " processed")

                binary_image = base64.b64decode(screenshot)

                if "ANSWER_NOT_FOUND" in gptVision_return_message:
                    st.write("Answer not found on current screenshot below, I'll keep looking in the same website")
                    st.image(binary_image)
                    continue

                # Answer found: show it, remember it, and stop scanning.
                st.write("Answer found on screenshot below")
                st.image(binary_image)

                st.session_state["messages"] = [{"role": "assistant", "content": gptVision_return_message}]

                end_time = time.time()
                status.update(label="Search took - {:.2f} seconds".format(end_time - start_time), state="complete", expanded=False)
                break

        st.write(gptVision_return_message)


# Streamlit executes this script top-to-bottom on every rerun, so the app
# is launched directly rather than behind a __main__ guard.
app()