honeybansal23 committed on
Commit
0eacdd5
·
1 Parent(s): 34ae0fe
Dockerfile CHANGED
@@ -1,45 +1,61 @@
1
- # Use the official Python image as the base image
2
- FROM python:3.10-slim
3
-
4
- # Install dependencies for Selenium and Chrome
5
- RUN apt-get update && apt-get install -y \
6
- wget \
7
- unzip \
8
- curl \
9
- gnupg \
10
- libnss3 \
11
- libgconf-2-4 \
12
- libxi6 \
13
- libxcursor1 \
14
- libxrandr2 \
15
- libxss1 \
16
- libxtst6 \
17
- fonts-liberation \
18
- xdg-utils \
19
- libatk-bridge2.0-0 \
20
- libgtk-3-0 \
21
- --no-install-recommends && \
22
- apt-get clean && \
23
- rm -rf /var/lib/apt/lists/*
24
-
25
- RUN apt-get update && apt-get install -y wget unzip && \
26
- wget https://dl.google.com/Linux/direct/google-chrome-stable_current_amd64.deb && \
27
- apt install -y ./google-chrome-stable_current_amd64.deb && \
28
- rm google-chrome-stable_current_amd64.deb && \
29
- apt-get clean
30
- RUN useradd -m -u 1000 user
31
- USER user
32
- ENV HOME=/home/user \
33
- PATH=/home/user/.local/bin:$PATH
34
-
35
- WORKDIR $HOME/app
36
- # WORKDIR /app
37
-
38
- COPY --chown=user . $HOME/app
39
- # COPY . /app
40
- # Install Python dependencies
41
- RUN pip install --no-cache-dir -r requirements.txt
42
-
43
- # Run the Selenium script
44
- # CMD ["gunicorn", "-b", "0.0.0.0:7860","app:app"]
45
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Use the official Python image as the base image
2
+ # FROM python:3.10-slim
3
+
4
+ # # Install dependencies for Selenium and Chrome
5
+ # RUN apt-get update && apt-get install -y \
6
+ # wget \
7
+ # unzip \
8
+ # curl \
9
+ # gnupg \
10
+ # libnss3 \
11
+ # libgconf-2-4 \
12
+ # libxi6 \
13
+ # libxcursor1 \
14
+ # libxrandr2 \
15
+ # libxss1 \
16
+ # libxtst6 \
17
+ # fonts-liberation \
18
+ # xdg-utils \
19
+ # libatk-bridge2.0-0 \
20
+ # libgtk-3-0 \
21
+ # --no-install-recommends && \
22
+ # apt-get clean && \
23
+ # rm -rf /var/lib/apt/lists/*
24
+
25
+ # RUN apt-get update && apt-get install -y wget unzip && \
26
+ # wget https://dl.google.com/Linux/direct/google-chrome-stable_current_amd64.deb && \
27
+ # apt install -y ./google-chrome-stable_current_amd64.deb && \
28
+ # rm google-chrome-stable_current_amd64.deb && \
29
+ # apt-get clean
30
+ # RUN useradd -m -u 1000 user
31
+ # USER user
32
+ # ENV HOME=/home/user \
33
+ # PATH=/home/user/.local/bin:$PATH
34
+
35
+ # WORKDIR $HOME/app
36
+ # # WORKDIR /app
37
+
38
+ # COPY --chown=user . $HOME/app
39
+ # # COPY . /app
40
+ # # Install Python dependencies
41
+ # RUN pip install --no-cache-dir -r requirements.txt
42
+
43
+ # # Run the Selenium script
44
+ # # CMD ["gunicorn", "-b", "0.0.0.0:7860","app:app"]
45
+ # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
46
# Image for the Flask + Selenium app defined in app.py.
# cypress/browsers ships a Chrome build that Selenium drives in headless mode.
FROM cypress/browsers:latest

# An ARG declared before FROM is only usable in the FROM line itself, and ARG
# values are never present in the running container. Declare it after FROM and
# re-export it as ENV so `$PORT` in CMD is always defined. It can still be
# overridden with `--build-arg PORT=...` or `docker run -e PORT=...`.
ARG PORT=443
ENV PORT=${PORT}

# Refresh the package index in the same layer as the install; installing
# without `apt-get update` fails when the base image ships no package lists.
RUN apt-get update \
    && apt-get install -y --no-install-recommends python3 python3-pip python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies into a virtual environment and put it first on
# PATH. This replaces the previous /home/root/.local/bin entry (root's home is
# /root, so that path never matched) and avoids pip refusing to modify a
# distribution-managed system interpreter.
RUN python3 -m venv /opt/venv
ENV PATH=/opt/venv/bin:${PATH}

# Copy requirements first so the dependency layer is cached across code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# requirements.txt installs gunicorn (not uvicorn), and app.py exposes a Flask
# WSGI object named `app`; the previous `uvicorn main:app` had neither the
# server binary nor the module available. Shell form is kept so $PORT expands;
# `exec` lets gunicorn receive container stop signals directly.
CMD exec gunicorn --bind 0.0.0.0:$PORT app:app
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,205 +1,241 @@
1
- import os
2
- import random
3
- import time
4
- import pandas as pd
5
- from fastapi import FastAPI, HTTPException
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from selenium import webdriver
 
7
  from selenium.webdriver.chrome.service import Service
8
- from selenium.webdriver.common.action_chains import ActionChains
9
- from selenium.webdriver.chrome.options import Options
10
  from webdriver_manager.chrome import ChromeDriverManager
11
- from webdriver_manager.core.driver_cache import DriverCacheManager
12
  from selenium.webdriver.common.by import By
13
- from fake_headers import Headers
14
- from fastapi.middleware.cors import CORSMiddleware
15
- import logging
16
- from selenium_driverless import webdriver as webdriverless
17
-
18
-
19
- # Initialize FastAPI
20
- app = FastAPI(
21
- debug=True,
22
- title="NextAnalytics Server",
23
- consumes=["application/x-www-form-urlencoded", "multipart/form-data"],
24
- docs_url='/swagger'
25
- )
26
-
27
- # Configure CORS
28
- app.add_middleware(
29
- CORSMiddleware,
30
- allow_origins=["*"],
31
- allow_credentials=True,
32
- allow_methods=["*"],
33
- allow_headers=["*"],
34
- )
35
-
36
- # Setup ChromeDriver and Selenium
37
- # custom_wdm_cache = os.path.join(os.getcwd(), 'custom_wdm_cache')
38
- # os.environ['WDM_LOCAL'] = custom_wdm_cache
39
-
40
- # Setup logging
41
- logging.basicConfig(level=logging.INFO)
42
-
43
- def setup_chromedriver():
44
- logging.info("Setting up ChromeDriver...")
45
- # custom_wdm_cache = os.path.join(os.getcwd(), 'custom_wdm_cache')
46
- # os.environ['WDM_LOCAL'] = custom_wdm_cache
47
- # cache_manager = DriverCacheManager(custom_wdm_cache)
48
- # os.chmod(custom_wdm_cache, 0o755) # Ensure proper permissions
49
- # path = ChromeDriverManager(cache_manager=cache_manager).install()
50
- path = ChromeDriverManager().install()
51
- os.chmod(path, 0o755) # Ensure ChromeDriver is executable
52
- logging.info(f"ChromeDriver path: {path}")
53
- return path
54
-
55
- # Setup headless Chrome options
56
- # Define a custom user agent
57
- my_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
58
-
59
- # my_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
60
-
61
- # proxy = None
62
- browser_option = Options()
63
- browser_option.add_argument("--headless") # Running in headless mode (no GUI)
64
- browser_option.add_argument("--no-sandbox")
65
- browser_option.add_argument("--disable-dev-shm-usage")
66
- browser_option.add_argument("--ignore-certificate-errors")
67
- # browser_option.add_argument("--disable-gpu")
68
- # browser_option.add_argument("--log-level=3")
69
- # browser_option.add_argument("--disable-notifications")
70
- # browser_option.add_argument("--disable-popup-blocking")
71
- browser_option.add_argument(f"--user-agent={my_user_agent}")
72
-
73
- # if proxy:
74
- # browser_option.add_argument(f"--proxy-server={proxy}")
75
-
76
-
77
- # Setup WebDriver
78
- driver_path = setup_chromedriver()
79
- service = Service(executable_path=driver_path)
80
- driver = webdriver.Chrome(service=service, options=browser_option)
81
- # actions = ActionChains(driver)
82
-
83
- def getSearchPostData(search_keyword, index, name="", forCompetitorAnalysis=False):
84
- # Navigate to the search results page
85
- url = f'https://www.reddit.com/search/?q={search_keyword}'
86
- driver.get(url)
87
- time.sleep(3) # Consider using WebDriverWait instead of sleep for better reliability
88
- logging.info("Navigated to search page.")
89
-
90
- posts_data = []
91
- list_length = 0 # posts count
92
- try:
93
- if forCompetitorAnalysis:
94
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
95
- time.sleep(5)
96
- post_cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid="post-title-text"]')
97
- post_cards_1 = driver.find_elements(By.CSS_SELECTOR, 'div[data-testid="search-counter-row"]')
98
- post_cards_2 = driver.find_elements(By.CSS_SELECTOR, 'faceplate-timeago')
99
- logging.info(f"Found {len(post_cards)} post cards.")
100
-
101
- idx = list_length
102
- for card in post_cards_1:
103
- try:
104
- votes_count = card.find_element(By.XPATH, './/faceplate-number').text
105
- comments_count = card.find_element(By.XPATH,
106
- './/span[contains(text(), "comment") or contains(text(), "comments")]/preceding-sibling::faceplate-number'
107
- ).text
108
- posts_data.append({
109
- "index": idx,
110
- "comment_count": comments_count,
111
- "votes_count": votes_count
112
- })
113
- idx += 1
114
- except Exception as e:
115
- logging.error(f"Error processing post_card_1: {e}")
116
-
117
- idx = list_length
118
- for card in post_cards:
119
- try:
120
- url = card.get_attribute("href")
121
- title = card.text
122
- posts_data[idx]["title"] = title
123
- posts_data[idx]["url"] = url
124
- idx += 1
125
- except Exception as e:
126
- logging.error(f"Error processing post_cards: {e}")
127
-
128
- idx = list_length
129
- for card in post_cards_2:
130
- try:
131
- time_element = card.find_element(By.XPATH, './time')
132
- post_time = time_element.get_attribute('datetime')
133
- posts_data[idx]["time"] = post_time
134
- idx += 1
135
- except Exception as e:
136
- logging.error(f"Error processing post_cards_2: {e}")
137
- except Exception as e:
138
- logging.error(f"Error in scrolling or extracting data: {e}")
139
-
140
- df = pd.DataFrame(posts_data)
141
- df.to_csv(f'posts_data_{index}.csv', index=False)
142
- logging.info(f"Data saved to posts_data_{index}.csv")
143
- return df
144
-
145
- def get_webpage_title(url: str) -> str:
146
- try:
147
- getSearchPostData(search_keyword="migraine", index=0)
148
- url="https://www.reddit.com"
149
- driver.get(url)
150
- title = driver.title
151
- logging.info(f"Page title: {title}")
152
- return title
153
- except Exception as e:
154
- logging.error(f"Error fetching webpage title: {e}")
155
- return str(e)
156
-
157
- @app.get("/")
158
- async def home():
159
- return {"message": "Hello"}
160
-
161
- @app.get("/get-title/")
162
- async def fetch_title(url: str):
163
- """
164
- Fetch the title of a webpage by URL.
165
- Example: /get-title/?url=https://www.reddit.com
166
- """
167
- try:
168
- title = get_webpage_title(url)
169
- return {"url": url, "title": title}
170
- except Exception as e:
171
- raise HTTPException(status_code=500, detail=str(e))
172
- @app.get("/get-reddit/")
173
- async def getReddit(url: str):
174
- """
175
- Fetch the title of a webpage by URL.
176
- Example: /get-title/?url=https://www.reddit.com
177
- """
178
- try:
179
- options = webdriverless.ChromeOptions()
180
- driver_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
181
-
182
- options.add_argument("--headless") # Running in headless mode (no GUI)
183
- options.add_argument("--no-sandbox")
184
- options.add_argument("--disable-dev-shm-usage")
185
- options.add_argument("--ignore-certificate-errors")
186
- options.add_argument(f"--user-agent={driver_agent}")
187
-
188
- title="Notitle"
189
- async with webdriverless.Chrome(options=options) as driver:
190
- await driver.get('https://www.reddit.com')
191
- time.sleep(3)
192
-
193
- title = await driver.title
194
- url = await driver.current_url
195
- source = await driver.page_source
196
- print(title)
197
- return {"url": url, "title": title}
198
- return {"url": url, "title": title}
199
- except Exception as e:
200
- raise HTTPException(status_code=500, detail=str(e))
201
-
202
- # Run the app
203
- # if __name__ == "__main__":
204
- # import uvicorn
205
- # uvicorn.run(app, host="127.0.0.1", port=7860)
 
1
+ # import os
2
+ # import random
3
+ # import time
4
+ # import pandas as pd
5
+ # from fastapi import FastAPI, HTTPException
6
+ # from selenium import webdriver
7
+ # from selenium.webdriver.chrome.service import Service
8
+ # from selenium.webdriver.common.action_chains import ActionChains
9
+ # from selenium.webdriver.chrome.options import Options
10
+ # from webdriver_manager.chrome import ChromeDriverManager
11
+ # from webdriver_manager.core.driver_cache import DriverCacheManager
12
+ # from selenium.webdriver.common.by import By
13
+ # from fake_headers import Headers
14
+ # from fastapi.middleware.cors import CORSMiddleware
15
+ # import logging
16
+ # from selenium_driverless import webdriver as webdriverless
17
+
18
+
19
+ # # Initialize FastAPI
20
+ # app = FastAPI(
21
+ # debug=True,
22
+ # title="NextAnalytics Server",
23
+ # consumes=["application/x-www-form-urlencoded", "multipart/form-data"],
24
+ # docs_url='/swagger'
25
+ # )
26
+
27
+ # # Configure CORS
28
+ # app.add_middleware(
29
+ # CORSMiddleware,
30
+ # allow_origins=["*"],
31
+ # allow_credentials=True,
32
+ # allow_methods=["*"],
33
+ # allow_headers=["*"],
34
+ # )
35
+
36
+ # # Setup ChromeDriver and Selenium
37
+ # # custom_wdm_cache = os.path.join(os.getcwd(), 'custom_wdm_cache')
38
+ # # os.environ['WDM_LOCAL'] = custom_wdm_cache
39
+
40
+ # # Setup logging
41
+ # logging.basicConfig(level=logging.INFO)
42
+
43
+ # def setup_chromedriver():
44
+ # logging.info("Setting up ChromeDriver...")
45
+ # # custom_wdm_cache = os.path.join(os.getcwd(), 'custom_wdm_cache')
46
+ # # os.environ['WDM_LOCAL'] = custom_wdm_cache
47
+ # # cache_manager = DriverCacheManager(custom_wdm_cache)
48
+ # # os.chmod(custom_wdm_cache, 0o755) # Ensure proper permissions
49
+ # # path = ChromeDriverManager(cache_manager=cache_manager).install()
50
+ # path = ChromeDriverManager().install()
51
+ # os.chmod(path, 0o755) # Ensure ChromeDriver is executable
52
+ # logging.info(f"ChromeDriver path: {path}")
53
+ # return path
54
+
55
+ # # Setup headless Chrome options
56
+ # # Define a custom user agent
57
+ # my_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
58
+
59
+ # # my_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
60
+
61
+ # # proxy = None
62
+ # browser_option = Options()
63
+ # browser_option.add_argument("--headless") # Running in headless mode (no GUI)
64
+ # browser_option.add_argument("--no-sandbox")
65
+ # browser_option.add_argument("--disable-dev-shm-usage")
66
+ # browser_option.add_argument("--ignore-certificate-errors")
67
+ # # browser_option.add_argument("--disable-gpu")
68
+ # # browser_option.add_argument("--log-level=3")
69
+ # # browser_option.add_argument("--disable-notifications")
70
+ # # browser_option.add_argument("--disable-popup-blocking")
71
+ # browser_option.add_argument(f"--user-agent={my_user_agent}")
72
+
73
+ # # if proxy:
74
+ # # browser_option.add_argument(f"--proxy-server={proxy}")
75
+
76
+
77
+ # # Setup WebDriver
78
+ # driver_path = setup_chromedriver()
79
+ # service = Service(executable_path=driver_path)
80
+ # driver = webdriver.Chrome(service=service, options=browser_option)
81
+ # # actions = ActionChains(driver)
82
+
83
+ # def getSearchPostData(search_keyword, index, name="", forCompetitorAnalysis=False):
84
+ # # Navigate to the search results page
85
+ # url = f'https://www.reddit.com/search/?q={search_keyword}'
86
+ # driver.get(url)
87
+ # time.sleep(3) # Consider using WebDriverWait instead of sleep for better reliability
88
+ # logging.info("Navigated to search page.")
89
+
90
+ # posts_data = []
91
+ # list_length = 0 # posts count
92
+ # try:
93
+ # if forCompetitorAnalysis:
94
+ # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
95
+ # time.sleep(5)
96
+ # post_cards = driver.find_elements(By.CSS_SELECTOR, 'a[data-testid="post-title-text"]')
97
+ # post_cards_1 = driver.find_elements(By.CSS_SELECTOR, 'div[data-testid="search-counter-row"]')
98
+ # post_cards_2 = driver.find_elements(By.CSS_SELECTOR, 'faceplate-timeago')
99
+ # logging.info(f"Found {len(post_cards)} post cards.")
100
+
101
+ # idx = list_length
102
+ # for card in post_cards_1:
103
+ # try:
104
+ # votes_count = card.find_element(By.XPATH, './/faceplate-number').text
105
+ # comments_count = card.find_element(By.XPATH,
106
+ # './/span[contains(text(), "comment") or contains(text(), "comments")]/preceding-sibling::faceplate-number'
107
+ # ).text
108
+ # posts_data.append({
109
+ # "index": idx,
110
+ # "comment_count": comments_count,
111
+ # "votes_count": votes_count
112
+ # })
113
+ # idx += 1
114
+ # except Exception as e:
115
+ # logging.error(f"Error processing post_card_1: {e}")
116
+
117
+ # idx = list_length
118
+ # for card in post_cards:
119
+ # try:
120
+ # url = card.get_attribute("href")
121
+ # title = card.text
122
+ # posts_data[idx]["title"] = title
123
+ # posts_data[idx]["url"] = url
124
+ # idx += 1
125
+ # except Exception as e:
126
+ # logging.error(f"Error processing post_cards: {e}")
127
+
128
+ # idx = list_length
129
+ # for card in post_cards_2:
130
+ # try:
131
+ # time_element = card.find_element(By.XPATH, './time')
132
+ # post_time = time_element.get_attribute('datetime')
133
+ # posts_data[idx]["time"] = post_time
134
+ # idx += 1
135
+ # except Exception as e:
136
+ # logging.error(f"Error processing post_cards_2: {e}")
137
+ # except Exception as e:
138
+ # logging.error(f"Error in scrolling or extracting data: {e}")
139
+
140
+ # df = pd.DataFrame(posts_data)
141
+ # df.to_csv(f'posts_data_{index}.csv', index=False)
142
+ # logging.info(f"Data saved to posts_data_{index}.csv")
143
+ # return df
144
+
145
+ # def get_webpage_title(url: str) -> str:
146
+ # try:
147
+ # getSearchPostData(search_keyword="migraine", index=0)
148
+ # url="https://www.reddit.com"
149
+ # driver.get(url)
150
+ # title = driver.title
151
+ # logging.info(f"Page title: {title}")
152
+ # return title
153
+ # except Exception as e:
154
+ # logging.error(f"Error fetching webpage title: {e}")
155
+ # return str(e)
156
+
157
+ # @app.get("/")
158
+ # async def home():
159
+ # return {"message": "Hello"}
160
+
161
+ # @app.get("/get-title/")
162
+ # async def fetch_title(url: str):
163
+ # """
164
+ # Fetch the title of a webpage by URL.
165
+ # Example: /get-title/?url=https://www.reddit.com
166
+ # """
167
+ # try:
168
+ # title = get_webpage_title(url)
169
+ # return {"url": url, "title": title}
170
+ # except Exception as e:
171
+ # raise HTTPException(status_code=500, detail=str(e))
172
+ # @app.get("/get-reddit/")
173
+ # async def getReddit(url: str):
174
+ # """
175
+ # Fetch the title of a webpage by URL.
176
+ # Example: /get-title/?url=https://www.reddit.com
177
+ # """
178
+ # try:
179
+ # options = webdriverless.ChromeOptions()
180
+ # driver_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
181
+
182
+ # options.add_argument("--headless") # Running in headless mode (no GUI)
183
+ # options.add_argument("--no-sandbox")
184
+ # options.add_argument("--disable-dev-shm-usage")
185
+ # options.add_argument("--ignore-certificate-errors")
186
+ # options.add_argument(f"--user-agent={driver_agent}")
187
+
188
+ # title="Notitle"
189
+ # async with webdriverless.Chrome(options=options) as driver:
190
+ # await driver.get('https://www.reddit.com')
191
+ # time.sleep(3)
192
+
193
+ # title = await driver.title
194
+ # url = await driver.current_url
195
+ # source = await driver.page_source
196
+ # print(title)
197
+ # return {"url": url, "title": title}
198
+ # return {"url": url, "title": title}
199
+ # except Exception as e:
200
+ # raise HTTPException(status_code=500, detail=str(e))
201
+
202
+ # # Run the app
203
+ # # if __name__ == "__main__":
204
+ # # import uvicorn
205
+ # # uvicorn.run(app, host="127.0.0.1", port=7860)
206
+
207
  from selenium import webdriver
208
+ from flask import Flask, request
209
  from selenium.webdriver.chrome.service import Service
 
 
210
  from webdriver_manager.chrome import ChromeDriverManager
 
211
  from selenium.webdriver.common.by import By
212
+
213
+ app = Flask(__name__)
214
+
215
+
216
def download_selenium():
    """Load reddit.com in headless Chrome and return its page title.

    A fresh Chrome session is started for every call and is always shut down
    before returning, including when navigation raises.

    Returns:
        dict: ``{'Page Title': <title of the loaded page>}``.
    """
    chrome_options = webdriver.ChromeOptions()
    driver_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"

    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Content setting 2 blocks image loading.
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument(f"--user-agent={driver_agent}")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    try:
        driver.get("https://reddit.com")
        return {'Page Title': driver.title}
    finally:
        # Without quit() every request leaves a Chrome and chromedriver
        # process running, which eventually exhausts the container's memory.
        driver.quit()
232
+
233
+
234
@app.route('/', methods = ['GET','POST'])
def home():
    """Serve the scrape result on GET; reject other accepted methods explicitly.

    Returns:
        The dict produced by ``download_selenium()`` for GET requests, or an
        error payload with HTTP status 405 for POST.
    """
    if request.method == 'GET':
        return download_selenium()
    # The route also accepts POST; previously that path fell through and
    # returned None, which Flask reports as a 500 "view function did not
    # return a valid response" error.
    return {'error': 'Only GET is supported on this endpoint.'}, 405
238
+
239
+
240
# Development entry point: runs only when this file is executed directly
# (e.g. `python app.py`), not when the module is imported by a WSGI server.
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader); confirm this path is never used for a
    # publicly reachable deployment.
    app.run(debug=True, port=3000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,14 +1,21 @@
1
- flask==3.0.3
2
- gunicorn
3
- flask_cors
4
- selenium==4.26.1
5
- uvicorn
6
- scalar_fastapi==1.0.3
7
- requests
8
- pandas
9
- numpy
10
- webdriver_manager==4.0.2
11
- fake_headers
12
- fastapi
13
- selenium_driverless
14
- asyncio
 
 
 
 
 
 
 
 
1
+ # flask==3.0.3
2
+ # gunicorn
3
+ # flask_cors
4
+ # selenium==4.26.1
5
+ # uvicorn
6
+ # scalar_fastapi==1.0.3
7
+ # requests
8
+ # pandas
9
+ # numpy
10
+ # webdriver_manager==4.0.2
11
+ # fake_headers
12
+ # fastapi
13
+ # selenium_driverless
14
+ # asyncio
15
+
16
+ selenium==4.6.0
17
+ requests==2.28.1
18
+ webdriver_manager==3.8.4
19
+ packaging==21.3
20
+ flask-restful==0.3.9
21
+ gunicorn==20.1.0
selenium_webapp-main/Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Flask + Selenium app defined in selenium_webapp.py.
# cypress/browsers ships a Chrome build that Selenium drives in headless mode.
FROM cypress/browsers:latest

# An ARG declared before FROM is only usable in the FROM line itself, and ARG
# values are never present in the running container. Declare it after FROM and
# re-export it as ENV so `$PORT` in CMD is always defined. It can still be
# overridden with `--build-arg PORT=...` or `docker run -e PORT=...`.
ARG PORT=443
ENV PORT=${PORT}

# Refresh the package index in the same layer as the install; installing
# without `apt-get update` fails when the base image ships no package lists.
RUN apt-get update \
    && apt-get install -y --no-install-recommends python3 python3-pip python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies into a virtual environment and put it first on
# PATH. This replaces the previous /home/root/.local/bin entry (root's home is
# /root, so that path never matched) and avoids pip refusing to modify a
# distribution-managed system interpreter.
RUN python3 -m venv /opt/venv
ENV PATH=/opt/venv/bin:${PATH}

# Copy requirements first so the dependency layer is cached across code edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# requirements.txt installs gunicorn (not uvicorn), and the Flask WSGI object
# `app` lives in selenium_webapp.py; the previous `uvicorn main:app` had
# neither the server binary nor the module available. Shell form is kept so
# $PORT expands; `exec` lets gunicorn receive container stop signals directly.
CMD exec gunicorn --bind 0.0.0.0:$PORT selenium_webapp:app
selenium_webapp-main/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ selenium==4.6.0
2
+ requests==2.28.1
3
+ webdriver_manager==3.8.4
4
+ packaging==21.3
5
+ flask-restful==0.3.9
6
+ gunicorn==20.1.0
selenium_webapp-main/selenium_webapp.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from flask import Flask, request
3
+ from selenium.webdriver.chrome.service import Service
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ from selenium.webdriver.common.by import By
6
+
7
+ app = Flask(__name__)
8
+
9
+
10
def download_selenium():
    """Load google.com in headless Chrome and return its title and language text.

    A fresh Chrome session is started for every call and is always shut down
    before returning, including when the page element lookup raises.

    Returns:
        dict: ``{'Page Title': <page title>, 'Language': <text of the
        ``div#SIvCob`` element>}``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the page has no
            ``div`` with id ``SIvCob``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Content setting 2 blocks image loading.
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    try:
        driver.get("https://google.com")
        title = driver.title
        language = driver.find_element(By.XPATH, "//div[@id='SIvCob']").text
        return {'Page Title': title, 'Language': language}
    finally:
        # Without quit() every request (and every failed lookup) leaves a
        # Chrome and chromedriver process running in the container.
        driver.quit()
23
+
24
+
25
@app.route('/', methods = ['GET','POST'])
def home():
    """Serve the scrape result on GET; reject other accepted methods explicitly.

    Returns:
        The dict produced by ``download_selenium()`` for GET requests, or an
        error payload with HTTP status 405 for POST.
    """
    if request.method == 'GET':
        return download_selenium()
    # The route also accepts POST; previously that path fell through and
    # returned None, which Flask reports as a 500 "view function did not
    # return a valid response" error.
    return {'error': 'Only GET is supported on this endpoint.'}, 405
29
+
30
+
31
# Development entry point: runs only when this file is executed directly
# (e.g. `python selenium_webapp.py`), not when the module is imported by a
# WSGI server.
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader); confirm this path is never used for a
    # publicly reachable deployment.
    app.run(debug=True, port=3000)