honey234 committed on
Commit
cb71789
·
1 Parent(s): 1c3d2b2
Files changed (2) hide show
  1. Dockerfile +34 -84
  2. app.py +14 -34
Dockerfile CHANGED
@@ -1,26 +1,26 @@
1
- # # Use the official Python image as the base image
2
- # FROM python:3.10-slim
3
 
4
- # # Install dependencies for Selenium and Chrome
5
- # RUN apt-get update && apt-get install -y \
6
- # wget \
7
- # unzip \
8
- # curl \
9
- # gnupg \
10
- # libnss3 \
11
- # libgconf-2-4 \
12
- # libxi6 \
13
- # libxcursor1 \
14
- # libxrandr2 \
15
- # libxss1 \
16
- # libxtst6 \
17
- # fonts-liberation \
18
- # xdg-utils \
19
- # libatk-bridge2.0-0 \
20
- # libgtk-3-0 \
21
- # --no-install-recommends && \
22
- # apt-get clean && \
23
- # rm -rf /var/lib/apt/lists/*
24
 
25
  # RUN apt-get update && apt-get install -y wget unzip && \
26
  # wget https://dl.google.com/Linux/direct/google-chrome-stable_current_amd64.deb && \
@@ -28,74 +28,24 @@
28
  # rm google-chrome-stable_current_amd64.deb && \
29
  # apt-get clean
30
 
31
- # RUN which google-chrome
32
-
33
- # # Update the package list and install wget, unzip, and Firefox
34
- # # RUN apt-get update && apt-get install -y wget unzip \
35
- # # && apt-get install -y firefox-esr \
36
- # # && apt-get clean
37
- # RUN useradd -m -u 1000 user
38
- # USER user
39
- # ENV HOME=/home/user \
40
- # PATH=/home/user/.local/bin:$PATH
41
-
42
- # WORKDIR $HOME/app
43
- # # WORKDIR /app
44
-
45
- # COPY --chown=user . $HOME/app
46
- # # COPY . /app
47
- # # Install Python dependencies
48
- # RUN pip install --no-cache-dir -r requirements.txt
49
-
50
- # # Run the Selenium script
51
- # # CMD ["gunicorn", "-b", "0.0.0.0:7860","app:app"]
52
- # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
53
-
54
- FROM python:3.10-slim
55
-
56
- # install google chrome
57
-
58
- # RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
59
-
60
- # RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
61
-
62
- # RUN apt-get -y update
63
-
64
- # RUN apt-get install -y google-chrome-stable
65
-
66
- RUN apt-get update && apt-get install -y wget unzip && \
67
- wget https://dl.google.com/Linux/direct/google-chrome-stable_current_amd64.deb && \
68
- apt install -y ./google-chrome-stable_current_amd64.deb && \
69
- rm google-chrome-stable_current_amd64.deb && \
70
- apt-get clean
71
-
72
-
73
- # install chromedriver
74
-
75
- # RUN apt-get install -yqq unzip
76
-
77
- # RUN wget -O chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
78
-
79
- # RUN unzip chromedriver.zip chromedriver -d /usr/bin/
80
-
81
- # set display port to avoid crash
82
-
83
- ENV DISPLAY=:99
84
-
85
- # WORKDIR /code
86
  RUN useradd -m -u 1000 user
87
  USER user
88
  ENV HOME=/home/user \
89
  PATH=/home/user/.local/bin:$PATH
90
 
91
  WORKDIR $HOME/app
92
- # # WORKDIR /app
93
 
94
- COPY ./requirements.txt /code/requirements.txt
95
-
96
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
97
  COPY --chown=user . $HOME/app
98
- # COPY . .
99
-
 
100
 
101
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
1
+ # Use the official Python image as the base image
2
+ FROM python:3.10-slim
3
 
4
+ # Install dependencies for Selenium and Chrome
5
+ RUN apt-get update && apt-get install -y \
6
+ wget \
7
+ unzip \
8
+ curl \
9
+ gnupg \
10
+ libnss3 \
11
+ libgconf-2-4 \
12
+ libxi6 \
13
+ libxcursor1 \
14
+ libxrandr2 \
15
+ libxss1 \
16
+ libxtst6 \
17
+ fonts-liberation \
18
+ xdg-utils \
19
+ libatk-bridge2.0-0 \
20
+ libgtk-3-0 \
21
+ --no-install-recommends && \
22
+ apt-get clean && \
23
+ rm -rf /var/lib/apt/lists/*
24
 
25
  # RUN apt-get update && apt-get install -y wget unzip && \
26
  # wget https://dl.google.com/Linux/direct/google-chrome-stable_current_amd64.deb && \
 
28
  # rm google-chrome-stable_current_amd64.deb && \
29
  # apt-get clean
30
 
31
+ # Update the package list and install wget, unzip, and Firefox
32
+ RUN apt-get update && apt-get install -y wget unzip \
33
+ && apt-get install -y firefox-esr \
34
+ && apt-get clean
35
+ RUN which firefox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  RUN useradd -m -u 1000 user
37
  USER user
38
  ENV HOME=/home/user \
39
  PATH=/home/user/.local/bin:$PATH
40
 
41
  WORKDIR $HOME/app
42
+ # WORKDIR /app
43
 
 
 
 
44
  COPY --chown=user . $HOME/app
45
+ # COPY . /app
46
+ # Install Python dependencies
47
+ RUN pip install --no-cache-dir -r requirements.txt
48
 
49
+ # Start the FastAPI app with uvicorn
50
+ # CMD ["gunicorn", "-b", "0.0.0.0:7860","app:app"]
51
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -4,29 +4,18 @@ import time
4
  import pandas as pd
5
  from fastapi import FastAPI, HTTPException
6
  from selenium import webdriver
7
- from selenium.webdriver.chrome.service import Service
8
  from selenium.webdriver.common.action_chains import ActionChains
9
- from selenium.webdriver.chrome.options import Options
10
- from webdriver_manager.chrome import ChromeDriverManager
11
  from webdriver_manager.core.driver_cache import DriverCacheManager
12
  from selenium.webdriver.common.by import By
13
  from fake_headers import Headers
14
  from fastapi.middleware.cors import CORSMiddleware
15
  import logging
16
- # from selenium_driverless import webdriver as webdriverless
17
 
18
- # proxy_username="ockzoweb"
19
- # proxy_password="23wxmulibzuq"
20
- # proxy_address="198.23.239.134"
21
- # proxy_port="6540"
22
 
23
- # proxy_url=f"http://{proxy_username}:{proxy_password}@{proxy_address}:{proxy_port}"
24
- # seleniumwire_options = {
25
- # "proxy": {
26
- # "http": proxy_url,
27
- # "https": proxy_url,
28
- # }
29
- # }
30
  # Initialize FastAPI
31
  app = FastAPI(
32
  debug=True,
@@ -65,26 +54,24 @@ def setup_chromedriver():
65
 
66
  # Setup headless Chrome options
67
  # Define a custom user agent
68
- # my_user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0"
69
 
70
  # my_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
71
- header = Headers().generate()["User-Agent"]
72
- # capabilities = webdriver.DesiredCapabilities.FIREFOX
73
  # proxy = None
74
  browser_option = Options()
75
  browser_option.add_argument("--headless") # Running in headless mode (no GUI)
76
  browser_option.add_argument("--no-sandbox")
77
  browser_option.add_argument("--disable-dev-shm-usage")
78
- # browser_option.add_argument("--ignore-certificate-errors")
79
- # browser_option.binary_location = '/usr/bin/firefox'
80
- # browser_option.binary_location = r'C:\Users\HP\.cache\selenium\firefox\win64\133.0\firefox.exe'
81
  # browser_option.add_argument("--disable-gpu")
82
  # browser_option.add_argument("--log-level=3")
83
  # browser_option.add_argument("--disable-notifications")
84
  # browser_option.add_argument("--disable-popup-blocking")
85
- browser_option.add_argument("--user-agent={}".format(header))
86
- # browser_option.add_argument(f"--user-agent={my_user_agent}")
87
- logging.info(f"header: {header}")
88
  # if proxy:
89
  # browser_option.add_argument(f"--proxy-server={proxy}")
90
 
@@ -92,12 +79,10 @@ logging.info(f"header: {header}")
92
  # Setup WebDriver
93
  driver_path = setup_chromedriver()
94
  service = Service(executable_path=driver_path)
95
- # driver = webdriver.Chrome(service=service, options=browser_option,)
96
  # actions = ActionChains(driver)
97
 
98
  def getSearchPostData(search_keyword, index, name="", forCompetitorAnalysis=False):
99
- driver = webdriver.Chrome( options=browser_option,)
100
-
101
  # Navigate to the search results page
102
  url = f'https://www.reddit.com/search/?q={search_keyword}'
103
  driver.get(url)
@@ -161,16 +146,11 @@ def getSearchPostData(search_keyword, index, name="", forCompetitorAnalysis=Fals
161
 
162
  def get_webpage_title(url: str) -> str:
163
  try:
164
- # getSearchPostData(search_keyword="migraine", index=0)
165
- # driver = webdriver.Chrome( options=browser_option,)
166
- driver = webdriver.Chrome(service=service, options=browser_option,)
167
-
168
  url="https://www.reddit.com"
169
  driver.get(url)
170
- time.sleep(3)
171
  title = driver.title
172
  logging.info(f"Page title: {title}")
173
- driver.quit()
174
  return title
175
  except Exception as e:
176
  logging.error(f"Error fetching webpage title: {e}")
@@ -234,7 +214,7 @@ async def fetch_title(url: str):
234
  # from selenium.webdriver.common.proxy import Proxy, ProxyType
235
  # app = Flask(__name__)
236
 
237
-
238
  # def download_selenium():
239
  # prox = Proxy()
240
  # prox.proxy_type = ProxyType.MANUAL
 
4
  import pandas as pd
5
  from fastapi import FastAPI, HTTPException
6
  from selenium import webdriver
7
+ from selenium.webdriver.firefox.service import Service
8
  from selenium.webdriver.common.action_chains import ActionChains
9
+ from selenium.webdriver.firefox.options import Options
10
+ from webdriver_manager.firefox import GeckoDriverManager
11
  from webdriver_manager.core.driver_cache import DriverCacheManager
12
  from selenium.webdriver.common.by import By
13
  from fake_headers import Headers
14
  from fastapi.middleware.cors import CORSMiddleware
15
  import logging
16
+ from selenium_driverless import webdriver as webdriverless
17
 
 
 
 
 
18
 
 
 
 
 
 
 
 
19
  # Initialize FastAPI
20
  app = FastAPI(
21
  debug=True,
 
54
 
55
  # Setup headless Firefox options
56
  # Define a custom user agent
57
+ my_user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0"
58
 
59
  # my_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
60
+
61
+ capabilities = webdriver.DesiredCapabilities.FIREFOX
62
  # proxy = None
63
  browser_option = Options()
64
  browser_option.add_argument("--headless") # Running in headless mode (no GUI)
65
  browser_option.add_argument("--no-sandbox")
66
  browser_option.add_argument("--disable-dev-shm-usage")
67
+ browser_option.add_argument("--ignore-certificate-errors")
68
+ browser_option.binary_location = '/usr/bin/firefox'
 
69
  # browser_option.add_argument("--disable-gpu")
70
  # browser_option.add_argument("--log-level=3")
71
  # browser_option.add_argument("--disable-notifications")
72
  # browser_option.add_argument("--disable-popup-blocking")
73
+ browser_option.add_argument(f"--user-agent={my_user_agent}")
74
+
 
75
  # if proxy:
76
  # browser_option.add_argument(f"--proxy-server={proxy}")
77
 
 
79
  # Setup WebDriver
80
  driver_path = setup_chromedriver()
81
  service = Service(executable_path=driver_path)
82
+ driver = webdriver.Firefox(service=service, options=browser_option)
83
  # actions = ActionChains(driver)
84
 
85
  def getSearchPostData(search_keyword, index, name="", forCompetitorAnalysis=False):
 
 
86
  # Navigate to the search results page
87
  url = f'https://www.reddit.com/search/?q={search_keyword}'
88
  driver.get(url)
 
146
 
147
  def get_webpage_title(url: str) -> str:
148
  try:
149
+ getSearchPostData(search_keyword="migraine", index=0)
 
 
 
150
  url="https://www.reddit.com"
151
  driver.get(url)
 
152
  title = driver.title
153
  logging.info(f"Page title: {title}")
 
154
  return title
155
  except Exception as e:
156
  logging.error(f"Error fetching webpage title: {e}")
 
214
  # from selenium.webdriver.common.proxy import Proxy, ProxyType
215
  # app = Flask(__name__)
216
 
217
+
218
  # def download_selenium():
219
  # prox = Proxy()
220
  # prox.proxy_type = ProxyType.MANUAL