Spaces:
No application file
Commit ·
f11978e
1
Parent(s): 71f9aed
Changes made
Browse files
selenium-scraper/Dockerfile
CHANGED
|
@@ -42,4 +42,8 @@ WORKDIR /app
|
|
| 42 |
EXPOSE 8000
|
| 43 |
|
| 44 |
# Start FastAPI
|
| 45 |
-
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
EXPOSE 8000
|
| 43 |
|
| 44 |
# Start FastAPI
|
| 45 |
+
CMD ["uvicorn", "selenium-scraper.app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
selenium-scraper/app/clickloom_scrape.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
| 8 |
chromedriver_autoinstaller.install() # Automatically installs compatible driver
|
| 9 |
|
| 10 |
options = Options()
|
| 11 |
-
options.binary_location = "/usr/bin/chromium"
|
| 12 |
options.add_argument("--headless")
|
| 13 |
options.add_argument("--no-sandbox")
|
| 14 |
options.add_argument("--disable-dev-shm-usage")
|
|
|
|
| 8 |
chromedriver_autoinstaller.install() # Automatically installs compatible driver
|
| 9 |
|
| 10 |
options = Options()
|
| 11 |
+
options.binary_location = "/usr/bin/chromium-browser"
|
| 12 |
options.add_argument("--headless")
|
| 13 |
options.add_argument("--no-sandbox")
|
| 14 |
options.add_argument("--disable-dev-shm-usage")
|
selenium-scraper/app/example.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 1 |
+
import requests
|
| 2 |
|
| 3 |
+
# URL of the running FastAPI scraper server
|
| 4 |
+
base_url = "https://huggingface.co/spaces/apexherbert200/selenium-scraper2"
|
| 5 |
|
| 6 |
+
# Target page to scrape
|
| 7 |
+
params = {"link": "https://jobright.ai/jobs/info/681ab6e27e673b00b9024e36"}
|
| 8 |
|
| 9 |
+
# Send GET request
|
| 10 |
+
response = requests.get(base_url, params=params)
|
| 11 |
|
| 12 |
+
# Print the JSON response
|
| 13 |
+
if response.status_code == 200:
|
| 14 |
+
data = response.json()
|
| 15 |
+
print(data)
|
| 16 |
+
else:
|
| 17 |
+
print("Error:", response.status_code, response.text)
|