Spaces:
Sleeping
Sleeping
Subhajit Chakraborty committed on
Commit ·
925c4eb
1
Parent(s): 8f6827d
update files(4)
Browse files
- Dockerfile +1 -0
- src/services/scrape_worker.py +4 -0
Dockerfile
CHANGED
|
@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y \
|
|
| 6 |
build-essential \
|
| 7 |
curl \
|
| 8 |
git \
|
|
|
|
| 9 |
libnss3 \
|
| 10 |
libxss1 \
|
| 11 |
libappindicator3-1 \
|
|
|
|
| 6 |
build-essential \
|
| 7 |
curl \
|
| 8 |
git \
|
| 9 |
+
chromium \
|
| 10 |
libnss3 \
|
| 11 |
libxss1 \
|
| 12 |
libappindicator3-1 \
|
src/services/scrape_worker.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
import sys
|
| 2 |
import json
|
|
|
|
|
|
|
| 3 |
from requests_html import HTMLSession
|
|
|
|
|
|
|
| 4 |
|
| 5 |
def scrape_website(url: str) -> str:
|
| 6 |
session = HTMLSession()
|
|
|
|
| 1 |
import sys
|
| 2 |
import json
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
from requests_html import HTMLSession
|
| 6 |
+
if os.path.exists("/usr/bin/chromium"):
|
| 7 |
+
os.environ['PYPPETEER_CHROMIUM_REVISION'] = '/usr/bin/chromium'
|
| 8 |
|
| 9 |
def scrape_website(url: str) -> str:
|
| 10 |
session = HTMLSession()
|