Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,26 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Ad Scraper (Meta/Instagram Ad Library) — Selenium core with optional Gradio UI
|
| 3 |
-
-----------------------------------------------------------------------------
|
| 4 |
-
Why this rewrite?
|
| 5 |
-
- The previous version imported `gradio` at module import time. In some sandboxes, the
|
| 6 |
-
Python `ssl` module is unavailable, which breaks `gradio` (and anything that imports
|
| 7 |
-
`websockets`). That caused: `ModuleNotFoundError: No module named 'ssl'`.
|
| 8 |
-
|
| 9 |
-
Fixes in this version:
|
| 10 |
-
- **Lazy import Gradio** only when launching the UI, so importing this file never
|
| 11 |
-
requires `ssl`.
|
| 12 |
-
- **Selenium-only core** scraper lives in pure functions.
|
| 13 |
-
- **No webdriver downloads at import** — we try a system Chrome/Chromium first;
|
| 14 |
-
only then (optionally) use `webdriver_manager` inside a `try` block.
|
| 15 |
-
- **CLI fallback** so you can run without Gradio.
|
| 16 |
-
- **Unit tests** for HTML parsing (no network/SSL needed).
|
| 17 |
-
|
| 18 |
-
Run modes:
|
| 19 |
-
- CLI (default): `python app.py --keyword "GenZ skin care brand"`
|
| 20 |
-
- Gradio UI (if your env supports ssl): `python app.py --gradio`
|
| 21 |
-
|
| 22 |
-
Tip for local dev: ensure Chrome/Chromium and matching ChromeDriver are on PATH.
|
| 23 |
-
"""
|
| 24 |
from __future__ import annotations
|
| 25 |
|
| 26 |
import sys
|
|
@@ -33,11 +10,9 @@ from dataclasses import dataclass, asdict
|
|
| 33 |
from typing import List, Dict, Optional, Tuple
|
| 34 |
from urllib.parse import quote_plus
|
| 35 |
|
| 36 |
-
# Third-party imports that DO NOT pull in ssl at import time
|
| 37 |
from bs4 import BeautifulSoup # type: ignore
|
| 38 |
import pandas as pd # type: ignore
|
| 39 |
|
| 40 |
-
# Selenium imports (safe to import without ssl; network happens later)
|
| 41 |
from selenium import webdriver # type: ignore
|
| 42 |
from selenium.webdriver.common.by import By # type: ignore
|
| 43 |
from selenium.webdriver.chrome.service import Service # type: ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import sys
|
|
|
|
| 10 |
from typing import List, Dict, Optional, Tuple
|
| 11 |
from urllib.parse import quote_plus
|
| 12 |
|
|
|
|
| 13 |
from bs4 import BeautifulSoup # type: ignore
|
| 14 |
import pandas as pd # type: ignore
|
| 15 |
|
|
|
|
| 16 |
from selenium import webdriver # type: ignore
|
| 17 |
from selenium.webdriver.common.by import By # type: ignore
|
| 18 |
from selenium.webdriver.chrome.service import Service # type: ignore
|