gaur3009 committed on
Commit
7cf08eb
·
verified ·
1 Parent(s): 54602dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -25
app.py CHANGED
@@ -1,26 +1,3 @@
1
- """
2
- Ad Scraper (Meta/Instagram Ad Library) — Selenium core with optional Gradio UI
3
- -----------------------------------------------------------------------------
4
- Why this rewrite?
5
- - The previous version imported `gradio` at module import time. In some sandboxes, the
6
- Python `ssl` module is unavailable, which breaks `gradio` (and anything that imports
7
- `websockets`). That caused: `ModuleNotFoundError: No module named 'ssl'`.
8
-
9
- Fixes in this version:
10
- - **Lazy import Gradio** only when launching the UI, so importing this file never
11
- requires `ssl`.
12
- - **Selenium-only core** scraper lives in pure functions.
13
- - **No webdriver downloads at import** — we try a system Chrome/Chromium first;
14
- only then (optionally) use `webdriver_manager` inside a `try` block.
15
- - **CLI fallback** so you can run without Gradio.
16
- - **Unit tests** for HTML parsing (no network/SSL needed).
17
-
18
- Run modes:
19
- - CLI (default): `python ad_scraper.py --keyword "GenZ skin care brand"`
20
- - Gradio UI (if your env supports ssl): `python ad_scraper.py --gradio`
21
-
22
- Tip for local dev: ensure Chrome/Chromium and matching ChromeDriver are on PATH.
23
- """
24
  from __future__ import annotations
25
 
26
  import sys
@@ -33,11 +10,9 @@ from dataclasses import dataclass, asdict
33
  from typing import List, Dict, Optional, Tuple
34
  from urllib.parse import quote_plus
35
 
36
- # Third-party imports that DO NOT pull in ssl at import time
37
  from bs4 import BeautifulSoup # type: ignore
38
  import pandas as pd # type: ignore
39
 
40
- # Selenium imports (safe to import without ssl; network happens later)
41
  from selenium import webdriver # type: ignore
42
  from selenium.webdriver.common.by import By # type: ignore
43
  from selenium.webdriver.chrome.service import Service # type: ignore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import sys
 
10
  from typing import List, Dict, Optional, Tuple
11
  from urllib.parse import quote_plus
12
 
 
13
  from bs4 import BeautifulSoup # type: ignore
14
  import pandas as pd # type: ignore
15
 
 
16
  from selenium import webdriver # type: ignore
17
  from selenium.webdriver.common.by import By # type: ignore
18
  from selenium.webdriver.chrome.service import Service # type: ignore