hfariborzi committed on
Commit
d0bbc02
·
verified ·
1 Parent(s): e2ae05f

Update Facebook_Marketplace_Scraper.py

Browse files
Files changed (1) hide show
  1. Facebook_Marketplace_Scraper.py +54 -4
Facebook_Marketplace_Scraper.py CHANGED
@@ -25,10 +25,18 @@ from datetime import datetime
25
  from bs4 import BeautifulSoup as soup
26
  from splinter import Browser
27
  from webdriver_manager.chrome import ChromeDriverManager
28
- from webdriver_manager.firefox import GeckoDriverManager
29
  from selenium.webdriver.chrome.service import Service as ChromeService
30
  from selenium.webdriver.firefox.service import Service as FirefoxService
31
  import json
 
 
 
 
 
 
 
 
 
32
 
33
  class FacebookMarketplaceScraper:
34
  def __init__(self, headless=True, debug=True):
@@ -43,11 +51,18 @@ class FacebookMarketplaceScraper:
43
  self.debug = debug
44
  self.browser = None
45
  self.listings = []
 
46
 
47
  # Create output directory if it doesn't exist
48
  self.output_dir = "marketplace_output"
49
  if not os.path.exists(self.output_dir):
50
  os.makedirs(self.output_dir)
 
 
 
 
 
 
51
 
52
  def initialize_browser(self):
53
  """Initialize browser with Splinter"""
@@ -55,22 +70,50 @@ class FacebookMarketplaceScraper:
55
  # First try Firefox (better for cloud environments like Hugging Face)
56
  try:
57
  print("Attempting to initialize Firefox browser...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  browser_options = {
59
  'headless': self.headless,
60
- 'service': FirefoxService(GeckoDriverManager().install())
61
  }
 
62
  self.browser = Browser('firefox', **browser_options)
63
  print("Firefox browser initialized successfully")
64
  return True
65
  except Exception as firefox_error:
66
  print(f"Firefox initialization failed: {firefox_error}")
 
 
67
 
68
  # Fallback to Chrome if Firefox fails
69
  print("Falling back to Chrome browser...")
70
  service = ChromeService(ChromeDriverManager().install())
 
 
 
 
 
 
 
 
71
  browser_options = {
72
  'headless': self.headless,
73
- 'service': service
 
74
  }
75
  self.browser = Browser('chrome', **browser_options)
76
  print("Chrome browser initialized successfully")
@@ -87,9 +130,16 @@ class FacebookMarketplaceScraper:
87
  try:
88
  self.browser.quit()
89
  print("Browser closed successfully")
90
- self.browser = None
91
  except Exception as e:
92
  print(f"Error closing browser: {e}")
 
 
 
 
 
 
 
 
93
 
94
  def build_search_url(self, location, min_price=None, max_price=None, days_listed=None,
95
  min_mileage=None, max_mileage=None, min_year=None, max_year=None,
 
25
  from bs4 import BeautifulSoup as soup
26
  from splinter import Browser
27
  from webdriver_manager.chrome import ChromeDriverManager
 
28
  from selenium.webdriver.chrome.service import Service as ChromeService
29
  from selenium.webdriver.firefox.service import Service as FirefoxService
30
  import json
31
+ import geckodriver_autoinstaller
32
+ import platform
33
+
34
+ # For headless browser in cloud environments
35
+ try:
36
+ from pyvirtualdisplay import Display
37
+ VIRTUAL_DISPLAY_AVAILABLE = True
38
+ except ImportError:
39
+ VIRTUAL_DISPLAY_AVAILABLE = False
40
 
41
  class FacebookMarketplaceScraper:
42
  def __init__(self, headless=True, debug=True):
 
51
  self.debug = debug
52
  self.browser = None
53
  self.listings = []
54
+ self.display = None
55
 
56
  # Create output directory if it doesn't exist
57
  self.output_dir = "marketplace_output"
58
  if not os.path.exists(self.output_dir):
59
  os.makedirs(self.output_dir)
60
+
61
+ # Initialize virtual display for cloud environments if available
62
+ if VIRTUAL_DISPLAY_AVAILABLE and platform.system() == 'Linux':
63
+ self.display = Display(visible=0, size=(1920, 1080))
64
+ self.display.start()
65
+ print("Virtual display started")
66
 
67
  def initialize_browser(self):
68
  """Initialize browser with Splinter"""
 
70
  # First try Firefox (better for cloud environments like Hugging Face)
71
  try:
72
  print("Attempting to initialize Firefox browser...")
73
+
74
+ # Auto-install geckodriver
75
+ geckodriver_path = geckodriver_autoinstaller.install()
76
+ print(f"Geckodriver installed at: {geckodriver_path}")
77
+
78
+ # Configure Firefox options
79
+ from selenium import webdriver
80
+ from selenium.webdriver.firefox.options import Options
81
+
82
+ firefox_options = Options()
83
+ if self.headless:
84
+ firefox_options.add_argument("--headless")
85
+
86
+ firefox_options.add_argument("--no-sandbox")
87
+ firefox_options.add_argument("--disable-dev-shm-usage")
88
+
89
  browser_options = {
90
  'headless': self.headless,
91
+ 'options': firefox_options
92
  }
93
+
94
  self.browser = Browser('firefox', **browser_options)
95
  print("Firefox browser initialized successfully")
96
  return True
97
  except Exception as firefox_error:
98
  print(f"Firefox initialization failed: {firefox_error}")
99
+ import traceback
100
+ traceback.print_exc()
101
 
102
  # Fallback to Chrome if Firefox fails
103
  print("Falling back to Chrome browser...")
104
  service = ChromeService(ChromeDriverManager().install())
105
+
106
+ from selenium.webdriver.chrome.options import Options
107
+ chrome_options = Options()
108
+ if self.headless:
109
+ chrome_options.add_argument("--headless")
110
+ chrome_options.add_argument("--no-sandbox")
111
+ chrome_options.add_argument("--disable-dev-shm-usage")
112
+
113
  browser_options = {
114
  'headless': self.headless,
115
+ 'service': service,
116
+ 'options': chrome_options
117
  }
118
  self.browser = Browser('chrome', **browser_options)
119
  print("Chrome browser initialized successfully")
 
130
  try:
131
  self.browser.quit()
132
  print("Browser closed successfully")
 
133
  except Exception as e:
134
  print(f"Error closing browser: {e}")
135
+
136
+ # Stop virtual display if it was started
137
+ if self.display:
138
+ try:
139
+ self.display.stop()
140
+ print("Virtual display stopped")
141
+ except:
142
+ pass
143
 
144
  def build_search_url(self, location, min_price=None, max_price=None, days_listed=None,
145
  min_mileage=None, max_mileage=None, min_year=None, max_year=None,