a-k-dey committed on
Commit 4710f1a · 1 Parent(s): b4adf10

Fix: Correct playwright-stealth v2.0.2 API usage and expand discovery to RSS/GitHub/Search
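
For reference, the API migration the title describes, shown in isolation — a minimal sketch assuming the class-based playwright-stealth API exactly as this commit uses it (the Stealth class and its apply_async coroutine are taken from the diff below and are not independently verified against the v2.0.2 docs):

# Old call style removed by this commit:
#   from playwright_stealth import stealth
#   await stealth(page)

# New call style applied by this commit:
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth import Stealth

async def demo() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        # Apply stealth patches before any navigation so automation signatures are masked.
        await Stealth().apply_async(page)
        await page.goto("https://news.ycombinator.com")
        print(await page.title())
        await browser.close()

asyncio.run(demo())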

Files changed (1)
  app/core/scrapers.py +31 -9
app/core/scrapers.py CHANGED
@@ -136,7 +136,7 @@ class DiscoverySurfer:
         logger.info(f"Surfer exploring {start_url} (depth={depth})")

         from playwright.async_api import async_playwright
-        from playwright_stealth import stealth
+        from playwright_stealth import Stealth
         import random
         import asyncio

@@ -151,7 +151,8 @@ class DiscoverySurfer:
         )

         page = await context.new_page()
-        await stealth(page)
+        stealth = Stealth()
+        await stealth.apply_async(page)
         logger.info("Stealth Sub-Agent active. Masking automation signatures.")

         # Human-like navigation: Random delay before goto
@@ -225,22 +226,43 @@ class DiscoverySurfer:
         return []

     async def swoop(self) -> List[Dict]:
-        """The main autonomous 'Swoop' - using the Discovery Surfer."""
+        """The main autonomous 'Swoop' - combining surfing, RSS, and deep audits."""
         all_signals = []
+        tech_scraper = TechnicalScraper()

-        # Start hubs
+        # 1. Deep GitHub & Search Audits
+        logger.info("Discovery Orchestrator: initiating technical audits...")
+        github_signals = await tech_scraper.fetch_github_audit()
+        all_signals.extend(github_signals)
+
+        for query in settings.TARGET_QUERIES:
+            search_signals = await tech_scraper.fetch_search(query)
+            all_signals.extend(search_signals)
+
+        # 2. RSS Forensic Feeds
+        logger.info("Discovery Orchestrator: sifting through RSS hubs...")
+        for feed in settings.RSS_FEEDS:
+            rss_signals = await tech_scraper.fetch_rss(feed)
+            all_signals.extend(rss_signals)
+
+        # 3. Human-like Browser Discovery (Surfing)
+        logger.info("Discovery Orchestrator: deploying browser-based surfs...")
         hubs = [
             "https://news.ycombinator.com",
             "https://www.reddit.com/r/MachineLearning/",
-            "https://www.reddit.com/r/Programming/"
+            "https://www.reddit.com/r/Programming/",
+            "https://dev.to/t/architecture",
+            "https://www.infoq.com/news/"
         ]

-        surfer = DiscoverySurfer()
         for hub in hubs:
-            results = await surfer.surf(hub)
-            all_signals.extend(results)
+            try:
+                results = await self.surf(hub)
+                all_signals.extend(results)
+            except Exception as e:
+                logger.error(f"Discovery Orchestrator: Surf failure on {hub}: {e}")

-        logger.info(f"Swoop completed. Found {len(all_signals)} potential signals through human-like surfing.")
+        logger.info(f"Swoop completed. Found {len(all_signals)} total forensic signals across all vectors.")
         return all_signals

 scraper = DiscoverySurfer()
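
The expanded swoop() leans on a TechnicalScraper and two settings lists that this commit does not define. The sketch below only illustrates the interface the orchestrator expects — the method names and settings keys are taken from the call sites above, while the bodies and example values are placeholders, not the project's actual implementation:

from typing import Dict, List

class Settings:
    # Hypothetical values; the real settings module lives elsewhere in the repo.
    TARGET_QUERIES: List[str] = ["distributed systems postmortem", "llm agent architecture"]
    RSS_FEEDS: List[str] = ["https://hnrss.org/frontpage", "https://dev.to/feed/tag/architecture"]

settings = Settings()

class TechnicalScraper:
    """Interface assumed by swoop(): every fetcher returns a list of signal dicts."""

    async def fetch_github_audit(self) -> List[Dict]:
        # Placeholder: the real implementation audits GitHub and normalizes hits into signal dicts.
        return []

    async def fetch_search(self, query: str) -> List[Dict]:
        # Placeholder: the real implementation runs a web search for `query`.
        return []

    async def fetch_rss(self, feed_url: str) -> List[Dict]:
        # Placeholder: the real implementation parses the feed at `feed_url`.
        return []

Note that only the browser-based surfs are wrapped in a per-hub try/except; the GitHub, search, and RSS fetchers run before them unguarded, so an exception there still aborts the whole swoop.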