/**
 * Showpad Browser Scraper
 *
 * Uses Playwright to access and extract content from Showpad shared spaces.
 * Since Showpad's API requires OAuth credentials, we use browser automation
 * to access publicly shared content.
 */

import { chromium, Browser, Page } from 'playwright';

/** A single asset (image, video, or link) discovered on the page. */
interface ShowpadAsset {
  /** Display name: the image `alt` text, the link text, or a fixed fallback label. */
  name: string;
  /** Asset category; this file emits `'image'`, `'video'`, or `'link'`. */
  type: string;
  /** Value of the element's `src` / `href` attribute, when present. */
  url?: string;
  /** Value of a video's `poster` attribute, when present. */
  thumbnail?: string;
}

/** Everything `extractShowpadContent` collects from one page. */
interface ShowpadContent {
  /** Document title; left unset if reading it failed. */
  title?: string;
  assets: ShowpadAsset[];
  /** Trimmed, non-empty text of headings, paragraphs and title/description nodes. */
  sections: string[];
}

/**
 * Shortens `text` to at most `maxLength` characters, appending `...` only
 * when something was actually cut off.
 */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Scrapes the currently loaded page for its title, visible text sections,
 * and image / video / link assets.
 *
 * @param page - Playwright page that has already been navigated to the target.
 * @returns The collected title, text sections and assets.
 */
async function extractShowpadContent(page: Page): Promise<ShowpadContent> {
  const content: ShowpadContent = {
    assets: [],
    sections: [],
  };

  // Wait for the app to load
  await page.waitForLoadState('networkidle');

  // Reading the title is best-effort: a failure is reported but must not
  // abort the rest of the extraction.
  try {
    content.title = await page.title();
  } catch (error: unknown) {
    console.warn(
      'Could not read page title:',
      error instanceof Error ? error.message : error
    );
  }

  // Extract any visible text sections
  const textElements = await page.$$('h1, h2, h3, p, .title, .description');
  for (const el of textElements) {
    const text = (await el.textContent())?.trim();
    if (text) {
      content.sections.push(text);
    }
  }

  // Look for image assets
  const images = await page.$$('img');
  for (const img of images) {
    const [src, alt] = await Promise.all([
      img.getAttribute('src'),
      img.getAttribute('alt'),
    ]);
    if (src) {
      // `||` (not `??`) on purpose: an empty alt should also fall back.
      content.assets.push({ name: alt || 'Unknown', type: 'image', url: src });
    }
  }

  // Look for video elements; recorded even when they carry no `src`
  const videos = await page.$$('video');
  for (const video of videos) {
    const [src, poster] = await Promise.all([
      video.getAttribute('src'),
      video.getAttribute('poster'),
    ]);
    content.assets.push({
      name: 'Video',
      type: 'video',
      url: src || undefined,
      thumbnail: poster || undefined,
    });
  }

  // Look for download links
  const links = await page.$$('a[href*="download"], a[href*="asset"]');
  for (const link of links) {
    const [href, text] = await Promise.all([
      link.getAttribute('href'),
      link.textContent(),
    ]);
    if (href) {
      content.assets.push({
        name: text?.trim() || 'Download',
        type: 'link',
        url: href,
      });
    }
  }

  return content;
}

/**
 * Entry point: opens the shared-space URL in a headed Chromium instance,
 * saves a full-page screenshot, prints a summary of the extracted content,
 * reports whether a registration form is present, and closes the browser.
 *
 * Errors are logged rather than rethrown; the browser is always closed.
 */
async function main(): Promise<void> {
  const SHARED_SPACE_URL =
    'https://tdcerhverv.showpad.biz/s/01kcmt54k3mczgv67x53hcchpw/auth/register';

  console.log('Starting Showpad Browser Scraper...\n');
  console.log('Target URL:', SHARED_SPACE_URL);

  let browser: Browser | null = null;

  try {
    // Launch browser in headed mode for first run to see what's happening
    console.log('\nLaunching browser...');
    browser = await chromium.launch({
      headless: false, // Set to true for production
      slowMo: 500, // Slow down for visibility
    });

    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
    });
    const page = await context.newPage();

    console.log('Navigating to shared space...');
    await page.goto(SHARED_SPACE_URL, { waitUntil: 'networkidle' });

    // Take a screenshot to see what we're dealing with
    const screenshotPath = './showpad-screenshot.png';
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`Screenshot saved to: ${screenshotPath}`);

    // Get the page content
    console.log('\nPage title:', await page.title());
    console.log('Current URL:', page.url());

    // Wait for any Angular app to load
    await page.waitForTimeout(3000);

    // Extract visible content
    console.log('\nExtracting content...');
    const content = await extractShowpadContent(page);

    console.log('\n=== Extracted Content ===');
    console.log('Title:', content.title);

    // Only the first 10 entries of each list are printed to keep output short.
    console.log('\nSections found:', content.sections.length);
    for (const section of content.sections.slice(0, 10)) {
      console.log(' -', section.substring(0, 100));
    }

    console.log('\nAssets found:', content.assets.length);
    for (const asset of content.assets.slice(0, 10)) {
      console.log(` - [${asset.type}] ${asset.name}`);
      if (asset.url) {
        console.log(` URL: ${truncate(asset.url, 80)}`);
      }
    }

    // Check if there's a registration form
    const registrationForm = await page.$(
      'form, [class*="register"], [class*="signup"]'
    );
    if (registrationForm) {
      console.log(
        '\nRegistration form detected! The shared space requires registration.'
      );
      console.log('To access content, you would need to:');
      console.log('1. Fill out the registration form');
      console.log('2. Verify your email');
      console.log('3. Log in to access the content');
    }

    // Check current page state
    const pageContent = await page.content();
    if (pageContent.includes('register') || pageContent.includes('sign up')) {
      console.log('\nNote: Page appears to be showing registration prompt');
    }

    // Keep browser open for inspection
    console.log('\nBrowser will stay open for 30 seconds for inspection...');
    await page.waitForTimeout(30000);
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
  } finally {
    if (browser) {
      await browser.close();
      console.log('\nBrowser closed.');
    }
  }
}

main().catch(console.error);