Spaces:
Paused
Paused
/**
 * Showpad Browser Scraper
 *
 * Uses Playwright to access and extract content from Showpad shared spaces.
 * Because Showpad's API requires OAuth credentials, browser automation is
 * used instead to read publicly shared content.
 */
import { chromium, Browser, Page } from 'playwright';
/**
 * A single asset discovered on a Showpad page.
 */
interface ShowpadAsset {
  /** Human-readable label (image alt text, link text, or a fixed fallback). */
  name: string;
  /** Asset category; the extractor in this file emits 'image', 'video' or 'link'. */
  type: string;
  /** Location of the asset, when one could be determined. */
  url?: string;
  /** Preview image for the asset (e.g. a video poster), when available. */
  thumbnail?: string;
}
/**
 * Everything extracted from one Showpad page.
 */
interface ShowpadContent {
  /** Document title of the page, if it could be read. */
  title?: string;
  /** Images, videos and download/asset links found on the page. */
  assets: ShowpadAsset[];
  /** Trimmed, non-empty text of headings, paragraphs and title/description elements. */
  sections: string[];
}
/**
 * Resolves a possibly-relative URL against a base URL.
 * Returns the raw value unchanged when it cannot be parsed as a URL.
 */
function toAbsoluteUrl(raw: string, base: string): string {
  try {
    return new URL(raw, base).href;
  } catch {
    return raw;
  }
}

/**
 * Collects the title, visible text sections and media/download assets from
 * the page currently loaded in `page`.
 *
 * DOM reads are batched with one `$$eval` call per selector instead of one
 * call per element, so the number of browser round trips does not grow with
 * the number of matched elements.
 *
 * @param page - Playwright page that has already been navigated.
 * @returns The extracted content; `assets` and `sections` are empty arrays
 *   when nothing matched.
 */
async function extractShowpadContent(page: Page): Promise<ShowpadContent> {
  const content: ShowpadContent = {
    assets: [],
    sections: [],
  };

  // Best effort: pages that keep connections open may never become idle,
  // and that should not prevent extracting what is already rendered.
  try {
    await page.waitForLoadState('networkidle');
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Page did not reach network idle; extracting available content:', reason);
  }

  try {
    content.title = await page.title();
  } catch {
    // The title is optional; leave it undefined when it cannot be read.
  }

  // Base used to turn relative src/href/poster values into absolute URLs.
  const baseUri = await page.evaluate(() => document.baseURI);

  // Visible text sections.
  const texts = await page.$$eval('h1, h2, h3, p, .title, .description', (elements) =>
    elements.map((element) => element.textContent)
  );
  for (const text of texts) {
    const trimmed = text?.trim();
    if (trimmed) {
      content.sections.push(trimmed);
    }
  }

  // Image assets.
  const images = await page.$$eval('img', (elements) =>
    elements.map((element) => ({
      src: element.getAttribute('src'),
      alt: element.getAttribute('alt'),
    }))
  );
  for (const { src, alt } of images) {
    if (src) {
      content.assets.push({
        name: alt || 'Unknown',
        type: 'image',
        url: toAbsoluteUrl(src, baseUri),
      });
    }
  }

  // Video elements; fall back to the first <source src> child when the
  // <video> tag itself carries no src attribute.
  const videos = await page.$$eval('video', (elements) =>
    elements.map((element) => ({
      src:
        element.getAttribute('src') ||
        element.querySelector('source[src]')?.getAttribute('src') ||
        null,
      poster: element.getAttribute('poster'),
    }))
  );
  for (const { src, poster } of videos) {
    content.assets.push({
      name: 'Video',
      type: 'video',
      url: src ? toAbsoluteUrl(src, baseUri) : undefined,
      thumbnail: poster ? toAbsoluteUrl(poster, baseUri) : undefined,
    });
  }

  // Download / asset links.
  const links = await page.$$eval('a[href*="download"], a[href*="asset"]', (elements) =>
    elements.map((element) => ({
      href: element.getAttribute('href'),
      text: element.textContent,
    }))
  );
  for (const { href, text } of links) {
    if (href) {
      content.assets.push({
        name: text?.trim() || 'Download',
        type: 'link',
        url: toAbsoluteUrl(href, baseUri),
      });
    }
  }

  return content;
}
/**
 * Optional overrides for {@link main}. Every field defaults to the value the
 * script originally hard-coded.
 */
interface ScraperOptions {
  /** Shared-space URL to open. */
  url?: string;
  /** Run the browser without a visible window. */
  headless?: boolean;
  /** Delay in milliseconds Playwright inserts between operations. */
  slowMo?: number;
  /** Where the full-page screenshot is written. */
  screenshotPath?: string;
  /** How long to keep the browser open at the end; 0 or less skips the pause. */
  inspectMs?: number;
}

/** Shortens `text` to `maxLength` characters, appending "..." only when something was cut. */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}

/**
 * Opens a Showpad shared space in Chromium, saves a screenshot, prints a
 * summary of the extracted content and reports whether the page looks like a
 * registration prompt.
 *
 * Errors are logged rather than rethrown, and the process exit code is set to
 * 1 so that a failed run is detectable by the caller's shell or CI.
 *
 * @param options - Optional overrides; calling `main()` with no arguments
 *   behaves like the original hard-coded script.
 */
async function main(options: ScraperOptions = {}): Promise<void> {
  const {
    url: sharedSpaceUrl = 'https://tdcerhverv.showpad.biz/s/01kcmt54k3mczgv67x53hcchpw/auth/register',
    headless = false,
    slowMo = 500,
    screenshotPath = './showpad-screenshot.png',
    inspectMs = 30000,
  } = options;

  console.log('Starting Showpad Browser Scraper...\n');
  console.log('Target URL:', sharedSpaceUrl);

  let browser: Browser | null = null;
  try {
    // Headed and slowed down by default so the first run can be watched.
    console.log('\nLaunching browser...');
    browser = await chromium.launch({ headless, slowMo });

    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
    });
    const page = await context.newPage();

    console.log('Navigating to shared space...');
    await page.goto(sharedSpaceUrl, { waitUntil: 'networkidle' });

    // Capture what the page looks like before any extraction.
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`Screenshot saved to: ${screenshotPath}`);

    console.log('\nPage title:', await page.title());
    console.log('Current URL:', page.url());

    // Give a client-side app time to finish rendering.
    await page.waitForTimeout(3000);

    console.log('\nExtracting content...');
    const content = await extractShowpadContent(page);

    console.log('\n=== Extracted Content ===');
    console.log('Title:', content.title);

    console.log('\nSections found:', content.sections.length);
    for (const section of content.sections.slice(0, 10)) {
      console.log(' -', truncate(section, 100));
    }

    console.log('\nAssets found:', content.assets.length);
    for (const asset of content.assets.slice(0, 10)) {
      console.log(` - [${asset.type}] ${asset.name}`);
      if (asset.url) {
        console.log(` URL: ${truncate(asset.url, 80)}`);
      }
    }

    // Heuristic: any form or register/signup-classed element counts.
    const registrationForm = await page.$('form, [class*="register"], [class*="signup"]');
    if (registrationForm) {
      console.log('\nRegistration form detected! The shared space requires registration.');
      console.log('To access content, you would need to:');
      console.log('1. Fill out the registration form');
      console.log('2. Verify your email');
      console.log('3. Log in to access the content');
    }

    // Case-insensitive so that "Register" / "Sign up" in the markup also match.
    const pageHtml = (await page.content()).toLowerCase();
    if (pageHtml.includes('register') || pageHtml.includes('sign up')) {
      console.log('\nNote: Page appears to be showing registration prompt');
    }

    if (inspectMs > 0) {
      console.log(`\nBrowser will stay open for ${inspectMs / 1000} seconds for inspection...`);
      await page.waitForTimeout(inspectMs);
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
    // Signal failure to the caller without cutting the cleanup below short.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
      console.log('\nBrowser closed.');
    }
  }
}
// Run the scraper; anything that escapes main() is logged and reflected in
// the process exit code so failures are not reported as success.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});