Spaces:
Paused
Paused
File size: 5,348 Bytes
34367da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | /**
* Showpad Browser Scraper
* Uses Playwright to access and extract content from Showpad shared spaces.
* Because Showpad's API requires OAuth credentials, browser automation is used
* instead to reach content that has been shared publicly.
*/
import { chromium, Browser, Page } from 'playwright';
/**
 * One media item or link discovered on a scraped Showpad page.
 */
interface ShowpadAsset {
/** Display label: an image's alt text, a link's text, or a fixed fallback such as 'Unknown', 'Video' or 'Download'. */
name: string;
/** Asset category; the scraper in this file emits 'image', 'video' or 'link'. */
type: string;
/** Address of the asset, derived from the element's `src` or `href` attribute; absent when the element has none. */
url?: string;
/** Preview image address, derived from a video's `poster` attribute when present. */
thumbnail?: string;
}
/**
 * Everything the scraper collected from a single page.
 */
interface ShowpadContent {
/** Document title as reported by the page; left unset if it could not be read. */
title?: string;
/** Images, videos and download/asset links found on the page, in that order. */
assets: ShowpadAsset[];
/** Trimmed, non-empty text of headings, paragraphs and `.title` / `.description` elements. */
sections: string[];
}
/**
 * Resolves a possibly-relative URL taken from page markup.
 *
 * @param raw  Attribute value exactly as written in the markup.
 * @param base Base URI to resolve against (the owning element's `baseURI`).
 * @returns The absolute URL, or `raw` unchanged when it cannot be parsed.
 */
function resolveAssetUrl(raw: string, base: string): string {
  try {
    return new URL(raw, base).href;
  } catch {
    // Malformed markup must not abort the scrape; keep what the page gave us.
    return raw;
  }
}

/**
 * Collects the title, visible text sections and media/link assets from a page.
 *
 * Waits for the page to reach the `networkidle` load state, then reads:
 *  - the document title (best effort; a failure is logged and the title left unset),
 *  - trimmed, non-empty text of `h1, h2, h3, p, .title, .description` elements,
 *  - `<img>` elements that have a `src` (named by `alt`, or 'Unknown'),
 *  - every `<video>` element (with its `src` and `poster` when present),
 *  - anchors whose `href` contains "download" or "asset" (named by link text, or 'Download').
 *
 * Asset URLs are resolved to absolute form against each element's base URI so
 * they remain usable outside the page they were found on.
 *
 * @param page Playwright page that has already been navigated to the target.
 * @returns The extracted content; `assets` lists images, then videos, then links.
 * @throws Whatever Playwright raises if waiting for the load state or an
 *         in-page evaluation fails (for example a timeout).
 */
async function extractShowpadContent(page: Page): Promise<ShowpadContent> {
  const content: ShowpadContent = {
    assets: [],
    sections: [],
  };

  // Wait for the app to finish its initial network activity.
  await page.waitForLoadState('networkidle');

  // The title is optional, so a failure here must not abort the scrape,
  // but it is reported instead of being dropped silently.
  try {
    content.title = await page.title();
  } catch (error: unknown) {
    console.warn('Could not read page title:', error instanceof Error ? error.message : error);
  }

  // Each selector is read with ONE in-page evaluation instead of one protocol
  // round-trip per element and attribute. The callbacks run inside the browser,
  // so they must not reference anything from this module's scope.
  const texts = await page
    .locator('h1, h2, h3, p, .title, .description')
    .evaluateAll((nodes: Element[]) => nodes.map((node) => node.textContent));
  for (const text of texts) {
    const trimmed = text?.trim();
    if (trimmed) {
      content.sections.push(trimmed);
    }
  }

  // Image assets: only elements that actually carry a src are reported.
  const images = await page.locator('img').evaluateAll((nodes: Element[]) =>
    nodes.map((node) => ({
      src: node.getAttribute('src'),
      alt: node.getAttribute('alt'),
      base: node.baseURI,
    })),
  );
  for (const { src, alt, base } of images) {
    if (src) {
      content.assets.push({
        name: alt || 'Unknown',
        type: 'image',
        url: resolveAssetUrl(src, base),
      });
    }
  }

  // Video elements are always reported, even without a src or poster.
  const videos = await page.locator('video').evaluateAll((nodes: Element[]) =>
    nodes.map((node) => ({
      src: node.getAttribute('src'),
      poster: node.getAttribute('poster'),
      base: node.baseURI,
    })),
  );
  for (const { src, poster, base } of videos) {
    content.assets.push({
      name: 'Video',
      type: 'video',
      url: src ? resolveAssetUrl(src, base) : undefined,
      thumbnail: poster ? resolveAssetUrl(poster, base) : undefined,
    });
  }

  // Download / asset links.
  const links = await page
    .locator('a[href*="download"], a[href*="asset"]')
    .evaluateAll((nodes: Element[]) =>
      nodes.map((node) => ({
        href: node.getAttribute('href'),
        text: node.textContent,
        base: node.baseURI,
      })),
    );
  for (const { href, text, base } of links) {
    if (href) {
      content.assets.push({
        name: text?.trim() || 'Download',
        type: 'link',
        url: resolveAssetUrl(href, base),
      });
    }
  }

  return content;
}
/**
 * Script entry point: opens the hard-coded Showpad shared-space URL in a
 * visible Chromium window, saves a full-page screenshot, prints a summary of
 * the extracted content, and reports whether the page looks like a
 * registration wall.
 *
 * The browser is launched headed with `slowMo` and kept open for 30 seconds
 * at the end so the page can be inspected by hand.
 *
 * Errors raised while scraping are reported on stderr and recorded in the
 * process exit code (so callers such as shell scripts or CI can detect the
 * failure). The browser is closed in every case.
 */
async function main(): Promise<void> {
  const SHARED_SPACE_URL = 'https://tdcerhverv.showpad.biz/s/01kcmt54k3mczgv67x53hcchpw/auth/register';

  console.log('Starting Showpad Browser Scraper...\n');
  console.log('Target URL:', SHARED_SPACE_URL);

  let browser: Browser | null = null;
  try {
    // Launch browser in headed mode for first run to see what's happening
    console.log('\nLaunching browser...');
    browser = await chromium.launch({
      headless: false, // Set to true for production
      slowMo: 500 // Slow down for visibility
    });
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 }
    });
    const page = await context.newPage();

    console.log('Navigating to shared space...');
    await page.goto(SHARED_SPACE_URL, { waitUntil: 'networkidle' });

    // Take a screenshot to see what we're dealing with
    const screenshotPath = './showpad-screenshot.png';
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`Screenshot saved to: ${screenshotPath}`);

    // Get the page content
    console.log('\nPage title:', await page.title());
    console.log('Current URL:', page.url());

    // Give a client-side app extra time to render after network idle
    await page.waitForTimeout(3000);

    // Extract visible content
    console.log('\nExtracting content...');
    const content = await extractShowpadContent(page);

    console.log('\n=== Extracted Content ===');
    console.log('Title:', content.title);

    // Only the first ten entries of each list are printed, truncated for readability
    console.log('\nSections found:', content.sections.length);
    for (const section of content.sections.slice(0, 10)) {
      console.log(' -', section.substring(0, 100));
    }
    console.log('\nAssets found:', content.assets.length);
    for (const asset of content.assets.slice(0, 10)) {
      console.log(` - [${asset.type}] ${asset.name}`);
      if (asset.url) console.log(` URL: ${asset.url.substring(0, 80)}...`);
    }

    // Check if there's a registration form
    const registrationForm = await page.$('form, [class*="register"], [class*="signup"]');
    if (registrationForm) {
      console.log('\nRegistration form detected! The shared space requires registration.');
      console.log('To access content, you would need to:');
      console.log('1. Fill out the registration form');
      console.log('2. Verify your email');
      console.log('3. Log in to access the content');
    }

    // Check current page state (plain, case-sensitive substring match on the HTML)
    const pageContent = await page.content();
    if (pageContent.includes('register') || pageContent.includes('sign up')) {
      console.log('\nNote: Page appears to be showing registration prompt');
    }

    // Keep browser open for inspection
    console.log('\nBrowser will stay open for 30 seconds for inspection...');
    await page.waitForTimeout(30000);
  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    console.error('Error:', error instanceof Error ? error.message : error);
    // Without this a failed run would still exit with status 0.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
      console.log('\nBrowser closed.');
    }
  }
}
// Start the scraper; any rejection that escapes main() is written to stderr.
void main().catch((reason) => {
  console.error(reason);
});
|