// widgettdc-api / apps / backend / scripts / showpadBrowserScraper.ts
// Repository page header: Kraft102 - "Update backend source" (commit 34367da, verified)
/**
 * Showpad Browser Scraper
 *
 * Uses Playwright to access and extract content from Showpad shared spaces.
 * Because Showpad's API requires OAuth credentials, this script uses browser
 * automation to access publicly shared content instead.
 */
import { chromium, Browser, Page } from 'playwright';
/**
 * One media item or link discovered on a scraped page.
 */
interface ShowpadAsset {
  /** Display label, e.g. an image's alt text or a link's text. */
  name: string;

  /** Asset category; the scraper in this file emits 'image', 'video' and 'link'. */
  type: string;

  /** Address the asset points at, when the element carried one. */
  url?: string;

  /** Preview image address, when one is available (e.g. a video poster). */
  thumbnail?: string;
}
/**
 * Everything harvested from a single page.
 */
interface ShowpadContent {
  /** Document title, when it could be read. */
  title?: string;

  /** Media items and links found on the page. */
  assets: ShowpadAsset[];

  /** Text snippets collected from the page, one entry per matching element. */
  sections: string[];
}
/**
 * Collects the visible text and media references from a loaded page.
 *
 * After waiting for the network to become idle, the function gathers:
 *  - the document title (best effort; a failure is logged and skipped),
 *  - the trimmed, non-empty text of `h1`-`h3`, `p`, `.title` and `.description` elements,
 *  - every `<img>` that has a `src` attribute,
 *  - every `<video>`, taking its URL from its own `src` or, failing that,
 *    from its first `<source src>` child,
 *  - every anchor whose `href` contains "download" or "asset".
 *
 * Attribute values are stored as returned by `getAttribute`; no URL
 * resolution or de-duplication is performed.
 *
 * @param page - Playwright page to read from.
 * @returns The extracted title, text sections and assets.
 */
async function extractShowpadContent(page: Page): Promise<ShowpadContent> {
  const content: ShowpadContent = {
    assets: [],
    sections: []
  };

  // Wait for the app to finish loading before querying the DOM.
  await page.waitForLoadState('networkidle');

  // The title is optional: report a failure instead of silently dropping it,
  // but keep extracting the rest of the page.
  try {
    content.title = await page.title();
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Could not read page title:', reason);
  }

  // Visible text sections.
  const textElements = await page.$$('h1, h2, h3, p, .title, .description');
  for (const el of textElements) {
    const text = (await el.textContent())?.trim();
    if (text) {
      content.sections.push(text);
    }
  }

  // Image assets: only images that actually reference a source.
  const images = await page.$$('img');
  for (const img of images) {
    const src = await img.getAttribute('src');
    const alt = await img.getAttribute('alt');
    if (src) {
      content.assets.push({
        name: alt || 'Unknown',
        type: 'image',
        url: src
      });
    }
  }

  // Video elements. A <video> may declare its media through nested <source>
  // elements instead of its own `src`, so fall back to the first such child.
  const videos = await page.$$('video');
  for (const video of videos) {
    let src = await video.getAttribute('src');
    const poster = await video.getAttribute('poster');
    if (!src) {
      const source = await video.$('source[src]');
      src = source ? await source.getAttribute('src') : null;
    }
    content.assets.push({
      name: 'Video',
      type: 'video',
      url: src || undefined,
      thumbnail: poster || undefined
    });
  }

  // Download / asset links.
  const links = await page.$$('a[href*="download"], a[href*="asset"]');
  for (const link of links) {
    const href = await link.getAttribute('href');
    const text = await link.textContent();
    if (href) {
      content.assets.push({
        name: text?.trim() || 'Download',
        type: 'link',
        url: href
      });
    }
  }

  return content;
}
/**
 * Script entry point.
 *
 * Opens the hard-coded shared-space URL in a headed Chromium window, saves a
 * full-page screenshot to `./showpad-screenshot.png`, prints a summary of the
 * extracted content (first 10 sections and first 10 assets), reports whether
 * the page looks like a registration wall, and keeps the browser open for
 * 30 seconds before closing it.
 *
 * Failures are logged rather than rethrown; the browser is closed in all cases.
 */
async function main(): Promise<void> {
  const SHARED_SPACE_URL = 'https://tdcerhverv.showpad.biz/s/01kcmt54k3mczgv67x53hcchpw/auth/register';
  // Longest URL prefix printed in the asset summary.
  const MAX_URL_PREVIEW = 80;

  console.log('Starting Showpad Browser Scraper...\n');
  console.log('Target URL:', SHARED_SPACE_URL);

  let browser: Browser | null = null;
  try {
    // Launch browser in headed mode for first run to see what's happening
    console.log('\nLaunching browser...');
    browser = await chromium.launch({
      headless: false, // Set to true for production
      slowMo: 500 // Slow down for visibility
    });
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 }
    });
    const page = await context.newPage();

    console.log('Navigating to shared space...');
    await page.goto(SHARED_SPACE_URL, { waitUntil: 'networkidle' });

    // Take a screenshot to see what we're dealing with
    const screenshotPath = './showpad-screenshot.png';
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`Screenshot saved to: ${screenshotPath}`);

    // Get the page content
    console.log('\nPage title:', await page.title());
    console.log('Current URL:', page.url());

    // Give a client-side app a moment to finish rendering
    await page.waitForTimeout(3000);

    // Extract visible content
    console.log('\nExtracting content...');
    const content = await extractShowpadContent(page);

    console.log('\n=== Extracted Content ===');
    console.log('Title:', content.title);
    console.log('\nSections found:', content.sections.length);
    for (const section of content.sections.slice(0, 10)) {
      console.log(' -', section.substring(0, 100));
    }
    console.log('\nAssets found:', content.assets.length);
    for (const asset of content.assets.slice(0, 10)) {
      console.log(` - [${asset.type}] ${asset.name}`);
      if (asset.url) {
        // Only show the ellipsis when the URL was actually shortened.
        const preview =
          asset.url.length > MAX_URL_PREVIEW
            ? `${asset.url.substring(0, MAX_URL_PREVIEW)}...`
            : asset.url;
        console.log(` URL: ${preview}`);
      }
    }

    // Check if there's a registration form
    const registrationForm = await page.$('form, [class*="register"], [class*="signup"]');
    if (registrationForm) {
      console.log('\nRegistration form detected! The shared space requires registration.');
      console.log('To access content, you would need to:');
      console.log('1. Fill out the registration form');
      console.log('2. Verify your email');
      console.log('3. Log in to access the content');
    }

    // Check current page state. Compare case-insensitively so that
    // "Register" / "Sign up" in headings or buttons are recognised too.
    const pageContent = (await page.content()).toLowerCase();
    if (pageContent.includes('register') || pageContent.includes('sign up')) {
      console.log('\nNote: Page appears to be showing registration prompt');
    }

    // Keep browser open for inspection
    console.log('\nBrowser will stay open for 30 seconds for inspection...');
    await page.waitForTimeout(30000);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error; reading `.message`
    // unguarded would print `undefined` or throw on null/undefined.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
  } finally {
    if (browser) {
      await browser.close();
      console.log('\nBrowser closed.');
    }
  }
}
// Run the scraper; any rejection that escapes main() is printed to stderr.
main().catch((reason: unknown) => {
  console.error(reason);
});