File size: 5,348 Bytes
34367da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/**
 * Showpad Browser Scraper
 *
 * Uses Playwright to access and extract content from Showpad shared spaces.
 * Since Showpad's API requires OAuth credentials, we use browser automation
 * to access publicly shared content.
 */

import { chromium, Browser, Page } from 'playwright';

/**
 * One media item or link discovered on a scraped page.
 */
interface ShowpadAsset {
  /** Human-readable label (e.g. image alt text or link text), or a fixed fallback label. */
  name: string;
  /** Kind of asset; the scraper in this file emits 'image', 'video' or 'link'. */
  type: string;
  /** Location of the asset exactly as written in the page markup; absent when none was found. */
  url?: string;
  /** Preview image location (the scraper fills this from a video's `poster` attribute). */
  thumbnail?: string;
}

/**
 * Everything extracted from a single page visit.
 */
interface ShowpadContent {
  /** Document title of the page; left unset if it could not be read. */
  title?: string;
  /** Images, videos and links found on the page, in discovery order. */
  assets: ShowpadAsset[];
  /** Trimmed, non-empty text of the heading/paragraph-like elements found on the page. */
  sections: string[];
}

/**
 * Collects the visible text sections and media/download assets of the page
 * that is currently loaded.
 *
 * Assets are appended in a fixed order: images, then videos, then links.
 * URLs are returned exactly as written in the markup (they are not resolved
 * against the page URL).
 *
 * @param page - Playwright page that has already been navigated to the target.
 * @returns The page title (when readable), text sections and discovered assets.
 */
async function extractShowpadContent(page: Page): Promise<ShowpadContent> {
  const content: ShowpadContent = {
    assets: [],
    sections: []
  };

  // Let the client-side app finish its initial requests before querying the DOM.
  await page.waitForLoadState('networkidle');

  // The title is best-effort: a failure here must not abort the extraction.
  try {
    content.title = await page.title();
  } catch {
    // Leave `title` unset.
  }

  // Text sections: keep only elements that contain something other than whitespace.
  const textElements = await page.$$('h1, h2, h3, p, .title, .description');
  for (const el of textElements) {
    const text = (await el.textContent())?.trim();
    if (text) {
      content.sections.push(text);
    }
  }

  // Images: an <img> without a src carries nothing worth recording.
  const images = await page.$$('img');
  for (const img of images) {
    const src = await img.getAttribute('src');
    if (!src) {
      continue;
    }
    const alt = await img.getAttribute('alt');
    content.assets.push({
      // Trimmed so a whitespace-only alt falls back to the placeholder,
      // matching how link text is handled below.
      name: alt?.trim() || 'Unknown',
      type: 'image',
      url: src
    });
  }

  // Videos: every <video> is recorded, even when no URL can be determined.
  const videos = await page.$$('video');
  for (const video of videos) {
    let src = await video.getAttribute('src');
    if (!src) {
      // Many players declare the media via a nested <source src="..."> instead
      // of a src attribute on the <video> element itself.
      const source = await video.$('source[src]');
      src = source ? await source.getAttribute('src') : null;
    }
    const poster = await video.getAttribute('poster');
    content.assets.push({
      name: 'Video',
      type: 'video',
      url: src || undefined,
      thumbnail: poster || undefined
    });
  }

  // Links whose href mentions "download" or "asset".
  const links = await page.$$('a[href*="download"], a[href*="asset"]');
  for (const link of links) {
    const href = await link.getAttribute('href');
    if (!href) {
      continue;
    }
    const text = await link.textContent();
    content.assets.push({
      name: text?.trim() || 'Download',
      type: 'link',
      url: href
    });
  }

  return content;
}

/**
 * Script entry point: opens the shared-space URL in a headed Chromium window,
 * saves a full-page screenshot, prints a summary of the extracted content and
 * reports whether the page looks like a registration wall.
 *
 * Errors raised while scraping are logged and recorded in `process.exitCode`
 * instead of being rethrown; the browser is always closed in `finally`.
 */
async function main(): Promise<void> {
  const SHARED_SPACE_URL = 'https://tdcerhverv.showpad.biz/s/01kcmt54k3mczgv67x53hcchpw/auth/register';
  // Longest URL prefix printed in the asset summary before it is elided.
  const MAX_URL_DISPLAY_LENGTH = 80;

  console.log('Starting Showpad Browser Scraper...\n');
  console.log('Target URL:', SHARED_SPACE_URL);

  let browser: Browser | null = null;

  try {
    // Launch browser in headed mode for first run to see what's happening
    console.log('\nLaunching browser...');
    browser = await chromium.launch({
      headless: false, // Set to true for production
      slowMo: 500 // Slow down for visibility
    });

    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 }
    });

    const page = await context.newPage();

    console.log('Navigating to shared space...');
    await page.goto(SHARED_SPACE_URL, { waitUntil: 'networkidle' });

    // Take a screenshot to see what we're dealing with
    const screenshotPath = './showpad-screenshot.png';
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`Screenshot saved to: ${screenshotPath}`);

    // Get the page content
    console.log('\nPage title:', await page.title());
    console.log('Current URL:', page.url());

    // Give a client-side app a few extra seconds to render after network idle
    await page.waitForTimeout(3000);

    // Extract visible content
    console.log('\nExtracting content...');
    const content = await extractShowpadContent(page);

    console.log('\n=== Extracted Content ===');
    console.log('Title:', content.title);
    console.log('\nSections found:', content.sections.length);
    // Only the first ten sections are previewed, each cut to 100 characters.
    for (const section of content.sections.slice(0, 10)) {
      console.log('  -', section.substring(0, 100));
    }

    console.log('\nAssets found:', content.assets.length);
    for (const asset of content.assets.slice(0, 10)) {
      console.log(`  - [${asset.type}] ${asset.name}`);
      if (asset.url) {
        // Append an ellipsis only when the URL was actually shortened.
        const shownUrl =
          asset.url.length > MAX_URL_DISPLAY_LENGTH
            ? `${asset.url.substring(0, MAX_URL_DISPLAY_LENGTH)}...`
            : asset.url;
        console.log(`    URL: ${shownUrl}`);
      }
    }

    // Check if there's a registration form
    const registrationForm = await page.$('form, [class*="register"], [class*="signup"]');
    if (registrationForm) {
      console.log('\nRegistration form detected! The shared space requires registration.');
      console.log('To access content, you would need to:');
      console.log('1. Fill out the registration form');
      console.log('2. Verify your email');
      console.log('3. Log in to access the content');
    }

    // Check current page state; compare case-insensitively so capitalised
    // wording such as "Register" or "Sign up" is also recognised.
    const pageContent = (await page.content()).toLowerCase();
    if (pageContent.includes('register') || pageContent.includes('sign up')) {
      console.log('\nNote: Page appears to be showing registration prompt');
    }

    // Keep browser open for inspection
    console.log('\nBrowser will stay open for 30 seconds for inspection...');
    await page.waitForTimeout(30000);

  } catch (error: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
    // Signal the failure to the calling shell without skipping the cleanup below.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
      console.log('\nBrowser closed.');
    }
  }
}

// Start the scraper; print any rejection that escapes main()'s own error handling.
main().catch((err) => {
  console.error(err);
});