devusman committed on
Commit
cfc5143
·
verified ·
1 Parent(s): 7a6e7ae

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +56 -67
server.js CHANGED
@@ -1,10 +1,13 @@
1
  const express = require('express');
2
- const puppeteerExtra = require('puppeteer-extra'); // NEW: For stealth
3
- const StealthPlugin = require('puppeteer-extra-plugin-stealth'); // NEW: Stealth plugin
4
  const cors = require('cors');
5
  const { EventEmitter } = require('events');
 
 
 
6
 
7
- puppeteerExtra.use(StealthPlugin()); // NEW: Enable stealth plugin
8
 
9
  const app = express();
10
  const port = 7860;
@@ -12,7 +15,7 @@ const port = 7860;
12
  app.use(cors());
13
  app.use(express.json());
14
 
15
- // --- Progress Tracking and Job Storage --- (Unchanged)
16
  const progressTrackers = new Map();
17
  const downloadJobs = new Map();
18
 
@@ -41,7 +44,7 @@ class ProgressTracker extends EventEmitter {
41
  }
42
  }
43
 
44
- // --- Puppeteer Logic (Updated for Stealth and Reliability) ---
45
  const bypassCookiesAndRestrictions = async (page, progressTracker) => {
46
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
47
 
@@ -65,7 +68,7 @@ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
65
  }
66
  }
67
 
68
- // Step 2: Inject CSS to hide cookie banners immediately (Unchanged)
69
  await page.addStyleTag({
70
  content: `
71
  /* Hide all possible cookie banners */
@@ -106,7 +109,7 @@ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
106
  `
107
  });
108
 
109
- // Step 3: Inject JavaScript to handle dynamic cookie banners (Unchanged)
110
  await page.evaluateOnNewDocument(() => {
111
  // Override common cookie consent functions
112
  window.cookieConsent = { accepted: true };
@@ -213,8 +216,8 @@ const unblurContent = async (page, progressTracker) => {
213
  };
214
 
215
  removeRestrictions();
216
- const intervalId = setInterval(removeRestrictions, 1000); // Reduced from 2000ms to 1000ms
217
- setTimeout(() => clearInterval(intervalId), 30000); // Reduced from 60000ms to 30000ms
218
  });
219
 
220
  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
@@ -229,13 +232,11 @@ const applyPrintStyles = async (page, progressTracker) => {
229
  style.id = "print-style-extension";
230
  style.innerHTML = `
231
  @page {
232
- /* Set page size to A4 and remove default margins */
233
  size: A4 portrait;
234
  margin: 0mm;
235
  }
236
  @media print {
237
  html, body {
238
- /* Ensure the body takes the full width and has no extra padding/margin */
239
  width: 210mm !important;
240
  height: auto !important;
241
  margin: 0 !important;
@@ -244,7 +245,6 @@ const applyPrintStyles = async (page, progressTracker) => {
244
  background: white !important;
245
  color: black !important;
246
  }
247
- /* Remove all unwanted elements like headers, footers, sidebars, etc. */
248
  header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
249
  [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
250
  .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
@@ -254,17 +254,11 @@ const applyPrintStyles = async (page, progressTracker) => {
254
  .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
255
  display: none !important;
256
  }
257
- /* Force all elements to have a transparent background and no shadow */
258
  * {
259
  box-shadow: none !important;
260
  background: transparent !important;
261
  color: inherit !important;
262
  }
263
- /*
264
- * KEY FIX: Target the main document container.
265
- * Force it to be a block element, remove any transforms or max-widths,
266
- * and center it perfectly within the page.
267
- */
268
  .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
269
  .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
270
  position: static !important;
@@ -273,10 +267,9 @@ const applyPrintStyles = async (page, progressTracker) => {
273
  max-width: none !important;
274
  margin: 0 !important;
275
  padding: 0 !important;
276
- box-sizing: border-box; /* Include padding in width calculation */
277
  transform: none !important;
278
  }
279
- /* Ensure individual pages and images within the document use the full width */
280
  [data-page], .page, .document-page, img {
281
  page-break-after: always !important;
282
  page-break-inside: avoid !important;
@@ -298,12 +291,19 @@ const applyPrintStyles = async (page, progressTracker) => {
298
 
299
  const studocuDownloader = async (url, options = {}, progressTracker = null) => {
300
  let browser;
 
301
  try {
302
  progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
303
 
 
 
 
 
 
304
  console.log("πŸš€ Launching browser with enhanced stealth configuration...");
305
- browser = await puppeteerExtra.launch({ // UPDATED: Use puppeteerExtra
306
  headless: true,
 
307
  args: [
308
  '--no-sandbox',
309
  '--disable-setuid-sandbox',
@@ -333,19 +333,16 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
333
  progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
334
 
335
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36');
336
- await page.setViewport({ width: 794, height: 1122 }); // A4 size in pixels at 96 DPI
337
 
338
- // NOTE: Stealth plugin handles most of this, but keeping for extra safety
339
  await page.evaluateOnNewDocument(() => {
340
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
341
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
342
  Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
343
  });
344
 
345
- // Set up cookie and content bypass
346
  await bypassCookiesAndRestrictions(page, progressTracker);
347
 
348
- // Block unnecessary resources (UPDATED: Block more aggressively, including scripts, fonts, and stylesheets if not critical)
349
  await page.setRequestInterception(true);
350
  page.on('request', (req) => {
351
  const resourceType = req.resourceType();
@@ -357,9 +354,9 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
357
  }
358
 
359
  if (
360
- ['image', 'media', 'font', 'stylesheet'].includes(resourceType) && // Block non-essential images/media/fonts/styles early if not core
361
- !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu') || // Allow core document images
362
- resourceType === 'script' && !reqUrl.includes('studocu') || // Block third-party scripts
363
  reqUrl.includes('doubleclick') ||
364
  reqUrl.includes('googletagmanager') ||
365
  reqUrl.includes('facebook.com') ||
@@ -378,19 +375,18 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
378
  }
379
  });
380
 
381
- // Login if credentials provided
382
  if (options.email && options.password) {
383
  progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
384
 
385
  console.log("πŸ”‘ Logging in to StuDocu...");
386
- await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 }); // Reduced timeout from 120000
387
- await page.waitForSelector('#email', { timeout: 10000 }); // Reduced from 15000
388
  await page.type('#email', options.email);
389
  await page.type('#password', options.password);
390
  await page.click('button[type="submit"]');
391
  try {
392
- await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 15000 }); // Reduced from 30000
393
- await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 5000 }); // Reduced from 10000
394
  console.log("βœ… Login successful.");
395
  progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
396
  } catch (e) {
@@ -399,34 +395,31 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
399
  }
400
  }
401
 
402
- // Removed homepage visit as it's not strictly necessary for session setup; directly navigate to URL
403
  progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
404
  console.log(`πŸ“„ Navigating to ${url}...`);
405
 
406
  let navigationSuccess = false;
407
  let attempts = 0;
408
- const maxAttempts = 3; // Reduced from 5 to minimize retries
409
  while (!navigationSuccess && attempts < maxAttempts) {
410
  try {
411
  attempts++;
412
  progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
413
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
414
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 }); // Reduced timeout from 150000
415
  navigationSuccess = true;
416
  } catch (e) {
417
  console.log(`Navigation attempt ${attempts} failed:`, e.message);
418
  if (attempts >= maxAttempts) throw e;
419
- await new Promise(resolve => setTimeout(resolve, 5000)); // Reduced retry delay from 15000 to 5000ms
420
  }
421
  }
422
 
423
  progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
424
- await new Promise(resolve => setTimeout(resolve, 2000)); // Reduced from 5000ms
425
 
426
- // Apply content unblurring
427
  await unblurContent(page, progressTracker);
428
 
429
- // Wait for document content
430
  progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
431
  console.log("⏳ Waiting for document content to load...");
432
 
@@ -437,7 +430,7 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
437
  let contentFound = false;
438
  for (const selector of contentSelectors) {
439
  try {
440
- await page.waitForSelector(selector, { timeout: 10000 }); // Reduced from 20000
441
  console.log(`βœ… Found content with selector: ${selector}`);
442
  contentFound = true;
443
  break;
@@ -450,7 +443,6 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
450
  console.log("⚠️ No specific content selector found, proceeding with page content...");
451
  }
452
 
453
- // Enhanced scrolling to load all content (Optimized: Increased scroll distance, reduced delays)
454
  progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
455
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
456
 
@@ -459,27 +451,25 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
459
  let scrollHeight = document.body.scrollHeight;
460
  while (true) {
461
  let totalHeight = 0;
462
- const distance = 600; // Increased from 300 for faster coverage
463
  while (totalHeight < scrollHeight) {
464
  window.scrollBy(0, distance);
465
  totalHeight += distance;
466
- await delay(200); // Reduced from 500ms
467
  }
468
- await delay(1000); // Reduced from 2000ms
469
  const newHeight = document.body.scrollHeight;
470
  if (newHeight === scrollHeight) break;
471
  scrollHeight = newHeight;
472
  }
473
  window.scrollTo({ top: 0, behavior: "smooth" });
474
- await delay(500); // Reduced from 1000ms
475
  });
476
 
477
  progressTracker?.updateProgress(70, 'processing', 'Processing loaded content...');
478
 
479
- // Re-apply unblur after loading new content
480
  await unblurContent(page, progressTracker);
481
 
482
- // Wait for all images to load (Optimized: Reduced per-image timeout, parallel wait)
483
  progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
484
  console.log("πŸ–ΌοΈ Waiting for all images to load...");
485
 
@@ -490,15 +480,14 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
490
  return new Promise((resolve) => {
491
  img.addEventListener('load', resolve);
492
  img.addEventListener('error', resolve);
493
- setTimeout(resolve, 5000); // Reduced from 15000ms
494
  });
495
  }));
496
  });
497
 
498
- await new Promise(resolve => setTimeout(resolve, 2000)); // Reduced from 5000ms
499
  progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');
500
 
501
- // Set exact height
502
  await page.evaluate(() => {
503
  const getDocumentHeight = () => Math.max(
504
  document.body.scrollHeight, document.body.offsetHeight,
@@ -510,7 +499,6 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
510
  document.body.style.overflow = 'hidden !important';
511
  });
512
 
513
- // Content verification (Unchanged, as it's quick)
514
  const contentCheck = await page.evaluate(() => {
515
  const textContent = document.body.textContent || '';
516
  const images = document.querySelectorAll('img');
@@ -537,7 +525,6 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
537
  console.warn("⚠️ Warning: Limited document content detected.");
538
  }
539
 
540
- // Apply print styles and generate PDF
541
  await applyPrintStyles(page, progressTracker);
542
  await page.emulateMediaType('print');
543
 
@@ -546,9 +533,9 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
546
 
547
  const pdfBuffer = await page.pdf({
548
  printBackground: true,
549
- preferCSSPageSize: true, // Use the @page size
550
  displayHeaderFooter: false,
551
- timeout: 60000, // Reduced from 180000
552
  scale: 1,
553
  omitBackground: false
554
  });
@@ -570,10 +557,20 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
570
  console.log("Error closing browser:", e.message);
571
  }
572
  }
 
 
 
 
 
 
 
 
 
 
573
  }
574
  };
575
 
576
- // --- API Routes --- (Unchanged)
577
  app.post('/api/request-download', (req, res) => {
578
  const { url, email, password } = req.body;
579
  if (!url || !url.includes('studocu.com')) {
@@ -588,20 +585,16 @@ app.post('/api/request-download', (req, res) => {
588
 
589
  console.log(`🎯 Processing request for: ${url} [Session: ${sessionId}]`);
590
 
591
- // Respond to the client immediately with the session ID
592
  res.json({ sessionId });
593
 
594
- // --- Start the PDF generation in the background ---
595
  studocuDownloader(url, { email, password }, progressTracker)
596
  .then(pdfBuffer => {
597
- // Store the successful result
598
  downloadJobs.set(sessionId, { status: 'completed', buffer: pdfBuffer });
599
- progressTrackers.delete(sessionId); // Clean up live tracker
600
  })
601
  .catch(error => {
602
- // Store the error
603
  downloadJobs.set(sessionId, { status: 'error', message: error.message });
604
- progressTrackers.delete(sessionId); // Clean up live tracker
605
  });
606
  });
607
 
@@ -610,7 +603,6 @@ app.get('/api/progress/:sessionId', (req, res) => {
610
  const tracker = progressTrackers.get(sessionId);
611
 
612
  if (tracker) {
613
- // Job is in progress, return live data
614
  return res.json({
615
  sessionId,
616
  progress: tracker.progress,
@@ -622,7 +614,6 @@ app.get('/api/progress/:sessionId', (req, res) => {
622
 
623
  const job = downloadJobs.get(sessionId);
624
  if (job) {
625
- // Job is finished, return final state
626
  if (job.status === 'completed') {
627
  return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
628
  }
@@ -654,14 +645,12 @@ app.get('/api/download/:sessionId', (req, res) => {
654
  res.setHeader('Content-Type', 'application/pdf');
655
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
656
  res.send(job.buffer);
657
- // Optional: Clean up the job after download to save memory
658
- // downloadJobs.delete(sessionId);
659
  } else {
660
  res.status(500).json({ error: 'An unknown error occurred.' });
661
  }
662
  });
663
 
664
- // --- Health and Info Endpoints (Unchanged) ---
665
  app.get('/health', (req, res) => {
666
  res.json({
667
  status: 'healthy',
 
1
  const express = require('express');
2
+ const puppeteerExtra = require('puppeteer-extra');
3
+ const StealthPlugin = require('puppeteer-extra-plugin-stealth');
4
  const cors = require('cors');
5
  const { EventEmitter } = require('events');
6
+ const os = require('os'); // NEW: For accessing the operating system's temporary directory
7
+ const fs = require('fs').promises; // NEW: For file system operations
8
+ const path = require('path'); // NEW: For handling file paths
9
 
10
+ puppeteerExtra.use(StealthPlugin());
11
 
12
  const app = express();
13
  const port = 7860;
 
15
  app.use(cors());
16
  app.use(express.json());
17
 
18
+ // --- Progress Tracking and Job Storage ---
19
  const progressTrackers = new Map();
20
  const downloadJobs = new Map();
21
 
 
44
  }
45
  }
46
 
47
+ // --- Puppeteer Logic (Updated for Stealth, Reliability, and Cleanup) ---
48
  const bypassCookiesAndRestrictions = async (page, progressTracker) => {
49
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
50
 
 
68
  }
69
  }
70
 
71
+ // Step 2: Inject CSS to hide cookie banners immediately
72
  await page.addStyleTag({
73
  content: `
74
  /* Hide all possible cookie banners */
 
109
  `
110
  });
111
 
112
+ // Step 3: Inject JavaScript to handle dynamic cookie banners
113
  await page.evaluateOnNewDocument(() => {
114
  // Override common cookie consent functions
115
  window.cookieConsent = { accepted: true };
 
216
  };
217
 
218
  removeRestrictions();
219
+ const intervalId = setInterval(removeRestrictions, 1000);
220
+ setTimeout(() => clearInterval(intervalId), 30000);
221
  });
222
 
223
  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
 
232
  style.id = "print-style-extension";
233
  style.innerHTML = `
234
  @page {
 
235
  size: A4 portrait;
236
  margin: 0mm;
237
  }
238
  @media print {
239
  html, body {
 
240
  width: 210mm !important;
241
  height: auto !important;
242
  margin: 0 !important;
 
245
  background: white !important;
246
  color: black !important;
247
  }
 
248
  header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
249
  [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
250
  .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
 
254
  .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
255
  display: none !important;
256
  }
 
257
  * {
258
  box-shadow: none !important;
259
  background: transparent !important;
260
  color: inherit !important;
261
  }
 
 
 
 
 
262
  .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
263
  .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
264
  position: static !important;
 
267
  max-width: none !important;
268
  margin: 0 !important;
269
  padding: 0 !important;
270
+ box-sizing: border-box;
271
  transform: none !important;
272
  }
 
273
  [data-page], .page, .document-page, img {
274
  page-break-after: always !important;
275
  page-break-inside: avoid !important;
 
291
 
292
  const studocuDownloader = async (url, options = {}, progressTracker = null) => {
293
  let browser;
294
+ let userDataDir = null; // NEW: Initialize userDataDir to null
295
  try {
296
  progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
297
 
298
+ // NEW: Create a temporary directory for the browser session
299
+ const tempDir = os.tmpdir();
300
+ userDataDir = await fs.mkdtemp(path.join(tempDir, 'puppeteer-'));
301
+ console.log(`πŸ“‚ Created temporary user data directory: ${userDataDir}`);
302
+
303
  console.log("πŸš€ Launching browser with enhanced stealth configuration...");
304
+ browser = await puppeteerExtra.launch({
305
  headless: true,
306
+ userDataDir: userDataDir, // NEW: Use the temporary directory
307
  args: [
308
  '--no-sandbox',
309
  '--disable-setuid-sandbox',
 
333
  progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
334
 
335
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36');
336
+ await page.setViewport({ width: 794, height: 1122 });
337
 
 
338
  await page.evaluateOnNewDocument(() => {
339
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
340
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
341
  Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
342
  });
343
 
 
344
  await bypassCookiesAndRestrictions(page, progressTracker);
345
 
 
346
  await page.setRequestInterception(true);
347
  page.on('request', (req) => {
348
  const resourceType = req.resourceType();
 
354
  }
355
 
356
  if (
357
+ ['image', 'media', 'font', 'stylesheet'].includes(resourceType) &&
358
+ !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu') ||
359
+ resourceType === 'script' && !reqUrl.includes('studocu') ||
360
  reqUrl.includes('doubleclick') ||
361
  reqUrl.includes('googletagmanager') ||
362
  reqUrl.includes('facebook.com') ||
 
375
  }
376
  });
377
 
 
378
  if (options.email && options.password) {
379
  progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
380
 
381
  console.log("πŸ”‘ Logging in to StuDocu...");
382
+ await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
383
+ await page.waitForSelector('#email', { timeout: 10000 });
384
  await page.type('#email', options.email);
385
  await page.type('#password', options.password);
386
  await page.click('button[type="submit"]');
387
  try {
388
+ await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 15000 });
389
+ await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 5000 });
390
  console.log("βœ… Login successful.");
391
  progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
392
  } catch (e) {
 
395
  }
396
  }
397
 
 
398
  progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
399
  console.log(`πŸ“„ Navigating to ${url}...`);
400
 
401
  let navigationSuccess = false;
402
  let attempts = 0;
403
+ const maxAttempts = 3;
404
  while (!navigationSuccess && attempts < maxAttempts) {
405
  try {
406
  attempts++;
407
  progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
408
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
409
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
410
  navigationSuccess = true;
411
  } catch (e) {
412
  console.log(`Navigation attempt ${attempts} failed:`, e.message);
413
  if (attempts >= maxAttempts) throw e;
414
+ await new Promise(resolve => setTimeout(resolve, 5000));
415
  }
416
  }
417
 
418
  progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
419
+ await new Promise(resolve => setTimeout(resolve, 2000));
420
 
 
421
  await unblurContent(page, progressTracker);
422
 
 
423
  progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
424
  console.log("⏳ Waiting for document content to load...");
425
 
 
430
  let contentFound = false;
431
  for (const selector of contentSelectors) {
432
  try {
433
+ await page.waitForSelector(selector, { timeout: 10000 });
434
  console.log(`βœ… Found content with selector: ${selector}`);
435
  contentFound = true;
436
  break;
 
443
  console.log("⚠️ No specific content selector found, proceeding with page content...");
444
  }
445
 
 
446
  progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
447
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
448
 
 
451
  let scrollHeight = document.body.scrollHeight;
452
  while (true) {
453
  let totalHeight = 0;
454
+ const distance = 600;
455
  while (totalHeight < scrollHeight) {
456
  window.scrollBy(0, distance);
457
  totalHeight += distance;
458
+ await delay(200);
459
  }
460
+ await delay(1000);
461
  const newHeight = document.body.scrollHeight;
462
  if (newHeight === scrollHeight) break;
463
  scrollHeight = newHeight;
464
  }
465
  window.scrollTo({ top: 0, behavior: "smooth" });
466
+ await delay(500);
467
  });
468
 
469
  progressTracker?.updateProgress(70, 'processing', 'Processing loaded content...');
470
 
 
471
  await unblurContent(page, progressTracker);
472
 
 
473
  progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
474
  console.log("πŸ–ΌοΈ Waiting for all images to load...");
475
 
 
480
  return new Promise((resolve) => {
481
  img.addEventListener('load', resolve);
482
  img.addEventListener('error', resolve);
483
+ setTimeout(resolve, 5000);
484
  });
485
  }));
486
  });
487
 
488
+ await new Promise(resolve => setTimeout(resolve, 2000));
489
  progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');
490
 
 
491
  await page.evaluate(() => {
492
  const getDocumentHeight = () => Math.max(
493
  document.body.scrollHeight, document.body.offsetHeight,
 
499
  document.body.style.overflow = 'hidden !important';
500
  });
501
 
 
502
  const contentCheck = await page.evaluate(() => {
503
  const textContent = document.body.textContent || '';
504
  const images = document.querySelectorAll('img');
 
525
  console.warn("⚠️ Warning: Limited document content detected.");
526
  }
527
 
 
528
  await applyPrintStyles(page, progressTracker);
529
  await page.emulateMediaType('print');
530
 
 
533
 
534
  const pdfBuffer = await page.pdf({
535
  printBackground: true,
536
+ preferCSSPageSize: true,
537
  displayHeaderFooter: false,
538
+ timeout: 60000,
539
  scale: 1,
540
  omitBackground: false
541
  });
 
557
  console.log("Error closing browser:", e.message);
558
  }
559
  }
560
+ // NEW: Clean up the temporary directory
561
+ if (userDataDir) {
562
+ console.log(`πŸ—‘οΈ Cleaning up temporary directory: ${userDataDir}`);
563
+ try {
564
+ await fs.rm(userDataDir, { recursive: true, force: true });
565
+ console.log("βœ… Temporary directory cleaned up.");
566
+ } catch (e) {
567
+ console.error(`❌ Failed to clean up temporary directory ${userDataDir}:`, e.message);
568
+ }
569
+ }
570
  }
571
  };
572
 
573
+ // --- API Routes ---
574
  app.post('/api/request-download', (req, res) => {
575
  const { url, email, password } = req.body;
576
  if (!url || !url.includes('studocu.com')) {
 
585
 
586
  console.log(`🎯 Processing request for: ${url} [Session: ${sessionId}]`);
587
 
 
588
  res.json({ sessionId });
589
 
 
590
  studocuDownloader(url, { email, password }, progressTracker)
591
  .then(pdfBuffer => {
 
592
  downloadJobs.set(sessionId, { status: 'completed', buffer: pdfBuffer });
593
+ progressTrackers.delete(sessionId);
594
  })
595
  .catch(error => {
 
596
  downloadJobs.set(sessionId, { status: 'error', message: error.message });
597
+ progressTrackers.delete(sessionId);
598
  });
599
  });
600
 
 
603
  const tracker = progressTrackers.get(sessionId);
604
 
605
  if (tracker) {
 
606
  return res.json({
607
  sessionId,
608
  progress: tracker.progress,
 
614
 
615
  const job = downloadJobs.get(sessionId);
616
  if (job) {
 
617
  if (job.status === 'completed') {
618
  return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
619
  }
 
645
  res.setHeader('Content-Type', 'application/pdf');
646
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
647
  res.send(job.buffer);
 
 
648
  } else {
649
  res.status(500).json({ error: 'An unknown error occurred.' });
650
  }
651
  });
652
 
653
+ // --- Health and Info Endpoints ---
654
  app.get('/health', (req, res) => {
655
  res.json({
656
  status: 'healthy',