|
|
const express = require('express'); |
|
|
const puppeteerExtra = require('puppeteer-extra'); |
|
|
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); |
|
|
|
|
|
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha'); |
|
|
const cors = require('cors'); |
|
|
const { EventEmitter } = require('events'); |
|
|
const os = require('os'); |
|
|
const fs = require('fs').promises; |
|
|
const path = require('path'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Plugin registration must happen before any browser is launched.
//
// The 2captcha API token is taken from the TWOCAPTCHA_TOKEN environment
// variable when it is set (and non-empty).
// SECURITY: the literal fallback below is a credential committed to source
// control. It is kept only so existing deployments keep working; rotate it and
// delete the fallback once every environment provides TWOCAPTCHA_TOKEN.
const twoCaptchaToken = process.env.TWOCAPTCHA_TOKEN || 'cc4f0d688032c69ecf359cccdabbacb9';

puppeteerExtra.use(
  RecaptchaPlugin({
    provider: { id: '2captcha', token: twoCaptchaToken },
  })
);
puppeteerExtra.use(StealthPlugin());
|
|
|
|
|
|
|
|
// Port the HTTP server binds to (see the app.listen call at the end of the file).
const port = 7860;

// Express application; cors() and express.json() are both installed with
// their default options, in that order.
const app = express();
for (const middleware of [cors(), express.json()]) {
  app.use(middleware);
}

// sessionId -> ProgressTracker for jobs that are still running.
const progressTrackers = new Map();

// sessionId -> job record ({ status, buffer? , message? }) for every job.
const downloadJobs = new Map();
|
|
|
|
|
/**
 * Holds the latest progress state of one download session and broadcasts
 * every change as a 'progress' event.
 */
class ProgressTracker extends EventEmitter {
  /**
   * @param {string} sessionId - Identifier echoed back in every emitted update.
   */
  constructor(sessionId) {
    super();
    Object.assign(this, {
      sessionId,
      progress: 0,
      status: 'initializing',
      message: '',
    });
  }

  /**
   * Stores the new state, emits it as a 'progress' event and logs it.
   *
   * @param {number} progress - Percentage value shown to the client.
   * @param {string} status - Short machine-readable phase name.
   * @param {string} message - Human-readable description of the phase.
   */
  updateProgress(progress, status, message) {
    Object.assign(this, { progress, status, message });

    const payload = {
      sessionId: this.sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString(),
    };

    this.emit('progress', payload);
    console.log(`π [${this.sessionId}] ${progress}% - ${status}: ${message}`);
  }
}
|
|
|
|
|
|
|
|
/**
 * Suppresses cookie/consent UI and premium overlays on a page.
 *
 * Three things happen, in order:
 *   1. consent cookies for `.studocu.com` are set (failures are logged and skipped),
 *   2. a stylesheet hiding banners / removing blur is added to the current document,
 *   3. a cleanup script is registered with `page.evaluateOnNewDocument`.
 *
 * NOTE(review): step 3 is registered for documents created after this call;
 * confirm against the Puppeteer docs whether the already-loaded document is
 * expected to be covered as well.
 *
 * @param {object} page - Puppeteer page; `setCookie`, `addStyleTag` and
 *   `evaluateOnNewDocument` are called on it.
 * @param {{updateProgress: Function}|null|undefined} progressTracker - Optional
 *   tracker; receives updates at 5% and 10%.
 * @returns {Promise<boolean>} Always resolves to `true`.
 */
const bypassCookiesAndRestrictions = async (page, progressTracker) => {
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');

  console.log("πͺ Starting comprehensive cookie and restriction bypass...");

  const preCookies = [
    { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
    { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
    { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
    { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
    { name: 'user_consent', value: '1', domain: '.studocu.com' },
    { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
    { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
    { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
  ];

  // Best effort: one rejected cookie must not stop the others from being set.
  for (const cookie of preCookies) {
    try {
      await page.setCookie(cookie);
    } catch (e) {
      console.log(`Failed to set cookie ${cookie.name}:`, e.message);
    }
  }

  // The selector list must contain only valid CSS: a single unparsable
  // selector (such as the non-standard `:has-text()`) makes the browser
  // discard the entire rule, so none of the banners would be hidden.
  await page.addStyleTag({
    content: `
      /* Hide all possible cookie banners */
      [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
      .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
      .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
      [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance {
        display: none !important;
        visibility: hidden !important;
        opacity: 0 !important;
        z-index: -9999 !important;
        pointer-events: none !important;
      }
      /* Remove blur and premium overlays */
      [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i] {
        filter: none !important;
        backdrop-filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
      }
      /* Ensure document content is visible */
      .document-content, .page-content, [data-page] {
        filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
        pointer-events: auto !important;
      }
      /* Remove fixed overlays */
      .fixed-overlay, .sticky-overlay, .content-overlay {
        display: none !important;
      }
      /* Restore scrolling */
      html, body {
        overflow: auto !important;
        position: static !important;
      }
    `
  });

  await page.evaluateOnNewDocument(() => {
    // Stub out consent / analytics globals before page scripts look for them.
    window.cookieConsent = { accepted: true };
    window.gtag = () => { };
    window.ga = () => { };
    window.dataLayer = [];

    // `className` is not a string on SVG elements, so calling toLowerCase()
    // on it directly would throw inside the observer callback.
    const classNameOf = (element) => {
      if (typeof element.className === 'string') return element.className;
      return (element.getAttribute && element.getAttribute('class')) || '';
    };

    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        mutation.addedNodes.forEach((node) => {
          if (node.nodeType !== 1) return; // element nodes only

          const element = node;
          const text = (element.textContent || '').toLowerCase();
          const className = classNameOf(element).toLowerCase();
          const id = (element.id || '').toLowerCase();

          if (
            text.includes('cookie') ||
            text.includes('consent') ||
            text.includes('privacy policy') ||
            className.includes('cookie') ||
            className.includes('consent') ||
            className.includes('gdpr') ||
            id.includes('cookie') ||
            id.includes('consent')
          ) {
            console.log('Removing detected cookie banner:', element);
            element.remove();
          }
        });
      });
    });

    // Starts the observer and the periodic sweep once <body> exists.
    const startCleanup = () => {
      if (!document.body) return;

      observer.observe(document.body, { childList: true, subtree: true });

      setInterval(() => {
        const cookieElements = document.querySelectorAll(`
          [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
          .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
          .cmp-banner, .cc-banner
        `);
        cookieElements.forEach(el => el.remove());

        if (document.body) {
          document.body.style.overflow = 'auto';
        }
        document.documentElement.style.overflow = 'auto';
      }, 1000);
    };

    // This script can run before <body> has been parsed; observing a null
    // target throws, so wait for the DOM when the body is not there yet.
    if (document.body) {
      startCleanup();
    } else {
      document.addEventListener('DOMContentLoaded', startCleanup, { once: true });
    }
  });

  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
  return true;
};
|
|
|
|
|
|
|
|
/**
 * Removes ad/premium elements and strips blur effects inside the page.
 *
 * The in-page routine runs once immediately and is then repeated every second
 * for 30 seconds, after which the interval is cleared.
 *
 * @param {object} page - Puppeteer page; only `evaluate` is called on it.
 * @param {{updateProgress: Function}|null|undefined} progressTracker - Optional
 *   tracker; receives updates at 15% and 20%.
 * @returns {Promise<void>}
 */
const unblurContent = async (page, progressTracker) => {
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');

  console.log("π Unblurring content and bypassing premium restrictions...");

  await page.evaluate(() => {
    const removeRestrictions = () => {
      const removeBySelector = (selector) => {
        document.querySelectorAll(selector).forEach(el => el.remove());
      };

      removeBySelector("#adbox, .adsbox, .ad-box, .banner-ads, .advert");
      removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");

      const removeBlur = (element = document) => {
        element.querySelectorAll("*").forEach(el => {
          const style = window.getComputedStyle(el);
          const classText = el.className ? el.className.toString().toLowerCase() : '';

          if (
            style.filter?.includes("blur") ||
            style.backdropFilter?.includes("blur") ||
            parseFloat(style.opacity) < 1 ||
            classText.includes("blur") ||
            classText.includes("premium")
          ) {
            // A value containing "!important" assigned through a style
            // property (el.style.filter = "none !important") is rejected by
            // the browser and silently ignored; the priority has to be
            // passed as the third argument of setProperty.
            el.style.setProperty('filter', 'none', 'important');
            el.style.setProperty('backdrop-filter', 'none', 'important');
            el.style.setProperty('opacity', '1', 'important');

            if (el.classList) {
              el.classList.remove("blur", "blurred", "premium-blur");
            }
          }
        });
      };

      removeBlur();
      removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i]');

      const contentSelectors = [
        '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
        '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
      ];

      // Force every likely content container to be visible and interactive.
      contentSelectors.forEach(selector => {
        document.querySelectorAll(selector).forEach(el => {
          el.style.setProperty('filter', 'none', 'important');
          el.style.setProperty('opacity', '1', 'important');
          el.style.setProperty('visibility', 'visible', 'important');
          el.style.setProperty('display', 'block', 'important');
          el.style.setProperty('pointer-events', 'auto', 'important');
        });
      });
    };

    removeRestrictions();

    // Keep re-applying for 30 s so restrictions added later are removed too.
    const intervalId = setInterval(removeRestrictions, 1000);
    setTimeout(() => clearInterval(intervalId), 30000);
  });

  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
};
|
|
|
|
|
/**
 * Appends a <style id="print-style-extension"> element to the page's <head>
 * containing the print rules used for PDF output: A4 portrait with zero
 * margins, site chrome hidden, and a page break after every page/image.
 *
 * @param {object} page - Puppeteer page; only `evaluate` is called on it.
 * @param {{updateProgress: Function}|null|undefined} progressTracker - Optional
 *   tracker; receives updates at 85% and 88%.
 * @returns {Promise<void>}
 */
const applyPrintStyles = async (page, progressTracker) => {
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');

  console.log("π¨οΈ Applying print styles for clean PDF...");

  await page.evaluate(() => {
    const printCss = `
      @page {
        size: A4 portrait;
        margin: 0mm;
      }
      @media print {
        html, body {
          width: 210mm !important;
          height: auto !important;
          margin: 0 !important;
          padding: 0 !important;
          overflow: visible !important;
          background: white !important;
          color: black !important;
        }
        header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
        [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
        .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
        .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
        .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
        .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
        .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
          display: none !important;
        }
        * {
          box-shadow: none !important;
          background: transparent !important;
          color: inherit !important;
        }
        .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
        .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
          position: static !important;
          display: block !important;
          width: 100% !important;
          max-width: none !important;
          margin: 0 !important;
          padding: 0 !important;
          box-sizing: border-box;
          transform: none !important;
        }
        [data-page], .page, .document-page, img {
          page-break-after: always !important;
          page-break-inside: avoid !important;
          page-break-before: avoid !important;
          width: 100% !important;
          max-width: 100% !important;
          height: auto !important;
          display: block !important;
          margin: 0 !important;
          padding: 0 !important;
        }
      }
    `;

    // Same element as before: id first, then the stylesheet text.
    const styleElement = Object.assign(document.createElement("style"), {
      id: "print-style-extension",
      innerHTML: printCss,
    });
    document.head.appendChild(styleElement);
  });

  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
};
|
|
|
|
|
|
|
|
/**
 * Renders a StuDocu document page to a PDF using a throwaway browser.
 *
 * Flow: create a temporary user-data directory, launch the browser, block
 * third-party/tracking requests, navigate and wait for `#search-input`,
 * run the cookie bypass and unblur helpers, scroll the page so all content
 * loads, wait for images, apply print styles and finally print to PDF.
 * The browser is closed and the temporary directory removed in every case.
 *
 * @param {string} url - Address of the document page to open.
 * @param {{email?: string, password?: string}} [options] - Optional
 *   credentials. NOTE: they only trigger a progress message; no login is
 *   performed by this function.
 * @param {{updateProgress: Function, progress?: number}|null} [progressTracker]
 *   Optional tracker; receives `-1 / 'error'` when the run fails.
 * @returns {Promise<Buffer>} The generated PDF.
 * @throws {Error} When navigation fails or any later step rejects; the
 *   original error is rethrown (navigation failures carry it as `cause`).
 */
const studocuDownloader = async (url, options = {}, progressTracker = null) => {
  let browser;
  let userDataDir = null;

  // Set to true to launch a visible (non-headless) browser.
  const isDebugging = false;

  // The helpers report fixed percentages (5-20, 85-88) that are lower than
  // values already reported here, which made the progress jump backwards
  // (e.g. 70 -> 15). This wrapper forwards their status/message unchanged
  // but never lets the percentage decrease.
  const helperTracker = progressTracker && {
    updateProgress: (progress, status, message) => {
      const current = progressTracker.progress;
      const clamped = Number.isFinite(current) ? Math.max(progress, current) : progress;
      progressTracker.updateProgress(clamped, status, message);
    },
  };

  try {
    progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');

    const tempDir = os.tmpdir();
    userDataDir = await fs.mkdtemp(path.join(tempDir, 'puppeteer-'));
    console.log(`π Created temporary user data directory: ${userDataDir}`);

    console.log("π Launching browser with enhanced stealth configuration...");
    browser = await puppeteerExtra.launch({
      headless: !isDebugging,
      userDataDir: userDataDir,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-infobars',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080'
      ],
      ignoreHTTPSErrors: true,
    });

    const page = await browser.newPage();
    progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    const staticTypes = ['image', 'media', 'font', 'stylesheet'];
    const trackerMarkers = [
      'doubleclick', 'googletagmanager', 'facebook.com', 'twitter.com', 'analytics',
      'gtm', 'hotjar', 'mixpanel', 'onetrust', 'cookielaw'
    ];

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const resourceType = req.resourceType();
      const reqUrl = req.url().toLowerCase();

      // Top-level and frame documents are always allowed through.
      let blocked = false;
      if (resourceType !== 'document') {
        const isForeignStatic =
          staticTypes.includes(resourceType) &&
          !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu');
        const isForeignScript = resourceType === 'script' && !reqUrl.includes('studocu');
        const isTracker = trackerMarkers.some((marker) => reqUrl.includes(marker));
        const isTrackingBeacon = resourceType === 'other' && reqUrl.includes('/track/');
        blocked = isForeignStatic || isForeignScript || isTracker || isTrackingBeacon;
      }

      // continue()/abort() return promises; a rejection (for example when the
      // page is already closing) must not surface as an unhandled rejection.
      Promise.resolve(blocked ? req.abort() : req.continue()).catch((err) => {
        console.log(`Request interception failed for ${reqUrl}:`, err.message);
      });
    });

    progressTracker?.updateProgress(5, 'navigating', 'Navigating to document...');
    console.log(`π‘οΈ Navigating to ${url} and preparing for Cloudflare challenge...`);
    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120000 });

      console.log("β³ Waiting for Cloudflare challenge to be solved...");
      progressTracker?.updateProgress(8, 'solving_cf', 'Solving Cloudflare challenge...');

      // The search input is used as the signal that the real page has loaded.
      await page.waitForSelector('#search-input', { timeout: 90000 });

      console.log("β Cloudflare challenge passed! You are on the Studocu page.");
      progressTracker?.updateProgress(10, 'navigation_complete', 'Successfully navigated to document');
    } catch (e) {
      console.error("β Failed to bypass Cloudflare or navigate to the page.", e.message);

      // The screenshot is a debugging aid only; if taking it fails as well,
      // that failure must not replace the navigation error reported below.
      try {
        const screenshotPath = path.join(os.tmpdir(), `cloudflare_failure_${Date.now()}.png`);
        await page.screenshot({ path: screenshotPath, fullPage: true });
        console.log(`πΈ Screenshot saved to ${screenshotPath}`);
      } catch (screenshotError) {
        console.log("Could not capture failure screenshot:", screenshotError.message);
      }

      throw new Error(
        "Could not bypass Cloudflare. The site may be actively blocking, or the page structure changed.",
        { cause: e }
      );
    }

    await bypassCookiesAndRestrictions(page, helperTracker);

    if (options.email && options.password) {
      // TODO: no login is implemented; only the progress message is emitted.
      progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
    }

    progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
    await new Promise(resolve => setTimeout(resolve, 2000));

    await unblurContent(page, helperTracker);

    progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
    console.log("β³ Waiting for document content to load...");

    const contentSelectors = [
      '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
      'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
    ];
    let contentFound = false;
    for (const selector of contentSelectors) {
      try {
        await page.waitForSelector(selector, { timeout: 10000 });
        console.log(`β Found content with selector: ${selector}`);
        contentFound = true;
        break;
      } catch (e) {
        console.log(`β Selector ${selector} not found, trying next...`);
      }
    }

    if (!contentFound) {
      console.log("β οΈ No specific content selector found, proceeding with page content...");
    }

    progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
    console.log("π Loading all document pages with enhanced slow scroll...");

    await page.evaluate(async () => {
      const delay = (ms) => new Promise((res) => setTimeout(res, ms));
      const distance = 600;
      // Upper bound so a page whose height keeps growing cannot loop forever.
      const maxPasses = 100;

      let scrollHeight = document.body.scrollHeight;
      for (let pass = 0; pass < maxPasses; pass += 1) {
        let totalHeight = 0;
        while (totalHeight < scrollHeight) {
          window.scrollBy(0, distance);
          totalHeight += distance;
          await delay(200);
        }
        await delay(1000);

        // Stop once a full pass no longer makes the document taller.
        const newHeight = document.body.scrollHeight;
        if (newHeight === scrollHeight) break;
        scrollHeight = newHeight;
      }

      window.scrollTo({ top: 0, behavior: "smooth" });
      await delay(500);
    });

    progressTracker?.updateProgress(70, 'processing', 'Processing loaded content...');

    // Second pass: content loaded while scrolling may carry restrictions too.
    await unblurContent(page, helperTracker);

    progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
    console.log("πΌοΈ Waiting for all images to load...");

    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));
      await Promise.all(images.map(img => {
        if (img.complete) return Promise.resolve();
        return new Promise((resolve) => {
          img.addEventListener('load', resolve);
          img.addEventListener('error', resolve);
          // Give up on a single image after 5 s.
          setTimeout(resolve, 5000);
        });
      }));
    });

    await new Promise(resolve => setTimeout(resolve, 2000));
    progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');

    await page.evaluate(() => {
      const getDocumentHeight = () => Math.max(
        document.body.scrollHeight, document.body.offsetHeight,
        document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
      );
      const height = getDocumentHeight();
      // NOTE(review): values containing "!important" assigned through a style
      // property are ignored by browsers, so these three lines are presumably
      // no-ops. Left unchanged on purpose: making them effective would pin the
      // height and hide overflow for printing — confirm the intent first.
      document.body.style.height = `${height}px !important`;
      document.documentElement.style.height = `${height}px !important`;
      document.body.style.overflow = 'hidden !important';
    });

    const contentCheck = await page.evaluate(() => {
      const textContent = document.body.textContent || '';
      const images = document.querySelectorAll('img');
      const documentImages = Array.from(images).filter(img =>
        img.src.includes('document') || img.src.includes('page') ||
        img.alt.includes('document') || img.alt.includes('page')
      );
      return {
        totalText: textContent.length,
        totalImages: images.length,
        documentImages: documentImages.length,
        hasDocumentContent: documentImages.length > 0 || textContent.length > 1000
      };
    });

    console.log("π Content verification:", {
      textLength: contentCheck.totalText,
      images: contentCheck.totalImages,
      documentImages: contentCheck.documentImages,
      hasContent: contentCheck.hasDocumentContent
    });

    if (!contentCheck.hasDocumentContent) {
      console.warn("β οΈ Warning: Limited document content detected.");
    }

    await applyPrintStyles(page, helperTracker);
    await page.emulateMediaType('print');

    progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
    console.log("π Generating PDF...");

    const pdfData = await page.pdf({
      printBackground: true,
      preferCSSPageSize: true,
      displayHeaderFooter: false,
      timeout: 60000,
      scale: 1,
      omitBackground: false
    });

    // Depending on the Puppeteer version page.pdf() yields a Buffer or a plain
    // Uint8Array; callers (e.g. Express res.send) need a real Buffer.
    const pdfBuffer = Buffer.isBuffer(pdfData) ? pdfData : Buffer.from(pdfData);

    progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
    console.log(`β PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
    return pdfBuffer;
  } catch (error) {
    progressTracker?.updateProgress(-1, 'error', error.message);
    console.error("β Error during PDF generation:", error);
    throw error;
  } finally {
    if (browser) {
      console.log("π Closing browser...");
      try {
        await browser.close();
      } catch (e) {
        console.log("Error closing browser:", e.message);
      }
    }
    if (userDataDir) {
      console.log(`ποΈ Cleaning up temporary directory: ${userDataDir}`);
      try {
        await fs.rm(userDataDir, { recursive: true, force: true });
        console.log("β Temporary directory cleaned up.");
      } catch (e) {
        console.error(`β Failed to clean up temporary directory ${userDataDir}:`, e.message);
      }
    }
  }
};
|
|
|
|
|
|
|
|
// How long a finished job (including its PDF buffer) is kept for download
// before it is evicted from memory.
const JOB_RETENTION_MS = 30 * 60 * 1000;

/**
 * Checks that `candidate` is an http(s) URL hosted on studocu.com or one of
 * its subdomains. A plain substring test is not enough: it would also accept
 * e.g. `http://internal-host/?x=studocu.com` and let the browser open it.
 *
 * @param {unknown} candidate - Value received from the request body.
 * @returns {boolean}
 */
const isStudocuUrl = (candidate) => {
  if (typeof candidate !== 'string') return false;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return false;
  }

  const host = parsed.hostname.toLowerCase();
  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
  return isHttp && (host === 'studocu.com' || host.endsWith('.studocu.com'));
};

/**
 * POST /api/request-download
 * Body: { url, email?, password? }
 *
 * Validates the URL, registers a new session, answers immediately with
 * `{ sessionId }` and runs the download in the background. The outcome is
 * stored in `downloadJobs` and picked up through the progress/download routes.
 * Responds 400 when the URL is missing or not a StuDocu address.
 */
app.post('/api/request-download', (req, res) => {
  // req.body can be undefined when no JSON body was parsed.
  const { url, email, password } = req.body ?? {};
  if (!isStudocuUrl(url)) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  // Random ids cannot collide when two requests arrive in the same
  // millisecond and cannot be guessed to fetch someone else's PDF.
  const sessionId = require('crypto').randomUUID();
  const progressTracker = new ProgressTracker(sessionId);

  progressTrackers.set(sessionId, progressTracker);
  downloadJobs.set(sessionId, { status: 'processing' });

  console.log(`π― Processing request for: ${url} [Session: ${sessionId}]`);

  res.json({ sessionId });

  // Records the final job state, drops the live tracker and schedules
  // eviction so finished jobs do not accumulate in memory forever.
  const finishJob = (job) => {
    downloadJobs.set(sessionId, job);
    progressTrackers.delete(sessionId);
    // unref(): a pending eviction must not keep the process alive.
    setTimeout(() => downloadJobs.delete(sessionId), JOB_RETENTION_MS).unref();
  };

  studocuDownloader(url, { email, password }, progressTracker)
    .then(pdfBuffer => {
      finishJob({ status: 'completed', buffer: pdfBuffer });
    })
    .catch(error => {
      finishJob({ status: 'error', message: error.message });
    });
});
|
|
|
|
|
/**
 * GET /api/progress/:sessionId
 *
 * While a tracker is registered its current state is returned. Otherwise the
 * stored job decides the answer: a 'completed' or 'error' summary, or 404
 * when nothing matches the session id.
 */
app.get('/api/progress/:sessionId', (req, res) => {
  const sessionId = req.params.sessionId;

  const liveTracker = progressTrackers.get(sessionId);
  if (liveTracker) {
    const { progress, status, message } = liveTracker;
    return res.json({
      sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString()
    });
  }

  const finishedJob = downloadJobs.get(sessionId);
  switch (finishedJob?.status) {
    case 'completed':
      return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
    case 'error':
      return res.json({ sessionId, progress: -1, status: 'error', message: finishedJob.message });
    default:
      return res.status(404).json({ error: 'Session not found' });
  }
});
|
|
|
|
|
/**
 * GET /api/download/:sessionId
 *
 * Sends the finished PDF as an attachment.
 * Responses: 404 unknown session, 400 still processing, 500 failed job or
 * inconsistent job record, 200 with the PDF otherwise.
 */
app.get('/api/download/:sessionId', (req, res) => {
  const { sessionId } = req.params;
  const job = downloadJobs.get(sessionId);

  if (!job) {
    return res.status(404).json({ error: 'Download session not found or expired.' });
  }

  if (job.status === 'processing') {
    return res.status(400).json({ error: 'Download is still processing.' });
  }

  if (job.status === 'error') {
    return res.status(500).json({ error: `Failed to generate PDF: ${job.message}` });
  }

  if (job.status === 'completed' && job.buffer) {
    // res.send() only treats real Buffers as binary; a plain Uint8Array
    // (which the PDF generator may return) would be serialised as JSON and
    // produce a corrupt download, so coerce it first.
    const pdf = Buffer.isBuffer(job.buffer) ? job.buffer : Buffer.from(job.buffer);

    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
    return res.send(pdf);
  }

  return res.status(500).json({ error: 'An unknown error occurred.' });
});
|
|
|
|
|
/**
 * GET /health
 * Reports a fixed 'healthy' status together with the current time, the
 * process uptime and the number of registered progress trackers.
 */
app.get('/health', (req, res) => {
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    activeDownloads: progressTrackers.size
  };
  res.json(report);
});
|
|
|
|
|
/**
 * GET /
 * Returns a static description of the API: banner message, version,
 * feature list and the available endpoints.
 */
app.get('/', (req, res) => {
  const features = [
    'π‘οΈ Cloudflare JS Challenge Bypass',
    'πͺ Advanced cookie banner bypass',
    'π Premium content unblurring',
    'π Login support for full access',
    'π Real-time progress tracking via polling',
    'π Clean PDF generation with print styles',
    'π΅οΈ Enhanced stealth to evade bot detection'
  ];

  const endpoints = {
    request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
    progress: 'GET /api/progress/:sessionId',
    download: 'GET /api/download/:sessionId',
    health: 'GET /health'
  };

  res.json({
    message: 'π Enhanced StuDocu Downloader API v5.3 - Real-time Progress Tracking with Cloudflare Bypass',
    version: '5.3.0',
    features,
    endpoints
  });
});
|
|
|
|
|
// On SIGTERM or SIGINT: log the signal name and exit with status 0.
for (const signalName of ['SIGTERM', 'SIGINT']) {
  process.on(signalName, () => {
    console.log(`${signalName} received, shutting down gracefully...`);
    process.exit(0);
  });
}
|
|
|
|
|
// Start the HTTP server; the two banner lines are printed once it is listening.
const announceStartup = () => {
  const banner = [
    `π Enhanced StuDocu Downloader v5.3.0 running on http://localhost:${port}`,
    `β¨ Features: Cloudflare Bypass, Real-time progress tracking, enhanced stealth, and user feedback`,
  ];
  for (const line of banner) {
    console.log(line);
  }
};

app.listen(port, announceStartup);