test / server.js
devusman's picture
ipdaye
1ff2605
raw
history blame
27.5 kB
const express = require('express');
const puppeteer = require('puppeteer');
const cors = require('cors');
// Express application instance; every route below is registered on it.
const app = express();
// TCP port the HTTP server is started on (see app.listen at the end of the file).
const port = 7860;
// Register the cors middleware (called without options) for all routes.
app.use(cors());
// Parse JSON request bodies so handlers can read them from req.body.
app.use(express.json());
/**
 * Advanced cookie banner and content bypass for StuDocu.
 *
 * Prepares a Puppeteer page (normally before navigation):
 *   1. pre-sets consent cookies on the `.studocu.com` domain (best-effort);
 *   2. registers a stylesheet that hides cookie banners, removes blur filters
 *      and restores scrolling;
 *   3. registers an init script that stubs consent/analytics globals and
 *      removes cookie banners that are inserted dynamically.
 *
 * The stylesheet and the cleanup logic are installed through
 * `evaluateOnNewDocument`, because anything injected into the current
 * (pre-navigation) document is discarded by the next `page.goto`.
 *
 * @param {object} page - Puppeteer page; uses `setCookie`, `addStyleTag` and
 *   `evaluateOnNewDocument`.
 * @returns {Promise<boolean>} Always resolves to `true`.
 */
const bypassCookiesAndRestrictions = async (page) => {
  console.log("🍪 Starting comprehensive cookie and restriction bypass...");

  // Step 1: Set cookies before page load. A failing cookie must not abort
  // the whole bypass, so each one is attempted independently.
  const preCookies = [
    { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
    { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
    { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
    { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
    { name: 'user_consent', value: '1', domain: '.studocu.com' },
    { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
    { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
    { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
  ];
  for (const cookie of preCookies) {
    try {
      await page.setCookie(cookie);
    } catch (e) {
      console.log(`Failed to set cookie ${cookie.name}:`, e.message);
    }
  }

  // Step 2: CSS that hides cookie banners and lifts visual restrictions.
  // Only standard CSS selectors are allowed here: a single unsupported
  // selector (such as Playwright's `:has-text()`) invalidates the entire rule.
  // `html` and `body` are excluded so a class like "cookie-banner-open" on the
  // root can never hide the whole page.
  const bypassCss = `
    /* Hide all possible cookie banners */
    [id*="cookie" i]:not(html):not(body):not(img):not(input),
    [class*="cookie" i]:not(html):not(body):not(img):not(input),
    [data-testid*="cookie" i],
    [aria-label*="cookie" i],
    .gdpr-banner, .gdpr-popup, .gdpr-modal,
    .consent-banner, .consent-popup, .consent-modal,
    .privacy-banner, .privacy-popup, .privacy-modal,
    .cookie-law, .cookie-policy, .cookie-compliance,
    .onetrust-banner-sdk, #onetrust-consent-sdk,
    .cmp-banner, .cmp-popup, .cmp-modal,
    [class*="CookieBanner"], [class*="CookieNotice"],
    [class*="ConsentBanner"], [class*="ConsentManager"],
    .cc-banner, .cc-window, .cc-compliance {
      display: none !important;
      visibility: hidden !important;
      opacity: 0 !important;
      z-index: -9999 !important;
      pointer-events: none !important;
    }
    /* Remove blur and premium overlays */
    [class*="blur" i], [class*="premium" i],
    [class*="paywall" i], [class*="sample-preview-blur" i] {
      filter: none !important;
      backdrop-filter: none !important;
      opacity: 1 !important;
      visibility: visible !important;
    }
    /* Ensure document content is visible */
    .document-content, .page-content, [data-page] {
      filter: none !important;
      opacity: 1 !important;
      visibility: visible !important;
      pointer-events: auto !important;
    }
    /* Remove fixed overlays */
    .fixed-overlay, .sticky-overlay, .content-overlay {
      display: none !important;
    }
    /* Restore scrolling */
    html, body {
      overflow: auto !important;
      position: static !important;
    }
  `;

  // Apply to the document that is currently loaded (useful when the caller
  // invokes this after navigation). Best-effort: a failure here must not
  // prevent the init script below from being registered.
  try {
    await page.addStyleTag({ content: bypassCss });
  } catch (e) {
    console.log('Failed to inject bypass CSS into the current document:', e.message);
  }

  // Step 3: Init script executed in every new document before page scripts.
  // The function is serialized, so it may only use its own locals and the
  // `css` argument.
  await page.evaluateOnNewDocument((css) => {
    const STYLE_ID = 'studocu-bypass-style';
    // Banners are short; long text blocks are far more likely to be content.
    const MAX_BANNER_TEXT_LENGTH = 1500;
    const NEVER_REMOVE_TAGS = [
      'HTML', 'HEAD', 'BODY', 'SCRIPT', 'STYLE', 'LINK', 'META', 'NOSCRIPT', 'TEMPLATE',
    ];
    const CONTENT_MARKERS = '.document-content, .page-content, [data-page]';
    const CLEANUP_SELECTOR = `
      [id*="cookie" i]:not(img):not(input),
      [class*="cookie" i]:not(img):not(input),
      [data-testid*="cookie" i],
      .gdpr-banner, .consent-banner, .privacy-banner,
      .onetrust-banner-sdk, #onetrust-consent-sdk,
      .cmp-banner, .cc-banner
    `;

    // Override common cookie consent / analytics functions.
    window.cookieConsent = { accepted: true };
    window.gtag = () => { };
    window.ga = () => { };
    window.dataLayer = [];

    // Insert the stylesheet once the document has a node to attach it to.
    const injectCss = () => {
      const parent = document.head || document.documentElement;
      if (!parent || document.getElementById(STYLE_ID)) return;
      const style = document.createElement('style');
      style.id = STYLE_ID;
      style.textContent = css;
      parent.appendChild(style);
    };

    const includesAny = (haystack, needles) => needles.some((needle) => haystack.includes(needle));

    // True for nodes that must survive: structural/non-visual elements and
    // anything that is, or contains, the document content itself.
    const isProtected = (element) =>
      NEVER_REMOVE_TAGS.includes(element.tagName) ||
      element.matches(CONTENT_MARKERS) ||
      element.querySelector(CONTENT_MARKERS) !== null;

    const looksLikeCookieBanner = (element) => {
      // getAttribute keeps this safe for SVG nodes, whose `className` is an
      // SVGAnimatedString rather than a string.
      const className = (element.getAttribute('class') || '').toLowerCase();
      const id = (element.id || '').toLowerCase();
      if (includesAny(className, ['cookie', 'consent', 'gdpr'])) return true;
      if (includesAny(id, ['cookie', 'consent'])) return true;
      const text = (element.textContent || '').toLowerCase();
      return (
        text.length <= MAX_BANNER_TEXT_LENGTH &&
        includesAny(text, ['cookie', 'consent', 'privacy policy'])
      );
    };

    // Mutation observer to catch dynamically added cookie banners. It watches
    // `document` itself because `document.body` does not exist yet when this
    // script runs.
    const observer = new MutationObserver((mutations) => {
      for (const mutation of mutations) {
        for (const node of mutation.addedNodes) {
          if (node.nodeType !== 1) continue; // element nodes only
          if (NEVER_REMOVE_TAGS.includes(node.tagName)) continue;
          if (looksLikeCookieBanner(node) && !isProtected(node)) {
            console.log('Removing detected cookie banner:', node);
            node.remove();
          }
        }
      }
    });
    observer.observe(document, { childList: true, subtree: true });

    injectCss();
    document.addEventListener('DOMContentLoaded', injectCss, { once: true });

    // Periodic cleanup for banners that slipped through the observer.
    setInterval(() => {
      injectCss();
      document.querySelectorAll(CLEANUP_SELECTOR).forEach((el) => {
        if (!isProtected(el)) el.remove();
      });
      // Restore scrolling in case a banner locked it.
      if (document.body) document.body.style.overflow = 'auto';
      if (document.documentElement) document.documentElement.style.overflow = 'auto';
    }, 1000);
  }, bypassCss);

  return true;
};
/**
 * Enhanced content unblurring and premium bypass (integrated from extension script).
 *
 * Runs inside the page: removes ad/premium containers and overlays, strips
 * blur/opacity effects, and forces the document content to be visible. The
 * cleanup is executed immediately and then every 2 seconds for 60 seconds to
 * catch content that is inserted later. Calling this function again replaces
 * the previously scheduled cleanup instead of stacking a second one.
 *
 * @param {object} page - Puppeteer page; uses `evaluate`.
 * @returns {Promise<void>}
 */
const unblurContent = async (page) => {
  console.log("🔓 Unblurring content and bypassing premium restrictions...");
  await page.evaluate(() => {
    // `el.style.filter = "none !important"` is silently ignored by the CSSOM;
    // the priority has to be passed separately via setProperty.
    const setImportant = (el, property, value) => {
      el.style.setProperty(property, value, 'important');
    };

    // Selectors such as [class*="premium" i] can match <html>/<body>;
    // removing those would wipe the entire page.
    const safeRemove = (el) => {
      if (el === document.documentElement || el === document.body) return;
      el.remove();
    };

    const lowerClassName = (el) => (el.className ? el.className.toString().toLowerCase() : '');

    // Function to remove all visual restrictions
    const removeRestrictions = () => {
      const removeBySelector = (selector) => {
        document.querySelectorAll(selector).forEach((el) => safeRemove(el));
      };

      // Remove ads by known class or ID, plus the premium banner container.
      [
        '#adbox',
        '.adsbox',
        '.ad-box',
        '.banner-ads',
        '.advert',
        '.PremiumBannerBlobWrapper_overflow-wrapper__xsaS8',
      ].forEach((selector) => removeBySelector(selector));

      // Enhanced blur removal. Every element (parents included) is visited by
      // the "*" traversal, so no separate parent pass is needed.
      const removeBlur = (root = document) => {
        root.querySelectorAll("*").forEach((el) => {
          const style = window.getComputedStyle(el);
          const classes = lowerClassName(el);
          const isRestricted =
            style.filter?.includes("blur") ||
            style.backdropFilter?.includes("blur") ||
            parseFloat(style.opacity) < 1 ||
            classes.includes("blur") ||
            classes.includes("premium");
          if (!isRestricted) return;
          setImportant(el, 'filter', 'none');
          setImportant(el, 'backdrop-filter', 'none');
          setImportant(el, 'opacity', '1');
          el.classList?.remove("blur", "blurred", "premium-blur");
        });
      };

      // Remove dark overlays and paywall-like elements
      document.querySelectorAll("div, section, aside").forEach((el) => {
        const style = window.getComputedStyle(el);
        const classes = lowerClassName(el);
        const isBackdrop =
          style.backgroundColor.includes("rgba") &&
          (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000);
        if (isBackdrop || classes.includes("overlay") || classes.includes("paywall")) {
          safeRemove(el);
        }
      });

      removeBlur();

      // Remove other restrictions.
      // NOTE(review): this deletes every element whose class mentions
      // blur/premium/paywall, including ones that were just unblurred above —
      // confirm that real document pages never carry such classes.
      removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i]');

      // Ensure document content is visible
      const contentSelectors = [
        '.document-content', '.page-content', '.content',
        '[data-page]', '[data-testid*="document"]', '[data-testid*="page"]',
        '.page', '.document-page', 'main', 'article'
      ];
      contentSelectors.forEach((selector) => {
        document.querySelectorAll(selector).forEach((el) => {
          setImportant(el, 'filter', 'none');
          setImportant(el, 'opacity', '1');
          setImportant(el, 'visibility', 'visible');
          setImportant(el, 'display', 'block');
          setImportant(el, 'pointer-events', 'auto');
        });
      });

      // Remove overlay divs that might be blocking content. Matching is
      // case-insensitive so labels like "Premium" or "Subscribe" are caught.
      const overlayKeywords = ['premium', 'unlock', 'subscribe', 'cookie', 'consent', 'login'];
      const overlays = document.querySelectorAll(`
        [class*="overlay" i], [class*="modal" i], [class*="popup" i],
        [class*="banner" i], [style*="position: fixed"],
        [style*="position: absolute"][style*="z-index"]
      `);
      overlays.forEach((overlay) => {
        const text = (overlay.textContent || '').toLowerCase();
        if (overlayKeywords.some((keyword) => text.includes(keyword))) {
          safeRemove(overlay);
        }
      });
    };

    // Cancel the schedule left behind by an earlier call on this document.
    if (window.__unblurIntervalId) clearInterval(window.__unblurIntervalId);
    if (window.__unblurTimeoutId) clearTimeout(window.__unblurTimeoutId);

    // Run immediately
    removeRestrictions();

    // Run periodically
    const intervalId = setInterval(removeRestrictions, 2000);
    window.__unblurIntervalId = intervalId;

    // Clean up after 60 seconds
    window.__unblurTimeoutId = setTimeout(() => {
      clearInterval(intervalId);
    }, 60000);
  });
};
/**
 * Apply print styles for clean PDF output (integrated from extension script with improvements).
 *
 * Appends a `<style id="print-style-extension">` element to the page's
 * `<head>` that sets an A4 page box and, for print media, hides site chrome
 * and lays out the document pages one per printed page. Calling it again
 * replaces the previously injected element instead of adding a duplicate.
 *
 * @param {object} page - Puppeteer page; uses `evaluate`.
 * @returns {Promise<void>}
 */
const applyPrintStyles = async (page) => {
  console.log("🖨️ Applying print styles for clean PDF...");
  await page.evaluate(() => {
    const STYLE_ID = "print-style-extension";

    // Keep the operation idempotent: drop the element from a previous call.
    document.getElementById(STYLE_ID)?.remove();

    const style = document.createElement("style");
    style.id = STYLE_ID;
    // textContent is sufficient for a stylesheet and avoids HTML parsing.
    style.textContent = `
      @page {
        size: A4 portrait;
        margin: 5mm;
      }
      @media print {
        html, body {
          margin: 0 !important;
          padding: 0 !important;
          overflow: visible !important;
        }
        header, footer, nav, aside, .no-print, .ads, .sidebar,
        .premium-banner, .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho,
        .Sidebar_sidebar-scrollable__kqeBZ, .HeaderWrapper_header-wrapper__mCmf3,
        .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
        .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
        .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper,
        #top-bar-wrapper, .Layout_sidebar-wrapper__unavM,
        .Layout_is-open__9DQr4 {
          display: none !important;
        }
        body {
          background: white !important;
          color: black !important;
        }
        * {
          box-shadow: none !important;
          background: transparent !important;
        }
        .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
        .Viewer_document-wrapper__XsO4j, .page-content {
          display: flex !important;
          flex-direction: column !important;
          width: 100% !important;
          max-width: 210mm !important;
          margin: 0 auto !important;
        }
        [data-page], .page, .document-page, img {
          page-break-after: always !important;
          page-break-inside: avoid !important;
          page-break-before: avoid !important;
          width: 100% !important;
          max-width: 100% !important;
          height: auto !important;
        }
      }
    `;
    document.head.appendChild(style);
  });
};
/**
 * Enhanced StuDocu downloader with comprehensive bypasses and login support.
 *
 * Launches headless Chromium, optionally logs in, opens `url`, removes
 * banners/overlays, scrolls through the document so lazily loaded pages are
 * rendered, and prints the result to PDF. The browser is always closed before
 * the function settles.
 *
 * @param {string} url - Document URL to open.
 * @param {object} [options]
 * @param {string} [options.email] - Account e-mail; login is attempted only
 *   when both `email` and `password` are provided.
 * @param {string} [options.password] - Account password.
 * @returns {Promise<Buffer|Uint8Array>} The PDF bytes as returned by
 *   `page.pdf()`.
 * @throws {Error} With a user-facing message; the underlying error is
 *   available as `error.cause`.
 */
const studocuDownloader = async (url, options = {}) => {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  let browser;
  try {
    console.log("🚀 Launching browser with stealth configuration...");
    browser = await puppeteer.launch({
      headless: true,
      // NOTE(review): --no-sandbox and --disable-web-security weaken browser
      // isolation; the process then relies on URL validation by the caller.
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-features=VizDisplayCompositor',
        '--disable-background-networking',
        '--disable-background-timer-throttling',
        '--disable-renderer-backgrounding',
        '--disable-backgrounding-occluded-windows',
        '--disable-ipc-flooding-protection',
        '--disable-web-security',
        '--disable-features=site-per-process',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions'
      ],
      timeout: 300000,
    });
    const page = await browser.newPage();

    // Set realistic browser characteristics (viewport is A4 at 96 dpi).
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 794, height: 1122 });

    // Hide webdriver property
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
    });

    // Set up cookie and content bypass
    await bypassCookiesAndRestrictions(page);

    // Block trackers, ads, and analytics
    const blockedUrlFragments = [
      'doubleclick', 'googletagmanager', 'facebook.com', 'twitter.com',
      'analytics', 'gtm', 'hotjar', 'mixpanel', 'onetrust', 'cookielaw',
    ];
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const reqUrl = req.url();
      const shouldBlock =
        blockedUrlFragments.some((fragment) => reqUrl.includes(fragment)) ||
        (req.resourceType() === 'other' && reqUrl.includes('track'));
      const settled = shouldBlock ? req.abort() : req.continue();
      // abort()/continue() reject when the page or browser goes away while a
      // request is in flight; that is expected during shutdown and must not
      // surface as an unhandled rejection.
      settled.catch(() => { });
    });

    // Login if credentials provided (for premium content)
    if (options.email && options.password) {
      console.log("🔑 Logging in to StuDocu...");
      await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
      await page.waitForSelector('#email', { timeout: 15000 });
      await page.type('#email', options.email);
      await page.type('#password', options.password);
      try {
        // Start waiting before clicking: otherwise a fast navigation can
        // finish before waitForNavigation is registered and the wait times out.
        await Promise.all([
          page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
          page.click('button[type="submit"]'),
        ]);
        // Additional check for successful login
        await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
        console.log("✅ Login successful.");
      } catch (e) {
        console.error("❌ Login failed:", e.message);
        throw new Error("Login failed. Check credentials, if CAPTCHA is present, or try again.", { cause: e });
      }
    } else {
      console.log("⚠️ No login credentials provided. Full unblurred content requires premium account.");
    }

    console.log(`📄 Navigating to ${url}...`);
    // Navigate with retry logic
    const maxAttempts = 3;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        console.log(`Navigation attempt ${attempt}/${maxAttempts}`);
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
        break;
      } catch (e) {
        console.log(`Navigation attempt ${attempt} failed:`, e.message);
        if (attempt >= maxAttempts) throw e;
        await sleep(5000);
      }
    }

    // Wait for initial load
    await sleep(5000);

    // Apply content unblurring
    await unblurContent(page);

    // Wait for document content. All candidate selectors are awaited together
    // so the worst case is one 20 s timeout instead of one per selector.
    console.log("⏳ Waiting for document content to load...");
    const contentSelectors = [
      '.document-content', '.page-content', '[data-page]',
      '[data-testid*="document"]', 'img[src*="document"]',
      'img[src*="page"]', '.page', 'main img', 'article img'
    ];
    try {
      await page.waitForSelector(contentSelectors.join(', '), { timeout: 20000 });
      const matchedSelector = await page.evaluate(
        (selectors) => selectors.find((selector) => document.querySelector(selector)) ?? null,
        contentSelectors
      );
      console.log(`✅ Found content with selector: ${matchedSelector}`);
    } catch (e) {
      console.log("⚠️ No specific content selector found, proceeding with page content...");
    }

    // Scroll through the page so lazily loaded pages render. Each pass
    // continues from the current position, and the number of passes is capped
    // so a page that keeps growing cannot block forever.
    console.log("📜 Loading all document pages with enhanced slow scroll...");
    await page.evaluate(async () => {
      const delay = (ms) => new Promise((res) => setTimeout(res, ms));
      const distance = 300;
      const maxPasses = 20;
      let scrollHeight = document.body.scrollHeight;
      for (let pass = 0; pass < maxPasses; pass++) {
        let scrolled = window.scrollY;
        while (scrolled < scrollHeight) {
          window.scrollBy(0, distance);
          scrolled += distance;
          await delay(500); // Increased delay for better loading
        }
        await delay(2000); // Extra wait after reaching bottom
        const newHeight = document.body.scrollHeight;
        if (newHeight === scrollHeight) break;
        scrollHeight = newHeight;
      }
      // Scroll to top
      window.scrollTo({ top: 0, behavior: "smooth" });
      await delay(1000);
    });

    // Re-apply unblur after loading new content
    await unblurContent(page);

    // Wait for all images to load (each image is given at most 15 s)
    console.log("🖼️ Waiting for all images to load...");
    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));
      await Promise.all(images.map((img) => {
        if (img.complete) return Promise.resolve();
        return new Promise((resolve) => {
          img.addEventListener('load', resolve);
          img.addEventListener('error', resolve);
          setTimeout(resolve, 15000);
        });
      }));
    });

    // Additional wait for any lazy loading
    await sleep(10000);

    // Set exact height to avoid extra blank pages
    console.log("📐 Setting exact document height...");
    await page.evaluate(() => {
      const getDocumentHeight = () => Math.max(
        document.body.scrollHeight, document.body.offsetHeight,
        document.documentElement.clientHeight, document.documentElement.scrollHeight,
        document.documentElement.offsetHeight
      );
      const height = getDocumentHeight();
      // NOTE(review): the CSSOM ignores property assignments whose value
      // contains "!important", so the three assignments below currently have
      // no effect. Making them effective (style.setProperty(name, value,
      // 'important')) would pin the height and hide overflow for print as
      // well, which can clip or pad the PDF — confirm against real output
      // before changing.
      document.body.style.height = `${height}px !important`;
      document.documentElement.style.height = `${height}px !important`;
      document.body.style.overflow = 'hidden !important';
    });

    // Final content verification
    const contentCheck = await page.evaluate(() => {
      const textContent = document.body.textContent || '';
      const images = document.querySelectorAll('img');
      const documentImages = Array.from(images).filter((img) =>
        img.src.includes('document') || img.src.includes('page') ||
        img.alt.includes('document') || img.alt.includes('page')
      );
      return {
        totalText: textContent.length,
        totalImages: images.length,
        documentImages: documentImages.length,
        hasDocumentContent: documentImages.length > 0 || textContent.length > 1000,
        sampleText: textContent.substring(0, 300)
      };
    });
    console.log("📊 Content verification:", {
      textLength: contentCheck.totalText,
      images: contentCheck.totalImages,
      documentImages: contentCheck.documentImages,
      hasContent: contentCheck.hasDocumentContent
    });
    if (!contentCheck.hasDocumentContent) {
      console.warn("⚠️ Warning: Limited document content detected. Use premium credentials for full access.");
    }

    // Apply print styles
    await applyPrintStyles(page);

    // Emulate print media
    await page.emulateMediaType('print');

    // Generate PDF
    console.log("🔄 Generating PDF...");
    const pdfBuffer = await page.pdf({
      printBackground: true,
      preferCSSPageSize: true,
      displayHeaderFooter: false,
      timeout: 180000,
      scale: 1,
      omitBackground: false
    });
    console.log(`✅ PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
    return pdfBuffer;
  } catch (error) {
    console.error("❌ Error during PDF generation:", error);
    // Tolerate non-Error throwables and keep the original error as `cause`.
    const message = error instanceof Error ? error.message : String(error);
    const userError = (text) => new Error(text, { cause: error });
    if (message.includes('timeout')) {
      throw userError("Request timed out. The document may be taking too long to load. Please try again.");
    }
    // Checked before the generic "net::" case: Chromium reports blocks as
    // "net::ERR_BLOCKED_*", which would otherwise never reach this branch.
    if (message.includes('ERR_BLOCKED')) {
      throw userError("Access blocked. Try again or check if the document is publicly accessible.");
    }
    if (message.includes('net::')) {
      throw userError("Network error. Please check the URL and your internet connection.");
    }
    throw userError(`Failed to generate PDF: ${message}`);
  } finally {
    if (browser) {
      console.log("🔒 Closing browser...");
      try {
        await browser.close();
      } catch (e) {
        console.log("Error closing browser:", e.message);
      }
    }
  }
};
// API Routes

/** File name used for the download when the client supplies none. */
const DEFAULT_PDF_FILENAME = 'studocu-document.pdf';

/**
 * Validate and normalize a client-supplied document URL.
 *
 * The host is checked on the parsed URL rather than by substring, so inputs
 * such as `http://internal-host/?studocu.com` are rejected.
 *
 * @param {unknown} rawUrl - Value of `url` from the request body.
 * @returns {string|null} Absolute http(s) URL on studocu.com (or a subdomain),
 *   or `null` when the input is not acceptable.
 */
const normalizeStudocuUrl = (rawUrl) => {
  if (typeof rawUrl !== 'string') return null;
  const trimmed = rawUrl.trim();
  if (trimmed === '') return null;
  const candidate = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return null;
  }
  const host = parsed.hostname.toLowerCase();
  const isStudocuHost = host === 'studocu.com' || host.endsWith('.studocu.com');
  return isStudocuHost ? parsed.href : null;
};

/**
 * Build a header-safe PDF file name from the optional client value.
 *
 * @param {unknown} filename - Value of `filename` from the request body.
 * @returns {string} Sanitized name ending in `.pdf`, or the default name.
 */
const buildPdfFilename = (filename) => {
  if (typeof filename !== 'string') return DEFAULT_PDF_FILENAME;
  const cleaned = filename
    .trim()
    .replace(/\.pdf$/i, '')
    .replace(/[^\w.-]+/g, '_') // keeps quotes, CR/LF and path separators out of the header
    .slice(0, 100);
  return /[A-Za-z0-9]/.test(cleaned) ? `${cleaned}.pdf` : DEFAULT_PDF_FILENAME;
};

/**
 * POST /api/download — render a StuDocu document to PDF.
 *
 * Body: `{ url, filename?, email?, password? }`.
 * Responds with the PDF as an attachment, 400 for a missing/invalid URL, or
 * 500 with `{ error }` when generation fails.
 */
app.post('/api/download', async (req, res) => {
  const { url, filename, email, password } = req.body ?? {};
  if (!url) {
    return res.status(400).json({ error: 'URL is required.' });
  }
  const normalizedUrl = normalizeStudocuUrl(url);
  if (normalizedUrl === null) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }
  console.log(`🎯 Processing request for: ${normalizedUrl}`);
  try {
    const startTime = Date.now();
    const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
    const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
    // Recent Puppeteer versions return a Uint8Array; Express would serialize
    // that as JSON, so make sure a Buffer is sent.
    const pdfBody = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);
    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', `attachment; filename="${buildPdfFilename(filename)}"`);
    res.setHeader('Content-Length', pdfBody.length);
    res.send(pdfBody);
    console.log(`🎉 Request completed successfully in ${processingTime}s`);
  } catch (error) {
    console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
    if (!res.headersSent) {
      res.status(500).json({ error: error.message });
    }
  }
});
/**
 * GET /health — liveness probe.
 * Responds with a fixed status string, the current time in ISO-8601 format
 * and the process uptime in seconds.
 */
app.get('/health', (_request, response) => {
  const payload = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
  };
  response.json(payload);
});
/**
 * GET / — service description: version, feature list and available endpoints.
 * The emoji in the strings below were stored mis-encoded (UTF-8 bytes decoded
 * with a legacy code page) and have been restored.
 */
app.get('/', (req, res) => {
  res.json({
    message: '🚀 Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
    version: '5.3',
    features: [
      '🍪 Advanced cookie banner bypass',
      '🔓 Premium content unblurring (client-side only; server-side blur requires premium login)',
      '🔑 Login support for full unblurred content access',
      '🤖 Anti-bot detection evasion',
      '📄 Full document content extraction with print styles for clean PDF'
    ],
    endpoints: {
      download: 'POST /api/download (body: {url, filename?, email?, password?})',
      health: 'GET /health'
    },
    note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
  });
});
// Start the HTTP server and keep the handle so shutdown can close it.
const server = app.listen(port, () => {
  console.log(`🚀 Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
  console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
});

// Upper bound on how long shutdown waits for in-flight requests.
const SHUTDOWN_TIMEOUT_MS = 10000;
let isShuttingDown = false;

/**
 * Stop accepting connections, let in-flight requests finish, then exit.
 * A second signal, or the timeout, exits immediately.
 *
 * @param {string} signal - Name of the received signal (for logging).
 */
const shutdown = (signal) => {
  if (isShuttingDown) {
    process.exit(0);
  }
  isShuttingDown = true;
  console.log(`${signal} received, shutting down gracefully...`);
  server.close(() => process.exit(0));
  // unref() so this timer alone never keeps the process alive.
  setTimeout(() => {
    console.log('Shutdown timeout reached, exiting.');
    process.exit(0);
  }, SHUTDOWN_TIMEOUT_MS).unref();
};

process.on('SIGTERM', () => shutdown('SIGTERM'));
process.on('SIGINT', () => shutdown('SIGINT'));