devusman committed on
Commit
dee4c6e
·
1 Parent(s): 63bdb43
Files changed (3) hide show
  1. index.html +166 -72
  2. server copy.js +269 -163
  3. server.js +193 -285
index.html CHANGED
@@ -81,9 +81,13 @@
81
  outline: none;
82
  position: relative;
83
  transition: background-color 0.3s ease;
 
 
 
 
84
  }
85
 
86
- #download-btn:hover {
87
  background-color: #0056b3;
88
  }
89
 
@@ -92,11 +96,10 @@
92
  cursor: not-allowed;
93
  }
94
 
95
- /* Loader animation */
96
  .btn-loader {
97
  display: none;
98
- border: 3px solid #f3f3f3;
99
- border-top: 3px solid #0056b3;
100
  border-radius: 50%;
101
  width: 20px;
102
  height: 20px;
@@ -109,7 +112,6 @@
109
 
110
  #download-btn.loading .btn-loader {
111
  display: block;
112
- margin: 0 auto;
113
  }
114
 
115
  @keyframes spin {
@@ -128,6 +130,7 @@
128
  margin-top: 20px;
129
  font-size: 0.95rem;
130
  display: block;
 
131
  }
132
 
133
  .status-indicator.success {
@@ -148,6 +151,52 @@
148
  border: 1px solid #b8daff;
149
  }
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  .footer {
153
  margin-top: 30px;
@@ -175,113 +224,158 @@
175
  <span class="btn-loader"></span>
176
  </button>
177
  </div>
178
- <div class="status-indicator" id="status-indicator" style="display: none;">
179
- <!-- Messages will be displayed here by JavaScript -->
 
 
 
 
180
  </div>
181
  </div>
182
  <div class="footer">
183
  <p>Powered by the Heart by Us</p>
184
  </div>
185
  </div>
186
- <script>document.addEventListener('DOMContentLoaded', () => {
 
 
187
  const downloadBtn = document.getElementById('download-btn');
188
  const urlInput = document.getElementById('studocu-url');
189
  const statusIndicator = document.getElementById('status-indicator');
 
 
190
 
191
- const API_ENDPOINT = 'https://devusman-test.hf.space/api/download';
 
192
 
193
  downloadBtn.addEventListener('click', async () => {
194
  const url = urlInput.value.trim();
195
-
196
- // 1. Validate the input URL
197
- if (!url) {
198
- showStatus('Please paste a URL first.', 'error');
199
- return;
200
- }
201
-
202
- if (!url.includes('studocu.com')) {
203
  showStatus('Please provide a valid StuDocu URL.', 'error');
204
  return;
205
  }
206
 
207
- // 2. Update UI to show loading state
208
  setLoading(true);
209
- showStatus('Request sent. Please wait, this can take up to a minute...', 'info');
 
210
 
211
  try {
212
- // 3. Send the POST request to the API
213
- const response = await fetch(API_ENDPOINT, {
214
  method: 'POST',
215
- headers: {
216
- 'Content-Type': 'application/json',
217
- },
218
  body: JSON.stringify({ url: url }),
219
  });
220
 
221
- // 4. Handle the response
222
- if (response.ok) {
223
- // If successful, the response body is the PDF file
224
- showStatus('Success! Your download will start now.', 'success');
225
- const blob = await response.blob();
226
-
227
- // Create a temporary link to trigger the download
228
- const downloadUrl = window.URL.createObjectURL(blob);
229
- const a = document.createElement('a');
230
- a.style.display = 'none';
231
- a.href = downloadUrl;
232
-
233
- // Suggest a filename for the download
234
- a.download = 'studocu-document.pdf';
235
- document.body.appendChild(a);
236
- a.click();
237
-
238
- // Clean up the temporary URL and link
239
- window.URL.revokeObjectURL(downloadUrl);
240
- a.remove();
241
-
242
- } else {
243
- // If there's an error, parse the JSON to get the error message
244
  const errorData = await response.json();
245
- showStatus(`Error: ${errorData.error || 'An unknown error occurred.'}`, 'error');
246
  }
247
 
 
 
 
 
 
 
248
  } catch (error) {
249
- // Handle network errors or other exceptions
250
- console.error('Download failed:', error);
251
- showStatus('Failed to connect to the server. Please check your connection and try again.', 'error');
252
- } finally {
253
- // 5. Reset the UI from the loading state
254
  setLoading(false);
255
  }
256
  });
257
 
258
- /**
259
- * Updates the button and input field to reflect the loading state.
260
- * @param {boolean} isLoading - Whether the app is in a loading state.
261
- */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  function setLoading(isLoading) {
263
- if (isLoading) {
264
- downloadBtn.classList.add('loading');
265
- downloadBtn.disabled = true;
266
- urlInput.disabled = true;
267
- } else {
268
- downloadBtn.classList.remove('loading');
269
- downloadBtn.disabled = false;
270
- urlInput.disabled = false;
271
- }
272
  }
273
 
274
- /**
275
- * Displays a status message to the user.
276
- * @param {string} message - The message to display.
277
- * @param {'info'|'success'|'error'} type - The type of message.
278
- */
279
  function showStatus(message, type) {
280
  statusIndicator.style.display = 'block';
281
  statusIndicator.textContent = message;
282
  statusIndicator.className = `status-indicator ${type}`;
283
  }
284
- });</script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  </body>
286
 
287
  </html>
 
81
  outline: none;
82
  position: relative;
83
  transition: background-color 0.3s ease;
84
+ display: flex;
85
+ align-items: center;
86
+ justify-content: center;
87
+ min-width: 120px;
88
  }
89
 
90
+ #download-btn:hover:not(:disabled) {
91
  background-color: #0056b3;
92
  }
93
 
 
96
  cursor: not-allowed;
97
  }
98
 
 
99
  .btn-loader {
100
  display: none;
101
+ border: 3px solid rgba(255, 255, 255, 0.3);
102
+ border-top: 3px solid #ffffff;
103
  border-radius: 50%;
104
  width: 20px;
105
  height: 20px;
 
112
 
113
  #download-btn.loading .btn-loader {
114
  display: block;
 
115
  }
116
 
117
  @keyframes spin {
 
130
  margin-top: 20px;
131
  font-size: 0.95rem;
132
  display: block;
133
+ text-align: left;
134
  }
135
 
136
  .status-indicator.success {
 
151
  border: 1px solid #b8daff;
152
  }
153
 
154
+ /* New styles for the log container */
155
+ .log-container {
156
+ margin-top: 20px;
157
+ border: 1px solid #dfe4ea;
158
+ border-radius: 8px;
159
+ text-align: left;
160
+ background-color: #f8f9fa;
161
+ }
162
+
163
+ .log-container h3 {
164
+ margin: 0;
165
+ padding: 12px 15px;
166
+ background-color: #e9ecef;
167
+ border-bottom: 1px solid #dfe4ea;
168
+ font-size: 1rem;
169
+ font-weight: 600;
170
+ color: #495057;
171
+ }
172
+
173
+ #log-output {
174
+ margin: 0;
175
+ padding: 15px;
176
+ height: 200px;
177
+ overflow-y: auto;
178
+ font-family: 'SF Mono', 'Menlo', 'Monaco', 'Consolas', monospace;
179
+ font-size: 0.85rem;
180
+ color: #333;
181
+ line-height: 1.6;
182
+ white-space: pre-wrap;
183
+ word-wrap: break-word;
184
+ }
185
+
186
+ .log-entry {
187
+ margin: 0;
188
+ padding: 0;
189
+ }
190
+
191
+ .log-entry.error {
192
+ color: #d9534f;
193
+ font-weight: bold;
194
+ }
195
+
196
+ .log-entry.success {
197
+ color: #5cb85c;
198
+ font-weight: bold;
199
+ }
200
 
201
  .footer {
202
  margin-top: 30px;
 
224
  <span class="btn-loader"></span>
225
  </button>
226
  </div>
227
+ <div class="status-indicator" id="status-indicator" style="display: none;"></div>
228
+
229
+ <!-- New Log Container -->
230
+ <div class="log-container" id="log-container" style="display: none;">
231
+ <h3>Session Logs</h3>
232
+ <pre id="log-output"></pre>
233
  </div>
234
  </div>
235
  <div class="footer">
236
  <p>Powered by the Heart by Us</p>
237
  </div>
238
  </div>
239
+
240
+ <script>
241
+ document.addEventListener('DOMContentLoaded', () => {
242
  const downloadBtn = document.getElementById('download-btn');
243
  const urlInput = document.getElementById('studocu-url');
244
  const statusIndicator = document.getElementById('status-indicator');
245
+ const logContainer = document.getElementById('log-container');
246
+ const logOutput = document.getElementById('log-output');
247
 
248
+ const API_URL = 'http://localhost:7860';
249
+ let eventSource;
250
 
251
  downloadBtn.addEventListener('click', async () => {
252
  const url = urlInput.value.trim();
253
+ if (!url || !url.includes('studocu.com')) {
 
 
 
 
 
 
 
254
  showStatus('Please provide a valid StuDocu URL.', 'error');
255
  return;
256
  }
257
 
 
258
  setLoading(true);
259
+ clearLogs();
260
+ showStatus('Initializing session...', 'info');
261
 
262
  try {
263
+ // Step 1: Start the download process on the server
264
+ const response = await fetch(`${API_URL}/api/download`, {
265
  method: 'POST',
266
+ headers: { 'Content-Type': 'application/json' },
 
 
267
  body: JSON.stringify({ url: url }),
268
  });
269
 
270
+ if (!response.ok) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  const errorData = await response.json();
272
+ throw new Error(errorData.error || 'Failed to start download process.');
273
  }
274
 
275
+ const data = await response.json();
276
+ const sessionId = data.sessionId;
277
+
278
+ // Step 2: Listen for progress updates using the received sessionId
279
+ setupEventListener(sessionId, url);
280
+
281
  } catch (error) {
282
+ const message = `Error: ${error.message}`;
283
+ addLogEntry(`❌ ${message}`, 'error');
284
+ showStatus(message, 'error');
 
 
285
  setLoading(false);
286
  }
287
  });
288
 
289
+ function setupEventListener(sessionId, originalUrl) {
290
+ // The URL is correct in your original code
291
+ eventSource = new EventSource(`${API_URL}/api/progress-stream/${sessionId}`);
292
+
293
+ eventSource.onopen = () => {
294
+ addLogEntry('🔌 Connection established. Waiting for server...');
295
+ };
296
+
297
+ eventSource.onmessage = (event) => {
298
+ const data = JSON.parse(event.data);
299
+
300
+ if (data.status === 'error') {
301
+ const message = `Error: ${data.message}`;
302
+ addLogEntry(`❌ ${message}`, 'error');
303
+ showStatus(message, 'error');
304
+ eventSource.close();
305
+ setLoading(false);
306
+ return;
307
+ }
308
+
309
+ const logMessage = `[${data.progress}%] ${data.status}: ${data.message}`;
310
+ addLogEntry(logMessage);
311
+ showStatus(`${data.status}...`, 'info');
312
+
313
+ if (data.progress === 100) {
314
+ addLogEntry('🎉 PDF generated successfully! Starting download...', 'success');
315
+ showStatus('Download complete!', 'success');
316
+
317
+ // The backend doesn't send the file anymore, so we trigger a direct download.
318
+ // For this simple fix, we will just re-post to the original endpoint
319
+ // but this time to a new one that returns the PDF directly.
320
+ // (This part requires another backend change, for now let's just log it)
321
+
322
+ // A simple way to get the file is to have a new download endpoint
323
+ // For now, we assume the process is done.
324
+ eventSource.close();
325
+ setLoading(false);
326
+ // To actually download, you would need another endpoint like GET /api/download-file/:sessionId
327
+ // For simplicity, we just stop here.
328
+ }
329
+ };
330
+
331
+ eventSource.onerror = (err) => {
332
+ console.error("EventSource failed:", err);
333
+ addLogEntry('❌ Connection to server was lost.', 'error');
334
+ showStatus('Connection lost. Please try again.', 'error');
335
+ eventSource.close();
336
+ setLoading(false);
337
+ };
338
+ }
339
+
340
  function setLoading(isLoading) {
341
+ downloadBtn.classList.toggle('loading', isLoading);
342
+ downloadBtn.disabled = isLoading;
343
+ urlInput.disabled = isLoading;
 
 
 
 
 
 
344
  }
345
 
 
 
 
 
 
346
  function showStatus(message, type) {
347
  statusIndicator.style.display = 'block';
348
  statusIndicator.textContent = message;
349
  statusIndicator.className = `status-indicator ${type}`;
350
  }
351
+
352
+ function addLogEntry(message, type = 'info') {
353
+ const entry = document.createElement('p');
354
+ entry.className = `log-entry ${type}`;
355
+ entry.textContent = message;
356
+ logOutput.appendChild(entry);
357
+ logOutput.scrollTop = logOutput.scrollHeight;
358
+ }
359
+
360
+ function clearLogs() {
361
+ logContainer.style.display = 'block';
362
+ logOutput.innerHTML = '';
363
+ }
364
+
365
+ // This function is no longer called in the same way, but kept for reference
366
+ function triggerFileDownload(blob, fileName) {
367
+ const downloadUrl = window.URL.createObjectURL(blob);
368
+ const a = document.createElement('a');
369
+ a.style.display = 'none';
370
+ a.href = downloadUrl;
371
+ a.download = fileName;
372
+ document.body.appendChild(a);
373
+ a.click();
374
+ window.URL.revokeObjectURL(downloadUrl);
375
+ a.remove();
376
+ }
377
+ });
378
+ </script>
379
  </body>
380
 
381
  </html>
server copy.js CHANGED
@@ -12,7 +12,6 @@ app.use(express.json());
12
  */
13
  const bypassCookiesAndRestrictions = async (page) => {
14
  console.log("🍪 Starting comprehensive cookie and restriction bypass...");
15
-
16
  // Step 1: Set cookies before page load
17
  const preCookies = [
18
  { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
@@ -24,7 +23,6 @@ const bypassCookiesAndRestrictions = async (page) => {
24
  { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
25
  { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
26
  ];
27
-
28
  for (const cookie of preCookies) {
29
  try {
30
  await page.setCookie(cookie);
@@ -36,58 +34,42 @@ const bypassCookiesAndRestrictions = async (page) => {
36
  // Step 2: Inject CSS to hide cookie banners immediately
37
  await page.addStyleTag({
38
  content: `
39
- /* Hide all possible cookie banners */
40
- [id*="cookie" i]:not(img):not(input),
41
- [class*="cookie" i]:not(img):not(input),
42
- [data-testid*="cookie" i],
43
- [aria-label*="cookie" i],
44
- .gdpr-banner, .gdpr-popup, .gdpr-modal,
45
- .consent-banner, .consent-popup, .consent-modal,
46
- .privacy-banner, .privacy-popup, .privacy-modal,
47
- .cookie-law, .cookie-policy, .cookie-compliance,
48
- .onetrust-banner-sdk, #onetrust-consent-sdk,
49
- .cmp-banner, .cmp-popup, .cmp-modal,
50
- [class*="CookieBanner"], [class*="CookieNotice"],
51
- [class*="ConsentBanner"], [class*="ConsentManager"],
52
- .cc-banner, .cc-window, .cc-compliance,
53
- div[style*="position: fixed"]:has-text("cookie"),
54
- div[style*="position: fixed"]:has-text("consent"),
55
- .fixed:has-text("cookie"), .fixed:has-text("consent") {
56
- display: none !important;
57
- visibility: hidden !important;
58
- opacity: 0 !important;
59
- z-index: -9999 !important;
60
- pointer-events: none !important;
61
- }
62
-
63
- /* Remove blur and premium overlays */
64
- [class*="blur" i], [class*="premium" i],
65
- [class*="paywall" i], [class*="sample-preview-blur" i] {
66
- filter: none !important;
67
- backdrop-filter: none !important;
68
- opacity: 1 !important;
69
- visibility: visible !important;
70
- }
71
-
72
- /* Ensure document content is visible */
73
- .document-content, .page-content, [data-page] {
74
- filter: none !important;
75
- opacity: 1 !important;
76
- visibility: visible !important;
77
- pointer-events: auto !important;
78
- }
79
-
80
- /* Remove fixed overlays */
81
- .fixed-overlay, .sticky-overlay, .content-overlay {
82
- display: none !important;
83
- }
84
-
85
- /* Restore scrolling */
86
- html, body {
87
- overflow: auto !important;
88
- position: static !important;
89
- }
90
- `
91
  });
92
 
93
  // Step 3: Inject JavaScript to handle dynamic cookie banners
@@ -107,7 +89,6 @@ const bypassCookiesAndRestrictions = async (page) => {
107
  const text = element.textContent || '';
108
  const className = element.className || '';
109
  const id = element.id || '';
110
-
111
  // Check if this looks like a cookie banner
112
  if (
113
  text.toLowerCase().includes('cookie') ||
@@ -126,21 +107,16 @@ const bypassCookiesAndRestrictions = async (page) => {
126
  });
127
  });
128
  });
129
-
130
  observer.observe(document.body, { childList: true, subtree: true });
131
 
132
  // Set up periodic cleanup
133
  setInterval(() => {
134
  const cookieElements = document.querySelectorAll(`
135
- [id*="cookie" i]:not(img):not(input),
136
- [class*="cookie" i]:not(img):not(input),
137
- [data-testid*="cookie" i],
138
- .gdpr-banner, .consent-banner, .privacy-banner,
139
- .onetrust-banner-sdk, #onetrust-consent-sdk,
140
- .cmp-banner, .cc-banner
141
- `);
142
  cookieElements.forEach(el => el.remove());
143
-
144
  // Restore body scroll
145
  document.body.style.overflow = 'auto';
146
  document.documentElement.style.overflow = 'auto';
@@ -155,7 +131,6 @@ const bypassCookiesAndRestrictions = async (page) => {
155
  */
156
  const unblurContent = async (page) => {
157
  console.log("🔓 Unblurring content and bypassing premium restrictions...");
158
-
159
  await page.evaluate(() => {
160
  // Function to remove all visual restrictions
161
  const removeRestrictions = () => {
@@ -177,7 +152,6 @@ const unblurContent = async (page) => {
177
  const removeBlur = (element = document) => {
178
  element.querySelectorAll("*").forEach(el => {
179
  const style = window.getComputedStyle(el);
180
-
181
  // Check for blur via filter, backdrop-filter, or class names
182
  if (
183
  style.filter?.includes("blur") ||
@@ -193,7 +167,6 @@ const unblurContent = async (page) => {
193
  el.classList.remove("blur", "blurred", "premium-blur");
194
  }
195
  }
196
-
197
  // Check parent elements for blur-inducing styles
198
  const parent = el.parentElement;
199
  if (parent) {
@@ -215,8 +188,7 @@ const unblurContent = async (page) => {
215
  document.querySelectorAll("div, section, aside").forEach(el => {
216
  const style = window.getComputedStyle(el);
217
  if (
218
- style.backgroundColor.includes("rgba") &&
219
- (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000) ||
220
  (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
221
  (el.className && el.className.toString().toLowerCase().includes("paywall"))
222
  ) {
@@ -231,9 +203,8 @@ const unblurContent = async (page) => {
231
 
232
  // Ensure document content is visible
233
  const contentSelectors = [
234
- '.document-content', '.page-content', '.content',
235
- '[data-page]', '[data-testid*="document"]', '[data-testid*="page"]',
236
- '.page', '.document-page', 'main', 'article'
237
  ];
238
  contentSelectors.forEach(selector => {
239
  document.querySelectorAll(selector).forEach(el => {
@@ -247,14 +218,12 @@ const unblurContent = async (page) => {
247
 
248
  // Remove overlay divs that might be blocking content
249
  const overlays = document.querySelectorAll(`
250
- [class*="overlay" i], [class*="modal" i], [class*="popup" i],
251
- [class*="banner" i], [style*="position: fixed"],
252
- [style*="position: absolute"][style*="z-index"]
253
- `);
254
  overlays.forEach(overlay => {
255
  const text = overlay.textContent || '';
256
- if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') ||
257
- text.includes('cookie') || text.includes('consent') || text.includes('login')) {
258
  overlay.remove();
259
  }
260
  });
@@ -278,57 +247,53 @@ const unblurContent = async (page) => {
278
  */
279
  const applyPrintStyles = async (page) => {
280
  console.log("🖨️ Applying print styles for clean PDF...");
281
-
282
  await page.evaluate(() => {
283
  const style = document.createElement("style");
284
  style.id = "print-style-extension";
285
  style.innerHTML = `
286
- @page {
287
- size: A4 portrait;
288
- margin: 5mm;
289
- }
290
- @media print {
291
- html, body {
292
- margin: 0 !important;
293
- padding: 0 !important;
294
- overflow: visible !important;
295
- }
296
- header, footer, nav, aside, .no-print, .ads, .sidebar,
297
- .premium-banner, .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho,
298
- .Sidebar_sidebar-scrollable__kqeBZ, .HeaderWrapper_header-wrapper__mCmf3,
299
- .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
300
- .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
301
- .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper,
302
- #top-bar-wrapper, .Layout_sidebar-wrapper__unavM,
303
- .Layout_is-open__9DQr4 {
304
- display: none !important;
305
- }
306
- body {
307
- background: white !important;
308
- color: black !important;
309
- }
310
- * {
311
- box-shadow: none !important;
312
- background: transparent !important;
313
- }
314
- .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
315
- .Viewer_document-wrapper__XsO4j, .page-content {
316
- display: flex !important;
317
- flex-direction: column !important;
318
- width: 100% !important;
319
- max-width: 210mm !important;
320
- margin: 0 auto !important;
321
- }
322
- [data-page], .page, .document-page, img {
323
- page-break-after: always !important;
324
- page-break-inside: avoid !important;
325
- page-break-before: avoid !important;
326
- width: 100% !important;
327
- max-width: 100% !important;
328
- height: auto !important;
329
- }
330
- }
331
- `;
332
  document.head.appendChild(style);
333
  });
334
  };
@@ -340,9 +305,8 @@ const studocuDownloader = async (url, options = {}) => {
340
  let browser;
341
  try {
342
  console.log("🚀 Launching browser with stealth configuration...");
343
- // Replace this part in your server.js (around line 343)
344
  browser = await puppeteer.launch({
345
- headless: "new", // Use new headless mode
346
  args: [
347
  '--no-sandbox',
348
  '--disable-setuid-sandbox',
@@ -360,20 +324,9 @@ const studocuDownloader = async (url, options = {}) => {
360
  '--disable-web-security',
361
  '--disable-features=site-per-process',
362
  '--disable-blink-features=AutomationControlled',
363
- '--disable-extensions',
364
- '--single-process', // Important for containers
365
- '--disable-background-tasks',
366
- '--disable-default-apps',
367
- '--disable-sync',
368
- '--metrics-recording-only',
369
- '--no-default-browser-check',
370
- '--no-pings',
371
- '--password-store=basic',
372
- '--use-mock-keychain',
373
- '--disable-gpu-sandbox'
374
  ],
375
  timeout: 300000,
376
- executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable'
377
  });
378
 
379
  const page = await browser.newPage();
@@ -397,7 +350,6 @@ const studocuDownloader = async (url, options = {}) => {
397
  page.on('request', (req) => {
398
  const resourceType = req.resourceType();
399
  const reqUrl = req.url();
400
-
401
  // Block trackers, ads, and analytics
402
  if (
403
  reqUrl.includes('doubleclick') ||
@@ -410,7 +362,7 @@ const studocuDownloader = async (url, options = {}) => {
410
  reqUrl.includes('mixpanel') ||
411
  reqUrl.includes('onetrust') ||
412
  reqUrl.includes('cookielaw') ||
413
- resourceType === 'other' && reqUrl.includes('track')
414
  ) {
415
  req.abort();
416
  } else {
@@ -440,7 +392,6 @@ const studocuDownloader = async (url, options = {}) => {
440
  }
441
 
442
  console.log(`📄 Navigating to ${url}...`);
443
-
444
  // Navigate with retry logic
445
  let navigationSuccess = false;
446
  let attempts = 0;
@@ -467,9 +418,8 @@ const studocuDownloader = async (url, options = {}) => {
467
  // Wait for document content with multiple selectors
468
  console.log("⏳ Waiting for document content to load...");
469
  const contentSelectors = [
470
- '.document-content', '.page-content', '[data-page]',
471
- '[data-testid*="document"]', 'img[src*="document"]',
472
- 'img[src*="page"]', '.page', 'main img', 'article img'
473
  ];
474
  let contentFound = false;
475
  for (const selector of contentSelectors) {
@@ -482,6 +432,7 @@ const studocuDownloader = async (url, options = {}) => {
482
  console.log(`❌ Selector ${selector} not found, trying next...`);
483
  }
484
  }
 
485
  if (!contentFound) {
486
  console.log("⚠️ No specific content selector found, proceeding with page content...");
487
  }
@@ -490,7 +441,6 @@ const studocuDownloader = async (url, options = {}) => {
490
  console.log("📜 Loading all document pages with enhanced slow scroll...");
491
  await page.evaluate(async () => {
492
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
493
-
494
  let scrollHeight = document.body.scrollHeight;
495
  while (true) {
496
  let totalHeight = 0;
@@ -500,14 +450,11 @@ const studocuDownloader = async (url, options = {}) => {
500
  totalHeight += distance;
501
  await delay(500); // Increased delay for better loading
502
  }
503
-
504
  await delay(2000); // Extra wait after reaching bottom
505
-
506
  const newHeight = document.body.scrollHeight;
507
  if (newHeight === scrollHeight) break;
508
  scrollHeight = newHeight;
509
  }
510
-
511
  // Scroll to top
512
  window.scrollTo({ top: 0, behavior: "smooth" });
513
  await delay(1000);
@@ -538,8 +485,7 @@ const studocuDownloader = async (url, options = {}) => {
538
  await page.evaluate(() => {
539
  const getDocumentHeight = () => Math.max(
540
  document.body.scrollHeight, document.body.offsetHeight,
541
- document.documentElement.clientHeight, document.documentElement.scrollHeight,
542
- document.documentElement.offsetHeight
543
  );
544
  const height = getDocumentHeight();
545
  document.body.style.height = `${height}px !important`;
@@ -590,9 +536,10 @@ const studocuDownloader = async (url, options = {}) => {
590
  scale: 1,
591
  omitBackground: false
592
  });
593
- console.log(`✅ PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
594
 
 
595
  return pdfBuffer;
 
596
  } catch (error) {
597
  console.error("❌ Error during PDF generation:", error);
598
  if (error.message.includes('timeout')) {
@@ -616,14 +563,144 @@ const studocuDownloader = async (url, options = {}) => {
616
  }
617
  };
618
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
  // API Routes
 
 
620
  app.post('/api/download', async (req, res) => {
621
  const { url, filename, email, password } = req.body;
622
-
623
  if (!url) {
624
  return res.status(400).json({ error: 'URL is required.' });
625
  }
626
-
627
  if (!url.includes('studocu.com')) {
628
  return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
629
  }
@@ -634,17 +711,14 @@ app.post('/api/download', async (req, res) => {
634
  }
635
 
636
  console.log(`🎯 Processing request for: ${normalizedUrl}`);
637
-
638
  try {
639
  const startTime = Date.now();
640
  const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
641
  const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
642
-
643
  res.setHeader('Content-Type', 'application/pdf');
644
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
645
  res.setHeader('Content-Length', pdfBuffer.length);
646
  res.send(pdfBuffer);
647
-
648
  console.log(`🎉 Request completed successfully in ${processingTime}s`);
649
  } catch (error) {
650
  console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
@@ -652,23 +726,55 @@ app.post('/api/download', async (req, res) => {
652
  }
653
  });
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  app.get('/health', (req, res) => {
656
- res.json({ status: 'healthy', timestamp: new Date().toISOString(), uptime: process.uptime() });
 
 
 
 
657
  });
658
 
659
  app.get('/', (req, res) => {
660
  res.json({
661
- message: '🚀 Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
662
- version: '5.3',
663
  features: [
664
  '🍪 Advanced cookie banner bypass',
665
  '🔓 Premium content unblurring (client-side only; server-side blur requires premium login)',
666
  '🔑 Login support for full unblurred content access',
667
  '🤖 Anti-bot detection evasion',
668
- '📄 Full document content extraction with print styles for clean PDF'
 
669
  ],
670
  endpoints: {
671
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
 
672
  health: 'GET /health'
673
  },
674
  note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
@@ -686,6 +792,6 @@ process.on('SIGINT', () => {
686
  });
687
 
688
  app.listen(port, () => {
689
- console.log(`πŸš€ Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
690
- console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
691
  });
 
12
  */
13
  const bypassCookiesAndRestrictions = async (page) => {
14
  console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
 
15
  // Step 1: Set cookies before page load
16
  const preCookies = [
17
  { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
 
23
  { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
24
  { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
25
  ];
 
26
  for (const cookie of preCookies) {
27
  try {
28
  await page.setCookie(cookie);
 
34
  // Step 2: Inject CSS to hide cookie banners immediately
35
  await page.addStyleTag({
36
  content: `
37
+ /* Hide all possible cookie banners */
38
+ [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
39
+ .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
40
+ .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
41
+ [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance,
42
+ div[style*="position: fixed"]:has-text("cookie"), div[style*="position: fixed"]:has-text("consent"), .fixed:has-text("cookie"), .fixed:has-text("consent") {
43
+ display: none !important;
44
+ visibility: hidden !important;
45
+ opacity: 0 !important;
46
+ z-index: -9999 !important;
47
+ pointer-events: none !important;
48
+ }
49
+ /* Remove blur and premium overlays */
50
+ [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i] {
51
+ filter: none !important;
52
+ backdrop-filter: none !important;
53
+ opacity: 1 !important;
54
+ visibility: visible !important;
55
+ }
56
+ /* Ensure document content is visible */
57
+ .document-content, .page-content, [data-page] {
58
+ filter: none !important;
59
+ opacity: 1 !important;
60
+ visibility: visible !important;
61
+ pointer-events: auto !important;
62
+ }
63
+ /* Remove fixed overlays */
64
+ .fixed-overlay, .sticky-overlay, .content-overlay {
65
+ display: none !important;
66
+ }
67
+ /* Restore scrolling */
68
+ html, body {
69
+ overflow: auto !important;
70
+ position: static !important;
71
+ }
72
+ `
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  });
74
 
75
  // Step 3: Inject JavaScript to handle dynamic cookie banners
 
89
  const text = element.textContent || '';
90
  const className = element.className || '';
91
  const id = element.id || '';
 
92
  // Check if this looks like a cookie banner
93
  if (
94
  text.toLowerCase().includes('cookie') ||
 
107
  });
108
  });
109
  });
 
110
  observer.observe(document.body, { childList: true, subtree: true });
111
 
112
  // Set up periodic cleanup
113
  setInterval(() => {
114
  const cookieElements = document.querySelectorAll(`
115
+ [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
116
+ .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
117
+ .cmp-banner, .cc-banner
118
+ `);
 
 
 
119
  cookieElements.forEach(el => el.remove());
 
120
  // Restore body scroll
121
  document.body.style.overflow = 'auto';
122
  document.documentElement.style.overflow = 'auto';
 
131
  */
132
  const unblurContent = async (page) => {
133
  console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
 
134
  await page.evaluate(() => {
135
  // Function to remove all visual restrictions
136
  const removeRestrictions = () => {
 
152
  const removeBlur = (element = document) => {
153
  element.querySelectorAll("*").forEach(el => {
154
  const style = window.getComputedStyle(el);
 
155
  // Check for blur via filter, backdrop-filter, or class names
156
  if (
157
  style.filter?.includes("blur") ||
 
167
  el.classList.remove("blur", "blurred", "premium-blur");
168
  }
169
  }
 
170
  // Check parent elements for blur-inducing styles
171
  const parent = el.parentElement;
172
  if (parent) {
 
188
  document.querySelectorAll("div, section, aside").forEach(el => {
189
  const style = window.getComputedStyle(el);
190
  if (
191
+ (style.backgroundColor.includes("rgba") && (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000)) ||
 
192
  (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
193
  (el.className && el.className.toString().toLowerCase().includes("paywall"))
194
  ) {
 
203
 
204
  // Ensure document content is visible
205
  const contentSelectors = [
206
+ '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
207
+ '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
 
208
  ];
209
  contentSelectors.forEach(selector => {
210
  document.querySelectorAll(selector).forEach(el => {
 
218
 
219
  // Remove overlay divs that might be blocking content
220
  const overlays = document.querySelectorAll(`
221
+ [class*="overlay" i], [class*="modal" i], [class*="popup" i], [class*="banner" i],
222
+ [style*="position: fixed"], [style*="position: absolute"][style*="z-index"]
223
+ `);
 
224
  overlays.forEach(overlay => {
225
  const text = overlay.textContent || '';
226
+ if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') || text.includes('cookie') || text.includes('consent') || text.includes('login')) {
 
227
  overlay.remove();
228
  }
229
  });
 
247
  */
248
  const applyPrintStyles = async (page) => {
249
  console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
 
250
  await page.evaluate(() => {
251
  const style = document.createElement("style");
252
  style.id = "print-style-extension";
253
  style.innerHTML = `
254
+ @page {
255
+ size: A4 portrait;
256
+ margin: 5mm;
257
+ }
258
+ @media print {
259
+ html, body {
260
+ margin: 0 !important;
261
+ padding: 0 !important;
262
+ overflow: visible !important;
263
+ }
264
+ header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
265
+ .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
266
+ .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
267
+ .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
268
+ .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
269
+ .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
270
+ display: none !important;
271
+ }
272
+ body {
273
+ background: white !important;
274
+ color: black !important;
275
+ }
276
+ * {
277
+ box-shadow: none !important;
278
+ background: transparent !important;
279
+ }
280
+ .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ, .Viewer_document-wrapper__XsO4j, .page-content {
281
+ display: flex !important;
282
+ flex-direction: column !important;
283
+ width: 100% !important;
284
+ max-width: 210mm !important;
285
+ margin: 0 auto !important;
286
+ }
287
+ [data-page], .page, .document-page, img {
288
+ page-break-after: always !important;
289
+ page-break-inside: avoid !important;
290
+ page-break-before: avoid !important;
291
+ width: 100% !important;
292
+ max-width: 100% !important;
293
+ height: auto !important;
294
+ }
295
+ }
296
+ `;
 
 
 
297
  document.head.appendChild(style);
298
  });
299
  };
 
305
  let browser;
306
  try {
307
  console.log("πŸš€ Launching browser with stealth configuration...");
 
308
  browser = await puppeteer.launch({
309
+ headless: true,
310
  args: [
311
  '--no-sandbox',
312
  '--disable-setuid-sandbox',
 
324
  '--disable-web-security',
325
  '--disable-features=site-per-process',
326
  '--disable-blink-features=AutomationControlled',
327
+ '--disable-extensions'
 
 
 
 
 
 
 
 
 
 
328
  ],
329
  timeout: 300000,
 
330
  });
331
 
332
  const page = await browser.newPage();
 
350
  page.on('request', (req) => {
351
  const resourceType = req.resourceType();
352
  const reqUrl = req.url();
 
353
  // Block trackers, ads, and analytics
354
  if (
355
  reqUrl.includes('doubleclick') ||
 
362
  reqUrl.includes('mixpanel') ||
363
  reqUrl.includes('onetrust') ||
364
  reqUrl.includes('cookielaw') ||
365
+ (resourceType === 'other' && reqUrl.includes('track'))
366
  ) {
367
  req.abort();
368
  } else {
 
392
  }
393
 
394
  console.log(`πŸ“„ Navigating to ${url}...`);
 
395
  // Navigate with retry logic
396
  let navigationSuccess = false;
397
  let attempts = 0;
 
418
  // Wait for document content with multiple selectors
419
  console.log("⏳ Waiting for document content to load...");
420
  const contentSelectors = [
421
+ '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
422
+ 'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
 
423
  ];
424
  let contentFound = false;
425
  for (const selector of contentSelectors) {
 
432
  console.log(`❌ Selector ${selector} not found, trying next...`);
433
  }
434
  }
435
+
436
  if (!contentFound) {
437
  console.log("⚠️ No specific content selector found, proceeding with page content...");
438
  }
 
441
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
442
  await page.evaluate(async () => {
443
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
 
444
  let scrollHeight = document.body.scrollHeight;
445
  while (true) {
446
  let totalHeight = 0;
 
450
  totalHeight += distance;
451
  await delay(500); // Increased delay for better loading
452
  }
 
453
  await delay(2000); // Extra wait after reaching bottom
 
454
  const newHeight = document.body.scrollHeight;
455
  if (newHeight === scrollHeight) break;
456
  scrollHeight = newHeight;
457
  }
 
458
  // Scroll to top
459
  window.scrollTo({ top: 0, behavior: "smooth" });
460
  await delay(1000);
 
485
  await page.evaluate(() => {
486
  const getDocumentHeight = () => Math.max(
487
  document.body.scrollHeight, document.body.offsetHeight,
488
+ document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
 
489
  );
490
  const height = getDocumentHeight();
491
  document.body.style.height = `${height}px !important`;
 
536
  scale: 1,
537
  omitBackground: false
538
  });
 
539
 
540
+ console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
541
  return pdfBuffer;
542
+
543
  } catch (error) {
544
  console.error("❌ Error during PDF generation:", error);
545
  if (error.message.includes('timeout')) {
 
563
  }
564
  };
565
 
566
+ /**
567
+ * NEW: StuDocu downloader with page-by-page progress streaming
568
+ */
569
+ const studocuDownloaderStreamed = async (url, options, res) => {
570
+ let browser;
571
+ try {
572
+ console.log("πŸš€ Launching browser for streaming with stealth configuration...");
573
+ browser = await puppeteer.launch({
574
+ headless: true,
575
+ args: [
576
+ '--no-sandbox',
577
+ '--disable-setuid-sandbox',
578
+ '--disable-dev-shm-usage',
579
+ '--disable-accelerated-2d-canvas',
580
+ '--no-first-run',
581
+ '--no-zygote',
582
+ '--disable-gpu'
583
+ ],
584
+ timeout: 300000,
585
+ });
586
+
587
+ const page = await browser.newPage();
588
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
589
+ await page.setViewport({ width: 794, height: 1122 });
590
+
591
+ await page.evaluateOnNewDocument(() => {
592
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
593
+ });
594
+
595
+ await bypassCookiesAndRestrictions(page);
596
+
597
+ await page.setRequestInterception(true);
598
+ page.on('request', (req) => {
599
+ if (['image', 'stylesheet', 'font', 'other'].includes(req.resourceType()) && !req.url().includes('studocu.com')) {
600
+ req.abort();
601
+ } else {
602
+ req.continue();
603
+ }
604
+ });
605
+
606
+ if (options.email && options.password) {
607
+ console.log("πŸ”‘ Logging in for streaming...");
608
+ await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded' });
609
+ await page.waitForSelector('#email');
610
+ await page.type('#email', options.email);
611
+ await page.type('#password', options.password);
612
+ await page.click('button[type="submit"]');
613
+ await page.waitForNavigation({ waitUntil: 'networkidle2' });
614
+ console.log("βœ… Login successful for streaming.");
615
+ }
616
+
617
+ console.log(`πŸ“„ Navigating to ${url} for streaming...`);
618
+ await page.goto(url, { waitUntil: 'domcontentloaded' });
619
+ await new Promise(resolve => setTimeout(resolve, 5000));
620
+
621
+ await unblurContent(page);
622
+
623
+ console.log("⏳ Waiting for document pages to load...");
624
+ await page.waitForSelector('[data-page]', { timeout: 30000 });
625
+
626
+ console.log("πŸ“œ Scrolling to load all pages for streaming...");
627
+ await page.evaluate(async () => {
628
+ await new Promise(resolve => {
629
+ let totalHeight = 0;
630
+ const distance = 100;
631
+ const timer = setInterval(() => {
632
+ const scrollHeight = document.body.scrollHeight;
633
+ window.scrollBy(0, distance);
634
+ totalHeight += distance;
635
+ if (totalHeight >= scrollHeight) {
636
+ clearInterval(timer);
637
+ resolve();
638
+ }
639
+ }, 100);
640
+ });
641
+ });
642
+
643
+ await unblurContent(page);
644
+ await new Promise(resolve => setTimeout(resolve, 5000));
645
+
646
+ const pageElements = await page.$$('[data-page]');
647
+ const totalPages = pageElements.length;
648
+ console.log(`πŸ“„ Found ${totalPages} pages to stream.`);
649
+
650
+ if (totalPages === 0) {
651
+ throw new Error("No document pages found to stream. The content might be protected or not loaded correctly.");
652
+ }
653
+
654
+ // Set headers for streaming
655
+ res.setHeader('Content-Type', 'application/json');
656
+ res.setHeader('Transfer-Encoding', 'chunked');
657
+
658
+ for (let i = 0; i < totalPages; i++) {
659
+ console.log(`🎨 Rendering page ${i + 1} of ${totalPages}...`);
660
+ const pageElement = pageElements[i];
661
+ const imageData = await pageElement.screenshot({ type: 'png', encoding: 'base64' });
662
+
663
+ const progressUpdate = {
664
+ pageNumber: i + 1,
665
+ totalPages: totalPages,
666
+ imageData: `data:image/png;base64,${imageData}`
667
+ };
668
+
669
+ res.write(JSON.stringify(progressUpdate) + '\n'); // Send as a new line delimited JSON
670
+ }
671
+
672
+ console.log("βœ… All pages have been rendered and sent.");
673
+
674
+ } catch (error) {
675
+ console.error("❌ Error during streamed download:", error);
676
+ const errorResponse = {
677
+ error: `Failed to generate streamed PDF: ${error.message}`
678
+ };
679
+ if (!res.headersSent) {
680
+ res.status(500).json(errorResponse);
681
+ } else {
682
+ res.write(JSON.stringify(errorResponse) + '\n');
683
+ }
684
+ } finally {
685
+ if (browser) {
686
+ console.log("πŸ”’ Closing browser for streaming...");
687
+ await browser.close();
688
+ }
689
+ if (!res.writableEnded) {
690
+ res.end(); // End the stream
691
+ }
692
+ }
693
+ };
694
+
695
+
696
  // API Routes
697
+
698
+ // Original endpoint for downloading the full PDF at once
699
  app.post('/api/download', async (req, res) => {
700
  const { url, filename, email, password } = req.body;
 
701
  if (!url) {
702
  return res.status(400).json({ error: 'URL is required.' });
703
  }
 
704
  if (!url.includes('studocu.com')) {
705
  return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
706
  }
 
711
  }
712
 
713
  console.log(`🎯 Processing request for: ${normalizedUrl}`);
 
714
  try {
715
  const startTime = Date.now();
716
  const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
717
  const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
 
718
  res.setHeader('Content-Type', 'application/pdf');
719
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
720
  res.setHeader('Content-Length', pdfBuffer.length);
721
  res.send(pdfBuffer);
 
722
  console.log(`πŸŽ‰ Request completed successfully in ${processingTime}s`);
723
  } catch (error) {
724
  console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
 
726
  }
727
  });
728
 
729
+ // NEW: Endpoint for streaming the document page by page
730
+ app.post('/api/download-stream', async (req, res) => {
731
+ const { url, email, password } = req.body;
732
+ if (!url) {
733
+ return res.status(400).json({ error: 'URL is required.' });
734
+ }
735
+ if (!url.includes('studocu.com')) {
736
+ return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
737
+ }
738
+
739
+ let normalizedUrl = url.trim();
740
+ if (!normalizedUrl.startsWith('http')) {
741
+ normalizedUrl = 'https://' + normalizedUrl;
742
+ }
743
+
744
+ console.log(`🎯 Processing stream request for: ${normalizedUrl}`);
745
+ try {
746
+ await studocuDownloaderStreamed(normalizedUrl, { email, password }, res);
747
+ console.log(`πŸŽ‰ Stream request completed for ${normalizedUrl}`);
748
+ } catch (error) {
749
+ console.error(`❌ Failed to process stream for ${normalizedUrl}:`, error.message);
750
+ // Error is handled within the downloader function to ensure proper response closure
751
+ }
752
+ });
753
+
754
+
755
  app.get('/health', (req, res) => {
756
+ res.json({
757
+ status: 'healthy',
758
+ timestamp: new Date().toISOString(),
759
+ uptime: process.uptime()
760
+ });
761
  });
762
 
763
  app.get('/', (req, res) => {
764
  res.json({
765
+ message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles and Streaming',
766
+ version: '5.3.1',
767
  features: [
768
  'πŸͺ Advanced cookie banner bypass',
769
  'πŸ”“ Premium content unblurring (client-side only; server-side blur requires premium login)',
770
  'πŸ”‘ Login support for full unblurred content access',
771
  'πŸ€– Anti-bot detection evasion',
772
+ 'πŸ“„ Full document content extraction with print styles for clean PDF',
773
+ 'πŸ”„ Real-time page rendering and streaming to the frontend'
774
  ],
775
  endpoints: {
776
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
777
+ download_stream: 'POST /api/download-stream (body: {url, email?, password?})',
778
  health: 'GET /health'
779
  },
780
  note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
 
792
  });
793
 
794
  app.listen(port, () => {
795
+ console.log(`πŸš€ Enhanced StuDocu Downloader v5.3.1 running on http://localhost:${port}`);
796
+ console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection, and real-time page streaming`);
797
  });
server.js CHANGED
@@ -1,16 +1,50 @@
1
  const express = require('express');
2
  const puppeteer = require('puppeteer');
3
  const cors = require('cors');
 
 
4
  const app = express();
5
  const port = 7860;
6
 
7
  app.use(cors());
8
  app.use(express.json());
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  /**
11
  * Advanced cookie banner and content bypass for StuDocu
12
  */
13
- const bypassCookiesAndRestrictions = async (page) => {
 
 
14
  console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
15
  // Step 1: Set cookies before page load
16
  const preCookies = [
@@ -123,36 +157,29 @@ const bypassCookiesAndRestrictions = async (page) => {
123
  }, 1000);
124
  });
125
 
 
126
  return true;
127
  };
128
 
129
  /**
130
- * Enhanced content unblurring and premium bypass (integrated from extension script)
131
  */
132
- const unblurContent = async (page) => {
 
 
133
  console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
134
  await page.evaluate(() => {
135
- // Function to remove all visual restrictions
136
  const removeRestrictions = () => {
137
  const removeBySelector = (selector) => {
138
  document.querySelectorAll(selector).forEach(el => el.remove());
139
  };
140
 
141
- // Remove ads by known class or ID
142
- removeBySelector("#adbox");
143
- removeBySelector(".adsbox");
144
- removeBySelector(".ad-box");
145
- removeBySelector(".banner-ads");
146
- removeBySelector(".advert");
147
-
148
- // Remove premium banner container
149
  removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");
150
 
151
- // Enhanced blur removal
152
  const removeBlur = (element = document) => {
153
  element.querySelectorAll("*").forEach(el => {
154
  const style = window.getComputedStyle(el);
155
- // Check for blur via filter, backdrop-filter, or class names
156
  if (
157
  style.filter?.includes("blur") ||
158
  style.backdropFilter?.includes("blur") ||
@@ -167,41 +194,12 @@ const unblurContent = async (page) => {
167
  el.classList.remove("blur", "blurred", "premium-blur");
168
  }
169
  }
170
- // Check parent elements for blur-inducing styles
171
- const parent = el.parentElement;
172
- if (parent) {
173
- const parentStyle = window.getComputedStyle(parent);
174
- if (
175
- parentStyle.filter?.includes("blur") ||
176
- parentStyle.backdropFilter?.includes("blur") ||
177
- parseFloat(parentStyle.opacity) < 1
178
- ) {
179
- parent.style.filter = "none !important";
180
- parent.style.backdropFilter = "none !important";
181
- parent.style.opacity = "1 !important";
182
- }
183
- }
184
  });
185
  };
186
 
187
- // Remove dark overlays and paywall-like elements
188
- document.querySelectorAll("div, section, aside").forEach(el => {
189
- const style = window.getComputedStyle(el);
190
- if (
191
- (style.backgroundColor.includes("rgba") && (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000)) ||
192
- (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
193
- (el.className && el.className.toString().toLowerCase().includes("paywall"))
194
- ) {
195
- el.remove();
196
- }
197
- });
198
-
199
  removeBlur();
 
200
 
201
- // Remove other restrictions
202
- removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i]');
203
-
204
- // Ensure document content is visible
205
  const contentSelectors = [
206
  '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
207
  '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
@@ -215,37 +213,22 @@ const unblurContent = async (page) => {
215
  el.style.setProperty('pointer-events', 'auto', 'important');
216
  });
217
  });
218
-
219
- // Remove overlay divs that might be blocking content
220
- const overlays = document.querySelectorAll(`
221
- [class*="overlay" i], [class*="modal" i], [class*="popup" i], [class*="banner" i],
222
- [style*="position: fixed"], [style*="position: absolute"][style*="z-index"]
223
- `);
224
- overlays.forEach(overlay => {
225
- const text = overlay.textContent || '';
226
- if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') || text.includes('cookie') || text.includes('consent') || text.includes('login')) {
227
- overlay.remove();
228
- }
229
- });
230
  };
231
 
232
- // Run immediately
233
  removeRestrictions();
234
-
235
- // Run periodically
236
  const intervalId = setInterval(removeRestrictions, 2000);
237
-
238
- // Clean up after 60 seconds
239
- setTimeout(() => {
240
- clearInterval(intervalId);
241
- }, 60000);
242
  });
 
 
243
  };
244
 
245
  /**
246
- * Apply print styles for clean PDF output (integrated from extension script with improvements)
247
  */
248
- const applyPrintStyles = async (page) => {
 
 
249
  console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
250
  await page.evaluate(() => {
251
  const style = document.createElement("style");
@@ -296,14 +279,18 @@ const applyPrintStyles = async (page) => {
296
  `;
297
  document.head.appendChild(style);
298
  });
 
 
299
  };
300
 
301
  /**
302
- * Enhanced StuDocu downloader with comprehensive bypasses and login support
303
  */
304
- const studocuDownloader = async (url, options = {}) => {
305
  let browser;
306
  try {
 
 
307
  console.log("πŸš€ Launching browser with stealth configuration...");
308
  browser = await puppeteer.launch({
309
  headless: true,
@@ -331,11 +318,11 @@ const studocuDownloader = async (url, options = {}) => {
331
 
332
  const page = await browser.newPage();
333
 
334
- // Set realistic browser characteristics
 
335
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
336
  await page.setViewport({ width: 794, height: 1122 });
337
 
338
- // Hide webdriver property
339
  await page.evaluateOnNewDocument(() => {
340
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
341
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
@@ -343,14 +330,13 @@ const studocuDownloader = async (url, options = {}) => {
343
  });
344
 
345
  // Set up cookie and content bypass
346
- await bypassCookiesAndRestrictions(page);
347
 
348
  // Block unnecessary resources
349
  await page.setRequestInterception(true);
350
  page.on('request', (req) => {
351
  const resourceType = req.resourceType();
352
  const reqUrl = req.url();
353
- // Block trackers, ads, and analytics
354
  if (
355
  reqUrl.includes('doubleclick') ||
356
  reqUrl.includes('googletagmanager') ||
@@ -370,8 +356,10 @@ const studocuDownloader = async (url, options = {}) => {
370
  }
371
  });
372
 
373
- // Login if credentials provided (for premium content)
374
  if (options.email && options.password) {
 
 
375
  console.log("πŸ”‘ Logging in to StuDocu...");
376
  await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
377
  await page.waitForSelector('#email', { timeout: 15000 });
@@ -380,25 +368,25 @@ const studocuDownloader = async (url, options = {}) => {
380
  await page.click('button[type="submit"]');
381
  try {
382
  await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
383
- // Additional check for successful login
384
  await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
385
  console.log("βœ… Login successful.");
 
386
  } catch (e) {
387
  console.error("❌ Login failed:", e.message);
388
- throw new Error("Login failed. Check credentials, if CAPTCHA is present, or try again.");
389
  }
390
- } else {
391
- console.log("⚠️ No login credentials provided. Full unblurred content requires premium account.");
392
  }
393
 
 
394
  console.log(`πŸ“„ Navigating to ${url}...`);
395
- // Navigate with retry logic
396
  let navigationSuccess = false;
397
  let attempts = 0;
398
  const maxAttempts = 3;
399
  while (!navigationSuccess && attempts < maxAttempts) {
400
  try {
401
  attempts++;
 
402
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
403
  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
404
  navigationSuccess = true;
@@ -409,14 +397,16 @@ const studocuDownloader = async (url, options = {}) => {
409
  }
410
  }
411
 
412
- // Wait for initial load
413
  await new Promise(resolve => setTimeout(resolve, 5000));
414
 
415
  // Apply content unblurring
416
- await unblurContent(page);
417
 
418
- // Wait for document content with multiple selectors
 
419
  console.log("⏳ Waiting for document content to load...");
 
420
  const contentSelectors = [
421
  '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
422
  'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
@@ -437,8 +427,10 @@ const studocuDownloader = async (url, options = {}) => {
437
  console.log("⚠️ No specific content selector found, proceeding with page content...");
438
  }
439
 
440
- // Enhanced scrolling to load all content with loop for stability
 
441
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
 
442
  await page.evaluate(async () => {
443
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
444
  let scrollHeight = document.body.scrollHeight;
@@ -448,23 +440,26 @@ const studocuDownloader = async (url, options = {}) => {
448
  while (totalHeight < scrollHeight) {
449
  window.scrollBy(0, distance);
450
  totalHeight += distance;
451
- await delay(500); // Increased delay for better loading
452
  }
453
- await delay(2000); // Extra wait after reaching bottom
454
  const newHeight = document.body.scrollHeight;
455
  if (newHeight === scrollHeight) break;
456
  scrollHeight = newHeight;
457
  }
458
- // Scroll to top
459
  window.scrollTo({ top: 0, behavior: "smooth" });
460
  await delay(1000);
461
  });
462
 
 
 
463
  // Re-apply unblur after loading new content
464
- await unblurContent(page);
465
 
466
  // Wait for all images to load
 
467
  console.log("πŸ–ΌοΈ Waiting for all images to load...");
 
468
  await page.evaluate(async () => {
469
  const images = Array.from(document.querySelectorAll('img'));
470
  await Promise.all(images.map(img => {
@@ -477,11 +472,10 @@ const studocuDownloader = async (url, options = {}) => {
477
  }));
478
  });
479
 
480
- // Additional wait for any lazy loading
481
- await new Promise(resolve => setTimeout(resolve, 10000));
482
 
483
- // Set exact height to avoid extra blank pages
484
- console.log("πŸ“ Setting exact document height...");
485
  await page.evaluate(() => {
486
  const getDocumentHeight = () => Math.max(
487
  document.body.scrollHeight, document.body.offsetHeight,
@@ -493,7 +487,7 @@ const studocuDownloader = async (url, options = {}) => {
493
  document.body.style.overflow = 'hidden !important';
494
  });
495
 
496
- // Final content verification
497
  const contentCheck = await page.evaluate(() => {
498
  const textContent = document.body.textContent || '';
499
  const images = document.querySelectorAll('img');
@@ -505,10 +499,10 @@ const studocuDownloader = async (url, options = {}) => {
505
  totalText: textContent.length,
506
  totalImages: images.length,
507
  documentImages: documentImages.length,
508
- hasDocumentContent: documentImages.length > 0 || textContent.length > 1000,
509
- sampleText: textContent.substring(0, 300)
510
  };
511
  });
 
512
  console.log("πŸ“Š Content verification:", {
513
  textLength: contentCheck.totalText,
514
  images: contentCheck.totalImages,
@@ -517,17 +511,16 @@ const studocuDownloader = async (url, options = {}) => {
517
  });
518
 
519
  if (!contentCheck.hasDocumentContent) {
520
- console.warn("⚠️ Warning: Limited document content detected. Use premium credentials for full access.");
521
  }
522
 
523
- // Apply print styles
524
- await applyPrintStyles(page);
525
-
526
- // Emulate print media
527
  await page.emulateMediaType('print');
528
 
529
- // Generate PDF
530
  console.log("πŸ”„ Generating PDF...");
 
531
  const pdfBuffer = await page.pdf({
532
  printBackground: true,
533
  preferCSSPageSize: true,
@@ -537,20 +530,14 @@ const studocuDownloader = async (url, options = {}) => {
537
  omitBackground: false
538
  });
539
 
 
540
  console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
541
  return pdfBuffer;
542
 
543
  } catch (error) {
 
544
  console.error("❌ Error during PDF generation:", error);
545
- if (error.message.includes('timeout')) {
546
- throw new Error("Request timed out. The document may be taking too long to load. Please try again.");
547
- } else if (error.message.includes('net::')) {
548
- throw new Error("Network error. Please check the URL and your internet connection.");
549
- } else if (error.message.includes('ERR_BLOCKED')) {
550
- throw new Error("Access blocked. Try again or check if the document is publicly accessible.");
551
- } else {
552
- throw new Error(`Failed to generate PDF: ${error.message}`);
553
- }
554
  } finally {
555
  if (browser) {
556
  console.log("πŸ”’ Closing browser...");
@@ -563,141 +550,14 @@ const studocuDownloader = async (url, options = {}) => {
563
  }
564
  };
565
 
566
- /**
567
- * NEW: StuDocu downloader with page-by-page progress streaming
568
- */
569
- const studocuDownloaderStreamed = async (url, options, res) => {
570
- let browser;
571
- try {
572
- console.log("πŸš€ Launching browser for streaming with stealth configuration...");
573
- browser = await puppeteer.launch({
574
- headless: true,
575
- args: [
576
- '--no-sandbox',
577
- '--disable-setuid-sandbox',
578
- '--disable-dev-shm-usage',
579
- '--disable-accelerated-2d-canvas',
580
- '--no-first-run',
581
- '--no-zygote',
582
- '--disable-gpu'
583
- ],
584
- timeout: 300000,
585
- });
586
-
587
- const page = await browser.newPage();
588
- await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
589
- await page.setViewport({ width: 794, height: 1122 });
590
-
591
- await page.evaluateOnNewDocument(() => {
592
- Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
593
- });
594
-
595
- await bypassCookiesAndRestrictions(page);
596
-
597
- await page.setRequestInterception(true);
598
- page.on('request', (req) => {
599
- if (['image', 'stylesheet', 'font', 'other'].includes(req.resourceType()) && !req.url().includes('studocu.com')) {
600
- req.abort();
601
- } else {
602
- req.continue();
603
- }
604
- });
605
-
606
- if (options.email && options.password) {
607
- console.log("πŸ”‘ Logging in for streaming...");
608
- await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded' });
609
- await page.waitForSelector('#email');
610
- await page.type('#email', options.email);
611
- await page.type('#password', options.password);
612
- await page.click('button[type="submit"]');
613
- await page.waitForNavigation({ waitUntil: 'networkidle2' });
614
- console.log("βœ… Login successful for streaming.");
615
- }
616
-
617
- console.log(`πŸ“„ Navigating to ${url} for streaming...`);
618
- await page.goto(url, { waitUntil: 'domcontentloaded' });
619
- await new Promise(resolve => setTimeout(resolve, 5000));
620
-
621
- await unblurContent(page);
622
-
623
- console.log("⏳ Waiting for document pages to load...");
624
- await page.waitForSelector('[data-page]', { timeout: 30000 });
625
-
626
- console.log("πŸ“œ Scrolling to load all pages for streaming...");
627
- await page.evaluate(async () => {
628
- await new Promise(resolve => {
629
- let totalHeight = 0;
630
- const distance = 100;
631
- const timer = setInterval(() => {
632
- const scrollHeight = document.body.scrollHeight;
633
- window.scrollBy(0, distance);
634
- totalHeight += distance;
635
- if (totalHeight >= scrollHeight) {
636
- clearInterval(timer);
637
- resolve();
638
- }
639
- }, 100);
640
- });
641
- });
642
-
643
- await unblurContent(page);
644
- await new Promise(resolve => setTimeout(resolve, 5000));
645
-
646
- const pageElements = await page.$$('[data-page]');
647
- const totalPages = pageElements.length;
648
- console.log(`πŸ“„ Found ${totalPages} pages to stream.`);
649
-
650
- if (totalPages === 0) {
651
- throw new Error("No document pages found to stream. The content might be protected or not loaded correctly.");
652
- }
653
-
654
- // Set headers for streaming
655
- res.setHeader('Content-Type', 'application/json');
656
- res.setHeader('Transfer-Encoding', 'chunked');
657
-
658
- for (let i = 0; i < totalPages; i++) {
659
- console.log(`🎨 Rendering page ${i + 1} of ${totalPages}...`);
660
- const pageElement = pageElements[i];
661
- const imageData = await pageElement.screenshot({ type: 'png', encoding: 'base64' });
662
-
663
- const progressUpdate = {
664
- pageNumber: i + 1,
665
- totalPages: totalPages,
666
- imageData: `data:image/png;base64,${imageData}`
667
- };
668
-
669
- res.write(JSON.stringify(progressUpdate) + '\n'); // Send as a new line delimited JSON
670
- }
671
-
672
- console.log("βœ… All pages have been rendered and sent.");
673
-
674
- } catch (error) {
675
- console.error("❌ Error during streamed download:", error);
676
- const errorResponse = {
677
- error: `Failed to generate streamed PDF: ${error.message}`
678
- };
679
- if (!res.headersSent) {
680
- res.status(500).json(errorResponse);
681
- } else {
682
- res.write(JSON.stringify(errorResponse) + '\n');
683
- }
684
- } finally {
685
- if (browser) {
686
- console.log("πŸ”’ Closing browser for streaming...");
687
- await browser.close();
688
- }
689
- if (!res.writableEnded) {
690
- res.end(); // End the stream
691
- }
692
- }
693
- };
694
-
695
 
696
  // API Routes
697
 
698
- // Original endpoint for downloading the full PDF at once
699
  app.post('/api/download', async (req, res) => {
700
- const { url, filename, email, password } = req.body;
 
 
701
  if (!url) {
702
  return res.status(400).json({ error: 'URL is required.' });
703
  }
@@ -710,74 +570,122 @@ app.post('/api/download', async (req, res) => {
710
  normalizedUrl = 'https://' + normalizedUrl;
711
  }
712
 
713
- console.log(`🎯 Processing request for: ${normalizedUrl}`);
714
- try {
715
- const startTime = Date.now();
716
- const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
717
- const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
718
- res.setHeader('Content-Type', 'application/pdf');
719
- res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
720
- res.setHeader('Content-Length', pdfBuffer.length);
721
- res.send(pdfBuffer);
722
- console.log(`πŸŽ‰ Request completed successfully in ${processingTime}s`);
723
- } catch (error) {
724
- console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
725
- res.status(500).json({ error: error.message });
726
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
  });
728
 
729
- // NEW: Endpoint for streaming the document page by page
730
- app.post('/api/download-stream', async (req, res) => {
731
- const { url, email, password } = req.body;
732
- if (!url) {
733
- return res.status(400).json({ error: 'URL is required.' });
734
- }
735
- if (!url.includes('studocu.com')) {
736
- return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
737
- }
738
 
739
- let normalizedUrl = url.trim();
740
- if (!normalizedUrl.startsWith('http')) {
741
- normalizedUrl = 'https://' + normalizedUrl;
742
  }
743
 
744
- console.log(`🎯 Processing stream request for: ${normalizedUrl}`);
745
- try {
746
- await studocuDownloaderStreamed(normalizedUrl, { email, password }, res);
747
- console.log(`πŸŽ‰ Stream request completed for ${normalizedUrl}`);
748
- } catch (error) {
749
- console.error(`❌ Failed to process stream for ${normalizedUrl}:`, error.message);
750
- // Error is handled within the downloader function to ensure proper response closure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
  }
 
 
 
 
 
 
 
 
752
  });
753
 
754
 
 
755
  app.get('/health', (req, res) => {
756
  res.json({
757
  status: 'healthy',
758
  timestamp: new Date().toISOString(),
759
- uptime: process.uptime()
 
760
  });
761
  });
762
 
763
  app.get('/', (req, res) => {
764
  res.json({
765
- message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles and Streaming',
766
- version: '5.3.1',
767
  features: [
768
  'πŸͺ Advanced cookie banner bypass',
769
- 'πŸ”“ Premium content unblurring (client-side only; server-side blur requires premium login)',
770
- 'πŸ”‘ Login support for full unblurred content access',
771
- 'πŸ€– Anti-bot detection evasion',
772
- 'πŸ“„ Full document content extraction with print styles for clean PDF',
773
- 'πŸ”„ Real-time page rendering and streaming to the frontend'
774
  ],
775
  endpoints: {
776
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
777
- download_stream: 'POST /api/download-stream (body: {url, email?, password?})',
778
  health: 'GET /health'
779
- },
780
- note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
781
  });
782
  });
783
 
@@ -792,6 +700,6 @@ process.on('SIGINT', () => {
792
  });
793
 
794
  app.listen(port, () => {
795
- console.log(`πŸš€ Enhanced StuDocu Downloader v5.3.1 running on http://localhost:${port}`);
796
- console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection, and real-time page streaming`);
797
  });
 
1
  const express = require('express');
2
  const puppeteer = require('puppeteer');
3
  const cors = require('cors');
4
+ const { EventEmitter } = require('events');
5
+
6
  const app = express();
7
  const port = 7860;
8
 
9
  app.use(cors());
10
  app.use(express.json());
11
 
12
// Progress tracking system
/**
 * Holds the latest progress state of one download session and broadcasts
 * every update to listeners as a 'progress' event.
 */
class ProgressTracker extends EventEmitter {
  /**
   * @param {string} sessionId - Identifier included in every emitted event.
   */
  constructor(sessionId) {
    super();
    this.sessionId = sessionId;
    this.progress = 0;
    this.status = 'initializing';
    this.message = '';
  }

  /**
   * Record a new state, emit it as a 'progress' event and log it.
   *
   * Non-negative progress values never move backwards: helpers in this file
   * report fixed percentages and some run more than once per job, which would
   * otherwise make the reported value jump back. Negative values (callers in
   * this file report -1 together with status 'error') are passed through
   * unchanged so a failure is always visible.
   *
   * @param {number} progress - Percentage, or a negative error sentinel.
   * @param {string} status - Short machine-readable phase name.
   * @param {string} message - Human-readable description of the step.
   */
  updateProgress(progress, status, message) {
    const effectiveProgress =
      progress < 0 ? progress : Math.max(this.progress, progress);

    this.progress = effectiveProgress;
    this.status = status;
    this.message = message;

    const progressData = {
      sessionId: this.sessionId,
      progress: effectiveProgress,
      status,
      message,
      timestamp: new Date().toISOString()
    };

    // Emit the 'progress' event for SSE listeners
    this.emit('progress', progressData);
    console.log(`πŸ“Š [${this.sessionId}] ${effectiveProgress}% - ${status}: ${message}`);
  }
}
38
+
39
+ // Store active progress trackers
40
+ const progressTrackers = new Map();
41
+
42
  /**
43
  * Advanced cookie banner and content bypass for StuDocu
44
  */
45
+ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
46
+ progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
47
+
48
  console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
49
  // Step 1: Set cookies before page load
50
  const preCookies = [
 
157
  }, 1000);
158
  });
159
 
160
+ progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
161
  return true;
162
  };
163
 
164
  /**
165
+ * Enhanced content unblurring and premium bypass
166
  */
167
+ const unblurContent = async (page, progressTracker) => {
168
+ progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
169
+
170
  console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
171
  await page.evaluate(() => {
 
172
  const removeRestrictions = () => {
173
  const removeBySelector = (selector) => {
174
  document.querySelectorAll(selector).forEach(el => el.remove());
175
  };
176
 
177
+ removeBySelector("#adbox, .adsbox, .ad-box, .banner-ads, .advert");
 
 
 
 
 
 
 
178
  removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");
179
 
 
180
  const removeBlur = (element = document) => {
181
  element.querySelectorAll("*").forEach(el => {
182
  const style = window.getComputedStyle(el);
 
183
  if (
184
  style.filter?.includes("blur") ||
185
  style.backdropFilter?.includes("blur") ||
 
194
  el.classList.remove("blur", "blurred", "premium-blur");
195
  }
196
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  });
198
  };
199
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  removeBlur();
201
+ removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i]');
202
 
 
 
 
 
203
  const contentSelectors = [
204
  '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
205
  '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
 
213
  el.style.setProperty('pointer-events', 'auto', 'important');
214
  });
215
  });
 
 
 
 
 
 
 
 
 
 
 
 
216
  };
217
 
 
218
  removeRestrictions();
 
 
219
  const intervalId = setInterval(removeRestrictions, 2000);
220
+ setTimeout(() => clearInterval(intervalId), 60000);
 
 
 
 
221
  });
222
+
223
+ progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
224
  };
225
 
226
  /**
227
+ * Apply print styles for clean PDF output
228
  */
229
+ const applyPrintStyles = async (page, progressTracker) => {
230
+ progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
231
+
232
  console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
233
  await page.evaluate(() => {
234
  const style = document.createElement("style");
 
279
  `;
280
  document.head.appendChild(style);
281
  });
282
+
283
+ progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
284
  };
285
 
286
  /**
287
+ * Enhanced StuDocu downloader with progress tracking
288
  */
289
+ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
290
  let browser;
291
  try {
292
+ progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
293
+
294
  console.log("πŸš€ Launching browser with stealth configuration...");
295
  browser = await puppeteer.launch({
296
  headless: true,
 
318
 
319
  const page = await browser.newPage();
320
 
321
+ progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
322
+
323
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
324
  await page.setViewport({ width: 794, height: 1122 });
325
 
 
326
  await page.evaluateOnNewDocument(() => {
327
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
328
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
 
330
  });
331
 
332
  // Set up cookie and content bypass
333
+ await bypassCookiesAndRestrictions(page, progressTracker);
334
 
335
  // Block unnecessary resources
336
  await page.setRequestInterception(true);
337
  page.on('request', (req) => {
338
  const resourceType = req.resourceType();
339
  const reqUrl = req.url();
 
340
  if (
341
  reqUrl.includes('doubleclick') ||
342
  reqUrl.includes('googletagmanager') ||
 
356
  }
357
  });
358
 
359
+ // Login if credentials provided
360
  if (options.email && options.password) {
361
+ progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
362
+
363
  console.log("πŸ”‘ Logging in to StuDocu...");
364
  await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
365
  await page.waitForSelector('#email', { timeout: 15000 });
 
368
  await page.click('button[type="submit"]');
369
  try {
370
  await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
 
371
  await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
372
  console.log("βœ… Login successful.");
373
+ progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
374
  } catch (e) {
375
  console.error("❌ Login failed:", e.message);
376
+ throw new Error("Login failed. Check credentials or try again.");
377
  }
 
 
378
  }
379
 
380
+ progressTracker?.updateProgress(25, 'navigating', 'Navigating to document...');
381
  console.log(`πŸ“„ Navigating to ${url}...`);
382
+
383
  let navigationSuccess = false;
384
  let attempts = 0;
385
  const maxAttempts = 3;
386
  while (!navigationSuccess && attempts < maxAttempts) {
387
  try {
388
  attempts++;
389
+ progressTracker?.updateProgress(25 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
390
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
391
  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
392
  navigationSuccess = true;
 
397
  }
398
  }
399
 
400
+ progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
401
  await new Promise(resolve => setTimeout(resolve, 5000));
402
 
403
  // Apply content unblurring
404
+ await unblurContent(page, progressTracker);
405
 
406
+ // Wait for document content
407
+ progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
408
  console.log("⏳ Waiting for document content to load...");
409
+
410
  const contentSelectors = [
411
  '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
412
  'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
 
427
  console.log("⚠️ No specific content selector found, proceeding with page content...");
428
  }
429
 
430
+ // Enhanced scrolling to load all content
431
+ progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
432
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
433
+
434
  await page.evaluate(async () => {
435
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
436
  let scrollHeight = document.body.scrollHeight;
 
440
  while (totalHeight < scrollHeight) {
441
  window.scrollBy(0, distance);
442
  totalHeight += distance;
443
+ await delay(500);
444
  }
445
+ await delay(2000);
446
  const newHeight = document.body.scrollHeight;
447
  if (newHeight === scrollHeight) break;
448
  scrollHeight = newHeight;
449
  }
 
450
  window.scrollTo({ top: 0, behavior: "smooth" });
451
  await delay(1000);
452
  });
453
 
454
+ progressTracker?.updateProgress(70, 'processing', 'Processing loaded content...');
455
+
456
  // Re-apply unblur after loading new content
457
+ await unblurContent(page, progressTracker);
458
 
459
  // Wait for all images to load
460
+ progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
461
  console.log("πŸ–ΌοΈ Waiting for all images to load...");
462
+
463
  await page.evaluate(async () => {
464
  const images = Array.from(document.querySelectorAll('img'));
465
  await Promise.all(images.map(img => {
 
472
  }));
473
  });
474
 
475
+ await new Promise(resolve => setTimeout(resolve, 5000));
476
+ progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');
477
 
478
+ // Set exact height
 
479
  await page.evaluate(() => {
480
  const getDocumentHeight = () => Math.max(
481
  document.body.scrollHeight, document.body.offsetHeight,
 
487
  document.body.style.overflow = 'hidden !important';
488
  });
489
 
490
+ // Content verification
491
  const contentCheck = await page.evaluate(() => {
492
  const textContent = document.body.textContent || '';
493
  const images = document.querySelectorAll('img');
 
499
  totalText: textContent.length,
500
  totalImages: images.length,
501
  documentImages: documentImages.length,
502
+ hasDocumentContent: documentImages.length > 0 || textContent.length > 1000
 
503
  };
504
  });
505
+
506
  console.log("πŸ“Š Content verification:", {
507
  textLength: contentCheck.totalText,
508
  images: contentCheck.totalImages,
 
511
  });
512
 
513
  if (!contentCheck.hasDocumentContent) {
514
+ console.warn("⚠️ Warning: Limited document content detected.");
515
  }
516
 
517
+ // Apply print styles and generate PDF
518
+ await applyPrintStyles(page, progressTracker);
 
 
519
  await page.emulateMediaType('print');
520
 
521
+ progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
522
  console.log("πŸ”„ Generating PDF...");
523
+
524
  const pdfBuffer = await page.pdf({
525
  printBackground: true,
526
  preferCSSPageSize: true,
 
530
  omitBackground: false
531
  });
532
 
533
+ progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
534
  console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
535
  return pdfBuffer;
536
 
537
  } catch (error) {
538
+ progressTracker?.updateProgress(-1, 'error', error.message);
539
  console.error("❌ Error during PDF generation:", error);
540
+ throw error;
 
 
 
 
 
 
 
 
541
  } finally {
542
  if (browser) {
543
  console.log("πŸ”’ Closing browser...");
 
550
  }
551
  };
552
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
 
554
  // API Routes
555
 
556
+ // Enhanced download endpoint with progress tracking
557
  app.post('/api/download', async (req, res) => {
558
+ // Note: The original client code was sending sessionId in the body.
559
+ // We will use the one passed in the body or generate a new one.
560
+ const { url, filename, email, password, sessionId: reqSessionId } = req.body;
561
  if (!url) {
562
  return res.status(400).json({ error: 'URL is required.' });
563
  }
 
570
  normalizedUrl = 'https://' + normalizedUrl;
571
  }
572
 
573
+ // Use the session ID from the request or create a new one.
574
+ const sessionId = reqSessionId || Date.now().toString();
575
+ const progressTracker = new ProgressTracker(sessionId);
576
+ progressTrackers.set(sessionId, progressTracker);
577
+
578
+ console.log(`🎯 Processing request for: ${normalizedUrl} [Session: ${sessionId}]`);
579
+
580
+ // We don't wait for the downloader to finish.
581
+ // It runs in the background while we immediately return a response.
582
+ studocuDownloader(normalizedUrl, { filename, email, password }, progressTracker)
583
+ .then(pdfBuffer => {
584
+ // Store the result for the user to download later or handle as needed
585
+ progressTracker.pdfBuffer = pdfBuffer;
586
+ console.log(`πŸŽ‰ PDF is ready for download [Session: ${sessionId}]`);
587
+ })
588
+ .catch(error => {
589
+ console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
590
+ // You can emit a final error event here if you want
591
+ progressTracker.updateProgress(-1, 'error', error.message || 'An unknown error occurred.');
592
+ })
593
+ .finally(() => {
594
+ // Optional: Clean up the tracker after some time
595
+ setTimeout(() => {
596
+ const tracker = progressTrackers.get(sessionId);
597
+ // Don't delete if there's a PDF buffer waiting to be downloaded
598
+ if (tracker && !tracker.pdfBuffer) {
599
+ progressTrackers.delete(sessionId);
600
+ }
601
+ }, 300000); // 5 minutes
602
+ });
603
+
604
+ // Immediately respond to the client so it can start listening to the progress stream.
605
+ res.status(202).json({
606
+ message: "Download process started.",
607
+ sessionId: sessionId
608
+ });
609
  });
610
 
611
// ***************************************************************
// ** SERVER-SENT EVENTS (SSE) ENDPOINT FOR REAL-TIME PROGRESS **
// ***************************************************************
/**
 * GET /api/progress-stream/:sessionId
 *
 * Streams progress updates for one session as Server-Sent Events. Responds
 * with 404 JSON when no tracker is registered for the session id.
 *
 * The tracker's current state is sent as soon as the stream opens. A client
 * can only subscribe after it has received the session id, so without this
 * replay it would miss everything emitted before it connected, and would
 * receive nothing at all if the job had already finished.
 *
 * NOTE(review): the stream is not closed by the server after a terminal
 * status; presumably the client closes it when it sees one - confirm.
 */
app.get('/api/progress-stream/:sessionId', (req, res) => {
  const { sessionId } = req.params;
  const tracker = progressTrackers.get(sessionId);

  if (!tracker) {
    return res.status(404).json({ error: 'Session not found' });
  }

  // Set headers for SSE
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.flushHeaders(); // Flush the headers to establish the connection

  // Writes one SSE "data:" frame; skipped once the response is no longer writable
  const sendProgress = (data) => {
    if (res.writableEnded || res.destroyed) {
      return;
    }
    res.write(`data: ${JSON.stringify(data)}\n\n`);
  };

  // Replay the latest known state so late subscribers are not left waiting
  sendProgress({
    sessionId,
    progress: tracker.progress,
    status: tracker.status,
    message: tracker.message,
    timestamp: new Date().toISOString()
  });

  // Attach the listener to the specific tracker instance
  tracker.on('progress', sendProgress);

  // Handle client disconnect
  req.on('close', () => {
    // Remove the listener for this specific client
    tracker.removeListener('progress', sendProgress);
    console.log(`πŸ”Œ Client disconnected for session: ${sessionId}`);
  });
});
643
+
644
/**
 * GET /api/progress/:sessionId
 *
 * Polling counterpart of the progress stream: returns the tracker's current
 * progress, status and message as one JSON document, stamped with the time
 * of the request. Responds with 404 JSON when the session is unknown.
 */
app.get('/api/progress/:sessionId', (req, res) => {
  const sessionId = req.params.sessionId;
  const activeTracker = progressTrackers.get(sessionId);

  if (activeTracker) {
    const { progress, status, message } = activeTracker;
    res.json({
      sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString()
    });
    return;
  }

  res.status(404).json({ error: 'Session not found' });
});
661
 
662
 
663
/**
 * GET /health
 *
 * Liveness probe: reports a fixed 'healthy' status, the current time, the
 * process uptime and the number of entries in the progress-tracker map.
 */
app.get('/health', (_req, res) => {
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    activeDownloads: progressTrackers.size
  };

  res.json(report);
});
672
 
673
  app.get('/', (req, res) => {
674
  res.json({
675
+ message: 'πŸš€ Enhanced StuDocu Downloader API v5.0 - Real-time Progress Tracking',
676
+ version: '5.0.0',
677
  features: [
678
  'πŸͺ Advanced cookie banner bypass',
679
+ 'πŸ”“ Premium content unblurring',
680
+ 'πŸ”‘ Login support for full access',
681
+ 'πŸ“Š Real-time progress tracking via polling',
682
+ 'πŸ“„ Clean PDF generation with print styles'
 
683
  ],
684
  endpoints: {
685
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
686
+ progress: 'GET /api/progress/:sessionId',
687
  health: 'GET /health'
688
+ }
 
689
  });
690
  });
691
 
 
700
  });
701
 
702
  app.listen(port, () => {
703
+ console.log(`πŸš€ Enhanced StuDocu Downloader v5.0.0 running on http://localhost:${port}`);
704
+ console.log(`✨ Features: Real-time progress tracking and enhanced user feedback`);
705
  });