// Page-extraction residue kept for reference — Spaces status: Runtime error; File size: 7,442 Bytes; c4be319 (presumably the commit hash).
const puppeteer = require('puppeteer');
const { decrypt } = require('../utils/crypto');
/**
 * Enables request interception on `page` and aborts image, stylesheet, font
 * and media requests; every other request is allowed through.
 *
 * The original author's note credits this with cutting page load time from
 * roughly 30s to roughly 5s.
 *
 * @param {object} page - Puppeteer page to configure.
 * @returns {Promise<void>} Resolves once interception is enabled.
 */
async function blockHeavyAssets(page) {
  const blockedTypes = ['image', 'stylesheet', 'font', 'media'];

  const reportFailure = (err) => {
    console.warn('[Scraper] Request interception error:', err && err.message);
  };

  await page.setRequestInterception(true);
  page.on('request', (req) => {
    try {
      const pending = blockedTypes.includes(req.resourceType())
        ? req.abort()
        : req.continue();
      // abort()/continue() return promises. Nobody awaits them inside an event
      // listener, so a rejection must be handled here or it surfaces as an
      // unhandled rejection.
      Promise.resolve(pending).catch(reportFailure);
    } catch (err) {
      reportFailure(err);
    }
  });
}

/**
 * Collects the timeline entries from the current document.
 *
 * This function is passed to `page.evaluate`, so it executes inside the
 * browser page: it must stay self-contained and may only use its argument and
 * DOM globals, never variables from the surrounding Node module.
 *
 * @param {string} itemSelector - CSS selector matching one timeline entry.
 * @returns {Array<{courseName: string, assignmentTitle: string, dateString: string, rawHtml: (string|undefined)}>}
 *   One object per entry that has both a title and a date/time string.
 */
function collectTimelineEvents(itemSelector) {
  const results = [];

  document.querySelectorAll(itemSelector).forEach((item) => {
    try {
      // Course name: first `.text-truncate` element inside the entry.
      const courseEl = item.querySelector('.text-truncate');
      const courseName = courseEl ? courseEl.innerText.trim() : 'Unknown Course';

      // Assignment title: link text, with a fallback container selector.
      // The text is returned as-is; the original comment says cleanup (emoji,
      // "is due" suffix) is left to a downstream stringFormatter.
      const titleEl =
        item.querySelector('.text-truncate a') ||
        item.querySelector('.event-name-container a');
      const assignmentTitle = titleEl ? titleEl.innerText.trim() : 'Unknown Assignment';

      // Date: entries are grouped under a parent element that is preceded by
      // an <h5> holding the date text.
      const group = item.parentElement;
      const heading = group ? group.previousElementSibling : null;
      const dateHeader =
        heading && heading.tagName === 'H5' ? heading.innerText.trim() : null;

      // Time of day is read from inside the entry itself.
      const timeEl = item.querySelector('.text-right') || item.querySelector('.date');
      const timeString = timeEl ? timeEl.innerText.trim() : '';

      // Without a time string the entry ends up with an empty dateString and
      // is dropped by the check below.
      const dateString =
        dateHeader && timeString ? `${dateHeader}, ${timeString}` : timeString;

      if (assignmentTitle && dateString) {
        results.push({
          courseName,
          assignmentTitle,
          dateString,
          // DEBUG aid: raw markup of the first kept entry only, to avoid
          // returning massive strings.
          rawHtml: results.length === 0 ? item.innerHTML : undefined
        });
      }
    } catch (err) {
      // Best effort: a malformed entry is skipped so the rest still get returned.
    }
  });

  return results;
}

/**
 * Scrapes the Moodle LMS dashboard for deadlines.
 *
 * Flow: decrypt the password, launch headless Chrome, log in through the
 * dashboard URL (which redirects to the login form), wait for the timeline
 * entries and extract them.
 *
 * This function never rejects. Every failure (decryption, launch, navigation,
 * rejected login, timeline not loading) is logged and reported as an empty
 * array, so callers cannot tell "no deadlines" from "scrape failed" by the
 * return value alone. The browser is always closed before returning.
 *
 * @param {string} username - The plaintext LMS username.
 * @param {string} encryptedPassword - The AES-encrypted password from DB.
 * @param {string} iv - The initialization vector for decryption.
 * @returns {Promise<Array>} - Array of deadline objects
 *   (`courseName`, `assignmentTitle`, `dateString`, and `rawHtml` on the first one).
 */
async function scrapeDeadlines(username, encryptedPassword, iv) {
  const timelineItemSelector = '[data-region="event-list-item"]';
  let browser;

  try {
    const plaintextPassword = decrypt(encryptedPassword, iv);

    console.log(`[Scraper] Launching browser for user: ${username}...`);
    browser = await puppeteer.launch({
      headless: 'new',
      executablePath: process.env.CHROME_BIN || null,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu'
      ]
    });
    const page = await browser.newPage();

    await blockHeavyAssets(page);

    // Default timeouts so no navigation or wait can hang indefinitely.
    page.setDefaultNavigationTimeout(45000);
    page.setDefaultTimeout(45000);

    // Visiting /my/ while logged out redirects to the login screen.
    await page.goto('https://lms.nust.edu.pk/portal/my/', { waitUntil: 'networkidle2' });

    // Standard Moodle login field selectors.
    await page.waitForSelector('#username', { timeout: 10000 });
    await page.waitForSelector('#password', { timeout: 10000 });

    await page.type('#username', username);
    await page.type('#password', plaintextPassword);
    console.log(`[Scraper] Credentials entered for ${username}. Logging in...`);

    // Start waiting for the navigation before clicking so it cannot be missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('#loginbtn') // the standard Moodle login button ID
    ]);

    // If the login button is still on the page, the credentials were rejected.
    // Fail fast instead of waiting for a timeline that will never appear and
    // then blaming an outage.
    const loginButton = await page.$('#loginbtn');
    if (loginButton !== null) {
      throw new Error('Login failed: the login form is still displayed after submitting credentials.');
    }

    console.log(`[Scraper] Successfully logged into dashboard for ${username}. Waiting for timeline...`);

    try {
      await page.waitForSelector(timelineItemSelector, { timeout: 15000 });
    } catch (waitErr) {
      // No entries appeared. Distinguish an officially empty timeline from a
      // page that failed to load.
      const bodyText = await page.evaluate(() => document.body.innerText);
      const isOfficiallyEmpty =
        bodyText.includes("No upcoming activities due") ||
        bodyText.includes("No activities require action");

      if (isOfficiallyEmpty) {
        console.log(`[Scraper] Verified officially empty timeline for ${username}.`);
        return []; // Student has no assignments.
      }
      throw new Error(
        "Timeline failed to load completely. Suspected Moodle outage. Aborting scrape.",
        { cause: waitErr }
      );
    }

    const deadlines = await page.evaluate(collectTimelineEvents, timelineItemSelector);
    console.log(`[Scraper] Extracted ${deadlines.length} raw deadlines for ${username}.`);
    return deadlines;
  } catch (error) {
    console.error(`[Scraper] Error scraping for ${username}:`, error.message);
    return [];
  } finally {
    if (browser) {
      try {
        await browser.close();
        console.log(`[Scraper] Browser closed for ${username}.`);
      } catch (closeErr) {
        console.error(`[Scraper] Error closing browser gracefully for ${username}, forcefully killing process...`, closeErr.message);
        // Last resort: kill the Chrome child process so it cannot leak.
        if (browser.process() != null) {
          browser.process().kill('SIGKILL');
        }
      }
    }
  }
}
// Public API of this module: only the scraper entry point is exported.
module.exports = { scrapeDeadlines };