const puppeteer = require('puppeteer');
const { decrypt } = require('../utils/crypto');

// Dashboard URL; an unauthenticated visit is redirected to the login screen.
const DASHBOARD_URL = 'https://lms.nust.edu.pk/portal/my/';

// Selector for one entry in the Moodle Timeline block.
const TIMELINE_ITEM_SELECTOR = '[data-region="event-list-item"]';

// Resource types that are aborted to speed up page loads.
const BLOCKED_RESOURCE_TYPES = new Set(['image', 'stylesheet', 'font', 'media']);

// Phrases that, when present in the page text, mean the timeline is genuinely empty.
const EMPTY_TIMELINE_MARKERS = [
  'No upcoming activities due',
  'No activities require action',
];

/**
 * Enables request interception on the page and aborts heavy asset requests.
 *
 * `abort()` / `continue()` return promises; they are given a rejection handler
 * here because a rejection inside an event listener would otherwise surface as
 * an unhandled promise rejection (e.g. when the page is closing mid-request).
 *
 * @param {object} page - Puppeteer page to configure.
 * @returns {Promise<void>}
 */
async function blockHeavyAssets(page) {
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    const resolution = BLOCKED_RESOURCE_TYPES.has(req.resourceType())
      ? req.abort()
      : req.continue();
    resolution.catch((err) => {
      console.warn(`[Scraper] Request interception failed: ${err.message}`);
    });
  });
}

/**
 * Scrapes the Moodle LMS dashboard for deadlines.
 *
 * Decrypts the stored password, logs in through a headless browser, waits for
 * the Timeline block and extracts one record per timeline entry.
 *
 * NOTE(review): every failure (bad credentials, outage, timeout) is logged and
 * then reported to the caller as an empty array, exactly like a genuinely empty
 * timeline. Callers cannot tell the two apart — confirm this is intended.
 *
 * @param {string} username - The plaintext LMS username.
 * @param {string} encryptedPassword - The AES-encrypted password from DB.
 * @param {string} iv - The initialization vector for decryption.
 * @returns {Promise<Array<{courseName: string, assignmentTitle: string, dateString: string, rawHtml: (string|undefined)}>>}
 *   Array of deadline objects; empty when there are none or when scraping failed.
 */
async function scrapeDeadlines(username, encryptedPassword, iv) {
  let browser;
  try {
    // Decrypt the password
    const plaintextPassword = decrypt(encryptedPassword, iv);

    // Launch a headless Chrome browser
    console.log(`[Scraper] Launching browser for user: ${username}...`);
    browser = await puppeteer.launch({
      headless: 'new',
      // `||` (not `??`) on purpose: an empty CHROME_BIN also falls back to null.
      executablePath: process.env.CHROME_BIN || null,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
      ],
    });
    const page = await browser.newPage();

    // Skip images, stylesheets, fonts and media to cut page load time.
    await blockHeavyAssets(page);

    // Timeouts so a stalled page cannot hang the scrape indefinitely
    page.setDefaultNavigationTimeout(45000);
    page.setDefaultTimeout(45000);

    // Visiting the dashboard while logged out redirects to the login screen
    await page.goto(DASHBOARD_URL, { waitUntil: 'networkidle2' });

    // Wait for the login fields to appear
    await page.waitForSelector('#username', { timeout: 10000 });
    await page.waitForSelector('#password', { timeout: 10000 });

    // Type credentials and login
    await page.type('#username', username);
    await page.type('#password', plaintextPassword);
    console.log(`[Scraper] Credentials entered for ${username}. Logging in...`);

    // Click the login button and wait for the resulting navigation.
    // Both are started together so the navigation cannot be missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('#loginbtn'),
    ]);
    console.log(`[Scraper] Successfully logged into dashboard for ${username}. Waiting for timeline...`);

    // Wait for at least one timeline entry to render.
    try {
      await page.waitForSelector(TIMELINE_ITEM_SELECTOR, { timeout: 15000 });
    } catch (waitErr) {
      // No entry appeared: decide whether the timeline is really empty or the page is broken.
      const bodyText = await page.evaluate(() => document.body.innerText);
      if (EMPTY_TIMELINE_MARKERS.some((marker) => bodyText.includes(marker))) {
        console.log(`[Scraper] Verified officially empty timeline for ${username}.`);
        return []; // The page itself says there is nothing due.
      }

      // The login button still being on the page means we never left the login form.
      const loginButton = await page.$('#loginbtn');
      if (loginButton !== null) {
        throw new Error(
          'Login form is still displayed after submitting credentials. Login appears to have failed. Aborting scrape.',
          { cause: waitErr },
        );
      }

      throw new Error(
        'Timeline failed to load completely. Suspected Moodle outage. Aborting scrape.',
        { cause: waitErr },
      );
    }

    // Extract assignments from the Timeline block. The callback runs inside the
    // browser page, so it can only use what is passed in as an argument.
    const deadlines = await page.evaluate((itemSelector) => {
      const results = [];
      const eventItems = document.querySelectorAll(itemSelector);

      eventItems.forEach((item) => {
        try {
          // Course name: first `.text-truncate` element inside the item
          const courseEl = item.querySelector('.text-truncate');
          const courseName = courseEl ? courseEl.innerText.trim() : 'Unknown Course';

          // Assignment title: link inside `.text-truncate`, else inside `.event-name-container`
          const titleEl =
            item.querySelector('.text-truncate a') ||
            item.querySelector('.event-name-container a');
          const assignmentTitle = titleEl ? titleEl.innerText.trim() : 'Unknown Assignment';

          // Titles may carry an emoji and an "is due" suffix; that clean-up is left
          // to later formatting (the original note refers to a stringFormatter).

          // The date comes from the surrounding DOM: items are grouped under a
          // parent container that is preceded by an <h5> holding the date.
          let dateHeader = null;
          const parent = item.parentElement;
          if (
            parent &&
            parent.previousElementSibling &&
            parent.previousElementSibling.tagName === 'H5'
          ) {
            dateHeader = parent.previousElementSibling.innerText.trim();
          }

          // Time of day from inside the item
          const timeEl = item.querySelector('.text-right') || item.querySelector('.date');
          const timeString = timeEl ? timeEl.innerText.trim() : '';

          // Combine date and time; without a date header only the time is kept
          const dateString = dateHeader && timeString ? `${dateHeader}, ${timeString}` : timeString;

          // DEBUG: raw HTML of the item, for inspection in the terminal
          const rawHtml = item.innerHTML;

          if (assignmentTitle && dateString) {
            results.push({
              courseName,
              assignmentTitle,
              dateString,
              // Only the first item keeps its HTML, to avoid returning massive strings
              rawHtml: results.length === 0 ? rawHtml : undefined,
            });
          }
        } catch (err) {
          // Deliberate best-effort: skip malformed items and keep the rest
        }
      });

      return results;
    }, TIMELINE_ITEM_SELECTOR);

    console.log(`[Scraper] Extracted ${deadlines.length} raw deadlines for ${username}.`);
    return deadlines;
  } catch (error) {
    console.error(`[Scraper] Error scraping for ${username}:`, error.message);
    return [];
  } finally {
    if (browser) {
      try {
        await browser.close();
        console.log(`[Scraper] Browser closed for ${username}.`);
      } catch (closeErr) {
        console.error(`[Scraper] Error closing browser gracefully for ${username}, forcefully killing process...`, closeErr.message);
        // `!= null` covers both null and undefined.
        const chromeProcess = browser.process();
        if (chromeProcess != null) {
          chromeProcess.kill('SIGKILL');
        }
      }
    }
  }
}

module.exports = { scrapeDeadlines };