Spaces:
Runtime error
Runtime error
| const puppeteer = require('puppeteer'); | |
| const { decrypt } = require('../utils/crypto'); | |
/** Resource types aborted during page loads; the scraper only reads DOM text. */
const BLOCKED_RESOURCE_TYPES = ['image', 'stylesheet', 'font', 'media'];

/** Selector matching one entry of the Moodle dashboard Timeline block. */
const TIMELINE_ITEM_SELECTOR = '[data-region="event-list-item"]';

/**
 * Turns on request interception for `page` and aborts every request whose
 * resource type is listed in BLOCKED_RESOURCE_TYPES; all others continue.
 *
 * @param {object} page - A Puppeteer page.
 * @returns {Promise<void>} Resolves once interception is enabled.
 */
async function blockHeavyAssets(page) {
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    // Another listener may already have resolved this request; resolving it
    // a second time would throw.
    if (req.isInterceptResolutionHandled()) {
      return;
    }
    const resolution = BLOCKED_RESOURCE_TYPES.includes(req.resourceType())
      ? req.abort()
      : req.continue();
    // Deliberate best-effort: abort()/continue() reject when the page or
    // browser closes while the request is in flight. Without a handler that
    // rejection is unhandled and can take the whole Node process down.
    resolution.catch(() => {});
  });
}

/**
 * Logs into the Moodle LMS with the given credentials and scrapes the
 * dashboard Timeline block for upcoming deadlines.
 *
 * The function never rejects: any failure (decryption, browser launch,
 * navigation, login, timeline not loading) is logged and yields `[]`.
 * The browser, if one was launched, is always closed before returning.
 *
 * @param {string} username - The plaintext LMS username.
 * @param {string} encryptedPassword - The encrypted password, passed to `decrypt`.
 * @param {string} iv - The initialization vector, passed to `decrypt`.
 * @returns {Promise<Array<{courseName: string, assignmentTitle: string, dateString: string, rawHtml: (string|undefined)}>>}
 *   One object per timeline entry that has both a title and a date string.
 *   `rawHtml` is only populated on the first entry (debugging aid).
 */
async function scrapeDeadlines(username, encryptedPassword, iv) {
  let browser;
  try {
    const plaintextPassword = decrypt(encryptedPassword, iv);

    console.log(`[Scraper] Launching browser for user: ${username}...`);
    browser = await puppeteer.launch({
      headless: 'new',
      executablePath: process.env.CHROME_BIN || null,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu'
      ]
    });
    const page = await browser.newPage();

    // Skip images, stylesheets, fonts and media to speed up page loads.
    await blockHeavyAssets(page);

    // Upper bounds so a stalled page cannot hang the scrape indefinitely.
    page.setDefaultNavigationTimeout(45000);
    page.setDefaultTimeout(45000);

    // Visiting /my/ while logged out redirects to the login screen.
    await page.goto('https://lms.nust.edu.pk/portal/my/', { waitUntil: 'networkidle2' });

    // Both login fields must be present before typing (standard Moodle ids).
    await Promise.all([
      page.waitForSelector('#username', { timeout: 10000 }),
      page.waitForSelector('#password', { timeout: 10000 })
    ]);

    await page.type('#username', username);
    await page.type('#password', plaintextPassword);
    console.log(`[Scraper] Credentials entered for ${username}. Logging in...`);

    // Register the navigation wait before clicking so the navigation
    // triggered by the click cannot be missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('#loginbtn')
    ]);

    // If the login button is still on the page we were sent back to the
    // login form. Fail now with an accurate message instead of waiting for a
    // timeline that will never appear and then blaming a Moodle outage.
    // NOTE(review): assumes the dashboard never contains a #loginbtn element.
    const loginButton = await page.$('#loginbtn');
    if (loginButton !== null) {
      throw new Error('Login failed: still on the login page after submitting credentials.');
    }
    console.log(`[Scraper] Successfully logged into dashboard for ${username}. Waiting for timeline...`);

    try {
      await page.waitForSelector(TIMELINE_ITEM_SELECTOR, { timeout: 15000 });
    } catch (waitErr) {
      // No timeline entry appeared. Tell a genuinely empty timeline apart
      // from a page that failed to render by looking for Moodle's own
      // "nothing due" messages in the page text.
      const bodyText = await page.evaluate(() => document.body.innerText);
      const isOfficiallyEmpty =
        bodyText.includes("No upcoming activities due") ||
        bodyText.includes("No activities require action");
      if (!isOfficiallyEmpty) {
        throw new Error("Timeline failed to load completely. Suspected Moodle outage. Aborting scrape.");
      }
      console.log(`[Scraper] Verified officially empty timeline for ${username}.`);
      return [];
    }

    // Runs inside the browser page: it can only see the DOM and its own
    // arguments, hence the selector is passed in explicitly.
    const deadlines = await page.evaluate((itemSelector) => {
      const results = [];
      document.querySelectorAll(itemSelector).forEach((item) => {
        try {
          const courseEl = item.querySelector('.text-truncate');
          const courseName = courseEl ? courseEl.innerText.trim() : 'Unknown Course';

          const titleEl = item.querySelector('.text-truncate a') || item.querySelector('.event-name-container a');
          const assignmentTitle = titleEl ? titleEl.innerText.trim() : 'Unknown Assignment';

          // The date is read from an <h5> that immediately precedes the
          // item's parent element, when such a heading exists.
          let dateHeader = null;
          const parent = item.parentElement;
          if (parent && parent.previousElementSibling && parent.previousElementSibling.tagName === 'H5') {
            dateHeader = parent.previousElementSibling.innerText.trim();
          }

          // The time of day is read from inside the item itself.
          const timeEl = item.querySelector('.text-right') || item.querySelector('.date');
          const timeString = timeEl ? timeEl.innerText.trim() : '';

          // Without both parts, fall back to the time string alone.
          const dateString = dateHeader && timeString ? `${dateHeader}, ${timeString}` : timeString;

          if (assignmentTitle && dateString) {
            results.push({
              courseName,
              assignmentTitle,
              dateString,
              // Debug aid: raw markup of the first item only, to keep the
              // payload returned from the page small.
              rawHtml: results.length === 0 ? item.innerHTML : undefined
            });
          }
        } catch (err) {
          // Deliberately skip a malformed item so one bad entry does not
          // discard the rest of the timeline.
        }
      });
      return results;
    }, TIMELINE_ITEM_SELECTOR);

    console.log(`[Scraper] Extracted ${deadlines.length} raw deadlines for ${username}.`);
    return deadlines;
  } catch (error) {
    // NOTE(review): every failure is reported as [], so callers cannot tell
    // a failed scrape from an empty timeline. Kept as-is because callers may
    // rely on this function never rejecting.
    console.error(`[Scraper] Error scraping for ${username}:`, error.message);
    return [];
  } finally {
    if (browser) {
      try {
        await browser.close();
        console.log(`[Scraper] Browser closed for ${username}.`);
      } catch (closeErr) {
        console.error(`[Scraper] Error closing browser gracefully for ${username}, forcefully killing process...`, closeErr.message);
        // Last resort so a Chrome process is not left running.
        const chromeProcess = browser.process();
        if (chromeProcess != null) {
          chromeProcess.kill('SIGKILL');
        }
      }
    }
  }
}
| module.exports = { scrapeDeadlines }; | |