File size: 7,442 Bytes
c4be319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
const puppeteer = require('puppeteer');
const { decrypt } = require('../utils/crypto');

// URL of the Moodle dashboard; unauthenticated visitors are redirected to the login form.
const LMS_DASHBOARD_URL = 'https://lms.nust.edu.pk/portal/my/';

// Selector matching one entry of the dashboard Timeline block.
const TIMELINE_ITEM_SELECTOR = '[data-region="event-list-item"]';

// Resource types that are not needed to read the timeline and only slow the page down.
const BLOCKED_RESOURCE_TYPES = new Set(['image', 'stylesheet', 'font', 'media']);

// Texts whose presence in the page body means the timeline is genuinely empty.
const EMPTY_TIMELINE_MESSAGES = ['No upcoming activities due', 'No activities require action'];

/**
 * Enables request interception on the page and aborts heavy asset requests.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function blockHeavyAssets(page) {
    await page.setRequestInterception(true);
    page.on('request', (req) => {
        const resourceType = req.resourceType();
        const outcome = BLOCKED_RESOURCE_TYPES.has(resourceType) ? req.abort() : req.continue();
        // abort()/continue() return promises; without a handler a rejection would
        // surface as an unhandled rejection outside the scraper's try/catch.
        Promise.resolve(outcome).catch((interceptErr) => {
            console.warn(`[Scraper] Could not resolve intercepted ${resourceType} request:`, interceptErr.message);
        });
    });
}

/**
 * Opens the dashboard (which redirects to the login form), submits the
 * credentials and verifies that the login form is gone afterwards.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} username - The plaintext LMS username.
 * @param {string} password - The plaintext LMS password.
 * @returns {Promise<void>}
 * @throws {Error} If the login form is still displayed after submitting.
 */
async function logIntoDashboard(page, username, password) {
    await page.goto(LMS_DASHBOARD_URL, { waitUntil: 'networkidle2' });

    // Wait for the login fields to appear before typing into them.
    await page.waitForSelector('#username', { timeout: 10000 });
    await page.waitForSelector('#password', { timeout: 10000 });

    await page.type('#username', username);
    await page.type('#password', password);

    console.log(`[Scraper] Credentials entered for ${username}. Logging in...`);

    // Start waiting for the navigation before clicking so the event is not missed.
    await Promise.all([
        page.waitForNavigation({ waitUntil: 'networkidle2' }),
        page.click('#loginbtn'),
    ]);

    // If the login button is still on the page we were sent back to the login
    // form. Fail here with an accurate message instead of timing out on the
    // timeline later and blaming an outage.
    const loginButton = await page.$('#loginbtn');
    if (loginButton !== null) {
        throw new Error('Login form is still displayed after submitting credentials. Login was not accepted.');
    }
}

/**
 * Waits for the first timeline entry to appear.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} username - Used for log messages only.
 * @returns {Promise<boolean>} `true` when entries are present, `false` when the
 *     page states that the timeline is empty.
 * @throws {Error} If no entry appeared and the page does not state that the
 *     timeline is empty.
 */
async function timelineHasItems(page, username) {
    try {
        await page.waitForSelector(TIMELINE_ITEM_SELECTOR, { timeout: 15000 });
        return true;
    } catch (waitErr) {
        // No entry showed up: tell an explicitly empty timeline apart from a page
        // that simply failed to load.
        const bodyText = await page.evaluate(() => document.body.innerText);
        const isEmpty = EMPTY_TIMELINE_MESSAGES.some((message) => bodyText.includes(message));
        if (!isEmpty) {
            throw new Error("Timeline failed to load completely. Suspected Moodle outage. Aborting scrape.", { cause: waitErr });
        }
        console.log(`[Scraper] Verified officially empty timeline for ${username}.`);
        return false;
    }
}

/**
 * Collects the timeline entries from the DOM.
 *
 * Runs inside the browser page via `page.evaluate`, so it must stay
 * self-contained: it cannot reference anything from this module's scope.
 *
 * @param {string} itemSelector - Selector matching one timeline entry.
 * @returns {Array<object>} One `{ courseName, assignmentTitle, dateString, rawHtml }`
 *     object per entry that has both a title and a date/time string.
 */
function extractTimelineItems(itemSelector) {
    const trimmedText = (el) => (el ? el.innerText.trim() : null);
    const results = [];

    for (const item of document.querySelectorAll(itemSelector)) {
        try {
            const courseName = trimmedText(item.querySelector('.text-truncate')) ?? 'Unknown Course';

            const titleEl = item.querySelector('.text-truncate a') || item.querySelector('.event-name-container a');
            const assignmentTitle = trimmedText(titleEl) ?? 'Unknown Assignment';

            // The date is read from an <h5> that directly precedes the item's parent element.
            const groupHeader = item.parentElement?.previousElementSibling;
            const dateHeader = groupHeader?.tagName === 'H5' ? groupHeader.innerText.trim() : null;

            // The time of day is read from inside the item itself.
            const timeEl = item.querySelector('.text-right') || item.querySelector('.date');
            const timeString = trimmedText(timeEl) ?? '';

            const dateString = dateHeader && timeString ? `${dateHeader}, ${timeString}` : timeString;

            if (assignmentTitle && dateString) {
                results.push({
                    courseName,
                    assignmentTitle,
                    dateString,
                    // Debug aid: keep the raw HTML of the first entry only, to avoid returning massive strings.
                    rawHtml: results.length === 0 ? item.innerHTML : undefined,
                });
            }
        } catch (err) {
            // Deliberate best-effort: a malformed entry is skipped so the remaining
            // entries are still returned. Nothing is logged because this code runs
            // in the page, not in Node.
        }
    }

    return results;
}

/**
 * Closes the browser, killing its process if the graceful close fails.
 *
 * @param {object|undefined} browser - Puppeteer browser, or undefined if launch never succeeded.
 * @param {string} username - Used for log messages only.
 * @returns {Promise<void>}
 */
async function closeBrowser(browser, username) {
    if (!browser) {
        return;
    }
    try {
        await browser.close();
        console.log(`[Scraper] Browser closed for ${username}.`);
    } catch (closeErr) {
        console.error(`[Scraper] Error closing browser gracefully for ${username}, forcefully killing process...`, closeErr.message);
        const browserProcess = browser.process();
        if (browserProcess != null) {
            browserProcess.kill('SIGKILL');
        }
    }
}

/**
 * Scrapes the Moodle LMS dashboard timeline for deadlines.
 *
 * Decrypts the stored password, logs in through a headless Chrome session,
 * waits for the Timeline block and extracts one object per timeline entry.
 *
 * This function does not reject: every failure (decryption, browser launch,
 * navigation, rejected login, timeline not loading) is logged and reported as
 * an empty array. An empty result therefore does not by itself prove that the
 * student has no deadlines.
 *
 * @param {string} username - The plaintext LMS username.
 * @param {string} encryptedPassword - The AES-encrypted password from DB.
 * @param {string} iv - The initialization vector for decryption.
 * @returns {Promise<Array>} - Array of deadline objects
 *     (`courseName`, `assignmentTitle`, `dateString`, and `rawHtml` on the first one).
 */
async function scrapeDeadlines(username, encryptedPassword, iv) {
    let browser;
    try {
        const plaintextPassword = decrypt(encryptedPassword, iv);

        console.log(`[Scraper] Launching browser for user: ${username}...`);
        browser = await puppeteer.launch({
            headless: 'new',
            executablePath: process.env.CHROME_BIN || null,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-gpu'
            ]
        });

        const page = await browser.newPage();
        await blockHeavyAssets(page);

        // Explicit timeouts so a stalled page cannot hang the scrape indefinitely.
        page.setDefaultNavigationTimeout(45000);
        page.setDefaultTimeout(45000);

        await logIntoDashboard(page, username, plaintextPassword);
        console.log(`[Scraper] Successfully logged into dashboard for ${username}. Waiting for timeline...`);

        if (!(await timelineHasItems(page, username))) {
            return []; // The page itself states that there is nothing due.
        }

        const deadlines = await page.evaluate(extractTimelineItems, TIMELINE_ITEM_SELECTOR);
        console.log(`[Scraper] Extracted ${deadlines.length} raw deadlines for ${username}.`);
        return deadlines;
    } catch (error) {
        console.error(`[Scraper] Error scraping for ${username}:`, error.message);
        return [];
    } finally {
        await closeBrowser(browser, username);
    }
}

// Public API of this module: only the scraper entry point is exported.
module.exports = { scrapeDeadlines };