const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const express = require('express');
const path = require('path');
// Register the stealth plugin on the shared puppeteer-extra instance before
// any browser is launched.
puppeteer.use(StealthPlugin());

// Single Express application. Both body parsers are installed in one call;
// they are applied in the order listed (form-encoded first, then JSON).
const app = express();
app.use(
  express.urlencoded({ extended: true }),
  express.json(),
);
// Simple UI (frontend): GET / answers with a short static banner.
app.get('/', function sendLandingPage(_request, response) {
  const banner = `
AQSO Scraper Engine
Web Scraper Engine
`;
  response.send(banner);
});
// Resource types aborted during navigation. The handler only extracts text
// and link targets, so these downloads are skipped to speed up page loads.
const BLOCKED_RESOURCE_TYPES = new Set(['image', 'stylesheet', 'font', 'media']);

/**
 * Parses a caller-supplied value as an absolute http(s) URL.
 *
 * @param {unknown} candidate - Raw value taken from the request body.
 * @returns {URL|null} The parsed URL, or null when the value is not a string,
 *   cannot be parsed, or uses a scheme other than http/https.
 */
function parseHttpUrl(candidate) {
  if (typeof candidate !== 'string') {
    return null;
  }
  try {
    const parsed = new URL(candidate.trim());
    const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
    return isHttp ? parsed : null;
  } catch (_err) {
    // `new URL` throws on relative or malformed input.
    return null;
  }
}

/**
 * POST /scrape
 *
 * Body: `{ url: string }` (JSON or form-encoded).
 * Launches a browser, loads `url`, and responds with
 * `{ success: true, data: { title, url, content, links } }`.
 * Responds 400 when `url` is missing or not an http(s) URL, and 500 with the
 * error message when launching, navigating, or extracting fails.
 *
 * NOTE(review): only the scheme is validated. Loopback/private hosts are still
 * reachable through this endpoint; add a host allow/deny list if the service
 * is exposed to untrusted callers.
 */
app.post('/scrape', async (req, res) => {
  // Read the body defensively: a throw here would escape the async handler
  // as an unhandled rejection instead of producing an HTTP response.
  const target = parseHttpUrl(req.body && req.body.url);
  if (target === null) {
    res.status(400).json({
      success: false,
      error: 'A valid http(s) URL is required in "url".',
    });
    return;
  }

  let browser;
  try {
    browser = await puppeteer.launch({
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'],
      executablePath: process.env.PUPPETEER_EXECUTABLE_PATH,
    });
    const page = await browser.newPage();

    // Optimization: block heavy resources (images, stylesheets, fonts, media).
    await page.setRequestInterception(true);
    page.on('request', (request) => {
      const settled = BLOCKED_RESOURCE_TYPES.has(request.resourceType())
        ? request.abort()
        : request.continue();
      // Blocking is best-effort. Settling can reject when the request was
      // already handled or the page is closing; ignore that rather than let
      // a floating rejection take down the process.
      settled.catch(() => {});
    });

    await page.goto(target.href, { waitUntil: 'networkidle2', timeout: 90000 });

    // Extract clean text and metadata inside the page context.
    const result = await page.evaluate(() => ({
      title: document.title,
      url: window.location.href,
      content: document.body.innerText, // all visible text
      links: Array.from(document.querySelectorAll('a')).map((a) => a.href),
    }));

    res.json({ success: true, data: result });
  } catch (e) {
    res.status(500).json({ success: false, error: e.message });
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        // The response has already been sent; log instead of rethrowing so
        // a failed shutdown does not surface as an unhandled rejection.
        console.error('Failed to close browser:', closeErr);
      }
    }
  }
});
// Start the HTTP server on port 7860, bound to 0.0.0.0 (all IPv4 interfaces),
// and log once it is accepting connections.
app.listen(7860, '0.0.0.0', function onListening() {
  console.log('Engine ready on port 7860');
});