/**
 * HTTP proxy that fetches a target URL through a stealth-patched headless
 * browser and relays the resulting status, headers, cookies and body.
 *
 * Usage: GET or POST `/?url=<encoded target URL>`.
 *
 * Environment variables read by this script:
 *   PUPPETEER_SKIP_CHROMIUM_DOWNLOAD - use /usr/bin/chromium-browser
 *   PUPPETEER_HEADFUL                - run with a visible browser window
 *   PUPPETEER_USERDATADIR            - browser profile directory
 *   PUPPETEER_PROXY                  - value for --proxy-server
 *   CHALLENGE_MATCH                  - substring that marks a challenge page
 *   DEBUG / DEBUG_BODY               - verbose logging
 *   PORT                             - listen port (default 3000)
 */
const { addExtra } = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const rebrowserPuppeteer = require('rebrowser-puppeteer');
const puppeteer = addExtra(rebrowserPuppeteer);
puppeteer.use(StealthPlugin());
const Koa = require('koa');
const bodyParser = require('koa-bodyparser');
const app = new Koa();
app.use(bodyParser());
const jsesc = require('jsesc');

// Incoming request headers that must not be forwarded to the target site.
const requestHeadersToRemove = [
  "host", "user-agent", "accept-encoding", "content-length",
  "forwarded", "x-forwarded-proto", "x-forwarded-for", "x-cloud-trace-context"
];

// Target response headers that must not be relayed to the caller.
const responseHeadersToRemove = [
  "Accept-Ranges", "Content-Length", "Keep-Alive", "Connection",
  "content-encoding", "set-cookie"
];

/**
 * Returns a copy of `headers` without the entries listed in `namesToRemove`.
 * Names are compared case-insensitively because the two header sources used
 * below differ in casing (lower-cased vs. as sent by the server).
 *
 * @param {Object<string, string>} headers - header name/value map (not mutated)
 * @param {string[]} namesToRemove - header names to drop
 * @returns {Object<string, string>} filtered copy
 */
function omitHeaders(headers, namesToRemove) {
  const blocked = new Set(namesToRemove.map((name) => name.toLowerCase()));
  const kept = {};
  for (const [name, value] of Object.entries(headers)) {
    if (!blocked.has(name.toLowerCase())) {
      kept[name] = value;
    }
  }
  return kept;
}

/**
 * Closes `page` unless it is already closed. A failure to close is logged
 * instead of thrown so that it cannot mask the response being built.
 *
 * @param {object} page - Puppeteer page
 * @returns {Promise<void>}
 */
async function closePage(page) {
  if (page.isClosed()) {
    return;
  }
  try {
    await page.close();
  } catch (error) {
    // The download handler may be closing the same page concurrently.
    console.error(`[ERROR] closing page failed: ${error}`);
  }
}

(async () => {
  const options = {
    headless: "new",
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ],
    ignoreDefaultArgs: [
      '--enable-automation',
      '--disable-popup-blocking'
    ]
  };
  if (process.env.PUPPETEER_SKIP_CHROMIUM_DOWNLOAD) options.executablePath = '/usr/bin/chromium-browser';
  if (process.env.PUPPETEER_HEADFUL) options.headless = false;
  if (process.env.PUPPETEER_USERDATADIR) options.userDataDir = process.env.PUPPETEER_USERDATADIR;
  if (process.env.PUPPETEER_PROXY) options.args.push(`--proxy-server=${process.env.PUPPETEER_PROXY}`);

  // One browser is shared by all requests; each request gets its own page.
  const browser = await puppeteer.launch(options);

  app.use(async (ctx) => {
    if (!ctx.query.url) {
      ctx.body = "Please specify the URL in the 'url' query string.";
      return;
    }

    // Everything after "/?url=" is the target, so a target URL that carries
    // its own "&"-separated query parameters is kept whole.
    const url = decodeURIComponent(ctx.url.replace("/?url=", ""));
    if (process.env.DEBUG) {
      console.log(`[DEBUG] URL: ${url}`);
    }

    let responseData;
    let responseHeaders;
    let failureText;

    const page = await browser.newPage();
    try {
      page.removeAllListeners('request');
      await page.setRequestInterception(true);

      // Copy instead of deleting from ctx.headers so Koa's view of the
      // incoming request stays intact.
      const forwardedHeaders = omitHeaders(ctx.headers, requestHeadersToRemove);

      page.on('request', (request) => {
        // Merge per request: the caller's headers win over the browser's, but
        // one request's browser headers never leak into the next request.
        const headers = Object.assign({}, request.headers(), forwardedHeaders);
        if (process.env.DEBUG) {
          console.log(`[DEBUG] requested headers: \n\n${JSON.stringify(headers)}`);
        }
        const overrides = ctx.method === "POST"
          ? { headers, 'method': 'POST', 'postData': ctx.request.rawBody }
          : { headers };
        request.continue(overrides).catch((error) => {
          console.error(`[ERROR] continuing request failed: ${error}`);
        });
      });

      // A raw CDP session intercepts document responses once their headers
      // have arrived, so downloads can be captured before the browser handles
      // them itself.
      const client = await page.target().createCDPSession();
      await client.send('Network.setRequestInterception', {
        patterns: [{
          urlPattern: '*',
          resourceType: 'Document',
          interceptionStage: 'HeadersReceived'
        }],
      });

      client.on('Network.requestIntercepted', async (e) => {
        try {
          const continuation = { interceptionId: e.interceptionId };
          if (e.isDownload) {
            const result = await client.send('Network.getResponseBodyForInterception', {
              interceptionId: e.interceptionId
            });
            // Keep the body whether or not CDP delivered it base64-encoded.
            responseData = Buffer.from(result.body, result.base64Encoded ? 'base64' : 'utf8');
            // Abort the navigation; page.goto() then rejects with
            // ERR_BLOCKED_BY_CLIENT, which the handler below ignores.
            continuation['errorReason'] = 'BlockedByClient';
            responseHeaders = e.responseHeaders;
          }
          await client.send('Network.continueInterceptedRequest', continuation);
          if (e.isDownload) await closePage(page);
        } catch (error) {
          // Runs inside an event listener: nothing upstream could catch this.
          console.error(`[ERROR] response interception failed: ${error}`);
        }
      });

      try {
        let response = await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' });
        ctx.status = response.status();
        let responseBody = await response.text();
        responseData = await response.buffer();

        // While the page still looks like a challenge, wait for the next
        // navigation and re-read the body.
        let tryCount = 0;
        while (responseBody.includes(process.env.CHALLENGE_MATCH || "challenge-platform") && tryCount <= 10) {
          const newResponse = await page.waitForNavigation({ timeout: 30000, waitUntil: 'domcontentloaded' });
          if (newResponse) response = newResponse;
          responseBody = await response.text();
          responseData = await response.buffer();
          tryCount++;
        }

        // Report the status of the response whose body is actually returned,
        // not the status of the initial challenge page.
        ctx.status = response.status();
        responseHeaders = response.headers();

        const cookies = await page.cookies();
        if (cookies) {
          cookies.forEach((cookie) => {
            // secure, expires and domain are deliberately left out of the
            // options passed on to the caller's cookie.
            const { name, value, secure, expires, domain, ...cookieOptions } = cookie;
            ctx.cookies.set(name, value, cookieOptions);
          });
        }
      } catch (error) {
        if (!error.toString().includes("ERR_BLOCKED_BY_CLIENT")) {
          ctx.status = 500;
          failureText = error.toString();
        }
      }
    } finally {
      await closePage(page);
    }

    if (responseHeaders) {
      responseHeaders = omitHeaders(responseHeaders, responseHeadersToRemove);
      Object.keys(responseHeaders).forEach((header) => ctx.set(header, jsesc(responseHeaders[header])));
    }
    if (process.env.DEBUG) {
      console.log(`[DEBUG] response headers: \n${JSON.stringify(responseHeaders)}`);
    }
    if (process.env.DEBUG_BODY) {
      console.log(`[DEBUG] body: \n${responseData}`);
    }

    // Send whatever body was captured; if nothing was captured because of a
    // failure, send the error text so the 500 is not replaced by an empty body.
    if (responseData === undefined && failureText !== undefined) {
      ctx.body = failureText;
    } else {
      ctx.body = responseData;
    }
  });

  app.listen(process.env.PORT || 3000);
})();