Commit ·
e60e8d0
0
Parent(s):
cf
Browse files- Dockerfile +36 -0
- LICENSE.md +21 -0
- README.md +199 -0
- app.js +111 -0
- package.json +40 -0
- src/data/fakePage.html +30 -0
- src/endpoints/getSource.js +45 -0
- src/endpoints/solveTurnstile.max.js +80 -0
- src/endpoints/solveTurnstile.min.js +67 -0
- src/endpoints/wafSession.js +80 -0
- src/module/createBrowser.js +89 -0
- src/module/reqValidate.js +58 -0
- tests/endpoints.test.js +62 -0
- tests/validate.test.js +28 -0
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM node:latest
|
| 2 |
+
|
| 3 |
+
# Install necessary packages including Chromium and XVFB
|
| 4 |
+
RUN apt-get update && apt-get install -y \
|
| 5 |
+
wget \
|
| 6 |
+
gnupg \
|
| 7 |
+
ca-certificates \
|
| 8 |
+
apt-transport-https \
|
| 9 |
+
chromium \
|
| 10 |
+
chromium-driver \
|
| 11 |
+
xvfb \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
# Set the environment variable for Chromium binary
|
| 15 |
+
ENV CHROME_BIN=/usr/bin/chromium
|
| 16 |
+
|
| 17 |
+
# Set the working directory inside the container
|
| 18 |
+
WORKDIR /app
|
| 19 |
+
|
| 20 |
+
# Copy package.json and package-lock.json (if exists) to leverage Docker cache
|
| 21 |
+
COPY package*.json ./
|
| 22 |
+
|
| 23 |
+
# Install Node.js dependencies
|
| 24 |
+
# Using `npm ci` is often preferred in Docker for reproducible builds if you have package-lock.json
|
| 25 |
+
# Otherwise, `npm install` is fine. `npm update` is generally not needed here.
|
| 26 |
+
RUN npm install
|
| 27 |
+
|
| 28 |
+
# Copy the rest of your application code into the container
|
| 29 |
+
COPY . .
|
| 30 |
+
|
| 31 |
+
# Expose the port your Node.js application listens on (7860 as per your app.js)
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
|
| 34 |
+
# Command to run your Node.js application
|
| 35 |
+
# This will keep the container running as long as app.js is active
|
| 36 |
+
CMD ["node", "app.js"]
|
LICENSE.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2023 - 2024 @zfcsoftware
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
title: cf-token
|
| 4 |
+
sdk: docker
|
| 5 |
+
emoji: 😻
|
| 6 |
+
colorFrom: indigo
|
| 7 |
+
colorTo: gray
|
| 8 |
+
short_description: cf-token
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
[Scrape.do](https://scrape.do/?utm_source=github&utm_medium=repo_ccs)
|
| 12 |
+
|
| 13 |
+
> [!WARNING]
|
| 14 |
+
> This repo will no longer receive updates. Thank you to everyone who supported it.
|
| 15 |
+
|
| 16 |
+
# CF Clearance Scraper
|
| 17 |
+
|
| 18 |
+
This library was created for testing and training purposes to retrieve the page source of websites, create Cloudflare Turnstile tokens and create Cloudflare WAF sessions.
|
| 19 |
+
|
| 20 |
+
Cloudflare protection checks not only the cookies in a request but also the values in its headers. For this reason, it is recommended to use this service together with the sample code in this readme file.
|
| 21 |
+
|
| 22 |
+
Cookies with cf in the name belong to Cloudflare. You can find out what these cookies do and how long they are valid by **[Clicking Here](https://developers.cloudflare.com/fundamentals/reference/policies-compliances/cloudflare-cookies/)**.
|
| 23 |
+
|
| 24 |
+
## Sponsor
|
| 25 |
+
|
| 26 |
+
[CapSolver](https://www.capsolver.com/?utm_source=github&utm_medium=repo&utm_campaign=scraping&utm_term=cf-clearance-scraper)
|
| 27 |
+
|
| 28 |
+
## Installation
|
| 29 |
+
|
| 30 |
+
Installation with Docker is recommended.
|
| 31 |
+
|
| 32 |
+
**Docker**
|
| 33 |
+
|
| 34 |
+
Please make sure you have installed the latest image. If you get an error, try downloading the latest version by going to Docker Hub.
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
sudo docker rmi zfcsoftware/cf-clearance-scraper:latest --force
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
docker run -d -p 3000:3000 \
|
| 42 |
+
-e PORT=3000 \
|
| 43 |
+
-e browserLimit=20 \
|
| 44 |
+
-e timeOut=60000 \
|
| 45 |
+
zfcsoftware/cf-clearance-scraper:latest
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
**Github**
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
git clone https://github.com/zfcsoftware/cf-clearance-scraper
|
| 52 |
+
cd cf-clearance-scraper
|
| 53 |
+
npm install
|
| 54 |
+
npm run start
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## Create Cloudflare WAF Session
|
| 58 |
+
|
| 59 |
+
By creating a session as in the example, you can send multiple requests to the same site without being blocked. Since sites may have TLS protection, it is recommended to send requests with the library in the example.
|
| 60 |
+
|
| 61 |
+
```js
|
| 62 |
+
const initCycleTLS = require('cycletls');
|
| 63 |
+
async function test() {
|
| 64 |
+
const session = await fetch('http://localhost:3000/cf-clearance-scraper', {
|
| 65 |
+
method: 'POST',
|
| 66 |
+
headers: {
|
| 67 |
+
'Content-Type': 'application/json'
|
| 68 |
+
},
|
| 69 |
+
body: JSON.stringify({
|
| 70 |
+
url: 'https://nopecha.com/demo/cloudflare',
|
| 71 |
+
mode: "waf-session",
|
| 72 |
+
// proxy:{
|
| 73 |
+
// host: '127.0.0.1',
|
| 74 |
+
// port: 3000,
|
| 75 |
+
// username: 'username',
|
| 76 |
+
// password: 'password'
|
| 77 |
+
// }
|
| 78 |
+
})
|
| 79 |
+
}).then(res => res.json()).catch(err => { console.error(err); return null });
|
| 80 |
+
|
| 81 |
+
if (!session || session.code != 200) return console.error(session);
|
| 82 |
+
|
| 83 |
+
const cycleTLS = await initCycleTLS();
|
| 84 |
+
const response = await cycleTLS('https://nopecha.com/demo/cloudflare', {
|
| 85 |
+
body: '',
|
| 86 |
+
ja3: '772,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,23-27-65037-43-51-45-16-11-13-17513-5-18-65281-0-10-35,25497-29-23-24,0', // https://scrapfly.io/web-scraping-tools/ja3-fingerprint
|
| 87 |
+
userAgent: session.headers["user-agent"],
|
| 88 |
+
// proxy: 'http://username:password@hostname.com:443',
|
| 89 |
+
headers: {
|
| 90 |
+
...session.headers,
|
| 91 |
+
cookie: session.cookies.map(cookie => `${cookie.name}=${cookie.value}`).join('; ')
|
| 92 |
+
}
|
| 93 |
+
}, 'get');
|
| 94 |
+
|
| 95 |
+
console.log(response.status);
|
| 96 |
+
cycleTLS.exit().catch(err => { });
|
| 97 |
+
}
|
| 98 |
+
test()
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
## Create Turnstile Token with Little Resource Consumption
|
| 102 |
+
|
| 103 |
+
This endpoint allows you to generate tokens for a Cloudflare Turnstile Captcha. It blocks the request that fetches the page resource and instead makes the page resource a simple Turnstile render page. This allows you to generate tokens without having to load any additional css or js files.
|
| 104 |
+
|
| 105 |
+
However, in this method, the siteKey variable must be sent to Turnstile along with the site to create the token. If this does not work, you can examine the token generation system by loading the full page resource described in the next section.
|
| 106 |
+
|
| 107 |
+
```js
|
| 108 |
+
fetch('http://localhost:3000/cf-clearance-scraper', {
|
| 109 |
+
method: 'POST',
|
| 110 |
+
headers: {
|
| 111 |
+
'Content-Type': 'application/json'
|
| 112 |
+
},
|
| 113 |
+
body: JSON.stringify({
|
| 114 |
+
url: 'https://turnstile.zeroclover.io/',
|
| 115 |
+
siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0",
|
| 116 |
+
mode: "turnstile-min",
|
| 117 |
+
// proxy:{
|
| 118 |
+
// host: '127.0.0.1',
|
| 119 |
+
// port: 3000,
|
| 120 |
+
// username: 'username',
|
| 121 |
+
// password: 'password'
|
| 122 |
+
// }
|
| 123 |
+
})
|
| 124 |
+
})
|
| 125 |
+
.then(res => res.json())
|
| 126 |
+
.then(console.log)
|
| 127 |
+
.catch(console.log);
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
## Creating Turnstile Token with Full Page Load
|
| 131 |
+
|
| 132 |
+
This example request goes to the page at the given url address with a real browser, resolves the Turnstile and returns you the token.
|
| 133 |
+
|
| 134 |
+
```js
|
| 135 |
+
fetch('http://localhost:3000/cf-clearance-scraper', {
|
| 136 |
+
method: 'POST',
|
| 137 |
+
headers: {
|
| 138 |
+
'Content-Type': 'application/json'
|
| 139 |
+
},
|
| 140 |
+
body: JSON.stringify({
|
| 141 |
+
url: 'https://turnstile.zeroclover.io/',
|
| 142 |
+
mode: "turnstile-max",
|
| 143 |
+
// proxy:{
|
| 144 |
+
// host: '127.0.0.1',
|
| 145 |
+
// port: 3000,
|
| 146 |
+
// username: 'username',
|
| 147 |
+
// password: 'password'
|
| 148 |
+
// }
|
| 149 |
+
})
|
| 150 |
+
})
|
| 151 |
+
.then(res => res.json())
|
| 152 |
+
.then(console.log)
|
| 153 |
+
.catch(console.log);
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
## Getting Page Source from a Site Protected with Cloudflare WAF
|
| 157 |
+
|
| 158 |
+
With this request you can scrape the page source of a website protected with CF WAF.
|
| 159 |
+
|
| 160 |
+
```js
|
| 161 |
+
fetch('http://localhost:3000/cf-clearance-scraper', {
|
| 162 |
+
method: 'POST',
|
| 163 |
+
headers: {
|
| 164 |
+
'Content-Type': 'application/json'
|
| 165 |
+
},
|
| 166 |
+
body: JSON.stringify({
|
| 167 |
+
url: 'https://nopecha.com/demo/cloudflare',
|
| 168 |
+
mode: "source"
|
| 169 |
+
// proxy:{
|
| 170 |
+
// host: '127.0.0.1',
|
| 171 |
+
// port: 3000,
|
| 172 |
+
// username: 'username',
|
| 173 |
+
// password: 'password'
|
| 174 |
+
// }
|
| 175 |
+
})
|
| 176 |
+
})
|
| 177 |
+
.then(res => res.json())
|
| 178 |
+
.then(console.log)
|
| 179 |
+
.catch(console.log);
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
## Quick Questions and Answers
|
| 183 |
+
|
| 184 |
+
### Does It Open A New Browser On Every Request?
|
| 185 |
+
No, a new context is started with each request and closed when the job is finished. Processes are executed with isolated contexts through a single browser.
|
| 186 |
+
|
| 187 |
+
### How Do I Limit the Browser Context to Open?
|
| 188 |
+
You can do this by setting the process.env.browserLimit value. The default is 20.
|
| 189 |
+
|
| 190 |
+
### How Do I Add Authentication to the API?
|
| 191 |
+
You can add authorisation by setting the process.env.authToken variable. When it is set, the API returns 401 unless the authToken field in the request body matches the token you specified.
|
| 192 |
+
|
| 193 |
+
### How Do I Set The Timeout Time?
|
| 194 |
+
You can give the variable process.env.timeOut a value in milliseconds. The default is 60000.
|
| 195 |
+
|
| 196 |
+
## Disclaimer of Liability
|
| 197 |
+
This repository was created purely for testing and training purposes. The user is responsible for any prohibited liability that may arise from its use.
|
| 198 |
+
The library is not intended to harm any site or company. The user is responsible for any damage that may arise.
|
| 199 |
+
Users of this repository are deemed to have accepted this disclaimer.
|
app.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const express = require("express");
|
| 2 |
+
const app = express();
|
| 3 |
+
const bodyParser = require("body-parser");
|
| 4 |
+
const cors = require("cors");
|
| 5 |
+
const reqValidate = require("./src/module/reqValidate");
|
| 6 |
+
|
| 7 |
+
/**
 * Reads a positive integer from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset, not numeric, or not positive.
 * @returns {number} The parsed value or `fallback`.
 */
function envPositiveInt(name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Runtime configuration. Each value can be overridden through the environment
// (PORT, authToken, browserLimit, timeOut); the previous hard-coded values are the defaults.
const port = envPositiveInt("PORT", 7860);
const authToken = process.env.authToken || null;
global.browserLimit = envPositiveInt("browserLimit", 20);
global.timeOut = envPositiveInt("timeOut", 60000);

// Number of requests currently being processed; compared against global.browserLimit.
global.browserLength = 0;
// Shared browser instance; requests are refused while this is still null.
global.browser = null;
|
| 15 |
+
|
| 16 |
+
app.use(bodyParser.json());
|
| 17 |
+
app.use(bodyParser.urlencoded({ extended: true }));
|
| 18 |
+
app.use(cors());
|
| 19 |
+
|
| 20 |
+
const server = app.listen(port, () => {
|
| 21 |
+
console.log(`Server running on port ${port}`);
|
| 22 |
+
});
|
| 23 |
+
|
| 24 |
+
if (server) {
|
| 25 |
+
try {
|
| 26 |
+
server.timeout = global.timeOut;
|
| 27 |
+
server.keepAliveTimeout = global.timeOut + 5000;
|
| 28 |
+
server.headersTimeout = global.timeOut + 6000;
|
| 29 |
+
} catch (e) {
|
| 30 |
+
console.error("Failed to set server timeouts:", e);
|
| 31 |
+
}
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// Require createBrowser, but don't immediately call it here.
|
| 35 |
+
// We'll manage its lifecycle from within the browser creation module itself.
|
| 36 |
+
require("./src/module/createBrowser");
|
| 37 |
+
|
| 38 |
+
const getSource = require("./src/endpoints/getSource");
|
| 39 |
+
const solveTurnstileMin = require("./src/endpoints/solveTurnstile.min");
|
| 40 |
+
const solveTurnstileMax = require("./src/endpoints/solveTurnstile.max");
|
| 41 |
+
const wafSession = require("./src/endpoints/wafSession");
|
| 42 |
+
|
| 43 |
+
/**
 * Runs an endpoint handler and converts its outcome into the JSON payload
 * that is sent back to the client.
 *
 * @param {(data: object) => Promise<object|string>} fn - Endpoint implementation to invoke.
 * @param {object} data - Request payload, forwarded to `fn` unchanged.
 * @returns {Promise<object>} The handler's fields plus `code: 200`, or
 *   `{ code: 500, message }` when `fn` throws or rejects.
 */
async function handleRequest(fn, data) {
  try {
    const result = await fn(data);
    // A handler may resolve a bare token string. Spreading a string would yield
    // one key per character ({0: "a", 1: "b", ...}), so wrap it in `{ token }` first.
    const payload = typeof result === "string" ? { token: result } : result;
    return { ...payload, code: 200 };
  } catch (err) {
    return { code: 500, message: String(err) };
  }
}
|
| 50 |
+
|
| 51 |
+
app.post("/cf-clearance-scraper", async (req, res) => {
|
| 52 |
+
try {
|
| 53 |
+
const data = req.body;
|
| 54 |
+
const check = reqValidate(data);
|
| 55 |
+
|
| 56 |
+
if (check !== true) {
|
| 57 |
+
return res.status(400).json({ code: 400, message: "Bad Request", schema: check });
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
if (authToken && data.authToken !== authToken) {
|
| 61 |
+
return res.status(401).json({ code: 401, message: "Unauthorized" });
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
if (global.browserLength >= global.browserLimit) {
|
| 65 |
+
return res.status(429).json({ code: 429, message: "Too Many Requests" });
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
// Refuse the request while the shared browser instance (global.browser) is not available yet
|
| 69 |
+
if (!global.browser) {
|
| 70 |
+
return res.status(500).json({
|
| 71 |
+
code: 500,
|
| 72 |
+
message: "The scanner is not ready yet. Please try again a little later.",
|
| 73 |
+
});
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
global.browserLength++;
|
| 77 |
+
|
| 78 |
+
let result = { code: 500 };
|
| 79 |
+
try {
|
| 80 |
+
// Pass the active browser instance to the handler functions
|
| 81 |
+
const requestData = { ...data, browser: global.browser };
|
| 82 |
+
switch (data.mode) {
|
| 83 |
+
case "source":
|
| 84 |
+
result = await handleRequest(getSource, requestData);
|
| 85 |
+
break;
|
| 86 |
+
case "turnstile-min":
|
| 87 |
+
result = await handleRequest(solveTurnstileMin, requestData);
|
| 88 |
+
break;
|
| 89 |
+
case "turnstile-max":
|
| 90 |
+
result = await handleRequest(solveTurnstileMax, requestData);
|
| 91 |
+
break;
|
| 92 |
+
case "waf-session":
|
| 93 |
+
result = await handleRequest(wafSession, requestData);
|
| 94 |
+
break;
|
| 95 |
+
default:
|
| 96 |
+
result = { code: 400, message: "Invalid mode" };
|
| 97 |
+
}
|
| 98 |
+
} finally {
|
| 99 |
+
global.browserLength--;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
res.status(result.code ?? 500).json(result);
|
| 103 |
+
} catch (e) {
|
| 104 |
+
console.error("Error in /cf-clearance-scraper:", e);
|
| 105 |
+
res.status(500).json({ code: 500, message: "Internal Server Error" });
|
| 106 |
+
}
|
| 107 |
+
});
|
| 108 |
+
|
| 109 |
+
app.use((req, res) => {
|
| 110 |
+
res.status(404).json({ code: 404, message: "Not Found" });
|
| 111 |
+
});
|
package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "cf-clearance-scraper",
|
| 3 |
+
"version": "2.1.3",
|
| 4 |
+
"main": "app.js",
|
| 5 |
+
"scripts": {
|
| 6 |
+
"start": "node app.js",
|
| 7 |
+
"test": "node --experimental-vm-modules ./node_modules/.bin/jest --detectOpenHandles --verbose"
|
| 8 |
+
},
|
| 9 |
+
"jest": {
|
| 10 |
+
"testMatch": [
|
| 11 |
+
"**/tests/**/*.js"
|
| 12 |
+
],
|
| 13 |
+
"verbose": true
|
| 14 |
+
},
|
| 15 |
+
"keywords": [
|
| 16 |
+
"cf-clearance",
|
| 17 |
+
"cloudflare",
|
| 18 |
+
"waf",
|
| 19 |
+
"scraper",
|
| 20 |
+
"puppeteer",
|
| 21 |
+
"xvfb",
|
| 22 |
+
"turnstile",
|
| 23 |
+
"bypass",
|
| 24 |
+
"undetected",
|
| 25 |
+
"stealth"
|
| 26 |
+
],
|
| 27 |
+
"author": "zfcsoftware",
|
| 28 |
+
"license": "MIT",
|
| 29 |
+
"description": "This package is an experimental and educational package created for Cloudflare protections.",
|
| 30 |
+
"dependencies": {
|
| 31 |
+
"ajv": "^8.17.1",
|
| 32 |
+
"ajv-formats": "^3.0.1",
|
| 33 |
+
"body-parser": "^1.20.3",
|
| 34 |
+
"cors": "^2.8.5",
|
| 35 |
+
"express": "^4.21.0",
|
| 36 |
+
"jest": "^29.7.0",
|
| 37 |
+
"puppeteer-real-browser": "^1.4.0",
|
| 38 |
+
"supertest": "^7.0.0"
|
| 39 |
+
}
|
| 40 |
+
}
|
src/data/fakePage.html
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title></title>
|
| 8 |
+
</head>
|
| 9 |
+
|
| 10 |
+
<body>
|
| 11 |
+
<div class="turnstile"></div>
|
| 12 |
+
<script src="https://challenges.cloudflare.com/turnstile/v0/api.js?onload=onloadTurnstileCallback" defer></script>
|
| 13 |
+
<script>
|
| 14 |
+
window.onloadTurnstileCallback = function () {
|
| 15 |
+
turnstile.render('.turnstile', {
|
| 16 |
+
sitekey: '<site-key>',
|
| 17 |
+
callback: function (token) {
|
| 18 |
+
var c = document.createElement('input');
|
| 19 |
+
c.type = 'hidden';
|
| 20 |
+
c.name = 'cf-response';
|
| 21 |
+
c.value = token;
|
| 22 |
+
document.body.appendChild(c);
|
| 23 |
+
},
|
| 24 |
+
});
|
| 25 |
+
};
|
| 26 |
+
|
| 27 |
+
</script>
|
| 28 |
+
</body>
|
| 29 |
+
|
| 30 |
+
</html>
|
src/endpoints/getSource.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Loads `url` in a fresh browser context and resolves with the rendered HTML.
 *
 * The page source is captured once a 200/302 response for `url` (with or
 * without a trailing slash) has been observed.
 *
 * @param {object} params
 * @param {string} params.url - Page to load.
 * @param {{host: string, port: number, username?: string, password?: string}} [params.proxy]
 *   Optional proxy; host/port are applied to the context, credentials to the page.
 * @returns {Promise<{source: string}>} Resolves with the page HTML. Rejects with a
 *   string message on missing url, context failure, navigation error, or timeout
 *   (`global.timeOut`, default 60000 ms).
 */
async function getSource({ url, proxy }) {
  return new Promise(async (resolve, reject) => {
    if (!url) return reject("Missing url parameter");

    const context = await global.browser
      .createBrowserContext({
        proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined,
      })
      .catch(() => null);
    if (!context) return reject("Failed to create browser context");

    let isSettled = false;
    let timer = null;

    // Single exit point: settles the promise once, stops the watchdog and
    // releases the context so that no path can leak it or close it twice.
    const settle = async (finish, value) => {
      if (isSettled) return;
      isSettled = true;
      clearTimeout(timer);
      // Best effort: a failed close must not mask the real outcome.
      await context.close().catch(() => {});
      finish(value);
    };

    // Watchdog: without it the promise would never settle when no matching response arrives.
    timer = setTimeout(() => settle(reject, "Timeout Error"), global.timeOut || 60000);

    try {
      const page = await context.newPage();

      if (proxy?.username && proxy?.password) {
        await page.authenticate({
          username: proxy.username,
          password: proxy.password,
        });
      }

      await page.setRequestInterception(true);
      page.on("request", (request) => {
        // continue() is asynchronous, so failures surface as a rejection, not a throw.
        Promise.resolve()
          .then(() => request.continue())
          .catch((err) => console.warn("Request interception error:", err));
      });

      page.on("response", async (res) => {
        try {
          if (isSettled) return;
          const isTarget =
            [200, 302].includes(res.status()) && [url, url + "/"].includes(res.url());
          if (!isTarget) return;

          // Give a challenge redirect a short chance to finish before reading the DOM.
          await page.waitForNavigation({ waitUntil: "load", timeout: 5000 }).catch(() => {});
          const html = await page.content();
          await settle(resolve, { source: html });
        } catch (e) {
          console.error("Error processing response:", e);
        }
      });

      await page.goto(url, { waitUntil: "domcontentloaded" });
    } catch (e) {
      // After a successful settle the context is closed, which may abort goto(); ignore that.
      if (!isSettled) console.error("Error in getSource:", e.message);
      await settle(reject, e.message);
    }
  });
}
|
| 44 |
+
|
| 45 |
+
module.exports = getSource;
|
src/endpoints/solveTurnstile.max.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const fs = require("fs");
|
| 2 |
+
/**
 * Solves a Turnstile challenge by loading the full page at `url`.
 *
 * A script injected before page load polls `window.turnstile.getResponse()`
 * and stores the token in a hidden `cf-response` input, which is then read back.
 *
 * @param {object} params
 * @param {string} params.url - Page that renders the Turnstile widget.
 * @param {{host: string, port: number, username?: string, password?: string}} [params.proxy]
 *   Optional proxy; host/port are applied to the context, credentials to the page.
 * @returns {Promise<string>} Resolves with the token string. Rejects with a string
 *   message on missing url, context failure, navigation error, unusable token, or
 *   timeout (`global.timeOut`, default 60000 ms).
 */
function solveTurnstileMin({ url, proxy }) {
  return new Promise(async (resolve, reject) => {
    if (!url) return reject("Missing url parameter");

    const timeoutMs = global.timeOut || 60000;

    const context = await global.browser
      .createBrowserContext({
        proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
      })
      .catch(() => null);

    if (!context) return reject("Failed to create browser context");

    let isSettled = false;
    let timer = null;

    // Single exit point: settles once, stops the watchdog and closes the context
    // exactly once, so the timeout and error paths cannot both close it.
    const settle = async (finish, value) => {
      if (isSettled) return;
      isSettled = true;
      clearTimeout(timer);
      // Best effort: a failed close must not mask the real outcome.
      await context.close().catch(() => {});
      finish(value);
    };

    timer = setTimeout(() => settle(reject, "Timeout Error"), timeoutMs);

    try {
      const page = await context.newPage();

      if (proxy?.username && proxy?.password) {
        await page.authenticate({
          username: proxy.username,
          password: proxy.password,
        });
      }

      // Runs inside the page: waits for the widget to produce a token and
      // exposes it through a hidden input that waitForSelector can detect.
      await page.evaluateOnNewDocument(() => {
        let token = null;
        async function waitForToken() {
          while (!token) {
            try {
              token = window.turnstile.getResponse();
            } catch (e) {
              // turnstile is not loaded yet; retry on the next tick
            }
            await new Promise((resolve) => setTimeout(resolve, 500));
          }
          var c = document.createElement("input");
          c.type = "hidden";
          c.name = "cf-response";
          c.value = token;
          document.body.appendChild(c);
        }
        waitForToken();
      });

      await page.goto(url, {
        waitUntil: "domcontentloaded",
      });

      await page.waitForSelector('[name="cf-response"]', {
        timeout: timeoutMs,
      });
      const token = await page.evaluate(() => {
        try {
          return document.querySelector('[name="cf-response"]').value;
        } catch (e) {
          return null;
        }
      });

      if (!token || token.length < 10) return settle(reject, "Failed to get token");
      return settle(resolve, token);
    } catch (e) {
      // Errors raised after the watchdog already closed the context are expected noise.
      if (!isSettled) console.log(e);
      await settle(reject, e.message);
    }
  });
}
|
| 80 |
+
module.exports = solveTurnstileMin;
|
src/endpoints/solveTurnstile.min.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async function solveTurnstileMin({ url, proxy, siteKey }) {
|
| 2 |
+
return new Promise(async (resolve, reject) => {
|
| 3 |
+
if (!url) return reject("Missing url parameter");
|
| 4 |
+
if (!siteKey) throw new Error("Missing siteKey parameter");
|
| 5 |
+
let isResolved = false;
|
| 6 |
+
|
| 7 |
+
const context = await global.browser.createBrowserContext().catch(() => null);
|
| 8 |
+
if (!context) return reject("Failed to create browser context");
|
| 9 |
+
|
| 10 |
+
const page = await context.newPage();
|
| 11 |
+
|
| 12 |
+
try {
|
| 13 |
+
if (proxy?.username && proxy?.password) {
|
| 14 |
+
await page.authenticate({
|
| 15 |
+
username: proxy.username,
|
| 16 |
+
password: proxy.password,
|
| 17 |
+
});
|
| 18 |
+
}
|
| 19 |
+
await page.setRequestInterception(true);
|
| 20 |
+
page.on("request", async (request) => {
|
| 21 |
+
if (
|
| 22 |
+
[url, url + "/"].includes(request.url()) &&
|
| 23 |
+
request.resourceType() === "document"
|
| 24 |
+
) {
|
| 25 |
+
const response = await request.respond({
|
| 26 |
+
status: 200,
|
| 27 |
+
contentType: "text/html",
|
| 28 |
+
body: String(
|
| 29 |
+
require("fs").readFileSync("./src/data/fakePage.html")
|
| 30 |
+
).replace(/<site-key>/g, siteKey),
|
| 31 |
+
});
|
| 32 |
+
} else {
|
| 33 |
+
await request.continue();
|
| 34 |
+
}
|
| 35 |
+
});
|
| 36 |
+
await page.goto(url, {
|
| 37 |
+
waitUntil: "domcontentloaded",
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
await page.waitForSelector('[name="cf-response"]', {
|
| 41 |
+
timeout: 60000,
|
| 42 |
+
});
|
| 43 |
+
|
| 44 |
+
const token = await page.evaluate(() => {
|
| 45 |
+
try {
|
| 46 |
+
return document.querySelector('[name="cf-response"]').value;
|
| 47 |
+
} catch (e) {
|
| 48 |
+
return null;
|
| 49 |
+
}
|
| 50 |
+
});
|
| 51 |
+
|
| 52 |
+
isResolved = true;
|
| 53 |
+
await page.close();
|
| 54 |
+
|
| 55 |
+
if (!token || token.length < 10) throw new Error("Failed to get token");
|
| 56 |
+
resolve({ token });
|
| 57 |
+
} catch (e) {
|
| 58 |
+
console.error("Error in getSource:", e.message);
|
| 59 |
+
if (!isResolved) {
|
| 60 |
+
await page.close();
|
| 61 |
+
reject(e.message);
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
});
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
module.exports = solveTurnstileMin;
|
src/endpoints/wafSession.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Asks https://httpbin.org/get, from inside the page, which Accept-Language
 * header the browser sends.
 *
 * @param {object} page - Page used to run the in-browser fetch.
 * @returns {Promise<string|null|undefined>} The header value, or a nullish value
 *   when the lookup fails or the header is absent.
 */
async function findAcceptLanguage(page) {
  return page.evaluate(async () => {
    try {
      const res = await fetch("https://httpbin.org/get");
      const echoed = await res.json();
      return echoed.headers["Accept-Language"] || echoed.headers["accept-language"];
    } catch (e) {
      return null;
    }
  });
}

/**
 * Opens `url` in a fresh browser context and resolves with the cookies and
 * request headers observed once a 200/302 response for `url` (with or without
 * a trailing slash) has arrived.
 *
 * @param {object} params
 * @param {string} params.url - Page to open.
 * @param {{host: string, port: number, username?: string, password?: string}} [params.proxy]
 *   Optional proxy; host/port are applied to the context, credentials to the page.
 * @returns {Promise<{cookies: object[], headers: object}>} Rejects with a string
 *   message on missing url, context failure, navigation error, or timeout
 *   (`global.timeOut`, default 60000 ms).
 */
function getSource({ url, proxy }) {
  return new Promise(async (resolve, reject) => {
    if (!url) return reject("Missing url parameter");

    const context = await global.browser
      .createBrowserContext({
        proxyServer: proxy ? `http://${proxy.host}:${proxy.port}` : undefined, // https://pptr.dev/api/puppeteer.browsercontextoptions
      })
      .catch(() => null);
    if (!context) return reject("Failed to create browser context");

    let isSettled = false;
    let timer = null;

    // Single exit point. The flag is set BEFORE the context is closed so that the
    // watchdog and the navigation error path cannot close or settle a second time.
    const settle = async (finish, value) => {
      if (isSettled) return;
      isSettled = true;
      clearTimeout(timer);
      // Best effort: a failed close must not mask the real outcome.
      await context.close().catch(() => {});
      finish(value);
    };

    timer = setTimeout(() => settle(reject, "Timeout Error"), global.timeOut || 60000);

    try {
      const page = await context.newPage();

      if (proxy?.username && proxy?.password) {
        await page.authenticate({
          username: proxy.username,
          password: proxy.password,
        });
      }

      const acceptLanguage = await findAcceptLanguage(page);

      await page.setRequestInterception(true);
      page.on("request", (request) => {
        Promise.resolve()
          .then(() => request.continue())
          .catch((err) => console.warn("Request interception error:", err));
      });

      page.on("response", async (res) => {
        try {
          if (isSettled) return;
          const isTarget =
            [200, 302].includes(res.status()) && [url, url + "/"].includes(res.url());
          if (!isTarget) return;

          // Give a challenge redirect a short chance to finish before reading cookies.
          await page
            .waitForNavigation({ waitUntil: "load", timeout: 5000 })
            .catch(() => {});
          const cookies = await page.cookies();

          // Work on a copy so the request's own header map is left untouched.
          const headers = { ...res.request().headers() };
          delete headers["content-type"];
          delete headers["accept-encoding"];
          delete headers["accept"];
          delete headers["content-length"];
          // Only override when the lookup succeeded; never replace a real value with null.
          if (acceptLanguage) headers["accept-language"] = acceptLanguage;

          await settle(resolve, { cookies, headers });
        } catch (e) {
          console.error("Error processing response:", e);
        }
      });

      await page.goto(url, {
        waitUntil: "domcontentloaded",
      });
    } catch (e) {
      await settle(reject, e.message);
    }
  });
}
|
| 80 |
+
module.exports = getSource;
|
src/module/createBrowser.js
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// const puppeteer = require("puppeteer-core"); // Uncomment if using puppeteer-core
|
| 2 |
+
const { connect } = require("puppeteer-real-browser");
|
| 3 |
+
|
| 4 |
+
/**
 * Launches the shared browser through puppeteer-real-browser's `connect` and
 * publishes it on `global.browser`.
 *
 * The call is a no-op when `global.finished` is truthy or when
 * `global.browser` already holds an instance. When the launch throws, the
 * function waits 3 seconds and tries again; it gives up (leaving
 * `global.browser` set to null) once `retry` has reached 5 or
 * `global.finished` has been set in the meantime.
 *
 * @param {number} [retry=0] Number of launch attempts that have already failed.
 * @returns {Promise<void>} Settles once the launch (including retries) is done.
 */
async function createBrowser(retry = 0) {
  try {
    // global.finished is set to true by this module's SIGINT/SIGTERM handlers.
    if (global.finished) return;

    // An instance is already published: do not launch a second browser.
    if (global.browser) {
      console.log("Browser is already connected.");
      return;
    }

    console.log("Launching new browser instance...");

    const { browser: launchedBrowser } = await connect({
      headless: false,
      turnstile: true,
      connectOption: { defaultViewport: null },
      disableXvfb: false,
    });

    // Store the new browser instance globally.
    global.browser = launchedBrowser;

    // The listener must be attached to the instance returned by connect().
    // Referring to an undeclared `browser` identifier here would throw a
    // ReferenceError, fall into the catch block below and leave the browser
    // without any restart-on-disconnect handling.
    launchedBrowser.on("disconnected", async () => {
      if (global.finished) return; // Shutting down: do not relaunch.
      console.log("Browser disconnected. Attempting to restart browser...");
      global.browser = null; // Drop the dead instance so a relaunch is allowed.
      await createBrowser();
    });

    console.log("Browser launched successfully and ready for use.");
  } catch (e) {
    console.error("Error launching browser:", e.stack);

    // Retry only while the application is running and attempts remain.
    if (global.finished || retry >= 5) {
      console.error("Max retries reached or application is shutting down. Could not launch browser.");
      global.browser = null; // Make sure no stale instance is left behind.
      return;
    }

    console.log(`Retrying browser launch (${retry + 1}/5)...`);
    await new Promise((resolve) => setTimeout(resolve, 3000));
    await createBrowser(retry + 1);
  }
}
|
| 61 |
+
|
| 62 |
+
// Immediately call createBrowser when this module is required to ensure a browser is available from start
|
| 63 |
+
createBrowser();
|
| 64 |
+
|
| 65 |
+
// Optionally, export the createBrowser function if you need to manually trigger it from elsewhere
|
| 66 |
+
module.exports = createBrowser;
|
| 67 |
+
|
| 68 |
+
// Add a graceful shutdown handler
|
| 69 |
+
// SIGINT and SIGTERM are handled identically: flag the shutdown through
// global.finished, close the shared browser if one is stored on
// global.browser, then exit with status 0.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, async () => {
    console.log(`${signalName} signal received. Shutting down...`);
    global.finished = true; // Signal that the application is shutting down

    const activeBrowser = global.browser;
    if (activeBrowser) {
      console.log('Closing browser...');
      // A failing close is logged only; the process still exits below.
      await activeBrowser.close().catch((err) => console.error("Error closing browser:", err));
    }
    process.exit(0);
  });
}
|
src/module/reqValidate.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const Ajv = require("ajv")
|
| 2 |
+
const addFormats = require("ajv-formats")
|
| 3 |
+
|
| 4 |
+
const ajv = new Ajv()
|
| 5 |
+
addFormats(ajv)
|
| 6 |
+
|
| 7 |
+
// JSON Schema for the request body. `mode` and `url` are mandatory; unknown
// keys are rejected both at the top level and inside `proxy`.
const schema = {
    type: "object",
    properties: {
        mode: {
            type: "string",
            enum: ["source", "turnstile-min", "turnstile-max", "waf-session"]
        },
        proxy: {
            type: "object",
            properties: {
                host: { type: "string" },
                port: { type: "integer" },
                username: { type: "string" },
                password: { type: "string" }
            },
            additionalProperties: false
        },
        url: { type: "string", format: "uri" },
        authToken: { type: "string" },
        siteKey: { type: "string" }
    },
    required: ["mode", "url"],
    additionalProperties: false
}
|
| 38 |
+
|
| 39 |
+
// const data = {
|
| 40 |
+
// mode: "source",
|
| 41 |
+
// url: "https://example.com",
|
| 42 |
+
// proxy: {
|
| 43 |
+
// host: "localhost",
|
| 44 |
+
// port: 8080,
|
| 45 |
+
// username: "test",
|
| 46 |
+
// password: "test"
|
| 47 |
+
// },
|
| 48 |
+
// authToken: "123456"
|
| 49 |
+
// }
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
/**
 * Validates a request payload against the module's schema.
 *
 * @param {*} data Payload to check.
 * @returns {true|Array} `true` when the payload is valid, otherwise the
 *   `errors` value that ajv recorded for the failed validation.
 */
function validate(data) {
    return ajv.validate(schema, data) ? true : ajv.errors
}
|
| 57 |
+
|
| 58 |
+
module.exports = validate
|
tests/endpoints.test.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
process.env.NODE_ENV = 'development'
|
| 2 |
+
const server = require('../src/index')
|
| 3 |
+
const request = require("supertest")
|
| 4 |
+
|
| 5 |
+
// Hold the suite back until a browser instance has been published on
// global.browser, polling once per second. The hook itself is capped at 30 s.
beforeAll(async () => {
    const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    while (!global.browser) {
        await pause(1000);
    }
}, 30000);
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
// Mark the run as finished, then close the shared browser. The guard matters
// when no browser was ever published on global.browser (for example when the
// beforeAll hook timed out): calling close() on undefined would raise a
// TypeError and hide the original failure.
afterAll(async () => {
    global.finished = true
    if (global.browser) {
        await global.browser.close()
    }
})
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
// "source" mode: expects HTTP 200 and a body whose `code` field is 200.
test('Scraping Page Source from Cloudflare Protection', async () => {
    const payload = {
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source"
    }
    const response = await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(200)
    expect(response.body.code).toEqual(200);
}, 60000)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
// "turnstile-min" mode with an explicit site key: expects HTTP 200 and a body
// whose `code` field is 200.
test('Creating a Turnstile Token With Site Key [min]', async () => {
    const payload = {
        url: 'https://turnstile.zeroclover.io/',
        siteKey: "0x4AAAAAAAEwzhD6pyKkgXC0",
        mode: "turnstile-min"
    }
    const response = await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(200)
    expect(response.body.code).toEqual(200);
}, 60000)
|
| 41 |
+
|
| 42 |
+
// "turnstile-max" mode (no site key sent): expects HTTP 200 and a body whose
// `code` field is 200.
test('Creating a Turnstile Token With Site Key [max]', async () => {
    const payload = {
        url: 'https://turnstile.zeroclover.io/',
        mode: "turnstile-max"
    }
    const response = await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(200)
    expect(response.body.code).toEqual(200);
}, 60000)
|
| 52 |
+
|
| 53 |
+
// "waf-session" mode: expects HTTP 200 and a body whose `code` field is 200.
test('Create Cloudflare WAF Session', async () => {
    const payload = {
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "waf-session"
    }
    const response = await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(200)
    expect(response.body.code).toEqual(200);
}, 60000)
|
tests/validate.test.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
process.env.NODE_ENV = 'development'
|
| 2 |
+
process.env.SKIP_LAUNCH = "true"
|
| 3 |
+
process.env.authToken = "123456"
|
| 4 |
+
process.env.browserLimit = -1
|
| 5 |
+
|
| 6 |
+
const server = require('../src/index')
|
| 7 |
+
const request = require("supertest")
|
| 8 |
+
|
| 9 |
+
// The payload carries no authToken while process.env.authToken is set at the
// top of this file, so the request is expected to be answered with 401.
test('Request Authorisation Control Test', async () => {
    const payload = {
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source"
    }
    await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(401)
}, 10000)
|
| 18 |
+
|
| 19 |
+
// The payload carries the authToken configured at the top of this file, where
// process.env.browserLimit is also set to -1; the request is expected to be
// answered with 429.
test('Browser Context Limit Control Test', async () => {
    const payload = {
        url: 'https://nopecha.com/demo/cloudflare',
        mode: "source",
        authToken: "123456"
    }
    await request(server)
        .post("/cf-clearance-scraper")
        .send(payload)
        .expect(429)
}, 10000)
|