// recaptchaV2 / solver.js
// HerzaJ's picture
// Update solver.js
// 731c18f verified
import { spawnSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import os from 'os';
import { Readable } from 'stream';
import vosk from 'vosk-koffi';
import wav from 'wav';
import { fileURLToPath } from 'url';
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Set before the model is constructed below.
// NOTE(review): -1 presumably silences Vosk's native logging — confirm against the Vosk docs.
vosk.setLogLevel(-1);
// The speech model is expected in a "model" directory next to this file and is
// loaded once, at import time; every recognizer created in this module shares it.
const MODEL_DIR = path.resolve(__dirname, "model");
const model = new vosk.Model(MODEL_DIR);
// File names used inside each per-challenge temporary directory:
// the downloaded audio and the WAV file ffmpeg produces from it.
const SOURCE_FILE = "sound.mp3";
const OUT_FILE = "out.wav";
// Selector for the reCAPTCHA checkbox ("anchor") iframe on the host page.
const MAIN_FRAME = "iframe[title='reCAPTCHA']";
// Selector for the challenge popup iframe.
// NOTE(review): not referenced anywhere in the visible code — solve() uses its own selector list.
const BFRAME = "iframe[src*='google.com/recaptcha'][src*='bframe']";
// Selector, evaluated inside the popup frame, for the element solve() inspects
// to decide whether a challenge has to be answered.
const CHALLENGE = "body > div > div";
/**
 * Small promise-based lock with a first-in, first-out waiting list.
 *
 * `lock()` takes the lock immediately when it is free; otherwise the caller is
 * queued and its promise resolves once an `unlock()` hands the lock over.
 * Passing a `tag` to any method logs the transition, prefixed with the
 * mutex name.
 */
class Mutex {
  /**
   * @param {string} [name="Mutex"] Label used as the prefix of log lines.
   */
  constructor(name = "Mutex") {
    this.name = name;
    this._locked = false;
    this._queue = [];
  }

  /**
   * Acquires the lock, waiting in line if it is currently held.
   *
   * @param {string} [tag] Optional label; when given, the wait/lock events are logged.
   * @returns {Promise<void>} Resolves once the caller holds the lock.
   */
  async lock(tag) {
    // Fast path: nobody holds the lock, so take it synchronously.
    if (!this._locked) {
      this._lock(tag);
      return undefined;
    }

    if (tag) {
      console.log(`[${this.name}] ${tag} waiting`);
    }

    return new Promise((granted) => {
      // Invoked by unlock(): re-take the lock on behalf of this waiter, then wake it.
      const takeOver = () => {
        this._lock(tag);
        granted();
      };
      this._queue.push(takeOver);
    });
  }

  /**
   * Releases the lock and passes it to the oldest waiter, if any.
   * Does nothing when the lock is not held.
   *
   * @param {string} [tag] Optional label; when given, the release is logged.
   */
  unlock(tag) {
    if (!this._locked) {
      return;
    }

    if (tag) {
      console.log(`[${this.name}] ${tag} unlocked`);
    }

    this._locked = false;
    const next = this._queue.shift();
    if (next != null) {
      next();
    }
  }

  /**
   * Marks the lock as held (internal; performs no waiting).
   *
   * @param {string} [tag] Optional label; when given, the acquisition is logged.
   */
  _lock(tag) {
    this._locked = true;
    if (tag) {
      console.log(`[${this.name}] ${tag} locked`);
    }
  }
}
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Creates a fresh, empty working directory under the OS temp directory.
 *
 * Uses `fs.mkdtempSync`, which picks the random suffix and creates the
 * directory in a single step. The previous approach built the name from
 * `Math.random()` and deleted any existing directory with that name, so a
 * name collision would wipe the working directory of another solve that was
 * still in progress.
 *
 * @returns {string} Absolute path of the new directory (named `reSOLVER-<random>`).
 * @throws {Error} If the directory cannot be created.
 */
function createDir() {
  return fs.mkdtempSync(path.resolve(os.tmpdir(), "reSOLVER-"));
}
/**
 * Converts the downloaded audio in `dir` (SOURCE_FILE) into a 16 kHz, mono,
 * 16-bit PCM WAV file (OUT_FILE) by running ffmpeg synchronously inside `dir`.
 *
 * The outcome of the ffmpeg run is checked: previously a missing binary or a
 * failed conversion was ignored here and only surfaced later, when the output
 * file turned out not to exist.
 *
 * @param {string} dir Working directory that contains SOURCE_FILE.
 * @param {string} [ffmpeg="ffmpeg"] Name or path of the ffmpeg executable.
 * @throws {Error} If ffmpeg cannot be started or exits unsuccessfully.
 */
function convert(dir, ffmpeg = "ffmpeg") {
  const args = [
    "-loglevel",
    "error",
    "-i",
    SOURCE_FILE,
    "-acodec",
    "pcm_s16le",
    "-ac",
    "1",
    "-ar",
    "16000",
    OUT_FILE,
  ];
  const result = spawnSync(ffmpeg, args, { cwd: dir });

  // `error` is set when the process could not be spawned at all (e.g. ENOENT).
  if (result.error) {
    throw new Error(`Failed to run ffmpeg ("${ffmpeg}"): ${result.error.message}`, {
      cause: result.error,
    });
  }

  // `status` is null when the process was terminated by a signal.
  if (result.status !== 0) {
    const reason = result.status === null ? `signal ${result.signal}` : `code ${result.status}`;
    const details = result.stderr ? result.stderr.toString().trim() : "";
    throw new Error(`ffmpeg exited with ${reason}${details ? `: ${details}` : ""}`);
  }
}
/**
 * Returns the text of the highest-confidence alternative in a Vosk result,
 * or an empty string when the result carries no alternatives.
 * On equal confidence the earlier alternative wins.
 */
function bestAlternativeText(result) {
  const alternatives = result?.alternatives ?? [];
  let best = null;
  for (const candidate of alternatives) {
    if (best === null || candidate.confidence > best.confidence) {
      best = candidate;
    }
  }
  return best?.text ?? "";
}

/**
 * Transcribes OUT_FILE inside `dir` with Vosk.
 *
 * The WAV file is streamed into a recognizer; the transcript of the first
 * utterance the recognizer reports as complete is returned. If the audio ends
 * before such an endpoint is reported, the recognizer's final result is used
 * instead, so the promise always settles.
 *
 * The promise settles only after the file stream has been closed, so the
 * caller can remove `dir` immediately afterwards.
 *
 * @param {string} dir Directory containing OUT_FILE (mono PCM WAV).
 * @returns {Promise<string>} Best transcript (may be empty).
 * @throws {Error} (as a rejection) If the file cannot be read, is not mono PCM
 *   WAV, or recognition fails.
 */
function recognize(dir) {
  return new Promise((resolve, reject) => {
    const stream = fs.createReadStream(path.resolve(dir, OUT_FILE), { highWaterMark: 4096 });
    const reader = new wav.Reader();
    const readable = new Readable().wrap(reader);

    // Release the file handle first, then settle the promise.
    const finish = (settle, value) => stream.close(() => settle(value));

    // Without these, a missing/unreadable file or a malformed WAV would surface
    // as an unhandled 'error' event instead of a rejection.
    stream.once("error", reject);
    reader.once("error", (error) => finish(reject, error));

    reader.on("format", async ({ audioFormat, sampleRate, channels }) => {
      let rec = null;
      try {
        if (audioFormat !== 1 || channels !== 1) {
          throw new Error("Audio file must be WAV with mono PCM.");
        }
        rec = new vosk.Recognizer({ model, sampleRate });
        rec.setMaxAlternatives(10);
        rec.setWords(true);
        rec.setPartialWords(true);

        let text = null;
        for await (const data of readable) {
          // acceptWaveform() returns true once the recognizer detects the end of an utterance.
          if (rec.acceptWaveform(data)) {
            text = bestAlternativeText(rec.result());
            break;
          }
        }
        if (text === null) {
          // Audio ended without an endpoint: flush whatever was recognized so far.
          text = bestAlternativeText(rec.finalResult());
        }
        finish(resolve, text);
      } catch (error) {
        // Errors thrown in this async handler would otherwise be lost and leave the promise pending.
        finish(reject, error);
      } finally {
        rec?.free();
      }
    });

    stream.pipe(reader);
  });
}
/**
 * Downloads the audio challenge carried by `res`, converts it to WAV and
 * returns the recognized text.
 *
 * All intermediate files live in a dedicated temporary directory that is
 * removed when the function finishes — including when conversion or
 * recognition fails, which previously left the directory behind.
 *
 * @param {{ body: () => Promise<Buffer> }} res Response whose body is the audio file.
 * @param {string} [ffmpeg="ffmpeg"] Name or path of the ffmpeg executable.
 * @returns {Promise<string>} The recognized text.
 */
async function getText(res, ffmpeg = "ffmpeg") {
  const tempDir = createDir();
  try {
    fs.writeFileSync(path.resolve(tempDir, SOURCE_FILE), await res.body());
    convert(tempDir, ffmpeg);
    return await recognize(tempDir);
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Looks for the reCAPTCHA response token after a successful verification.
 *
 * Tries, in order: `#g-recaptcha-response` inside the checkbox iframe, the
 * same selector on the host page, and finally every `<textarea>` on the host
 * page whose `name` or `id` is `g-recaptcha-response`. Lookup errors are
 * logged and the next strategy is tried.
 *
 * @param {object} page Browser page handle (same object passed to `solve`).
 * @returns {Promise<string|null>} The token, or null/empty when none was found.
 */
async function readResponseToken(page) {
  let token = null;

  try {
    const iframe = await page.$(MAIN_FRAME);
    if (iframe) {
      const box_page = await iframe.contentFrame();
      if (box_page) {
        const textarea = await box_page.$('#g-recaptcha-response');
        if (textarea) {
          token = await textarea.evaluate(el => el.value);
          console.log("Token found in textarea:", token?.substring(0, 50) + "...");
        }
      }
    }
  } catch (error) {
    console.log("Error extracting token from iframe:", error.message);
  }

  if (!token) {
    try {
      const textarea = await page.$('#g-recaptcha-response');
      if (textarea) {
        token = await textarea.evaluate(el => el.value);
        console.log("Token found in main page:", token?.substring(0, 50) + "...");
      }
    } catch (error) {
      console.log("Error extracting token from main page:", error.message);
    }
  }

  if (!token) {
    try {
      // `$$` returns every match; `$` returns a single handle, which is not iterable.
      const allTextareas = await page.$$('textarea');
      for (const textarea of allTextareas) {
        const name = await textarea.getAttribute('name');
        const id = await textarea.getAttribute('id');
        if (name === 'g-recaptcha-response' || id === 'g-recaptcha-response') {
          token = await textarea.evaluate(el => el.value);
          console.log("Token found in textarea by attribute:", token?.substring(0, 50) + "...");
          break;
        }
      }
    } catch (error) {
      console.log("Error searching all textareas:", error.message);
    }
  }

  return token;
}

/**
 * Solves a reCAPTCHA v2 on `page` through its audio challenge.
 *
 * Flow: locate the challenge popup iframe (clicking the checkbox first when
 * the popup is not present yet), switch to the audio challenge, transcribe
 * each audio clip the page downloads, type the transcript and submit it,
 * repeating until the verification response reports success.
 *
 * @param {object} page Browser page handle. NOTE(review): the calls used here
 *   (`$`, `$$`, `waitForSelector` with `state`, `on("response")`) look like
 *   Playwright's Page API — confirm against the caller.
 * @param {object} [options]
 * @param {number} [options.delay=64] Per-keystroke delay (ms) when typing the answer.
 * @param {number} [options.wait=5000] Timeout (ms) for selector waits and for each audio clip to arrive.
 * @param {number} [options.retry=3] Maximum number of answer attempts.
 * @param {string} [options.ffmpeg="ffmpeg"] Name or path of the ffmpeg executable.
 * @returns {Promise<false | { success: true, token: (string|null) }>} `false`
 *   when nothing had to be solved (invisible widget, no challenge element, or
 *   no action required); otherwise the success marker with the extracted token.
 * @throws {Error} If the widget or one of its controls cannot be found, no
 *   audio arrives in time, transcription fails, or all attempts are used up.
 */
export async function solve(page, { delay = 64, wait = 5000, retry = 3, ffmpeg = "ffmpeg" } = {}) {
  console.log("Starting reCAPTCHA solver...");

  try {
    await page.waitForSelector('iframe', { state: "attached", timeout: wait });
    console.log("Found iframes on page");
    const allIframes = await page.$$('iframe');
    console.log(`Total iframes found: ${allIframes.length}`);
    for (let i = 0; i < allIframes.length; i++) {
      const src = await allIframes[i].getAttribute('src');
      const title = await allIframes[i].getAttribute('title');
      console.log(`Iframe ${i}: src=${src}, title=${title}`);
    }
  } catch (error) {
    console.error("Error finding iframes:", error.message);
    throw new Error("No reCAPTCHA detected", { cause: error });
  }

  let b_iframe = null;
  const bframeSelectors = [
    "iframe[src*='/recaptcha/api2/bframe']",
    "iframe[src*='/recaptcha/enterprise/bframe']",
    "iframe[src*='bframe']"
  ];

  // First pass: the popup iframe may already be attached.
  for (const selector of bframeSelectors) {
    try {
      await page.waitForSelector(selector, { state: "attached", timeout: 2000 });
      b_iframe = await page.$(selector);
      if (b_iframe) {
        console.log(`Found bframe with selector: ${selector}`);
        break;
      }
    } catch {
      continue;
    }
  }

  // Second pass: click the checkbox so the popup gets created, then look again.
  if (b_iframe === null) {
    console.log("Bframe not found yet, checking main frame...");
    try {
      await page.waitForSelector(MAIN_FRAME, { state: "attached", timeout: wait });
      const iframe = await page.$(MAIN_FRAME);
      if (iframe === null) {
        throw new Error("Could not find reCAPTCHA iframe");
      }
      const box_page = await iframe.contentFrame();
      if (box_page === null) {
        throw new Error("Could not find reCAPTCHA iframe content");
      }
      const invisible = (await box_page.$("div.rc-anchor-invisible")) !== null;
      console.log("invisible:", invisible);
      if (invisible) {
        return false;
      }
      const label = await box_page.$("#recaptcha-anchor-label");
      if (label === null) {
        throw new Error("Could not find reCAPTCHA label");
      }
      console.log("Clicking reCAPTCHA checkbox...");
      await label.click();
      await sleep(2000);
      for (const selector of bframeSelectors) {
        b_iframe = await page.$(selector);
        if (b_iframe) {
          console.log(`Found bframe after click: ${selector}`);
          break;
        }
      }
    } catch (error) {
      console.error("Error handling main frame:", error.message);
      throw new Error("Could not find reCAPTCHA popup iframe", { cause: error });
    }
  }

  if (b_iframe === null) {
    throw new Error("Could not find reCAPTCHA popup iframe after all attempts");
  }
  const bframe = await b_iframe.contentFrame();
  if (bframe === null) {
    throw new Error("Could not find reCAPTCHA popup iframe content");
  }

  await sleep(1000);
  const challenge = await bframe.$(CHALLENGE);
  if (challenge === null) {
    console.log("No challenge found yet, this might be OK");
    return false;
  }
  const required = await challenge.evaluate(
    (elm) => !elm.classList.contains("rc-footer"),
  );
  console.log("action required:", required);
  if (required === false) {
    return false;
  }

  await bframe.waitForSelector("#recaptcha-audio-button", { timeout: wait });
  const audio_button = await bframe.$("#recaptcha-audio-button");
  if (audio_button === null) {
    throw new Error("Could not find reCAPTCHA audio button");
  }

  // The mutex sequences the loop below with the network events seen by the
  // listener: it starts locked, and each relevant response unlocks it once.
  const mutex = new Mutex();
  await mutex.lock("init");
  let passed = false;
  let answer = Promise.resolve("");

  const listener = async (res) => {
    if (res.headers()["content-type"] === "audio/mp3") {
      console.log(`got audio from ${res.url()}`);
      // Keep the real promise so a transcription failure reaches `await answer`
      // instead of leaving it pending forever.
      answer = getText(res, ffmpeg);
      // Mark it as handled so a clip that gets superseded before being awaited
      // cannot raise an unhandled rejection; awaiting `answer` still throws.
      answer.catch(() => undefined);
      mutex.unlock("get sound");
    } else if (
      res.url().startsWith("https://www.google.com/recaptcha/api2/userverify") ||
      res.url().startsWith("https://www.google.com/recaptcha/enterprise/userverify")
    ) {
      try {
        // The payload is JSON behind an anti-hijacking prefix.
        const raw = (await res.body()).toString().replace(")]}'\n", "");
        const json = JSON.parse(raw);
        passed = json[2] === 1;
      } catch (error) {
        // Treat an unreadable verdict as "not passed" rather than crashing the listener.
        console.error("Error reading verification response:", error.message);
      } finally {
        // Always release, otherwise the loop would wait on "done" forever.
        mutex.unlock("verified");
      }
    }
  };

  page.on("response", listener);
  try {
    await audio_button.click();
    let tried = 0;
    while (passed === false) {
      if (tried++ >= retry) {
        throw new Error("Could not solve reCAPTCHA");
      }
      // Wait for the next audio clip, but give up after `wait` ms.
      await Promise.race([
        mutex.lock("ready"),
        sleep(wait).then(() => {
          throw new Error("No Audio Found");
        }),
      ]);
      await bframe.waitForSelector("#audio-source", { state: "attached", timeout: wait });
      await bframe.waitForSelector("#audio-response", { timeout: wait });
      console.log("recognized:", await answer);
      const input = await bframe.$("#audio-response");
      if (input === null) {
        throw new Error("Could not find reCAPTCHA audio input");
      }
      await input.type(await answer, { delay });
      const button = await bframe.$("#recaptcha-verify-button");
      if (button === null) {
        throw new Error("Could not find reCAPTCHA verify button");
      }
      await button.click();
      // Released by the listener once the verification response has been read.
      await mutex.lock("done");
      console.log("passed:", passed);
    }
  } finally {
    // Detach on every exit path so a failed solve does not leave the listener on the page.
    page.off("response", listener);
  }

  await sleep(1000);
  const token = await readResponseToken(page);
  console.log("Final token:", token ? `${token.substring(0, 50)}... (length: ${token.length})` : "not found");
  return { success: true, token };
}