Spaces:
Paused
Paused
| import { spawnSync } from 'child_process'; | |
| import fs from 'fs'; | |
| import path from 'path'; | |
| import os from 'os'; | |
| import { Readable } from 'stream'; | |
| import vosk from 'vosk-koffi'; | |
| import wav from 'wav'; | |
| import { fileURLToPath } from 'url'; | |
// ES modules have no built-in __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// File names used inside each per-request working directory.
const SOURCE_FILE = "sound.mp3"; // downloaded challenge audio
const OUT_FILE = "out.wav"; // ffmpeg output that is fed to the recognizer

// Selectors for the reCAPTCHA widget frames and the challenge container.
const MAIN_FRAME = "iframe[title='reCAPTCHA']";
// NOTE(review): BFRAME is not referenced by the code visible in this file — confirm before removing.
const BFRAME = "iframe[src*='google.com/recaptcha'][src*='bframe']";
const CHALLENGE = "body > div > div";

// Speech model, loaded once at import time from the "model" directory next to this module.
// A negative log level is documented by Vosk as suppressing informational messages.
vosk.setLogLevel(-1);
const MODEL_DIR = path.resolve(__dirname, "model");
const model = new vosk.Model(MODEL_DIR);
/**
 * Minimal promise-based mutex with a FIFO queue of waiters.
 *
 * `lock()` takes the mutex immediately when it is free; otherwise the returned
 * promise stays pending until an `unlock()` hands the mutex to that waiter.
 * When a `tag` is passed, state changes are logged as `[name] tag <state>`.
 */
class Mutex {
  /**
   * @param {string} [name="Mutex"] Label used as the log prefix.
   */
  constructor(name = "Mutex") {
    this.name = name;
    this._locked = false;
    this._queue = [];
  }

  /**
   * Acquire the mutex, waiting in line if it is already held.
   *
   * @param {string} [tag] Optional label; enables logging for this call.
   * @returns {Promise<void>} Resolves once the caller holds the mutex.
   */
  async lock(tag) {
    if (!this._locked) {
      this._lock(tag);
      return;
    }

    if (tag) {
      console.log(`[${this.name}] ${tag} waiting`);
    }

    // Park a callback; unlock() runs it to transfer ownership to this waiter.
    return new Promise((resolve) => {
      const acquire = () => {
        this._lock(tag);
        resolve();
      };
      this._queue.push(acquire);
    });
  }

  /**
   * Release the mutex and wake the oldest waiter, if any.
   * Does nothing when the mutex is not currently held.
   *
   * @param {string} [tag] Optional label; enables logging for this call.
   */
  unlock(tag) {
    if (!this._locked) {
      return;
    }

    if (tag) {
      console.log(`[${this.name}] ${tag} unlocked`);
    }

    this._locked = false;
    const next = this._queue.shift();
    if (next !== undefined) {
      next();
    }
  }

  /**
   * Mark the mutex as held (internal; performs no waiting).
   *
   * @param {string} [tag] Optional label; enables logging for this call.
   */
  _lock(tag) {
    this._locked = true;
    if (tag) {
      console.log(`[${this.name}] ${tag} locked`);
    }
  }
}
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Create a fresh, empty working directory under the OS temp directory.
 *
 * Uses `fs.mkdtempSync`, which picks a unique suffix and creates the directory
 * in one step. This avoids the check-then-create race of a hand-rolled random
 * name and never deletes a directory that some other process already owns.
 *
 * @returns {string} Absolute path of the new directory (prefixed "reSOLVER-").
 * @throws {Error} If the directory cannot be created.
 */
function createDir() {
  const prefix = path.resolve(os.tmpdir(), "reSOLVER-");
  return fs.mkdtempSync(prefix);
}
/**
 * Convert `SOURCE_FILE` inside `dir` to 16 kHz, mono, 16-bit PCM WAV
 * (`OUT_FILE`) by running ffmpeg synchronously in that directory.
 *
 * @param {string} dir Working directory containing `SOURCE_FILE`.
 * @param {string} [ffmpeg="ffmpeg"] ffmpeg executable name or path.
 * @throws {Error} If ffmpeg cannot be started or exits unsuccessfully.
 */
function convert(dir, ffmpeg = "ffmpeg") {
  const args = [
    "-loglevel",
    "error",
    "-i",
    SOURCE_FILE,
    "-acodec",
    "pcm_s16le",
    "-ac",
    "1",
    "-ar",
    "16000",
    OUT_FILE,
  ];

  const { error, status, signal, stderr } = spawnSync(ffmpeg, args, {
    cwd: dir,
    encoding: "utf8",
  });

  // `error` is set when the process could not be spawned at all (e.g. ENOENT).
  if (error) {
    throw new Error(`Could not run ffmpeg ("${ffmpeg}"): ${error.message}`, { cause: error });
  }

  // Surface conversion failures here instead of letting a missing output file
  // show up later as an unrelated stream error.
  if (status !== 0) {
    const reason = signal ? `signal ${signal}` : `exit code ${status}`;
    const details = String(stderr ?? "").trim();
    throw new Error(`ffmpeg failed (${reason})${details ? `: ${details}` : ""}`);
  }
}
/**
 * Transcribe `OUT_FILE` inside `dir` with the module-level Vosk model.
 *
 * The WAV data is streamed into a recognizer. The text of the
 * highest-confidence alternative of the first completed utterance is
 * returned; when the audio ends without Vosk reporting end-of-speech, the
 * recognizer's final result is used instead, so the promise always settles.
 *
 * @param {string} dir Working directory containing `OUT_FILE`.
 * @returns {Promise<string>} Recognized text ("" when there is no alternative).
 * @throws {Error} Rejects if the file cannot be read, is not mono PCM WAV,
 *   or recognition fails.
 */
function recognize(dir) {
  return new Promise((resolve, reject) => {
    const stream = fs.createReadStream(path.resolve(dir, OUT_FILE), { highWaterMark: 4096 });
    const reader = new wav.Reader();
    const readable = new Readable().wrap(reader);

    // Pick the text of the most confident alternative without mutating the result.
    const bestText = (result) => {
      const ranked = [...(result?.alternatives ?? [])].sort(
        (a, b) => b.confidence - a.confidence,
      );
      return ranked[0]?.text ?? "";
    };

    // Any stream failure must reject the promise; an unhandled 'error' event
    // would otherwise take the whole process down.
    const fail = (error) => {
      stream.destroy();
      reject(error);
    };
    stream.on("error", fail);
    reader.on("error", fail);
    readable.on("error", fail);

    reader.on("format", async ({ audioFormat, sampleRate, channels }) => {
      let rec = null;
      try {
        if (audioFormat !== 1 || channels !== 1) {
          throw new Error("Audio file must be WAV with mono PCM.");
        }

        rec = new vosk.Recognizer({ model, sampleRate });
        rec.setMaxAlternatives(10);
        rec.setWords(true);
        rec.setPartialWords(true);

        let text = null;
        for await (const data of readable) {
          if (rec.acceptWaveform(data)) {
            text = bestText(rec.result());
            break;
          }
        }
        if (text === null) {
          // Input ended before an utterance boundary was detected.
          text = bestText(rec.finalResult());
        }

        // Resolve only after the file handle is released so the caller can
        // safely delete the working directory.
        if (stream.closed) {
          resolve(text);
        } else {
          stream.close(() => resolve(text));
        }
      } catch (error) {
        fail(error);
      } finally {
        rec?.free();
      }
    });

    stream.pipe(reader);
  });
}
/**
 * Turn an audio HTTP response into recognized text.
 *
 * The response body is written to a fresh temp directory as `SOURCE_FILE`,
 * converted with ffmpeg and transcribed. The temp directory is removed
 * afterwards, whether or not any step failed.
 *
 * @param {{ body: () => Promise<Buffer> }} res Response whose `body()` yields the audio bytes.
 * @param {string} [ffmpeg="ffmpeg"] ffmpeg executable name or path.
 * @returns {Promise<string>} The recognized text.
 * @throws {Error} Propagates download, conversion and recognition failures.
 */
async function getText(res, ffmpeg = "ffmpeg") {
  const tempDir = createDir();
  try {
    fs.writeFileSync(path.resolve(tempDir, SOURCE_FILE), await res.body());
    convert(tempDir, ffmpeg);
    return await recognize(tempDir);
  } finally {
    // Always clean up; `force` tolerates a directory that is already gone.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Look up the `g-recaptcha-response` token after a successful verification.
 *
 * Tries, in order: the textarea inside the anchor iframe, the textarea in the
 * top-level page, and finally every textarea on the page matched by name/id.
 * Lookup errors are logged and treated as "not found".
 *
 * @param {object} page Browser page handle (the same object passed to `solve`).
 * @returns {Promise<string|null>} The token, or null when none was found.
 */
async function readResponseToken(page) {
  let token = null;

  try {
    const iframe = await page.$(MAIN_FRAME);
    const box_page = iframe ? await iframe.contentFrame() : null;
    const textarea = box_page ? await box_page.$('#g-recaptcha-response') : null;
    if (textarea) {
      token = await textarea.evaluate((el) => el.value);
      console.log("Token found in textarea:", token?.substring(0, 50) + "...");
    }
  } catch (error) {
    console.log("Error extracting token from iframe:", error.message);
  }

  if (!token) {
    try {
      const textarea = await page.$('#g-recaptcha-response');
      if (textarea) {
        token = await textarea.evaluate((el) => el.value);
        console.log("Token found in main page:", token?.substring(0, 50) + "...");
      }
    } catch (error) {
      console.log("Error extracting token from main page:", error.message);
    }
  }

  if (!token) {
    try {
      // `$$` returns every match; `$` would return a single handle (or null),
      // which cannot be iterated.
      const allTextareas = await page.$$('textarea');
      for (const textarea of allTextareas) {
        const name = await textarea.getAttribute('name');
        const id = await textarea.getAttribute('id');
        if (name === 'g-recaptcha-response' || id === 'g-recaptcha-response') {
          token = await textarea.evaluate((el) => el.value);
          console.log("Token found in textarea by attribute:", token?.substring(0, 50) + "...");
          break;
        }
      }
    } catch (error) {
      console.log("Error searching all textareas:", error.message);
    }
  }

  return token;
}

/**
 * Solve a reCAPTCHA on `page` through its audio challenge.
 *
 * Locates the challenge popup frame (clicking the checkbox first when needed),
 * switches to the audio challenge, transcribes each audio clip with
 * `getText`, submits the answer and repeats until the verification response
 * reports success or `retry` attempts are used up.
 *
 * @param {object} page Browser page handle. The calls used here (`$`, `$$`,
 *   `waitForSelector` with `state`, `on`/`off("response")`) look like
 *   Playwright's Page API — NOTE(review): confirm against the caller.
 * @param {object} [options]
 * @param {number} [options.delay=64] Per-keystroke delay (ms) when typing the answer.
 * @param {number} [options.wait=5000] Timeout (ms) for selector waits and for each audio clip.
 * @param {number} [options.retry=3] Maximum number of answer attempts.
 * @param {string} [options.ffmpeg="ffmpeg"] ffmpeg executable name or path.
 * @returns {Promise<false | { success: true, token: (string|null) }>} `false`
 *   when the widget is invisible, no challenge element is present, or no
 *   action is required; otherwise the success object with the token found
 *   (null if none could be located).
 * @throws {Error} When no reCAPTCHA frames/elements are found, no audio
 *   arrives in time, or the challenge is not passed within `retry` attempts.
 */
export async function solve(page, { delay = 64, wait = 5000, retry = 3, ffmpeg = "ffmpeg" } = {}) {
  console.log("Starting reCAPTCHA solver...");

  // Diagnostic pass: make sure at least one iframe exists and log what is there.
  try {
    await page.waitForSelector('iframe', { state: "attached", timeout: wait });
    console.log("Found iframes on page");
    const allIframes = await page.$$('iframe');
    console.log(`Total iframes found: ${allIframes.length}`);
    for (const [i, frame] of allIframes.entries()) {
      const src = await frame.getAttribute('src');
      const title = await frame.getAttribute('title');
      console.log(`Iframe ${i}: src=${src}, title=${title}`);
    }
  } catch (error) {
    console.error("Error finding iframes:", error.message);
    throw new Error("No reCAPTCHA detected", { cause: error });
  }

  let b_iframe = null;
  const bframeSelectors = [
    "iframe[src*='/recaptcha/api2/bframe']",
    "iframe[src*='/recaptcha/enterprise/bframe']",
    "iframe[src*='bframe']",
  ];

  // First attempt: the challenge popup frame may already be attached.
  for (const selector of bframeSelectors) {
    try {
      await page.waitForSelector(selector, { state: "attached", timeout: 2000 });
      b_iframe = await page.$(selector);
      if (b_iframe) {
        console.log(`Found bframe with selector: ${selector}`);
        break;
      }
    } catch {
      continue;
    }
  }

  // Otherwise click the checkbox in the anchor frame and look again.
  if (b_iframe === null) {
    console.log("Bframe not found yet, checking main frame...");
    try {
      await page.waitForSelector(MAIN_FRAME, { state: "attached", timeout: wait });
      const iframe = await page.$(MAIN_FRAME);
      if (iframe === null) {
        throw new Error("Could not find reCAPTCHA iframe");
      }
      const box_page = await iframe.contentFrame();
      if (box_page === null) {
        throw new Error("Could not find reCAPTCHA iframe content");
      }

      const invisible = Boolean(await box_page.$("div.rc-anchor-invisible"));
      console.log("invisible:", invisible);
      if (invisible) {
        return false;
      }

      const label = await box_page.$("#recaptcha-anchor-label");
      if (label === null) {
        throw new Error("Could not find reCAPTCHA label");
      }
      console.log("Clicking reCAPTCHA checkbox...");
      await label.click();
      await sleep(2000);
      for (const selector of bframeSelectors) {
        b_iframe = await page.$(selector);
        if (b_iframe) {
          console.log(`Found bframe after click: ${selector}`);
          break;
        }
      }
    } catch (error) {
      console.error("Error handling main frame:", error.message);
      throw new Error("Could not find reCAPTCHA popup iframe", { cause: error });
    }
  }

  if (b_iframe === null) {
    throw new Error("Could not find reCAPTCHA popup iframe after all attempts");
  }
  const bframe = await b_iframe.contentFrame();
  if (bframe === null) {
    throw new Error("Could not find reCAPTCHA popup iframe content");
  }

  await sleep(1000);
  const challenge = await bframe.$(CHALLENGE);
  if (challenge === null) {
    console.log("No challenge found yet, this might be OK");
    return false;
  }
  // The first inner div carrying the "rc-footer" class is treated as "nothing to solve".
  const required = await challenge.evaluate(
    (elm) => !elm.classList.contains("rc-footer"),
  );
  console.log("action required:", required);
  if (required === false) {
    return false;
  }

  await bframe.waitForSelector("#recaptcha-audio-button", { timeout: wait });
  const audio_button = await bframe.$("#recaptcha-audio-button");
  if (audio_button === null) {
    throw new Error("Could not find reCAPTCHA audio button");
  }

  // The mutex sequences the loop below with the response listener: it starts
  // locked and is released whenever an audio clip or a verification result arrives.
  const mutex = new Mutex();
  await mutex.lock("init");
  let passed = false;
  let answer = Promise.resolve("");

  const listener = async (res) => {
    try {
      if (res.headers()["content-type"] === "audio/mp3") {
        console.log(`got audio from ${res.url()}`);
        // A failed transcription yields an empty answer instead of a promise
        // that never settles (which would stall the solve loop forever).
        answer = getText(res, ffmpeg).catch((error) => {
          console.error("Could not transcribe audio:", error.message);
          return "";
        });
        mutex.unlock("get sound");
      } else if (
        res.url().startsWith("https://www.google.com/recaptcha/api2/userverify") ||
        res.url().startsWith("https://www.google.com/recaptcha/enterprise/userverify")
      ) {
        try {
          // Drop the ")]}'" prefix line before parsing the JSON payload.
          const raw = (await res.body()).toString().replace(")]}'\n", "");
          const json = JSON.parse(raw);
          passed = json[2] === 1;
        } catch (error) {
          console.error("Could not read verification response:", error.message);
        } finally {
          // Always release the waiter so the loop can retry or give up.
          mutex.unlock("verified");
        }
      }
    } catch (error) {
      console.error("Error handling response:", error.message);
    }
  };

  page.on("response", listener);
  try {
    await audio_button.click();

    let tried = 0;
    while (passed === false) {
      if (tried++ >= retry) {
        throw new Error("Could not solve reCAPTCHA");
      }

      // Wait for the next audio clip, but no longer than `wait` ms.
      await Promise.race([
        mutex.lock("ready"),
        sleep(wait).then(() => {
          throw new Error("No Audio Found");
        }),
      ]);
      await bframe.waitForSelector("#audio-source", { state: "attached", timeout: wait });
      await bframe.waitForSelector("#audio-response", { timeout: wait });

      const text = await answer;
      console.log("recognized:", text);

      const input = await bframe.$("#audio-response");
      if (input === null) {
        throw new Error("Could not find reCAPTCHA audio input");
      }
      await input.type(text, { delay });

      const button = await bframe.$("#recaptcha-verify-button");
      if (button === null) {
        throw new Error("Could not find reCAPTCHA verify button");
      }
      await button.click();

      // Released by the listener once the verification response has been handled.
      await mutex.lock("done");
      console.log("passed:", passed);
    }
  } finally {
    // Detach the listener on every exit path, including errors.
    page.off("response", listener);
  }

  await sleep(1000);
  const token = await readResponseToken(page);
  console.log("Final token:", token ? `${token.substring(0, 50)}... (length: ${token.length})` : "not found");
  return { success: true, token };
}