# test001 / nb4x.py
# bobocup's picture
# Update nb4x.py
# acc9d77 verified
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
import time
import random
import string
import logging
import requests
import json
import re
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
# Logging setup: timestamp, level name and message, INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Failure-stop configuration.
MAX_CONSECUTIVE_FAILURES = 3  # failure count at which the script gives up
ENABLE_FAILURE_STOP = True  # on/off switch for the failure-stop behaviour
consecutive_failures = 0  # module-level failure counter, mutated at runtime
def get_verification_code(driver, email, max_retries=10, delay=2):
    """Poll the mailbox page in a new tab until a 6-digit code appears.

    Opens ``https://linshiyou.com/#/<email>`` in a new browser tab, then
    repeatedly refreshes it and searches the outer HTML of the element with
    class ``body`` for a standalone 6-digit number.

    Args:
        driver: Selenium WebDriver instance that is driving the sign-up page.
        email: Mailbox address whose inbox page should be opened.
        max_retries: Maximum number of refresh/search attempts.
        delay: Seconds to sleep after each unsuccessful attempt.

    Returns:
        The 6-digit code as a string, or ``None`` if no code was found
        within ``max_retries`` attempts.
    """
    email_url = f'https://linshiyou.com/#/{email}'
    # Remember the tab we came from so we return to exactly that tab rather
    # than assuming it is the first entry in window_handles.
    original_window = driver.current_window_handle
    # Hand the URL over as a script argument instead of splicing it into the
    # JavaScript source, so quotes in the address cannot break the script.
    driver.execute_script("window.open(arguments[0], '_blank')", email_url)
    driver.switch_to.window(driver.window_handles[-1])
    logger.info("已打开邮箱页面")

    try:
        for attempt in range(1, max_retries + 1):
            try:
                # Reload the inbox and give the page a moment to render.
                driver.refresh()
                time.sleep(2)
                # Grab the HTML of the element that holds the mail body.
                body_content = driver.find_element(By.CLASS_NAME, "body").get_attribute('outerHTML')
                logger.info(f"获取到邮件内容: {body_content}")
                # Look for a standalone 6-digit number.
                match = re.search(r'\b\d{6}\b', body_content)
                if match:
                    verification_code = match.group(0)
                    logger.info(f"获取到验证码: {verification_code}")
                    return verification_code
            except Exception as e:
                # Any failure here (element missing, stale page, ...) just
                # means "not yet"; log it and try again.
                logger.error(f"获取验证码时发生错误: {str(e)}")
            time.sleep(delay)
            logger.info(f"等待验证码,重试第 {attempt} 次")
        logger.error("未能获取到验证码")
        return None
    finally:
        # Single cleanup path for both success and failure: close the mailbox
        # tab and go back to the original one. Best effort, but no longer
        # silent when it fails.
        try:
            driver.close()
            driver.switch_to.window(original_window)
        except Exception as e:
            logger.warning(f"关闭邮箱标签页时出错: {str(e)}")
def get_cookie(driver):
    """Collect selected cookies from the current page and upload them.

    Runs a script in the browser that keeps only cookies whose names start
    with ``daily_query_``, ``DSR=``, ``DS=``, ``uuid_guest=`` or
    ``ai_model=``, then POSTs the joined cookie string as JSON to the
    cookie-collection endpoint. A failed upload is logged but does not
    affect the return value.

    Args:
        driver: Selenium WebDriver instance positioned on the target site.

    Returns:
        The filtered cookie string, or ``None`` if reading the cookies
        raised an exception.
    """
    cookie_script = """
var cookieString = document.cookie;
var essentialCookies = cookieString.split('; ').filter(function(cookie) {
return cookie.startsWith('daily_query_') ||
cookie.startsWith('DSR=') ||
cookie.startsWith('DS=') ||
cookie.startsWith('uuid_guest=') ||
cookie.startsWith('ai_model=');
}).join('; ');
console.log('Essential Cookies:', essentialCookies);
return essentialCookies;
"""
    try:
        cookie = driver.execute_script(cookie_script)
        logger.info(f"获取的 Cookie: {cookie}")
        # Forward the cookie to the API service; upload problems are only logged.
        try:
            payload = {"cookie": cookie}
            response = requests.post(
                'https://bobocup-test001.hf.space/cookies',
                json=payload,
                timeout=5
            )
            if response.status_code != 200:
                logger.error("保存Cookie到API服务失败")
            else:
                logger.info("Cookie已成功保存到API服务")
        except Exception as upload_error:
            logger.error(f"发送Cookie到API服务时出错: {str(upload_error)}")
        return cookie
    except Exception as read_error:
        logger.error(f"获取cookie错误: {str(read_error)}")
        return None
def wait_for_url(driver, url, timeout=10):
    """Poll ``driver.current_url`` until it equals ``url`` or time runs out.

    Trailing slashes are ignored on both sides of the comparison. The URL is
    checked every 0.5 seconds.

    Args:
        driver: Object exposing a ``current_url`` attribute.
        url: URL to wait for.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        ``True`` as soon as the URLs match, ``False`` if the timeout elapses
        or a ``TimeoutException`` is raised while polling.
    """
    try:
        began = time.time()
        while time.time() - began < timeout:
            if driver.current_url.rstrip('/') == url.rstrip('/'):
                return True
            time.sleep(0.5)
    except TimeoutException:
        return False
    return False
def wait_for_email_input(driver, timeout=30):
    """Wait for the e-mail input inside the nested shadow DOM to exist.

    First waits (up to 20 s) for ``document.readyState`` to be ``complete``,
    sleeps 10 s, then probes the page every 10 s until ``timeout`` seconds
    have elapsed. Each probe logs the first 500 characters of the page
    source, the title, and which step of the shadow-DOM lookup failed.

    On failure the module-level ``consecutive_failures`` counter is
    incremented (when ``ENABLE_FAILURE_STOP`` is set) and ``sys.exit(1)`` is
    called once it reaches ``MAX_CONSECUTIVE_FAILURES``. On success the
    counter is reset to zero so that it really counts *consecutive* failures.

    Args:
        driver: Selenium WebDriver instance with the sign-in page loaded.
        timeout: Seconds to keep probing for the input after the page load.

    Returns:
        ``True`` if the input was found, ``False`` otherwise.

    Raises:
        SystemExit: When the failure limit is reached.
    """
    global consecutive_failures
    # Script that walks wrapper -> shadow root -> field -> shadow root -> input
    # and reports the first missing link.
    input_js = """
var wrapper = document.querySelector("#AppProvider_Wrapper > form > descope-wc");
if (!wrapper) {
return "wrapper not found";
}
var shadow1 = wrapper.shadowRoot;
if (!shadow1) {
return "shadow1 not found";
}
var elem = shadow1.querySelector("#TExZpnv5m6");
if (!elem) {
return "elem not found";
}
var shadow2 = elem.shadowRoot;
if (!shadow2) {
return "shadow2 not found";
}
var input = shadow2.querySelector("#input-vaadin-email-field-3");
if (!input) {
return "input not found";
}
return "found";
"""
    # Wait for the document itself to finish loading first.
    logger.info("等待页面完全加载...")
    try:
        WebDriverWait(driver, 20).until(
            lambda d: d.execute_script('return document.readyState') == 'complete'
        )
        logger.info("页面加载完成")
        # Extra 10 s so the page's JavaScript can finish running.
        logger.info("等待10秒让JavaScript完全执行...")
        time.sleep(10)
        # Now start probing for the element.
        start_time = time.time()
        while time.time() - start_time < timeout:
            try:
                # Log the beginning of the page source for diagnostics.
                page_source = driver.page_source
                logger.info("当前页面源码:")
                logger.info("=" * 50)
                logger.info(page_source[:500] + "...")
                logger.info("=" * 50)
                logger.info(f"当前页面标题: {driver.title}")
                # Check whether the key custom element appears in the source.
                if "descope-wc" in page_source:
                    logger.info("找到 descope-wc 元素")
                else:
                    logger.info("未找到 descope-wc 元素")
                # Diagnostic lookup of the wrapper element; only the log
                # line matters, the element itself is not used.
                try:
                    driver.find_element(By.CSS_SELECTOR, "#AppProvider_Wrapper > form > descope-wc")
                    logger.info("找到 wrapper 元素")
                except Exception:
                    logger.info("未找到 wrapper 元素")
                result = driver.execute_script(input_js)
                logger.info(f"元素查找结果: {result}")
                if result == "found":
                    logger.info("邮箱输入框已加载")
                    # A success breaks the failure streak.
                    consecutive_failures = 0
                    return True
            except Exception as e:
                logger.error(f"查找元素时发生错误: {str(e)}")
            logger.info("等待10秒后重试...")
            time.sleep(10)
    except Exception as e:
        logger.error(f"等待页面加载时发生错误: {str(e)}")
    # Record the failure and stop once the limit is reached.
    # NOTE(review): the counter is shared module state and is updated without
    # a lock; confirm whether that matters for the threaded callers.
    if ENABLE_FAILURE_STOP:
        consecutive_failures += 1
        logger.warning(f"连续失败次数: {consecutive_failures}")
        if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
            logger.critical(f"连续失败达到{MAX_CONSECUTIVE_FAILURES}次,停止脚本运行")
            # NOTE(review): sys.exit raises SystemExit in the calling thread
            # only; when called from a pool worker this may not end the whole
            # process - confirm this is the intended behaviour.
            import sys
            sys.exit(1)
    logger.error("邮箱输入框未出现")
    return False
def input_email(driver, email):
    """Type ``email`` into the shadow-DOM e-mail field via JavaScript.

    Sets the input's ``value`` and dispatches bubbling ``input`` and
    ``change`` events on it.

    Args:
        driver: Selenium WebDriver instance with the sign-in page loaded.
        email: Address to enter.

    Returns:
        ``True`` if the script ran without raising, ``False`` otherwise.
    """
    # The address is passed as a script argument (arguments[0]) rather than
    # interpolated into the JavaScript source, so quotes or backslashes in it
    # cannot break or alter the script.
    input_js = """
var input = document.querySelector("#AppProvider_Wrapper > form > descope-wc")
.shadowRoot.querySelector("#TExZpnv5m6")
.shadowRoot.querySelector("#input-vaadin-email-field-3");
input.value = arguments[0];
input.dispatchEvent(new Event('input', { bubbles: true }));
input.dispatchEvent(new Event('change', { bubbles: true }));
"""
    try:
        driver.execute_script(input_js, email)
        logger.info("输入邮箱成功")
        return True
    except Exception as e:
        logger.error(f"输入邮箱失败: {str(e)}")
        return False
def input_verification_code(driver, code):
    """Type the verification ``code`` into the one-time-code field via JavaScript.

    Sets the input's ``value`` and dispatches bubbling ``input`` and
    ``change`` events on it.

    Args:
        driver: Selenium WebDriver instance showing the code-entry step.
        code: Verification code to enter.

    Returns:
        ``True`` if the script ran without raising, ``False`` otherwise.
    """
    # The code is passed as a script argument (arguments[0]) rather than
    # interpolated into the JavaScript source, so its content cannot break
    # or alter the script.
    input_js = """
var input = document.querySelector("#AppProvider_Wrapper > form > descope-wc")
.shadowRoot.querySelector("#oneTimeCodeId")
.shadowRoot.querySelector("#input-vaadin-text-field-7 > div.wrapper > descope-text-field:nth-child(1)")
.shadowRoot.querySelector("#input-vaadin-text-field-35");
input.value = arguments[0];
input.dispatchEvent(new Event('input', { bubbles: true }));
input.dispatchEvent(new Event('change', { bubbles: true }));
"""
    try:
        driver.execute_script(input_js, code)
        logger.info("输入验证码成功")
        return True
    except Exception as e:
        logger.error(f"输入验证码失败: {str(e)}")
        return False
def registration_process(driver, wait):
    """Run one sign-up flow end to end in the given browser.

    Steps: generate an address, open the authenticate page, wait for and fill
    the e-mail field, press Return, fetch the verification code from the
    mailbox, enter it, wait for the URL to become ``https://you.com`` and
    collect the cookie.

    Args:
        driver: Selenium WebDriver instance to drive.
        wait: Accepted for the caller's benefit; not used in this function.

    Returns:
        ``True`` if every step succeeded and a non-empty cookie was obtained,
        ``False`` on any failed step or exception.
    """
    try:
        global consecutive_failures  # declared here, but not assigned in this function
        # Generate the address to register with.
        email = generate_email()
        if not email:
            logger.error("生成邮箱失败")
            return False

        # Step 1: open the authentication page.
        driver.get("https://you.com/authenticate")
        logger.info("打开认证页面")

        # Wait for the e-mail field, then fill it in.
        if not wait_for_email_input(driver):
            logger.error("等待邮箱输入框超时")
            return False
        if not input_email(driver, email):
            logger.error("输入邮箱失败")
            return False
        time.sleep(0.5)

        # Press Return to continue.
        webdriver.ActionChains(driver).send_keys(Keys.RETURN).perform()
        logger.info("按下回车键")
        time.sleep(2)

        # Fetch the verification code from the mailbox.
        verification_code = get_verification_code(driver, email)
        if not verification_code:
            logger.error("获取验证码失败")
            return False

        # Enter the verification code.
        if not input_verification_code(driver, verification_code):
            logger.error("输入验证码失败")
            return False
        time.sleep(2)

        # Wait for the redirect, then collect the cookie.
        if not wait_for_url(driver, "https://you.com"):
            logger.error("URL未变化为预期值")
            return False
        logger.info("URL已变更为 https://you.com")
        time.sleep(1)

        if not get_cookie(driver):
            logger.error("获取cookie失败")
            return False
        logger.info("成功获取cookie")
        return True
    except Exception as e:
        logger.error(f"注册过程错误: {str(e)}")
        return False
def single_registration():
    """Start a headless Chrome, run one registration, and shut Chrome down.

    Returns:
        ``True`` if ``registration_process`` reported success, ``False`` if
        it reported failure or any exception was raised (including a failure
        to start Chrome).
    """
    chrome_options = Options()
    # Basic flags.
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-extensions')
    # Proxy setting.
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration and rotating them.
    # NOTE(review): Chrome's --proxy-server flag may ignore the user:password
    # part of the URL - confirm the proxy authentication actually works.
    proxy = "http://yvighdwv-1:a5fyzm76khsv@p.webshare.io:80"
    chrome_options.add_argument(f'--proxy-server={proxy}')

    # Bound before the try so the finally clause can tell whether Chrome ever
    # started, instead of relying on a swallowed NameError.
    driver = None
    try:
        # Use the installed ChromeDriver binary.
        service = Service('/usr/local/bin/chromedriver')
        logger.info("正在启动 Chrome...")
        driver = webdriver.Chrome(service=service, options=chrome_options)
        logger.info("Chrome 启动成功")
        wait = WebDriverWait(driver, 20)
        success = registration_process(driver, wait)
        if success:
            logger.info("注册成功")
        else:
            logger.error("注册失败")
        return success
    except Exception as e:
        logger.error(f"注册过程发生错误: {str(e)}")
        logger.error(f"错误类型: {type(e)}")
        logger.error(f"错误详情: {str(e)}")
        return False
    finally:
        # Always release the browser; a failing quit is logged, not hidden.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                logger.warning(f"关闭浏览器时出错: {str(e)}")
def worker(thread_id):
    """Run registrations in a loop until five failures occur in a row.

    A successful registration resets the failure count and the loop
    immediately starts over. A failed registration is followed by a
    5-second pause; an exception counts as a failure without a pause.

    Args:
        thread_id: Identifier used only in log messages.
    """
    failures = 0
    while True:
        try:
            logger.info(f"线程 {thread_id}: 开始新的注册循环")
            if single_registration():
                failures = 0
                logger.info(f"线程 {thread_id}: 本次循环成功")
                continue

            # Failure path: count it, bail out at the limit, otherwise back off.
            failures += 1
            logger.error(f"线程 {thread_id}: 本次循环失败,这是第 {failures} 次失败")
            if failures >= 5:
                logger.critical(f"线程 {thread_id}: 续 {failures} 次失败,出线程")
                break
            logger.info(f"线程 {thread_id}: 等待5秒后重试")
            time.sleep(5)
        except Exception as e:
            logger.error(f"线程 {thread_id} 发生错误: {str(e)}")
            failures += 1
            if failures >= 5:
                break
def main():
    """Run three ``worker`` loops in a thread pool and wait for all of them.

    ``KeyboardInterrupt`` and other exceptions are logged; a closing message
    is logged in every case.
    """
    # Fixed concurrency level of 3.
    concurrent_num = 3
    try:
        logger.info(f"启动 {concurrent_num} 个并发线程")
        # Create the pool, submit one worker per slot (ids start at 1),
        # then block until every worker has returned.
        with ThreadPoolExecutor(max_workers=concurrent_num) as executor:
            futures = []
            for thread_id in range(1, concurrent_num + 1):
                futures.append(executor.submit(worker, thread_id))
            concurrent.futures.wait(futures)
    except KeyboardInterrupt:
        logger.info("程序被用户中断")
    except Exception as e:
        logger.error(f"主程序异常: {str(e)}")
    finally:
        logger.info("程序结束")
def generate_email():
    """Return a random address: 8 ASCII letters/digits plus ``@youxiang.dev``."""
    alphabet = string.ascii_letters + string.digits
    local_part = ''.join(random.choices(alphabet, k=8))
    return local_part + "@youxiang.dev"
# Script entry point: start the worker pool only when run directly.
if __name__ == "__main__":
    main()