Spaces:

194130157a
/

tx

Runtime error

App Files Files Community

194130157a committed on Feb 10

Commit

aab7dad

verified ·

1 Parent(s): 29bf901

Create app.py

Browse files

Files changed (1) hide show

app.py +119 -0

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

import math
import os
import re
import tempfile

import gradio as gr
# Matches a JWT-shaped token: three dot-separated base64url segments, the
# first of which starts with "ey".
_JWT_PATTERN = re.compile(r'(ey[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+)')


def _decode_text(raw):
    """Decode the raw bytes of an uploaded file, returning ``None`` on failure.

    UTF-16 is only attempted when the data carries a UTF-16 BOM or contains
    NUL bytes. Without that guard a BOM-less UTF-16 decode "succeeds" on most
    even-length GB18030 files and yields garbage. ``utf-8-sig`` strips a
    UTF-8 BOM so it cannot end up glued to the first cookie.
    """
    looks_utf16 = raw[:2] in (b"\xff\xfe", b"\xfe\xff") or b"\x00" in raw
    if looks_utf16:
        candidates = ("utf-16", "utf-8-sig", "gb18030", "latin-1")
    else:
        candidates = ("utf-8-sig", "gb18030", "latin-1")
    for encoding in candidates:
        try:
            text = raw.decode(encoding)
        except UnicodeError:
            continue
        # Normalise line endings the way text-mode reading would.
        return text.replace("\r\n", "\n").replace("\r", "\n")
    return None


def _extract_cookie(line):
    """Return the cookie found on one stripped, non-empty line, or ``None``."""
    # 1. The line itself starts with the cookie: keep only the first
    #    "----"-delimited field so trailing account fields are not included.
    if line.startswith("ey"):
        return line.split("----", 1)[0].strip()
    # 2. "field----field----..." layout: take the first plausible field.
    for field in line.split("----"):
        field = field.strip()
        if field.startswith("ey") and len(field) > 20:
            return field
    # 3. Fall back to searching for a JWT-shaped token anywhere in the line.
    match = _JWT_PATTERN.search(line)
    return match.group(1) if match else None


def process_file(file_obj, chunk_size=999):
    """Extract ``ey…`` cookies from an uploaded text file into chunked files.

    Args:
        file_obj: The upload handed over by the UI. Either an object exposing
            the file path as ``.name`` or a plain path; ``None`` means nothing
            was uploaded.
        chunk_size: Maximum number of cookies written to each output file.

    Returns:
        A ``(files, message)`` tuple. ``files`` is a list of paths to the
        generated ``cleaned_cookies_part_N.txt`` files, or ``None`` when
        nothing could be produced. ``message`` is the user-facing status text.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    if file_obj is None:
        return None, "❌ 请先上传 TXT 文件"

    # Accept both a wrapper object with ``.name`` and a bare path.
    filepath = getattr(file_obj, "name", file_obj)

    # Read the bytes once and decode in memory instead of reopening the file
    # for every candidate encoding.
    try:
        with open(filepath, "rb") as f_in:
            raw = f_in.read()
    except OSError:
        return None, "❌ 文件读取失败，无法识别编码。"

    content = _decode_text(raw)
    if content is None:
        return None, "❌ 文件读取失败，无法识别编码。"

    extracted_cookies = []
    for line in content.splitlines():
        clean_line = line.strip()
        if not clean_line:
            continue
        cookie = _extract_cookie(clean_line)
        if cookie is not None:
            extracted_cookies.append(cookie)

    if not extracted_cookies:
        preview = content[:200].replace('\n', '\\n')
        return None, (
            f"⚠️ 依然未找到 Cookie！\n"
            f"程序看到的文本前200字为：\n{preview}\n"
            f"请检查：你的 Cookie 是标准的 JWT 格式（ey... . ... . ...）吗？"
        )

    # Write each request's output into its own temporary directory so that
    # concurrent requests cannot overwrite (or download) each other's files.
    output_dir = tempfile.mkdtemp(prefix="cleaned_cookies_")
    total_cookies = len(extracted_cookies)
    output_files = []
    for part_no, start_idx in enumerate(range(0, total_cookies, chunk_size), start=1):
        chunk = extracted_cookies[start_idx:start_idx + chunk_size]
        # e.g. cleaned_cookies_part_1.txt
        filename = os.path.join(output_dir, f"cleaned_cookies_part_{part_no}.txt")
        with open(filename, 'w', encoding='utf-8') as f_out:
            f_out.write('\n'.join(chunk))
        output_files.append(filename)

    return output_files, f"✅ 成功提取 {total_cookies} 个 Cookie！\n已按每 {chunk_size} 个分割，共生成 {len(output_files)} 个文件。"
# UI: one column for the upload and the trigger button, one column for the
# status text and the downloadable result files.
with gr.Blocks(title="Cookie 智能抠取器") as demo:
    gr.Markdown("### 🍪 Cookie 智能抠取器 (自动分割版)")
    # Plain string literal: the text has no placeholders, so no f-string is needed.
    gr.Markdown("提取 ey 开头的 Cookie，并自动按照 **每 999 个** 为一组输出多个文件。")
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="上传文件", file_types=['.txt'])
            submit_btn = gr.Button("开始提取", variant="primary")
        with gr.Column():
            status_output = gr.Textbox(label="运行结果", lines=4)
            # Note: when gr.File receives a list it presents the entries as a
            # group of downloadable files.
            file_output = gr.File(label="下载处理后的文件 (支持多文件)")
    # process_file returns (files, message), matching the order of `outputs`.
    submit_btn.click(
        fn=process_file,
        inputs=file_input,
        outputs=[file_output, status_output]
    )

if __name__ == "__main__":
    demo.launch()