194130157a committed on
Commit
aab7dad
·
verified ·
1 Parent(s): 29bf901

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ import math
4
+
5
+
6
# Matches a JWT-shaped token: three dot-separated base64url-like segments,
# the first of which starts with "ey".
_JWT_PATTERN = re.compile(r'(ey[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+\.[a-zA-Z0-9\-_]+)')


def _read_text(filepath):
    """Return the decoded text of *filepath*, or None if it cannot be read."""
    try:
        with open(filepath, 'rb') as f:
            head = f.read(2)

        encodings = ['utf-8-sig']
        # Only trust UTF-16 when a BOM is present: without one, almost any
        # even-length byte string "decodes" as UTF-16 and yields garbage,
        # which would shadow the GB18030 attempt below.
        if head in (b'\xff\xfe', b'\xfe\xff'):
            encodings.append('utf-16')
        # latin-1 maps every byte, so it acts as the last-resort fallback.
        encodings += ['gb18030', 'latin-1']

        for enc in encodings:
            try:
                with open(filepath, 'r', encoding=enc) as f:
                    return f.read()
            except UnicodeError:
                continue
    except OSError:
        return None
    return None


def _extract_cookie(line):
    """Return the cookie found in a stripped, non-empty *line*, or None."""
    if '----' in line:
        # Delimited record (e.g. "user----pass----cookie"): take the first
        # field that looks like a token, not the whole line.
        for field in line.split('----'):
            field = field.strip()
            if field.startswith('ey') and len(field) > 20:
                return field
    elif line.startswith('ey'):
        # The line itself is the cookie.
        return line

    # Fall back to searching for a JWT-shaped substring anywhere in the line.
    match = _JWT_PATTERN.search(line)
    return match.group(1) if match else None


def process_file(file_obj, chunk_size=999):
    """Extract "ey..." cookies from an uploaded text file and split them into files.

    Each non-empty line contributes at most one cookie. The cookies are
    written, one per line, into files named ``cleaned_cookies_part_<n>.txt``
    holding at most *chunk_size* cookies each. The files are created in a
    fresh temporary directory so that separate calls never overwrite each
    other's output.

    Args:
        file_obj: The uploaded file. Either an object whose ``name``
            attribute is the file path, a plain path string, or None.
        chunk_size: Maximum number of cookies per output file.

    Returns:
        A ``(files, message)`` tuple. ``files`` is a list of output file
        paths, or None when nothing was uploaded, the file could not be
        read, or no cookie was found. ``message`` is the status text shown
        to the user.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    import os
    import tempfile

    if file_obj is None:
        return None, "❌ 请先上传 TXT 文件"

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Accept both upload objects exposing ``.name`` and plain path strings.
    filepath = getattr(file_obj, 'name', file_obj)

    content = _read_text(filepath)
    if content is None:
        return None, "❌ 文件读取失败,无法识别编码。"

    extracted_cookies = []
    for line in content.splitlines():
        clean_line = line.strip()
        if not clean_line:
            continue
        cookie = _extract_cookie(clean_line)
        if cookie:
            extracted_cookies.append(cookie)

    if not extracted_cookies:
        preview = content[:200].replace('\n', '\\n')
        return None, (
            f"⚠️ 依然未找到 Cookie!\n"
            f"程序看到的文本前200字为:\n{preview}\n"
            f"请检查:你的 Cookie 是标准的 JWT 格式(ey... . ... . ...)吗?"
        )

    total_cookies = len(extracted_cookies)
    output_dir = tempfile.mkdtemp(prefix="cleaned_cookies_")
    output_files = []

    # Walk the list in steps of chunk_size; each step becomes one file.
    for part_no, start_idx in enumerate(range(0, total_cookies, chunk_size), start=1):
        chunk = extracted_cookies[start_idx:start_idx + chunk_size]
        filename = os.path.join(output_dir, f"cleaned_cookies_part_{part_no}.txt")

        with open(filename, 'w', encoding='utf-8') as f_out:
            f_out.write('\n'.join(chunk))

        output_files.append(filename)

    return output_files, f"✅ 成功提取 {total_cookies} 个 Cookie!\n已按每 {chunk_size} 个分割,共生成 {len(output_files)} 个文件。"
96
+
97
# User interface: an upload column on the left, results on the right.
with gr.Blocks(title="Cookie 智能抠取器") as demo:
    gr.Markdown("### 🍪 Cookie 智能抠取器 (自动分割版)")
    gr.Markdown("提取 ey 开头的 Cookie,并自动按照 **每 999 个** 为一组输出多个文件。")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="上传文件", file_types=['.txt'])
            submit_btn = gr.Button("开始提取", variant="primary")

        with gr.Column():
            status_output = gr.Textbox(label="运行结果", lines=4)
            # process_file returns a list of paths here, so this component
            # is expected to offer them as a group of downloadable files.
            file_output = gr.File(label="下载处理后的文件 (支持多文件)")

    # The handler returns (files, message), in the same order as `outputs`.
    submit_btn.click(
        fn=process_file,
        inputs=file_input,
        outputs=[file_output, status_output]
    )

if __name__ == "__main__":
    demo.launch()