ljx77qaq commited on
Commit
837b2a2
·
verified ·
1 Parent(s): 8d57f06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +404 -247
app.py CHANGED
@@ -16,6 +16,7 @@ from functools import wraps
16
 
17
  import urllib3.util.connection as urllib3_cn
18
 
 
19
  def allowed_gai_family():
20
  return socket.AF_INET
21
  urllib3_cn.allowed_gai_family = allowed_gai_family
@@ -35,46 +36,6 @@ try:
35
  except ImportError:
36
  print("⚠️ 未安装 pypinyin")
37
 
38
- # ===== 🔧 Fix 2: 更宽泛的标签正则 & 辅助函数 =====
39
- TAG_PATTERN = r'#[^\s#,,。!!??;;::<>()()【】《》\[\]]+'
40
-
41
- def extract_tags(text):
42
- """提取标签,支持 emoji、& 等特殊字符"""
43
- raw = re.findall(TAG_PATTERN, text)
44
- return [t.rstrip('.,;:!?。,;:!?…))】》') for t in raw if len(t) > 1]
45
-
46
- def classify_tag_key(tag):
47
- """把标签按首字符分类到目录分组 key"""
48
- clean_str = tag[1:]
49
- if not clean_str:
50
- return None
51
- fc = clean_str[0]
52
- if fc.isdigit():
53
- return "0-9"
54
- elif fc.isalpha() and fc.isascii():
55
- return fc.upper()
56
- elif '\u4e00' <= fc <= '\u9fff':
57
- try:
58
- py = lazy_pinyin(fc)
59
- if py and len(py[0]) > 0:
60
- return py[0][0].upper()
61
- except NameError:
62
- return "中文"
63
- return "中文"
64
- else:
65
- return "#"
66
-
67
- # 🔧 Fix 1: 目录任务向后兼容迁移
68
- def migrate_dir_task(task):
69
- """把旧格式 dir_task 迁移到新的 scan_channel + targets 结构"""
70
- if "scan_channel" not in task:
71
- task["scan_channel"] = task.get("channel_id", "")
72
- if "targets" not in task:
73
- task["targets"] = [{"channel_id": task.get("channel_id", ""), "msg_id": str(task.get("msg_id", ""))}]
74
- if isinstance(task.get("last_html_dir"), str):
75
- task["last_html_dir"] = {}
76
- return task
77
-
78
  # ===== 2. WebDAV 存储配置 =====
79
  DAV_URL_BASE = os.environ.get("WEBDAV_URL", "").rstrip("/")
80
  DAV_USER = os.environ.get("WEBDAV_USER") or os.environ.get("WEBDAV_USERNAME")
@@ -122,6 +83,30 @@ def save_data(force=True):
122
 
123
  load_data()
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  # ===== 3. Telebot 主逻辑 =====
126
  BOT_TOKEN = os.environ.get("BOT_TOKEN")
127
  bot = telebot.TeleBot(BOT_TOKEN)
@@ -130,6 +115,7 @@ apihelper.API_URL = "https://nine7.linlizhi0210.workers.dev/bot{0}/{1}"
130
  user_states = {}
131
  ALL_TYPES = ['text', 'audio', 'document', 'photo', 'sticker', 'video', 'video_note', 'voice', 'location', 'contact', 'animation', 'dice', 'poll']
132
 
 
133
  _rate_limit = defaultdict(list)
134
  RATE_LIMIT_MAX = 30
135
  RATE_LIMIT_WINDOW = 60
@@ -142,13 +128,18 @@ def check_rate_limit(uid):
142
  _rate_limit[uid].append(now)
143
  return False
144
 
 
145
  _event_queues = defaultdict(lambda: queue.Queue(maxsize=50))
146
 
147
  def push_event(uid, event_type, data):
148
  q = _event_queues.get(uid)
149
  if q:
150
  try:
151
- q.put_nowait({"type": event_type, "data": data, "time": time.strftime("%H:%M:%S")})
 
 
 
 
152
  except queue.Full:
153
  pass
154
 
@@ -268,32 +259,30 @@ def cmd_build_dir(message):
268
  user_states[uid] = {"step": "WAIT_MANUAL_DIR_CH"}
269
  send_channel_prompt(uid, "🗂️ **生成频道标签目录**\n\n请输入需要扫描的【频道 ID】(例如 `-10012345678`):")
270
 
271
- # 🔧 Fix 1: 目录任务创建 - 开启新流程
272
  @bot.message_handler(commands=['add_dir'])
273
  def cmd_add_dir(message):
274
  uid = str(message.from_user.id)
275
  user_states[uid] = {"step": "WAIT_DIR_NAME"}
276
  bot.send_message(uid, "🗂️ **创建自动更新目录任务**\n\n1️⃣ 请给任务起个名字 (如: `主频道自动目录`):", parse_mode="Markdown")
277
 
278
- # 🔧 Fix 1: 目录任务列表 - 显示新结构
279
  @bot.message_handler(commands=['list_dir'])
280
  def cmd_list_dir(message):
281
  uid = str(message.from_user.id)
282
  dirs = DATA["users"].get(uid, {}).get("dir_tasks", [])
283
  if not dirs: return bot.send_message(uid, "暂无自动目录任务。")
284
  for i, t in enumerate(dirs):
285
- migrate_dir_task(t)
286
- targets_text = "\n".join([f" ▪️ `{tgt['channel_id']}` 消息 `{tgt['msg_id']}`" for tgt in t.get("targets", [])])
287
  msg = (f"🗂️ **任务**: `{t.get('task_name', '未命名')}`\n"
288
- f"🔍 **扫描频道**: `{t.get('scan_channel')}`\n"
289
- f"📌 **更新目标**:\n{targets_text}\n"
290
  f"⏱ **频率**: 每 `{t.get('interval', 15)}` 分钟扫描一次\n"
291
  f"🛡️ **屏蔽标签**: `{', '.join(t.get('blacklist', [])) or '无'}`\n"
292
  f"📦 **已收录标签**: `{len(t.get('tags_cache', []))} 个`")
293
  markup = types.InlineKeyboardMarkup(row_width=2)
 
294
  markup.add(types.InlineKeyboardButton("➕ 加屏蔽", callback_data=f"ed_ab_{i}"), types.InlineKeyboardButton("➖ 删屏蔽", callback_data=f"ed_rb_{i}"))
295
  markup.add(types.InlineKeyboardButton("⏱ 扫描频率", callback_data=f"ed_in_{i}"))
296
- markup.add(types.InlineKeyboardButton("➕ 添加目标", callback_data=f"ed_at_{i}"), types.InlineKeyboardButton("➖ 删除目标", callback_data=f"ed_rt_{i}"))
297
  markup.add(types.InlineKeyboardButton("🗑️ 终止并删除该目录任务", callback_data=f"d_d_{i}"))
298
  bot.send_message(uid, msg, reply_markup=markup, parse_mode="Markdown")
299
 
@@ -416,10 +405,12 @@ def process_user_text(uid, text):
416
  if not text.isdigit(): return bot.send_message(uid, "❌ 只能输入纯数字!")
417
  user_states[uid].update({"step": "WAIT_STAT_BLACKLIST", "duration": int(text)})
418
  bot.send_message(uid, "9️⃣ 请输入**屏蔽名单** (用空格隔开)\n💡 不需要屏蔽请回复 `无`:")
 
419
  elif step == "WAIT_STAT_BLACKLIST":
420
  blacklist = [] if text.strip() == "无" else [x.strip() for x in re.split(r'[\s\n]+', text) if x.strip()]
421
  user_states[uid].update({"step": "WAIT_STAT_BL_TITLE", "stats_blacklist": blacklist})
422
  bot.send_message(uid, "🔟 请输入**屏蔽区的标题** (例如 `🚫本月轮换限制:`)\n💡 不需要请回复 `无`:")
 
423
  elif step == "WAIT_STAT_BL_TITLE":
424
  bl_title = "" if text.strip() == "无" else text.strip()
425
  if "stats_tasks" not in DATA["users"][uid]: DATA["users"][uid]["stats_tasks"] = []
@@ -428,7 +419,8 @@ def process_user_text(uid, text):
428
  "table_title": state["table_title"], "top_n": state["top_n"], "trigger_tag": state["trigger_tag"],
429
  "interval": state["interval"], "duration": state["duration"], "start_time": int(time.time()),
430
  "last_run": 0, "completed_items": [], "last_checked_msg_id": int(state["msg_id"]),
431
- "stats_blacklist": state["stats_blacklist"], "blacklist_title": bl_title
 
432
  })
433
  save_data()
434
  bot.send_message(uid, "✅ 完美!任务已创建。")
@@ -463,13 +455,15 @@ def process_user_text(uid, text):
463
  task["stats_blacklist"] = [x for x in task.get("stats_blacklist", []) if x not in to_remove]
464
  elif step == "EDIT_STAT_BLTITLE":
465
  task["blacklist_title"] = "" if text.strip() == "无" else text.strip()
 
466
  task["last_html_stats"] = ""
467
  task["last_run"] = 0
 
468
  save_data()
469
  bot.send_message(uid, "✅ 属性已修改!下次刷新周期将立即更新。")
470
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
471
  user_states.pop(uid)
472
-
473
  elif step == "WAIT_BACKUP_SRC":
474
  user_states[uid] = {"step": "WAIT_BACKUP_TGT", "src": text}
475
  send_channel_prompt(uid, "📌 请输入【目标频道 ID】:")
@@ -495,67 +489,74 @@ def process_user_text(uid, text):
495
  Thread(target=generate_smart_directory, args=(uid, text)).start()
496
  user_states.pop(uid)
497
 
498
- # 🔧 Fix 1: 目录任务新建流程 - 分离扫描频道与目标频道
499
  elif step == "WAIT_DIR_NAME":
500
- user_states[uid].update({"step": "WAIT_DIR_SCAN_CH", "task_name": text})
501
- send_channel_prompt(uid, "2️⃣ 请输入**扫描标签的频道 ID**:")
502
- elif step == "WAIT_DIR_SCAN_CH":
503
- user_states[uid].update({"step": "WAIT_DIR_TGT_CH", "scan_channel": text, "targets": []})
504
- send_channel_prompt(uid, "3️⃣ 请输入第个**目标频道 ID** (目录将更新到此频道的消息):")
505
  elif step == "WAIT_DIR_TGT_CH":
506
  user_states[uid].update({"step": "WAIT_DIR_TGT_MSG", "current_tgt_ch": text})
507
- bot.send_message(uid, f"📌 请输入频道 `{text}` 中**承载目录的消息 ID**:")
508
  elif step == "WAIT_DIR_TGT_MSG":
509
  if text.startswith('http'): text = text.split('/')[-1]
510
  state["targets"].append({"channel_id": state["current_tgt_ch"], "msg_id": text})
511
- user_states[uid]["step"] = "WAIT_DIR_MORE_OR_BL"
512
- send_channel_prompt(uid, f"✅ 已添加目标 {len(state['targets'])}!\n\n📌 发送频道 ID 继续添加目标,或回复 **完成** 进入下一步:")
513
- elif step == "WAIT_DIR_MORE_OR_BL":
514
- if text.strip() in ["完成", "done", "Done", "完"]:
515
- user_states[uid]["step"] = "WAIT_DIR_BLACKLIST"
516
- bot.send_message(uid, "4️⃣ 请输入需要**屏蔽的标签** (空格隔开,不屏蔽回复 `无`):")
517
- else:
518
- user_states[uid].update({"step": "WAIT_DIR_TGT_MSG", "current_tgt_ch": text})
519
- bot.send_message(uid, f"📌 请输入频道 `{text}` 中**承载目录的消息 ID**:")
 
 
520
  elif step == "WAIT_DIR_BLACKLIST":
521
  blacklist = [] if text.strip() == "无" else text.split()
522
  if "dir_tasks" not in DATA["users"][uid]: DATA["users"][uid]["dir_tasks"] = []
523
  DATA["users"][uid]["dir_tasks"].append({
524
  "task_name": state["task_name"],
525
- "scan_channel": state["scan_channel"],
526
  "targets": state["targets"],
527
  "blacklist": blacklist, "interval": 15,
528
- "tags_cache": [], "tags_map": {}, "scanned_msgs": {}, "last_html_dir": {}
 
529
  })
530
  save_data()
531
- bot.send_message(uid, f" 目录任务建立完成!共 {len(state['targets'])} 个更新目标。")
 
532
  user_states.pop(uid)
533
 
534
- # 🔧 Fix 1: 编辑目录任务 - 添加目标中间步骤
535
  elif step.startswith("EDIT_DIR_"):
536
  idx = state["idx"]
537
  try:
538
  task = DATA["users"][uid]["dir_tasks"][idx]
539
- migrate_dir_task(task)
540
  if step == "EDIT_DIR_ADD_TGT_CH":
541
- user_states[uid].update({"step": "EDIT_DIR_ADD_TGT_MSG", "add_tgt_ch": text})
542
- return bot.send_message(uid, "📌 请输入该频道的**消息 ID**:")
543
  elif step == "EDIT_DIR_ADD_TGT_MSG":
544
  if text.startswith('http'): text = text.split('/')[-1]
545
- task.setdefault("targets", []).append({"channel_id": state["add_tgt_ch"], "msg_id": text})
546
- bot.send_message(uid, "✅ 新目标已添加!")
 
 
547
  elif step == "EDIT_DIR_ADDBL":
548
  task["blacklist"].extend(text.split())
549
  task["blacklist"] = list(set(task["blacklist"]))
 
550
  bot.send_message(uid, "✅ 目录属性已修改!")
551
  elif step == "EDIT_DIR_RMBL":
552
  to_rem = text.split()
553
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
 
554
  bot.send_message(uid, "✅ 目录属性已修改!")
555
  elif step == "EDIT_DIR_INTV":
556
  task["interval"] = int(text)
 
557
  bot.send_message(uid, "✅ 目录属性已修改!")
558
- save_data()
559
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
560
  user_states.pop(uid)
561
 
@@ -604,6 +605,39 @@ def handle_backup_callbacks(call):
604
  def handle_callbacks(call):
605
  uid = str(call.from_user.id)
606
  data = call.data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  if data == "ab_add":
608
  user_states[uid] = {"step": "WAIT_AB_ID"}
609
  bot.send_message(uid, "📌 请输入要保存的**频道 ID**:")
@@ -622,24 +656,6 @@ def handle_callbacks(call):
622
  bot.edit_message_text("✅ 频道已移除。", call.message.chat.id, call.message.message_id)
623
  return bot.answer_callback_query(call.id)
624
  elif data.startswith("selch_") or data.startswith("bkp_"): return
625
-
626
- # 🔧 Fix 1: 处理删除目标 edrt_{task_idx}_{target_idx}
627
- if data.startswith("edrt_"):
628
- parts = data.split("_")
629
- task_idx = int(parts[1])
630
- tgt_idx = int(parts[2])
631
- try:
632
- task = DATA["users"][uid]["dir_tasks"][task_idx]
633
- migrate_dir_task(task)
634
- targets = task.get("targets", [])
635
- if tgt_idx < len(targets):
636
- removed = targets.pop(tgt_idx)
637
- save_data()
638
- bot.edit_message_text(f"✅ 目标已删除: {removed['channel_id']} | 消息 {removed['msg_id']}", call.message.chat.id, call.message.message_id)
639
- except Exception as e:
640
- bot.send_message(uid, f"❌ 删除失败: {e}")
641
- return bot.answer_callback_query(call.id)
642
-
643
  try:
644
  action, idx_str = data.rsplit("_", 1)
645
  idx = int(idx_str)
@@ -656,25 +672,27 @@ def handle_callbacks(call):
656
  del DATA["users"][uid]["dir_tasks"][idx]
657
  bot.edit_message_text("❌ 目录任务已移除", call.message.chat.id, call.message.message_id)
658
  save_data()
659
- # 🔧 Fix 1: 添加/除目标回调
660
  elif action == "ed_at":
661
  user_states[uid] = {"step": "EDIT_DIR_ADD_TGT_CH", "idx": idx}
662
- send_channel_prompt(uid, "📌 请输入新目标的**频道 ID**:")
663
- bot.answer_callback_query(call.id, "请输入新目标频道")
664
  elif action == "ed_rt":
665
- task = DATA["users"][uid]["dir_tasks"][idx]
666
- migrate_dir_task(task)
667
- targets = task.get("targets", [])
668
- if not targets:
669
- bot.send_message(uid, "暂无目标可删除。")
670
- else:
671
- markup_del = types.InlineKeyboardMarkup(row_width=1)
672
- for ti, tgt in enumerate(targets):
673
- markup_del.add(types.InlineKeyboardButton(
674
- f"❌ {tgt['channel_id']} | 消息 {tgt['msg_id']}",
675
- callback_data=f"edrt_{idx}_{ti}"
676
- ))
677
- bot.send_message(uid, "🗑️ 选择要除的目标:", reply_markup=markup_del)
 
 
678
  bot.answer_callback_query(call.id)
679
  elif action in ["e_name", "e_titl", "e_trig", "e_topn", "e_intv", "e_dura", "e_chid", "e_msgid", "e_sabl", "e_srbl", "e_sblt", "ed_ab", "ed_rb", "ed_in"]:
680
  prompt_map = {
@@ -694,21 +712,23 @@ def handle_callbacks(call):
694
  "ed_in": "📌 请输入新的扫描频率(分钟):"
695
  }
696
  state_map = {
697
- "e_name": "EDIT_STAT_NAME", "e_titl": "EDIT_STAT_TITL",
698
- "e_trig": "EDIT_STAT_TRIG", "e_topn": "EDIT_STAT_TOPN",
699
- "e_intv": "EDIT_STAT_INTV", "e_dura": "EDIT_STAT_DURA",
700
- "e_chid": "EDIT_STAT_CHID", "e_msgid": "EDIT_STAT_MSGID",
701
- "e_sabl": "EDIT_STAT_ADDBL", "e_srbl": "EDIT_STAT_RMBL",
 
 
 
 
 
702
  "e_sblt": "EDIT_STAT_BLTITLE",
703
- "ed_ab": "EDIT_DIR_ADDBL", "ed_rb": "EDIT_DIR_RMBL",
 
704
  "ed_in": "EDIT_DIR_INTV"
705
  }
706
  user_states[uid] = {"step": state_map[action], "idx": idx}
707
- # 🔧 Fix 3: 修改频道ID时也弹出地址簿快选
708
- if action in ["e_chid"]:
709
- send_channel_prompt(uid, prompt_map[action])
710
- else:
711
- bot.send_message(uid, prompt_map[action])
712
  bot.answer_callback_query(call.id, "请在对话框输入新值")
713
 
714
  def run_smart_backup_v2(latest_id, uid, src, tgt):
@@ -723,7 +743,8 @@ def run_smart_backup_v2(latest_id, uid, src, tgt):
723
  ref_msg = await TL_CLIENT.get_messages(int(src), ids=latest_id)
724
  target_grouped_id = ref_msg.grouped_id if ref_msg else None
725
  async for msg in TL_CLIENT.iter_messages(int(src)):
726
- if msg.action is not None: continue
 
727
  if msg.id > latest_id:
728
  if target_grouped_id and msg.grouped_id == target_grouped_id: messages.append(msg)
729
  continue
@@ -764,20 +785,25 @@ def run_smart_backup_v2(latest_id, uid, src, tgt):
764
  if e.error_code == 429:
765
  time.sleep(e.result_json.get('parameters', {}).get('retry_after', 10))
766
  else:
767
- failed += len(msg_ids_to_copy); failed_ids.extend(msg_ids_to_copy); break
 
 
768
  except Exception:
769
- failed += len(msg_ids_to_copy); failed_ids.extend(msg_ids_to_copy); break
 
 
770
  save_data()
 
771
  report = f"🏁 **备份完成!**\n源: `{src}` ➡️ 目: `{tgt}`\n✅ 新增 **{success}** 条"
772
  if failed > 0:
773
  report += f"\n❌ 失败 **{failed}** 条"
774
  show_ids = failed_ids[:10]
775
  report += f"\n失败消息 ID: `{show_ids}`"
776
- if len(failed_ids) > 10: report += f"\n... 等共 {len(failed_ids)} 条"
 
777
  bot.send_message(uid, report)
778
  push_event(uid, "backup_done", f"✅ 备份完成,新增 {success} 条,失败 {failed} 条")
779
 
780
- # 🔧 Fix 2: 使用 extract_tags + classify_tag_key
781
  def generate_smart_directory(uid, ch_id):
782
  global TL_LOOP, TL_CLIENT
783
  if not TL_LOOP or not TL_CLIENT: return bot.send_message(uid, "❌ 错误: Userbot 未启动。")
@@ -790,14 +816,22 @@ def generate_smart_directory(uid, ch_id):
790
  else:
791
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
792
  clean_text = html.unescape(re.sub(r'<.*?>', '', re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)))
793
- for t in extract_tags(clean_text):
794
- tags_set.add(t)
795
  except Exception as e: return None, str(e)
796
  if not tags_set: return None, "没有找到有效标签。"
797
  directory_map = {}
798
  for tag in tags_set:
799
- key = classify_tag_key(tag)
800
- if key is None: continue
 
 
 
 
 
 
 
 
 
801
  if key not in directory_map: directory_map[key] = []
802
  directory_map[key].append(tag)
803
  return directory_map, None
@@ -941,9 +975,11 @@ def start_telethon_worker():
941
  trigger_tag = task.get('trigger_tag', '#未设置')
942
  completed_items = []
943
  interval_sec = int(task.get('interval', 60)) * 60
 
944
  if current_time > int(task.get('start_time', current_time)) + int(task.get('duration', 7)) * 86400:
945
  del tasks[i]; data_changed = True; continue
946
  if current_time - int(task.get('last_run', 0)) < interval_sec: continue
 
947
  try:
948
  original_msg = await TL_CLIENT.get_messages(ch_id, ids=msg_id)
949
  if not original_msg: continue
@@ -951,14 +987,17 @@ def start_telethon_worker():
951
  raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
952
  base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
953
  else: base_html = ""
 
954
  comments_data_list = []
955
  discussion_chat_id = None
956
  thread_id = None
 
957
  async for comment in TL_CLIENT.iter_messages(ch_id, reply_to=msg_id):
958
  if not discussion_chat_id:
959
  discussion_chat_id = comment.chat_id
960
  if comment.reply_to:
961
  thread_id = comment.reply_to.reply_to_top_id or comment.reply_to.reply_to_msg_id
 
962
  if comment.reactions:
963
  total_reacts = sum(r.count for r in comment.reactions.results)
964
  if total_reacts > 0:
@@ -966,27 +1005,37 @@ def start_telethon_worker():
966
  short_text = full_raw_text
967
  if len(short_text) > 15: short_text = short_text[:14] + "…"
968
  comments_data_list.append((total_reacts, html.escape(short_text), short_text, full_raw_text, comment.id))
 
969
  stats_blacklist = task.get('stats_blacklist', [])
970
  blacklist_section = ""
971
  if stats_blacklist:
972
  bl_title = task.get('blacklist_title', '🚫本月轮换限制:')
973
  bl_names = "\n".join([html.escape(n) for n in stats_blacklist])
974
- if bl_title: blacklist_section = f"\n{html.escape(bl_title)}\n<blockquote>{bl_names}</blockquote>\n"
975
- else: blacklist_section = f"\n<blockquote>{bl_names}</blockquote>\n"
 
 
 
976
  stats_section = ""
977
  all_comments_for_file = []
 
978
  if comments_data_list:
979
  comments_data_list.sort(key=lambda x: x[0], reverse=True)
 
980
  deduped_comments = []
981
  for item in comments_data_list:
982
  total, safe_text, raw_short, full_raw, c_id = item
983
  base_name = re.split(r'[((]', full_raw)[0].strip()
 
984
  is_blocked = False
985
  for blocked in stats_blacklist:
986
  if blocked and len(blocked) >= 2:
987
  if blocked in full_raw or blocked in base_name:
988
- is_blocked = True; break
989
- if is_blocked: continue
 
 
 
990
  conflict = False
991
  for added_item in deduped_comments:
992
  added_full_raw = added_item[3]
@@ -994,35 +1043,45 @@ def start_telethon_worker():
994
  if (len(base_name) >= 2 and base_name in added_full_raw) or (len(added_base) >= 2 and added_base in full_raw):
995
  conflict = True; break
996
  if not conflict: deduped_comments.append(item)
 
997
  comments_data_list = deduped_comments
998
  completed_items = []
 
999
  try:
1000
  async for newer_msg in TL_CLIENT.iter_messages(ch_id, limit=100, min_id=msg_id):
1001
  if newer_msg.id == msg_id: continue
1002
  if not newer_msg.raw_text or trigger_tag.lower() not in newer_msg.raw_text.lower(): continue
 
1003
  if newer_msg.entities:
1004
  html_text = tl_html.unparse(newer_msg.raw_text, newer_msg.entities)
1005
  text_no_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1006
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_no_bq))
1007
- else: clean_text = newer_msg.raw_text
 
 
1008
  if trigger_tag.lower() not in clean_text.lower(): continue
1009
- # 🔧 Fix 2: 用 extract_tags 提取(无 #
1010
- found_tags = [t[1:] for t in extract_tags(clean_text)]
1011
  for item in comments_data_list:
1012
- raw_short = item[2]; full_raw = item[3]
 
1013
  base_name = re.split(r'[((]', full_raw)[0].strip()
1014
  for tag in found_tags:
1015
  if (len(tag) >= 2 and tag.lower() in full_raw.lower()) or (len(base_name) >= 2 and base_name.lower() in tag.lower()):
1016
  if raw_short not in completed_items: completed_items.append(raw_short)
1017
  break
1018
  except Exception: pass
 
1019
  if task.get('completed_items') != completed_items:
1020
  task['completed_items'] = completed_items; data_changed = True
 
1021
  comments_data_list.sort(key=lambda x: (x[2] in completed_items, x[0]), reverse=True)
1022
  all_comments_for_file = comments_data_list.copy()
 
1023
  comments_data_list = comments_data_list[:top_n]
1024
  completed_count = sum(1 for item in comments_data_list if item[2] in completed_items)
1025
  max_digits = max([len(str(item[0])) for item in comments_data_list] + [1])
 
1026
  inner_lines = []
1027
  for rank, item in enumerate(comments_data_list):
1028
  total, safe_text, raw_short, full_raw, c_id = item
@@ -1030,54 +1089,95 @@ def start_telethon_worker():
1030
  display_text = f"<s>{safe_text}</s>" if raw_short in completed_items else safe_text
1031
  padded_total = str(total).rjust(max_digits, ' ')
1032
  inner_lines.append(f"{medal} <code>{padded_total}</code> 赞 | <i>{display_text}</i>")
 
1033
  beijing_tz = timezone(timedelta(hours=8))
1034
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1035
  inner_lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
 
1036
  if all_comments_for_file:
1037
  c_chat_str = str(discussion_chat_id).replace("-100", "") if discussion_chat_id else ""
1038
  list_html = ""
1039
  for rank, item in enumerate(all_comments_for_file, 1):
1040
  total, safe_text, raw_short, full_raw, c_id = item
1041
- if c_chat_str and thread_id: link = f"tg://privatepost?channel={c_chat_str}&post={c_id}&thread={thread_id}"
1042
- elif c_chat_str: link = f"tg://privatepost?channel={c_chat_str}&post={c_id}"
1043
- else: link = "#"
 
 
 
 
1044
  display_name = html.escape(full_raw)
1045
- if raw_short in completed_items: display_name = f"<s style='opacity: 0.5;'>{display_name}</s>"
 
 
1046
  list_html += f'<a href="{link}" class="item"><span class="rank">#{rank}</span><span class="name">{display_name}</span><span class="reacts">{total} 赞</span></a>'
 
1047
  html_template = f"""<!DOCTYPE html>
1048
- <html lang="zh-CN"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>完整榜单 - {html.escape(table_title)}</title>
1049
- <style>body{{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:#1a1a2e;color:#eaeaea;padding:15px;margin:0}}.container{{max-width:600px;margin:0 auto;background:#16213e;border-radius:12px;padding:15px;box-shadow:0 4px 15px rgba(0,0,0,0.3)}}h2{{text-align:center;color:#fff;margin-bottom:20px;font-size:18px;border-bottom:1px solid rgba(255,255,255,0.1);padding-bottom:10px;line-height:1.5}}.hint{{text-align:center;color:#5dade2;font-size:13px;margin-top:-10px;margin-bottom:15px;font-weight:500}}.item{{display:flex;justify-content:space-between;align-items:center;padding:12px 0;border-bottom:1px solid rgba(255,255,255,0.05);text-decoration:none;color:inherit;transition:background 0.2s}}.item:last-child{{border-bottom:none}}.item:active{{background:rgba(255,255,255,0.05);border-radius:8px}}.rank{{font-weight:bold;width:35px;color:#e94560;font-size:14px}}.name{{flex:1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;padding:0 10px;font-size:14px}}.reacts{{font-weight:bold;color:#5dade2;font-size:14px}}</style></head>
1050
- <body><div class="container"><h2>📊 {html.escape(table_title)} <br><span style="font-size:12px;color:#8a8a9a;font-weight:normal">全量数据收录 | 更新于 {now_str}</span></h2><div class="hint">💡 点击名字跳转评论投票</div>{list_html}</div></body></html>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  cache_key = f"{ch_id}_{msg_id}"
1052
  HTML_CACHE[cache_key] = html_template
 
1053
  space_host = "bangdan.nine7.cc.cd"
1054
  file_msg_link = f"https://{space_host}/list/{cache_key}"
 
 
1055
  stats_section = f"<b>{html.escape(table_title)} ({completed_count}/{top_n}) <a href='{file_msg_link}'>完整名单</a></b>\n<blockquote>{chr(10).join(inner_lines)}</blockquote>"
 
1056
  else:
1057
  beijing_tz = timezone(timedelta(hours=8))
1058
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1059
  stats_section = f"<b>{html.escape(table_title)} (0/{top_n})</b>\n<blockquote>暂无评论数据\n\n⏳ <code>最后更新: {now_str} (北京时间)</code></blockquote>"
 
1060
  new_message_text = f"{base_html}{SEPARATOR_MARK}{blacklist_section}{stats_section}"
1061
  content_hash = f"{blacklist_section}|{stats_section}"
 
1062
  if task.get('last_html_stats') != content_hash:
1063
  try:
1064
  if original_msg.photo or original_msg.video or original_msg.document: bot.edit_message_caption(caption=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1065
  else: bot.edit_message_text(text=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1066
  task['last_html_stats'] = content_hash; data_changed = True
1067
  except Exception: pass
 
1068
  task['last_run'] = current_time; data_changed = True; await asyncio.sleep(2)
1069
- except Exception: pass
 
1070
  if data_changed: await asyncio.to_thread(save_data)
1071
 
1072
- # 🔧 Fix 1 + Fix 2: 目录任务 - 扫描频道与多目标更新
1073
  async def update_channel_dirs():
1074
  current_time = int(time.time())
1075
  data_changed = False
1076
  for uid, u_data in DATA.get("users", {}).items():
1077
  tasks = u_data.get("dir_tasks", [])
1078
  for task in tasks:
1079
- migrate_dir_task(task)
1080
- scan_ch = int(task['scan_channel'])
 
1081
  targets = task.get('targets', [])
1082
  if not targets: continue
1083
  blacklist = task.get('blacklist', [])
@@ -1089,9 +1189,17 @@ def start_telethon_worker():
1089
  scan_kwargs = {'limit': None if is_first_run else 150}
1090
  new_tags_found = False
1091
 
1092
- # 扫描 scan_channel (不一定与 target channel 相同)
 
 
 
 
 
 
1093
  try:
1094
- async for msg in TL_CLIENT.iter_messages(scan_ch, **scan_kwargs):
 
 
1095
  msg_id_str = str(msg.id)
1096
  msg_time = msg.edit_date.timestamp() if msg.edit_date else msg.date.timestamp()
1097
  if scanned_msgs.get(msg_id_str) == msg_time: continue
@@ -1104,7 +1212,7 @@ def start_telethon_worker():
1104
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
1105
  text_without_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1106
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_without_bq))
1107
- found_tags = extract_tags(clean_text)
1108
  if found_tags:
1109
  tags_map[msg_id_str] = found_tags
1110
  new_tags_found = True
@@ -1112,73 +1220,74 @@ def start_telethon_worker():
1112
  if msg_id_str in tags_map:
1113
  del tags_map[msg_id_str]
1114
  new_tags_found = True
 
1115
  task['last_run'] = current_time
1116
- except Exception: continue
1117
-
1118
- if new_tags_found or is_first_run:
1119
- task['tags_map'] = tags_map
1120
- task['scanned_msgs'] = scanned_msgs
1121
- data_changed = True
1122
- all_tags = set()
1123
- for t_list in tags_map.values(): all_tags.update(t_list)
1124
- active_tags = [t for t in all_tags if t not in blacklist]
1125
- task['tags_cache'] = active_tags
1126
-
1127
- # 构建目录文本
1128
- directory_map = {}
1129
- for tag in active_tags:
1130
- key = classify_tag_key(tag)
1131
- if key is None: continue
1132
- if key not in directory_map: directory_map[key] = []
1133
- directory_map[key].append(tag)
1134
-
1135
- lines = ["目录:\n<blockquote expandable>"]
1136
- keys = sorted(directory_map.keys())
1137
- if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
1138
- for key in keys:
1139
- tags_line = " ".join([html.escape(t) for t in sorted(directory_map[key])])
1140
- lines.append(f"{key}: {tags_line}\n")
1141
- lines.append("</blockquote>")
1142
- beijing_tz = timezone(timedelta(hours=8))
1143
- now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1144
- lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1145
- stats_text = f"<blockquote>{chr(10).join(lines)}</blockquote>"
1146
- task_name = task.get('task_name', '标签目录')
1147
- safe_title_with_count = f"{html.escape(task_name)} ({len(active_tags)})"
1148
-
1149
- # 🔧 Fix 1: 遍历所有 targets 更新
1150
- last_html_dir = task.get('last_html_dir', {})
1151
- if isinstance(last_html_dir, str): last_html_dir = {}
1152
-
1153
- for tgt in targets:
1154
- tgt_ch = int(tgt['channel_id'])
1155
- tgt_msg = int(tgt['msg_id'])
1156
- tgt_key = f"{tgt_ch}_{tgt_msg}"
1157
- try:
1158
- original_msg = await TL_CLIENT.get_messages(tgt_ch, ids=tgt_msg)
1159
- if not original_msg: continue
1160
- if original_msg.raw_text:
1161
- raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
1162
- base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
1163
- else: base_html = ""
1164
- except Exception: continue
1165
-
1166
- new_message_text = f"{base_html}{SEPARATOR_MARK}<b>{safe_title_with_count}</b>\n{stats_text}"
1167
- if len(new_message_text) > 4000:
1168
- new_message_text = new_message_text[:4000] + "\n... </blockquote>\n⚠️ 目录过长已截断"
1169
-
1170
- if last_html_dir.get(tgt_key) != stats_text:
1171
  try:
 
 
 
 
 
 
 
 
 
1172
  if original_msg.photo or original_msg.video or original_msg.document:
1173
  bot.edit_message_caption(caption=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1174
  else:
1175
  bot.edit_message_text(text=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1176
- last_html_dir[tgt_key] = stats_text
1177
  data_changed = True
1178
  except Exception: pass
1179
- await asyncio.sleep(2)
1180
-
1181
- task['last_html_dir'] = last_html_dir
1182
  if data_changed: await asyncio.to_thread(save_data)
1183
 
1184
  TL_CLIENT.start()
@@ -1200,7 +1309,8 @@ def add_security_headers(response):
1200
 
1201
  def validate_webapp(req):
1202
  init_data = req.headers.get('X-Init-Data', '')
1203
- if not init_data: return None
 
1204
  try:
1205
  parsed = {}
1206
  for part in init_data.split('&'):
@@ -1208,13 +1318,18 @@ def validate_webapp(req):
1208
  k, v = part.split('=', 1)
1209
  parsed[k] = unquote(v)
1210
  check_hash = parsed.pop('hash', None)
1211
- if not check_hash or not BOT_TOKEN: return None
 
1212
  data_check_string = "\n".join(f"{k}={parsed[k]}" for k in sorted(parsed.keys()))
1213
  secret_key = hmac.new(b"WebAppData", BOT_TOKEN.encode(), hashlib.sha256).digest()
1214
  computed = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
1215
- if not hmac.compare_digest(computed, check_hash): return None
 
 
1216
  auth_date = int(parsed.get('auth_date', 0))
1217
- if abs(time.time() - auth_date) > 86400: return None
 
 
1218
  user_obj = json.loads(parsed.get('user', '{}'))
1219
  uid = str(user_obj.get('id', ''))
1220
  if uid and uid not in DATA["users"]:
@@ -1229,21 +1344,26 @@ def need_auth(f):
1229
  @wraps(f)
1230
  def wrapper(*args, **kwargs):
1231
  uid = validate_webapp(request)
1232
- if not uid: return jsonify({"ok": False, "msg": "未授权"}), 401
1233
- if check_rate_limit(uid): return jsonify({"ok": False, "msg": "操作太频繁,请稍后再试"}), 429
 
 
1234
  return f(uid, *args, **kwargs)
1235
  return wrapper
1236
 
1237
  @app.route('/')
1238
- def home(): return "Bot is running"
 
1239
 
1240
  @app.route('/webapp')
1241
- def webapp_page(): return send_file('webapp.html')
 
1242
 
1243
  @app.route('/list/<cache_key>')
1244
  def view_list(cache_key):
1245
  html_content = HTML_CACHE.get(cache_key)
1246
- if not html_content: return "暂无数据或页面已刷新,请等待机器人下次更新", 404
 
1247
  return html_content
1248
 
1249
  @app.route('/api/data')
@@ -1317,7 +1437,8 @@ def api_edit_stat(uid, idx):
1317
  field, val = d["field"], d["value"]
1318
  try:
1319
  task = DATA["users"][uid]["stats_tasks"][idx]
1320
- if field in ("top_n", "interval", "duration"): task[field] = int(val)
 
1321
  elif field == "msg_id":
1322
  if val.startswith('http'): val = val.split('/')[-1]
1323
  task["msg_id"] = val
@@ -1338,9 +1459,12 @@ def api_edit_stat(uid, idx):
1338
  elif field == "blacklist_title":
1339
  task["blacklist_title"] = "" if val.strip() == "无" else val.strip()
1340
  task["last_html_stats"] = ""
1341
- else: task[field] = val
 
 
1342
  task["last_html_stats"] = ""
1343
  task["last_run"] = 0
 
1344
  save_data()
1345
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1346
  except Exception as e:
@@ -1355,27 +1479,28 @@ def api_del_stat(uid, idx):
1355
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1356
  except: return jsonify({"ok": False, "msg": "索引无效"})
1357
 
1358
- # 🔧 Fix 1: 目录API - 新结构
1359
  @app.route('/api/dirs', methods=['POST'])
1360
  @need_auth
1361
  def api_add_dir(uid):
1362
  d = request.json
1363
- scan_channel = d.get("scan_channel", d.get("channel_id", ""))
1364
- targets = d.get("targets", [])
1365
- if not targets:
1366
- msg_id_raw = str(d.get("msg_id", ""))
1367
- if msg_id_raw.startswith('http'): msg_id_raw = msg_id_raw.split('/')[-1]
1368
- targets = [{"channel_id": d.get("channel_id", scan_channel), "msg_id": msg_id_raw}]
1369
  DATA["users"][uid].setdefault("dir_tasks", []).append({
1370
- "task_name": d["task_name"], "scan_channel": scan_channel,
1371
- "targets": targets, "blacklist": d.get("blacklist", []),
1372
- "interval": 15, "tags_cache": [], "tags_map": {},
1373
- "scanned_msgs": {}, "last_html_dir": {}
 
 
 
1374
  })
1375
  save_data()
1376
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1377
 
1378
- # 🔧 Fix 1: 目录编辑API - 支持 add_target / rm_target
1379
  @app.route('/api/dirs/<int:idx>', methods=['PUT'])
1380
  @need_auth
1381
  def api_edit_dir(uid, idx):
@@ -1383,7 +1508,6 @@ def api_edit_dir(uid, idx):
1383
  field, val = d["field"], d["value"]
1384
  try:
1385
  task = DATA["users"][uid]["dir_tasks"][idx]
1386
- migrate_dir_task(task)
1387
  if field == "add_blacklist":
1388
  task["blacklist"].extend(val.split())
1389
  task["blacklist"] = list(set(task["blacklist"]))
@@ -1393,15 +1517,19 @@ def api_edit_dir(uid, idx):
1393
  elif field == "interval":
1394
  task["interval"] = int(val)
1395
  elif field == "add_target":
1396
- parts = val.strip().split()
1397
- if len(parts) >= 2:
1398
- msg_id = parts[1]
1399
- if msg_id.startswith('http'): msg_id = msg_id.split('/')[-1]
1400
- task.setdefault("targets", []).append({"channel_id": parts[0], "msg_id": msg_id})
 
1401
  elif field == "rm_target":
1402
- tgt_idx = int(val)
1403
  targets = task.get("targets", [])
1404
- if 0 <= tgt_idx < len(targets): targets.pop(tgt_idx)
 
 
 
1405
  save_data()
1406
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1407
  except Exception as e:
@@ -1492,14 +1620,19 @@ def api_backup(uid):
1492
  except Exception as e:
1493
  return jsonify({"ok": False, "msg": str(e)})
1494
 
 
1495
  @app.route('/api/events')
1496
  def api_events():
1497
  class FakeReq:
1498
  def __init__(self, init_data_str):
1499
  self.headers = {'X-Init-Data': init_data_str}
 
1500
  init_data_str = request.args.get('init_data', '')
1501
  uid = validate_webapp(FakeReq(init_data_str))
1502
- if not uid: return jsonify({"ok": False, "msg": "未授权"}), 401
 
 
 
1503
  def stream():
1504
  q = _event_queues[uid]
1505
  yield f"data: {json.dumps({'type': 'connected', 'data': '🟢 实时连接已建立'})}\n\n"
@@ -1509,15 +1642,31 @@ def api_events():
1509
  yield f"data: {json.dumps(event)}\n\n"
1510
  except queue.Empty:
1511
  yield f": heartbeat\n\n"
1512
- return Response(stream(), mimetype='text/event-stream', headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no', 'Connection': 'keep-alive'})
 
 
 
 
 
 
 
 
 
1513
 
1514
  @app.route('/api/health')
1515
  def api_health():
1516
- return jsonify({"status": "ok", "users": len(DATA.get("users", {})), "msg_map": len(DATA.get("msg_map", {})), "telethon": TL_CLIENT is not None})
 
 
 
 
 
1517
 
 
1518
  if __name__ == "__main__":
1519
  Thread(target=lambda: app.run(host="0.0.0.0", port=7860), daemon=True).start()
1520
  Thread(target=start_telethon_worker, daemon=True).start()
 
1521
  print("🔄 正在清除旧连接...")
1522
  for attempt in range(5):
1523
  try:
@@ -1527,11 +1676,18 @@ if __name__ == "__main__":
1527
  except Exception as e:
1528
  print(f"⏳ 等待旧实例释放... ({attempt+1}/5) {e}")
1529
  time.sleep(3)
 
1530
  print("🤖 Telebot 主消息引擎已启动!")
1531
  print("🌐 Mini App 地址: http://localhost:7860/webapp")
 
1532
  while True:
1533
  try:
1534
- bot.infinity_polling(timeout=60, long_polling_timeout=60, allowed_updates=["message", "callback_query", "channel_post", "edited_channel_post"])
 
 
 
 
 
1535
  except Exception as e:
1536
  print(f"❌ Polling 异常: {e}")
1537
  print("⏳ 10秒后重连...")
@@ -1539,4 +1695,5 @@ if __name__ == "__main__":
1539
  try:
1540
  bot.remove_webhook()
1541
  bot.get_updates(offset=-1, timeout=1)
1542
- except: pass
 
 
16
 
17
  import urllib3.util.connection as urllib3_cn
18
 
19
+ # 🌟 强制 IPv4
20
  def allowed_gai_family():
21
  return socket.AF_INET
22
  urllib3_cn.allowed_gai_family = allowed_gai_family
 
36
  except ImportError:
37
  print("⚠️ 未安装 pypinyin")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # ===== 2. WebDAV 存储配置 =====
40
  DAV_URL_BASE = os.environ.get("WEBDAV_URL", "").rstrip("/")
41
  DAV_USER = os.environ.get("WEBDAV_USER") or os.environ.get("WEBDAV_USERNAME")
 
83
 
84
  load_data()
85
 
86
+ # ===== 🔄 迁移旧版目录任务到多目标格式 =====
87
+ def migrate_dir_tasks():
88
+ changed = False
89
+ for uid, u_data in DATA.get("users", {}).items():
90
+ for task in u_data.get("dir_tasks", []):
91
+ if "scan_id" not in task and "channel_id" in task:
92
+ old_ch = task.get("channel_id", "")
93
+ old_msg = task.get("msg_id", "")
94
+ task["scan_id"] = old_ch
95
+ task["targets"] = [{"channel_id": old_ch, "msg_id": old_msg}]
96
+ task["last_html_per_target"] = {}
97
+ task.pop("channel_id", None)
98
+ task.pop("msg_id", None)
99
+ task.pop("last_html_dir", None)
100
+ changed = True
101
+ if changed:
102
+ save_data()
103
+ print("🔄 已迁移旧版目录任务到多目标格式")
104
+
105
+ try:
106
+ migrate_dir_tasks()
107
+ except Exception as e:
108
+ print(f"⚠️ 迁移目录任务时出错: {e}")
109
+
110
  # ===== 3. Telebot 主逻辑 =====
111
  BOT_TOKEN = os.environ.get("BOT_TOKEN")
112
  bot = telebot.TeleBot(BOT_TOKEN)
 
115
  user_states = {}
116
  ALL_TYPES = ['text', 'audio', 'document', 'photo', 'sticker', 'video', 'video_note', 'voice', 'location', 'contact', 'animation', 'dice', 'poll']
117
 
118
+ # ====== 频率限制 ======
119
  _rate_limit = defaultdict(list)
120
  RATE_LIMIT_MAX = 30
121
  RATE_LIMIT_WINDOW = 60
 
128
  _rate_limit[uid].append(now)
129
  return False
130
 
131
+ # ====== SSE 实时推送队列 ======
132
  _event_queues = defaultdict(lambda: queue.Queue(maxsize=50))
133
 
134
  def push_event(uid, event_type, data):
135
  q = _event_queues.get(uid)
136
  if q:
137
  try:
138
+ q.put_nowait({
139
+ "type": event_type,
140
+ "data": data,
141
+ "time": time.strftime("%H:%M:%S")
142
+ })
143
  except queue.Full:
144
  pass
145
 
 
259
  user_states[uid] = {"step": "WAIT_MANUAL_DIR_CH"}
260
  send_channel_prompt(uid, "🗂️ **生成频道标签目录**\n\n请输入需要扫描的【频道 ID】(例如 `-10012345678`):")
261
 
 
262
  @bot.message_handler(commands=['add_dir'])
263
  def cmd_add_dir(message):
264
  uid = str(message.from_user.id)
265
  user_states[uid] = {"step": "WAIT_DIR_NAME"}
266
  bot.send_message(uid, "🗂️ **创建自动更新目录任务**\n\n1️⃣ 请给任务起个名字 (如: `主频道自动目录`):", parse_mode="Markdown")
267
 
 
268
  @bot.message_handler(commands=['list_dir'])
269
  def cmd_list_dir(message):
270
  uid = str(message.from_user.id)
271
  dirs = DATA["users"].get(uid, {}).get("dir_tasks", [])
272
  if not dirs: return bot.send_message(uid, "暂无自动目录任务。")
273
  for i, t in enumerate(dirs):
274
+ targets = t.get('targets', [])
275
+ targets_text = "\n".join([f" {ti+1}. `{tgt['channel_id']}` | 消息: `{tgt['msg_id']}`" for ti, tgt in enumerate(targets)]) or " 暂无目标"
276
  msg = (f"🗂️ **任务**: `{t.get('task_name', '未命名')}`\n"
277
+ f"🔍 **扫描频道**: `{t.get('scan_id', '未设置')}`\n"
278
+ f"📌 **目标频道**:\n{targets_text}\n"
279
  f"⏱ **频率**: 每 `{t.get('interval', 15)}` 分钟扫描一次\n"
280
  f"🛡️ **屏蔽标签**: `{', '.join(t.get('blacklist', [])) or '无'}`\n"
281
  f"📦 **已收录标签**: `{len(t.get('tags_cache', []))} 个`")
282
  markup = types.InlineKeyboardMarkup(row_width=2)
283
+ markup.add(types.InlineKeyboardButton("➕ 添加目标", callback_data=f"ed_at_{i}"), types.InlineKeyboardButton("➖ 移除目标", callback_data=f"ed_rt_{i}"))
284
  markup.add(types.InlineKeyboardButton("➕ 加屏蔽", callback_data=f"ed_ab_{i}"), types.InlineKeyboardButton("➖ 删屏蔽", callback_data=f"ed_rb_{i}"))
285
  markup.add(types.InlineKeyboardButton("⏱ 扫描频率", callback_data=f"ed_in_{i}"))
 
286
  markup.add(types.InlineKeyboardButton("🗑️ 终止并删除该目录任务", callback_data=f"d_d_{i}"))
287
  bot.send_message(uid, msg, reply_markup=markup, parse_mode="Markdown")
288
 
 
405
  if not text.isdigit(): return bot.send_message(uid, "❌ 只能输入纯数字!")
406
  user_states[uid].update({"step": "WAIT_STAT_BLACKLIST", "duration": int(text)})
407
  bot.send_message(uid, "9️⃣ 请输入**屏蔽名单** (用空格隔开)\n💡 不需要屏蔽请回复 `无`:")
408
+
409
  elif step == "WAIT_STAT_BLACKLIST":
410
  blacklist = [] if text.strip() == "无" else [x.strip() for x in re.split(r'[\s\n]+', text) if x.strip()]
411
  user_states[uid].update({"step": "WAIT_STAT_BL_TITLE", "stats_blacklist": blacklist})
412
  bot.send_message(uid, "🔟 请输入**屏蔽区的标题** (例如 `🚫本月轮换限制:`)\n💡 不需要请回复 `无`:")
413
+
414
  elif step == "WAIT_STAT_BL_TITLE":
415
  bl_title = "" if text.strip() == "无" else text.strip()
416
  if "stats_tasks" not in DATA["users"][uid]: DATA["users"][uid]["stats_tasks"] = []
 
419
  "table_title": state["table_title"], "top_n": state["top_n"], "trigger_tag": state["trigger_tag"],
420
  "interval": state["interval"], "duration": state["duration"], "start_time": int(time.time()),
421
  "last_run": 0, "completed_items": [], "last_checked_msg_id": int(state["msg_id"]),
422
+ "stats_blacklist": state["stats_blacklist"],
423
+ "blacklist_title": bl_title
424
  })
425
  save_data()
426
  bot.send_message(uid, "✅ 完美!任务已创建。")
 
455
  task["stats_blacklist"] = [x for x in task.get("stats_blacklist", []) if x not in to_remove]
456
  elif step == "EDIT_STAT_BLTITLE":
457
  task["blacklist_title"] = "" if text.strip() == "无" else text.strip()
458
+
459
  task["last_html_stats"] = ""
460
  task["last_run"] = 0
461
+
462
  save_data()
463
  bot.send_message(uid, "✅ 属性已修改!下次刷新周期将立即更新。")
464
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
465
  user_states.pop(uid)
466
+
467
  elif step == "WAIT_BACKUP_SRC":
468
  user_states[uid] = {"step": "WAIT_BACKUP_TGT", "src": text}
469
  send_channel_prompt(uid, "📌 请输入【目标频道 ID】:")
 
489
  Thread(target=generate_smart_directory, args=(uid, text)).start()
490
  user_states.pop(uid)
491
 
492
+ # ===== 🌟 新版目录建流程 (多目标) =====
493
  elif step == "WAIT_DIR_NAME":
494
+ user_states[uid].update({"step": "WAIT_DIR_SCAN", "task_name": text})
495
+ send_channel_prompt(uid, "2️⃣ 请输入**扫描频道 ID** (将从此频道扫描标签):")
496
+ elif step == "WAIT_DIR_SCAN":
497
+ user_states[uid].update({"step": "WAIT_DIR_TGT_CH", "scan_id": text, "targets": []})
498
+ send_channel_prompt(uid, "3️⃣ 请输入第 1 个**目标频道 ID** (目录将更新到此频道的消息):")
499
  elif step == "WAIT_DIR_TGT_CH":
500
  user_states[uid].update({"step": "WAIT_DIR_TGT_MSG", "current_tgt_ch": text})
501
+ bot.send_message(uid, "📝 请输入频道中**承载目录的消息 ID**:")
502
  elif step == "WAIT_DIR_TGT_MSG":
503
  if text.startswith('http'): text = text.split('/')[-1]
504
  state["targets"].append({"channel_id": state["current_tgt_ch"], "msg_id": text})
505
+ user_states[uid]["step"] = "WAIT_DIR_MORE"
506
+ markup = types.InlineKeyboardMarkup(row_width=2)
507
+ markup.add(
508
+ types.InlineKeyboardButton(" 继续添加目标", callback_data="dir_more_yes"),
509
+ types.InlineKeyboardButton("⏭ 下一步", callback_data="dir_more_no")
510
+ )
511
+ bot.send_message(uid,
512
+ f"✅ 已添加目标 {len(state['targets'])}:`{state['current_tgt_ch']}` | 消息 `{text}`\n\n还要添加更多目标频道吗?",
513
+ reply_markup=markup, parse_mode="Markdown")
514
+ elif step == "WAIT_DIR_MORE":
515
+ bot.send_message(uid, "💡 请点击上方按钮选择 **继续添加** 或 **下一步**", parse_mode="Markdown")
516
  elif step == "WAIT_DIR_BLACKLIST":
517
  blacklist = [] if text.strip() == "无" else text.split()
518
  if "dir_tasks" not in DATA["users"][uid]: DATA["users"][uid]["dir_tasks"] = []
519
  DATA["users"][uid]["dir_tasks"].append({
520
  "task_name": state["task_name"],
521
+ "scan_id": state["scan_id"],
522
  "targets": state["targets"],
523
  "blacklist": blacklist, "interval": 15,
524
+ "tags_cache": [], "tags_map": {}, "scanned_msgs": {},
525
+ "last_html_per_target": {}
526
  })
527
  save_data()
528
+ targets_summary = "\n".join([f" {ti+1}. `{t['channel_id']}` | 消息 `{t['msg_id']}`" for ti, t in enumerate(state["targets"])])
529
+ bot.send_message(uid, f"✅ 目录任务建立完成!\n🔍 扫描: `{state['scan_id']}`\n📌 目标:\n{targets_summary}", parse_mode="Markdown")
530
  user_states.pop(uid)
531
 
532
+ # ===== 🌟 目录任务编辑 (含添加目标) =====
533
  elif step.startswith("EDIT_DIR_"):
534
  idx = state["idx"]
535
  try:
536
  task = DATA["users"][uid]["dir_tasks"][idx]
 
537
  if step == "EDIT_DIR_ADD_TGT_CH":
538
+ user_states[uid].update({"step": "EDIT_DIR_ADD_TGT_MSG", "new_tgt_ch": text})
539
+ return bot.send_message(uid, "📝 请输入该目标频道的**承载消息 ID**:")
540
  elif step == "EDIT_DIR_ADD_TGT_MSG":
541
  if text.startswith('http'): text = text.split('/')[-1]
542
+ task.setdefault("targets", []).append({"channel_id": state["new_tgt_ch"], "msg_id": text})
543
+ task.setdefault("last_html_per_target", {})
544
+ save_data()
545
+ bot.send_message(uid, f"✅ 已添加目标: `{state['new_tgt_ch']}` | 消息 `{text}`", parse_mode="Markdown")
546
  elif step == "EDIT_DIR_ADDBL":
547
  task["blacklist"].extend(text.split())
548
  task["blacklist"] = list(set(task["blacklist"]))
549
+ save_data()
550
  bot.send_message(uid, "✅ 目录属性已修改!")
551
  elif step == "EDIT_DIR_RMBL":
552
  to_rem = text.split()
553
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
554
+ save_data()
555
  bot.send_message(uid, "✅ 目录属性已修改!")
556
  elif step == "EDIT_DIR_INTV":
557
  task["interval"] = int(text)
558
+ save_data()
559
  bot.send_message(uid, "✅ 目录属性已修改!")
 
560
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
561
  user_states.pop(uid)
562
 
 
605
  def handle_callbacks(call):
606
  uid = str(call.from_user.id)
607
  data = call.data
608
+
609
+ # ===== 🌟 目录多目标:继续添加 / 下一步 =====
610
+ if data == "dir_more_yes":
611
+ if uid in user_states and user_states[uid].get("step") == "WAIT_DIR_MORE":
612
+ user_states[uid]["step"] = "WAIT_DIR_TGT_CH"
613
+ tgt_count = len(user_states[uid].get("targets", [])) + 1
614
+ send_channel_prompt(uid, f"请输入第 {tgt_count} 个**目标频道 ID**:")
615
+ return bot.answer_callback_query(call.id)
616
+ elif data == "dir_more_no":
617
+ if uid in user_states and user_states[uid].get("step") == "WAIT_DIR_MORE":
618
+ user_states[uid]["step"] = "WAIT_DIR_BLACKLIST"
619
+ bot.send_message(uid, "请输入需要**屏蔽的标签** (空格隔开,不屏蔽回复 `无`):")
620
+ return bot.answer_callback_query(call.id)
621
+
622
+ # ===== 🌟 目录:移除目标 (多索引回调) =====
623
+ if data.startswith("ed_rtc_"):
624
+ parts = data.split("_")
625
+ task_idx = int(parts[2])
626
+ target_idx = int(parts[3])
627
+ try:
628
+ task = DATA["users"][uid]["dir_tasks"][task_idx]
629
+ targets = task.get("targets", [])
630
+ if 0 <= target_idx < len(targets):
631
+ removed = targets.pop(target_idx)
632
+ tgt_key = f"{removed['channel_id']}_{removed['msg_id']}"
633
+ task.get("last_html_per_target", {}).pop(tgt_key, None)
634
+ save_data()
635
+ bot.edit_message_text(f"✅ 已移除目标: `{removed['channel_id']}` | 消息 `{removed['msg_id']}`",
636
+ call.message.chat.id, call.message.message_id, parse_mode="Markdown")
637
+ except Exception as e:
638
+ bot.send_message(uid, f"❌ 操作失败: {e}")
639
+ return bot.answer_callback_query(call.id)
640
+
641
  if data == "ab_add":
642
  user_states[uid] = {"step": "WAIT_AB_ID"}
643
  bot.send_message(uid, "📌 请输入要保存的**频道 ID**:")
 
656
  bot.edit_message_text("✅ 频道已移除。", call.message.chat.id, call.message.message_id)
657
  return bot.answer_callback_query(call.id)
658
  elif data.startswith("selch_") or data.startswith("bkp_"): return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
659
  try:
660
  action, idx_str = data.rsplit("_", 1)
661
  idx = int(idx_str)
 
672
  del DATA["users"][uid]["dir_tasks"][idx]
673
  bot.edit_message_text("❌ 目录任务已移除", call.message.chat.id, call.message.message_id)
674
  save_data()
675
+ # ===== 🌟 目录:添加/除目标频道 =====
676
  elif action == "ed_at":
677
  user_states[uid] = {"step": "EDIT_DIR_ADD_TGT_CH", "idx": idx}
678
+ send_channel_prompt(uid, "📌 请输入新的**目标频道 ID**:")
679
+ bot.answer_callback_query(call.id)
680
  elif action == "ed_rt":
681
+ try:
682
+ task = DATA["users"][uid]["dir_tasks"][idx]
683
+ targets = task.get("targets", [])
684
+ if not targets:
685
+ bot.send_message(uid, "该任务暂无目标频道。")
686
+ else:
687
+ markup = types.InlineKeyboardMarkup(row_width=1)
688
+ for ti, tgt in enumerate(targets):
689
+ markup.add(types.InlineKeyboardButton(
690
+ f"❌ {tgt['channel_id']} | 消息 {tgt['msg_id']}",
691
+ callback_data=f"ed_rtc_{idx}_{ti}"
692
+ ))
693
+ bot.send_message(uid, "选择要除的目标频道:", reply_markup=markup)
694
+ except Exception as e:
695
+ bot.send_message(uid, f"❌ 操作失败: {e}")
696
  bot.answer_callback_query(call.id)
697
  elif action in ["e_name", "e_titl", "e_trig", "e_topn", "e_intv", "e_dura", "e_chid", "e_msgid", "e_sabl", "e_srbl", "e_sblt", "ed_ab", "ed_rb", "ed_in"]:
698
  prompt_map = {
 
712
  "ed_in": "📌 请输入新的扫描频率(分钟):"
713
  }
714
  state_map = {
715
+ "e_name": "EDIT_STAT_NAME",
716
+ "e_titl": "EDIT_STAT_TITL",
717
+ "e_trig": "EDIT_STAT_TRIG",
718
+ "e_topn": "EDIT_STAT_TOPN",
719
+ "e_intv": "EDIT_STAT_INTV",
720
+ "e_dura": "EDIT_STAT_DURA",
721
+ "e_chid": "EDIT_STAT_CHID",
722
+ "e_msgid": "EDIT_STAT_MSGID",
723
+ "e_sabl": "EDIT_STAT_ADDBL",
724
+ "e_srbl": "EDIT_STAT_RMBL",
725
  "e_sblt": "EDIT_STAT_BLTITLE",
726
+ "ed_ab": "EDIT_DIR_ADDBL",
727
+ "ed_rb": "EDIT_DIR_RMBL",
728
  "ed_in": "EDIT_DIR_INTV"
729
  }
730
  user_states[uid] = {"step": state_map[action], "idx": idx}
731
+ bot.send_message(uid, prompt_map[action])
 
 
 
 
732
  bot.answer_callback_query(call.id, "请在对话框输入新值")
733
 
734
  def run_smart_backup_v2(latest_id, uid, src, tgt):
 
743
  ref_msg = await TL_CLIENT.get_messages(int(src), ids=latest_id)
744
  target_grouped_id = ref_msg.grouped_id if ref_msg else None
745
  async for msg in TL_CLIENT.iter_messages(int(src)):
746
+ if msg.action is not None:
747
+ continue
748
  if msg.id > latest_id:
749
  if target_grouped_id and msg.grouped_id == target_grouped_id: messages.append(msg)
750
  continue
 
785
  if e.error_code == 429:
786
  time.sleep(e.result_json.get('parameters', {}).get('retry_after', 10))
787
  else:
788
+ failed += len(msg_ids_to_copy)
789
+ failed_ids.extend(msg_ids_to_copy)
790
+ break
791
  except Exception:
792
+ failed += len(msg_ids_to_copy)
793
+ failed_ids.extend(msg_ids_to_copy)
794
+ break
795
  save_data()
796
+
797
  report = f"🏁 **备份完成!**\n源: `{src}` ➡️ 目: `{tgt}`\n✅ 新增 **{success}** 条"
798
  if failed > 0:
799
  report += f"\n❌ 失败 **{failed}** 条"
800
  show_ids = failed_ids[:10]
801
  report += f"\n失败消息 ID: `{show_ids}`"
802
+ if len(failed_ids) > 10:
803
+ report += f"\n... 等共 {len(failed_ids)} 条"
804
  bot.send_message(uid, report)
805
  push_event(uid, "backup_done", f"✅ 备份完成,新增 {success} 条,失败 {failed} 条")
806
 
 
807
  def generate_smart_directory(uid, ch_id):
808
  global TL_LOOP, TL_CLIENT
809
  if not TL_LOOP or not TL_CLIENT: return bot.send_message(uid, "❌ 错误: Userbot 未启动。")
 
816
  else:
817
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
818
  clean_text = html.unescape(re.sub(r'<.*?>', '', re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)))
819
+ for t in re.findall(r'#[A-Za-z0-9_\u4e00-\u9fa5]+', clean_text): tags_set.add(t)
 
820
  except Exception as e: return None, str(e)
821
  if not tags_set: return None, "没有找到有效标签。"
822
  directory_map = {}
823
  for tag in tags_set:
824
+ clean_str = tag[1:]
825
+ if not clean_str: continue
826
+ fc = clean_str[0]
827
+ key = "#"
828
+ if fc.isalpha() and fc.isascii(): key = fc.upper()
829
+ elif fc.isdigit(): key = "0-9"
830
+ elif '\u4e00' <= fc <= '\u9fff':
831
+ try:
832
+ py = lazy_pinyin(fc)
833
+ if py and len(py[0])>0: key = py[0][0].upper()
834
+ except NameError: key = "中文"
835
  if key not in directory_map: directory_map[key] = []
836
  directory_map[key].append(tag)
837
  return directory_map, None
 
975
  trigger_tag = task.get('trigger_tag', '#未设置')
976
  completed_items = []
977
  interval_sec = int(task.get('interval', 60)) * 60
978
+
979
  if current_time > int(task.get('start_time', current_time)) + int(task.get('duration', 7)) * 86400:
980
  del tasks[i]; data_changed = True; continue
981
  if current_time - int(task.get('last_run', 0)) < interval_sec: continue
982
+
983
  try:
984
  original_msg = await TL_CLIENT.get_messages(ch_id, ids=msg_id)
985
  if not original_msg: continue
 
987
  raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
988
  base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
989
  else: base_html = ""
990
+
991
  comments_data_list = []
992
  discussion_chat_id = None
993
  thread_id = None
994
+
995
  async for comment in TL_CLIENT.iter_messages(ch_id, reply_to=msg_id):
996
  if not discussion_chat_id:
997
  discussion_chat_id = comment.chat_id
998
  if comment.reply_to:
999
  thread_id = comment.reply_to.reply_to_top_id or comment.reply_to.reply_to_msg_id
1000
+
1001
  if comment.reactions:
1002
  total_reacts = sum(r.count for r in comment.reactions.results)
1003
  if total_reacts > 0:
 
1005
  short_text = full_raw_text
1006
  if len(short_text) > 15: short_text = short_text[:14] + "…"
1007
  comments_data_list.append((total_reacts, html.escape(short_text), short_text, full_raw_text, comment.id))
1008
+
1009
  stats_blacklist = task.get('stats_blacklist', [])
1010
  blacklist_section = ""
1011
  if stats_blacklist:
1012
  bl_title = task.get('blacklist_title', '🚫本月轮换限制:')
1013
  bl_names = "\n".join([html.escape(n) for n in stats_blacklist])
1014
+ if bl_title:
1015
+ blacklist_section = f"\n{html.escape(bl_title)}\n<blockquote>{bl_names}</blockquote>\n"
1016
+ else:
1017
+ blacklist_section = f"\n<blockquote>{bl_names}</blockquote>\n"
1018
+
1019
  stats_section = ""
1020
  all_comments_for_file = []
1021
+
1022
  if comments_data_list:
1023
  comments_data_list.sort(key=lambda x: x[0], reverse=True)
1024
+
1025
  deduped_comments = []
1026
  for item in comments_data_list:
1027
  total, safe_text, raw_short, full_raw, c_id = item
1028
  base_name = re.split(r'[((]', full_raw)[0].strip()
1029
+
1030
  is_blocked = False
1031
  for blocked in stats_blacklist:
1032
  if blocked and len(blocked) >= 2:
1033
  if blocked in full_raw or blocked in base_name:
1034
+ is_blocked = True
1035
+ break
1036
+ if is_blocked:
1037
+ continue
1038
+
1039
  conflict = False
1040
  for added_item in deduped_comments:
1041
  added_full_raw = added_item[3]
 
1043
  if (len(base_name) >= 2 and base_name in added_full_raw) or (len(added_base) >= 2 and added_base in full_raw):
1044
  conflict = True; break
1045
  if not conflict: deduped_comments.append(item)
1046
+
1047
  comments_data_list = deduped_comments
1048
  completed_items = []
1049
+
1050
  try:
1051
  async for newer_msg in TL_CLIENT.iter_messages(ch_id, limit=100, min_id=msg_id):
1052
  if newer_msg.id == msg_id: continue
1053
  if not newer_msg.raw_text or trigger_tag.lower() not in newer_msg.raw_text.lower(): continue
1054
+
1055
  if newer_msg.entities:
1056
  html_text = tl_html.unparse(newer_msg.raw_text, newer_msg.entities)
1057
  text_no_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1058
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_no_bq))
1059
+ else:
1060
+ clean_text = newer_msg.raw_text
1061
+
1062
  if trigger_tag.lower() not in clean_text.lower(): continue
1063
+ found_tags = re.findall(r'#([A-Za-z0-9_\u4e00-\u9fa5]+)', clean_text)
1064
+
1065
  for item in comments_data_list:
1066
+ raw_short = item[2]
1067
+ full_raw = item[3]
1068
  base_name = re.split(r'[((]', full_raw)[0].strip()
1069
  for tag in found_tags:
1070
  if (len(tag) >= 2 and tag.lower() in full_raw.lower()) or (len(base_name) >= 2 and base_name.lower() in tag.lower()):
1071
  if raw_short not in completed_items: completed_items.append(raw_short)
1072
  break
1073
  except Exception: pass
1074
+
1075
  if task.get('completed_items') != completed_items:
1076
  task['completed_items'] = completed_items; data_changed = True
1077
+
1078
  comments_data_list.sort(key=lambda x: (x[2] in completed_items, x[0]), reverse=True)
1079
  all_comments_for_file = comments_data_list.copy()
1080
+
1081
  comments_data_list = comments_data_list[:top_n]
1082
  completed_count = sum(1 for item in comments_data_list if item[2] in completed_items)
1083
  max_digits = max([len(str(item[0])) for item in comments_data_list] + [1])
1084
+
1085
  inner_lines = []
1086
  for rank, item in enumerate(comments_data_list):
1087
  total, safe_text, raw_short, full_raw, c_id = item
 
1089
  display_text = f"<s>{safe_text}</s>" if raw_short in completed_items else safe_text
1090
  padded_total = str(total).rjust(max_digits, ' ')
1091
  inner_lines.append(f"{medal} <code>{padded_total}</code> 赞 | <i>{display_text}</i>")
1092
+
1093
  beijing_tz = timezone(timedelta(hours=8))
1094
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1095
  inner_lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1096
+
1097
  if all_comments_for_file:
1098
  c_chat_str = str(discussion_chat_id).replace("-100", "") if discussion_chat_id else ""
1099
  list_html = ""
1100
  for rank, item in enumerate(all_comments_for_file, 1):
1101
  total, safe_text, raw_short, full_raw, c_id = item
1102
+ if c_chat_str and thread_id:
1103
+ link = f"tg://privatepost?channel={c_chat_str}&post={c_id}&thread={thread_id}"
1104
+ elif c_chat_str:
1105
+ link = f"tg://privatepost?channel={c_chat_str}&post={c_id}"
1106
+ else:
1107
+ link = "#"
1108
+
1109
  display_name = html.escape(full_raw)
1110
+ if raw_short in completed_items:
1111
+ display_name = f"<s style='opacity: 0.5;'>{display_name}</s>"
1112
+
1113
  list_html += f'<a href="{link}" class="item"><span class="rank">#{rank}</span><span class="name">{display_name}</span><span class="reacts">{total} 赞</span></a>'
1114
+
1115
  html_template = f"""<!DOCTYPE html>
1116
+ <html lang="zh-CN">
1117
+ <head>
1118
+ <meta charset="UTF-8">
1119
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
1120
+ <title>完整榜单 - {html.escape(table_title)}</title>
1121
+ <style>
1122
+ body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #1a1a2e; color: #eaeaea; padding: 15px; margin: 0; }}
1123
+ .container {{ max-width: 600px; margin: 0 auto; background: #16213e; border-radius: 12px; padding: 15px; box-shadow: 0 4px 15px rgba(0,0,0,0.3); }}
1124
+ h2 {{ text-align: center; color: #fff; margin-bottom: 20px; font-size: 18px; border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 10px; line-height: 1.5; }}
1125
+ .hint {{ text-align: center; color: #5dade2; font-size: 13px; margin-top: -10px; margin-bottom: 15px; font-weight: 500; }}
1126
+ .item {{ display: flex; justify-content: space-between; align-items: center; padding: 12px 0; border-bottom: 1px solid rgba(255,255,255,0.05); text-decoration: none; color: inherit; transition: background 0.2s; }}
1127
+ .item:last-child {{ border-bottom: none; }}
1128
+ .item:active {{ background: rgba(255,255,255,0.05); border-radius: 8px; }}
1129
+ .rank {{ font-weight: bold; width: 35px; color: #e94560; font-size: 14px; }}
1130
+ .name {{ flex: 1; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; padding: 0 10px; font-size: 14px; }}
1131
+ .reacts {{ font-weight: bold; color: #5dade2; font-size: 14px; }}
1132
+ </style>
1133
+ </head>
1134
+ <body>
1135
+ <div class="container">
1136
+ <h2>📊 {html.escape(table_title)} <br><span style="font-size:12px;color:#8a8a9a;font-weight:normal">全量数据收录 | 更新于 {now_str}</span></h2>
1137
+ <div class="hint">💡 点击名字跳转评论投票</div>
1138
+ {list_html}
1139
+ </div>
1140
+ </body>
1141
+ </html>"""
1142
  cache_key = f"{ch_id}_{msg_id}"
1143
  HTML_CACHE[cache_key] = html_template
1144
+
1145
  space_host = "bangdan.nine7.cc.cd"
1146
  file_msg_link = f"https://{space_host}/list/{cache_key}"
1147
+
1148
+
1149
  stats_section = f"<b>{html.escape(table_title)} ({completed_count}/{top_n}) <a href='{file_msg_link}'>完整名单</a></b>\n<blockquote>{chr(10).join(inner_lines)}</blockquote>"
1150
+
1151
  else:
1152
  beijing_tz = timezone(timedelta(hours=8))
1153
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1154
  stats_section = f"<b>{html.escape(table_title)} (0/{top_n})</b>\n<blockquote>暂无评论数据\n\n⏳ <code>最后更新: {now_str} (北京时间)</code></blockquote>"
1155
+
1156
  new_message_text = f"{base_html}{SEPARATOR_MARK}{blacklist_section}{stats_section}"
1157
  content_hash = f"{blacklist_section}|{stats_section}"
1158
+
1159
  if task.get('last_html_stats') != content_hash:
1160
  try:
1161
  if original_msg.photo or original_msg.video or original_msg.document: bot.edit_message_caption(caption=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1162
  else: bot.edit_message_text(text=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1163
  task['last_html_stats'] = content_hash; data_changed = True
1164
  except Exception: pass
1165
+
1166
  task['last_run'] = current_time; data_changed = True; await asyncio.sleep(2)
1167
+ except Exception as e:
1168
+ pass
1169
  if data_changed: await asyncio.to_thread(save_data)
1170
 
1171
+ # ===== 🌟 重写:多目标目录更新 =====
1172
  async def update_channel_dirs():
1173
  current_time = int(time.time())
1174
  data_changed = False
1175
  for uid, u_data in DATA.get("users", {}).items():
1176
  tasks = u_data.get("dir_tasks", [])
1177
  for task in tasks:
1178
+ scan_id_str = task.get('scan_id', '')
1179
+ if not scan_id_str: continue
1180
+ scan_id = int(scan_id_str)
1181
  targets = task.get('targets', [])
1182
  if not targets: continue
1183
  blacklist = task.get('blacklist', [])
 
1189
  scan_kwargs = {'limit': None if is_first_run else 150}
1190
  new_tags_found = False
1191
 
1192
+ # 需要跳过的消息 ID (目标频道和扫描频道相同)
1193
+ skip_msg_ids = set()
1194
+ for tgt in targets:
1195
+ if int(tgt['channel_id']) == scan_id:
1196
+ try: skip_msg_ids.add(int(tgt['msg_id']))
1197
+ except: pass
1198
+
1199
  try:
1200
+ # 扫描阶段:扫描 scan_id 频道
1201
+ async for msg in TL_CLIENT.iter_messages(scan_id, **scan_kwargs):
1202
+ if msg.id in skip_msg_ids: continue
1203
  msg_id_str = str(msg.id)
1204
  msg_time = msg.edit_date.timestamp() if msg.edit_date else msg.date.timestamp()
1205
  if scanned_msgs.get(msg_id_str) == msg_time: continue
 
1212
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
1213
  text_without_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1214
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_without_bq))
1215
+ found_tags = re.findall(r'#[A-Za-z0-9_\u4e00-\u9fa5]+', clean_text)
1216
  if found_tags:
1217
  tags_map[msg_id_str] = found_tags
1218
  new_tags_found = True
 
1220
  if msg_id_str in tags_map:
1221
  del tags_map[msg_id_str]
1222
  new_tags_found = True
1223
+
1224
  task['last_run'] = current_time
1225
+ if new_tags_found or is_first_run:
1226
+ task['tags_map'] = tags_map
1227
+ task['scanned_msgs'] = scanned_msgs
1228
+ data_changed = True
1229
+ all_tags = set()
1230
+ for t_list in tags_map.values(): all_tags.update(t_list)
1231
+ active_tags = [t for t in all_tags if t not in blacklist]
1232
+ task['tags_cache'] = active_tags
1233
+ directory_map = {}
1234
+ for tag in active_tags:
1235
+ clean_str = tag[1:]
1236
+ if not clean_str: continue
1237
+ fc = clean_str[0]
1238
+ key = "#"
1239
+ if fc.isalpha() and fc.isascii(): key = fc.upper()
1240
+ elif fc.isdigit(): key = "0-9"
1241
+ elif '\u4e00' <= fc <= '\u9fff':
1242
+ try:
1243
+ py = lazy_pinyin(fc)
1244
+ if py and len(py[0])>0: key = py[0][0].upper()
1245
+ except NameError: key = "中文"
1246
+ if key not in directory_map: directory_map[key] = []
1247
+ directory_map[key].append(tag)
1248
+ lines = ["目录:\n<blockquote expandable>"]
1249
+ keys = sorted(directory_map.keys())
1250
+ if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
1251
+ for key in keys:
1252
+ tags_line = " ".join([html.escape(t) for t in sorted(directory_map[key])])
1253
+ lines.append(f"{key}: {tags_line}\n")
1254
+ lines.append("</blockquote>")
1255
+ beijing_tz = timezone(timedelta(hours=8))
1256
+ now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1257
+ lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1258
+ stats_text = f"<blockquote>{chr(10).join(lines)}</blockquote>"
1259
+ task_name = task.get('task_name', '标签目录')
1260
+ safe_title_with_count = f"{html.escape(task_name)} ({len(active_tags)})"
1261
+
1262
+ # 🌟 遍历所有目标频道,逐个更新
1263
+ last_html_per_target = task.setdefault('last_html_per_target', {})
1264
+ for tgt in targets:
1265
+ tgt_ch = int(tgt['channel_id'])
1266
+ tgt_msg = int(tgt['msg_id'])
1267
+ tgt_key = f"{tgt_ch}_{tgt_msg}"
1268
+
1269
+ if last_html_per_target.get(tgt_key) == stats_text:
1270
+ continue
1271
+
 
 
 
 
 
 
 
 
1272
  try:
1273
+ original_msg = await TL_CLIENT.get_messages(tgt_ch, ids=tgt_msg)
1274
+ if not original_msg: continue
1275
+ if original_msg.raw_text:
1276
+ raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
1277
+ base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
1278
+ else: base_html = ""
1279
+ new_message_text = f"{base_html}{SEPARATOR_MARK}<b>{safe_title_with_count}</b>\n{stats_text}"
1280
+ if len(new_message_text) > 4000: new_message_text = new_message_text[:4000] + "\n... </blockquote>\n⚠️ 目录过长已截断"
1281
+
1282
  if original_msg.photo or original_msg.video or original_msg.document:
1283
  bot.edit_message_caption(caption=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1284
  else:
1285
  bot.edit_message_text(text=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1286
+ last_html_per_target[tgt_key] = stats_text
1287
  data_changed = True
1288
  except Exception: pass
1289
+ await asyncio.sleep(2)
1290
+ except Exception: pass
 
1291
  if data_changed: await asyncio.to_thread(save_data)
1292
 
1293
  TL_CLIENT.start()
 
1309
 
1310
  def validate_webapp(req):
1311
  init_data = req.headers.get('X-Init-Data', '')
1312
+ if not init_data:
1313
+ return None
1314
  try:
1315
  parsed = {}
1316
  for part in init_data.split('&'):
 
1318
  k, v = part.split('=', 1)
1319
  parsed[k] = unquote(v)
1320
  check_hash = parsed.pop('hash', None)
1321
+ if not check_hash or not BOT_TOKEN:
1322
+ return None
1323
  data_check_string = "\n".join(f"{k}={parsed[k]}" for k in sorted(parsed.keys()))
1324
  secret_key = hmac.new(b"WebAppData", BOT_TOKEN.encode(), hashlib.sha256).digest()
1325
  computed = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
1326
+ if not hmac.compare_digest(computed, check_hash):
1327
+ print("⛔ HMAC 签名不匹配")
1328
+ return None
1329
  auth_date = int(parsed.get('auth_date', 0))
1330
+ if abs(time.time() - auth_date) > 86400:
1331
+ print("⛔ initData 已过期")
1332
+ return None
1333
  user_obj = json.loads(parsed.get('user', '{}'))
1334
  uid = str(user_obj.get('id', ''))
1335
  if uid and uid not in DATA["users"]:
 
1344
  @wraps(f)
1345
  def wrapper(*args, **kwargs):
1346
  uid = validate_webapp(request)
1347
+ if not uid:
1348
+ return jsonify({"ok": False, "msg": "未授权"}), 401
1349
+ if check_rate_limit(uid):
1350
+ return jsonify({"ok": False, "msg": "操作太频繁,请稍后再试"}), 429
1351
  return f(uid, *args, **kwargs)
1352
  return wrapper
1353
 
1354
  @app.route('/')
1355
+ def home():
1356
+ return "Bot is running"
1357
 
1358
  @app.route('/webapp')
1359
+ def webapp_page():
1360
+ return send_file('webapp.html')
1361
 
1362
  @app.route('/list/<cache_key>')
1363
  def view_list(cache_key):
1364
  html_content = HTML_CACHE.get(cache_key)
1365
+ if not html_content:
1366
+ return "暂无数据或页面已刷新,请等待机器人下次更新", 404
1367
  return html_content
1368
 
1369
  @app.route('/api/data')
 
1437
  field, val = d["field"], d["value"]
1438
  try:
1439
  task = DATA["users"][uid]["stats_tasks"][idx]
1440
+ if field in ("top_n", "interval", "duration"):
1441
+ task[field] = int(val)
1442
  elif field == "msg_id":
1443
  if val.startswith('http'): val = val.split('/')[-1]
1444
  task["msg_id"] = val
 
1459
  elif field == "blacklist_title":
1460
  task["blacklist_title"] = "" if val.strip() == "无" else val.strip()
1461
  task["last_html_stats"] = ""
1462
+ else:
1463
+ task[field] = val
1464
+
1465
  task["last_html_stats"] = ""
1466
  task["last_run"] = 0
1467
+
1468
  save_data()
1469
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1470
  except Exception as e:
 
1479
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1480
  except: return jsonify({"ok": False, "msg": "索引无效"})
1481
 
1482
+ # ===== 🌟 目录 API:多目标格式 =====
1483
  @app.route('/api/dirs', methods=['POST'])
1484
  @need_auth
1485
  def api_add_dir(uid):
1486
  d = request.json
1487
+ targets = d.get('targets', [])
1488
+ for tgt in targets:
1489
+ msg_id = str(tgt.get("msg_id", ""))
1490
+ if msg_id.startswith('http'): msg_id = msg_id.split('/')[-1]
1491
+ tgt["msg_id"] = msg_id
 
1492
  DATA["users"][uid].setdefault("dir_tasks", []).append({
1493
+ "task_name": d["task_name"],
1494
+ "scan_id": d["scan_id"],
1495
+ "targets": targets,
1496
+ "blacklist": d.get("blacklist", []),
1497
+ "interval": 15,
1498
+ "tags_cache": [], "tags_map": {}, "scanned_msgs": {},
1499
+ "last_html_per_target": {}
1500
  })
1501
  save_data()
1502
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1503
 
 
1504
  @app.route('/api/dirs/<int:idx>', methods=['PUT'])
1505
  @need_auth
1506
  def api_edit_dir(uid, idx):
 
1508
  field, val = d["field"], d["value"]
1509
  try:
1510
  task = DATA["users"][uid]["dir_tasks"][idx]
 
1511
  if field == "add_blacklist":
1512
  task["blacklist"].extend(val.split())
1513
  task["blacklist"] = list(set(task["blacklist"]))
 
1517
  elif field == "interval":
1518
  task["interval"] = int(val)
1519
  elif field == "add_target":
1520
+ target_info = json.loads(val)
1521
+ msg_id = str(target_info.get("msg_id", ""))
1522
+ if msg_id.startswith('http'): msg_id = msg_id.split('/')[-1]
1523
+ target_info["msg_id"] = msg_id
1524
+ task.setdefault("targets", []).append(target_info)
1525
+ task.setdefault("last_html_per_target", {})
1526
  elif field == "rm_target":
1527
+ target_idx = int(val)
1528
  targets = task.get("targets", [])
1529
+ if 0 <= target_idx < len(targets):
1530
+ removed = targets.pop(target_idx)
1531
+ tgt_key = f"{removed['channel_id']}_{removed['msg_id']}"
1532
+ task.get("last_html_per_target", {}).pop(tgt_key, None)
1533
  save_data()
1534
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1535
  except Exception as e:
 
1620
  except Exception as e:
1621
  return jsonify({"ok": False, "msg": str(e)})
1622
 
1623
+ # ====== SSE 实时事件流 ======
1624
  @app.route('/api/events')
1625
  def api_events():
1626
  class FakeReq:
1627
  def __init__(self, init_data_str):
1628
  self.headers = {'X-Init-Data': init_data_str}
1629
+
1630
  init_data_str = request.args.get('init_data', '')
1631
  uid = validate_webapp(FakeReq(init_data_str))
1632
+
1633
+ if not uid:
1634
+ return jsonify({"ok": False, "msg": "未授权"}), 401
1635
+
1636
  def stream():
1637
  q = _event_queues[uid]
1638
  yield f"data: {json.dumps({'type': 'connected', 'data': '🟢 实时连接已建立'})}\n\n"
 
1642
  yield f"data: {json.dumps(event)}\n\n"
1643
  except queue.Empty:
1644
  yield f": heartbeat\n\n"
1645
+
1646
+ return Response(
1647
+ stream(),
1648
+ mimetype='text/event-stream',
1649
+ headers={
1650
+ 'Cache-Control': 'no-cache',
1651
+ 'X-Accel-Buffering': 'no',
1652
+ 'Connection': 'keep-alive'
1653
+ }
1654
+ )
1655
 
1656
  @app.route('/api/health')
1657
  def api_health():
1658
+ return jsonify({
1659
+ "status": "ok",
1660
+ "users": len(DATA.get("users", {})),
1661
+ "msg_map": len(DATA.get("msg_map", {})),
1662
+ "telethon": TL_CLIENT is not None
1663
+ })
1664
 
1665
+ # ===== 5. 启动点 =====
1666
  if __name__ == "__main__":
1667
  Thread(target=lambda: app.run(host="0.0.0.0", port=7860), daemon=True).start()
1668
  Thread(target=start_telethon_worker, daemon=True).start()
1669
+
1670
  print("🔄 正在清除旧连接...")
1671
  for attempt in range(5):
1672
  try:
 
1676
  except Exception as e:
1677
  print(f"⏳ 等待旧实例释放... ({attempt+1}/5) {e}")
1678
  time.sleep(3)
1679
+
1680
  print("🤖 Telebot 主消息引擎已启动!")
1681
  print("🌐 Mini App 地址: http://localhost:7860/webapp")
1682
+
1683
  while True:
1684
  try:
1685
+ bot.infinity_polling(
1686
+ timeout=60,
1687
+ long_polling_timeout=60,
1688
+ allowed_updates=["message", "callback_query",
1689
+ "channel_post", "edited_channel_post"]
1690
+ )
1691
  except Exception as e:
1692
  print(f"❌ Polling 异常: {e}")
1693
  print("⏳ 10秒后重连...")
 
1695
  try:
1696
  bot.remove_webhook()
1697
  bot.get_updates(offset=-1, timeout=1)
1698
+ except:
1699
+ pass