ljx77qaq commited on
Commit
7b84ff5
·
verified ·
1 Parent(s): ea90a32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +274 -348
app.py CHANGED
@@ -11,12 +11,11 @@ import queue
11
  from collections import defaultdict
12
  from datetime import datetime, timezone, timedelta
13
  from threading import Thread, Lock, Timer
14
- from urllib.parse import parse_qs, unquote, quote
15
  from functools import wraps
16
 
17
  import urllib3.util.connection as urllib3_cn
18
 
19
- # 🌟 强制 IPv4
20
  def allowed_gai_family():
21
  return socket.AF_INET
22
  urllib3_cn.allowed_gai_family = allowed_gai_family
@@ -36,6 +35,46 @@ try:
36
  except ImportError:
37
  print("⚠️ 未安装 pypinyin")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # ===== 2. WebDAV 存储配置 =====
40
  DAV_URL_BASE = os.environ.get("WEBDAV_URL", "").rstrip("/")
41
  DAV_USER = os.environ.get("WEBDAV_USER") or os.environ.get("WEBDAV_USERNAME")
@@ -47,7 +86,6 @@ REMOTE_FILENAME = "tg_bot_data_v5.json"
47
  DATA = {"users": {}, "msg_map": {}, "backup_log": {}}
48
  data_lock = Lock()
49
 
50
- # 👇 增加内存级网页源码缓存字典
51
  HTML_CACHE = {}
52
 
53
  TL_LOOP = None
@@ -92,7 +130,6 @@ apihelper.API_URL = "https://nine7.linlizhi0210.workers.dev/bot{0}/{1}"
92
  user_states = {}
93
  ALL_TYPES = ['text', 'audio', 'document', 'photo', 'sticker', 'video', 'video_note', 'voice', 'location', 'contact', 'animation', 'dice', 'poll']
94
 
95
- # ====== 频率限制 ======
96
  _rate_limit = defaultdict(list)
97
  RATE_LIMIT_MAX = 30
98
  RATE_LIMIT_WINDOW = 60
@@ -105,18 +142,13 @@ def check_rate_limit(uid):
105
  _rate_limit[uid].append(now)
106
  return False
107
 
108
- # ====== SSE 实时推送队列 ======
109
  _event_queues = defaultdict(lambda: queue.Queue(maxsize=50))
110
 
111
  def push_event(uid, event_type, data):
112
  q = _event_queues.get(uid)
113
  if q:
114
  try:
115
- q.put_nowait({
116
- "type": event_type,
117
- "data": data,
118
- "time": time.strftime("%H:%M:%S")
119
- })
120
  except queue.Full:
121
  pass
122
 
@@ -236,29 +268,32 @@ def cmd_build_dir(message):
236
  user_states[uid] = {"step": "WAIT_MANUAL_DIR_CH"}
237
  send_channel_prompt(uid, "🗂️ **生成频道标签目录**\n\n请输入需要扫描的【频道 ID】(例如 `-10012345678`):")
238
 
 
239
  @bot.message_handler(commands=['add_dir'])
240
  def cmd_add_dir(message):
241
  uid = str(message.from_user.id)
242
  user_states[uid] = {"step": "WAIT_DIR_NAME"}
243
  bot.send_message(uid, "🗂️ **创建自动更新目录任务**\n\n1️⃣ 请给任务起个名字 (如: `主频道自动目录`):", parse_mode="Markdown")
244
 
 
245
  @bot.message_handler(commands=['list_dir'])
246
  def cmd_list_dir(message):
247
  uid = str(message.from_user.id)
248
  dirs = DATA["users"].get(uid, {}).get("dir_tasks", [])
249
  if not dirs: return bot.send_message(uid, "暂无自动目录任务。")
250
  for i, t in enumerate(dirs):
 
 
251
  msg = (f"🗂️ **任务**: `{t.get('task_name', '未命名')}`\n"
252
- f"🔍 **扫描**: `{t.get('scan_channel_id', t.get('channel_id'))}`\n"
253
- f"📌 **写入目标**: `{t.get('channel_id')}` | **承载消息**: `{t.get('msg_id')}`\n"
254
  f"⏱ **频率**: 每 `{t.get('interval', 15)}` 分钟扫描一次\n"
255
  f"🛡️ **屏蔽标签**: `{', '.join(t.get('blacklist', [])) or '无'}`\n"
256
  f"📦 **已收录标签**: `{len(t.get('tags_cache', []))} 个`")
257
  markup = types.InlineKeyboardMarkup(row_width=2)
258
- markup.add(types.InlineKeyboardButton("✏️ 扫描源频道", callback_data=f"ed_scid_{i}"), types.InlineKeyboardButton("✏️ 目标频道", callback_data=f"ed_chid_{i}"))
259
- markup.add(types.InlineKeyboardButton("✏️ 承载消息 ID", callback_data=f"ed_msgid_{i}"))
260
  markup.add(types.InlineKeyboardButton("➕ 加屏蔽", callback_data=f"ed_ab_{i}"), types.InlineKeyboardButton("➖ 删屏蔽", callback_data=f"ed_rb_{i}"))
261
  markup.add(types.InlineKeyboardButton("⏱ 扫描频率", callback_data=f"ed_in_{i}"))
 
262
  markup.add(types.InlineKeyboardButton("🗑️ 终止并删除该目录任务", callback_data=f"d_d_{i}"))
263
  bot.send_message(uid, msg, reply_markup=markup, parse_mode="Markdown")
264
 
@@ -381,12 +416,10 @@ def process_user_text(uid, text):
381
  if not text.isdigit(): return bot.send_message(uid, "❌ 只能输入纯数字!")
382
  user_states[uid].update({"step": "WAIT_STAT_BLACKLIST", "duration": int(text)})
383
  bot.send_message(uid, "9️⃣ 请输入**屏蔽名单** (用空格隔开)\n💡 不需要屏蔽请回复 `无`:")
384
-
385
  elif step == "WAIT_STAT_BLACKLIST":
386
  blacklist = [] if text.strip() == "无" else [x.strip() for x in re.split(r'[\s\n]+', text) if x.strip()]
387
  user_states[uid].update({"step": "WAIT_STAT_BL_TITLE", "stats_blacklist": blacklist})
388
  bot.send_message(uid, "🔟 请输入**屏蔽区的标题** (例如 `🚫本月轮换限制:`)\n💡 不需要请回复 `无`:")
389
-
390
  elif step == "WAIT_STAT_BL_TITLE":
391
  bl_title = "" if text.strip() == "无" else text.strip()
392
  if "stats_tasks" not in DATA["users"][uid]: DATA["users"][uid]["stats_tasks"] = []
@@ -395,8 +428,7 @@ def process_user_text(uid, text):
395
  "table_title": state["table_title"], "top_n": state["top_n"], "trigger_tag": state["trigger_tag"],
396
  "interval": state["interval"], "duration": state["duration"], "start_time": int(time.time()),
397
  "last_run": 0, "completed_items": [], "last_checked_msg_id": int(state["msg_id"]),
398
- "stats_blacklist": state["stats_blacklist"],
399
- "blacklist_title": bl_title
400
  })
401
  save_data()
402
  bot.send_message(uid, "✅ 完美!任务已创建。")
@@ -431,15 +463,13 @@ def process_user_text(uid, text):
431
  task["stats_blacklist"] = [x for x in task.get("stats_blacklist", []) if x not in to_remove]
432
  elif step == "EDIT_STAT_BLTITLE":
433
  task["blacklist_title"] = "" if text.strip() == "无" else text.strip()
434
-
435
  task["last_html_stats"] = ""
436
  task["last_run"] = 0
437
-
438
  save_data()
439
  bot.send_message(uid, "✅ 属性已修改!下次刷新周期将立即更新。")
440
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
441
  user_states.pop(uid)
442
-
443
  elif step == "WAIT_BACKUP_SRC":
444
  user_states[uid] = {"step": "WAIT_BACKUP_TGT", "src": text}
445
  send_channel_prompt(uid, "📌 请输入【目标频道 ID】:")
@@ -464,64 +494,68 @@ def process_user_text(uid, text):
464
  bot.send_message(uid, "🔍 正在扫描频道历史标签,请耐心等待...")
465
  Thread(target=generate_smart_directory, args=(uid, text)).start()
466
  user_states.pop(uid)
467
-
468
- # ====== 🌟 私聊创建自动更新目录的链式会话 🌟 ======
469
  elif step == "WAIT_DIR_NAME":
470
  user_states[uid].update({"step": "WAIT_DIR_SCAN_CH", "task_name": text})
471
- send_channel_prompt(uid, "2️⃣ 请输入【需要扫描提取标签的频道 ID:")
472
  elif step == "WAIT_DIR_SCAN_CH":
473
- user_states[uid].update({"step": "WAIT_DIR_CH", "scan_channel_id": text})
474
- send_channel_prompt(uid, "3️⃣ 请输入【承载并渲染录的目标频道 ID:")
475
- elif step == "WAIT_DIR_CH":
476
- user_states[uid].update({"step": "WAIT_DIR_MSG", "ch_id": text})
477
- bot.send_message(uid, "4️⃣ 请输入目标频道中承载目录的消息 ID:")
478
- elif step == "WAIT_DIR_MSG":
479
  if text.startswith('http'): text = text.split('/')[-1]
480
- user_states[uid].update({"step": "WAIT_DIR_BLACKLIST", "msg_id": text})
481
- bot.send_message(uid, "5️⃣ 请输入需要**屏蔽的标签** (空格隔开,不屏蔽回复 `无`):")
 
 
 
 
 
 
 
 
482
  elif step == "WAIT_DIR_BLACKLIST":
483
  blacklist = [] if text.strip() == "无" else text.split()
484
  if "dir_tasks" not in DATA["users"][uid]: DATA["users"][uid]["dir_tasks"] = []
485
  DATA["users"][uid]["dir_tasks"].append({
486
- "task_name": state["task_name"],
487
- "scan_channel_id": state["scan_channel_id"],
488
- "channel_id": state["ch_id"],
489
- "msg_id": state["msg_id"],
490
- "blacklist": blacklist,
491
- "interval": 15,
492
- "tags_cache": [], "tags_map": {}, "scanned_msgs": {}, "last_html_dir": ""
493
  })
494
  save_data()
495
- bot.send_message(uid, "✅ 目录任务建立完成!")
496
  user_states.pop(uid)
497
 
 
498
  elif step.startswith("EDIT_DIR_"):
499
  idx = state["idx"]
500
  try:
501
  task = DATA["users"][uid]["dir_tasks"][idx]
502
- if step == "EDIT_DIR_SCAN_CH":
503
- task["scan_channel_id"] = text
504
- elif step == "EDIT_DIR_CH":
505
- task["channel_id"] = text
506
- elif step == "EDIT_DIR_MSGID":
507
  if text.startswith('http'): text = text.split('/')[-1]
508
- task["msg_id"] = text
 
509
  elif step == "EDIT_DIR_ADDBL":
510
  task["blacklist"].extend(text.split())
511
  task["blacklist"] = list(set(task["blacklist"]))
 
512
  elif step == "EDIT_DIR_RMBL":
513
  to_rem = text.split()
514
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
 
515
  elif step == "EDIT_DIR_INTV":
516
  task["interval"] = int(text)
517
-
518
- # 重置缓存强制重扫
519
- task["tags_map"] = {}
520
- task["scanned_msgs"] = {}
521
- task["last_html_dir"] = ""
522
-
523
  save_data()
524
- bot.send_message(uid, "✅ 目录属性已修改!")
525
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
526
  user_states.pop(uid)
527
 
@@ -588,6 +622,24 @@ def handle_callbacks(call):
588
  bot.edit_message_text("✅ 频道已移除。", call.message.chat.id, call.message.message_id)
589
  return bot.answer_callback_query(call.id)
590
  elif data.startswith("selch_") or data.startswith("bkp_"): return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  try:
592
  action, idx_str = data.rsplit("_", 1)
593
  idx = int(idx_str)
@@ -604,7 +656,27 @@ def handle_callbacks(call):
604
  del DATA["users"][uid]["dir_tasks"][idx]
605
  bot.edit_message_text("❌ 目录任务已移除", call.message.chat.id, call.message.message_id)
606
  save_data()
607
- elif action in ["e_name", "e_titl", "e_trig", "e_topn", "e_intv", "e_dura", "e_chid", "e_msgid", "e_sabl", "e_srbl", "e_sblt", "ed_ab", "ed_rb", "ed_in", "ed_scid", "ed_chid", "ed_msgid"]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608
  prompt_map = {
609
  "e_name": "📌 请输入新的任务名称:",
610
  "e_titl": "📌 请输入新的表头标题:",
@@ -617,34 +689,26 @@ def handle_callbacks(call):
617
  "e_sabl": "🚫 请输入要**屏蔽的名字** (空格隔开):",
618
  "e_srbl": "✅ 请输入要**解除屏蔽的名字** (空格隔开):",
619
  "e_sblt": "📝 请输入**屏蔽区的显示标题**\n(例如 `🚫本月轮换限制:`,回复 `无` 则不显示):",
620
- "ed_scid": "📌 请输入新的【扫描源频道 ID】:",
621
- "ed_chid": "📌 请输入新的【承载消息目标频道 ID】:",
622
- "ed_msgid": "📌 请输入新的【承载目录消息 ID】:",
623
  "ed_ab": "📌 请输入要追加的屏蔽标签(空格隔开):",
624
  "ed_rb": "📌 请输入要移出屏蔽的标签(空格隔开):",
625
  "ed_in": "📌 请输入新的扫描频率(分钟):"
626
  }
627
  state_map = {
628
- "e_name": "EDIT_STAT_NAME",
629
- "e_titl": "EDIT_STAT_TITL",
630
- "e_trig": "EDIT_STAT_TRIG",
631
- "e_topn": "EDIT_STAT_TOPN",
632
- "e_intv": "EDIT_STAT_INTV",
633
- "e_dura": "EDIT_STAT_DURA",
634
- "e_chid": "EDIT_STAT_CHID",
635
- "e_msgid": "EDIT_STAT_MSGID",
636
- "e_sabl": "EDIT_STAT_ADDBL",
637
- "e_srbl": "EDIT_STAT_RMBL",
638
  "e_sblt": "EDIT_STAT_BLTITLE",
639
- "ed_scid": "EDIT_DIR_SCAN_CH",
640
- "ed_chid": "EDIT_DIR_CH",
641
- "ed_msgid": "EDIT_DIR_MSGID",
642
- "ed_ab": "EDIT_DIR_ADDBL",
643
- "ed_rb": "EDIT_DIR_RMBL",
644
  "ed_in": "EDIT_DIR_INTV"
645
  }
646
  user_states[uid] = {"step": state_map[action], "idx": idx}
647
- bot.send_message(uid, prompt_map[action])
 
 
 
 
648
  bot.answer_callback_query(call.id, "请在对话框输入新值")
649
 
650
  def run_smart_backup_v2(latest_id, uid, src, tgt):
@@ -659,8 +723,7 @@ def run_smart_backup_v2(latest_id, uid, src, tgt):
659
  ref_msg = await TL_CLIENT.get_messages(int(src), ids=latest_id)
660
  target_grouped_id = ref_msg.grouped_id if ref_msg else None
661
  async for msg in TL_CLIENT.iter_messages(int(src)):
662
- if msg.action is not None:
663
- continue
664
  if msg.id > latest_id:
665
  if target_grouped_id and msg.grouped_id == target_grouped_id: messages.append(msg)
666
  continue
@@ -701,25 +764,20 @@ def run_smart_backup_v2(latest_id, uid, src, tgt):
701
  if e.error_code == 429:
702
  time.sleep(e.result_json.get('parameters', {}).get('retry_after', 10))
703
  else:
704
- failed += len(msg_ids_to_copy)
705
- failed_ids.extend(msg_ids_to_copy)
706
- break
707
  except Exception:
708
- failed += len(msg_ids_to_copy)
709
- failed_ids.extend(msg_ids_to_copy)
710
- break
711
  save_data()
712
-
713
  report = f"🏁 **备份完成!**\n源: `{src}` ➡️ 目: `{tgt}`\n✅ 新增 **{success}** 条"
714
  if failed > 0:
715
  report += f"\n❌ 失败 **{failed}** 条"
716
  show_ids = failed_ids[:10]
717
  report += f"\n失败消息 ID: `{show_ids}`"
718
- if len(failed_ids) > 10:
719
- report += f"\n... 等共 {len(failed_ids)} 条"
720
  bot.send_message(uid, report)
721
  push_event(uid, "backup_done", f"✅ 备份完成,新增 {success} 条,失败 {failed} 条")
722
 
 
723
  def generate_smart_directory(uid, ch_id):
724
  global TL_LOOP, TL_CLIENT
725
  if not TL_LOOP or not TL_CLIENT: return bot.send_message(uid, "❌ 错误: Userbot 未启动。")
@@ -732,23 +790,14 @@ def generate_smart_directory(uid, ch_id):
732
  else:
733
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
734
  clean_text = html.unescape(re.sub(r'<.*?>', '', re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)))
735
- for t in re.findall(r'#[A-Za-z0-9_\u4e00-\u9fa5\s&+\-/Detail\u200d\u2600-\u27bf\U0001f000-\U0001f9ff]+', clean_text):
736
- tags_set.add(t.strip())
737
  except Exception as e: return None, str(e)
738
  if not tags_set: return None, "没有找到有效标签。"
739
  directory_map = {}
740
  for tag in tags_set:
741
- clean_str = tag[1:]
742
- if not clean_str: continue
743
- fc = clean_str[0]
744
- key = "#"
745
- if fc.isalpha() and fc.isascii(): key = fc.upper()
746
- elif fc.isdigit(): key = "0-9"
747
- elif '\u4e00' <= fc <= '\u9fff':
748
- try:
749
- py = lazy_pinyin(fc)
750
- if py and len(py[0])>0: key = py[0][0].upper()
751
- except NameError: key = "中文"
752
  if key not in directory_map: directory_map[key] = []
753
  directory_map[key].append(tag)
754
  return directory_map, None
@@ -762,19 +811,9 @@ def generate_smart_directory(uid, ch_id):
762
  lines = ["目录:\n<blockquote expandable>"]
763
  keys = sorted(directory_map.keys())
764
  if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
765
-
766
- c_chat_str = str(ch_id).replace("-100", "")
767
-
768
  for key in keys:
769
- linked_tags = []
770
- for t in sorted(directory_map[key]):
771
- safe_search_text = quote(t)
772
- tg_search_link = f"tg://resolve?domain=c/{c_chat_str}&query={safe_search_text}"
773
- linked_tags.append(f'<a href="{tg_search_link}">{html.escape(t)}</a>')
774
-
775
- tags_line = " ".join(linked_tags)
776
- lines.append(f"<b>{key}</b>: {tags_line}\n")
777
-
778
  lines.append("</blockquote>")
779
  final_text = "\n".join(lines)
780
  if len(final_text) > 4000:
@@ -902,11 +941,9 @@ def start_telethon_worker():
902
  trigger_tag = task.get('trigger_tag', '#未设置')
903
  completed_items = []
904
  interval_sec = int(task.get('interval', 60)) * 60
905
-
906
  if current_time > int(task.get('start_time', current_time)) + int(task.get('duration', 7)) * 86400:
907
  del tasks[i]; data_changed = True; continue
908
  if current_time - int(task.get('last_run', 0)) < interval_sec: continue
909
-
910
  try:
911
  original_msg = await TL_CLIENT.get_messages(ch_id, ids=msg_id)
912
  if not original_msg: continue
@@ -914,17 +951,14 @@ def start_telethon_worker():
914
  raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
915
  base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
916
  else: base_html = ""
917
-
918
  comments_data_list = []
919
  discussion_chat_id = None
920
  thread_id = None
921
-
922
  async for comment in TL_CLIENT.iter_messages(ch_id, reply_to=msg_id):
923
  if not discussion_chat_id:
924
  discussion_chat_id = comment.chat_id
925
  if comment.reply_to:
926
  thread_id = comment.reply_to.reply_to_top_id or comment.reply_to.reply_to_msg_id
927
-
928
  if comment.reactions:
929
  total_reacts = sum(r.count for r in comment.reactions.results)
930
  if total_reacts > 0:
@@ -932,38 +966,27 @@ def start_telethon_worker():
932
  short_text = full_raw_text
933
  if len(short_text) > 15: short_text = short_text[:14] + "…"
934
  comments_data_list.append((total_reacts, html.escape(short_text), short_text, full_raw_text, comment.id))
935
-
936
- # ====== 屏蔽名单与区域构建 ======
937
  stats_blacklist = task.get('stats_blacklist', [])
938
  blacklist_section = ""
939
  if stats_blacklist:
940
  bl_title = task.get('blacklist_title', '🚫本月轮换限制:')
941
  bl_names = "\n".join([html.escape(n) for n in stats_blacklist])
942
- if bl_title:
943
- blacklist_section = f"\n{html.escape(bl_title)}\n<blockquote>{bl_names}</blockquote>\n"
944
- else:
945
- blacklist_section = f"\n<blockquote>{bl_names}</blockquote>\n"
946
-
947
  stats_section = ""
948
  all_comments_for_file = []
949
-
950
  if comments_data_list:
951
  comments_data_list.sort(key=lambda x: x[0], reverse=True)
952
-
953
  deduped_comments = []
954
  for item in comments_data_list:
955
  total, safe_text, raw_short, full_raw, c_id = item
956
  base_name = re.split(r'[((]', full_raw)[0].strip()
957
-
958
  is_blocked = False
959
  for blocked in stats_blacklist:
960
  if blocked and len(blocked) >= 2:
961
  if blocked in full_raw or blocked in base_name:
962
- is_blocked = True
963
- break
964
- if is_blocked:
965
- continue
966
-
967
  conflict = False
968
  for added_item in deduped_comments:
969
  added_full_raw = added_item[3]
@@ -971,45 +994,35 @@ def start_telethon_worker():
971
  if (len(base_name) >= 2 and base_name in added_full_raw) or (len(added_base) >= 2 and added_base in full_raw):
972
  conflict = True; break
973
  if not conflict: deduped_comments.append(item)
974
-
975
  comments_data_list = deduped_comments
976
  completed_items = []
977
-
978
  try:
979
  async for newer_msg in TL_CLIENT.iter_messages(ch_id, limit=100, min_id=msg_id):
980
  if newer_msg.id == msg_id: continue
981
  if not newer_msg.raw_text or trigger_tag.lower() not in newer_msg.raw_text.lower(): continue
982
-
983
  if newer_msg.entities:
984
  html_text = tl_html.unparse(newer_msg.raw_text, newer_msg.entities)
985
  text_no_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
986
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_no_bq))
987
- else:
988
- clean_text = newer_msg.raw_text
989
-
990
  if trigger_tag.lower() not in clean_text.lower(): continue
991
- found_tags = re.findall(r'#([A-Za-z0-9_\u4e00-\u9fa5]+)', clean_text)
992
-
993
  for item in comments_data_list:
994
- raw_short = item[2]
995
- full_raw = item[3]
996
  base_name = re.split(r'[((]', full_raw)[0].strip()
997
  for tag in found_tags:
998
  if (len(tag) >= 2 and tag.lower() in full_raw.lower()) or (len(base_name) >= 2 and base_name.lower() in tag.lower()):
999
  if raw_short not in completed_items: completed_items.append(raw_short)
1000
  break
1001
  except Exception: pass
1002
-
1003
  if task.get('completed_items') != completed_items:
1004
  task['completed_items'] = completed_items; data_changed = True
1005
-
1006
  comments_data_list.sort(key=lambda x: (x[2] in completed_items, x[0]), reverse=True)
1007
  all_comments_for_file = comments_data_list.copy()
1008
-
1009
  comments_data_list = comments_data_list[:top_n]
1010
  completed_count = sum(1 for item in comments_data_list if item[2] in completed_items)
1011
  max_digits = max([len(str(item[0])) for item in comments_data_list] + [1])
1012
-
1013
  inner_lines = []
1014
  for rank, item in enumerate(comments_data_list):
1015
  total, safe_text, raw_short, full_raw, c_id = item
@@ -1017,98 +1030,56 @@ def start_telethon_worker():
1017
  display_text = f"<s>{safe_text}</s>" if raw_short in completed_items else safe_text
1018
  padded_total = str(total).rjust(max_digits, ' ')
1019
  inner_lines.append(f"{medal} <code>{padded_total}</code> 赞 | <i>{display_text}</i>")
1020
-
1021
  beijing_tz = timezone(timedelta(hours=8))
1022
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1023
  inner_lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1024
-
1025
- # ====== 🌟 将榜单存储为网页,并把链接拼接到标题旁边 ======
1026
  if all_comments_for_file:
1027
  c_chat_str = str(discussion_chat_id).replace("-100", "") if discussion_chat_id else ""
1028
  list_html = ""
1029
  for rank, item in enumerate(all_comments_for_file, 1):
1030
  total, safe_text, raw_short, full_raw, c_id = item
1031
- if c_chat_str and thread_id:
1032
- link = f"tg://privatepost?channel={c_chat_str}&post={c_id}&thread={thread_id}"
1033
- elif c_chat_str:
1034
- link = f"tg://privatepost?channel={c_chat_str}&post={c_id}"
1035
- else:
1036
- link = "#"
1037
-
1038
  display_name = html.escape(full_raw)
1039
- if raw_short in completed_items:
1040
- display_name = f"<s style='opacity: 0.5;'>{display_name}</s>"
1041
-
1042
  list_html += f'<a href="{link}" class="item"><span class="rank">#{rank}</span><span class="name">{display_name}</span><span class="reacts">{total} 赞</span></a>'
1043
-
1044
  html_template = f"""<!DOCTYPE html>
1045
- <html lang="zh-CN">
1046
- <head>
1047
- <meta charset="UTF-8">
1048
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
1049
- <title>完整榜单 - {html.escape(table_title)}</title>
1050
- <style>
1051
- body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #1a1a2e; color: #eaeaea; padding: 15px; margin: 0; }}
1052
- .container {{ max-width: 600px; margin: 0 auto; background: #16213e; border-radius: 12px; padding: 15px; box-shadow: 0 4px 15px rgba(0,0,0,0.3); }}
1053
- h2 {{ text-align: center; color: #fff; margin-bottom: 20px; font-size: 18px; border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 10px; line-height: 1.5; }}
1054
- .hint {{ text-align: center; color: #5dade2; font-size: 13px; margin-top: -10px; margin-bottom: 15px; font-weight: 500; }}
1055
- .item {{ display: flex; justify-content: space-between; align-items: center; padding: 12px 0; border-bottom: 1px solid rgba(255,255,255,0.05); text-decoration: none; color: inherit; transition: background 0.2s; }}
1056
- .item:last-child {{ border-bottom: none; }}
1057
- .item:active {{ background: rgba(255,255,255,0.05); border-radius: 8px; }}
1058
- .rank {{ font-weight: bold; width: 35px; color: #e94560; font-size: 14px; }}
1059
- .name {{ flex: 1; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; padding: 0 10px; font-size: 14px; }}
1060
- .reacts {{ font-weight: bold; color: #5dade2; font-size: 14px; }}
1061
- </style>
1062
- </head>
1063
- <body>
1064
- <div class="container">
1065
- <h2>📊 {html.escape(table_title)} <br><span style="font-size:12px;color:#8a8a9a;font-weight:normal">全量数据收录 | 更新于 {now_str}</span></h2>
1066
- <div class="hint">💡 点击名字跳转评论投票</div>
1067
- {list_html}
1068
- </div>
1069
- </body>
1070
- </html>"""
1071
  cache_key = f"{ch_id}_{msg_id}"
1072
  HTML_CACHE[cache_key] = html_template
1073
-
1074
  space_host = "bangdan.nine7.cc.cd"
1075
  file_msg_link = f"https://{space_host}/list/{cache_key}"
1076
-
1077
  stats_section = f"<b>{html.escape(table_title)} ({completed_count}/{top_n}) <a href='{file_msg_link}'>完整名单</a></b>\n<blockquote>{chr(10).join(inner_lines)}</blockquote>"
1078
-
1079
  else:
1080
  beijing_tz = timezone(timedelta(hours=8))
1081
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1082
  stats_section = f"<b>{html.escape(table_title)} (0/{top_n})</b>\n<blockquote>暂无评论数据\n\n⏳ <code>最后更新: {now_str} (北京时间)</code></blockquote>"
1083
-
1084
  new_message_text = f"{base_html}{SEPARATOR_MARK}{blacklist_section}{stats_section}"
1085
  content_hash = f"{blacklist_section}|{stats_section}"
1086
-
1087
  if task.get('last_html_stats') != content_hash:
1088
  try:
1089
  if original_msg.photo or original_msg.video or original_msg.document: bot.edit_message_caption(caption=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1090
  else: bot.edit_message_text(text=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1091
  task['last_html_stats'] = content_hash; data_changed = True
1092
  except Exception: pass
1093
-
1094
  task['last_run'] = current_time; data_changed = True; await asyncio.sleep(2)
1095
- except Exception:
1096
- pass
1097
  if data_changed: await asyncio.to_thread(save_data)
1098
 
 
1099
  async def update_channel_dirs():
1100
  current_time = int(time.time())
1101
  data_changed = False
1102
  for uid, u_data in DATA.get("users", {}).items():
1103
  tasks = u_data.get("dir_tasks", [])
1104
  for task in tasks:
1105
- # 🌟 分离【目标写入频道】与【源扫描频道】
1106
- ch_id = int(task['channel_id']) # 目标承载频道
1107
- msg_id = int(task['msg_id'])
1108
-
1109
- # 如果有独立指定的扫描源频道,用它;否则默认用目标频道
1110
- scan_ch_id = int(task.get('scan_channel_id', task['channel_id']))
1111
-
1112
  blacklist = task.get('blacklist', [])
1113
  tags_map = task.get('tags_map', {})
1114
  scanned_msgs = task.get('scanned_msgs', {})
@@ -1117,21 +1088,10 @@ def start_telethon_worker():
1117
  is_first_run = not bool(scanned_msgs)
1118
  scan_kwargs = {'limit': None if is_first_run else 150}
1119
  new_tags_found = False
1120
-
1121
- try:
1122
- # 🌟 获取承载消息时,依旧去目标承载频道(ch_id)找
1123
- original_msg = await TL_CLIENT.get_messages(ch_id, ids=msg_id)
1124
- if not original_msg: continue
1125
- if original_msg.raw_text:
1126
- raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
1127
- base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
1128
- else: base_html = ""
1129
- except Exception: continue
1130
-
1131
  try:
1132
- # 🌟 迭代历史消息提取标签时,去源扫描频道(scan_ch_id)
1133
- async for msg in TL_CLIENT.iter_messages(scan_ch_id, **scan_kwargs):
1134
- if scan_ch_id == ch_id and msg.id == msg_id: continue # 防止自扫自改
1135
  msg_id_str = str(msg.id)
1136
  msg_time = msg.edit_date.timestamp() if msg.edit_date else msg.date.timestamp()
1137
  if scanned_msgs.get(msg_id_str) == msg_time: continue
@@ -1144,8 +1104,7 @@ def start_telethon_worker():
1144
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
1145
  text_without_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1146
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_without_bq))
1147
- found_tags = re.findall(r'#[A-Za-z0-9_\u4e00-\u9fa5\s&+\-/Detail\u200d\u2600-\u27bf\U0001f000-\U0001f9ff]+', clean_text)
1148
- found_tags = [t.strip() for t in found_tags if t.strip() > '#']
1149
  if found_tags:
1150
  tags_map[msg_id_str] = found_tags
1151
  new_tags_found = True
@@ -1153,68 +1112,73 @@ def start_telethon_worker():
1153
  if msg_id_str in tags_map:
1154
  del tags_map[msg_id_str]
1155
  new_tags_found = True
1156
-
1157
  task['last_run'] = current_time
1158
- if new_tags_found or is_first_run:
1159
- task['tags_map'] = tags_map
1160
- task['scanned_msgs'] = scanned_msgs
1161
- data_changed = True
1162
- all_tags = set()
1163
- for t_list in tags_map.values(): all_tags.update(t_list)
1164
- active_tags = [t for t in all_tags if t not in blacklist]
1165
- task['tags_cache'] = active_tags
1166
- directory_map = {}
1167
- for tag in active_tags:
1168
- clean_str = tag[1:]
1169
- if not clean_str: continue
1170
- fc = clean_str[0]
1171
- key = "#"
1172
- if fc.isalpha() and fc.isascii(): key = fc.upper()
1173
- elif fc.isdigit(): key = "0-9"
1174
- elif '\u4e00' <= fc <= '\u9fff':
1175
- try:
1176
- py = lazy_pinyin(fc)
1177
- if py and len(py[0])>0: key = py[0][0].upper()
1178
- except NameError: key = "中文"
1179
- if key not in directory_map: directory_map[key] = []
1180
- directory_map[key].append(tag)
1181
-
1182
- lines = ["目录:\n<blockquote expandable>"]
1183
- keys = sorted(directory_map.keys())
1184
- if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
1185
-
1186
- # 🚀 自动化同步渲染:支持全量数字、Emoji及组合参数检索
1187
- scan_ch_pure_str = str(scan_ch_id).replace("-100", "")
1188
- for key in keys:
1189
- linked_tags = []
1190
- for t in sorted(directory_map[key]):
1191
- safe_search_text = quote(t)
1192
- # 点击直接向【被扫描的源频道】发起专属协议搜索,100%原生点击追踪高亮!
1193
- tg_search_link = f"tg://resolve?domain=c/{scan_ch_pure_str}&query={safe_search_text}"
1194
- linked_tags.append(f'<a href="{tg_search_link}">{html.escape(t)}</a>')
1195
-
1196
- tags_line = " ".join(linked_tags)
1197
- lines.append(f"<b>{key}</b>: {tags_line}\n")
1198
-
1199
- lines.append("</blockquote>")
1200
- beijing_tz = timezone(timedelta(hours=8))
1201
- now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1202
- lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1203
- stats_text = "\n".join(lines)
1204
-
1205
- task_name = task.get('task_name', '标签目录')
1206
- safe_title_with_count = f"{html.escape(task_name)} ({len(active_tags)})"
 
1207
  new_message_text = f"{base_html}{SEPARATOR_MARK}<b>{safe_title_with_count}</b>\n{stats_text}"
1208
- if len(new_message_text) > 4000: new_message_text = new_message_text[:4000] + "\n... </blockquote>\n⚠️ 目录过长已截断"
1209
- if task.get('last_html_dir') != stats_text:
 
 
1210
  try:
1211
- # 🌟 写入时,目标始终是承载频道 (ch_id)
1212
- if original_msg.photo or original_msg.video or original_msg.document: bot.edit_message_caption(caption=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1213
- else: bot.edit_message_text(text=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1214
- task['last_html_dir'] = stats_text
 
1215
  data_changed = True
1216
  except Exception: pass
1217
- except Exception: pass
 
 
1218
  if data_changed: await asyncio.to_thread(save_data)
1219
 
1220
  TL_CLIENT.start()
@@ -1236,8 +1200,7 @@ def add_security_headers(response):
1236
 
1237
  def validate_webapp(req):
1238
  init_data = req.headers.get('X-Init-Data', '')
1239
- if not init_data:
1240
- return None
1241
  try:
1242
  parsed = {}
1243
  for part in init_data.split('&'):
@@ -1245,18 +1208,13 @@ def validate_webapp(req):
1245
  k, v = part.split('=', 1)
1246
  parsed[k] = unquote(v)
1247
  check_hash = parsed.pop('hash', None)
1248
- if not check_hash or not BOT_TOKEN:
1249
- return None
1250
  data_check_string = "\n".join(f"{k}={parsed[k]}" for k in sorted(parsed.keys()))
1251
  secret_key = hmac.new(b"WebAppData", BOT_TOKEN.encode(), hashlib.sha256).digest()
1252
  computed = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
1253
- if not hmac.compare_digest(computed, check_hash):
1254
- print("⛔ HMAC 签名不匹配")
1255
- return None
1256
  auth_date = int(parsed.get('auth_date', 0))
1257
- if abs(time.time() - auth_date) > 86400:
1258
- print("⛔ initData 已过期")
1259
- return None
1260
  user_obj = json.loads(parsed.get('user', '{}'))
1261
  uid = str(user_obj.get('id', ''))
1262
  if uid and uid not in DATA["users"]:
@@ -1271,27 +1229,21 @@ def need_auth(f):
1271
  @wraps(f)
1272
  def wrapper(*args, **kwargs):
1273
  uid = validate_webapp(request)
1274
- if not uid:
1275
- return jsonify({"ok": False, "msg": "未授权"}), 401
1276
- if check_rate_limit(uid):
1277
- return jsonify({"ok": False, "msg": "操作太频繁,请稍后再试"}), 429
1278
  return f(uid, *args, **kwargs)
1279
  return wrapper
1280
 
1281
  @app.route('/')
1282
- def home():
1283
- return "Bot is running"
1284
 
1285
  @app.route('/webapp')
1286
- def webapp_page():
1287
- return send_file('webapp.html')
1288
 
1289
- # 👇 新增路由:专门用于分发生成的完整榜单外部网页
1290
  @app.route('/list/<cache_key>')
1291
  def view_list(cache_key):
1292
  html_content = HTML_CACHE.get(cache_key)
1293
- if not html_content:
1294
- return "暂无数据或页面已刷新,请等待机器人下次更新", 404
1295
  return html_content
1296
 
1297
  @app.route('/api/data')
@@ -1365,8 +1317,7 @@ def api_edit_stat(uid, idx):
1365
  field, val = d["field"], d["value"]
1366
  try:
1367
  task = DATA["users"][uid]["stats_tasks"][idx]
1368
- if field in ("top_n", "interval", "duration"):
1369
- task[field] = int(val)
1370
  elif field == "msg_id":
1371
  if val.startswith('http'): val = val.split('/')[-1]
1372
  task["msg_id"] = val
@@ -1387,12 +1338,9 @@ def api_edit_stat(uid, idx):
1387
  elif field == "blacklist_title":
1388
  task["blacklist_title"] = "" if val.strip() == "无" else val.strip()
1389
  task["last_html_stats"] = ""
1390
- else:
1391
- task[field] = val
1392
-
1393
  task["last_html_stats"] = ""
1394
  task["last_run"] = 0
1395
-
1396
  save_data()
1397
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1398
  except Exception as e:
@@ -1407,25 +1355,27 @@ def api_del_stat(uid, idx):
1407
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1408
  except: return jsonify({"ok": False, "msg": "索引无效"})
1409
 
 
1410
  @app.route('/api/dirs', methods=['POST'])
1411
  @need_auth
1412
  def api_add_dir(uid):
1413
  d = request.json
1414
- msg_id_raw = str(d.get("msg_id", ""))
1415
- if msg_id_raw.startswith('http'): msg_id_raw = msg_id_raw.split('/')[-1]
1416
-
 
 
 
1417
  DATA["users"][uid].setdefault("dir_tasks", []).append({
1418
- "task_name": d["task_name"],
1419
- "scan_channel_id": d["scan_channel_id"], # 🌟 新增存储:源扫描频道
1420
- "channel_id": d["channel_id"], # 目标承载频道
1421
- "msg_id": msg_id_raw,
1422
- "blacklist": d.get("blacklist", []),
1423
- "interval": 15,
1424
- "tags_cache": [], "tags_map": {}, "scanned_msgs": {}, "last_html_dir": ""
1425
  })
1426
  save_data()
1427
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1428
 
 
1429
  @app.route('/api/dirs/<int:idx>', methods=['PUT'])
1430
  @need_auth
1431
  def api_edit_dir(uid, idx):
@@ -1433,6 +1383,7 @@ def api_edit_dir(uid, idx):
1433
  field, val = d["field"], d["value"]
1434
  try:
1435
  task = DATA["users"][uid]["dir_tasks"][idx]
 
1436
  if field == "add_blacklist":
1437
  task["blacklist"].extend(val.split())
1438
  task["blacklist"] = list(set(task["blacklist"]))
@@ -1441,12 +1392,16 @@ def api_edit_dir(uid, idx):
1441
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
1442
  elif field == "interval":
1443
  task["interval"] = int(val)
1444
- elif field in ("scan_channel_id", "channel_id", "msg_id"): # 🌟 允许编辑跨频道属性
1445
- if field == "msg_id" and val.startswith('http'): val = val.split('/')[-1]
1446
- task[field] = val
1447
- task["tags_map"] = {}
1448
- task["scanned_msgs"] = {}
1449
- task["last_html_dir"] = ""
 
 
 
 
1450
  save_data()
1451
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1452
  except Exception as e:
@@ -1537,19 +1492,14 @@ def api_backup(uid):
1537
  except Exception as e:
1538
  return jsonify({"ok": False, "msg": str(e)})
1539
 
1540
- # ====== SSE 实时事件流 ======
1541
  @app.route('/api/events')
1542
  def api_events():
1543
  class FakeReq:
1544
  def __init__(self, init_data_str):
1545
  self.headers = {'X-Init-Data': init_data_str}
1546
-
1547
  init_data_str = request.args.get('init_data', '')
1548
  uid = validate_webapp(FakeReq(init_data_str))
1549
-
1550
- if not uid:
1551
- return jsonify({"ok": False, "msg": "未授权"}), 401
1552
-
1553
  def stream():
1554
  q = _event_queues[uid]
1555
  yield f"data: {json.dumps({'type': 'connected', 'data': '🟢 实时连接已建立'})}\n\n"
@@ -1559,31 +1509,15 @@ def api_events():
1559
  yield f"data: {json.dumps(event)}\n\n"
1560
  except queue.Empty:
1561
  yield f": heartbeat\n\n"
1562
-
1563
- return Response(
1564
- stream(),
1565
- mimetype='text/event-stream',
1566
- headers={
1567
- 'Cache-Control': 'no-cache',
1568
- 'X-Accel-Buffering': 'no',
1569
- 'Connection': 'keep-alive'
1570
- }
1571
- )
1572
 
1573
  @app.route('/api/health')
1574
  def api_health():
1575
- return jsonify({
1576
- "status": "ok",
1577
- "users": len(DATA.get("users", {})),
1578
- "msg_map": len(DATA.get("msg_map", {})),
1579
- "telethon": TL_CLIENT is not None
1580
- })
1581
 
1582
- # ===== 5. 启动点 =====
1583
  if __name__ == "__main__":
1584
  Thread(target=lambda: app.run(host="0.0.0.0", port=7860), daemon=True).start()
1585
  Thread(target=start_telethon_worker, daemon=True).start()
1586
-
1587
  print("🔄 正在清除旧连接...")
1588
  for attempt in range(5):
1589
  try:
@@ -1593,18 +1527,11 @@ if __name__ == "__main__":
1593
  except Exception as e:
1594
  print(f"⏳ 等待旧实例释放... ({attempt+1}/5) {e}")
1595
  time.sleep(3)
1596
-
1597
  print("🤖 Telebot 主消息引擎已启动!")
1598
  print("🌐 Mini App 地址: http://localhost:7860/webapp")
1599
-
1600
  while True:
1601
  try:
1602
- bot.infinity_polling(
1603
- timeout=60,
1604
- long_polling_timeout=60,
1605
- allowed_updates=["message", "callback_query",
1606
- "channel_post", "edited_channel_post"]
1607
- )
1608
  except Exception as e:
1609
  print(f"❌ Polling 异常: {e}")
1610
  print("⏳ 10秒后重连...")
@@ -1612,5 +1539,4 @@ if __name__ == "__main__":
1612
  try:
1613
  bot.remove_webhook()
1614
  bot.get_updates(offset=-1, timeout=1)
1615
- except:
1616
- pass
 
11
  from collections import defaultdict
12
  from datetime import datetime, timezone, timedelta
13
  from threading import Thread, Lock, Timer
14
+ from urllib.parse import parse_qs, unquote
15
  from functools import wraps
16
 
17
  import urllib3.util.connection as urllib3_cn
18
 
 
def allowed_gai_family():
    """Return the only address family urllib3 may use: IPv4.

    This function is assigned to ``urllib3_cn.allowed_gai_family`` right
    after its definition, replacing urllib3's own address-family selection
    so that every lookup is restricted to ``socket.AF_INET``.
    """
    return socket.AF_INET
21
  urllib3_cn.allowed_gai_family = allowed_gai_family
 
35
  except ImportError:
36
  print("⚠️ 未安装 pypinyin")
37
 
38
+ # ===== 🔧 Fix 2: 更宽泛的标签正则 & 辅助函数 =====
# A tag starts with '#' and runs until whitespace, another '#', or one of the
# listed punctuation/bracket characters. Anything else (emoji, '&', ...) is
# kept as part of the tag.
TAG_PATTERN = r'#[^\s#,,。!!??;;::<>()()【】《》\[\]]+'


def extract_tags(text):
    """Return the hashtags found in ``text``, in order of appearance.

    Each match of ``TAG_PATTERN`` has trailing punctuation stripped. A tag is
    kept only if something other than the leading ``#`` remains *after*
    stripping, so input such as ``#...`` no longer yields a bare ``#`` (which
    ``classify_tag_key`` cannot classify and which would otherwise be stored
    and counted as a real tag).

    Args:
        text: Plain message text; ``None`` or an empty string is accepted.

    Returns:
        A list of tags, each including its leading ``#``. Duplicates are
        preserved; callers de-duplicate as needed.
    """
    if not text:
        return []
    tags = []
    for raw in re.findall(TAG_PATTERN, text):
        tag = raw.rstrip('.,;:!?。,;:!?…))】》')
        # Validate the length after stripping, not before.
        if len(tag) > 1:
            tags.append(tag)
    return tags
45
+
def classify_tag_key(tag):
    """Map a tag to the directory group key derived from its first character.

    The leading ``#`` is dropped and the next character decides the group:

    * nothing left after the ``#``      -> ``None`` (caller should skip it)
    * a digit                           -> ``"0-9"``
    * an ASCII letter                   -> that letter, upper-cased
    * a character in U+4E00..U+9FFF     -> upper-cased first letter of its
      pinyin, or ``"中文"`` when pinyin is unavailable or empty
    * anything else (emoji, symbols...) -> ``"#"``
    """
    remainder = tag[1:]
    if not remainder:
        return None

    first = remainder[0]
    if first.isdigit():
        return "0-9"
    if first.isalpha() and first.isascii():
        return first.upper()
    if not ('\u4e00' <= first <= '\u9fff'):
        return "#"

    try:
        syllables = lazy_pinyin(first)
    except NameError:
        # pypinyin failed to import, so lazy_pinyin was never defined.
        return "中文"
    if syllables and len(syllables[0]) > 0:
        return syllables[0][0].upper()
    return "中文"
66
+
67
+ # 🔧 Fix 1: 目录任务向后兼容迁移
def migrate_dir_task(task):
    """Upgrade a legacy ``dir_task`` dict in place to the scan/targets layout.

    The current layout separates the channel that is scanned for tags
    (``scan_channel``) from the list of messages that display the directory
    (``targets``). Missing fields are filled in from the legacy keys:

    * ``scan_channel`` comes from the legacy ``scan_channel_id`` when present
      (older tasks created through the HTTP API stored the source channel
      there), otherwise from ``channel_id``. Using ``channel_id``
      unconditionally would make cross-channel tasks scan their target.
    * ``targets`` becomes a single entry built from ``channel_id``/``msg_id``.
    * ``last_html_dir`` is reset to an empty dict when it is still the old
      single-string cache, since it is now keyed per target.

    Already-migrated tasks are left untouched, so calling this repeatedly is
    harmless.

    Args:
        task: The task dict to upgrade; it is mutated.

    Returns:
        The same ``task`` object, for call chaining.
    """
    if "scan_channel" not in task:
        legacy_scan = task.get("scan_channel_id")
        task["scan_channel"] = legacy_scan if legacy_scan else task.get("channel_id", "")
    if "targets" not in task:
        task["targets"] = [{"channel_id": task.get("channel_id", ""), "msg_id": str(task.get("msg_id", ""))}]
    if isinstance(task.get("last_html_dir"), str):
        task["last_html_dir"] = {}
    return task
77
+
78
  # ===== 2. WebDAV 存储配置 =====
79
  DAV_URL_BASE = os.environ.get("WEBDAV_URL", "").rstrip("/")
80
  DAV_USER = os.environ.get("WEBDAV_USER") or os.environ.get("WEBDAV_USERNAME")
 
86
  DATA = {"users": {}, "msg_map": {}, "backup_log": {}}
87
  data_lock = Lock()
88
 
 
89
  HTML_CACHE = {}
90
 
91
  TL_LOOP = None
 
130
  user_states = {}
131
  ALL_TYPES = ['text', 'audio', 'document', 'photo', 'sticker', 'video', 'video_note', 'voice', 'location', 'contact', 'animation', 'dice', 'poll']
132
 
 
133
  _rate_limit = defaultdict(list)
134
  RATE_LIMIT_MAX = 30
135
  RATE_LIMIT_WINDOW = 60
 
142
  _rate_limit[uid].append(now)
143
  return False
144
 
 
145
  _event_queues = defaultdict(lambda: queue.Queue(maxsize=50))
146
 
def push_event(uid, event_type, data):
    """Queue a real-time event for ``uid`` if that user has an event queue.

    The lookup uses ``.get`` so that no queue is created for users who never
    opened one. When the queue is already full the event is dropped rather
    than blocking the caller.
    """
    subscriber_queue = _event_queues.get(uid)
    if not subscriber_queue:
        return
    try:
        subscriber_queue.put_nowait({"type": event_type, "data": data, "time": time.strftime("%H:%M:%S")})
    except queue.Full:
        pass
154
 
 
268
  user_states[uid] = {"step": "WAIT_MANUAL_DIR_CH"}
269
  send_channel_prompt(uid, "🗂️ **生成频道标签目录**\n\n请输入需要扫描的【频道 ID】(例如 `-10012345678`):")
270
 
271
+ # 🔧 Fix 1: 目录任务创建 - 开启新流程
@bot.message_handler(commands=['add_dir'])
def cmd_add_dir(message):
    """Handle ``/add_dir``: start the directory-task creation dialogue.

    Puts the sender into the ``WAIT_DIR_NAME`` step and asks for a task name.
    """
    uid = str(message.from_user.id)
    prompt = "🗂️ **创建自动更新目录任务**\n\n1️⃣ 请给任务起个名字 (如: `主频道自动目录`):"
    user_states[uid] = {"step": "WAIT_DIR_NAME"}
    bot.send_message(uid, prompt, parse_mode="Markdown")
277
 
278
+ # 🔧 Fix 1: 目录任务列表 - 显示新结构
@bot.message_handler(commands=['list_dir'])
def cmd_list_dir(message):
    """Handle ``/list_dir``: send one summary message per directory task.

    Each task is first upgraded with ``migrate_dir_task`` so that legacy
    entries expose ``scan_channel`` and ``targets``. Every summary carries an
    inline keyboard whose callback data encodes the action and task index.
    """
    uid = str(message.from_user.id)
    dir_tasks = DATA["users"].get(uid, {}).get("dir_tasks", [])
    if not dir_tasks:
        return bot.send_message(uid, "暂无自动目录任务。")

    for i, t in enumerate(dir_tasks):
        migrate_dir_task(t)
        target_lines = [f" ▪️ `{tgt['channel_id']}` 消息 `{tgt['msg_id']}`" for tgt in t.get("targets", [])]
        targets_text = "\n".join(target_lines)
        msg = (f"🗂️ **任务**: `{t.get('task_name', '未命名')}`\n"
               f"🔍 **扫描频道**: `{t.get('scan_channel')}`\n"
               f"📌 **更新目标**:\n{targets_text}\n"
               f"⏱ **频率**: 每 `{t.get('interval', 15)}` 分钟扫描一次\n"
               f"🛡️ **屏蔽标签**: `{', '.join(t.get('blacklist', [])) or '无'}`\n"
               f"📦 **已收录标签**: `{len(t.get('tags_cache', []))} 个`")

        # One inner list per keyboard row: (button label, callback data).
        button_rows = [
            [("➕ 加屏蔽", f"ed_ab_{i}"), ("➖ 删屏蔽", f"ed_rb_{i}")],
            [("⏱ 扫描频率", f"ed_in_{i}")],
            [("➕ 添加目标", f"ed_at_{i}"), ("➖ 删除目标", f"ed_rt_{i}")],
            [("🗑️ 终止并删除该目录任务", f"d_d_{i}")],
        ]
        markup = types.InlineKeyboardMarkup(row_width=2)
        for row in button_rows:
            markup.add(*[types.InlineKeyboardButton(label, callback_data=cb) for label, cb in row])
        bot.send_message(uid, msg, reply_markup=markup, parse_mode="Markdown")
299
 
 
416
  if not text.isdigit(): return bot.send_message(uid, "❌ 只能输入纯数字!")
417
  user_states[uid].update({"step": "WAIT_STAT_BLACKLIST", "duration": int(text)})
418
  bot.send_message(uid, "9️⃣ 请输入**屏蔽名单** (用空格隔开)\n💡 不需要屏蔽请回复 `无`:")
 
419
  elif step == "WAIT_STAT_BLACKLIST":
420
  blacklist = [] if text.strip() == "无" else [x.strip() for x in re.split(r'[\s\n]+', text) if x.strip()]
421
  user_states[uid].update({"step": "WAIT_STAT_BL_TITLE", "stats_blacklist": blacklist})
422
  bot.send_message(uid, "🔟 请输入**屏蔽区的标题** (例如 `🚫本月轮换限制:`)\n💡 不需要请回复 `无`:")
 
423
  elif step == "WAIT_STAT_BL_TITLE":
424
  bl_title = "" if text.strip() == "无" else text.strip()
425
  if "stats_tasks" not in DATA["users"][uid]: DATA["users"][uid]["stats_tasks"] = []
 
428
  "table_title": state["table_title"], "top_n": state["top_n"], "trigger_tag": state["trigger_tag"],
429
  "interval": state["interval"], "duration": state["duration"], "start_time": int(time.time()),
430
  "last_run": 0, "completed_items": [], "last_checked_msg_id": int(state["msg_id"]),
431
+ "stats_blacklist": state["stats_blacklist"], "blacklist_title": bl_title
 
432
  })
433
  save_data()
434
  bot.send_message(uid, "✅ 完美!任务已创建。")
 
463
  task["stats_blacklist"] = [x for x in task.get("stats_blacklist", []) if x not in to_remove]
464
  elif step == "EDIT_STAT_BLTITLE":
465
  task["blacklist_title"] = "" if text.strip() == "无" else text.strip()
 
466
  task["last_html_stats"] = ""
467
  task["last_run"] = 0
 
468
  save_data()
469
  bot.send_message(uid, "✅ 属性已修改!下次刷新周期将立即更新。")
470
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
471
  user_states.pop(uid)
472
+
473
  elif step == "WAIT_BACKUP_SRC":
474
  user_states[uid] = {"step": "WAIT_BACKUP_TGT", "src": text}
475
  send_channel_prompt(uid, "📌 请输入【目标频道 ID】:")
 
494
  bot.send_message(uid, "🔍 正在扫描频道历史标签,请耐心等待...")
495
  Thread(target=generate_smart_directory, args=(uid, text)).start()
496
  user_states.pop(uid)
497
+
498
+ # 🔧 Fix 1: 目录任务新建流程 - 分离扫描频道与目标频道
499
  elif step == "WAIT_DIR_NAME":
500
  user_states[uid].update({"step": "WAIT_DIR_SCAN_CH", "task_name": text})
501
+ send_channel_prompt(uid, "2️⃣ 请输入要**扫描标签的频道 ID**:")
502
  elif step == "WAIT_DIR_SCAN_CH":
503
+ user_states[uid].update({"step": "WAIT_DIR_TGT_CH", "scan_channel": text, "targets": []})
504
+ send_channel_prompt(uid, "3️⃣ 请输入第一个**目标频道 ID** (目录将更新到此频道的消息中):")
505
+ elif step == "WAIT_DIR_TGT_CH":
506
+ user_states[uid].update({"step": "WAIT_DIR_TGT_MSG", "current_tgt_ch": text})
507
+ bot.send_message(uid, f"📌 请输入频道 `{text}` **承载目录的消息 ID**:")
508
+ elif step == "WAIT_DIR_TGT_MSG":
509
  if text.startswith('http'): text = text.split('/')[-1]
510
+ state["targets"].append({"channel_id": state["current_tgt_ch"], "msg_id": text})
511
+ user_states[uid]["step"] = "WAIT_DIR_MORE_OR_BL"
512
+ send_channel_prompt(uid, f"✅ 已添加目标 {len(state['targets'])}!\n\n📌 发送频道 ID 继续添加目标,或回复 **完成** 进入下一步:")
513
+ elif step == "WAIT_DIR_MORE_OR_BL":
514
+ if text.strip() in ["完成", "done", "Done", "完"]:
515
+ user_states[uid]["step"] = "WAIT_DIR_BLACKLIST"
516
+ bot.send_message(uid, "4️⃣ 请输入需要**屏蔽的标签** (空格隔开,不屏蔽回复 `无`):")
517
+ else:
518
+ user_states[uid].update({"step": "WAIT_DIR_TGT_MSG", "current_tgt_ch": text})
519
+ bot.send_message(uid, f"📌 请输入频道 `{text}` 中**承载目录的消息 ID**:")
520
  elif step == "WAIT_DIR_BLACKLIST":
521
  blacklist = [] if text.strip() == "无" else text.split()
522
  if "dir_tasks" not in DATA["users"][uid]: DATA["users"][uid]["dir_tasks"] = []
523
  DATA["users"][uid]["dir_tasks"].append({
524
+ "task_name": state["task_name"],
525
+ "scan_channel": state["scan_channel"],
526
+ "targets": state["targets"],
527
+ "blacklist": blacklist, "interval": 15,
528
+ "tags_cache": [], "tags_map": {}, "scanned_msgs": {}, "last_html_dir": {}
 
 
529
  })
530
  save_data()
531
+ bot.send_message(uid, f"✅ 目录任务建立完成!共 {len(state['targets'])} 个更新目标。")
532
  user_states.pop(uid)
533
 
534
+ # 🔧 Fix 1: 编辑目录任务 - 添加目标中间步骤
535
  elif step.startswith("EDIT_DIR_"):
536
  idx = state["idx"]
537
  try:
538
  task = DATA["users"][uid]["dir_tasks"][idx]
539
+ migrate_dir_task(task)
540
+ if step == "EDIT_DIR_ADD_TGT_CH":
541
+ user_states[uid].update({"step": "EDIT_DIR_ADD_TGT_MSG", "add_tgt_ch": text})
542
+ return bot.send_message(uid, "📌 请输入该频道的**消息 ID**:")
543
+ elif step == "EDIT_DIR_ADD_TGT_MSG":
544
  if text.startswith('http'): text = text.split('/')[-1]
545
+ task.setdefault("targets", []).append({"channel_id": state["add_tgt_ch"], "msg_id": text})
546
+ bot.send_message(uid, "✅ 新目标已添加!")
547
  elif step == "EDIT_DIR_ADDBL":
548
  task["blacklist"].extend(text.split())
549
  task["blacklist"] = list(set(task["blacklist"]))
550
+ bot.send_message(uid, "✅ 目录属性已修改!")
551
  elif step == "EDIT_DIR_RMBL":
552
  to_rem = text.split()
553
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
554
+ bot.send_message(uid, "✅ 目录属性已修改!")
555
  elif step == "EDIT_DIR_INTV":
556
  task["interval"] = int(text)
557
+ bot.send_message(uid, "✅ 目录属性已修改!")
 
 
 
 
 
558
  save_data()
 
559
  except Exception as e: bot.send_message(uid, f"❌ 修改失败: {e}")
560
  user_states.pop(uid)
561
 
 
622
  bot.edit_message_text("✅ 频道已移除。", call.message.chat.id, call.message.message_id)
623
  return bot.answer_callback_query(call.id)
624
  elif data.startswith("selch_") or data.startswith("bkp_"): return
625
+
626
+ # 🔧 Fix 1: 处理删除目标 edrt_{task_idx}_{target_idx}
627
+ if data.startswith("edrt_"):
628
+ parts = data.split("_")
629
+ task_idx = int(parts[1])
630
+ tgt_idx = int(parts[2])
631
+ try:
632
+ task = DATA["users"][uid]["dir_tasks"][task_idx]
633
+ migrate_dir_task(task)
634
+ targets = task.get("targets", [])
635
+ if tgt_idx < len(targets):
636
+ removed = targets.pop(tgt_idx)
637
+ save_data()
638
+ bot.edit_message_text(f"✅ 目标已删除: {removed['channel_id']} | 消息 {removed['msg_id']}", call.message.chat.id, call.message.message_id)
639
+ except Exception as e:
640
+ bot.send_message(uid, f"❌ 删除失败: {e}")
641
+ return bot.answer_callback_query(call.id)
642
+
643
  try:
644
  action, idx_str = data.rsplit("_", 1)
645
  idx = int(idx_str)
 
656
  del DATA["users"][uid]["dir_tasks"][idx]
657
  bot.edit_message_text("❌ 目录任务已移除", call.message.chat.id, call.message.message_id)
658
  save_data()
659
+ # 🔧 Fix 1: 添加/删除目标回调
660
+ elif action == "ed_at":
661
+ user_states[uid] = {"step": "EDIT_DIR_ADD_TGT_CH", "idx": idx}
662
+ send_channel_prompt(uid, "📌 请输入新目标的**频道 ID**:")
663
+ bot.answer_callback_query(call.id, "请输入新目标频道")
664
+ elif action == "ed_rt":
665
+ task = DATA["users"][uid]["dir_tasks"][idx]
666
+ migrate_dir_task(task)
667
+ targets = task.get("targets", [])
668
+ if not targets:
669
+ bot.send_message(uid, "暂无目标可删除。")
670
+ else:
671
+ markup_del = types.InlineKeyboardMarkup(row_width=1)
672
+ for ti, tgt in enumerate(targets):
673
+ markup_del.add(types.InlineKeyboardButton(
674
+ f"❌ {tgt['channel_id']} | 消息 {tgt['msg_id']}",
675
+ callback_data=f"edrt_{idx}_{ti}"
676
+ ))
677
+ bot.send_message(uid, "🗑️ 选择要删除的目标:", reply_markup=markup_del)
678
+ bot.answer_callback_query(call.id)
679
+ elif action in ["e_name", "e_titl", "e_trig", "e_topn", "e_intv", "e_dura", "e_chid", "e_msgid", "e_sabl", "e_srbl", "e_sblt", "ed_ab", "ed_rb", "ed_in"]:
680
  prompt_map = {
681
  "e_name": "📌 请输入新的任务名称:",
682
  "e_titl": "📌 请输入新的表头标题:",
 
689
  "e_sabl": "🚫 请输入要**屏蔽的名字** (空格隔开):",
690
  "e_srbl": "✅ 请输入要**解除屏蔽的名字** (空格隔开):",
691
  "e_sblt": "📝 请输入**屏蔽区的显示标题**\n(例如 `🚫本月轮换限制:`,回复 `无` 则不显示):",
 
 
 
692
  "ed_ab": "📌 请输入要追加的屏蔽标签(空格隔开):",
693
  "ed_rb": "📌 请输入要移出屏蔽的标签(空格隔开):",
694
  "ed_in": "📌 请输入新的扫描频率(分钟):"
695
  }
696
  state_map = {
697
+ "e_name": "EDIT_STAT_NAME", "e_titl": "EDIT_STAT_TITL",
698
+ "e_trig": "EDIT_STAT_TRIG", "e_topn": "EDIT_STAT_TOPN",
699
+ "e_intv": "EDIT_STAT_INTV", "e_dura": "EDIT_STAT_DURA",
700
+ "e_chid": "EDIT_STAT_CHID", "e_msgid": "EDIT_STAT_MSGID",
701
+ "e_sabl": "EDIT_STAT_ADDBL", "e_srbl": "EDIT_STAT_RMBL",
 
 
 
 
 
702
  "e_sblt": "EDIT_STAT_BLTITLE",
703
+ "ed_ab": "EDIT_DIR_ADDBL", "ed_rb": "EDIT_DIR_RMBL",
 
 
 
 
704
  "ed_in": "EDIT_DIR_INTV"
705
  }
706
  user_states[uid] = {"step": state_map[action], "idx": idx}
707
+ # 🔧 Fix 3: 修改频道ID时也弹出地址簿快选
708
+ if action in ["e_chid"]:
709
+ send_channel_prompt(uid, prompt_map[action])
710
+ else:
711
+ bot.send_message(uid, prompt_map[action])
712
  bot.answer_callback_query(call.id, "请在对话框输入新值")
713
 
714
  def run_smart_backup_v2(latest_id, uid, src, tgt):
 
723
  ref_msg = await TL_CLIENT.get_messages(int(src), ids=latest_id)
724
  target_grouped_id = ref_msg.grouped_id if ref_msg else None
725
  async for msg in TL_CLIENT.iter_messages(int(src)):
726
+ if msg.action is not None: continue
 
727
  if msg.id > latest_id:
728
  if target_grouped_id and msg.grouped_id == target_grouped_id: messages.append(msg)
729
  continue
 
764
  if e.error_code == 429:
765
  time.sleep(e.result_json.get('parameters', {}).get('retry_after', 10))
766
  else:
767
+ failed += len(msg_ids_to_copy); failed_ids.extend(msg_ids_to_copy); break
 
 
768
  except Exception:
769
+ failed += len(msg_ids_to_copy); failed_ids.extend(msg_ids_to_copy); break
 
 
770
  save_data()
 
771
  report = f"🏁 **备份完成!**\n源: `{src}` ➡️ 目: `{tgt}`\n✅ 新增 **{success}** 条"
772
  if failed > 0:
773
  report += f"\n❌ 失败 **{failed}** 条"
774
  show_ids = failed_ids[:10]
775
  report += f"\n失败消息 ID: `{show_ids}`"
776
+ if len(failed_ids) > 10: report += f"\n... 等共 {len(failed_ids)} 条"
 
777
  bot.send_message(uid, report)
778
  push_event(uid, "backup_done", f"✅ 备份完成,新增 {success} 条,失败 {failed} 条")
779
 
780
+ # 🔧 Fix 2: 使用 extract_tags + classify_tag_key
781
  def generate_smart_directory(uid, ch_id):
782
  global TL_LOOP, TL_CLIENT
783
  if not TL_LOOP or not TL_CLIENT: return bot.send_message(uid, "❌ 错误: Userbot 未启动。")
 
790
  else:
791
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
792
  clean_text = html.unescape(re.sub(r'<.*?>', '', re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)))
793
+ for t in extract_tags(clean_text):
794
+ tags_set.add(t)
795
  except Exception as e: return None, str(e)
796
  if not tags_set: return None, "没有找到有效标签。"
797
  directory_map = {}
798
  for tag in tags_set:
799
+ key = classify_tag_key(tag)
800
+ if key is None: continue
 
 
 
 
 
 
 
 
 
801
  if key not in directory_map: directory_map[key] = []
802
  directory_map[key].append(tag)
803
  return directory_map, None
 
811
  lines = ["目录:\n<blockquote expandable>"]
812
  keys = sorted(directory_map.keys())
813
  if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
 
 
 
814
  for key in keys:
815
+ tags_line = " ".join([html.escape(t) for t in sorted(directory_map[key])])
816
+ lines.append(f"{key}: {tags_line}\n")
 
 
 
 
 
 
 
817
  lines.append("</blockquote>")
818
  final_text = "\n".join(lines)
819
  if len(final_text) > 4000:
 
941
  trigger_tag = task.get('trigger_tag', '#未设置')
942
  completed_items = []
943
  interval_sec = int(task.get('interval', 60)) * 60
 
944
  if current_time > int(task.get('start_time', current_time)) + int(task.get('duration', 7)) * 86400:
945
  del tasks[i]; data_changed = True; continue
946
  if current_time - int(task.get('last_run', 0)) < interval_sec: continue
 
947
  try:
948
  original_msg = await TL_CLIENT.get_messages(ch_id, ids=msg_id)
949
  if not original_msg: continue
 
951
  raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
952
  base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
953
  else: base_html = ""
 
954
  comments_data_list = []
955
  discussion_chat_id = None
956
  thread_id = None
 
957
  async for comment in TL_CLIENT.iter_messages(ch_id, reply_to=msg_id):
958
  if not discussion_chat_id:
959
  discussion_chat_id = comment.chat_id
960
  if comment.reply_to:
961
  thread_id = comment.reply_to.reply_to_top_id or comment.reply_to.reply_to_msg_id
 
962
  if comment.reactions:
963
  total_reacts = sum(r.count for r in comment.reactions.results)
964
  if total_reacts > 0:
 
966
  short_text = full_raw_text
967
  if len(short_text) > 15: short_text = short_text[:14] + "…"
968
  comments_data_list.append((total_reacts, html.escape(short_text), short_text, full_raw_text, comment.id))
 
 
969
  stats_blacklist = task.get('stats_blacklist', [])
970
  blacklist_section = ""
971
  if stats_blacklist:
972
  bl_title = task.get('blacklist_title', '🚫本月轮换限制:')
973
  bl_names = "\n".join([html.escape(n) for n in stats_blacklist])
974
+ if bl_title: blacklist_section = f"\n{html.escape(bl_title)}\n<blockquote>{bl_names}</blockquote>\n"
975
+ else: blacklist_section = f"\n<blockquote>{bl_names}</blockquote>\n"
 
 
 
976
  stats_section = ""
977
  all_comments_for_file = []
 
978
  if comments_data_list:
979
  comments_data_list.sort(key=lambda x: x[0], reverse=True)
 
980
  deduped_comments = []
981
  for item in comments_data_list:
982
  total, safe_text, raw_short, full_raw, c_id = item
983
  base_name = re.split(r'[((]', full_raw)[0].strip()
 
984
  is_blocked = False
985
  for blocked in stats_blacklist:
986
  if blocked and len(blocked) >= 2:
987
  if blocked in full_raw or blocked in base_name:
988
+ is_blocked = True; break
989
+ if is_blocked: continue
 
 
 
990
  conflict = False
991
  for added_item in deduped_comments:
992
  added_full_raw = added_item[3]
 
994
  if (len(base_name) >= 2 and base_name in added_full_raw) or (len(added_base) >= 2 and added_base in full_raw):
995
  conflict = True; break
996
  if not conflict: deduped_comments.append(item)
 
997
  comments_data_list = deduped_comments
998
  completed_items = []
 
999
  try:
1000
  async for newer_msg in TL_CLIENT.iter_messages(ch_id, limit=100, min_id=msg_id):
1001
  if newer_msg.id == msg_id: continue
1002
  if not newer_msg.raw_text or trigger_tag.lower() not in newer_msg.raw_text.lower(): continue
 
1003
  if newer_msg.entities:
1004
  html_text = tl_html.unparse(newer_msg.raw_text, newer_msg.entities)
1005
  text_no_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1006
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_no_bq))
1007
+ else: clean_text = newer_msg.raw_text
 
 
1008
  if trigger_tag.lower() not in clean_text.lower(): continue
1009
+ # 🔧 Fix 2: 用 extract_tags 提取(无 #)
1010
+ found_tags = [t[1:] for t in extract_tags(clean_text)]
1011
  for item in comments_data_list:
1012
+ raw_short = item[2]; full_raw = item[3]
 
1013
  base_name = re.split(r'[((]', full_raw)[0].strip()
1014
  for tag in found_tags:
1015
  if (len(tag) >= 2 and tag.lower() in full_raw.lower()) or (len(base_name) >= 2 and base_name.lower() in tag.lower()):
1016
  if raw_short not in completed_items: completed_items.append(raw_short)
1017
  break
1018
  except Exception: pass
 
1019
  if task.get('completed_items') != completed_items:
1020
  task['completed_items'] = completed_items; data_changed = True
 
1021
  comments_data_list.sort(key=lambda x: (x[2] in completed_items, x[0]), reverse=True)
1022
  all_comments_for_file = comments_data_list.copy()
 
1023
  comments_data_list = comments_data_list[:top_n]
1024
  completed_count = sum(1 for item in comments_data_list if item[2] in completed_items)
1025
  max_digits = max([len(str(item[0])) for item in comments_data_list] + [1])
 
1026
  inner_lines = []
1027
  for rank, item in enumerate(comments_data_list):
1028
  total, safe_text, raw_short, full_raw, c_id = item
 
1030
  display_text = f"<s>{safe_text}</s>" if raw_short in completed_items else safe_text
1031
  padded_total = str(total).rjust(max_digits, ' ')
1032
  inner_lines.append(f"{medal} <code>{padded_total}</code> 赞 | <i>{display_text}</i>")
 
1033
  beijing_tz = timezone(timedelta(hours=8))
1034
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1035
  inner_lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
 
 
1036
  if all_comments_for_file:
1037
  c_chat_str = str(discussion_chat_id).replace("-100", "") if discussion_chat_id else ""
1038
  list_html = ""
1039
  for rank, item in enumerate(all_comments_for_file, 1):
1040
  total, safe_text, raw_short, full_raw, c_id = item
1041
+ if c_chat_str and thread_id: link = f"tg://privatepost?channel={c_chat_str}&post={c_id}&thread={thread_id}"
1042
+ elif c_chat_str: link = f"tg://privatepost?channel={c_chat_str}&post={c_id}"
1043
+ else: link = "#"
 
 
 
 
1044
  display_name = html.escape(full_raw)
1045
+ if raw_short in completed_items: display_name = f"<s style='opacity: 0.5;'>{display_name}</s>"
 
 
1046
  list_html += f'<a href="{link}" class="item"><span class="rank">#{rank}</span><span class="name">{display_name}</span><span class="reacts">{total} 赞</span></a>'
 
1047
  html_template = f"""<!DOCTYPE html>
1048
+ <html lang="zh-CN"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>完整榜单 - {html.escape(table_title)}</title>
1049
+ <style>body{{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:#1a1a2e;color:#eaeaea;padding:15px;margin:0}}.container{{max-width:600px;margin:0 auto;background:#16213e;border-radius:12px;padding:15px;box-shadow:0 4px 15px rgba(0,0,0,0.3)}}h2{{text-align:center;color:#fff;margin-bottom:20px;font-size:18px;border-bottom:1px solid rgba(255,255,255,0.1);padding-bottom:10px;line-height:1.5}}.hint{{text-align:center;color:#5dade2;font-size:13px;margin-top:-10px;margin-bottom:15px;font-weight:500}}.item{{display:flex;justify-content:space-between;align-items:center;padding:12px 0;border-bottom:1px solid rgba(255,255,255,0.05);text-decoration:none;color:inherit;transition:background 0.2s}}.item:last-child{{border-bottom:none}}.item:active{{background:rgba(255,255,255,0.05);border-radius:8px}}.rank{{font-weight:bold;width:35px;color:#e94560;font-size:14px}}.name{{flex:1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;padding:0 10px;font-size:14px}}.reacts{{font-weight:bold;color:#5dade2;font-size:14px}}</style></head>
1050
+ <body><div class="container"><h2>📊 {html.escape(table_title)} <br><span style="font-size:12px;color:#8a8a9a;font-weight:normal">全量数据收录 | 更新于 {now_str}</span></h2><div class="hint">💡 点击名字跳转评论投票</div>{list_html}</div></body></html>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  cache_key = f"{ch_id}_{msg_id}"
1052
  HTML_CACHE[cache_key] = html_template
 
1053
  space_host = "bangdan.nine7.cc.cd"
1054
  file_msg_link = f"https://{space_host}/list/{cache_key}"
 
1055
  stats_section = f"<b>{html.escape(table_title)} ({completed_count}/{top_n}) <a href='{file_msg_link}'>完整名单</a></b>\n<blockquote>{chr(10).join(inner_lines)}</blockquote>"
 
1056
  else:
1057
  beijing_tz = timezone(timedelta(hours=8))
1058
  now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1059
  stats_section = f"<b>{html.escape(table_title)} (0/{top_n})</b>\n<blockquote>暂无评论数据\n\n⏳ <code>最后更新: {now_str} (北京时间)</code></blockquote>"
 
1060
  new_message_text = f"{base_html}{SEPARATOR_MARK}{blacklist_section}{stats_section}"
1061
  content_hash = f"{blacklist_section}|{stats_section}"
 
1062
  if task.get('last_html_stats') != content_hash:
1063
  try:
1064
  if original_msg.photo or original_msg.video or original_msg.document: bot.edit_message_caption(caption=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1065
  else: bot.edit_message_text(text=new_message_text, chat_id=ch_id, message_id=msg_id, parse_mode="HTML")
1066
  task['last_html_stats'] = content_hash; data_changed = True
1067
  except Exception: pass
 
1068
  task['last_run'] = current_time; data_changed = True; await asyncio.sleep(2)
1069
+ except Exception: pass
 
1070
  if data_changed: await asyncio.to_thread(save_data)
1071
 
1072
+ # 🔧 Fix 1 + Fix 2: 目录任务 - 扫描频道与多目标更新
1073
  async def update_channel_dirs():
1074
  current_time = int(time.time())
1075
  data_changed = False
1076
  for uid, u_data in DATA.get("users", {}).items():
1077
  tasks = u_data.get("dir_tasks", [])
1078
  for task in tasks:
1079
+ migrate_dir_task(task)
1080
+ scan_ch = int(task['scan_channel'])
1081
+ targets = task.get('targets', [])
1082
+ if not targets: continue
 
 
 
1083
  blacklist = task.get('blacklist', [])
1084
  tags_map = task.get('tags_map', {})
1085
  scanned_msgs = task.get('scanned_msgs', {})
 
1088
  is_first_run = not bool(scanned_msgs)
1089
  scan_kwargs = {'limit': None if is_first_run else 150}
1090
  new_tags_found = False
1091
+
1092
+ # 扫描 scan_channel (不一定与 target channel 相同)
 
 
 
 
 
 
 
 
 
1093
  try:
1094
+ async for msg in TL_CLIENT.iter_messages(scan_ch, **scan_kwargs):
 
 
1095
  msg_id_str = str(msg.id)
1096
  msg_time = msg.edit_date.timestamp() if msg.edit_date else msg.date.timestamp()
1097
  if scanned_msgs.get(msg_id_str) == msg_time: continue
 
1104
  html_text = tl_html.unparse(msg.raw_text, msg.entities)
1105
  text_without_bq = re.sub(r'<blockquote.*?>.*?</blockquote>', '', html_text, flags=re.DOTALL)
1106
  clean_text = html.unescape(re.sub(r'<.*?>', '', text_without_bq))
1107
+ found_tags = extract_tags(clean_text)
 
1108
  if found_tags:
1109
  tags_map[msg_id_str] = found_tags
1110
  new_tags_found = True
 
1112
  if msg_id_str in tags_map:
1113
  del tags_map[msg_id_str]
1114
  new_tags_found = True
 
1115
  task['last_run'] = current_time
1116
+ except Exception: continue
1117
+
1118
+ if new_tags_found or is_first_run:
1119
+ task['tags_map'] = tags_map
1120
+ task['scanned_msgs'] = scanned_msgs
1121
+ data_changed = True
1122
+ all_tags = set()
1123
+ for t_list in tags_map.values(): all_tags.update(t_list)
1124
+ active_tags = [t for t in all_tags if t not in blacklist]
1125
+ task['tags_cache'] = active_tags
1126
+
1127
+ # 构建目录文本
1128
+ directory_map = {}
1129
+ for tag in active_tags:
1130
+ key = classify_tag_key(tag)
1131
+ if key is None: continue
1132
+ if key not in directory_map: directory_map[key] = []
1133
+ directory_map[key].append(tag)
1134
+
1135
+ lines = ["目录:\n<blockquote expandable>"]
1136
+ keys = sorted(directory_map.keys())
1137
+ if "0-9" in keys: keys.remove("0-9"); keys.insert(0, "0-9")
1138
+ for key in keys:
1139
+ tags_line = " ".join([html.escape(t) for t in sorted(directory_map[key])])
1140
+ lines.append(f"{key}: {tags_line}\n")
1141
+ lines.append("</blockquote>")
1142
+ beijing_tz = timezone(timedelta(hours=8))
1143
+ now_str = datetime.now(beijing_tz).strftime("%m-%d %H:%M")
1144
+ lines.append(f"\n⏳ <code>最后更新: {now_str} (北京时间)</code>")
1145
+ stats_text = f"<blockquote>{chr(10).join(lines)}</blockquote>"
1146
+ task_name = task.get('task_name', '标签目录')
1147
+ safe_title_with_count = f"{html.escape(task_name)} ({len(active_tags)})"
1148
+
1149
+ # 🔧 Fix 1: 遍历所有 targets 更新
1150
+ last_html_dir = task.get('last_html_dir', {})
1151
+ if isinstance(last_html_dir, str): last_html_dir = {}
1152
+
1153
+ for tgt in targets:
1154
+ tgt_ch = int(tgt['channel_id'])
1155
+ tgt_msg = int(tgt['msg_id'])
1156
+ tgt_key = f"{tgt_ch}_{tgt_msg}"
1157
+ try:
1158
+ original_msg = await TL_CLIENT.get_messages(tgt_ch, ids=tgt_msg)
1159
+ if not original_msg: continue
1160
+ if original_msg.raw_text:
1161
+ raw_html = tl_html.unparse(original_msg.raw_text, original_msg.entities)
1162
+ base_html = raw_html.split("➖➖➖➖➖➖")[0].rstrip() if "➖➖➖➖➖➖" in raw_html else raw_html.rstrip()
1163
+ else: base_html = ""
1164
+ except Exception: continue
1165
+
1166
  new_message_text = f"{base_html}{SEPARATOR_MARK}<b>{safe_title_with_count}</b>\n{stats_text}"
1167
+ if len(new_message_text) > 4000:
1168
+ new_message_text = new_message_text[:4000] + "\n... </blockquote>\n⚠️ 目录过长已截断"
1169
+
1170
+ if last_html_dir.get(tgt_key) != stats_text:
1171
  try:
1172
+ if original_msg.photo or original_msg.video or original_msg.document:
1173
+ bot.edit_message_caption(caption=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1174
+ else:
1175
+ bot.edit_message_text(text=new_message_text, chat_id=tgt_ch, message_id=tgt_msg, parse_mode="HTML")
1176
+ last_html_dir[tgt_key] = stats_text
1177
  data_changed = True
1178
  except Exception: pass
1179
+ await asyncio.sleep(2)
1180
+
1181
+ task['last_html_dir'] = last_html_dir
1182
  if data_changed: await asyncio.to_thread(save_data)
1183
 
1184
  TL_CLIENT.start()
 
1200
 
1201
  def validate_webapp(req):
1202
  init_data = req.headers.get('X-Init-Data', '')
1203
+ if not init_data: return None
 
1204
  try:
1205
  parsed = {}
1206
  for part in init_data.split('&'):
 
1208
  k, v = part.split('=', 1)
1209
  parsed[k] = unquote(v)
1210
  check_hash = parsed.pop('hash', None)
1211
+ if not check_hash or not BOT_TOKEN: return None
 
1212
  data_check_string = "\n".join(f"{k}={parsed[k]}" for k in sorted(parsed.keys()))
1213
  secret_key = hmac.new(b"WebAppData", BOT_TOKEN.encode(), hashlib.sha256).digest()
1214
  computed = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
1215
+ if not hmac.compare_digest(computed, check_hash): return None
 
 
1216
  auth_date = int(parsed.get('auth_date', 0))
1217
+ if abs(time.time() - auth_date) > 86400: return None
 
 
1218
  user_obj = json.loads(parsed.get('user', '{}'))
1219
  uid = str(user_obj.get('id', ''))
1220
  if uid and uid not in DATA["users"]:
 
1229
  @wraps(f)
1230
  def wrapper(*args, **kwargs):
1231
  uid = validate_webapp(request)
1232
+ if not uid: return jsonify({"ok": False, "msg": "未授权"}), 401
1233
+ if check_rate_limit(uid): return jsonify({"ok": False, "msg": "操作太频繁,请稍后再试"}), 429
 
 
1234
  return f(uid, *args, **kwargs)
1235
  return wrapper
1236
 
1237
  @app.route('/')
1238
+ def home(): return "Bot is running"
 
1239
 
1240
  @app.route('/webapp')
1241
+ def webapp_page(): return send_file('webapp.html')
 
1242
 
 
1243
  @app.route('/list/<cache_key>')
1244
  def view_list(cache_key):
1245
  html_content = HTML_CACHE.get(cache_key)
1246
+ if not html_content: return "暂无数据或页面已刷新,请等待机器人下次更新", 404
 
1247
  return html_content
1248
 
1249
  @app.route('/api/data')
 
1317
  field, val = d["field"], d["value"]
1318
  try:
1319
  task = DATA["users"][uid]["stats_tasks"][idx]
1320
+ if field in ("top_n", "interval", "duration"): task[field] = int(val)
 
1321
  elif field == "msg_id":
1322
  if val.startswith('http'): val = val.split('/')[-1]
1323
  task["msg_id"] = val
 
1338
  elif field == "blacklist_title":
1339
  task["blacklist_title"] = "" if val.strip() == "无" else val.strip()
1340
  task["last_html_stats"] = ""
1341
+ else: task[field] = val
 
 
1342
  task["last_html_stats"] = ""
1343
  task["last_run"] = 0
 
1344
  save_data()
1345
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1346
  except Exception as e:
 
1355
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1356
  except: return jsonify({"ok": False, "msg": "索引无效"})
1357
 
1358
+ # 🔧 Fix 1: directory API - new task structure (scan_channel + list of targets)
1359
  @app.route('/api/dirs', methods=['POST'])
1360
  @need_auth
1361
  def api_add_dir(uid):
1362
  d = request.json
1363
+ scan_channel = d.get("scan_channel", d.get("channel_id", ""))
1364
+ targets = d.get("targets", [])
1365
+ if not targets:
1366
+ msg_id_raw = str(d.get("msg_id", ""))
1367
+ if msg_id_raw.startswith('http'): msg_id_raw = msg_id_raw.split('/')[-1]
1368
+ targets = [{"channel_id": d.get("channel_id", scan_channel), "msg_id": msg_id_raw}]
1369
  DATA["users"][uid].setdefault("dir_tasks", []).append({
1370
+ "task_name": d["task_name"], "scan_channel": scan_channel,
1371
+ "targets": targets, "blacklist": d.get("blacklist", []),
1372
+ "interval": 15, "tags_cache": [], "tags_map": {},
1373
+ "scanned_msgs": {}, "last_html_dir": {}
 
 
 
1374
  })
1375
  save_data()
1376
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1377
 
1378
+ # 🔧 Fix 1: directory edit API - supports add_target / rm_target
1379
  @app.route('/api/dirs/<int:idx>', methods=['PUT'])
1380
  @need_auth
1381
  def api_edit_dir(uid, idx):
 
1383
  field, val = d["field"], d["value"]
1384
  try:
1385
  task = DATA["users"][uid]["dir_tasks"][idx]
1386
+ migrate_dir_task(task)
1387
  if field == "add_blacklist":
1388
  task["blacklist"].extend(val.split())
1389
  task["blacklist"] = list(set(task["blacklist"]))
 
1392
  task["blacklist"] = [t for t in task["blacklist"] if t not in to_rem]
1393
  elif field == "interval":
1394
  task["interval"] = int(val)
1395
+ elif field == "add_target":
1396
+ parts = val.strip().split()
1397
+ if len(parts) >= 2:
1398
+ msg_id = parts[1]
1399
+ if msg_id.startswith('http'): msg_id = msg_id.split('/')[-1]
1400
+ task.setdefault("targets", []).append({"channel_id": parts[0], "msg_id": msg_id})
1401
+ elif field == "rm_target":
1402
+ tgt_idx = int(val)
1403
+ targets = task.get("targets", [])
1404
+ if 0 <= tgt_idx < len(targets): targets.pop(tgt_idx)
1405
  save_data()
1406
  return jsonify({"ok": True, "user": DATA["users"][uid]})
1407
  except Exception as e:
 
1492
  except Exception as e:
1493
  return jsonify({"ok": False, "msg": str(e)})
1494
 
 
1495
  @app.route('/api/events')
1496
  def api_events():
1497
  class FakeReq:
1498
  def __init__(self, init_data_str):
1499
  self.headers = {'X-Init-Data': init_data_str}
 
1500
  init_data_str = request.args.get('init_data', '')
1501
  uid = validate_webapp(FakeReq(init_data_str))
1502
+ if not uid: return jsonify({"ok": False, "msg": "未授权"}), 401
 
 
 
1503
  def stream():
1504
  q = _event_queues[uid]
1505
  yield f"data: {json.dumps({'type': 'connected', 'data': '🟢 实时连接已建立'})}\n\n"
 
1509
  yield f"data: {json.dumps(event)}\n\n"
1510
  except queue.Empty:
1511
  yield f": heartbeat\n\n"
1512
+ return Response(stream(), mimetype='text/event-stream', headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no', 'Connection': 'keep-alive'})
 
 
 
 
 
 
 
 
 
1513
 
1514
  @app.route('/api/health')
1515
  def api_health():
1516
+ return jsonify({"status": "ok", "users": len(DATA.get("users", {})), "msg_map": len(DATA.get("msg_map", {})), "telethon": TL_CLIENT is not None})
 
 
 
 
 
1517
 
 
1518
  if __name__ == "__main__":
1519
  Thread(target=lambda: app.run(host="0.0.0.0", port=7860), daemon=True).start()
1520
  Thread(target=start_telethon_worker, daemon=True).start()
 
1521
  print("🔄 正在清除旧连接...")
1522
  for attempt in range(5):
1523
  try:
 
1527
  except Exception as e:
1528
  print(f"⏳ 等待旧实例释放... ({attempt+1}/5) {e}")
1529
  time.sleep(3)
 
1530
  print("🤖 Telebot 主消息引擎已启动!")
1531
  print("🌐 Mini App 地址: http://localhost:7860/webapp")
 
1532
  while True:
1533
  try:
1534
+ bot.infinity_polling(timeout=60, long_polling_timeout=60, allowed_updates=["message", "callback_query", "channel_post", "edited_channel_post"])
 
 
 
 
 
1535
  except Exception as e:
1536
  print(f"❌ Polling 异常: {e}")
1537
  print("⏳ 10秒后重连...")
 
1539
  try:
1540
  bot.remove_webhook()
1541
  bot.get_updates(offset=-1, timeout=1)
1542
+ except: pass