agus1111 committed on
Commit
49b0d44
·
verified ·
1 Parent(s): 5c821c2

Update botsignal.py

Browse files
Files changed (1) hide show
  1. botsignal.py +29 -19
botsignal.py CHANGED
@@ -4,7 +4,7 @@ import re
4
  import io
5
  import sqlite3
6
  import hashlib
7
- from collections import deque, defaultdict, Counter
8
  from datetime import datetime, timedelta, timezone
9
  from mimetypes import guess_extension
10
  from typing import List, Tuple, Optional, Dict
@@ -81,9 +81,6 @@ DRY_RUN = os.environ.get("DRY_RUN", "0") == "1"
81
  # Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
82
  BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
83
 
84
- # Opsional: bypass gating support bila keyword dominan adalah $ticker (default ON)
85
- SUPPORT_TICKER_BYPASS = os.environ.get("SUPPORT_TICKER_BYPASS", "1") == "1"
86
-
87
 
88
  # ========= Client bootstrap =========
89
  def build_client() -> TelegramClient:
@@ -236,6 +233,9 @@ def score_relevance(text: str, keywords: List[str]) -> float:
236
 
237
  return exact_score + fuzzy_score
238
 
 
 
 
239
  def hash_for_dedup(text: str, msg) -> str:
240
  """Hash campuran (lama) – menahan duplikat per pesan+media."""
241
  parts = [text or ""]
@@ -254,7 +254,7 @@ def hash_for_dedup(text: str, msg) -> str:
254
 
255
  def content_only_hash(text: str) -> str:
256
  """Hash berbasis isi saja (untuk lintas-grup crosspost)."""
257
- norm = _strip_urls_and_mentions(normalize_for_filter(text))
258
  return hashlib.sha1(norm.encode("utf-8", errors="ignore")).hexdigest()
259
 
260
  def is_image_message(msg) -> bool:
@@ -360,7 +360,7 @@ def filter_invite_sentences(text: str) -> str:
360
 
361
  # ========= Post-on-threshold with EDIT (persisted) =========
362
  TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
363
- last_posted: Dict[str, Dict[str, object]] = {} # keyword -> {"msg_id": int, "tier": str}
364
 
365
  async def _send_initial(msg, text: str) -> int:
366
  if DRY_RUN:
@@ -393,9 +393,6 @@ async def _send_initial(msg, text: str) -> int:
393
  except Exception as e:
394
  debug_log("Gagal kirim media awal, fallback text", str(e))
395
  try:
396
- if DRY_RUN:
397
- print("[DRY_RUN] send_message:", text[:140])
398
- return -1
399
  m = await client.send_message(TARGET_CHAT, text, link_preview=True)
400
  return m.id
401
  except FloodWaitError as e:
@@ -403,7 +400,7 @@ async def _send_initial(msg, text: str) -> int:
403
  return await _send_initial(msg, text)
404
 
405
  async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
406
- prefix = f"[{new_tier.UPPER()}] " if hasattr(new_tier, "UPPER") else f"[{new_tier.upper()}] "
407
  text = prefix + body
408
  prev = last_posted.get(keyword)
409
  if not prev:
@@ -415,10 +412,7 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> Non
415
 
416
  if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
417
  try:
418
- if DRY_RUN:
419
- print(f"[DRY_RUN] edit_message (kw={keyword}) -> {text[:140]}")
420
- else:
421
- await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
422
  prev["tier"] = new_tier
423
  if prev["msg_id"] != -1:
424
  db_save_last_posted(keyword, prev["msg_id"], new_tier)
@@ -556,11 +550,23 @@ def _choose_dominant_keyword(text_norm: str, kws: List[str]) -> Optional[str]:
556
  def _role_of(chat_id: int) -> str:
557
  return chat_roles.get(chat_id, "core")
558
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  async def process_message(msg, source_chat_id: int) -> None:
561
  """
562
  Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
563
- agregasi tier, gating support, filter ajakan, dan POST/EDIT.
564
  """
565
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
566
  text_norm = normalize_for_filter(orig_text).lower()
@@ -605,12 +611,16 @@ async def process_message(msg, source_chat_id: int) -> None:
605
  now = datetime.now(timezone.utc)
606
  class_label, unique_groups = update_and_classify(main_kw, group_key, now)
607
 
608
- # Gating SUPPORT: izinkan $ticker bila SUPPORT_TICKER_BYPASS aktif
609
  if role == "support":
610
- if main_kw.startswith("$") and SUPPORT_TICKER_BYPASS:
 
 
 
 
611
  pass
612
- elif unique_groups < SUPPORT_MIN_UNIQUE:
613
- debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
614
  return
615
 
616
  # Filter kalimat ajakan (whitelist-aware)
 
4
  import io
5
  import sqlite3
6
  import hashlib
7
+ from collections import deque, defaultdict
8
  from datetime import datetime, timedelta, timezone
9
  from mimetypes import guess_extension
10
  from typing import List, Tuple, Optional, Dict
 
81
  # Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
82
  BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
83
 
 
 
 
84
 
85
  # ========= Client bootstrap =========
86
  def build_client() -> TelegramClient:
 
233
 
234
  return exact_score + fuzzy_score
235
 
236
def _strip_urls_mentions_only(s: str) -> str:
    """Return ``s`` after passing it through ``_strip_urls_and_mentions``.

    This is a thin alias: the argument is forwarded unchanged and the
    helper's result is returned as-is.
    """
    stripped = _strip_urls_and_mentions(s)
    return stripped
238
+
239
  def hash_for_dedup(text: str, msg) -> str:
240
  """Hash campuran (lama) – menahan duplikat per pesan+media."""
241
  parts = [text or ""]
 
254
 
255
  def content_only_hash(text: str) -> str:
256
  """Hash berbasis isi saja (untuk lintas-grup crosspost)."""
257
+ norm = _strip_urls_mentions_only(normalize_for_filter(text))
258
  return hashlib.sha1(norm.encode("utf-8", errors="ignore")).hexdigest()
259
 
260
  def is_image_message(msg) -> bool:
 
360
 
361
  # ========= Post-on-threshold with EDIT (persisted) =========
362
  TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
363
+ last_posted: Dict[str, Dict[str, object]] = {}
364
 
365
  async def _send_initial(msg, text: str) -> int:
366
  if DRY_RUN:
 
393
  except Exception as e:
394
  debug_log("Gagal kirim media awal, fallback text", str(e))
395
  try:
 
 
 
396
  m = await client.send_message(TARGET_CHAT, text, link_preview=True)
397
  return m.id
398
  except FloodWaitError as e:
 
400
  return await _send_initial(msg, text)
401
 
402
  async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
403
+ prefix = f"[{new_tier.upper()}] "
404
  text = prefix + body
405
  prev = last_posted.get(keyword)
406
  if not prev:
 
412
 
413
  if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
414
  try:
415
+ await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
 
 
 
416
  prev["tier"] = new_tier
417
  if prev["msg_id"] != -1:
418
  db_save_last_posted(keyword, prev["msg_id"], new_tier)
 
550
  def _role_of(chat_id: int) -> str:
551
  return chat_roles.get(chat_id, "core")
552
 
553
def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
    """Count the groups recorded for ``keyword``, split into CORE vs SUPPORT.

    Reads the per-keyword bucket from ``keyword_group_last_seen`` and
    classifies each group key through ``chat_roles``; a group with no
    entry there counts as "core", and every role other than "core" is
    counted as support.

    NOTE(review): no time-window pruning happens here; presumably the
    bucket is already limited to the active window by whoever maintains
    ``keyword_group_last_seen`` - confirm.

    Returns:
        ``(core_count, support_count)``.
    """
    seen_groups = keyword_group_last_seen.get(keyword, {})
    core_count = 0
    support_count = 0
    # Bucket keys are already unique, so plain counters are sufficient.
    for group_key in seen_groups:
        # chat_roles is looked up by integer chat id, hence the int() cast.
        if chat_roles.get(int(group_key), "core") == "core":
            core_count += 1
        else:
            support_count += 1
    return core_count, support_count
564
+
565
 
566
  async def process_message(msg, source_chat_id: int) -> None:
567
  """
568
  Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
569
+ agregasi tier, gating support (CORE-anchored), filter ajakan, dan POST/EDIT.
570
  """
571
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
572
  text_norm = normalize_for_filter(orig_text).lower()
 
611
  now = datetime.now(timezone.utc)
612
  class_label, unique_groups = update_and_classify(main_kw, group_key, now)
613
 
614
+ # Gating SUPPORT (CORE-anchored)
615
  if role == "support":
616
+ core_u, sup_u = _unique_counts_by_role(main_kw)
617
+ # Aturan:
618
+ # - Jika sudah ada minimal 1 sebutan dari CORE untuk keyword ini -> izinkan.
619
+ # - Jika belum ada anchor CORE, SUPPORT harus >= SUPPORT_MIN_UNIQUE.
620
+ if core_u >= 1:
621
  pass
622
+ elif sup_u < SUPPORT_MIN_UNIQUE:
623
+ debug_log(f"Support ditahan (core_u={core_u}, sup_u={sup_u} < {SUPPORT_MIN_UNIQUE})", orig_text)
624
  return
625
 
626
  # Filter kalimat ajakan (whitelist-aware)