Spaces: Sleeping
Update botsignal.py
Browse files - botsignal.py +29 -19
botsignal.py
CHANGED
|
@@ -4,7 +4,7 @@ import re
|
|
| 4 |
import io
|
| 5 |
import sqlite3
|
| 6 |
import hashlib
|
| 7 |
-
from collections import deque, defaultdict
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from mimetypes import guess_extension
|
| 10 |
from typing import List, Tuple, Optional, Dict
|
|
@@ -81,9 +81,6 @@ DRY_RUN = os.environ.get("DRY_RUN", "0") == "1"
|
|
| 81 |
# Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
|
| 82 |
BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
|
| 83 |
|
| 84 |
-
# Opsional: bypass gating support bila keyword dominan adalah $ticker (default ON)
|
| 85 |
-
SUPPORT_TICKER_BYPASS = os.environ.get("SUPPORT_TICKER_BYPASS", "1") == "1"
|
| 86 |
-
|
| 87 |
|
| 88 |
# ========= Client bootstrap =========
|
| 89 |
def build_client() -> TelegramClient:
|
|
@@ -236,6 +233,9 @@ def score_relevance(text: str, keywords: List[str]) -> float:
|
|
| 236 |
|
| 237 |
return exact_score + fuzzy_score
|
| 238 |
|
|
|
|
|
|
|
|
|
|
| 239 |
def hash_for_dedup(text: str, msg) -> str:
|
| 240 |
"""Hash campuran (lama) – menahan duplikat per pesan+media."""
|
| 241 |
parts = [text or ""]
|
|
@@ -254,7 +254,7 @@ def hash_for_dedup(text: str, msg) -> str:
|
|
| 254 |
|
| 255 |
def content_only_hash(text: str) -> str:
|
| 256 |
"""Hash berbasis isi saja (untuk lintas-grup crosspost)."""
|
| 257 |
-
norm =
|
| 258 |
return hashlib.sha1(norm.encode("utf-8", errors="ignore")).hexdigest()
|
| 259 |
|
| 260 |
def is_image_message(msg) -> bool:
|
|
@@ -360,7 +360,7 @@ def filter_invite_sentences(text: str) -> str:
|
|
| 360 |
|
| 361 |
# ========= Post-on-threshold with EDIT (persisted) =========
|
| 362 |
TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
|
| 363 |
-
last_posted: Dict[str, Dict[str, object]] = {}
|
| 364 |
|
| 365 |
async def _send_initial(msg, text: str) -> int:
|
| 366 |
if DRY_RUN:
|
|
@@ -393,9 +393,6 @@ async def _send_initial(msg, text: str) -> int:
|
|
| 393 |
except Exception as e:
|
| 394 |
debug_log("Gagal kirim media awal, fallback text", str(e))
|
| 395 |
try:
|
| 396 |
-
if DRY_RUN:
|
| 397 |
-
print("[DRY_RUN] send_message:", text[:140])
|
| 398 |
-
return -1
|
| 399 |
m = await client.send_message(TARGET_CHAT, text, link_preview=True)
|
| 400 |
return m.id
|
| 401 |
except FloodWaitError as e:
|
|
@@ -403,7 +400,7 @@ async def _send_initial(msg, text: str) -> int:
|
|
| 403 |
return await _send_initial(msg, text)
|
| 404 |
|
| 405 |
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
|
| 406 |
-
prefix = f"[{new_tier.
|
| 407 |
text = prefix + body
|
| 408 |
prev = last_posted.get(keyword)
|
| 409 |
if not prev:
|
|
@@ -415,10 +412,7 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> Non
|
|
| 415 |
|
| 416 |
if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
|
| 417 |
try:
|
| 418 |
-
|
| 419 |
-
print(f"[DRY_RUN] edit_message (kw={keyword}) -> {text[:140]}")
|
| 420 |
-
else:
|
| 421 |
-
await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
|
| 422 |
prev["tier"] = new_tier
|
| 423 |
if prev["msg_id"] != -1:
|
| 424 |
db_save_last_posted(keyword, prev["msg_id"], new_tier)
|
|
@@ -556,11 +550,23 @@ def _choose_dominant_keyword(text_norm: str, kws: List[str]) -> Optional[str]:
|
|
| 556 |
def _role_of(chat_id: int) -> str:
|
| 557 |
return chat_roles.get(chat_id, "core")
|
| 558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
async def process_message(msg, source_chat_id: int) -> None:
|
| 561 |
"""
|
| 562 |
Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
|
| 563 |
-
agregasi tier, gating support, filter ajakan, dan POST/EDIT.
|
| 564 |
"""
|
| 565 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 566 |
text_norm = normalize_for_filter(orig_text).lower()
|
|
@@ -605,12 +611,16 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 605 |
now = datetime.now(timezone.utc)
|
| 606 |
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 607 |
|
| 608 |
-
# Gating SUPPORT
|
| 609 |
if role == "support":
|
| 610 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
pass
|
| 612 |
-
elif
|
| 613 |
-
debug_log(f"Support ditahan (
|
| 614 |
return
|
| 615 |
|
| 616 |
# Filter kalimat ajakan (whitelist-aware)
|
|
|
|
| 4 |
import io
|
| 5 |
import sqlite3
|
| 6 |
import hashlib
|
| 7 |
+
from collections import deque, defaultdict
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from mimetypes import guess_extension
|
| 10 |
from typing import List, Tuple, Optional, Dict
|
|
|
|
| 81 |
# Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
|
| 82 |
BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# ========= Client bootstrap =========
|
| 86 |
def build_client() -> TelegramClient:
|
|
|
|
| 233 |
|
| 234 |
return exact_score + fuzzy_score
|
| 235 |
|
| 236 |
+
def _strip_urls_mentions_only(s: str) -> str:
|
| 237 |
+
return _strip_urls_and_mentions(s)
|
| 238 |
+
|
| 239 |
def hash_for_dedup(text: str, msg) -> str:
|
| 240 |
"""Hash campuran (lama) – menahan duplikat per pesan+media."""
|
| 241 |
parts = [text or ""]
|
|
|
|
| 254 |
|
| 255 |
def content_only_hash(text: str) -> str:
|
| 256 |
"""Hash berbasis isi saja (untuk lintas-grup crosspost)."""
|
| 257 |
+
norm = _strip_urls_mentions_only(normalize_for_filter(text))
|
| 258 |
return hashlib.sha1(norm.encode("utf-8", errors="ignore")).hexdigest()
|
| 259 |
|
| 260 |
def is_image_message(msg) -> bool:
|
|
|
|
| 360 |
|
| 361 |
# ========= Post-on-threshold with EDIT (persisted) =========
|
| 362 |
TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
|
| 363 |
+
last_posted: Dict[str, Dict[str, object]] = {}
|
| 364 |
|
| 365 |
async def _send_initial(msg, text: str) -> int:
|
| 366 |
if DRY_RUN:
|
|
|
|
| 393 |
except Exception as e:
|
| 394 |
debug_log("Gagal kirim media awal, fallback text", str(e))
|
| 395 |
try:
|
|
|
|
|
|
|
|
|
|
| 396 |
m = await client.send_message(TARGET_CHAT, text, link_preview=True)
|
| 397 |
return m.id
|
| 398 |
except FloodWaitError as e:
|
|
|
|
| 400 |
return await _send_initial(msg, text)
|
| 401 |
|
| 402 |
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
|
| 403 |
+
prefix = f"[{new_tier.upper()}] "
|
| 404 |
text = prefix + body
|
| 405 |
prev = last_posted.get(keyword)
|
| 406 |
if not prev:
|
|
|
|
| 412 |
|
| 413 |
if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
|
| 414 |
try:
|
| 415 |
+
await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
|
|
|
|
|
|
|
|
|
|
| 416 |
prev["tier"] = new_tier
|
| 417 |
if prev["msg_id"] != -1:
|
| 418 |
db_save_last_posted(keyword, prev["msg_id"], new_tier)
|
|
|
|
| 550 |
def _role_of(chat_id: int) -> str:
|
| 551 |
return chat_roles.get(chat_id, "core")
|
| 552 |
|
| 553 |
+
def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
|
| 554 |
+
"""
|
| 555 |
+
Hitung jumlah grup unik yang menyebut 'keyword' dalam window aktif,
|
| 556 |
+
dipisah CORE vs SUPPORT.
|
| 557 |
+
"""
|
| 558 |
+
bucket = keyword_group_last_seen.get(keyword, {})
|
| 559 |
+
core_ids, sup_ids = set(), set()
|
| 560 |
+
for gk in bucket.keys():
|
| 561 |
+
role = chat_roles.get(int(gk), "core")
|
| 562 |
+
(core_ids if role == "core" else sup_ids).add(gk)
|
| 563 |
+
return len(core_ids), len(sup_ids)
|
| 564 |
+
|
| 565 |
|
| 566 |
async def process_message(msg, source_chat_id: int) -> None:
|
| 567 |
"""
|
| 568 |
Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
|
| 569 |
+
agregasi tier, gating support (CORE-anchored), filter ajakan, dan POST/EDIT.
|
| 570 |
"""
|
| 571 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 572 |
text_norm = normalize_for_filter(orig_text).lower()
|
|
|
|
| 611 |
now = datetime.now(timezone.utc)
|
| 612 |
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 613 |
|
| 614 |
+
# Gating SUPPORT (CORE-anchored)
|
| 615 |
if role == "support":
|
| 616 |
+
core_u, sup_u = _unique_counts_by_role(main_kw)
|
| 617 |
+
# Aturan:
|
| 618 |
+
# - Jika sudah ada minimal 1 sebutan dari CORE untuk keyword ini -> izinkan.
|
| 619 |
+
# - Jika belum ada anchor CORE, SUPPORT harus >= SUPPORT_MIN_UNIQUE.
|
| 620 |
+
if core_u >= 1:
|
| 621 |
pass
|
| 622 |
+
elif sup_u < SUPPORT_MIN_UNIQUE:
|
| 623 |
+
debug_log(f"Support ditahan (core_u={core_u}, sup_u={sup_u} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 624 |
return
|
| 625 |
|
| 626 |
# Filter kalimat ajakan (whitelist-aware)
|