Spaces:
Sleeping
Sleeping
Update botsignal.py
Browse files- botsignal.py +207 -373
botsignal.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import os
|
| 3 |
import re
|
|
@@ -14,42 +15,40 @@ from telethon import TelegramClient, events
|
|
| 14 |
from telethon.sessions import StringSession, MemorySession
|
| 15 |
from telethon.errors.rpcerrorlist import FloodWaitError
|
| 16 |
|
| 17 |
-
|
| 18 |
-
# ========= Configuration via Environment =========
|
| 19 |
API_ID = int(os.environ.get("API_ID", "0"))
|
| 20 |
API_HASH = os.environ.get("API_HASH", "")
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
#
|
| 24 |
-
CORE_CHATS = [
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"https://t.me/ChinaPumpCommunity",
|
| 28 |
-
"https://t.me/SephirothGemCalls1",
|
| 29 |
-
"https://t.me/GM_Degencalls",
|
| 30 |
-
"https://t.me/Enthucalls",
|
| 31 |
-
"https://t.me/kobecalls",
|
| 32 |
-
"https://t.me/Kulture_Kall",
|
| 33 |
-
]
|
| 34 |
-
SUPPORT_CHATS = [
|
| 35 |
-
"https://t.me/TheDonALPHAJournal",
|
| 36 |
-
"https://t.me/savascalls",
|
| 37 |
-
"https://t.me/Tanjirocall",
|
| 38 |
-
"https://t.me/ChapoInsider",
|
| 39 |
-
"https://t.me/millionsgems",
|
| 40 |
-
"https://t.me/Milagrosdegencalls",
|
| 41 |
-
"https://t.me/kariusgemscalls",
|
| 42 |
-
"https://t.me/Dwen_Exchange",
|
| 43 |
-
"https://t.me/bat_gamble",
|
| 44 |
-
"https://t.me/BatmanGamble",
|
| 45 |
-
"https://t.me/hulkgemscalls_real",
|
| 46 |
-
"https://t.me/MineGems",
|
| 47 |
-
]
|
| 48 |
SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
|
| 49 |
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
# Kata kunci topik + simbol '$' tetap dipakai
|
| 53 |
THEME_KEYWORDS = [
|
| 54 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 55 |
"pump", "spot", "futures", "setup",
|
|
@@ -73,6 +72,7 @@ DEDUP_BUFFER_SIZE = int(os.environ.get("DEDUP_BUFFER_SIZE", "800"))
|
|
| 73 |
|
| 74 |
CLASS_WINDOW_MINUTES = int(os.environ.get("CLASS_WINDOW_MINUTES", "10"))
|
| 75 |
|
|
|
|
| 76 |
SUPPORT_MIN_UNIQUE = int(os.environ.get("SUPPORT_MIN_UNIQUE", "2"))
|
| 77 |
|
| 78 |
# New: DRY RUN (tidak kirim apa pun ke TARGET_CHAT)
|
|
@@ -81,30 +81,34 @@ DRY_RUN = os.environ.get("DRY_RUN", "0") == "1"
|
|
| 81 |
# Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
|
| 82 |
BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
# ======
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
print(">> Using MemorySession (login tiap run).")
|
| 91 |
-
return TelegramClient(MemorySession(), API_ID, API_HASH)
|
| 92 |
|
| 93 |
-
client =
|
| 94 |
-
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 95 |
-
recent_content_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE) # NEW: content-only dedup
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
chat_roles: Dict[int, str] = {} # diisi saat startup setelah resolve entity
|
| 99 |
startup_time_utc = datetime.now(timezone.utc)
|
| 100 |
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
#
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
def _db():
|
| 106 |
conn = sqlite3.connect(DB_PATH)
|
| 107 |
conn.execute("PRAGMA journal_mode=WAL;")
|
|
|
|
| 108 |
return conn
|
| 109 |
|
| 110 |
def _init_db():
|
|
@@ -137,83 +141,29 @@ def db_load_state():
|
|
| 137 |
conn.close()
|
| 138 |
return last, kw_map
|
| 139 |
|
| 140 |
-
def
|
| 141 |
-
conn = _db()
|
| 142 |
-
conn.execute("INSERT INTO last_posted(keyword, msg_id, tier) VALUES(?,?,?) "
|
| 143 |
-
"ON CONFLICT(keyword) DO UPDATE SET msg_id=excluded.msg_id, tier=excluded.tier",
|
| 144 |
-
(keyword, msg_id, tier))
|
| 145 |
-
conn.commit()
|
| 146 |
-
conn.close()
|
| 147 |
-
|
| 148 |
-
def db_upsert_kw_seen(keyword: str, group_key: str, ts: datetime):
|
| 149 |
conn = _db()
|
| 150 |
-
conn.execute(
|
| 151 |
-
|
| 152 |
-
|
|
|
|
| 153 |
conn.commit()
|
| 154 |
conn.close()
|
| 155 |
|
| 156 |
-
def
|
| 157 |
conn = _db()
|
| 158 |
-
conn.execute(
|
|
|
|
|
|
|
|
|
|
| 159 |
conn.commit()
|
| 160 |
conn.close()
|
| 161 |
|
| 162 |
-
|
| 163 |
-
#
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def normalize_for_filter(text: str) -> str:
|
| 169 |
-
if not text:
|
| 170 |
-
return ""
|
| 171 |
-
s = re.sub(r"(?m)^>.*", "", text)
|
| 172 |
-
s = re.sub(r"\s+", " ", s).strip()
|
| 173 |
-
return s
|
| 174 |
-
|
| 175 |
-
def _tokenize_words(s: str) -> List[str]:
|
| 176 |
-
return re.findall(r"[a-z0-9\$\#]{1,64}", s.lower())
|
| 177 |
-
|
| 178 |
-
def _windows(tokens: List[str], size: int = 20):
|
| 179 |
-
for i in range(0, len(tokens), size):
|
| 180 |
-
yield " ".join(tokens[i:i+size])
|
| 181 |
-
|
| 182 |
-
# --- Tambahan: bersihkan URL/CA untuk kepentingan SCORING relevansi ---
|
| 183 |
-
CA_SOL_RE = re.compile(r"\b[1-9A-HJ-NP-Za-km-z]{32,48}\b") # Solana base58 (perkiraan)
|
| 184 |
-
CA_EVM_RE = re.compile(r"\b0x[a-fA-F0-9]{40}\b") # EVM address
|
| 185 |
-
CA_LABEL_RE = re.compile(r"\bCA\s*[:=]\s*\S+", re.IGNORECASE) # "CA: ..." potong tokennya
|
| 186 |
-
|
| 187 |
-
def _strip_urls_and_mentions(s: str) -> str:
|
| 188 |
-
s = re.sub(r"https?://\S+", "", s)
|
| 189 |
-
s = re.sub(r"t\.me/[A-Za-z0-9_]+", "", s)
|
| 190 |
-
s = re.sub(r"@[A-Za-z0-9_]+", "", s)
|
| 191 |
-
return re.sub(r"\s+", " ", s).strip()
|
| 192 |
-
|
| 193 |
-
def strip_contracts_for_scoring(s: str) -> str:
|
| 194 |
-
"""
|
| 195 |
-
Hilangkan URL/mention, alamat kontrak, dan token setelah 'CA:'
|
| 196 |
-
agar kata 'pump' pada CA/URL (mis. pump.fun) tidak memengaruhi skor.
|
| 197 |
-
"""
|
| 198 |
-
s0 = _strip_urls_and_mentions(s)
|
| 199 |
-
s1 = CA_LABEL_RE.sub(" ", s0)
|
| 200 |
-
s2 = CA_EVM_RE.sub(" ", s1)
|
| 201 |
-
s3 = CA_SOL_RE.sub(" ", s2)
|
| 202 |
-
return re.sub(r"\s+", " ", s3).strip()
|
| 203 |
-
|
| 204 |
-
def score_relevance(text: str, keywords: List[str]) -> float:
|
| 205 |
-
"""Skor: exact keyword + fuzzy windowed (top-3 rata-rata) agar adil untuk teks panjang."""
|
| 206 |
-
if not text:
|
| 207 |
-
return 0.0
|
| 208 |
-
|
| 209 |
-
# Gunakan versi yang TIDAK mengandung URL/CA agar 'pump' di CA tidak ikut dihitung
|
| 210 |
-
t = strip_contracts_for_scoring(text).lower()
|
| 211 |
-
|
| 212 |
-
# exact hits (unik)
|
| 213 |
-
exact_hits = 0
|
| 214 |
-
for kw in set(keywords):
|
| 215 |
-
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 216 |
-
exact_hits += 1
|
| 217 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 218 |
|
| 219 |
# fuzzy windowed: ambil top-3 skor di antara jendela 20 token
|
|
@@ -245,89 +195,53 @@ def hash_for_dedup(text: str, msg) -> str:
|
|
| 245 |
if doc and getattr(doc, "id", None) is not None:
|
| 246 |
parts.append(f"doc:{doc.id}")
|
| 247 |
if getattr(msg, "photo", None) is not None:
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
parts.append(f"photo:{ph_id}")
|
| 252 |
-
raw = "|".join(parts).encode("utf-8", errors="ignore")
|
| 253 |
-
return hashlib.sha1(raw).hexdigest()
|
| 254 |
|
| 255 |
def content_only_hash(text: str) -> str:
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
def is_image_message(msg) -> bool:
|
| 261 |
if getattr(msg, "photo", None) is not None:
|
| 262 |
return True
|
| 263 |
doc = getattr(msg, "document", None)
|
| 264 |
-
if
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
if mt in ("video/mp4", "image/gif"):
|
| 273 |
-
return True
|
| 274 |
-
return False
|
| 275 |
|
| 276 |
def media_too_big(msg) -> bool:
|
| 277 |
doc = getattr(msg, "document", None)
|
| 278 |
-
if
|
| 279 |
-
return
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
|
| 285 |
-
|
| 286 |
-
def _prune_expired(now: datetime) -> None:
|
| 287 |
-
window = timedelta(minutes=CLASS_WINDOW_MINUTES)
|
| 288 |
-
cutoff = now - window
|
| 289 |
-
# in-memory prune
|
| 290 |
-
for kw, m in list(keyword_group_last_seen.items()):
|
| 291 |
-
for gk, ts in list(m.items()):
|
| 292 |
-
if ts < cutoff:
|
| 293 |
-
del m[gk]
|
| 294 |
-
if not m:
|
| 295 |
-
del keyword_group_last_seen[kw]
|
| 296 |
-
# db prune
|
| 297 |
-
db_prune_expired(cutoff)
|
| 298 |
-
|
| 299 |
-
def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] = None) -> Tuple[str, int]:
|
| 300 |
-
if not now:
|
| 301 |
-
now = datetime.now(timezone.utc)
|
| 302 |
-
_prune_expired(now)
|
| 303 |
-
|
| 304 |
-
bucket = keyword_group_last_seen[keyword]
|
| 305 |
-
bucket[group_key] = now
|
| 306 |
-
db_upsert_kw_seen(keyword, group_key, now)
|
| 307 |
-
|
| 308 |
-
unique_groups = len(bucket)
|
| 309 |
-
if unique_groups >= 4:
|
| 310 |
-
return "kuat", unique_groups
|
| 311 |
-
elif unique_groups >= 2:
|
| 312 |
-
return "sedang", unique_groups
|
| 313 |
-
else:
|
| 314 |
-
return "rendah", unique_groups
|
| 315 |
-
|
| 316 |
|
| 317 |
-
# ======
|
| 318 |
INVITE_PATTERNS = [
|
| 319 |
-
r"\bjoin\b", r"\
|
| 320 |
-
r"\
|
| 321 |
-
r"\
|
| 322 |
-
r"\bvip\b", r"\bpremium\b", r"\bberbayar\b", r"\bpaid\b", r"\bexclusive\b",
|
| 323 |
-
r"\bwhitelist\b", r"\bprivate( group| channel)?\b", r"\bmembership?\b",
|
| 324 |
-
r"\bsubscribe\b", r"\blangganan\b",
|
| 325 |
-
r"(t\.me\/joinchat|t\.me\/\+|telegram\.me\/|discord\.gg\/|wa\.me\/|whatsapp\.com\/)",
|
| 326 |
-
r"(bit\.ly|tinyurl\.com|linktr\.ee)",
|
| 327 |
-
r"From TG channel:",
|
| 328 |
r"\bpromo\b", r"\bpromosi\b", r"\biklan\b",
|
| 329 |
-
r"\badvert\b", r"\badvertise\b", r"\badvertisement\b"
|
| 330 |
-
|
| 331 |
]
|
| 332 |
INVITE_REGEXES = [re.compile(p, re.IGNORECASE) for p in INVITE_PATTERNS]
|
| 333 |
|
|
@@ -343,7 +257,7 @@ def _is_invite_sentence(s: str) -> bool:
|
|
| 343 |
t = s.strip()
|
| 344 |
if not t:
|
| 345 |
return False
|
| 346 |
-
# Jika kalimat memuat sinyal kuat, jangan dibuang walau ada kata invite
|
| 347 |
if any(r.search(t) for r in WHITELIST_REGEXES):
|
| 348 |
return False
|
| 349 |
# Jika ada 1+ pola ajakan, buang
|
|
@@ -358,77 +272,35 @@ def filter_invite_sentences(text: str) -> str:
|
|
| 358 |
cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
|
| 359 |
return cleaned
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
# ========= Post-on-threshold with EDIT (persisted) =========
|
| 363 |
-
TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
|
| 364 |
-
last_posted: Dict[str, Dict[str, object]] = {}
|
| 365 |
-
|
| 366 |
-
async def _send_initial(msg, text: str) -> int:
|
| 367 |
if DRY_RUN:
|
| 368 |
-
print("[DRY_RUN]
|
| 369 |
-
return -1
|
| 370 |
-
# kirim media bila ada & allowed
|
| 371 |
-
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 372 |
-
try:
|
| 373 |
-
if getattr(msg, "photo", None):
|
| 374 |
-
m = await client.send_file(TARGET_CHAT, msg.photo, caption=text, caption_entities=None, force_document=False)
|
| 375 |
-
return m.id
|
| 376 |
-
doc = getattr(msg, "document", None)
|
| 377 |
-
if doc:
|
| 378 |
-
data = await client.download_media(msg, file=bytes)
|
| 379 |
-
if data:
|
| 380 |
-
bio = io.BytesIO(data)
|
| 381 |
-
ext = ".jpg"
|
| 382 |
-
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 383 |
-
if mt:
|
| 384 |
-
ext_guess = guess_extension(mt) or ".jpg"
|
| 385 |
-
if ext_guess == ".jpe":
|
| 386 |
-
ext_guess = ".jpg"
|
| 387 |
-
ext = ext_guess
|
| 388 |
-
bio.name = f"media{ext}"
|
| 389 |
-
m = await client.send_file(TARGET_CHAT, bio, caption=text, caption_entities=None, force_document=False)
|
| 390 |
-
return m.id
|
| 391 |
-
except FloodWaitError as e:
|
| 392 |
-
await asyncio.sleep(e.seconds + 1)
|
| 393 |
-
return await _send_initial(msg, text)
|
| 394 |
-
except Exception as e:
|
| 395 |
-
debug_log("Gagal kirim media awal, fallback text", str(e))
|
| 396 |
-
try:
|
| 397 |
-
m = await client.send_message(TARGET_CHAT, text, link_preview=True)
|
| 398 |
-
return m.id
|
| 399 |
-
except FloodWaitError as e:
|
| 400 |
-
await asyncio.sleep(e.seconds + 1)
|
| 401 |
-
return await _send_initial(msg, text)
|
| 402 |
-
|
| 403 |
-
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
|
| 404 |
-
prefix = f"[{new_tier.upper()}] "
|
| 405 |
-
text = prefix + body
|
| 406 |
-
prev = last_posted.get(keyword)
|
| 407 |
-
if not prev:
|
| 408 |
-
msg_id = await _send_initial(src_msg, text)
|
| 409 |
-
last_posted[keyword] = {"msg_id": msg_id, "tier": new_tier}
|
| 410 |
-
if msg_id != -1:
|
| 411 |
-
db_save_last_posted(keyword, msg_id, new_tier)
|
| 412 |
return
|
| 413 |
|
| 414 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
try:
|
| 416 |
-
await client.edit_message(TARGET_CHAT,
|
| 417 |
-
|
| 418 |
-
if prev["msg_id"] != -1:
|
| 419 |
-
db_save_last_posted(keyword, prev["msg_id"], new_tier)
|
| 420 |
-
except FloodWaitError as e:
|
| 421 |
-
await asyncio.sleep(e.seconds + 1)
|
| 422 |
-
await post_or_update(keyword, body, new_tier, src_msg)
|
| 423 |
except Exception as e:
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
db_save_last_posted(keyword, msg_id, new_tier)
|
| 429 |
-
else:
|
| 430 |
-
pass # no-op
|
| 431 |
-
|
| 432 |
|
| 433 |
# ========= Core actions (fallback kept) =========
|
| 434 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
@@ -456,78 +328,32 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 456 |
ext = ".jpg"
|
| 457 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 458 |
if mt:
|
| 459 |
-
|
| 460 |
-
if
|
| 461 |
-
|
| 462 |
-
ext = ext_guess
|
| 463 |
bio.name = f"media{ext}"
|
| 464 |
await client.send_file(TARGET_CHAT, bio, caption=orig_text, caption_entities=entities, force_document=False)
|
| 465 |
return
|
| 466 |
-
except FloodWaitError as
|
| 467 |
-
|
| 468 |
except Exception as e:
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
try:
|
| 472 |
-
await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
|
| 473 |
-
except FloodWaitError as e:
|
| 474 |
-
await asyncio.sleep(e.seconds + 1)
|
| 475 |
-
await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
|
| 476 |
|
|
|
|
| 477 |
|
| 478 |
-
# ======
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
def _extract_tickers(text_norm: str) -> List[str]:
|
| 483 |
-
"""
|
| 484 |
-
Ambil $TICKER dengan dua cara:
|
| 485 |
-
- Bersih: $ABC, $JBCOIN
|
| 486 |
-
- Noisy: $J*BCOIN -> dinormalisasi jadi $JBCOIN untuk *keyword* saja.
|
| 487 |
-
(Teks asli tetap dikirim apa adanya.)
|
| 488 |
-
"""
|
| 489 |
-
found = []
|
| 490 |
-
|
| 491 |
-
# bersih
|
| 492 |
-
for m in TICKER_CLEAN_RE.finditer(text_norm):
|
| 493 |
-
found.append(m.group(0).lower())
|
| 494 |
-
|
| 495 |
-
# noisy -> normalisasi internal
|
| 496 |
-
for m in TICKER_NOISY_RE.finditer(text_norm):
|
| 497 |
-
raw = m.group(0)
|
| 498 |
-
norm = "$" + re.sub(r"[^A-Za-z0-9]+", "", raw[1:])
|
| 499 |
-
if 3 <= len(norm) <= 13: # termasuk '$'
|
| 500 |
-
found.append(norm.lower())
|
| 501 |
-
|
| 502 |
-
# unik & pertahankan urutan
|
| 503 |
-
seen = set()
|
| 504 |
-
uniq = []
|
| 505 |
-
for x in found:
|
| 506 |
-
if x not in seen:
|
| 507 |
-
uniq.append(x)
|
| 508 |
-
seen.add(x)
|
| 509 |
-
return uniq
|
| 510 |
|
| 511 |
def _extract_all_keywords(text_norm: str) -> List[str]:
|
| 512 |
-
"""
|
| 513 |
-
Deteksi SEMUA keyword dari THEME_KEYWORDS + $ticker.
|
| 514 |
-
Tidak menghapus simbol '$' (sesuai permintaan).
|
| 515 |
-
"""
|
| 516 |
-
# toleran untuk pencarian keyword tema (seperti semula)
|
| 517 |
-
t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
|
| 518 |
-
|
| 519 |
found = []
|
| 520 |
-
for
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
#
|
| 525 |
-
tickers = _extract_tickers(text_norm)
|
| 526 |
-
found.extend(tickers)
|
| 527 |
-
|
| 528 |
-
# unik dengan urutan muncul pertama
|
| 529 |
-
uniq = []
|
| 530 |
seen = set()
|
|
|
|
| 531 |
for kw in found:
|
| 532 |
if kw not in seen:
|
| 533 |
uniq.append(kw)
|
|
@@ -549,7 +375,9 @@ def _choose_dominant_keyword(text_norm: str, kws: List[str]) -> Optional[str]:
|
|
| 549 |
return chosen
|
| 550 |
|
| 551 |
def _role_of(chat_id: int) -> str:
|
| 552 |
-
|
|
|
|
|
|
|
| 553 |
|
| 554 |
def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
|
| 555 |
"""
|
|
@@ -559,12 +387,38 @@ def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
|
|
| 559 |
bucket = keyword_group_last_seen.get(keyword, {})
|
| 560 |
core_ids, sup_ids = set(), set()
|
| 561 |
for gk in bucket.keys():
|
| 562 |
-
role = chat_roles.get(int(gk), "
|
| 563 |
(core_ids if role == "core" else sup_ids).add(gk)
|
| 564 |
return len(core_ids), len(sup_ids)
|
| 565 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
"""
|
| 569 |
Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
|
| 570 |
agregasi tier, gating support (CORE-anchored), filter ajakan, dan POST/EDIT.
|
|
@@ -583,12 +437,16 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 583 |
if ch in recent_content_hashes:
|
| 584 |
debug_log("Content-duplicate (global), dilewati", orig_text)
|
| 585 |
return
|
| 586 |
-
recent_content_hashes.
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
if h in recent_hashes:
|
| 591 |
-
debug_log("
|
| 592 |
return
|
| 593 |
recent_hashes.append(h)
|
| 594 |
|
|
@@ -612,22 +470,19 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 612 |
now = datetime.now(timezone.utc)
|
| 613 |
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 614 |
|
| 615 |
-
# Gating SUPPORT (
|
| 616 |
if role == "support":
|
| 617 |
core_u, sup_u = _unique_counts_by_role(main_kw)
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
elif sup_u < SUPPORT_MIN_UNIQUE:
|
| 624 |
-
debug_log(f"Support ditahan (core_u={core_u}, sup_u={sup_u} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 625 |
-
return
|
| 626 |
|
| 627 |
-
# Filter
|
| 628 |
-
cleaned_body = filter_invite_sentences(orig_text)
|
| 629 |
-
if not cleaned_body
|
| 630 |
-
debug_log("
|
| 631 |
return
|
| 632 |
|
| 633 |
# Backfill safety: saat startup, hindari pesan yang terlalu lama
|
|
@@ -638,9 +493,21 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 638 |
debug_log("Lama (lewat cutoff backfill safety), dilewati", orig_text)
|
| 639 |
return
|
| 640 |
|
| 641 |
-
|
| 642 |
debug_log(f"Posted/Edited (role={role}, unique_groups={unique_groups}, kw={main_kw}, tier={class_label})", orig_text)
|
| 643 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
|
| 645 |
async def backfill_history(entity, limit: int) -> None:
|
| 646 |
if limit <= 0:
|
|
@@ -652,17 +519,6 @@ async def backfill_history(entity, limit: int) -> None:
|
|
| 652 |
except Exception as e:
|
| 653 |
debug_log("Error saat memproses backfill", str(e))
|
| 654 |
|
| 655 |
-
|
| 656 |
-
# ========= Event handlers =========
|
| 657 |
-
@client.on(events.NewMessage(chats=SOURCE_CHATS))
|
| 658 |
-
async def on_new_message(event):
|
| 659 |
-
try:
|
| 660 |
-
await process_message(event.message, source_chat_id=event.chat_id)
|
| 661 |
-
except Exception as e:
|
| 662 |
-
print("Process error:", e)
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
# ========= Entry points =========
|
| 666 |
async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
| 667 |
resolved = []
|
| 668 |
for src in raw_list:
|
|
@@ -671,37 +527,16 @@ async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
|
| 671 |
resolved.append(ent)
|
| 672 |
chat_roles[int(ent.id)] = role_label
|
| 673 |
except Exception as e:
|
| 674 |
-
print(f"Gagal resolve
|
| 675 |
return resolved
|
| 676 |
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
_init_db()
|
| 680 |
-
|
| 681 |
-
# Load persisted state
|
| 682 |
-
global last_posted, keyword_group_last_seen
|
| 683 |
-
last_posted, keyword_group_last_seen = db_load_state()
|
| 684 |
-
|
| 685 |
-
resolved_core = await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 686 |
-
resolved_support = await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 687 |
-
resolved_sources = resolved_core + resolved_support
|
| 688 |
-
|
| 689 |
-
for ent in resolved_sources:
|
| 690 |
-
try:
|
| 691 |
-
await backfill_history(ent, INITIAL_BACKFILL)
|
| 692 |
-
except Exception as e:
|
| 693 |
-
print(f"Backfill gagal untuk {ent}: {e}")
|
| 694 |
-
|
| 695 |
-
print("Kurator berjalan (background task). Menunggu pesan baru...")
|
| 696 |
-
asyncio.create_task(client.run_until_disconnected())
|
| 697 |
-
|
| 698 |
-
async def app_main() -> None:
|
| 699 |
await client.start()
|
| 700 |
_init_db()
|
|
|
|
| 701 |
|
| 702 |
-
|
| 703 |
-
last_posted, keyword_group_last_seen = db_load_state()
|
| 704 |
-
|
| 705 |
resolved_core = await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 706 |
resolved_support = await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 707 |
resolved_sources = resolved_core + resolved_support
|
|
@@ -712,6 +547,5 @@ async def app_main() -> None:
|
|
| 712 |
print("Kurator berjalan. Menunggu pesan baru... (Stop dengan interrupt).")
|
| 713 |
await client.run_until_disconnected()
|
| 714 |
|
| 715 |
-
|
| 716 |
if __name__ == "__main__":
|
| 717 |
asyncio.run(app_main())
|
|
|
|
| 1 |
+
# botsignal.py (patched)
|
| 2 |
import asyncio
|
| 3 |
import os
|
| 4 |
import re
|
|
|
|
| 15 |
from telethon.sessions import StringSession, MemorySession
|
| 16 |
from telethon.errors.rpcerrorlist import FloodWaitError
|
| 17 |
|
|
|
|
|
|
|
| 18 |
API_ID = int(os.environ.get("API_ID", "0"))
|
| 19 |
API_HASH = os.environ.get("API_HASH", "")
|
| 20 |
+
SESSION = os.environ.get("SESSION", "")
|
| 21 |
+
TARGET_CHAT = os.environ.get("TARGET_CHAT", "") # @username atau chat id
|
| 22 |
+
# Sumber
|
| 23 |
+
CORE_CHATS = [s.strip() for s in os.environ.get("CORE_CHATS", "").split(",") if s.strip()]
|
| 24 |
+
SUPPORT_CHATS = [s.strip() for s in os.environ.get("SUPPORT_CHATS", "").split(",") if s.strip()]
|
| 25 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
|
| 27 |
|
| 28 |
+
DB_PATH = os.environ.get("DB_PATH", "botsignal.db")
|
| 29 |
+
|
| 30 |
+
# ====== Tokenisasi / relevansi ======
|
| 31 |
+
def _tokenize_words(s: str) -> List[str]:
|
| 32 |
+
return re.findall(r"[a-zA-Z0-9\$\#]{2,}", s or "")
|
| 33 |
+
|
| 34 |
+
def normalize_for_filter(s: str) -> str:
|
| 35 |
+
if not s:
|
| 36 |
+
return ""
|
| 37 |
+
t = s
|
| 38 |
+
# netralkan alamat kontrak (sol/eth panjang)
|
| 39 |
+
t = re.sub(r"\b[1-9A-HJ-NP-Za-km-z]{25,}\b", "CA", t)
|
| 40 |
+
# hapus url/mention
|
| 41 |
+
t = _strip_urls_and_mentions(t)
|
| 42 |
+
return t
|
| 43 |
+
|
| 44 |
+
URL_REGEX = re.compile(r"(https?:\/\/\S+)", re.IGNORECASE)
|
| 45 |
+
MENTION_REGEX = re.compile(r"@\w+", re.IGNORECASE)
|
| 46 |
+
|
| 47 |
+
def _strip_urls_and_mentions(s: str) -> str:
|
| 48 |
+
s = URL_REGEX.sub("", s)
|
| 49 |
+
s = MENTION_REGEX.sub("", s)
|
| 50 |
+
return s
|
| 51 |
|
|
|
|
| 52 |
THEME_KEYWORDS = [
|
| 53 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 54 |
"pump", "spot", "futures", "setup",
|
|
|
|
| 72 |
|
| 73 |
CLASS_WINDOW_MINUTES = int(os.environ.get("CLASS_WINDOW_MINUTES", "10"))
|
| 74 |
|
| 75 |
+
# Dulu dipakai untuk bypass support-only; sekarang hanya aktif bila ALLOW_SUPPORT_SOLO=1
|
| 76 |
SUPPORT_MIN_UNIQUE = int(os.environ.get("SUPPORT_MIN_UNIQUE", "2"))
|
| 77 |
|
| 78 |
# New: DRY RUN (tidak kirim apa pun ke TARGET_CHAT)
|
|
|
|
| 81 |
# Backfill buffer: abaikan pesan lebih tua dari (startup_time - buffer)
|
| 82 |
BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
|
| 83 |
|
| 84 |
+
# >>> Tambahan ENV untuk mode support-only (default OFF, strict core-anchored) <<<
|
| 85 |
+
ALLOW_SUPPORT_SOLO = os.environ.get("ALLOW_SUPPORT_SOLO", "0") == "1"
|
| 86 |
+
SUPPORT_SOLO_MIN_UNIQUE = int(os.environ.get("SUPPORT_SOLO_MIN_UNIQUE", "99"))
|
| 87 |
|
| 88 |
+
# ====== Client ======
|
| 89 |
+
if SESSION:
|
| 90 |
+
session = StringSession(SESSION)
|
| 91 |
+
else:
|
| 92 |
+
session = MemorySession()
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
client = TelegramClient(session, API_ID, API_HASH)
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
# ====== State ======
|
|
|
|
| 97 |
startup_time_utc = datetime.now(timezone.utc)
|
| 98 |
|
| 99 |
+
recent_hashes = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 100 |
+
recent_content_hashes = set()
|
| 101 |
|
| 102 |
+
# chat_roles: chat_id -> "core"/"support"
|
| 103 |
+
chat_roles: Dict[int, str] = {}
|
| 104 |
+
|
| 105 |
+
# agregasi keyword -> group_id -> last_seen_utc
|
| 106 |
+
keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
|
| 107 |
|
| 108 |
def _db():
|
| 109 |
conn = sqlite3.connect(DB_PATH)
|
| 110 |
conn.execute("PRAGMA journal_mode=WAL;")
|
| 111 |
+
conn.execute("PRAGMA synchronous=NORMAL;")
|
| 112 |
return conn
|
| 113 |
|
| 114 |
def _init_db():
|
|
|
|
| 141 |
conn.close()
|
| 142 |
return last, kw_map
|
| 143 |
|
| 144 |
+
def db_save_group_seen(keyword: str, group_key: str, ts: datetime):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
conn = _db()
|
| 146 |
+
conn.execute(
|
| 147 |
+
"INSERT OR REPLACE INTO kw_group_seen(keyword, group_key, last_ts) VALUES (?, ?, ?)",
|
| 148 |
+
(keyword, group_key, int(ts.timestamp())),
|
| 149 |
+
)
|
| 150 |
conn.commit()
|
| 151 |
conn.close()
|
| 152 |
|
| 153 |
+
def db_save_last_posted(keyword: str, msg_id: int, tier: str):
|
| 154 |
conn = _db()
|
| 155 |
+
conn.execute(
|
| 156 |
+
"INSERT OR REPLACE INTO last_posted(keyword, msg_id, tier) VALUES (?, ?, ?)",
|
| 157 |
+
(keyword, msg_id, tier),
|
| 158 |
+
)
|
| 159 |
conn.commit()
|
| 160 |
conn.close()
|
| 161 |
|
| 162 |
+
def score_relevance(t: str, keywords: List[str]) -> float:
|
| 163 |
+
# exact: jumlah kemunculan kw
|
| 164 |
+
exact_hits = sum(
|
| 165 |
+
len(re.findall(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I)) for kw in keywords
|
| 166 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 168 |
|
| 169 |
# fuzzy windowed: ambil top-3 skor di antara jendela 20 token
|
|
|
|
| 195 |
if doc and getattr(doc, "id", None) is not None:
|
| 196 |
parts.append(f"doc:{doc.id}")
|
| 197 |
if getattr(msg, "photo", None) is not None:
|
| 198 |
+
parts.append("has_photo")
|
| 199 |
+
h = hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()
|
| 200 |
+
return h
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
def content_only_hash(text: str) -> str:
|
| 203 |
+
t = text or ""
|
| 204 |
+
# netralkan angka/CA/url/mention agar “teks ajakan” sama ke-dedup
|
| 205 |
+
t = re.sub(r"\b\d{2,}\b", "NUM", t)
|
| 206 |
+
t = re.sub(r"\b[1-9A-HJ-NP-Za-km-z]{25,}\b", "CA", t)
|
| 207 |
+
t = _strip_urls_and_mentions(t)
|
| 208 |
+
return hashlib.sha256(t.encode("utf-8")).hexdigest()
|
| 209 |
+
|
| 210 |
+
def _windows(tokens: List[str], k: int):
|
| 211 |
+
for i in range(0, len(tokens), max(1, k // 2)):
|
| 212 |
+
yield " ".join(tokens[i:i + k])
|
| 213 |
+
|
| 214 |
+
# ====== Media helpers ======
|
| 215 |
def is_image_message(msg) -> bool:
|
| 216 |
if getattr(msg, "photo", None) is not None:
|
| 217 |
return True
|
| 218 |
doc = getattr(msg, "document", None)
|
| 219 |
+
if not doc:
|
| 220 |
+
return False
|
| 221 |
+
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 222 |
+
if mt.startswith("image/"):
|
| 223 |
+
return True
|
| 224 |
+
if not ALLOW_GIFS_VIDEOS and (mt.startswith("video/") or mt == "image/gif"):
|
| 225 |
+
return False
|
| 226 |
+
return True
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
def media_too_big(msg) -> bool:
|
| 229 |
doc = getattr(msg, "document", None)
|
| 230 |
+
if not doc:
|
| 231 |
+
return False
|
| 232 |
+
size = getattr(doc, "size", None)
|
| 233 |
+
if size is None:
|
| 234 |
+
return False
|
| 235 |
+
return (size / (1024 * 1024)) > MAX_MEDIA_MB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
+
# ====== Ajakan filter ======
|
| 238 |
INVITE_PATTERNS = [
|
| 239 |
+
r"\bjoin\b", r"\bdm\b", r"\binbox\b", r"\bpm\b", r"\bvip\b",
|
| 240 |
+
r"\bcontact\b", r"\bpromo\b", r"\bpaid\b", r"@",
|
| 241 |
+
r"t\.me\/", r"telegram\.me\/", r"\blink\b", r"\bklik\b", r"\bhubungi\b",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
r"\bpromo\b", r"\bpromosi\b", r"\biklan\b",
|
| 243 |
+
r"\badvert\b", r"\badvertise\b", r"\badvertisement\b"
|
| 244 |
+
|
| 245 |
]
|
| 246 |
INVITE_REGEXES = [re.compile(p, re.IGNORECASE) for p in INVITE_PATTERNS]
|
| 247 |
|
|
|
|
| 257 |
t = s.strip()
|
| 258 |
if not t:
|
| 259 |
return False
|
| 260 |
+
# (ASAL) Jika kalimat memuat sinyal kuat, jangan dibuang walau ada kata invite
|
| 261 |
if any(r.search(t) for r in WHITELIST_REGEXES):
|
| 262 |
return False
|
| 263 |
# Jika ada 1+ pola ajakan, buang
|
|
|
|
| 272 |
cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
|
| 273 |
return cleaned
|
| 274 |
|
| 275 |
+
# ====== Posting ======
|
| 276 |
+
async def post_or_update(keyword: str, body: str, class_label: str, msg):
|
| 277 |
+
"""
|
| 278 |
+
Post baru jika belum ada, jika sudah ada posting untuk keyword tsb → edit.
|
| 279 |
+
"""
|
| 280 |
+
# muat last posted dari DB
|
| 281 |
+
last_posted, _ = db_load_state()
|
| 282 |
+
last = last_posted.get(keyword)
|
| 283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
if DRY_RUN:
|
| 285 |
+
print(f"[DRY_RUN] ({class_label}) [{keyword}] {body[:160]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
return
|
| 287 |
|
| 288 |
+
if not last:
|
| 289 |
+
# post baru
|
| 290 |
+
sent = await client.send_message(TARGET_CHAT, body)
|
| 291 |
+
db_save_last_posted(keyword, sent.id, class_label)
|
| 292 |
+
return
|
| 293 |
+
else:
|
| 294 |
+
# edit posting lama
|
| 295 |
+
last_msg_id = last["msg_id"]
|
| 296 |
try:
|
| 297 |
+
await client.edit_message(TARGET_CHAT, last_msg_id, body)
|
| 298 |
+
db_save_last_posted(keyword, last_msg_id, class_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
except Exception as e:
|
| 300 |
+
print("Edit failed, posting baru:", e)
|
| 301 |
+
sent = await client.send_message(TARGET_CHAT, body)
|
| 302 |
+
db_save_last_posted(keyword, sent.id, class_label)
|
| 303 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# ========= Core actions (fallback kept) =========
|
| 306 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
|
|
| 328 |
ext = ".jpg"
|
| 329 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 330 |
if mt:
|
| 331 |
+
guessed = guess_extension(mt) or ""
|
| 332 |
+
if guessed:
|
| 333 |
+
ext = guessed
|
|
|
|
| 334 |
bio.name = f"media{ext}"
|
| 335 |
await client.send_file(TARGET_CHAT, bio, caption=orig_text, caption_entities=entities, force_document=False)
|
| 336 |
return
|
| 337 |
+
except FloodWaitError as fw:
|
| 338 |
+
print("FloodWait:", fw)
|
| 339 |
except Exception as e:
|
| 340 |
+
print("send_file err:", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
+
await client.send_message(TARGET_CHAT, orig_text)
|
| 343 |
|
| 344 |
+
# ====== Keyword extraction / aggregation ======
|
| 345 |
+
# $TICKER mentions: a '$' followed by 2-10 alphanumerics, matched case-insensitively.
TICKER_RE = re.compile(r"\$[a-z0-9]{2,10}", re.IGNORECASE)
# Bare coin/trading words that also count as keywords when they appear as whole words.
COINWORDS = ["btc","eth","sol","bnb","pepe","doge","meme","spot","futures","pump","entry","buy","sell","tp","sl","setup"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
def _extract_all_keywords(text_norm: str) -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
found = []
|
| 350 |
+
found += [m.group(0).lower() for m in TICKER_RE.finditer(text_norm)]
|
| 351 |
+
for w in COINWORDS:
|
| 352 |
+
if re.search(rf"(^|\W){re.escape(w)}(\W|$)", text_norm, flags=re.I):
|
| 353 |
+
found.append(w)
|
| 354 |
+
# unik, preserve order
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
seen = set()
|
| 356 |
+
uniq = []
|
| 357 |
for kw in found:
|
| 358 |
if kw not in seen:
|
| 359 |
uniq.append(kw)
|
|
|
|
| 375 |
return chosen
|
| 376 |
|
| 377 |
def _role_of(chat_id: int) -> str:
    """Return the curation role ("core" or "support") recorded for *chat_id*.

    Unknown chats are treated as "support". (Previously the default was
    "core", which let messages through when chat resolution failed.)
    """
    return chat_roles.get(chat_id, "support")
|
| 381 |
|
| 382 |
def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
|
| 383 |
"""
|
|
|
|
| 387 |
bucket = keyword_group_last_seen.get(keyword, {})
|
| 388 |
core_ids, sup_ids = set(), set()
|
| 389 |
for gk in bucket.keys():
|
| 390 |
+
role = chat_roles.get(int(gk), "support")
|
| 391 |
(core_ids if role == "core" else sup_ids).add(gk)
|
| 392 |
return len(core_ids), len(sup_ids)
|
| 393 |
|
| 394 |
+
def update_and_classify(keyword: str, group_key: str, ts: datetime) -> Tuple[str, int]:
    """Record a sighting of *keyword* in *group_key* at *ts* and classify it.

    Maintains a sliding window of CLASS_WINDOW_MINUTES minutes per keyword and
    returns a (label, unique_group_count) pair, where the label reflects how
    many distinct groups mentioned the keyword inside the window.
    """
    # Evict sightings that have aged out of the classification window.
    horizon = ts - timedelta(minutes=CLASS_WINDOW_MINUTES)
    bucket = keyword_group_last_seen[keyword]
    for stale in [g for g, seen_at in bucket.items() if seen_at < horizon]:
        del bucket[stale]

    # Record the fresh sighting both in memory and in the DB.
    bucket[group_key] = ts
    db_save_group_seen(keyword, group_key, ts)

    unique_groups = len(bucket)

    # Tier the keyword by how many distinct groups mention it concurrently.
    if unique_groups >= 4:
        tier = "🔥 HOT"
    elif unique_groups >= 2:
        tier = "☀️ SEDANG"
    else:
        tier = "🌱 BARU"
    return tier, unique_groups
|
| 420 |
+
|
| 421 |
+
def process_message(msg, source_chat_id: int) -> None:
|
| 422 |
"""
|
| 423 |
Filter, content-dedup, relevansi, multi-kw -> pilih dominan,
|
| 424 |
agregasi tier, gating support (CORE-anchored), filter ajakan, dan POST/EDIT.
|
|
|
|
| 437 |
if ch in recent_content_hashes:
|
| 438 |
debug_log("Content-duplicate (global), dilewati", orig_text)
|
| 439 |
return
|
| 440 |
+
recent_content_hashes.add(ch)
|
| 441 |
+
if len(recent_content_hashes) > DEDUP_BUFFER_SIZE * 2:
|
| 442 |
+
# jaga ukuran set
|
| 443 |
+
recent_content_hashes.clear()
|
| 444 |
+
recent_content_hashes.add(ch)
|
| 445 |
+
|
| 446 |
+
# Hash per pesan (teks+media) untuk guard cepat
|
| 447 |
+
h = hash_for_dedup(orig_text, msg)
|
| 448 |
if h in recent_hashes:
|
| 449 |
+
debug_log("Duplicate (hash recent), dilewati", orig_text)
|
| 450 |
return
|
| 451 |
recent_hashes.append(h)
|
| 452 |
|
|
|
|
| 470 |
now = datetime.now(timezone.utc)
|
| 471 |
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 472 |
|
| 473 |
+
# Gating SUPPORT (strict core-anchored unless env allows 'solo')
|
| 474 |
if role == "support":
|
| 475 |
core_u, sup_u = _unique_counts_by_role(main_kw)
|
| 476 |
+
if core_u < 1:
|
| 477 |
+
# if solo support not allowed or not strong enough, hold
|
| 478 |
+
if (not ALLOW_SUPPORT_SOLO) or (sup_u < SUPPORT_SOLO_MIN_UNIQUE):
|
| 479 |
+
debug_log(f"Support ditahan (butuh anchor core; core_u={core_u}, sup_u={sup_u})", orig_text)
|
| 480 |
+
return
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
+
# Filter ajakan per-kalimat
|
| 483 |
+
cleaned_body = filter_invite_sentences(orig_text).strip()
|
| 484 |
+
if not cleaned_body:
|
| 485 |
+
debug_log("Habis difilter, kosong → dilewati", orig_text)
|
| 486 |
return
|
| 487 |
|
| 488 |
# Backfill safety: saat startup, hindari pesan yang terlalu lama
|
|
|
|
| 493 |
debug_log("Lama (lewat cutoff backfill safety), dilewati", orig_text)
|
| 494 |
return
|
| 495 |
|
| 496 |
+
asyncio.create_task(post_or_update(main_kw, cleaned_body, class_label, msg))
|
| 497 |
debug_log(f"Posted/Edited (role={role}, unique_groups={unique_groups}, kw={main_kw}, tier={class_label})", orig_text)
|
| 498 |
|
| 499 |
+
# ========= Event handlers =========
|
| 500 |
+
@client.on(events.NewMessage(chats=SOURCE_CHATS))
async def on_new_message(event):
    """Route every new message from a source chat through the curation pipeline.

    Bug fix: ``process_message`` is declared as a plain (non-async) function,
    but it was awaited here — ``await None`` raises a TypeError on every
    message, which the broad except then silently logged as "Process error",
    so no message was ever curated. It is now called synchronously; if a
    future refactor turns it back into a coroutine, the result is awaited.
    """
    try:
        result = process_message(event.message, source_chat_id=event.chat_id)
        if asyncio.iscoroutine(result):
            # Tolerate process_message being made async again.
            await result
    except Exception as e:
        # Keep the handler alive: one bad message must not kill the event loop.
        print("Process error:", e)
|
| 506 |
+
|
| 507 |
+
# ========= Utilities =========
|
| 508 |
+
def debug_log(tag: str, body: str):
    """Print a single timestamped debug line, flattening newlines in *body*.

    Only the first 200 characters of *body* are shown; embedded newlines are
    replaced with " / " so the entry stays on one console line.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    snippet = body[:200].replace("\n", " / ")
    print(f"[{stamp}] {tag}: {snippet}")
|
| 511 |
|
| 512 |
async def backfill_history(entity, limit: int) -> None:
|
| 513 |
if limit <= 0:
|
|
|
|
| 519 |
except Exception as e:
|
| 520 |
debug_log("Error saat memproses backfill", str(e))
|
| 521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
| 523 |
resolved = []
|
| 524 |
for src in raw_list:
|
|
|
|
| 527 |
resolved.append(ent)
|
| 528 |
chat_roles[int(ent.id)] = role_label
|
| 529 |
except Exception as e:
|
| 530 |
+
print(f"Gagal resolve {src}: {e}")
|
| 531 |
return resolved
|
| 532 |
|
| 533 |
+
# ========= Entry points =========
|
| 534 |
+
async def app_main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
await client.start()
|
| 536 |
_init_db()
|
| 537 |
+
_ = db_load_state() # pre-load caches
|
| 538 |
|
| 539 |
+
# resolve semua chats dan tag role
|
|
|
|
|
|
|
| 540 |
resolved_core = await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 541 |
resolved_support = await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 542 |
resolved_sources = resolved_core + resolved_support
|
|
|
|
| 547 |
print("Kurator berjalan. Menunggu pesan baru... (Stop dengan interrupt).")
|
| 548 |
await client.run_until_disconnected()
|
| 549 |
|
|
|
|
| 550 |
# Script entry point: start the Telegram curator and block until disconnect.
if __name__ == "__main__":
    asyncio.run(app_main())
|