Spaces:
Sleeping
Sleeping
Update botsignal.py
Browse files- botsignal.py +92 -62
botsignal.py
CHANGED
|
@@ -19,29 +19,24 @@ API_HASH = os.environ.get("API_HASH", "")
|
|
| 19 |
STRING_SESSION = os.environ.get("STRING_SESSION", "")
|
| 20 |
|
| 21 |
# --- Definisikan sumber sebagai CORE vs SUPPORT ---
|
| 22 |
-
# (
|
| 23 |
CORE_CHATS = [
|
| 24 |
"https://t.me/PEPE_Calls28",
|
| 25 |
-
"https://t.me/
|
|
|
|
|
|
|
| 26 |
"https://t.me/ChinaPumpCommunity",
|
| 27 |
"https://t.me/Milagrosdegencalls",
|
| 28 |
"https://t.me/GM_Degencalls",
|
| 29 |
]
|
| 30 |
-
SUPPORT_CHATS = [
|
| 31 |
-
"https://t.me/SephirothGemCalls1",
|
| 32 |
-
"https://t.me/TheDonALPHAJournal",
|
| 33 |
-
"https://t.me/savascalls",
|
| 34 |
-
"https://t.me/Tanjirocall",
|
| 35 |
-
"https://t.me/ChapoInsider",
|
| 36 |
-
]
|
| 37 |
|
| 38 |
-
# Gabungan untuk handler event
|
| 39 |
SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
|
| 40 |
|
| 41 |
# Target (boleh @username / id / link)
|
| 42 |
-
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/
|
| 43 |
|
| 44 |
-
# Kata kunci topik untuk relevansi
|
| 45 |
THEME_KEYWORDS = [
|
| 46 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 47 |
"pump", "spot", "futures", "setup",
|
|
@@ -51,7 +46,7 @@ KEYWORD_WEIGHT = 1.0
|
|
| 51 |
FUZZ_WEIGHT = 0.6
|
| 52 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 53 |
|
| 54 |
-
# Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip
|
| 55 |
EXCLUDE_PHRASES = [
|
| 56 |
"achievement unlocked",
|
| 57 |
]
|
|
@@ -202,7 +197,6 @@ def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] =
|
|
| 202 |
|
| 203 |
|
| 204 |
# ========= Sentence-level invite filter =========
|
| 205 |
-
# Pola ajakan (join/DM/VIP/premium/berbayar/dll) — case-insensitive.
|
| 206 |
INVITE_PATTERNS = [
|
| 207 |
r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
|
| 208 |
r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
|
|
@@ -223,32 +217,88 @@ def _is_invite_sentence(s: str) -> bool:
|
|
| 223 |
|
| 224 |
def filter_invite_sentences(text: str) -> str:
    """Remove sentences flagged by ``_is_invite_sentence`` from ``text``.

    The surviving sentences are stripped and joined back together with
    newlines so the result can still be sent as a single message.

    Args:
        text: Message text to clean. Falsy values are returned unchanged.

    Returns:
        The text with flagged sentences removed.
    """
    if not text:
        return text

    # Split after sentence-ending punctuation (. ! ?) followed by whitespace,
    # or on newlines, which are common separators in Telegram messages.
    pieces = re.split(r'(?<=[\.\!\?])\s+|\n+', text, flags=re.UNICODE)
    wanted = (
        piece.strip()
        for piece in pieces
        if piece and not _is_invite_sentence(piece)
    )

    # Re-join with newlines so the output stays tidy but remains one message.
    result = "\n".join(wanted).strip()

    # Collapse runs of three or more newlines into a single blank line.
    return re.sub(r"\n{3,}", "\n\n", result)
|
| 241 |
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
# ========= Core actions =========
|
| 244 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 245 |
"""
|
| 246 |
-
|
| 247 |
-
|
| 248 |
"""
|
| 249 |
if text_override is not None:
|
| 250 |
orig_text = text_override
|
| 251 |
-
entities = None
|
| 252 |
else:
|
| 253 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 254 |
entities = getattr(msg, "entities", None)
|
|
@@ -256,13 +306,8 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 256 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 257 |
try:
|
| 258 |
if getattr(msg, "photo", None):
|
| 259 |
-
await client.send_file(
|
| 260 |
-
TARGET_CHAT, msg.photo,
|
| 261 |
-
caption=orig_text, caption_entities=entities,
|
| 262 |
-
force_document=False,
|
| 263 |
-
)
|
| 264 |
return
|
| 265 |
-
|
| 266 |
doc = getattr(msg, "document", None)
|
| 267 |
if doc:
|
| 268 |
data = await client.download_media(msg, file=bytes)
|
|
@@ -276,27 +321,15 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 276 |
ext_guess = ".jpg"
|
| 277 |
ext = ext_guess
|
| 278 |
bio.name = f"media{ext}"
|
| 279 |
-
await client.send_file(
|
| 280 |
-
TARGET_CHAT, bio,
|
| 281 |
-
caption=orig_text, caption_entities=entities,
|
| 282 |
-
force_document=False,
|
| 283 |
-
)
|
| 284 |
return
|
| 285 |
except Exception as e:
|
| 286 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 287 |
|
| 288 |
-
await client.send_message(
|
| 289 |
-
TARGET_CHAT,
|
| 290 |
-
orig_text,
|
| 291 |
-
formatting_entities=entities,
|
| 292 |
-
link_preview=True,
|
| 293 |
-
)
|
| 294 |
|
| 295 |
def _extract_main_keyword(text_norm: str) -> Optional[str]:
|
| 296 |
-
"""
|
| 297 |
-
Ambil keyword utama pertama yang muncul.
|
| 298 |
-
Toleran untuk ticker $BTC -> btc.
|
| 299 |
-
"""
|
| 300 |
t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
|
| 301 |
for kw in THEME_KEYWORDS:
|
| 302 |
if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
|
|
@@ -310,7 +343,8 @@ def _role_of(chat_id: int) -> str:
|
|
| 310 |
|
| 311 |
async def process_message(msg, source_chat_id: int) -> None:
|
| 312 |
"""
|
| 313 |
-
Filter, dedup, relevansi, klasifikasi, gating support vs core,
|
|
|
|
| 314 |
"""
|
| 315 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 316 |
text_norm = normalize_for_filter(orig_text).lower()
|
|
@@ -321,7 +355,7 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 321 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
| 322 |
return
|
| 323 |
|
| 324 |
-
# Dedup
|
| 325 |
h = hash_for_dedup(text_norm, msg)
|
| 326 |
if h in recent_hashes:
|
| 327 |
debug_log("Duplikat, dilewati", orig_text)
|
|
@@ -338,14 +372,15 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 338 |
|
| 339 |
# Tentukan keyword & kelas
|
| 340 |
main_kw = _extract_main_keyword(text_norm)
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
| 347 |
|
| 348 |
-
# Gating: support hanya
|
| 349 |
if role == "support" and unique_groups < SUPPORT_MIN_UNIQUE:
|
| 350 |
debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 351 |
return
|
|
@@ -356,14 +391,9 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 356 |
debug_log("Semua kalimat terfilter (kosong), dilewati", orig_text)
|
| 357 |
return
|
| 358 |
|
| 359 |
-
#
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
else:
|
| 363 |
-
text_to_send = cleaned_body
|
| 364 |
-
|
| 365 |
-
await send_as_is(msg, text_override=text_to_send)
|
| 366 |
-
debug_log(f"Dikirim ke target (role={role}, unique_groups={unique_groups})", orig_text)
|
| 367 |
|
| 368 |
|
| 369 |
async def backfill_history(entity, limit: int) -> None:
|
|
|
|
| 19 |
STRING_SESSION = os.environ.get("STRING_SESSION", "")
|
| 20 |
|
| 21 |
# --- Definisikan sumber sebagai CORE vs SUPPORT ---
|
| 22 |
+
# Boleh: "@username", id (int), atau "https://t.me/xxxxx"
|
| 23 |
CORE_CHATS = [
|
| 24 |
"https://t.me/PEPE_Calls28",
|
| 25 |
+
"https://t.me/Tanjirocall",
|
| 26 |
+
]
|
| 27 |
+
SUPPORT_CHATS = [
|
| 28 |
"https://t.me/ChinaPumpCommunity",
|
| 29 |
"https://t.me/Milagrosdegencalls",
|
| 30 |
"https://t.me/GM_Degencalls",
|
| 31 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# Gabungan untuk handler event
|
| 34 |
SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
|
| 35 |
|
| 36 |
# Target (boleh @username / id / link)
|
| 37 |
+
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
|
| 38 |
|
| 39 |
+
# Kata kunci topik untuk relevansi (termasuk ticker)
|
| 40 |
THEME_KEYWORDS = [
|
| 41 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 42 |
"pump", "spot", "futures", "setup",
|
|
|
|
| 46 |
FUZZ_WEIGHT = 0.6
|
| 47 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 48 |
|
| 49 |
+
# Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip bubble
|
| 50 |
EXCLUDE_PHRASES = [
|
| 51 |
"achievement unlocked",
|
| 52 |
]
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
# ========= Sentence-level invite filter =========
|
|
|
|
| 200 |
INVITE_PATTERNS = [
|
| 201 |
r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
|
| 202 |
r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
|
|
|
|
| 217 |
|
| 218 |
def filter_invite_sentences(text: str) -> str:
    """Drop invite-style sentences (join/DM/VIP/etc.) and re-join the rest.

    The text is split after sentence-ending punctuation (``.``, ``!``, ``?``)
    followed by whitespace, and on runs of newlines. Fragments for which
    ``_is_invite_sentence`` returns true are discarded; the remaining
    fragments are stripped and joined with newlines so the output is still
    one message.

    Args:
        text: Message text to clean. Falsy values are returned unchanged.

    Returns:
        The cleaned text.
    """
    if not text:
        return text

    survivors = []
    for fragment in re.split(r'(?<=[\.\!\?])\s+|\n+', text, flags=re.UNICODE):
        if not fragment or _is_invite_sentence(fragment):
            continue
        survivors.append(fragment.strip())

    joined = "\n".join(survivors).strip()
    # Collapse runs of three or more newlines into a single blank line.
    return re.sub(r"\n{3,}", "\n\n", joined)
|
| 230 |
|
| 231 |
|
| 232 |
+
# ========= Post-on-threshold with EDIT =========
|
| 233 |
+
# Numeric rank of each classification tier, used to compare tiers.
TIER_ORDER = {
    "rendah": 0,
    "sedang": 1,
    "kuat": 2,
}
|
| 234 |
+
# Most recent post per keyword: keyword -> {"msg_id": int, "tier": str}
last_posted: Dict[str, Dict[str, object]] = {}
|
| 235 |
+
|
| 236 |
+
async def _send_initial(msg, text: str) -> int:
    """Send ``text`` to ``TARGET_CHAT`` for the first time and return the message id.

    When media is enabled and ``msg`` carries an acceptable image, the image
    is sent with ``text`` as its caption. If that attempt raises, or no usable
    media is found, a plain text message is sent instead.

    Args:
        msg: Source message whose photo/document may be re-sent.
        text: Final text (already prefixed/cleaned) to post.

    Returns:
        The id of the message that was sent.
    """
    # The text has been altered (prefix added / cleaned), so the source
    # message's formatting entities are deliberately not reused.
    media_allowed = INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg)
    if media_allowed:
        try:
            if getattr(msg, "photo", None):
                sent = await client.send_file(
                    TARGET_CHAT,
                    msg.photo,
                    caption=text,
                    caption_entities=None,
                    force_document=False,
                )
                return sent.id

            document = getattr(msg, "document", None)
            payload = await client.download_media(msg, file=bytes) if document else None
            if payload:
                # Pick a file extension from the MIME type, defaulting to .jpg.
                mime = (getattr(document, "mime_type", "") or "").lower()
                suffix = (guess_extension(mime) or ".jpg") if mime else ".jpg"
                if suffix == ".jpe":
                    suffix = ".jpg"

                buffer = io.BytesIO(payload)
                buffer.name = f"media{suffix}"
                sent = await client.send_file(
                    TARGET_CHAT,
                    buffer,
                    caption=text,
                    caption_entities=None,
                    force_document=False,
                )
                return sent.id
        except Exception as exc:
            # Best effort: any media failure falls through to a text message.
            debug_log("Gagal kirim media awal, fallback text", str(exc))

    sent = await client.send_message(TARGET_CHAT, text, link_preview=True)
    return sent.id
|
| 264 |
+
|
| 265 |
+
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
    """Post once per keyword; edit the existing post when the tier rises.

    The outgoing text is ``body`` prefixed with the upper-cased tier in
    square brackets. Nothing is sent when the tier is unchanged or lower,
    which avoids spamming the target chat.

    Args:
        keyword: Key into ``last_posted`` identifying the topic.
        body: Cleaned message text to publish.
        new_tier: Tier label for this message; ranked through ``TIER_ORDER``
            (unknown labels rank as 0).
        src_msg: Source message, passed to ``_send_initial`` for media.
    """
    announcement = f"[{new_tier.upper()}] " + body
    record = last_posted.get(keyword)

    # First post for this keyword.
    if not record:
        first_id = await _send_initial(src_msg, announcement)
        last_posted[keyword] = {"msg_id": first_id, "tier": new_tier}
        return

    # Already posted: only act when the tier has gone up.
    if TIER_ORDER.get(new_tier, 0) <= TIER_ORDER.get(record["tier"], 0):
        return

    try:
        await client.edit_message(TARGET_CHAT, record["msg_id"], announcement)
    except Exception as exc:
        # Editing failed; fall back to sending a fresh message and track it.
        debug_log("Edit gagal, fallback kirim baru", str(exc))
        replacement_id = await _send_initial(src_msg, announcement)
        last_posted[keyword] = {"msg_id": replacement_id, "tier": new_tier}
    else:
        record["tier"] = new_tier
|
| 291 |
+
|
| 292 |
+
|
| 293 |
# ========= Core actions =========
|
| 294 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 295 |
"""
|
| 296 |
+
(Tersisa untuk kasus lain yang butuh forward apa adanya.)
|
| 297 |
+
Di mode post-on-threshold kita pakai post_or_update(), bukan fungsi ini.
|
| 298 |
"""
|
| 299 |
if text_override is not None:
|
| 300 |
orig_text = text_override
|
| 301 |
+
entities = None
|
| 302 |
else:
|
| 303 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 304 |
entities = getattr(msg, "entities", None)
|
|
|
|
| 306 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 307 |
try:
|
| 308 |
if getattr(msg, "photo", None):
|
| 309 |
+
await client.send_file(TARGET_CHAT, msg.photo, caption=orig_text, caption_entities=entities, force_document=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
return
|
|
|
|
| 311 |
doc = getattr(msg, "document", None)
|
| 312 |
if doc:
|
| 313 |
data = await client.download_media(msg, file=bytes)
|
|
|
|
| 321 |
ext_guess = ".jpg"
|
| 322 |
ext = ext_guess
|
| 323 |
bio.name = f"media{ext}"
|
| 324 |
+
await client.send_file(TARGET_CHAT, bio, caption=orig_text, caption_entities=entities, force_document=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
return
|
| 326 |
except Exception as e:
|
| 327 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 328 |
|
| 329 |
+
await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
def _extract_main_keyword(text_norm: str) -> Optional[str]:
|
| 332 |
+
"""Ambil keyword utama pertama yang muncul. Toleran untuk $BTC -> btc."""
|
|
|
|
|
|
|
|
|
|
| 333 |
t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
|
| 334 |
for kw in THEME_KEYWORDS:
|
| 335 |
if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
|
|
|
|
| 343 |
|
| 344 |
async def process_message(msg, source_chat_id: int) -> None:
|
| 345 |
"""
|
| 346 |
+
Filter, dedup, relevansi, klasifikasi, gating support vs core,
|
| 347 |
+
potong kalimat ajakan, lalu POST/EDIT sesuai threshold.
|
| 348 |
"""
|
| 349 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 350 |
text_norm = normalize_for_filter(orig_text).lower()
|
|
|
|
| 355 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
| 356 |
return
|
| 357 |
|
| 358 |
+
# Dedup (berbasis teks norm + id media)
|
| 359 |
h = hash_for_dedup(text_norm, msg)
|
| 360 |
if h in recent_hashes:
|
| 361 |
debug_log("Duplikat, dilewati", orig_text)
|
|
|
|
| 372 |
|
| 373 |
# Tentukan keyword & kelas
|
| 374 |
main_kw = _extract_main_keyword(text_norm)
|
| 375 |
+
if not main_kw:
|
| 376 |
+
debug_log("Tak ada keyword utama yang cocok, dilewati", orig_text)
|
| 377 |
+
return
|
| 378 |
+
|
| 379 |
+
group_key = str(source_chat_id)
|
| 380 |
+
now = datetime.now(timezone.utc)
|
| 381 |
+
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 382 |
|
| 383 |
+
# Gating: support hanya tampil jika unique_groups >= SUPPORT_MIN_UNIQUE
|
| 384 |
if role == "support" and unique_groups < SUPPORT_MIN_UNIQUE:
|
| 385 |
debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 386 |
return
|
|
|
|
| 391 |
debug_log("Semua kalimat terfilter (kosong), dilewati", orig_text)
|
| 392 |
return
|
| 393 |
|
| 394 |
+
# Mode post-on-threshold + edit
|
| 395 |
+
await post_or_update(main_kw, cleaned_body, class_label, msg)
|
| 396 |
+
debug_log(f"Posted/Edited (role={role}, unique_groups={unique_groups}, kw={main_kw}, tier={class_label})", orig_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
|
| 399 |
async def backfill_history(entity, limit: int) -> None:
|