agus1111 committed on
Commit
3808952
·
verified ·
1 Parent(s): 004cfa4

Update botsignal.py

Browse files
Files changed (1) hide show
  1. botsignal.py +92 -62
botsignal.py CHANGED
@@ -19,29 +19,24 @@ API_HASH = os.environ.get("API_HASH", "")
19
  STRING_SESSION = os.environ.get("STRING_SESSION", "")
20
 
21
  # --- Definisikan sumber sebagai CORE vs SUPPORT ---
22
- # (pakai data milikmu)
23
  CORE_CHATS = [
24
  "https://t.me/PEPE_Calls28",
25
- "https://t.me/HenryGems",
 
 
26
  "https://t.me/ChinaPumpCommunity",
27
  "https://t.me/Milagrosdegencalls",
28
  "https://t.me/GM_Degencalls",
29
  ]
30
- SUPPORT_CHATS = [
31
- "https://t.me/SephirothGemCalls1",
32
- "https://t.me/TheDonALPHAJournal",
33
- "https://t.me/savascalls",
34
- "https://t.me/Tanjirocall",
35
- "https://t.me/ChapoInsider",
36
- ]
37
 
38
- # Gabungan untuk handler event (hindari unpack agar sederhana)
39
  SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
40
 
41
  # Target (boleh @username / id / link)
42
- TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignalll")
43
 
44
- # Kata kunci topik untuk relevansi
45
  THEME_KEYWORDS = [
46
  "call", "signal", "entry", "buy", "sell", "tp", "sl",
47
  "pump", "spot", "futures", "setup",
@@ -51,7 +46,7 @@ KEYWORD_WEIGHT = 1.0
51
  FUZZ_WEIGHT = 0.6
52
  RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
53
 
54
- # Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip seluruh pesan
55
  EXCLUDE_PHRASES = [
56
  "achievement unlocked",
57
  ]
@@ -202,7 +197,6 @@ def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] =
202
 
203
 
204
  # ========= Sentence-level invite filter =========
205
- # Pola ajakan (join/DM/VIP/premium/berbayar/dll) — case-insensitive.
206
  INVITE_PATTERNS = [
207
  r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
208
  r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
@@ -223,32 +217,88 @@ def _is_invite_sentence(s: str) -> bool:
223
 
224
  def filter_invite_sentences(text: str) -> str:
225
  """
226
- Hapus hanya kalimat/segmen yang mengandung ajakan (join/DM/VIP/dll),
227
- tapi pertahankan bubble aslinya (gabungkan kembali jadi satu pesan).
228
  """
229
  if not text:
230
  return text
231
-
232
- # Pisah berdasarkan akhir kalimat . ! ? atau newline (umum di Telegram)
233
  parts = re.split(r'(?<=[\.\!\?])\s+|\n+', text, flags=re.UNICODE)
234
  kept = [p.strip() for p in parts if p and not _is_invite_sentence(p)]
235
-
236
- # Gabungkan kembali; gunakan newline agar tetap rapi, tapi tetap 1 bubble.
237
  cleaned = "\n".join(kept).strip()
238
- # Rapikan newline ganda
239
  cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
240
  return cleaned
241
 
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  # ========= Core actions =========
244
  async def send_as_is(msg, text_override: Optional[str] = None) -> None:
245
  """
246
- Forward konten ke TARGET_CHAT, dengan media bila sesuai.
247
- Jika text_override diberikan, entity asli TIDAK dipertahankan (offset berubah).
248
  """
249
  if text_override is not None:
250
  orig_text = text_override
251
- entities = None # jangan pakai entities lama: offset sudah tidak cocok
252
  else:
253
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
254
  entities = getattr(msg, "entities", None)
@@ -256,13 +306,8 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
256
  if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
257
  try:
258
  if getattr(msg, "photo", None):
259
- await client.send_file(
260
- TARGET_CHAT, msg.photo,
261
- caption=orig_text, caption_entities=entities,
262
- force_document=False,
263
- )
264
  return
265
-
266
  doc = getattr(msg, "document", None)
267
  if doc:
268
  data = await client.download_media(msg, file=bytes)
@@ -276,27 +321,15 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
276
  ext_guess = ".jpg"
277
  ext = ext_guess
278
  bio.name = f"media{ext}"
279
- await client.send_file(
280
- TARGET_CHAT, bio,
281
- caption=orig_text, caption_entities=entities,
282
- force_document=False,
283
- )
284
  return
285
  except Exception as e:
286
  debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
287
 
288
- await client.send_message(
289
- TARGET_CHAT,
290
- orig_text,
291
- formatting_entities=entities,
292
- link_preview=True,
293
- )
294
 
295
  def _extract_main_keyword(text_norm: str) -> Optional[str]:
296
- """
297
- Ambil keyword utama pertama yang muncul.
298
- Toleran untuk ticker $BTC -> btc.
299
- """
300
  t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
301
  for kw in THEME_KEYWORDS:
302
  if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
@@ -310,7 +343,8 @@ def _role_of(chat_id: int) -> str:
310
 
311
  async def process_message(msg, source_chat_id: int) -> None:
312
  """
313
- Filter, dedup, relevansi, klasifikasi, gating support vs core, potong kalimat ajakan, lalu kirim.
 
314
  """
315
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
316
  text_norm = normalize_for_filter(orig_text).lower()
@@ -321,7 +355,7 @@ async def process_message(msg, source_chat_id: int) -> None:
321
  debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
322
  return
323
 
324
- # Dedup
325
  h = hash_for_dedup(text_norm, msg)
326
  if h in recent_hashes:
327
  debug_log("Duplikat, dilewati", orig_text)
@@ -338,14 +372,15 @@ async def process_message(msg, source_chat_id: int) -> None:
338
 
339
  # Tentukan keyword & kelas
340
  main_kw = _extract_main_keyword(text_norm)
341
- class_label = None
342
- unique_groups = 0
343
- if main_kw:
344
- group_key = str(source_chat_id)
345
- now = datetime.now(timezone.utc)
346
- class_label, unique_groups = update_and_classify(main_kw, group_key, now)
 
347
 
348
- # Gating: support hanya kirim jika unique_groups >= SUPPORT_MIN_UNIQUE
349
  if role == "support" and unique_groups < SUPPORT_MIN_UNIQUE:
350
  debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
351
  return
@@ -356,14 +391,9 @@ async def process_message(msg, source_chat_id: int) -> None:
356
  debug_log("Semua kalimat terfilter (kosong), dilewati", orig_text)
357
  return
358
 
359
- # Prefix kelas di awal teks (jika ada)
360
- if class_label:
361
- text_to_send = f"[{class_label.upper()}] {cleaned_body}"
362
- else:
363
- text_to_send = cleaned_body
364
-
365
- await send_as_is(msg, text_override=text_to_send)
366
- debug_log(f"Dikirim ke target (role={role}, unique_groups={unique_groups})", orig_text)
367
 
368
 
369
  async def backfill_history(entity, limit: int) -> None:
 
19
  STRING_SESSION = os.environ.get("STRING_SESSION", "")
20
 
21
  # --- Definisikan sumber sebagai CORE vs SUPPORT ---
22
+ # Boleh: "@username", id (int), atau "https://t.me/xxxxx"
23
  CORE_CHATS = [
24
  "https://t.me/PEPE_Calls28",
25
+ "https://t.me/Tanjirocall",
26
+ ]
27
+ SUPPORT_CHATS = [
28
  "https://t.me/ChinaPumpCommunity",
29
  "https://t.me/Milagrosdegencalls",
30
  "https://t.me/GM_Degencalls",
31
  ]
 
 
 
 
 
 
 
32
 
33
+ # Gabungan untuk handler event
34
  SOURCE_CHATS = CORE_CHATS + SUPPORT_CHATS
35
 
36
  # Target (boleh @username / id / link)
37
+ TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
38
 
39
+ # Kata kunci topik untuk relevansi (termasuk ticker)
40
  THEME_KEYWORDS = [
41
  "call", "signal", "entry", "buy", "sell", "tp", "sl",
42
  "pump", "spot", "futures", "setup",
 
46
  FUZZ_WEIGHT = 0.6
47
  RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
48
 
49
+ # Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip bubble
50
  EXCLUDE_PHRASES = [
51
  "achievement unlocked",
52
  ]
 
197
 
198
 
199
  # ========= Sentence-level invite filter =========
 
200
  INVITE_PATTERNS = [
201
  r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
202
  r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
 
217
 
218
  def filter_invite_sentences(text: str) -> str:
219
  """
220
+ Hapus kalimat yang mengandung ajakan (join/DM/VIP/dll),
221
+ tapi pertahankan 1 bubble (gabung ulang).
222
  """
223
  if not text:
224
  return text
 
 
225
  parts = re.split(r'(?<=[\.\!\?])\s+|\n+', text, flags=re.UNICODE)
226
  kept = [p.strip() for p in parts if p and not _is_invite_sentence(p)]
 
 
227
  cleaned = "\n".join(kept).strip()
 
228
  cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
229
  return cleaned
230
 
231
 
232
+ # ========= Post-on-threshold with EDIT =========
233
+ TIER_ORDER = {"rendah": 0, "sedang": 1, "kuat": 2}
234
+ last_posted: Dict[str, Dict[str, object]] = {} # keyword -> {"msg_id": int, "tier": str}
235
+
236
async def _send_initial(msg, text: str) -> int:
    """Post ``text`` to TARGET_CHAT for the first time and return the new message id.

    When media is enabled and the source message carries an acceptable image,
    the image is sent with ``text`` as its caption; a photo is re-sent directly,
    while a document is downloaded and re-uploaded under a ``media<ext>`` name.
    Any failure on the media path is logged and the text is sent as a plain
    message instead. The source message's entities are never reused because the
    text has already been altered (prefix added / invite sentences removed).
    """
    media_allowed = INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg)
    if media_allowed:
        try:
            if getattr(msg, "photo", None):
                sent = await client.send_file(
                    TARGET_CHAT,
                    msg.photo,
                    caption=text,
                    caption_entities=None,
                    force_document=False,
                )
                return sent.id

            document = getattr(msg, "document", None)
            payload = await client.download_media(msg, file=bytes) if document else None
            if payload:
                buffer = io.BytesIO(payload)
                # Derive the file extension from the MIME type; default to .jpg
                # and normalise the ".jpe" alias that mimetypes may return.
                mime = (getattr(document, "mime_type", "") or "").lower()
                suffix = (guess_extension(mime) or ".jpg") if mime else ".jpg"
                if suffix == ".jpe":
                    suffix = ".jpg"
                buffer.name = f"media{suffix}"
                sent = await client.send_file(
                    TARGET_CHAT,
                    buffer,
                    caption=text,
                    caption_entities=None,
                    force_document=False,
                )
                return sent.id
        except Exception as err:
            # Best-effort: fall through to the plain-text send below.
            debug_log("Gagal kirim media awal, fallback text", str(err))

    sent = await client.send_message(TARGET_CHAT, text, link_preview=True)
    return sent.id
264
+
265
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg) -> None:
    """
    Post once per keyword; when the tier rises, EDIT the earlier post in place.

    The outgoing text is ``body`` prefixed with the upper-cased tier in square
    brackets. If the keyword has no recorded post, a fresh one is sent and
    recorded in ``last_posted``. If a post exists and ``new_tier`` ranks higher
    (per ``TIER_ORDER``), that message is edited; should the edit fail, a new
    message is sent and recorded instead. Equal or lower tiers are ignored to
    avoid spamming the target chat.
    """
    rendered = f"[{new_tier.upper()}] " + body

    async def _post_fresh() -> None:
        # Send a brand-new message and remember its id and tier for this keyword.
        new_id = await _send_initial(src_msg, rendered)
        last_posted[keyword] = {"msg_id": new_id, "tier": new_tier}

    record = last_posted.get(keyword)
    if not record:
        await _post_fresh()
        return

    # Already posted: act only when the tier actually went up.
    tier_rose = TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(record["tier"], 0)
    if not tier_rose:
        return

    try:
        await client.edit_message(TARGET_CHAT, record["msg_id"], rendered)
        record["tier"] = new_tier
    except Exception as err:
        debug_log("Edit gagal, fallback kirim baru", str(err))
        await _post_fresh()
291
+
292
+
293
  # ========= Core actions =========
294
  async def send_as_is(msg, text_override: Optional[str] = None) -> None:
295
  """
296
+ (Tersisa untuk kasus lain yang butuh forward apa adanya.)
297
+ Di mode post-on-threshold kita pakai post_or_update(), bukan fungsi ini.
298
  """
299
  if text_override is not None:
300
  orig_text = text_override
301
+ entities = None
302
  else:
303
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
304
  entities = getattr(msg, "entities", None)
 
306
  if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
307
  try:
308
  if getattr(msg, "photo", None):
309
+ await client.send_file(TARGET_CHAT, msg.photo, caption=orig_text, caption_entities=entities, force_document=False)
 
 
 
 
310
  return
 
311
  doc = getattr(msg, "document", None)
312
  if doc:
313
  data = await client.download_media(msg, file=bytes)
 
321
  ext_guess = ".jpg"
322
  ext = ext_guess
323
  bio.name = f"media{ext}"
324
+ await client.send_file(TARGET_CHAT, bio, caption=orig_text, caption_entities=entities, force_document=False)
 
 
 
 
325
  return
326
  except Exception as e:
327
  debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
328
 
329
+ await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
 
 
 
 
 
330
 
331
  def _extract_main_keyword(text_norm: str) -> Optional[str]:
332
+ """Ambil keyword utama pertama yang muncul. Toleran untuk $BTC -> btc."""
 
 
 
333
  t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
334
  for kw in THEME_KEYWORDS:
335
  if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
 
343
 
344
  async def process_message(msg, source_chat_id: int) -> None:
345
  """
346
+ Filter, dedup, relevansi, klasifikasi, gating support vs core,
347
+ potong kalimat ajakan, lalu POST/EDIT sesuai threshold.
348
  """
349
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
350
  text_norm = normalize_for_filter(orig_text).lower()
 
355
  debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
356
  return
357
 
358
+ # Dedup (berbasis teks norm + id media)
359
  h = hash_for_dedup(text_norm, msg)
360
  if h in recent_hashes:
361
  debug_log("Duplikat, dilewati", orig_text)
 
372
 
373
  # Tentukan keyword & kelas
374
  main_kw = _extract_main_keyword(text_norm)
375
+ if not main_kw:
376
+ debug_log("Tak ada keyword utama yang cocok, dilewati", orig_text)
377
+ return
378
+
379
+ group_key = str(source_chat_id)
380
+ now = datetime.now(timezone.utc)
381
+ class_label, unique_groups = update_and_classify(main_kw, group_key, now)
382
 
383
+ # Gating: support hanya tampil jika unique_groups >= SUPPORT_MIN_UNIQUE
384
  if role == "support" and unique_groups < SUPPORT_MIN_UNIQUE:
385
  debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
386
  return
 
391
  debug_log("Semua kalimat terfilter (kosong), dilewati", orig_text)
392
  return
393
 
394
+ # Mode post-on-threshold + edit
395
+ await post_or_update(main_kw, cleaned_body, class_label, msg)
396
+ debug_log(f"Posted/Edited (role={role}, unique_groups={unique_groups}, kw={main_kw}, tier={class_label})", orig_text)
 
 
 
 
 
397
 
398
 
399
  async def backfill_history(entity, limit: int) -> None: