agus1111 committed on
Commit
b27991a
·
verified ·
1 Parent(s): 456bc10

Update botsignal.py

Browse files
Files changed (1) hide show
  1. botsignal.py +33 -18
botsignal.py CHANGED
@@ -8,9 +8,6 @@ from datetime import datetime, timedelta, timezone
8
  from mimetypes import guess_extension
9
  from typing import List, Tuple, Optional, Dict
10
 
11
- import nest_asyncio
12
- nest_asyncio.apply()
13
-
14
  from rapidfuzz import fuzz
15
  from telethon import TelegramClient, events
16
  from telethon.sessions import StringSession, MemorySession
@@ -39,7 +36,7 @@ SOURCE_CHATS = [*CORE_CHATS, *SUPPORT_CHATS]
39
  # Target (boleh @username / id / link)
40
  TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
41
 
42
- # Kata kunci topik untuk relevansi (tetap dari versi sebelumnya)
43
  THEME_KEYWORDS = [
44
  "call", "signal", "entry", "buy", "sell", "tp", "sl",
45
  "pump", "spot", "futures", "setup",
@@ -49,7 +46,7 @@ KEYWORD_WEIGHT = 1.0
49
  FUZZ_WEIGHT = 0.6
50
  RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
51
 
52
- # Filter pengecualian
53
  EXCLUDE_PHRASES = [
54
  "achievement unlocked",
55
  ]
@@ -60,7 +57,7 @@ MAX_MEDIA_MB = float(os.environ.get("MAX_MEDIA_MB", "12"))
60
  SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
61
  ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
62
 
63
- # Backfill
64
  INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
65
 
66
  # Dedup buffer
@@ -96,33 +93,41 @@ def debug_log(reason: str, content: str = "") -> None:
96
  def normalize_for_filter(text: str) -> str:
97
  if not text:
98
  return ""
99
- s = re.sub(r"(?m)^>.*$", "", text) # hilangin quote
 
100
  s = re.sub(r"\s+", " ", s).strip()
101
  return s
102
 
103
  def score_relevance(text: str, keywords: List[str]) -> float:
 
104
  if not text:
105
  return 0.0
106
  t = text.lower()
107
 
 
108
  exact_hits = 0
109
  for kw in keywords:
110
  if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
111
  exact_hits += 1
112
  exact_score = exact_hits * KEYWORD_WEIGHT
113
 
 
114
  fuzz_scores = sorted((fuzz.partial_ratio(kw, t) / 100.0 for kw in keywords), reverse=True)[:3]
115
  fuzzy_score = (sum(fuzz_scores) / max(1, len(fuzz_scores))) * FUZZ_WEIGHT if fuzz_scores else 0.0
 
116
  return exact_score + fuzzy_score
117
 
118
  def hash_for_dedup(text: str, msg) -> str:
 
119
  parts = [text or ""]
120
  if getattr(msg, "id", None) is not None:
 
121
  parts.append(str(msg.id))
122
  doc = getattr(msg, "document", None)
123
  if doc and getattr(doc, "id", None) is not None:
124
  parts.append(f"doc:{doc.id}")
125
  if getattr(msg, "photo", None) is not None:
 
126
  ph = msg.photo
127
  ph_id = getattr(ph, "id", None)
128
  if ph_id is not None:
@@ -131,6 +136,7 @@ def hash_for_dedup(text: str, msg) -> str:
131
  return hashlib.sha1(raw).hexdigest()
132
 
133
  def is_image_message(msg) -> bool:
 
134
  if getattr(msg, "photo", None) is not None:
135
  return True
136
  doc = getattr(msg, "document", None)
@@ -158,6 +164,7 @@ def media_too_big(msg) -> bool:
158
  keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
159
 
160
  def _prune_expired(now: datetime) -> None:
 
161
  window = timedelta(minutes=CLASS_WINDOW_MINUTES)
162
  cutoff = now - window
163
  for kw, m in list(keyword_group_last_seen.items()):
@@ -194,15 +201,14 @@ def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] =
194
  # ========= Core actions =========
195
  async def send_as_is(msg, text_override: Optional[str] = None) -> None:
196
  """
197
- Forward konten 'apa adanya' (termasuk media yang diizinkan).
198
- Kompatibel dengan versi awal kamu:contentReference[oaicite:2]{index=2}.
199
  """
200
- orig_text = text_override if text_override is not None else (
201
- msg.message or (getattr(msg, "raw_text", None) or "")
202
- )
203
 
204
  if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
205
  try:
 
206
  if getattr(msg, "photo", None):
207
  await client.send_file(
208
  TARGET_CHAT,
@@ -213,11 +219,13 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
213
  )
214
  return
215
 
 
216
  doc = getattr(msg, "document", None)
217
  if doc:
218
  data = await client.download_media(msg, file=bytes)
219
  if data:
220
  bio = io.BytesIO(data)
 
221
  ext = ".jpg"
222
  mt = (getattr(doc, "mime_type", "") or "").lower()
223
  if mt:
@@ -237,6 +245,7 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
237
  except Exception as e:
238
  debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
239
 
 
240
  await client.send_message(
241
  TARGET_CHAT,
242
  orig_text,
@@ -245,9 +254,15 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
245
  )
246
 
247
  def _extract_main_keyword(text_norm: str) -> Optional[str]:
 
 
 
 
248
  t = text_norm
249
- t = re.sub(r"\$([a-z0-9]+)", r"\1", t, flags=re.I) # $BTC -> btc
 
250
  for kw in THEME_KEYWORDS:
 
251
  if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
252
  return kw.lower()
253
  return None
@@ -264,7 +279,7 @@ async def process_message(msg, source_chat_id: int) -> None:
264
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
265
  text_norm = normalize_for_filter(orig_text).lower()
266
 
267
- # Exclude phrases (case-insensitive)
268
  for phrase in EXCLUDE_PHRASES:
269
  if phrase.lower() in text_norm:
270
  debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
@@ -299,7 +314,7 @@ async def process_message(msg, source_chat_id: int) -> None:
299
  debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
300
  return
301
 
302
- # Susun prefix kelas
303
  text_to_send = orig_text
304
  if class_label:
305
  text_to_send = f"[{class_label.upper()}] {orig_text}"
@@ -309,6 +324,7 @@ async def process_message(msg, source_chat_id: int) -> None:
309
 
310
 
311
  async def backfill_history(entity, limit: int) -> None:
 
312
  if limit <= 0:
313
  return
314
  print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
@@ -344,7 +360,7 @@ async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
344
 
345
  async def start_bot_background() -> None:
346
  """
347
- Dipanggil dari server FastAPI (server.py) saat startup:contentReference[oaicite:3]{index=3}.
348
  Menjalankan client + backfill tanpa memblokir web server.
349
  """
350
  await client.start()
@@ -366,7 +382,7 @@ async def start_bot_background() -> None:
366
 
367
  async def app_main() -> None:
368
  """
369
- Mode legacy: `python botsignal.py` (blok hingga disconnect):contentReference[oaicite:4]{index=4}.
370
  """
371
  await client.start()
372
 
@@ -382,5 +398,4 @@ async def app_main() -> None:
382
 
383
 
384
  if __name__ == "__main__":
385
- nest_asyncio.apply()
386
  asyncio.run(app_main())
 
8
  from mimetypes import guess_extension
9
  from typing import List, Tuple, Optional, Dict
10
 
 
 
 
11
  from rapidfuzz import fuzz
12
  from telethon import TelegramClient, events
13
  from telethon.sessions import StringSession, MemorySession
 
36
  # Target (boleh @username / id / link)
37
  TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
38
 
39
+ # Kata kunci topik untuk relevansi
40
  THEME_KEYWORDS = [
41
  "call", "signal", "entry", "buy", "sell", "tp", "sl",
42
  "pump", "spot", "futures", "setup",
 
46
  FUZZ_WEIGHT = 0.6
47
  RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
48
 
49
+ # Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip
50
  EXCLUDE_PHRASES = [
51
  "achievement unlocked",
52
  ]
 
57
  SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
58
  ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
59
 
60
+ # History backfill saat startup
61
  INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
62
 
63
  # Dedup buffer
 
93
  def normalize_for_filter(text: str) -> str:
94
  if not text:
95
  return ""
96
+ # Hilangkan baris quote >..., rapikan whitespace
97
+ s = re.sub(r"(?m)^>.*$", "", text)
98
  s = re.sub(r"\s+", " ", s).strip()
99
  return s
100
 
101
  def score_relevance(text: str, keywords: List[str]) -> float:
102
+ """Skor sederhana: exact keyword + fuzzy ratio rata-rata top 3."""
103
  if not text:
104
  return 0.0
105
  t = text.lower()
106
 
107
+ # exact hits
108
  exact_hits = 0
109
  for kw in keywords:
110
  if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
111
  exact_hits += 1
112
  exact_score = exact_hits * KEYWORD_WEIGHT
113
 
114
+ # fuzzy hits (ambil 3 nilai tertinggi)
115
  fuzz_scores = sorted((fuzz.partial_ratio(kw, t) / 100.0 for kw in keywords), reverse=True)[:3]
116
  fuzzy_score = (sum(fuzz_scores) / max(1, len(fuzz_scores))) * FUZZ_WEIGHT if fuzz_scores else 0.0
117
+
118
  return exact_score + fuzzy_score
119
 
120
  def hash_for_dedup(text: str, msg) -> str:
121
+ """Gabungkan teks dan sid media agar tidak double post."""
122
  parts = [text or ""]
123
  if getattr(msg, "id", None) is not None:
124
+ # id unik per chat; cukup bagus untuk dedup dalam window pendek
125
  parts.append(str(msg.id))
126
  doc = getattr(msg, "document", None)
127
  if doc and getattr(doc, "id", None) is not None:
128
  parts.append(f"doc:{doc.id}")
129
  if getattr(msg, "photo", None) is not None:
130
+ # Telegram photo punya id juga
131
  ph = msg.photo
132
  ph_id = getattr(ph, "id", None)
133
  if ph_id is not None:
 
136
  return hashlib.sha1(raw).hexdigest()
137
 
138
  def is_image_message(msg) -> bool:
139
+ """True jika pesan mengandung foto atau dokumen gambar yang boleh."""
140
  if getattr(msg, "photo", None) is not None:
141
  return True
142
  doc = getattr(msg, "document", None)
 
164
  keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
165
 
166
  def _prune_expired(now: datetime) -> None:
167
+ """Bersihkan entri yang sudah lewat jendela waktu agar hit tetap fresh."""
168
  window = timedelta(minutes=CLASS_WINDOW_MINUTES)
169
  cutoff = now - window
170
  for kw, m in list(keyword_group_last_seen.items()):
 
201
  # ========= Core actions =========
202
  async def send_as_is(msg, text_override: Optional[str] = None) -> None:
203
  """
204
+ Forward konten 'apa adanya' ke TARGET_CHAT, dengan media bila sesuai.
205
+ Bisa diberi text_override untuk menambahkan prefix class.
206
  """
207
+ orig_text = text_override if text_override is not None else (msg.message or (getattr(msg, "raw_text", None) or ""))
 
 
208
 
209
  if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
210
  try:
211
+ # Jika pesan berupa photo asli
212
  if getattr(msg, "photo", None):
213
  await client.send_file(
214
  TARGET_CHAT,
 
219
  )
220
  return
221
 
222
+ # Jika berupa document image/gif/video
223
  doc = getattr(msg, "document", None)
224
  if doc:
225
  data = await client.download_media(msg, file=bytes)
226
  if data:
227
  bio = io.BytesIO(data)
228
+ # Tentukan ekstensi file dari mime
229
  ext = ".jpg"
230
  mt = (getattr(doc, "mime_type", "") or "").lower()
231
  if mt:
 
245
  except Exception as e:
246
  debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
247
 
248
+ # Fallback: text saja
249
  await client.send_message(
250
  TARGET_CHAT,
251
  orig_text,
 
254
  )
255
 
256
  def _extract_main_keyword(text_norm: str) -> Optional[str]:
257
+ """
258
+ Ambil keyword utama pertama yang muncul.
259
+ Pakai boundary agar 'btc' benar2 kata, tapi tetap toleran untuk simbol seperti $BTC.
260
+ """
261
  t = text_norm
262
+ # normalisasi simbol ticker $btc -> btc
263
+ t = re.sub(r"\$([a-z0-9]+)", r"\1", t, flags=re.I)
264
  for kw in THEME_KEYWORDS:
265
+ # cocokkan sebagai kata utuh atau substring yang sangat jelas
266
  if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
267
  return kw.lower()
268
  return None
 
279
  orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
280
  text_norm = normalize_for_filter(orig_text).lower()
281
 
282
+ # Pengecualian eksplisit
283
  for phrase in EXCLUDE_PHRASES:
284
  if phrase.lower() in text_norm:
285
  debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
 
314
  debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
315
  return
316
 
317
+ # Susun prefix kelas di awal teks
318
  text_to_send = orig_text
319
  if class_label:
320
  text_to_send = f"[{class_label.upper()}] {orig_text}"
 
324
 
325
 
326
  async def backfill_history(entity, limit: int) -> None:
327
+ """Tarik pesan lama dari suatu source untuk diproses (opsional)."""
328
  if limit <= 0:
329
  return
330
  print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
 
360
 
361
  async def start_bot_background() -> None:
362
  """
363
+ Dipanggil dari server FastAPI saat startup.
364
  Menjalankan client + backfill tanpa memblokir web server.
365
  """
366
  await client.start()
 
382
 
383
  async def app_main() -> None:
384
  """
385
+ Mode legacy: jalankan `python botsignal.py` (blok hingga disconnect).
386
  """
387
  await client.start()
388
 
 
398
 
399
 
400
  if __name__ == "__main__":
 
401
  asyncio.run(app_main())