Spaces:
Sleeping
Sleeping
Update botsignal.py
Browse files- botsignal.py +33 -18
botsignal.py
CHANGED
|
@@ -8,9 +8,6 @@ from datetime import datetime, timedelta, timezone
|
|
| 8 |
from mimetypes import guess_extension
|
| 9 |
from typing import List, Tuple, Optional, Dict
|
| 10 |
|
| 11 |
-
import nest_asyncio
|
| 12 |
-
nest_asyncio.apply()
|
| 13 |
-
|
| 14 |
from rapidfuzz import fuzz
|
| 15 |
from telethon import TelegramClient, events
|
| 16 |
from telethon.sessions import StringSession, MemorySession
|
|
@@ -39,7 +36,7 @@ SOURCE_CHATS = [*CORE_CHATS, *SUPPORT_CHATS]
|
|
| 39 |
# Target (boleh @username / id / link)
|
| 40 |
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
|
| 41 |
|
| 42 |
-
# Kata kunci topik untuk relevansi
|
| 43 |
THEME_KEYWORDS = [
|
| 44 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 45 |
"pump", "spot", "futures", "setup",
|
|
@@ -49,7 +46,7 @@ KEYWORD_WEIGHT = 1.0
|
|
| 49 |
FUZZ_WEIGHT = 0.6
|
| 50 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 51 |
|
| 52 |
-
# Filter pengecualian
|
| 53 |
EXCLUDE_PHRASES = [
|
| 54 |
"achievement unlocked",
|
| 55 |
]
|
|
@@ -60,7 +57,7 @@ MAX_MEDIA_MB = float(os.environ.get("MAX_MEDIA_MB", "12"))
|
|
| 60 |
SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
|
| 61 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 62 |
|
| 63 |
-
#
|
| 64 |
INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
|
| 65 |
|
| 66 |
# Dedup buffer
|
|
@@ -96,33 +93,41 @@ def debug_log(reason: str, content: str = "") -> None:
|
|
| 96 |
def normalize_for_filter(text: str) -> str:
|
| 97 |
if not text:
|
| 98 |
return ""
|
| 99 |
-
|
|
|
|
| 100 |
s = re.sub(r"\s+", " ", s).strip()
|
| 101 |
return s
|
| 102 |
|
| 103 |
def score_relevance(text: str, keywords: List[str]) -> float:
|
|
|
|
| 104 |
if not text:
|
| 105 |
return 0.0
|
| 106 |
t = text.lower()
|
| 107 |
|
|
|
|
| 108 |
exact_hits = 0
|
| 109 |
for kw in keywords:
|
| 110 |
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 111 |
exact_hits += 1
|
| 112 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 113 |
|
|
|
|
| 114 |
fuzz_scores = sorted((fuzz.partial_ratio(kw, t) / 100.0 for kw in keywords), reverse=True)[:3]
|
| 115 |
fuzzy_score = (sum(fuzz_scores) / max(1, len(fuzz_scores))) * FUZZ_WEIGHT if fuzz_scores else 0.0
|
|
|
|
| 116 |
return exact_score + fuzzy_score
|
| 117 |
|
| 118 |
def hash_for_dedup(text: str, msg) -> str:
|
|
|
|
| 119 |
parts = [text or ""]
|
| 120 |
if getattr(msg, "id", None) is not None:
|
|
|
|
| 121 |
parts.append(str(msg.id))
|
| 122 |
doc = getattr(msg, "document", None)
|
| 123 |
if doc and getattr(doc, "id", None) is not None:
|
| 124 |
parts.append(f"doc:{doc.id}")
|
| 125 |
if getattr(msg, "photo", None) is not None:
|
|
|
|
| 126 |
ph = msg.photo
|
| 127 |
ph_id = getattr(ph, "id", None)
|
| 128 |
if ph_id is not None:
|
|
@@ -131,6 +136,7 @@ def hash_for_dedup(text: str, msg) -> str:
|
|
| 131 |
return hashlib.sha1(raw).hexdigest()
|
| 132 |
|
| 133 |
def is_image_message(msg) -> bool:
|
|
|
|
| 134 |
if getattr(msg, "photo", None) is not None:
|
| 135 |
return True
|
| 136 |
doc = getattr(msg, "document", None)
|
|
@@ -158,6 +164,7 @@ def media_too_big(msg) -> bool:
|
|
| 158 |
keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
|
| 159 |
|
| 160 |
def _prune_expired(now: datetime) -> None:
|
|
|
|
| 161 |
window = timedelta(minutes=CLASS_WINDOW_MINUTES)
|
| 162 |
cutoff = now - window
|
| 163 |
for kw, m in list(keyword_group_last_seen.items()):
|
|
@@ -194,15 +201,14 @@ def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] =
|
|
| 194 |
# ========= Core actions =========
|
| 195 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 196 |
"""
|
| 197 |
-
Forward konten 'apa adanya'
|
| 198 |
-
|
| 199 |
"""
|
| 200 |
-
orig_text = text_override if text_override is not None else (
|
| 201 |
-
msg.message or (getattr(msg, "raw_text", None) or "")
|
| 202 |
-
)
|
| 203 |
|
| 204 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 205 |
try:
|
|
|
|
| 206 |
if getattr(msg, "photo", None):
|
| 207 |
await client.send_file(
|
| 208 |
TARGET_CHAT,
|
|
@@ -213,11 +219,13 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 213 |
)
|
| 214 |
return
|
| 215 |
|
|
|
|
| 216 |
doc = getattr(msg, "document", None)
|
| 217 |
if doc:
|
| 218 |
data = await client.download_media(msg, file=bytes)
|
| 219 |
if data:
|
| 220 |
bio = io.BytesIO(data)
|
|
|
|
| 221 |
ext = ".jpg"
|
| 222 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 223 |
if mt:
|
|
@@ -237,6 +245,7 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 237 |
except Exception as e:
|
| 238 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 239 |
|
|
|
|
| 240 |
await client.send_message(
|
| 241 |
TARGET_CHAT,
|
| 242 |
orig_text,
|
|
@@ -245,9 +254,15 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 245 |
)
|
| 246 |
|
| 247 |
def _extract_main_keyword(text_norm: str) -> Optional[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
t = text_norm
|
| 249 |
-
|
|
|
|
| 250 |
for kw in THEME_KEYWORDS:
|
|
|
|
| 251 |
if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
|
| 252 |
return kw.lower()
|
| 253 |
return None
|
|
@@ -264,7 +279,7 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 264 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 265 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 266 |
|
| 267 |
-
#
|
| 268 |
for phrase in EXCLUDE_PHRASES:
|
| 269 |
if phrase.lower() in text_norm:
|
| 270 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
|
@@ -299,7 +314,7 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 299 |
debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 300 |
return
|
| 301 |
|
| 302 |
-
# Susun prefix kelas
|
| 303 |
text_to_send = orig_text
|
| 304 |
if class_label:
|
| 305 |
text_to_send = f"[{class_label.upper()}] {orig_text}"
|
|
@@ -309,6 +324,7 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 309 |
|
| 310 |
|
| 311 |
async def backfill_history(entity, limit: int) -> None:
|
|
|
|
| 312 |
if limit <= 0:
|
| 313 |
return
|
| 314 |
print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
|
|
@@ -344,7 +360,7 @@ async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
|
| 344 |
|
| 345 |
async def start_bot_background() -> None:
|
| 346 |
"""
|
| 347 |
-
Dipanggil dari server FastAPI
|
| 348 |
Menjalankan client + backfill tanpa memblokir web server.
|
| 349 |
"""
|
| 350 |
await client.start()
|
|
@@ -366,7 +382,7 @@ async def start_bot_background() -> None:
|
|
| 366 |
|
| 367 |
async def app_main() -> None:
|
| 368 |
"""
|
| 369 |
-
Mode legacy: `python botsignal.py` (blok hingga disconnect)
|
| 370 |
"""
|
| 371 |
await client.start()
|
| 372 |
|
|
@@ -382,5 +398,4 @@ async def app_main() -> None:
|
|
| 382 |
|
| 383 |
|
| 384 |
if __name__ == "__main__":
|
| 385 |
-
nest_asyncio.apply()
|
| 386 |
asyncio.run(app_main())
|
|
|
|
| 8 |
from mimetypes import guess_extension
|
| 9 |
from typing import List, Tuple, Optional, Dict
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
from rapidfuzz import fuzz
|
| 12 |
from telethon import TelegramClient, events
|
| 13 |
from telethon.sessions import StringSession, MemorySession
|
|
|
|
| 36 |
# Target (boleh @username / id / link)
|
| 37 |
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
|
| 38 |
|
| 39 |
+
# Kata kunci topik untuk relevansi
|
| 40 |
THEME_KEYWORDS = [
|
| 41 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 42 |
"pump", "spot", "futures", "setup",
|
|
|
|
| 46 |
FUZZ_WEIGHT = 0.6
|
| 47 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 48 |
|
| 49 |
+
# Filter pengecualian: jika ada frasa ini (case-insensitive) -> skip
|
| 50 |
EXCLUDE_PHRASES = [
|
| 51 |
"achievement unlocked",
|
| 52 |
]
|
|
|
|
| 57 |
SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
|
| 58 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 59 |
|
| 60 |
+
# History backfill saat startup
|
| 61 |
INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
|
| 62 |
|
| 63 |
# Dedup buffer
|
|
|
|
| 93 |
def normalize_for_filter(text: str) -> str:
    """Return *text* prepared for keyword filtering.

    Quoted lines are removed and every run of whitespace (including
    newlines) is collapsed into a single space.

    Args:
        text: Raw message text; may be empty or ``None``.

    Returns:
        The cleaned, stripped text, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    # Drop quote lines. Besides ">" in column 0 this also accepts up to three
    # leading spaces, so indented quote lines no longer leak into the
    # filter text.
    without_quotes = re.sub(r"(?m)^ {0,3}>.*$", "", text)
    # Collapse all whitespace runs into single spaces.
    return re.sub(r"\s+", " ", without_quotes).strip()
|
| 100 |
|
| 101 |
def score_relevance(text: str, keywords: List[str]) -> float:
    """Score how relevant *text* is to a list of topic keywords.

    The score is the sum of two parts:

    * exact part: the number of keywords contained in the lower-cased
      text, multiplied by ``KEYWORD_WEIGHT``;
    * fuzzy part: the mean of the three highest ``fuzz.partial_ratio``
      values (each divided by 100) over all keywords, multiplied by
      ``FUZZ_WEIGHT``.

    Args:
        text: Message text; matching is case-insensitive.
        keywords: Keywords to look for.

    Returns:
        The combined score; ``0.0`` when *text* is empty or falsy.
    """
    if not text:
        return 0.0
    haystack = text.lower()
    # Lower-case the keywords too: the text is lower-cased, so a keyword
    # containing upper-case letters could otherwise never match.
    needles = [kw.lower() for kw in keywords]

    # Exact hits. A plain substring test is sufficient: the former extra
    # word-boundary regex could only match where the substring test had
    # already succeeded, so it never changed the outcome.
    # NOTE(review): substring matching also counts short keywords inside
    # longer words (e.g. "tp" in "https") -- confirm this is intended.
    exact_hits = sum(1 for kw in needles if kw in haystack)
    exact_score = exact_hits * KEYWORD_WEIGHT

    # Fuzzy part: average of the (up to) three best partial ratios.
    top = sorted(
        (fuzz.partial_ratio(kw, haystack) / 100.0 for kw in needles),
        reverse=True,
    )[:3]
    fuzzy_score = (sum(top) / len(top)) * FUZZ_WEIGHT if top else 0.0

    return exact_score + fuzzy_score
|
| 119 |
|
| 120 |
def hash_for_dedup(text: str, msg) -> str:
|
| 121 |
+
"""Gabungkan teks dan sid media agar tidak double post."""
|
| 122 |
parts = [text or ""]
|
| 123 |
if getattr(msg, "id", None) is not None:
|
| 124 |
+
# id unik per chat; cukup bagus untuk dedup dalam window pendek
|
| 125 |
parts.append(str(msg.id))
|
| 126 |
doc = getattr(msg, "document", None)
|
| 127 |
if doc and getattr(doc, "id", None) is not None:
|
| 128 |
parts.append(f"doc:{doc.id}")
|
| 129 |
if getattr(msg, "photo", None) is not None:
|
| 130 |
+
# Telegram photo punya id juga
|
| 131 |
ph = msg.photo
|
| 132 |
ph_id = getattr(ph, "id", None)
|
| 133 |
if ph_id is not None:
|
|
|
|
| 136 |
return hashlib.sha1(raw).hexdigest()
|
| 137 |
|
| 138 |
def is_image_message(msg) -> bool:
|
| 139 |
+
"""True jika pesan mengandung foto atau dokumen gambar yang boleh."""
|
| 140 |
if getattr(msg, "photo", None) is not None:
|
| 141 |
return True
|
| 142 |
doc = getattr(msg, "document", None)
|
|
|
|
| 164 |
keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
|
| 165 |
|
| 166 |
def _prune_expired(now: datetime) -> None:
|
| 167 |
+
"""Bersihkan entri yang sudah lewat jendela waktu agar hit tetap fresh."""
|
| 168 |
window = timedelta(minutes=CLASS_WINDOW_MINUTES)
|
| 169 |
cutoff = now - window
|
| 170 |
for kw, m in list(keyword_group_last_seen.items()):
|
|
|
|
| 201 |
# ========= Core actions =========
|
| 202 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 203 |
"""
|
| 204 |
+
Forward konten 'apa adanya' ke TARGET_CHAT, dengan media bila sesuai.
|
| 205 |
+
Bisa diberi text_override untuk menambahkan prefix class.
|
| 206 |
"""
|
| 207 |
+
orig_text = text_override if text_override is not None else (msg.message or (getattr(msg, "raw_text", None) or ""))
|
|
|
|
|
|
|
| 208 |
|
| 209 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 210 |
try:
|
| 211 |
+
# Jika pesan berupa photo asli
|
| 212 |
if getattr(msg, "photo", None):
|
| 213 |
await client.send_file(
|
| 214 |
TARGET_CHAT,
|
|
|
|
| 219 |
)
|
| 220 |
return
|
| 221 |
|
| 222 |
+
# Jika berupa document image/gif/video
|
| 223 |
doc = getattr(msg, "document", None)
|
| 224 |
if doc:
|
| 225 |
data = await client.download_media(msg, file=bytes)
|
| 226 |
if data:
|
| 227 |
bio = io.BytesIO(data)
|
| 228 |
+
# Tentukan ekstensi file dari mime
|
| 229 |
ext = ".jpg"
|
| 230 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 231 |
if mt:
|
|
|
|
| 245 |
except Exception as e:
|
| 246 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 247 |
|
| 248 |
+
# Fallback: text saja
|
| 249 |
await client.send_message(
|
| 250 |
TARGET_CHAT,
|
| 251 |
orig_text,
|
|
|
|
| 254 |
)
|
| 255 |
|
| 256 |
def _extract_main_keyword(text_norm: str) -> Optional[str]:
    """Return the first ``THEME_KEYWORDS`` entry found in *text_norm* as a whole word.

    Keywords are tried in the order they appear in ``THEME_KEYWORDS``. A
    keyword counts as found when it is delimited on both sides by a
    non-word character or by the start/end of the text; matching is
    case-insensitive.

    Args:
        text_norm: Normalized message text.

    Returns:
        The matching keyword in lower case, or ``None`` if none matches.
    """
    # Drop the leading "$" of ticker-style tokens ($btc -> btc).
    cleaned = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
    # Lazy generator: stops at the first keyword that matches.
    found = (
        candidate.lower()
        for candidate in THEME_KEYWORDS
        if re.search(rf"(^|\W){re.escape(candidate)}(\W|$)", cleaned, flags=re.I)
    )
    return next(found, None)
|
|
|
|
| 279 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 280 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 281 |
|
| 282 |
+
# Pengecualian eksplisit
|
| 283 |
for phrase in EXCLUDE_PHRASES:
|
| 284 |
if phrase.lower() in text_norm:
|
| 285 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
|
|
|
| 314 |
debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 315 |
return
|
| 316 |
|
| 317 |
+
# Susun prefix kelas di awal teks
|
| 318 |
text_to_send = orig_text
|
| 319 |
if class_label:
|
| 320 |
text_to_send = f"[{class_label.upper()}] {orig_text}"
|
|
|
|
| 324 |
|
| 325 |
|
| 326 |
async def backfill_history(entity, limit: int) -> None:
|
| 327 |
+
"""Tarik pesan lama dari suatu source untuk diproses (opsional)."""
|
| 328 |
if limit <= 0:
|
| 329 |
return
|
| 330 |
print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
|
|
|
|
| 360 |
|
| 361 |
async def start_bot_background() -> None:
|
| 362 |
"""
|
| 363 |
+
Dipanggil dari server FastAPI saat startup.
|
| 364 |
Menjalankan client + backfill tanpa memblokir web server.
|
| 365 |
"""
|
| 366 |
await client.start()
|
|
|
|
| 382 |
|
| 383 |
async def app_main() -> None:
|
| 384 |
"""
|
| 385 |
+
Mode legacy: jalankan `python botsignal.py` (blok hingga disconnect).
|
| 386 |
"""
|
| 387 |
await client.start()
|
| 388 |
|
|
|
|
| 398 |
|
| 399 |
|
| 400 |
if __name__ == "__main__":
|
|
|
|
| 401 |
asyncio.run(app_main())
|