agus1111 committed on
Commit
4d514f6
·
verified ·
1 Parent(s): 15407f1

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +27 -0
  2. README.md +9 -0
  3. botsignal.py +300 -0
  4. requirements.txt +5 -0
  5. server.py +20 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- Base image ----
2
+ FROM python:3.11-slim
3
+
4
+ # ---- System deps (keep them minimal) ----
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ build-essential curl && \
7
+ rm -rf /var/lib/apt/lists/*
8
+
9
+ # ---- Workdir ----
10
+ WORKDIR /app
11
+
12
+ # ---- Python deps (copied first so this layer is cached) ----
13
+ COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # ---- App source ----
17
+ COPY . .
18
+
19
+ # ---- Port from HF (default only; the CMD reads ${PORT}, do not hardcode it there) ----
20
+ ENV PORT=7860
21
+ EXPOSE 7860
22
+
23
+ # (optional but useful) unbuffered output so logs appear in real time
24
+ ENV PYTHONUNBUFFERED=1
25
+
26
+ # ---- Run FastAPI (Telethon is started in the startup event) ----
27
+ CMD ["bash", "-lc", "uvicorn server:app --host 0.0.0.0 --port ${PORT}"]
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Telegram scraping text
3
+ sdk: docker
4
+ app_port: 7860
5
+ pinned: false
6
+ ---
7
+
8
+ # My Awesome Telegram Bot
9
+ Ini adalah bot yang meneruskan pesan dari beberapa channel.
botsignal.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import re
4
+ import io
5
+ import hashlib
6
+ from collections import deque
7
+ from mimetypes import guess_extension
8
+ from typing import List, Optional
9
+
10
+ import nest_asyncio
11
+ nest_asyncio.apply()
12
+
13
+ from rapidfuzz import fuzz
14
+ from telethon import TelegramClient, events
15
+ from telethon.sessions import StringSession, MemorySession
16
+
17
+
18
# ========= Configuration via environment =========
def _env_flag(name: str, default: str) -> bool:
    """Return True only when the variable (or its default) is exactly "1"."""
    return os.environ.get(name, default) == "1"


def _env_number(name: str, default: str, cast=int):
    """Read an environment variable and convert it with *cast* (int or float)."""
    return cast(os.environ.get(name, default))


# Telegram API credentials and the optional serialized session string.
API_ID = _env_number("API_ID", "0")
API_HASH = os.environ.get("API_HASH", "")
STRING_SESSION = os.environ.get("STRING_SESSION", "")

# Sources and target (may be an @username, an id, or a t.me link).
SOURCE_CHATS = [
    "https://t.me/PEPE_Calls28",
    "https://t.me/Tanjirocall",
    "https://t.me/ChinaPumpCommunity",
    "https://t.me/Milagrosdegencalls",
    "https://t.me/GM_Degencalls",
]
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")

# Topic keywords: a simple relevance filter so that not every message is sent.
THEME_KEYWORDS = [
    "call", "signal", "entry", "buy", "sell", "tp", "sl",
    "pump", "spot", "futures", "setup",
    "pepe", "bnb", "eth", "btc", "sol", "meme",
]
KEYWORD_WEIGHT = 1.0
FUZZ_WEIGHT = 0.6
RELEVANCE_THRESHOLD = _env_number("RELEVANCE_THRESHOLD", "1.0", cast=float)

# Exclusion filter: a message containing any phrase (case-insensitive) is skipped.
EXCLUDE_PHRASES = [
    "achievement unlocked",
]

# Media handling.
INCLUDE_MEDIA = _env_flag("INCLUDE_MEDIA", "1")
MAX_MEDIA_MB = _env_number("MAX_MEDIA_MB", "12", cast=float)
SKIP_STICKERS = _env_flag("SKIP_STICKERS", "1")
ALLOW_GIFS_VIDEOS = _env_flag("ALLOW_GIFS_VIDEOS", "0")

# Number of past messages pulled from each source at startup.
INITIAL_BACKFILL = _env_number("INITIAL_BACKFILL", "20")

# Capacity of the de-duplication buffer.
DEDUP_BUFFER_SIZE = _env_number("DEDUP_BUFFER_SIZE", "800")
59
+
60
+
61
+ # ========= Client bootstrap =========
62
def build_client() -> TelegramClient:
    """Create the Telethon client used by the whole module.

    A ``StringSession`` built from ``STRING_SESSION`` is used when that
    setting is non-empty; otherwise a ``MemorySession`` is used. The chosen
    mode is announced on stdout before the session object is constructed.

    Returns:
        A ``TelegramClient`` configured with ``API_ID`` and ``API_HASH``.
    """
    if not STRING_SESSION:
        print(">> Using MemorySession (login tiap run).")
        session = MemorySession()
    else:
        print(">> Using StringSession (persistent).")
        session = StringSession(STRING_SESSION)
    return TelegramClient(session, API_ID, API_HASH)


# Module-wide client instance; the event handler below is registered on it.
client = build_client()
# Bounded buffer of recently seen message hashes; the oldest entries fall off
# automatically once DEDUP_BUFFER_SIZE is reached.
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
71
+
72
+
73
+ # ========= Utilities =========
74
def debug_log(reason: str, content: str = "") -> None:
    """Print a one-line debug record.

    Args:
        reason: Short label describing what happened.
        content: Optional message text; newlines are replaced by spaces and
            the result is cut to 160 characters. Falsy values print as empty.
    """
    flattened = content.replace("\n", " ") if content else ""
    preview = flattened[:160]
    print(f"[DEBUG] {reason}: {preview}")
77
+
78
def normalize_for_filter(text: str) -> str:
    """Prepare message text for filtering.

    Lines that start with ``>`` (quotes) are blanked out, then every run of
    whitespace is collapsed to a single space and the ends are trimmed.

    Args:
        text: Raw message text; falsy input yields an empty string.

    Returns:
        The cleaned, single-line text (case is left untouched).
    """
    if not text:
        return ""
    without_quotes = re.sub(r"(?m)^>.*$", "", text)
    # split() with no argument splits on whitespace runs and drops the
    # leading/trailing ones, so joining with one space collapses and trims.
    return " ".join(without_quotes.split())
85
+
86
def score_relevance(text: str, keywords: List[str]) -> float:
    """Score how relevant *text* is to a list of topic keywords.

    The score is the sum of two parts:

    * exact part: the number of keywords that occur in the lower-cased text
      as a plain substring, multiplied by ``KEYWORD_WEIGHT``;
    * fuzzy part: the mean of the three highest
      ``fuzz.partial_ratio(keyword, text) / 100`` values, multiplied by
      ``FUZZ_WEIGHT``.

    Args:
        text: Message text; falsy input scores 0.0.
        keywords: Keywords to look for. They are compared as given (only the
            text is lower-cased), so they should already be lower-case.

    Returns:
        The combined relevance score.
    """
    if not text:
        return 0.0
    lowered = text.lower()

    # Substring containment is the effective rule here: the word-boundary
    # regex that used to be OR-ed with it could only match when the substring
    # test had already succeeded, so it never changed the outcome. Note that
    # short keywords therefore also hit inside longer words (e.g. "tp" in
    # "https").
    exact_hits = sum(1 for kw in keywords if kw in lowered)
    exact_score = exact_hits * KEYWORD_WEIGHT

    # Fuzzy part: keep only the three best ratios.
    top_ratios = sorted(
        (fuzz.partial_ratio(kw, lowered) / 100.0 for kw in keywords),
        reverse=True,
    )[:3]
    if top_ratios:
        fuzzy_score = (sum(top_ratios) / len(top_ratios)) * FUZZ_WEIGHT
    else:
        # No keywords at all: nothing to average.
        fuzzy_score = 0.0

    return exact_score + fuzzy_score
104
+
105
def hash_for_dedup(text: str, msg) -> str:
    """Build the de-duplication key for a message.

    The key is the SHA-1 hex digest of the following parts joined by ``|``:
    the text (empty when falsy), the message id, ``doc:<id>`` for an attached
    document and ``photo:<id>`` for an attached photo. Each part is included
    only when the corresponding id is present (not ``None``).

    Args:
        text: Text to fingerprint (the caller passes the normalized text).
        msg: Message-like object; ``id``, ``document`` and ``photo`` are read
            with ``getattr`` so missing attributes are tolerated.

    Returns:
        A 40-character hexadecimal digest.
    """
    def _id_of(obj):
        return getattr(obj, "id", None)

    pieces = [text or ""]

    message_id = _id_of(msg)
    if message_id is not None:
        # Message ids are unique per chat; considered good enough for
        # de-duplication within a short window.
        pieces.append(str(message_id))

    document = getattr(msg, "document", None)
    if document and _id_of(document) is not None:
        pieces.append(f"doc:{document.id}")

    photo = getattr(msg, "photo", None)
    if photo is not None:
        photo_id = _id_of(photo)
        if photo_id is not None:
            pieces.append(f"photo:{photo_id}")

    payload = "|".join(pieces).encode("utf-8", errors="ignore")
    return hashlib.sha1(payload).hexdigest()
122
+
123
def is_image_message(msg) -> bool:
    """Tell whether a message carries media that may be forwarded.

    Rules, in order:

    1. A message with a ``photo`` is always accepted.
    2. A document whose MIME type is ``video/mp4`` or ``image/gif`` is
       accepted only when ``ALLOW_GIFS_VIDEOS`` is enabled.
    3. Any other ``image/*`` document is accepted, except that types
       containing ``webp`` or ``sticker`` are rejected while
       ``SKIP_STICKERS`` is enabled.
    4. Everything else (no document, no MIME type, other types) is rejected.

    Args:
        msg: Message-like object; ``photo`` and ``document`` are read with
            ``getattr`` so missing attributes are tolerated.

    Returns:
        True when the attached media is allowed, False otherwise.
    """
    if getattr(msg, "photo", None) is not None:
        return True

    doc = getattr(msg, "document", None)
    mime = (getattr(doc, "mime_type", None) or "").lower() if doc else ""
    if not mime:
        return False

    # GIF/video must be checked BEFORE the generic "image/" rule: otherwise
    # "image/gif" is accepted as a plain image and ALLOW_GIFS_VIDEOS never
    # gets a say for GIFs.
    if mime in ("video/mp4", "image/gif"):
        return bool(ALLOW_GIFS_VIDEOS)

    if mime.startswith("image/"):
        looks_like_sticker = "webp" in mime or "sticker" in mime
        return not (SKIP_STICKERS and looks_like_sticker)

    return False
139
+
140
def media_too_big(msg) -> bool:
    """Tell whether the message's document exceeds the size limit.

    Only ``msg.document.size`` is inspected; a message without a document, or
    whose document has no (or a zero) size, is never considered too big.

    Args:
        msg: Message-like object with an optional ``document`` attribute.

    Returns:
        True when the document size is strictly greater than
        ``MAX_MEDIA_MB`` mebibytes.
    """
    document = getattr(msg, "document", None)
    size = getattr(document, "size", None) if document else None
    if not size:
        return False
    limit_bytes = MAX_MEDIA_MB * 1024 * 1024
    return size > limit_bytes
145
+
146
+
147
+ # ========= Core actions =========
148
def _upload_name_for(mime_type: str) -> str:
    """Return the upload filename ("media<ext>") for a document MIME type."""
    ext = ".jpg"
    if mime_type:
        ext = guess_extension(mime_type) or ".jpg"
        # mimetypes may report ".jpe" for JPEG; use the common extension.
        if ext == ".jpe":
            ext = ".jpg"
    return f"media{ext}"


async def send_as_is(msg) -> None:
    """Re-post a message to ``TARGET_CHAT``, with its media when allowed.

    When ``INCLUDE_MEDIA`` is on and the message passes
    ``is_image_message`` / ``media_too_big``:

    * a photo is re-sent directly with the original text as caption;
    * a document is downloaded into memory and uploaded again under a name
      derived from its MIME type.

    If the media path raises, or yields nothing to send, the function falls
    back to sending the text alone. A message with no text is skipped in that
    fallback, because there is nothing left to send.

    Args:
        msg: The source message (Telethon message object).
    """
    orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
    entities = getattr(msg, "entities", None)

    if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
        try:
            # Native photo: pass the photo object straight through.
            if getattr(msg, "photo", None):
                await client.send_file(
                    TARGET_CHAT,
                    msg.photo,
                    caption=orig_text,
                    # send_file's keyword for caption formatting is
                    # `formatting_entities`; `caption_entities` is not a
                    # Telethon parameter.
                    formatting_entities=entities,
                    force_document=False,
                )
                return

            # Document (image/gif/video): download, then upload from memory.
            doc = getattr(msg, "document", None)
            if doc:
                data = await client.download_media(msg, file=bytes)
                if data:
                    bio = io.BytesIO(data)
                    mime = (getattr(doc, "mime_type", "") or "").lower()
                    # Presumably the name's extension lets Telethon pick the
                    # right media type for the in-memory upload.
                    bio.name = _upload_name_for(mime)
                    await client.send_file(
                        TARGET_CHAT,
                        bio,
                        caption=orig_text,
                        formatting_entities=entities,
                        force_document=False,
                    )
                    return
        except Exception as e:
            # Best effort: any media failure degrades to a text-only post.
            debug_log("Gagal kirim sebagai media, fallback ke text", str(e))

    # Fallback: text only. An empty message cannot be sent, so skip it
    # explicitly instead of letting send_message fail.
    if not orig_text:
        debug_log("Tidak ada teks untuk dikirim, dilewati")
        return

    await client.send_message(
        TARGET_CHAT,
        orig_text,
        formatting_entities=entities,
        link_preview=True,
    )
198
+
199
async def process_message(msg, source_name: str) -> None:
    """Run one message through the pipeline: exclude, de-duplicate, score, send.

    Steps, in order:

    1. Skip when the normalized, lower-cased text contains any entry of
       ``EXCLUDE_PHRASES``.
    2. Skip when the message hash is already in ``recent_hashes``; otherwise
       record it (this happens before the relevance check).
    3. Skip when the relevance score is below ``RELEVANCE_THRESHOLD``.
    4. Forward the message with ``send_as_is``.

    Args:
        msg: The message to process.
        source_name: Label of the originating chat; accepted for callers but
            not used inside this function.
    """
    raw_text = msg.message or (getattr(msg, "raw_text", None) or "")
    normalized = normalize_for_filter(raw_text).lower()

    # Explicit exclusions.
    if any(phrase.lower() in normalized for phrase in EXCLUDE_PHRASES):
        debug_log("Dilewati karena EXCLUDE_PHRASES", raw_text)
        return

    # De-duplication.
    digest = hash_for_dedup(normalized, msg)
    if digest in recent_hashes:
        debug_log("Duplikat, dilewati", raw_text)
        return
    recent_hashes.append(digest)

    # Relevance gate.
    relevance = score_relevance(normalized, THEME_KEYWORDS)
    debug_log(f"Skor relevansi={relevance:.2f}", raw_text)
    if relevance < RELEVANCE_THRESHOLD:
        return

    await send_as_is(msg)
    debug_log("Dikirim ke target", raw_text)
225
+
226
async def backfill_history(entity, limit: int) -> None:
    """Process the most recent messages of a source chat (optional step).

    The last ``limit`` messages are fetched and then handed to
    ``process_message`` from oldest to newest, so anything forwarded keeps
    its original chronological order. A failure on one message is logged and
    does not stop the rest.

    Args:
        entity: The source chat to read from.
        limit: How many recent messages to pull; values <= 0 disable the
            backfill entirely.
    """
    if limit <= 0:
        return
    print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")

    # iter_messages yields newest first. Buffer the (at most `limit`)
    # messages and replay them reversed, otherwise the backfill would be
    # forwarded in reverse chronological order.
    history = [m async for m in client.iter_messages(entity, limit=limit)]
    for m in reversed(history):
        try:
            await process_message(m, source_name=str(entity))
        except Exception as e:
            debug_log("Error saat memproses backfill", str(e))
236
+
237
+
238
+ # ========= Event handlers =========
239
@client.on(events.NewMessage(chats=SOURCE_CHATS))
async def on_new_message(event):
    """Handle a new message arriving in any of ``SOURCE_CHATS``.

    The message is passed to ``process_message``; any exception is printed
    and swallowed so that it does not propagate out of the handler.
    """
    try:
        origin = str(event.chat_id)
        await process_message(event.message, source_name=origin)
    except Exception as err:
        print("Process error:", err)
245
+
246
+
247
+ # ========= Entry points =========
248
# Strong reference to the long-running Telethon task. The event loop keeps
# only weak references to tasks, so the result of create_task must be stored.
_background_task: Optional["asyncio.Task"] = None


async def start_bot_background() -> None:
    """Start the Telegram client for use inside the FastAPI server (server.py).

    The client start, the resolution of ``SOURCE_CHATS`` and the initial
    backfill are awaited here, so this coroutine returns only once they are
    done. Only the final "run until disconnected" wait is scheduled as a
    separate task, which is what keeps the web server's event loop free
    afterwards.

    Sources that cannot be resolved, and backfills that fail, are reported on
    stdout and skipped.
    """
    global _background_task

    await client.start()

    # Resolve every source; an unresolvable one is reported and skipped.
    resolved_sources = []
    for src in SOURCE_CHATS:
        try:
            ent = await client.get_entity(src)
            resolved_sources.append(ent)
        except Exception as e:
            print(f"Gagal resolve sumber {src}: {e}")

    for ent in resolved_sources:
        try:
            await backfill_history(ent, INITIAL_BACKFILL)
        except Exception as e:
            print(f"Backfill gagal untuk {ent}: {e}")

    print("Kurator berjalan (background task). Menunggu pesan baru...")
    # Do not block: keep the client running in its own task, and hold on to
    # the task object so it cannot be garbage-collected mid-execution.
    _background_task = asyncio.create_task(client.run_until_disconnected())
273
+
274
+
275
async def app_main() -> None:
    """Legacy entry point so the bot can be run with ``python botsignal.py``.

    Starts the client, resolves ``SOURCE_CHATS``, runs the initial backfill
    for each resolved source and then blocks until the client disconnects.
    Sources that cannot be resolved, and backfills that fail, are reported on
    stdout and skipped, matching ``start_bot_background``.
    """
    await client.start()

    resolved_sources = []
    for src in SOURCE_CHATS:
        try:
            ent = await client.get_entity(src)
            resolved_sources.append(ent)
        except Exception as e:
            print(f"Gagal resolve sumber {src}: {e}")

    for ent in resolved_sources:
        # Guard each source separately so one failing backfill does not abort
        # the whole run before the bot starts listening.
        try:
            await backfill_history(ent, INITIAL_BACKFILL)
        except Exception as e:
            print(f"Backfill gagal untuk {ent}: {e}")

    print("Kurator berjalan. Menunggu pesan baru... (Stop dengan interrupt).")
    await client.run_until_disconnected()
295
+
296
+
297
if __name__ == "__main__":
    # nest_asyncio.apply() has already run at import time (top of this
    # module), so it is not repeated here.
    asyncio.run(app_main())
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ telethon
2
+ rapidfuzz
3
+ nest-asyncio
4
+ fastapi
5
+ uvicorn[standard]
server.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ from fastapi import FastAPI
4
+ from botsignal import start_bot_background
5
+
6
# ASGI application served by uvicorn (see the Dockerfile CMD: `server:app`).
app = FastAPI(title="Telegram Curator Health")


@app.on_event("startup")
async def startup() -> None:
    """Start the Telegram bot when the web application starts up.

    The call is awaited, so the startup event completes only after
    ``start_bot_background`` has returned.
    """
    await start_bot_background()
12
+
13
@app.get("/")
async def root() -> dict:
    """Answer requests to the site root with a fixed status payload."""
    payload = {"status": "ok"}
    return payload
16
+
17
@app.get("/health")
async def health() -> dict:
    """Health-check endpoint; always reports healthy.

    The response is constant and does not inspect the Telegram client.
    """
    payload = {"healthy": True}
    return payload