File size: 3,782 Bytes
565e754 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import time
import datetime
from typing import Union, Generator, List, Dict, Any, Optional
from pyrogram import Client
from pyrogram.types import Message
class PyroSource:
    """Load chat history through a Pyrogram ``Client`` as a list of plain dicts."""

    def __init__(
        self,
        api_id: Union[int, str],
        api_hash: str,
        app_name: str = "default_app",
    ):
        """
        api_id: Telegram API id, forwarded to ``Client``
        api_hash: Telegram API hash, forwarded to ``Client``
        app_name: forwarded to ``Client`` as its ``name``
        """
        self.client = Client(name=app_name, api_id=api_id, api_hash=api_hash)

    @staticmethod
    def _to_record(channel_id: Union[int, str], msg: Any) -> Dict[str, Any]:
        """Convert one message into the dict shape returned by the loaders."""
        forwarded_from = msg.forward_from_chat
        return {
            "message_dt": msg.date.strftime("%Y-%m-%d"),
            "message_id": msg.id,
            "channel_id": channel_id,
            # Text messages carry ``text``, media messages carry ``caption``.
            "content": msg.text or msg.caption or '',
            "views": msg.views,
            # The source chat may have no username; normalise that to ''
            # so the field is always a string.
            "original_author": (
                (forwarded_from.username or '') if forwarded_from else ''
            ),
        }

    def load_messages(
        self,
        channel_id: Union[int, str],
        limit: int,
        offset: int = 0,
        offset_id: int = 0,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load up to ``limit`` messages from the chat history.

        channel_id: channel id or username
        limit: number of messages to request
        offset: offset index, forwarded to ``get_chat_history``
        offset_id: message id offset, forwarded to ``get_chat_history``
        time_sleep: seconds to sleep before handling each message

        Returns one dict per message with the keys ``message_dt``,
        ``message_id``, ``channel_id``, ``content``, ``views`` and
        ``original_author``. Messages without a date are skipped.
        """
        posts = []
        with self.client as app:
            messages: Generator[Message, None, None] = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset=offset,
                offset_id=offset_id,
            )
            for msg in messages:
                time.sleep(time_sleep)
                # Empty (e.g. deleted) messages have no date and nothing to store.
                if msg.date is None:
                    continue
                posts.append(self._to_record(channel_id, msg))
        return posts

    def load_days(
        self,
        channel_id: Union[int, str],
        from_date: datetime.date,
        to_date: Optional[datetime.date] = None,
        limit: int = 1000,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load messages whose date falls in the range [from_date, to_date].

        channel_id: channel id or username
        from_date: first day of the range (inclusive)
        to_date: last day of the range (inclusive); when omitted only
            ``from_date`` itself is loaded
        limit: safety limit on the number of messages requested
        time_sleep: seconds to sleep before handling each message

        Returns the same dict shape as ``load_messages``, newest first
        (the order in which the history is iterated).
        """
        posts = []
        # The history is walked from ``offset_date`` back into the past, so
        # the starting point has to be the midnight right after the *last*
        # requested day. Anchoring it to ``from_date`` would cut the range
        # down to a single day regardless of ``to_date``.
        last_day = to_date if to_date is not None else from_date
        offset_date = datetime.datetime.combine(
            last_day + datetime.timedelta(days=1),
            datetime.time.min,
        )
        with self.client as app:
            messages: Generator[Message, None, None] = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset_date=offset_date,
            )
            for msg in messages:
                time.sleep(time_sleep)
                # Empty (e.g. deleted) messages have no date to compare.
                if msg.date is None:
                    continue
                msg_date = msg.date.date()
                # Went too far into the past: everything after this is older.
                if msg_date < from_date:
                    break
                # Safeguard: skip anything newer than the requested range.
                if to_date is not None and msg_date > to_date:
                    continue
                posts.append(self._to_record(channel_id, msg))
        return posts
|