|
|
import time |
|
|
import datetime |
|
|
from typing import Union, Generator, List, Dict, Any, Optional |
|
|
|
|
|
from pyrogram import Client |
|
|
from pyrogram.types import Message |
|
|
|
|
|
|
|
|
class PyroSource:
    """Load channel posts through a Pyrogram client as plain dictionaries.

    Every loader enters ``self.client`` as a context manager for the duration
    of the call and returns one dictionary per message with the keys
    ``message_dt``, ``message_id``, ``channel_id``, ``content``, ``views``
    and ``original_author``.
    """

    def __init__(
        self,
        api_id: Union[int, str],
        api_hash: str,
        app_name: str = "default_app",
    ):
        """
        api_id: forwarded to ``Client`` as ``api_id``
        api_hash: forwarded to ``Client`` as ``api_hash``
        app_name: forwarded to ``Client`` as ``name``
        """
        self.client = Client(name=app_name, api_id=api_id, api_hash=api_hash)

    @staticmethod
    def _to_post(msg: "Message", channel_id: Union[int, str]) -> Dict[str, Any]:
        """Flatten one message into the dictionary shape shared by all loaders."""
        forwarded_from = msg.forward_from_chat
        return {
            "message_dt": msg.date.date().isoformat(),
            "message_id": msg.id,
            "channel_id": channel_id,
            # Text takes priority; the caption is used only when text is empty.
            "content": msg.text or msg.caption or '',
            "views": msg.views,
            # Empty string when the message is not a forward from a chat.
            "original_author": forwarded_from.username if forwarded_from else '',
        }

    def load_messages(
        self,
        channel_id: Union[int, str],
        limit: int,
        offset: int = 0,
        offset_id: int = 0,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load messages from the chat history of a channel.

        channel_id: channel id or username
        limit: number of messages to load
        offset: offset index
        offset_id: message id offset
        time_sleep: seconds to sleep before handling each message

        Returns the posts in the order the history yields them.
        """
        posts = []

        with self.client as app:
            history = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset=offset,
                offset_id=offset_id,
            )

            for msg in history:
                time.sleep(time_sleep)
                posts.append(self._to_post(msg, channel_id))

        return posts

    def load_days(
        self,
        channel_id: Union[int, str],
        from_date: datetime.date,
        to_date: Optional[datetime.date] = None,
        limit: int = 1000,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """
        Load messages whose date falls in the range [from_date, to_date].

        channel_id: channel id or username
        from_date: start date (inclusive)
        to_date: end date (inclusive); when omitted only ``from_date`` is loaded
        limit: safety limit on the number of messages requested from history
        time_sleep: seconds to sleep before handling each message

        Returns the matching posts in the order the history yields them; an
        empty list when ``to_date`` is earlier than ``from_date``.
        """
        posts = []

        last_day = from_date if to_date is None else to_date
        if last_day < from_date:
            # Empty range: nothing can match, so skip the client round trip.
            return posts

        # The history is walked from ``offset_date`` towards older messages,
        # so the cursor must sit just past the END of the range. Anchoring it
        # to from_date (as before) silently dropped every later day.
        offset_date = datetime.datetime.combine(
            last_day + datetime.timedelta(days=1),
            datetime.time.min,
        )

        with self.client as app:
            history = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset_date=offset_date,
            )

            for msg in history:
                time.sleep(time_sleep)

                msg_date = msg.date.date()

                # Messages arrive newest first: once one is older than the
                # range start, everything after it is older too.
                if msg_date < from_date:
                    break

                # Safeguard in case the history yields something newer than
                # the requested upper bound.
                if msg_date > last_day:
                    continue

                posts.append(self._to_post(msg, channel_id))

        return posts
|
|
|