Spaces:
Sleeping
Sleeping
| import time | |
| import datetime | |
| from typing import Union, Generator, List, Dict, Any, Optional | |
| from pyrogram import Client | |
| from pyrogram.types import Message | |
class PyroSource:
    """Load posts from a Telegram chat through a Pyrogram ``Client``.

    Each loaded message is flattened into a plain ``dict`` with the keys
    ``message_dt``, ``message_id``, ``channel_id``, ``content``, ``views``
    and ``original_author``.
    """

    def __init__(
        self,
        api_id: Union[int, str],
        api_hash: str,
        app_name: str = "default_app",
    ):
        """Build the Pyrogram client used by the load methods.

        Args:
            api_id: Telegram API id, forwarded to ``Client``.
            api_hash: Telegram API hash, forwarded to ``Client``.
            app_name: Passed to ``Client`` as its ``name``.
        """
        # The client is entered as a context manager inside each load call.
        self.client = Client(name=app_name, api_id=api_id, api_hash=api_hash)

    @staticmethod
    def _message_to_dict(
        msg: "Message",
        channel_id: Union[int, str],
    ) -> Dict[str, Any]:
        """Flatten one message into the record shape shared by all loaders."""
        forwarded_from = msg.forward_from_chat
        return {
            # Pyrogram documents ``Message.date`` as optional; a dateless
            # message gets an empty string, like the other missing fields.
            "message_dt": msg.date.strftime("%Y-%m-%d") if msg.date else "",
            "message_id": msg.id,
            "channel_id": channel_id,
            # Media posts carry their text in ``caption`` rather than ``text``.
            "content": msg.text or msg.caption or "",
            "views": msg.views,
            "original_author": forwarded_from.username if forwarded_from else "",
        }

    def load_messages(
        self,
        channel_id: Union[int, str],
        limit: int,
        offset: int = 0,
        offset_id: int = 0,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """Load up to ``limit`` messages from the chat history.

        Args:
            channel_id: Channel id or username.
            limit: Number of messages to load.
            offset: Offset index, forwarded to ``get_chat_history``.
            offset_id: Message id offset, forwarded to ``get_chat_history``.
            time_sleep: Seconds to sleep before processing each message.

        Returns:
            One record per message, in the order the history yields them.
        """
        posts = []
        with self.client as app:
            history = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset=offset,
                offset_id=offset_id,
            )
            for msg in history:
                time.sleep(time_sleep)
                posts.append(self._message_to_dict(msg, channel_id))
        return posts

    def load_days(
        self,
        channel_id: Union[int, str],
        from_date: datetime.date,
        to_date: Optional[datetime.date] = None,
        limit: int = 1000,
        time_sleep: float = 0.05,
    ) -> List[Dict[str, Any]]:
        """Load messages dated within ``[from_date, to_date]``.

        Args:
            channel_id: Channel id or username.
            from_date: First day of the range (inclusive).
            to_date: Last day of the range (inclusive). When omitted, only
                the single day ``from_date`` is loaded.
            limit: Safety limit on how many messages are fetched from the
                history; messages skipped by the date filter count too.
            time_sleep: Seconds to sleep before processing each message.

        Returns:
            One record per message in the range, in the order the history
            yields them. Empty when ``to_date`` is earlier than ``from_date``.
        """
        posts = []
        # ``offset_date`` makes the history start at messages older than the
        # given moment, so the walk must begin just after the LAST day of the
        # range. Anchoring it at ``from_date`` would make every later day
        # unreachable.
        last_day = from_date if to_date is None else to_date
        offset_date = datetime.datetime.combine(
            last_day + datetime.timedelta(days=1),
            datetime.time.min,
        )
        with self.client as app:
            history = app.get_chat_history(
                chat_id=channel_id,
                limit=limit,
                offset_date=offset_date,
            )
            for msg in history:
                time.sleep(time_sleep)
                if msg.date is None:
                    # A dateless message cannot be placed in the range.
                    continue
                msg_date = msg.date.date()
                # The history runs newest to oldest, so nothing past this
                # point can be in range.
                if msg_date < from_date:
                    break
                # Skip anything newer than the requested upper bound.
                if to_date is not None and msg_date > to_date:
                    continue
                posts.append(self._message_to_dict(msg, channel_id))
        return posts