Spaces:
Sleeping
Sleeping
| """ | |
| CLI functions for retrieving archived tweets. | |
| """ | |
| from datetime import datetime | |
| from typing import Any, Optional | |
| import click | |
| from rich import print as rprint | |
| from waybacktweets.api.export import TweetsExporter | |
| from waybacktweets.api.parse import TweetsParser | |
| from waybacktweets.api.request import WaybackTweets | |
| from waybacktweets.config.config import config | |
| def _parse_date( | |
| ctx: Optional[Any] = None, param: Optional[Any] = None, value: Optional[str] = None | |
| ) -> Optional[str]: | |
| """ | |
| Parses a date string and returns it in the format "YYYYMMDD". | |
| Args: | |
| ctx: Necessary when used with the click package. Defaults to None. | |
| param: Necessary when used with the click package. Defaults to None. | |
| value: A date string in the "YYYYMMDD" format. Defaults to None. | |
| Returns: | |
| The input date string formatted in the "YYYYMMDD" format, or None if no date string was provided. | |
| """ # noqa: E501 | |
| try: | |
| if value is None: | |
| return None | |
| date = datetime.strptime(value, "%Y%m%d") | |
| return date.strftime("%Y%m%d") | |
| except ValueError: | |
| raise click.BadParameter("Date must be in format YYYYmmdd") | |
def main(
    username: str,
    collapse: Optional[str],
    timestamp_from: Optional[str],
    timestamp_to: Optional[str],
    limit: Optional[int],
    offset: Optional[int],
    matchtype: Optional[str],
    verbose: Optional[bool],
) -> None:
    """
    Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data.

    USERNAME: The Twitter username without @.
    """  # noqa: E501
    # Local import keeps this function self-contained; rich is already a
    # dependency of this module (see the rprint import).
    from rich.markup import escape

    try:
        # Propagate the verbosity flag to the shared config object.
        config.verbose = verbose

        api = WaybackTweets(
            username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
        )

        print(f"Waybacking @{username}'s archived tweets...")
        archived_tweets = api.get()

        # Only parse and export when the request returned something truthy.
        if archived_tweets:
            # Field names handed to both the parser and the exporter.
            field_options = [
                "archived_urlkey",
                "archived_timestamp",
                "parsed_archived_timestamp",
                "archived_tweet_url",
                "parsed_archived_tweet_url",
                "original_tweet_url",
                "parsed_tweet_url",
                "available_tweet_text",
                "available_tweet_is_RT",
                "available_tweet_info",
                "archived_mimetype",
                "archived_statuscode",
                "archived_digest",
                "archived_length",
            ]

            parser = TweetsParser(archived_tweets, username, field_options)
            parsed_tweets = parser.parse(print_progress=True)

            # Call each of the exporter's save methods (CSV, JSON, HTML).
            exporter = TweetsExporter(parsed_tweets, username, field_options)
            exporter.save_to_csv()
            exporter.save_to_json()
            exporter.save_to_html()
    except Exception as e:
        # The message is arbitrary text: escape it so bracketed fragments are
        # printed literally instead of being parsed as Rich console markup
        # (which could restyle the output or raise MarkupError right here,
        # hiding the original error).
        rprint(f"[red]{escape(str(e))}")
    finally:
        # Shown on every exit path, success or failure.
        rprint(
            "[yellow]\nNeed help? Read the docs: https://claromes.github.io/waybacktweets"  # noqa: E501
        )