Spaces:

yuyutsu07
/

Tweet-Wayback

Sleeping

App Files Files Community

Tweet-Wayback / waybacktweets /api /request.py

yuyutsu07

Upload 43 files

cbb84f2 verified over 1 year ago

raw

history blame contribute delete

3.53 kB

	"""
	Requests data from the Wayback Machine API.
	"""

	from typing import Any, Dict, Optional

	from rich import print as rprint

	from waybacktweets.config.config import config
	from waybacktweets.exceptions.exceptions import (
	ConnectionError,
	EmptyResponseError,
	GetResponseError,
	HTTPError,
	ReadTimeoutError,
	)
	from waybacktweets.utils.utils import get_response


	class WaybackTweets:
	    """
	    Class responsible for requesting data from the Wayback CDX Server API.

	    The constructor only stores its arguments; the HTTP request is made when
	    :meth:`get` is called.

	    Args:
	        username (str): The username associated with the tweets.
	        collapse (str, optional): The field to collapse duplicate lines on.
	        timestamp_from (str, optional): The timestamp to start retrieving
	            tweets from.
	        timestamp_to (str, optional): The timestamp to stop retrieving
	            tweets at.
	        limit (int, optional): The maximum number of results to return.
	        offset (int, optional): The number of lines to skip in the results.
	        matchtype (str, optional): Results matching a certain prefix, a
	            certain host or all subdomains.
	    """

	    def __init__(
	        self,
	        username: str,
	        collapse: Optional[str] = None,
	        timestamp_from: Optional[str] = None,
	        timestamp_to: Optional[str] = None,
	        limit: Optional[int] = None,
	        offset: Optional[int] = None,
	        matchtype: Optional[str] = None,
	    ):
	        self.username = username
	        self.collapse = collapse
	        self.timestamp_from = timestamp_from
	        self.timestamp_to = timestamp_to
	        self.limit = limit
	        self.offset = offset
	        self.matchtype = matchtype

	    def _build_params(self) -> Dict[str, Any]:
	        """
	        Builds the query parameters for the CDX request.

	        Returns:
	            A dict that always contains ``url`` and ``output``, plus one
	            entry for every optional filter whose value is truthy.
	        """
	        # The "/*" wildcard is only appended when no match type is given;
	        # with a match type the bare ".../status" URL is sent instead.
	        wildcard_pathname = "" if self.matchtype else "/*"

	        params: Dict[str, Any] = {
	            "url": f"https://twitter.com/{self.username}/status{wildcard_pathname}",  # noqa: E501
	            "output": "json",
	        }

	        # Query-string key -> configured value, in the order they are sent.
	        optional_filters = {
	            "collapse": self.collapse,
	            "from": self.timestamp_from,
	            "to": self.timestamp_to,
	            "limit": self.limit,
	            "offset": self.offset,
	            "matchType": self.matchtype,
	        }

	        # Falsy values (None, "", 0) are left out of the request entirely.
	        params.update(
	            {key: value for key, value in optional_filters.items() if value}
	        )

	        return params

	    def get(self) -> Optional[Dict[str, Any]]:
	        """
	        Sends a GET request to the Internet Archive's CDX API to retrieve
	        archived tweets.

	        Every handled failure is swallowed; a message is printed only when
	        ``config.verbose`` is set.

	        Returns:
	            The response from the CDX API decoded from JSON, if successful.
	            Otherwise, None.
	        """
	        # NOTE(review): the CDX server's JSON output looks like a list of
	        # rows rather than a dict - confirm the return annotation.
	        url = "https://web.archive.org/cdx/search/cdx"
	        params = self._build_params()

	        try:
	            response = get_response(url=url, params=params)
	            return response.json()
	        except ReadTimeoutError:
	            if config.verbose:
	                rprint("[red]Connection to web.archive.org timed out.")
	        except ConnectionError:
	            if config.verbose:
	                rprint(
	                    "[red]Failed to establish a new connection with web.archive.org. Max retries exceeded. Please wait a few minutes and try again."  # noqa: E501
	                )
	        except HTTPError as e:
	            if config.verbose:
	                rprint(f"[red]HTTP error occurred: {str(e)}")
	        except EmptyResponseError:
	            if config.verbose:
	                rprint("[red]No data was saved due to an empty response.")
	        except GetResponseError as e:
	            if config.verbose:
	                rprint(f"[red]An error occurred: {str(e)}")
	        except ValueError:
	            # Presumably raised by response.json() when the body is not valid
	            # JSON (assumes a requests-style response - TODO confirm). Kept
	            # last so the project-specific handlers above take precedence.
	            if config.verbose:
	                rprint("[red]The response from web.archive.org is not valid JSON.")

	        return None