Spaces:

yuyutsu07
/

Tweet-Wayback

Sleeping

App Files Files Community

Tweet-Wayback / waybacktweets /api /export.py

yuyutsu07

Upload 43 files

cbb84f2 verified over 1 year ago

raw

history blame contribute delete

3.52 kB

	"""
	Exports the parsed archived tweets.
	"""

	import datetime
	import os
	import re
	from typing import Any, Dict, List, Optional

	import pandas as pd

	from waybacktweets.api.visualize import HTMLTweetsVisualizer


	class TweetsExporter:
	    """
	    Class responsible for exporting parsed archived tweets.

	    On construction the exporter stamps a base filename with the current
	    local time and immediately builds a DataFrame from ``data``; the
	    ``save_to_*`` methods then write that DataFrame next to each other
	    under the same base filename.

	    Args:
	        data (Dict[str, List[Any]]): The parsed archived tweets data.
	        username (str): The username associated with the tweets.
	        field_options (List[str]): The fields to be included in the exported data. For more details on each option, visit :ref:`field_options`.
	    """  # noqa: E501

	    def __init__(
	        self, data: Dict[str, List[Any]], username: str, field_options: List[str]
	    ):
	        self.data = data
	        self.username = username
	        self.field_options = field_options
	        self.formatted_datetime = self._datetime_now()
	        # Base name (no extension) shared by the CSV, JSON and HTML outputs.
	        self.filename = f"{self.username}_tweets_{self.formatted_datetime}"
	        self.dataframe = self._create_dataframe()

	    @staticmethod
	    def _datetime_now() -> str:
	        """
	        Returns the current datetime, formatted as a string.

	        Returns:
	            The current local datetime as ``YYYYMMDDHHMMSS``.
	        """
	        # The format contains only numeric directives, so the result is
	        # already filename-safe and needs no further sanitising.
	        return datetime.datetime.now().strftime("%Y%m%d%H%M%S")

	    @staticmethod
	    def _transpose_matrix(
	        data: Dict[str, List[Any]], fill_value: Optional[Any] = None
	    ) -> List[List[Any]]:
	        """
	        Transposes a matrix, filling in missing values with a specified fill value if needed.

	        Each value of ``data`` is treated as one column; shorter columns are
	        padded at the end with ``fill_value`` so every row has one entry per
	        key. For example ``{"a": [1, 2], "b": [3]}`` becomes
	        ``[[1, 3], [2, None]]``.

	        Args:
	            data (Dict[str, List[Any]]): The matrix to be transposed.
	            fill_value (Optional[Any]): The value to fill in missing values with.

	        Returns:
	            The transposed matrix. An empty list when ``data`` has no keys
	            or all of its columns are empty.
	        """  # noqa: E501
	        # default=0 keeps an empty mapping from raising ValueError in max().
	        max_length = max((len(column) for column in data.values()), default=0)

	        # list(...) copies the column, so the caller's data is never mutated
	        # and any sized sequence (not only list) can be padded.
	        padded_columns = [
	            list(column) + [fill_value] * (max_length - len(column))
	            for column in data.values()
	        ]

	        return [list(row) for row in zip(*padded_columns)]

	    def _create_dataframe(self) -> pd.DataFrame:
	        """
	        Creates a DataFrame from the transposed data.

	        Columns are labelled positionally with ``self.field_options``, so
	        ``self.data`` is expected to hold one list per field option, in the
	        same order.

	        Returns:
	            The DataFrame representation of the data.
	        """
	        rows = self._transpose_matrix(self.data)

	        return pd.DataFrame(rows, columns=self.field_options)

	    def save_to_csv(self) -> None:
	        """
	        Saves the DataFrame to a CSV file.

	        The file is written to ``<filename>.csv`` without the index column,
	        and the path is printed afterwards.
	        """
	        csv_file_path = f"{self.filename}.csv"
	        self.dataframe.to_csv(csv_file_path, index=False)

	        print(f"Saved to {csv_file_path}")

	    def save_to_json(self) -> None:
	        """
	        Saves the DataFrame to a JSON file.

	        The file is written to ``<filename>.json`` as a single JSON array
	        with one object per row, and the path is printed afterwards.
	        """
	        json_path = f"{self.filename}.json"
	        self.dataframe.to_json(json_path, orient="records", lines=False)

	        print(f"Saved to {json_path}")

	    def save_to_html(self) -> None:
	        """
	        Saves the DataFrame to an HTML file.

	        The HTML visualizer is given the path of the JSON export, so that
	        file is created first when it does not exist yet. The page is
	        written to ``<filename>.html`` and the path is printed afterwards.
	        """
	        json_path = f"{self.filename}.json"

	        # Reuse an existing JSON export instead of writing it twice.
	        if not os.path.exists(json_path):
	            self.save_to_json()

	        html_file_path = f"{self.filename}.html"

	        html = HTMLTweetsVisualizer(self.username, json_path, html_file_path)

	        html_content = html.generate()
	        html.save(html_content)

	        print(f"Saved to {html_file_path}")