# Uploaded by yuyutsu07 ("Upload 43 files", commit cbb84f2, verified).
"""
Exports the parsed archived tweets.
"""
import datetime
import os
import re
from typing import Any, Dict, List, Optional
import pandas as pd
from waybacktweets.api.visualize import HTMLTweetsVisualizer
class TweetsExporter:
    """
    Class responsible for exporting parsed archived tweets.

    The tabular form of the data is built once, at construction time, and
    then reused by every ``save_to_*`` method. All output files are written
    relative to the current working directory and share the base name
    ``<username>_tweets_<timestamp>``.

    Args:
        data (Dict[str, List[Any]]): The parsed archived tweets data.
        username (str): The username associated with the tweets.
        field_options (List[str]): The fields to be included in the exported data. For more details on each option, visit :ref:`field_options`.
    """  # noqa: E501

    def __init__(
        self, data: Dict[str, List[Any]], username: str, field_options: List[str]
    ):
        self.data = data
        self.username = username
        self.field_options = field_options
        # The timestamp is captured once so that the CSV, JSON and HTML
        # outputs of a single exporter instance all share the same base name.
        self.formatted_datetime = self._datetime_now()
        self.filename = f"{self.username}_tweets_{self.formatted_datetime}"
        self.dataframe = self._create_dataframe()

    @staticmethod
    def _datetime_now() -> str:
        """
        Returns the current local datetime, formatted as a string.

        Returns:
            The current datetime in ``YYYYMMDDHHMMSS`` form.
        """
        now = datetime.datetime.now()
        formatted_now = now.strftime("%Y%m%d%H%M%S")
        # Defensive: drop anything that is not a word character so the value
        # is always safe to embed in a file name.
        formatted_now = re.sub(r"\W+", "", formatted_now)

        return formatted_now

    @staticmethod
    def _transpose_matrix(
        data: Dict[str, List[Any]], fill_value: Optional[Any] = None
    ) -> List[List[Any]]:
        """
        Transposes a matrix, filling in missing values with a specified fill value if needed.

        Each value of ``data`` is treated as one column; the result is a list
        of rows. Columns shorter than the longest one are padded at the end
        with ``fill_value``. The input lists are not modified.

        Args:
            data (Dict[str, List[Any]]): The matrix to be transposed.
            fill_value (Optional[Any]): The value to fill in missing values with.

        Returns:
            The transposed matrix. An empty ``data`` yields an empty list.
        """  # noqa: E501
        # ``default=0`` keeps an empty ``data`` dict from raising
        # ``ValueError: max() arg is an empty sequence``.
        max_length = max((len(sublist) for sublist in data.values()), default=0)

        # ``value + [...]`` builds a new list, so the caller's lists stay
        # untouched while every column is padded to the same length.
        filled_data = {
            key: value + [fill_value] * (max_length - len(value))
            for key, value in data.items()
        }

        data_transposed = [list(row) for row in zip(*filled_data.values())]

        return data_transposed

    def _create_dataframe(self) -> pd.DataFrame:
        """
        Creates a DataFrame from the transposed data.

        The columns are labelled with ``self.field_options``; their order
        follows the insertion order of ``self.data``, so both are expected to
        list the fields in the same order.

        Returns:
            The DataFrame representation of the data.
        """
        data_transposed = self._transpose_matrix(self.data)

        df = pd.DataFrame(data_transposed, columns=self.field_options)

        return df

    def save_to_csv(self) -> None:
        """
        Saves the DataFrame to a CSV file.

        The file is named ``<filename>.csv`` and is written without the
        DataFrame index.
        """
        csv_file_path = f"{self.filename}.csv"
        self.dataframe.to_csv(csv_file_path, index=False)

        print(f"Saved to {csv_file_path}")

    def save_to_json(self) -> None:
        """
        Saves the DataFrame to a JSON file.

        The file is named ``<filename>.json`` and holds a single JSON array
        with one object per row.
        """
        json_path = f"{self.filename}.json"
        self.dataframe.to_json(json_path, orient="records", lines=False)

        print(f"Saved to {json_path}")

    def save_to_html(self) -> None:
        """
        Saves the DataFrame to an HTML file.

        The HTML visualizer reads its input from the JSON export, so the JSON
        file is generated first when it does not exist yet. An already
        existing file with the same name is reused as-is.
        """
        json_path = f"{self.filename}.json"

        if not os.path.exists(json_path):
            self.save_to_json()

        html_file_path = f"{self.filename}.html"

        html = HTMLTweetsVisualizer(self.username, json_path, html_file_path)

        html_content = html.generate()
        html.save(html_content)

        print(f"Saved to {html_file_path}")