Spaces:

yuyutsu07
/

Tweet-Wayback

Sleeping

File size: 3,520 Bytes

cbb84f2

"""
Exports the parsed archived tweets.
"""

import datetime
import os
import re
from typing import Any, Dict, List, Optional

import pandas as pd

from waybacktweets.api.visualize import HTMLTweetsVisualizer


class TweetsExporter:
    """
    Class responsible for exporting parsed archived tweets.

    On construction the column-oriented ``data`` mapping is converted into a
    pandas DataFrame and a timestamped base filename
    (``<username>_tweets_<YYYYmmddHHMMSS>``) is computed. The ``save_to_*``
    methods then write that DataFrame to paths built from the base filename
    (relative paths, so they resolve against the current working directory).

    Args:
        data (Dict[str, List[Any]]): The parsed archived tweets data, one list
            of values per field. Lists may differ in length; shorter ones are
            padded with ``None``. An empty mapping yields an empty DataFrame.
        username (str): The username associated with the tweets.
        field_options (List[str]): The fields to be included in the exported
            data. They are applied positionally as column labels, so their
            order must match the key order of ``data``. For more details on
            each option, visit :ref:`field_options`.
    """

    def __init__(
        self, data: Dict[str, List[Any]], username: str, field_options: List[str]
    ):
        self.data = data
        self.username = username
        self.field_options = field_options
        # The timestamp is captured once so that every export produced by this
        # instance (CSV, JSON, HTML) shares the same base filename.
        self.formatted_datetime = self._datetime_now()
        self.filename = f"{self.username}_tweets_{self.formatted_datetime}"
        self.dataframe = self._create_dataframe()

    @staticmethod
    def _datetime_now() -> str:
        """
        Returns the current datetime, formatted as a string.

        Returns:
            The current local datetime as ``YYYYmmddHHMMSS``.
        """
        now = datetime.datetime.now()
        formatted_now = now.strftime("%Y%m%d%H%M%S")
        # Defensive: strip anything that is not a word character so the value
        # is always safe to embed in a filename.
        formatted_now = re.sub(r"\W+", "", formatted_now)

        return formatted_now

    @staticmethod
    def _transpose_matrix(
        data: Dict[str, List[Any]], fill_value: Optional[Any] = None
    ) -> List[List[Any]]:
        """
        Transposes a matrix, filling in missing values with a specified fill
        value if needed.

        The input maps each column name to its list of values; the output is a
        list of rows, where row ``i`` holds the ``i``-th value of every column
        in the mapping's iteration order.

        Args:
            data (Dict[str, List[Any]]): The matrix to be transposed.
            fill_value (Optional[Any]): The value to fill in missing values with.

        Returns:
            The transposed matrix. An empty list when ``data`` has no columns
            or every column is empty.
        """
        # ``default=0`` keeps an empty mapping from raising ValueError in max().
        max_length = max((len(column) for column in data.values()), default=0)

        # ``list(column)`` copies the values (the caller's lists are never
        # mutated) and also lets non-list sequences such as tuples be padded.
        padded_columns = [
            list(column) + [fill_value] * (max_length - len(column))
            for column in data.values()
        ]

        return [list(row) for row in zip(*padded_columns)]

    def _create_dataframe(self) -> pd.DataFrame:
        """
        Creates a DataFrame from the transposed data.

        Columns are labelled positionally with ``self.field_options``.

        Returns:
            The DataFrame representation of the data.
        """
        data_transposed = self._transpose_matrix(self.data)

        df = pd.DataFrame(data_transposed, columns=self.field_options)

        return df

    def save_to_csv(self) -> None:
        """
        Saves the DataFrame to a CSV file.

        The file is written to ``<filename>.csv`` without the index column,
        and the path is printed afterwards.
        """
        csv_file_path = f"{self.filename}.csv"
        self.dataframe.to_csv(csv_file_path, index=False)

        print(f"Saved to {csv_file_path}")

    def save_to_json(self) -> None:
        """
        Saves the DataFrame to a JSON file.

        The file is written to ``<filename>.json`` as a single JSON array of
        record objects, and the path is printed afterwards.
        """
        json_path = f"{self.filename}.json"
        self.dataframe.to_json(json_path, orient="records", lines=False)

        print(f"Saved to {json_path}")

    def save_to_html(self) -> None:
        """
        Saves the DataFrame to an HTML file.

        The HTML is produced by ``HTMLTweetsVisualizer`` from the JSON export,
        so the JSON file is created first if it does not exist yet. The result
        is written to ``<filename>.html`` and the path is printed afterwards.
        """
        json_path = f"{self.filename}.json"

        # The visualizer is handed the JSON file path, so make sure the JSON
        # export exists before generating the page.
        if not os.path.exists(json_path):
            self.save_to_json()

        html_file_path = f"{self.filename}.html"

        html = HTMLTweetsVisualizer(self.username, json_path, html_file_path)

        html_content = html.generate()
        html.save(html_content)

        print(f"Saved to {html_file_path}")