Spaces:
Sleeping
Sleeping
| """ | |
| Exports the parsed archived tweets. | |
| """ | |
| import datetime | |
| import os | |
| import re | |
| from typing import Any, Dict, List, Optional | |
| import pandas as pd | |
| from waybacktweets.api.visualize import HTMLTweetsVisualizer | |
| class TweetsExporter: | |
| """ | |
| Class responsible for exporting parsed archived tweets. | |
| Args: | |
| data (Dict[str, List[Any]]): The parsed archived tweets data. | |
| username (str): The username associated with the tweets. | |
| field_options (List[str]): The fields to be included in the exported data. For more details on each option, visit :ref:`field_options`. | |
| """ # noqa: E501 | |
| def __init__( | |
| self, data: Dict[str, List[Any]], username: str, field_options: List[str] | |
| ): | |
| self.data = data | |
| self.username = username | |
| self.field_options = field_options | |
| self.formatted_datetime = self._datetime_now() | |
| self.filename = f"{self.username}_tweets_{self.formatted_datetime}" | |
| self.dataframe = self._create_dataframe() | |
| def _datetime_now() -> str: | |
| """ | |
| Returns the current datetime, formatted as a string. | |
| Returns: | |
| The current datetime. | |
| """ | |
| now = datetime.datetime.now() | |
| formatted_now = now.strftime("%Y%m%d%H%M%S") | |
| formatted_now = re.sub(r"\W+", "", formatted_now) | |
| return formatted_now | |
| def _transpose_matrix( | |
| data: Dict[str, List[Any]], fill_value: Optional[Any] = None | |
| ) -> List[List[Any]]: | |
| """ | |
| Transposes a matrix, filling in missing values with a specified fill value if needed. | |
| Args: | |
| data (Dict[str, List[Any]]): The matrix to be transposed. | |
| fill_value (Optional[Any]): The value to fill in missing values with. | |
| Returns: | |
| The transposed matrix. | |
| """ # noqa: E501 | |
| max_length = max(len(sublist) for sublist in data.values()) | |
| filled_data = { | |
| key: value + [fill_value] * (max_length - len(value)) | |
| for key, value in data.items() | |
| } | |
| data_transposed = [list(row) for row in zip(*filled_data.values())] | |
| return data_transposed | |
| def _create_dataframe(self) -> pd.DataFrame: | |
| """ | |
| Creates a DataFrame from the transposed data. | |
| Returns: | |
| The DataFrame representation of the data. | |
| """ | |
| data_transposed = self._transpose_matrix(self.data) | |
| df = pd.DataFrame(data_transposed, columns=self.field_options) | |
| return df | |
| def save_to_csv(self) -> None: | |
| """ | |
| Saves the DataFrame to a CSV file. | |
| """ | |
| csv_file_path = f"{self.filename}.csv" | |
| self.dataframe.to_csv(csv_file_path, index=False) | |
| print(f"Saved to {csv_file_path}") | |
| def save_to_json(self) -> None: | |
| """ | |
| Saves the DataFrame to a JSON file. | |
| """ | |
| json_path = f"{self.filename}.json" | |
| self.dataframe.to_json(json_path, orient="records", lines=False) | |
| print(f"Saved to {json_path}") | |
| def save_to_html(self) -> None: | |
| """ | |
| Saves the DataFrame to an HTML file. | |
| """ | |
| json_path = f"{self.filename}.json" | |
| if not os.path.exists(json_path): | |
| self.save_to_json() | |
| html_file_path = f"{self.filename}.html" | |
| html = HTMLTweetsVisualizer(self.username, json_path, html_file_path) | |
| html_content = html.generate() | |
| html.save(html_content) | |
| print(f"Saved to {html_file_path}") | |