Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from pathlib import Path | |
class DataManager:
    """
    Manage the CSV files that store questions, options and feedback.

    Attributes:
        db_file (Path): Location of the main CSV database.
        db_level (Path): Location of the CSV that maps levels to questions.
    """

    def __init__(self):
        """
        Initialize DataManager object with the default database files.
        """
        self.db_file = Path("data").joinpath("database.csv")
        # NOTE(review): this path has no ".csv" suffix although get_id reads
        # it with pd.read_csv -- confirm the actual file name on disk.
        self.db_level = Path("data").joinpath("db_level")

    def get_id(self, level):
        """
        Find the question corresponding to the given level value.

        Args:
            level (str): The level value to search for.

        Returns:
            str: The first matching question text, or None when no row of
            the level file has that level.
        """
        df_level = pd.read_csv(self.db_level)
        matches = df_level.loc[df_level["level"] == level, "question"]
        return None if matches.empty else matches.iloc[0]

    @staticmethod
    def cleaner(texts):
        """
        Split '|'-separated text and strip quotes/newlines from each part.

        Args:
            texts (str): Text data separated by '|'.

        Returns:
            list of str: One cleaned entry per '|'-separated part.
        """
        # Only leading/trailing '"' and newline characters are removed;
        # surrounding spaces are kept as they are.
        return [txt.strip('"\n') for txt in texts.split("|")]

    @staticmethod
    def make_id(level_id, num, next_id):
        """
        Generate row IDs from a level ID and the IDs of the next questions.

        Args:
            level_id (str): The level ID; "root" produces IDs without a
                parent part.
            num (int): The number of IDs to generate.
            next_id (list of str): The next IDs.

        Returns:
            list of str: "L<next>" for the root level, otherwise
            "L<level_id>/<next>", for the first ``num`` entries of next_id.

        Raises:
            IndexError: If ``num`` exceeds the number of entries in next_id.
        """
        prefix = "L" if level_id == "root" else f"L{level_id}/"
        return [f"{prefix}{next_id[i]}" for i in range(num)]

    def make_row(self, level, question_text, options, feedback, next_id):
        """
        Generate one row per option from '|'-separated input strings.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): '|'-separated option texts.
            feedback (str): '|'-separated action texts, one per option.
            next_id (str): '|'-separated IDs of the next questions, one per
                option.

        Returns:
            list of list: Rows of the form
            [row_id, question_text, option, action, next].

        Raises:
            ValueError: If options, feedback and next_id do not contain the
                same number of entries.
        """
        option_list = self.cleaner(options)
        action_list = self.cleaner(feedback)
        next_list = self.cleaner(next_id)

        # The three lists are consumed in lockstep; refuse mismatched input
        # instead of failing with a bare IndexError or silently dropping
        # surplus entries.
        if not len(option_list) == len(action_list) == len(next_list):
            raise ValueError(
                "options, feedback and next_id must have the same number of "
                f"'|'-separated entries (got {len(option_list)}, "
                f"{len(action_list)} and {len(next_list)})"
            )

        list_id = self.make_id(level, len(option_list), next_list)
        return [
            [row_id, question_text, option, action, target]
            for row_id, option, action, target in zip(
                list_id, option_list, action_list, next_list
            )
        ]

    def _create_dataframe(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Create a DataFrame from the rows built by make_row.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Texts of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            pd.DataFrame: Frame with the columns "level", "question_text",
            "options", "feedback" and "next".
        """
        rows = self.make_row(level, question_text, options, feedback, next_id)
        return pd.DataFrame(
            columns=["level", "question_text", "options", "feedback", "next"],
            data=rows,
        )

    @staticmethod
    def _clean_dataframe(df_unclean):
        """
        Clean the DataFrame by removing duplicates and NaN values.

        Args:
            df_unclean (pd.DataFrame): The unclean DataFrame.

        Returns:
            pd.DataFrame: Copy without duplicated rows and without rows that
            contain a missing value.
        """
        return df_unclean.drop_duplicates().dropna()

    def save_to_database(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Append the new rows to the CSV database, then clean and rewrite it.

        If the database file does not exist yet it is created (together with
        its parent directory) from the new rows alone.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Raises:
            ValueError: If options, feedback and next_id do not contain the
                same number of entries.
        """
        df_input = self._create_dataframe(
            level, question_text, options, feedback, next_id
        )
        try:
            # Read stored values as text: with inferred dtypes a stored "2"
            # comes back as the integer 2 and would not be recognised as a
            # duplicate of the incoming string "2".
            df_database = self.read_db(dtype=str)
        except FileNotFoundError:
            # First save: there is nothing to merge with yet.
            df_combined = df_input
        else:
            df_combined = pd.concat(
                [df_database, df_input], ignore_index=True
            )
        df_cleaned = self._clean_dataframe(df_combined)
        Path(self.db_file).parent.mkdir(parents=True, exist_ok=True)
        df_cleaned.to_csv(self.db_file, index=False)

    def read_db(self, dtype=None):
        """
        Read data from the CSV database and return it as a DataFrame.

        Args:
            dtype: Optional dtype forwarded to pd.read_csv; the default
                (None) lets pandas infer the column types.

        Returns:
            pd.DataFrame: The DataFrame read from the CSV file.

        Raises:
            FileNotFoundError: If the database file does not exist.
        """
        return pd.read_csv(self.db_file, dtype=dtype)