Spaces:

digitalai
/

ai-assistant

Sleeping

App Files Community

ai-assistant / config /process_text.py

digitalai

Upload 29 files

6736fcd verified almost 2 years ago

raw

history blame contribute delete

4.87 kB

	import pandas as pd
	from pathlib import Path

	class DataManager:
	    """
	    Manage the CSV-backed question database.

	    Rows are built from delimited strings (one entry per answer option),
	    merged with the rows already stored in ``db_file``, cleaned, and
	    written back.

	    Attributes:
	        db_file (Path): Location of the main CSV database.
	        db_level (Path): Location of the CSV file that maps a level to
	            its question (read by ``get_id``).
	    """

	    def __init__(self):
	        """
	        Point the manager at the default data files.

	        Only the paths are recorded here; no file is opened or created.
	        """
	        self.db_file = Path("data").joinpath("database.csv")
	        self.db_level = Path("data").joinpath("db_level")

	    def get_id(self, level):
	        """
	        Look up the question stored for a level.

	        The level table is re-read from ``self.db_level`` on every call.

	        Args:
	            level (str): Value to match against the ``level`` column.

	        Returns:
	            The ``question`` value of the first matching row, or None
	            when no row matches.
	        """
	        df_level = pd.read_csv(self.db_level)
	        matches = df_level.loc[df_level["level"] == level, "question"]
	        return None if matches.empty else matches.iloc[0]

	    @staticmethod
	    def cleaner(texts: str) -> list:
	        r"""
	        Split a delimited string and trim each piece.

	        Pieces are separated by the two-character sequence ``\|``
	        (a backslash followed by a pipe). Leading and trailing double
	        quotes and newlines are stripped from every piece; other
	        whitespace is left untouched.

	        Args:
	            texts (str): Delimited text.

	        Returns:
	            list of str: The trimmed pieces, in their original order.
	        """
	        # Raw string on purpose: "\|" in a normal literal is an invalid
	        # escape sequence and triggers a SyntaxWarning on recent Pythons.
	        return [txt.strip('"\n') for txt in texts.split(r"\|")]

	    @staticmethod
	    def make_id(level_id, num: int, next_id) -> list:
	        """
	        Build row IDs from a level ID and the IDs of the next questions.

	        For the special level ``"root"`` each ID is ``L<next>``; for any
	        other level it is ``L<level_id>/<next>``.

	        Args:
	            level_id (str): The level ID, or ``"root"``.
	            num (int): How many IDs to generate.
	            next_id (list of str): The next IDs; the first ``num`` are used.

	        Returns:
	            list of str: Generated IDs.

	        Raises:
	            IndexError: If ``next_id`` holds fewer than ``num`` entries.
	        """
	        prefix = "L" if level_id == "root" else f"L{level_id}/"
	        # Index explicitly (rather than slicing) so a too-short next_id
	        # fails loudly instead of silently yielding fewer IDs.
	        return [f"{prefix}{next_id[i]}" for i in range(num)]

	    def make_row(
	        self, level, question_text, options, feedback, next_id
	    ) -> list:
	        """
	        Build one database row per answer option.

	        ``options``, ``feedback`` and ``next_id`` are delimited strings
	        that are split with ``cleaner``; entry *i* of each belongs to
	        option *i*. Surplus feedback / next-id entries are ignored.

	        Args:
	            level (str): ID of the question.
	            question_text (str): Text of the question.
	            options (str): Delimited option texts.
	            feedback (str): Delimited action texts.
	            next_id (str): Delimited IDs of the next questions.

	        Returns:
	            list of list: Rows of
	            ``[row_id, question_text, option, action, next]``.

	        Raises:
	            IndexError: If ``feedback`` or ``next_id`` holds fewer
	                entries than ``options``.
	        """
	        next_list = self.cleaner(next_id)
	        action_list = self.cleaner(feedback)
	        option_list = self.cleaner(options)

	        n_options = len(option_list)
	        if len(action_list) < n_options or len(next_list) < n_options:
	            # Same exception type the bare indexing used to raise, but
	            # with a message that says what is actually wrong.
	            raise IndexError(
	                f"expected at least {n_options} feedback and next-id "
	                f"entries (one per option), got {len(action_list)} "
	                f"and {len(next_list)}"
	            )

	        list_id = self.make_id(level, n_options, next_list)
	        return [
	            [row_id, question_text, option, action, nxt]
	            for row_id, option, action, nxt in zip(
	                list_id, option_list, action_list, next_list
	            )
	        ]

	    def _create_dataframe(
	        self, level, question_text, options, feedback, next_id
	    ) -> pd.DataFrame:
	        """
	        Turn the delimited inputs into a DataFrame of database rows.

	        Args:
	            level (str): ID of the question.
	            question_text (str): Text of the question.
	            options (str): Delimited option texts.
	            feedback (str): Delimited action texts.
	            next_id (str): Delimited IDs of the next questions.

	        Returns:
	            pd.DataFrame: One row per option with the columns
	            ``level``, ``question_text``, ``options``, ``feedback``
	            and ``next``.
	        """
	        rows = self.make_row(
	            level, question_text, options, feedback, next_id
	        )
	        return pd.DataFrame(
	            columns=["level", "question_text", "options", "feedback", "next"],
	            data=rows,
	        )

	    @staticmethod
	    def _clean_dataframe(df_unclean: pd.DataFrame) -> pd.DataFrame:
	        """
	        Drop duplicate rows, then drop every row containing a NaN.

	        Args:
	            df_unclean (pd.DataFrame): The unclean DataFrame.

	        Returns:
	            pd.DataFrame: The cleaned DataFrame.
	        """
	        return df_unclean.drop_duplicates().dropna()

	    def save_to_database(
	        self, level, question_text, options, feedback, next_id
	    ) -> None:
	        """
	        Append the rows for one question to the CSV database.

	        The new rows are combined with the existing content of
	        ``self.db_file``, cleaned with ``_clean_dataframe`` and written
	        back, replacing the file. If the database file does not exist
	        yet (or is completely empty) it is created from the new rows.

	        Args:
	            level (str): ID of the question.
	            question_text (str): Text of the question.
	            options (str): Delimited option texts.
	            feedback (str): Delimited action texts.
	            next_id (str): Delimited IDs of the next questions.
	        """
	        df_input = self._create_dataframe(
	            level, question_text, options, feedback, next_id
	        )
	        try:
	            df_database = self.read_db()
	        except (FileNotFoundError, pd.errors.EmptyDataError):
	            # First save: nothing stored yet, so start from the new rows.
	            df_combined = df_input
	        else:
	            # NOTE(review): stored values come back through read_csv's
	            # type inference, so e.g. a numeric-looking next id may no
	            # longer compare equal to its string form when duplicates
	            # are dropped - verify whether that matters for this data.
	            df_combined = pd.concat(
	                [df_database, df_input], ignore_index=True
	            )

	        df_cleaned = self._clean_dataframe(df_combined)
	        # to_csv does not create missing directories.
	        Path(self.db_file).parent.mkdir(parents=True, exist_ok=True)
	        df_cleaned.to_csv(self.db_file, index=False)

	    def read_db(self) -> pd.DataFrame:
	        """
	        Read the main CSV database.

	        Returns:
	            pd.DataFrame: The content of ``self.db_file``.

	        Raises:
	            FileNotFoundError: If ``self.db_file`` does not exist.
	        """
	        return pd.read_csv(self.db_file)