Spaces:

ClickMons
/

CreativUp_Demo

Sleeping

App Files Files Community

CreativUp_Demo / src /art_pieces_db /database.py

Aurel-test

Upload folder using huggingface_hub

c3c0d39 verified 5 months ago

raw

history blame contribute delete

5.87 kB

	from .utils import str_to_date, str_to_list
	import logging
	import pandas as pd
	import re

	# Names given to the columns of the art pieces table, in column order.
	COL_NAMES = [
	    "database_id",
	    "museum_id",
	    "art_piece_place",
	    "art_piece_type",
	    "art_piece_date",
	    "related_names",
	    "related_dates",
	    "related_places",
	    "keywords",
	    "related_emotions",
	    "explanation",
	    "question",
	    "name_image",  # to be implemented
	]


	class Database:
	    """
	    A database of art pieces, backed by Pandas.

	    The related_names, related_dates, related_places and related_emotions
	    columns are converted from text to Python lists when the database is
	    created; the other columns are kept as read from the CSV file.
	    """

	    def __init__(self, csv):
	        """
	        Creates a new database given a CSV file which separator is ";".

	        Only the first len(COL_NAMES) columns are read and they are named
	        after COL_NAMES. Missing-value detection is disabled
	        (na_filter=False). The list-like fields are normalized right after
	        loading.
	        """
	        self.dataframe = pd.read_csv(
	            csv,
	            sep=";",
	            names=COL_NAMES,
	            usecols=range(0, len(COL_NAMES)),
	            na_filter=False,
	        )
	        self.normalize_fields()

	    def get_dataframe(self):
	        """
	        Access to the underlying dataframe (the object itself, not a copy).
	        """
	        return self.dataframe

	    def n_pieces(self):
	        """
	        Returns the number of art pieces (rows) in the database.
	        """
	        return len(self.dataframe)

	    def normalize_fields(self):
	        """
	        Normalize all the list-like fields, i.e. translate their string
	        content into lists (names, dates, places, then emotions).
	        """
	        self.normalize_field_related_names()
	        self.normalize_field_related_dates()
	        self.normalize_field_related_places()
	        self.normalize_field_related_emotions()

	    @staticmethod
	    def _length_between(low, high):
	        """
	        Builds a validator returning its argument when low < len(arg) < high
	        and raising ValueError otherwise.
	        """
	        def accept(text):
	            if not low < len(text) < high:
	                raise ValueError(text)
	            return text
	        return accept

	    def _normalize_list_field(self, column, item_label, accept, clean=None,
	                              split_kwargs=None):
	        """
	        Replaces the string content of a column by lists of validated items.

	        column: name of the dataframe column to rewrite in place.
	        item_label: wording used in the warnings, e.g. "related name".
	        accept: callable returning the value to store for an item, or
	            raising ValueError when the item is dubious. A None result is
	            dropped without a warning.
	        clean: optional callable applied to each item before `accept`; the
	            cleaned item is the one shown in warnings.
	        split_kwargs: optional keyword arguments forwarded to str_to_list.

	        A cell that str_to_list rejects with ValueError becomes an empty
	        list; a dubious item is skipped. Both cases are logged as warnings.
	        """
	        split_kwargs = {} if split_kwargs is None else split_kwargs

	        def normalize(row):
	            value = row[column]
	            try:
	                items = str_to_list(value, **split_kwargs)
	            except ValueError:
	                logging.warning(
	                    f"ignoring dubious list of {item_label}s \"{value}\" for database_id={row['database_id']}")
	                return []

	            kept = []
	            for item in items:
	                if clean is not None:
	                    item = clean(item)
	                try:
	                    result = accept(item)
	                except ValueError:
	                    logging.warning(
	                        f"ignoring dubious {item_label} \"{item}\" for database_id={row['database_id']}")
	                    continue
	                if result is not None:
	                    kept.append(result)
	            return kept

	        # Build the column explicitly instead of DataFrame.apply(axis=1):
	        # on an empty frame apply() probes the callback with a placeholder
	        # row and may hand back a DataFrame, which cannot be assigned to a
	        # single column. dtype=object keeps each list as one cell.
	        self.dataframe[column] = pd.Series(
	            [normalize(row) for _, row in self.dataframe.iterrows()],
	            index=self.dataframe.index,
	            dtype=object,
	        )

	    def normalize_field_related_names(self):
	        """
	        Translates the string content of the related_names column into lists of names.
	        str_to_list is called with ',;\\n ' as separators, so firstname and lastname are split.
	        In case a name seems too short (2 characters or less) or too long (16 characters or more),
	        a warning is logged and the value is ignored.
	        """
	        self._normalize_list_field(
	            'related_names',
	            'related name',
	            self._length_between(2, 16),
	            split_kwargs={'separators': ',;\n '},
	        )

	    def normalize_field_related_dates(self):
	        """
	        Translates the string content of the related_dates column into lists of dates.
	        In case a value cannot be translated by str_to_date (ValueError), a warning is logged
	        and the value is ignored.
	        """
	        def to_date(str_date):
	            return str_to_date(str_date)

	        self._normalize_list_field('related_dates', 'related date', to_date)

	    def normalize_field_related_places(self):
	        """
	        Translates the string content of the related_places column into lists of places.
	        In case a place seems too short (2 characters or less) or too long (32 characters or more),
	        a warning is logged and the value is ignored.
	        """
	        self._normalize_list_field(
	            'related_places',
	            'related place',
	            self._length_between(2, 32),
	        )

	    def normalize_field_related_emotions(self):
	        """
	        Translates the string content of the related_emotions column into lists of emotions.
	        Each emotion is lowercased and stripped of surrounding whitespace. Emotions that are then
	        too short (1 character or less) or too long (30 characters or more, likely not an
	        emotion) are logged as warnings and ignored.
	        """
	        def clean(emotion):
	            return emotion.lower().strip()

	        self._normalize_list_field(
	            'related_emotions',
	            'related emotion',
	            self._length_between(1, 30),
	            clean=clean,
	        )