Spaces:

Lordemarco
/

sentiment-fastapi

Sleeping

sentiment-fastapi / tests /unit /test_data.py

GitHub Actions

Auto-deploy new version [skip ci]

1180a53 about 2 months ago

1.8 kB

	# tests/test_data.py
	import os
	import subprocess
	from datasets import load_from_disk

	TWEET_PROCESSED_PATH = "data/processed/tweet_eval_tokenized"
	YT_PROCESSED_PATH = "data/processed/youtube_tokenized"


	def run_data_preparation(dataset_name):
	"""Esegue lo script di data preparation per il dataset richiesto."""
	print(f"⚙️ Avvio data_preparation.py per il dataset: {dataset_name}")
	subprocess.run(["python", "src/data_preparation.py", "--dataset", dataset_name], check=True)


	def test_tweet_eval_dataset_exists_or_create():
	"""Controlla o crea il dataset Tweet Eval preprocessato."""
	if not os.path.exists(TWEET_PROCESSED_PATH):
	run_data_preparation("tweet_eval")
	assert os.path.exists(TWEET_PROCESSED_PATH), "Tweet Eval non disponibile dopo la preparazione"


	def test_youtube_dataset_exists_or_create():
	"""Controlla o crea il dataset YouTube preprocessato."""
	if not os.path.exists(YT_PROCESSED_PATH):
	run_data_preparation("youtube")
	assert os.path.exists(YT_PROCESSED_PATH), "YouTube dataset non disponibile dopo la preparazione"


	def test_tweet_eval_structure():
	"""Verifica che il dataset Tweet Eval abbia la struttura corretta."""
	ds = load_from_disk(TWEET_PROCESSED_PATH)
	assert "text" in ds["test"].features, "Campo 'text' mancante in Tweet Eval"
	assert "label" in ds["test"].features, "Campo 'label' mancante in Tweet Eval"


	def test_youtube_structure():
	"""Verifica che il dataset YouTube abbia la struttura corretta."""
	ds = load_from_disk(YT_PROCESSED_PATH)
	assert (
	"CommentText" in ds["train"].features or "CommentText" in ds["train"].features
	), "Campo testuale mancante in YouTube dataset"
	assert "Sentiment" in ds["train"].features, "Campo 'label' mancante in YouTube dataset"