# Repository path: Spiritual_Health_Project/tests/test_file_processing_service.py
# Author: DocUA
# Commit message: ✅ Enhanced Verification Modes - Production Ready
# Commit: 7bbd836
# test_file_processing_service.py
"""
Unit tests for FileProcessingService.
Tests core functionality of file processing including CSV/XLSX parsing,
validation, and template generation.
"""
import csv
import io
import tempfile
import pytest
from pathlib import Path
from datetime import datetime
import pandas as pd
from src.core.file_processing_service import FileProcessingService
from src.core.verification_models import TestMessage, FileUploadResult
class TestFileProcessingService:
    """Test cases for FileProcessingService.

    A fresh service instance is created before every test by
    ``setup_method``. Tests that need a real file on disk create it inside a
    ``tempfile.TemporaryDirectory`` so the file is removed even when writing
    the fixture or an assertion fails.
    """

    def setup_method(self):
        """Set up test fixtures: a new FileProcessingService per test."""
        self.service = FileProcessingService()

    def test_validate_file_format_csv(self):
        """Test CSV file extension validation (lower- and upper-case)."""
        assert self.service.validate_file_extension("test.csv") is True
        assert self.service.validate_file_extension("test.CSV") is True

    def test_validate_file_format_xlsx(self):
        """Test XLSX file extension validation (lower- and upper-case)."""
        assert self.service.validate_file_extension("test.xlsx") is True
        assert self.service.validate_file_extension("test.XLSX") is True

    def test_validate_file_format_invalid(self):
        """Test that unsupported or missing extensions are rejected."""
        assert self.service.validate_file_extension("test.txt") is False
        assert self.service.validate_file_extension("test.doc") is False
        assert self.service.validate_file_extension("test") is False

    def test_detect_csv_delimiter_comma(self):
        """Test CSV delimiter detection for comma."""
        content = "message,expected_classification\nHello,green\nWorld,red"
        delimiter = self.service._detect_csv_delimiter(content)
        assert delimiter == ","

    def test_detect_csv_delimiter_semicolon(self):
        """Test CSV delimiter detection for semicolon."""
        content = "message;expected_classification\nHello;green\nWorld;red"
        delimiter = self.service._detect_csv_delimiter(content)
        assert delimiter == ";"

    def test_detect_csv_delimiter_tab(self):
        """Test CSV delimiter detection for tab."""
        content = "message\texpected_classification\nHello\tgreen\nWorld\tred"
        delimiter = self.service._detect_csv_delimiter(content)
        assert delimiter == "\t"

    def test_normalize_column_names_standard(self):
        """Test column name normalization with standard names."""
        columns = ["message", "expected_classification"]
        normalized = self.service._normalize_column_names(columns)
        assert normalized["message"] == "message"
        assert normalized["expected_classification"] == "expected_classification"

    def test_normalize_column_names_alternatives(self):
        """Test column name normalization with alternative names.

        The mapping is keyed by the canonical name and holds the column name
        actually present in the input.
        """
        columns = ["text", "label"]
        normalized = self.service._normalize_column_names(columns)
        assert normalized["message"] == "text"
        assert normalized["expected_classification"] == "label"

    def test_validate_test_cases_data_valid(self):
        """Test validation of valid test case data."""
        data = [
            {"message": "Hello world", "expected_classification": "green"},
            {"message": "I'm worried", "expected_classification": "yellow"},
        ]
        errors = self.service._validate_test_cases_data(data)
        assert len(errors) == 0

    def test_validate_test_cases_data_empty_message(self):
        """Test validation with empty message."""
        data = [
            {"message": "", "expected_classification": "green"},
        ]
        errors = self.service._validate_test_cases_data(data)
        assert len(errors) == 1
        assert "message text is empty" in errors[0]

    def test_validate_test_cases_data_invalid_classification(self):
        """Test validation with invalid classification."""
        data = [
            {"message": "Hello", "expected_classification": "blue"},
        ]
        errors = self.service._validate_test_cases_data(data)
        assert len(errors) == 1
        assert "invalid classification" in errors[0]

    def test_parse_csv_file_valid(self):
        """Test parsing a valid CSV file."""
        csv_content = "message,expected_classification\nHello world,green\nI'm worried,yellow\n"
        # The temporary directory removes the fixture even if a step fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            csv_path = Path(temp_dir) / "test_cases.csv"
            # Explicit encoding keeps the fixture independent of the locale.
            csv_path.write_text(csv_content, encoding="utf-8")

            result = self.service.parse_csv_file(str(csv_path))

            assert result.file_format == "csv"
            assert result.total_rows == 2
            assert result.valid_rows == 2
            assert len(result.validation_errors) == 0
            assert len(result.parsed_test_cases) == 2

            # Check first test case
            first_case = result.parsed_test_cases[0]
            assert first_case.text == "Hello world"
            assert first_case.pre_classified_label == "green"

    def test_parse_csv_file_missing_columns(self):
        """Test parsing a CSV whose headers use alternative column names.

        Despite the test name, the canonical headers are only "missing" in
        the sense that the file uses 'text' and 'label' instead; parsing is
        expected to succeed.
        """
        csv_content = "text,label\nHello world,green\n"
        with tempfile.TemporaryDirectory() as temp_dir:
            csv_path = Path(temp_dir) / "test_cases.csv"
            csv_path.write_text(csv_content, encoding="utf-8")

            result = self.service.parse_csv_file(str(csv_path))

            # Should still work because 'text' and 'label' are alternative names
            assert result.file_format == "csv"
            assert result.total_rows == 1
            assert result.valid_rows == 1
            assert len(result.parsed_test_cases) == 1

    def test_parse_xlsx_file_valid(self):
        """Test parsing a valid XLSX file."""
        data = {
            "message": ["Hello world", "I'm worried"],
            "expected_classification": ["green", "yellow"],
        }
        df = pd.DataFrame(data)
        # Writing the workbook happens inside the managed directory so a
        # failure in to_excel cannot leave a stray file behind.
        with tempfile.TemporaryDirectory() as temp_dir:
            xlsx_path = str(Path(temp_dir) / "test_cases.xlsx")
            df.to_excel(xlsx_path, index=False)

            result = self.service.parse_xlsx_file(xlsx_path)

            assert result.file_format == "xlsx"
            assert result.total_rows == 2
            assert result.valid_rows == 2
            assert len(result.validation_errors) == 0
            assert len(result.parsed_test_cases) == 2

            # Check first test case
            first_case = result.parsed_test_cases[0]
            assert first_case.text == "Hello world"
            assert first_case.pre_classified_label == "green"

    def test_convert_to_test_messages(self):
        """Test converting parsed data to TestMessage objects."""
        data = [
            {"message": "Hello world", "expected_classification": "green"},
            {"message": "I'm worried", "expected_classification": "yellow"},
        ]
        messages = self.service.convert_to_test_messages(data)
        assert len(messages) == 2
        assert messages[0].text == "Hello world"
        assert messages[0].pre_classified_label == "green"
        assert messages[1].text == "I'm worried"
        assert messages[1].pre_classified_label == "yellow"

    def test_generate_csv_template(self):
        """Test CSV template generation."""
        template = self.service.generate_csv_template()

        # Parse the template to verify structure
        rows = list(csv.reader(io.StringIO(template)))
        assert len(rows) >= 2  # Header + at least one data row
        assert rows[0] == ["message", "expected_classification"]

        # Check that all data rows have valid classifications; rows with
        # fewer than two cells (e.g. blank lines) are skipped, not failed.
        for row in rows[1:]:
            if len(row) >= 2:
                assert row[1].lower() in ["green", "yellow", "red"]

    def test_generate_xlsx_template(self):
        """Test XLSX template generation."""
        template_bytes = self.service.generate_xlsx_template()
        assert isinstance(template_bytes, bytes)
        assert len(template_bytes) > 0

        # Verify we can read the generated template. Reading from memory
        # avoids reopening a still-open NamedTemporaryFile by name, which is
        # not supported on every platform.
        df = pd.read_excel(io.BytesIO(template_bytes))
        assert "message" in df.columns
        assert "expected_classification" in df.columns
        assert len(df) > 0

    def test_get_validation_error_details(self):
        """Test validation error details generation."""
        errors = [
            "Missing required columns: message",
            "Row 1: invalid classification 'blue'",
            "Row 2: message text is empty",
        ]
        details = self.service.get_validation_error_details(errors)
        assert details["total_errors"] == 3
        assert details["errors"] == errors
        assert len(details["suggestions"]) > 0
        assert "format_help" in details

    def test_suggest_format_corrections(self):
        """Test that format correction suggestions are produced."""
        content = "text;label\nHello;green\nWorld;red"
        suggestions = self.service.suggest_format_corrections(content)
        assert len(suggestions) > 0
        # NOTE(review): the suggestions presumably mention the semicolon
        # delimiter or the column names, but their wording is not asserted
        # here — confirm against the service before tightening this test.

    def test_process_uploaded_file_invalid_format(self):
        """Test processing file with invalid format."""
        with tempfile.TemporaryDirectory() as temp_dir:
            txt_path = Path(temp_dir) / "upload.txt"
            txt_path.write_bytes(b"Hello world")

            result = self.service.process_uploaded_file(str(txt_path))

            assert result.file_format == "unknown"
            assert len(result.validation_errors) > 0
            assert "Unsupported file format" in result.validation_errors[0]