# tests/test_storage_properties.py
"""Property-based tests for storage service.
This module uses hypothesis to verify that storage properties hold across
many random inputs, ensuring data persistence integrity.
Requirements: 7.1, 7.2, 7.3, 7.4
"""
import json
import shutil
import tempfile
import uuid

import pytest
from hypothesis import given, settings, strategies as st
from app.storage import StorageService, StorageError
from app.models import (
RecordData,
ParsedData,
MoodData,
InspirationData,
TodoData
)
# Note: We don't use pytest fixtures with hypothesis tests because function-scoped
# fixtures are set up once per test function, not once per generated example.
# Instead, we create temp directories directly in the test methods.
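#
# The per-example lifecycle used by every test below follows this pattern
# (shown only as a sketch of what the test bodies do, not an additional test):
#
#     temp_dir = tempfile.mkdtemp()
#     try:
#         storage_service = StorageService(temp_dir)
#         ...  # exercise the service and assert on the JSON files it writes
#     finally:
#         shutil.rmtree(temp_dir)
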
# Custom strategies for generating valid model data
@st.composite
def mood_data_strategy(draw):
"""Generate valid MoodData instances."""
mood_type = draw(st.one_of(st.none(), st.text(min_size=1, max_size=20)))
intensity = draw(st.one_of(st.none(), st.integers(min_value=1, max_value=10)))
keywords = draw(st.lists(st.text(min_size=1, max_size=15), min_size=0, max_size=5))
return MoodData(type=mood_type, intensity=intensity, keywords=keywords)
@st.composite
def inspiration_data_strategy(draw):
"""Generate valid InspirationData instances."""
core_idea = draw(st.text(min_size=1, max_size=20))
tags = draw(st.lists(st.text(min_size=1, max_size=10), min_size=0, max_size=5))
category = draw(st.sampled_from(["工作", "生活", "学习", "创意"]))
return InspirationData(core_idea=core_idea, tags=tags, category=category)
@st.composite
def todo_data_strategy(draw):
"""Generate valid TodoData instances."""
task = draw(st.text(min_size=1, max_size=50))
time = draw(st.one_of(st.none(), st.text(min_size=1, max_size=20)))
location = draw(st.one_of(st.none(), st.text(min_size=1, max_size=20)))
status = "pending" # Always default to pending for new todos
return TodoData(task=task, time=time, location=location, status=status)
@st.composite
def parsed_data_strategy(draw):
"""Generate valid ParsedData instances with optional mood, inspirations, and todos."""
# Randomly include or exclude mood
has_mood = draw(st.booleans())
mood = draw(mood_data_strategy()) if has_mood else None
# Generate 0-3 inspirations
inspirations = draw(st.lists(inspiration_data_strategy(), min_size=0, max_size=3))
# Generate 0-3 todos
todos = draw(st.lists(todo_data_strategy(), min_size=0, max_size=3))
return ParsedData(mood=mood, inspirations=inspirations, todos=todos)
@st.composite
def record_data_strategy(draw):
"""Generate valid RecordData instances."""
record_id = draw(st.text(min_size=1, max_size=36)) # UUID-like length
timestamp = draw(st.text(min_size=10, max_size=30)) # ISO timestamp-like
input_type = draw(st.sampled_from(["audio", "text"]))
original_text = draw(st.text(min_size=1, max_size=200))
parsed_data = draw(parsed_data_strategy())
return RecordData(
record_id=record_id,
timestamp=timestamp,
input_type=input_type,
original_text=original_text,
parsed_data=parsed_data
)
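# For reference, the assertions in the tests below treat each storage file as a
# flat JSON array whose entries carry at least these keys. The shape is inferred
# from the keys the tests assert on, not from the StorageService implementation:
#
#   records.json      -> record_id, timestamp, input_type, original_text
#   moods.json        -> record_id, timestamp, type, intensity, keywords
#   inspirations.json -> record_id, timestamp, core_idea, category, tags
#   todos.json        -> record_id, timestamp, task, time, location, status
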
class TestStorageServiceProperties:
"""Property-based tests for StorageService.
**Validates: Requirements 7.1, 7.2, 7.3, 7.4**
"""
@given(record=record_data_strategy())
@settings(max_examples=100)
def test_property_9_data_persistence_integrity(self, record):
"""
        Property 9: Data persistence integrity (数据持久化完整性)

        For any successfully processed record, it should be saved in records.json,
        and if it contains mood/inspiration/todo data, it should also be appended
        to the corresponding moods.json, inspirations.json, and todos.json files.
**Validates: Requirements 7.1, 7.2, 7.3, 7.4**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
# Save the complete record
returned_record_id = storage_service.save_record(record)
# Property 1: Record should be saved in records.json
assert storage_service.records_file.exists()
with open(storage_service.records_file, 'r', encoding='utf-8') as f:
records = json.load(f)
assert len(records) >= 1
# Find the saved record
saved_record = None
for r in records:
if r["record_id"] == returned_record_id:
saved_record = r
break
assert saved_record is not None, "Record should be saved in records.json"
assert saved_record["timestamp"] == record.timestamp
assert saved_record["input_type"] == record.input_type
assert saved_record["original_text"] == record.original_text
# Property 2: If mood data exists, it should be in moods.json
if record.parsed_data.mood is not None:
storage_service.append_mood(
record.parsed_data.mood,
returned_record_id,
record.timestamp
)
assert storage_service.moods_file.exists()
with open(storage_service.moods_file, 'r', encoding='utf-8') as f:
moods = json.load(f)
# Find the mood entry for this record
mood_entries = [m for m in moods if m["record_id"] == returned_record_id]
assert len(mood_entries) >= 1, "Mood should be saved in moods.json"
mood_entry = mood_entries[-1] # Get the last one
assert mood_entry["record_id"] == returned_record_id
assert mood_entry["timestamp"] == record.timestamp
assert mood_entry["type"] == record.parsed_data.mood.type
assert mood_entry["intensity"] == record.parsed_data.mood.intensity
assert mood_entry["keywords"] == record.parsed_data.mood.keywords
# Property 3: If inspiration data exists, it should be in inspirations.json
if record.parsed_data.inspirations:
storage_service.append_inspirations(
record.parsed_data.inspirations,
returned_record_id,
record.timestamp
)
assert storage_service.inspirations_file.exists()
with open(storage_service.inspirations_file, 'r', encoding='utf-8') as f:
inspirations = json.load(f)
# Find inspiration entries for this record
inspiration_entries = [i for i in inspirations if i["record_id"] == returned_record_id]
assert len(inspiration_entries) == len(record.parsed_data.inspirations), \
"All inspirations should be saved in inspirations.json"
# Verify each inspiration - use a copy to track matched entries
remaining_entries = inspiration_entries.copy()
for inspiration in record.parsed_data.inspirations:
# Find matching entry (may not be in same order)
matching_entry = None
for idx, entry in enumerate(remaining_entries):
if (entry["core_idea"] == inspiration.core_idea and
entry["category"] == inspiration.category and
entry["tags"] == inspiration.tags):
matching_entry = entry
remaining_entries.pop(idx)
break
assert matching_entry is not None, \
f"Could not find matching entry for inspiration: {inspiration}"
assert matching_entry["record_id"] == returned_record_id
assert matching_entry["timestamp"] == record.timestamp
# Property 4: If todo data exists, it should be in todos.json
if record.parsed_data.todos:
storage_service.append_todos(
record.parsed_data.todos,
returned_record_id,
record.timestamp
)
assert storage_service.todos_file.exists()
with open(storage_service.todos_file, 'r', encoding='utf-8') as f:
todos = json.load(f)
# Find todo entries for this record
todo_entries = [t for t in todos if t["record_id"] == returned_record_id]
assert len(todo_entries) == len(record.parsed_data.todos), \
"All todos should be saved in todos.json"
# Verify each todo - use a copy to track matched entries
remaining_entries = todo_entries.copy()
for todo in record.parsed_data.todos:
# Find matching entry (may not be in same order)
matching_entry = None
for idx, entry in enumerate(remaining_entries):
if (entry["task"] == todo.task and
entry["time"] == todo.time and
entry["location"] == todo.location and
entry["status"] == todo.status):
matching_entry = entry
remaining_entries.pop(idx)
break
assert matching_entry is not None, \
f"Could not find matching entry for todo: {todo}"
assert matching_entry["record_id"] == returned_record_id
assert matching_entry["timestamp"] == record.timestamp
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(records=st.lists(record_data_strategy(), min_size=1, max_size=5))
@settings(max_examples=100)
def test_property_9_multiple_records_persistence(self, records):
"""
        Property 9: Data persistence integrity (数据持久化完整性) - Multiple Records

        For any list of successfully processed records, all records should be
saved and retrievable from their respective JSON files.
**Validates: Requirements 7.1, 7.2, 7.3, 7.4**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
saved_record_ids = []
# Save all records
for record in records:
record_id = storage_service.save_record(record)
saved_record_ids.append(record_id)
# Append mood if exists
if record.parsed_data.mood is not None:
storage_service.append_mood(
record.parsed_data.mood,
record_id,
record.timestamp
)
# Append inspirations if exist
if record.parsed_data.inspirations:
storage_service.append_inspirations(
record.parsed_data.inspirations,
record_id,
record.timestamp
)
# Append todos if exist
if record.parsed_data.todos:
storage_service.append_todos(
record.parsed_data.todos,
record_id,
record.timestamp
)
# Verify all records are saved
with open(storage_service.records_file, 'r', encoding='utf-8') as f:
saved_records = json.load(f)
assert len(saved_records) >= len(records), \
"All records should be saved in records.json"
# Verify each record can be found
for record_id in saved_record_ids:
found = any(r["record_id"] == record_id for r in saved_records)
assert found, f"Record {record_id} should be in records.json"
# Count expected moods, inspirations, and todos
expected_moods = sum(1 for r in records if r.parsed_data.mood is not None)
expected_inspirations = sum(len(r.parsed_data.inspirations) for r in records)
expected_todos = sum(len(r.parsed_data.todos) for r in records)
# Verify moods count
if expected_moods > 0:
assert storage_service.moods_file.exists()
with open(storage_service.moods_file, 'r', encoding='utf-8') as f:
moods = json.load(f)
assert len(moods) >= expected_moods, \
f"Expected at least {expected_moods} moods, found {len(moods)}"
# Verify inspirations count
if expected_inspirations > 0:
assert storage_service.inspirations_file.exists()
with open(storage_service.inspirations_file, 'r', encoding='utf-8') as f:
inspirations = json.load(f)
assert len(inspirations) >= expected_inspirations, \
f"Expected at least {expected_inspirations} inspirations, found {len(inspirations)}"
# Verify todos count
if expected_todos > 0:
assert storage_service.todos_file.exists()
with open(storage_service.todos_file, 'r', encoding='utf-8') as f:
todos = json.load(f)
assert len(todos) >= expected_todos, \
f"Expected at least {expected_todos} todos, found {len(todos)}"
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(
record=record_data_strategy(),
has_mood=st.booleans(),
has_inspirations=st.booleans(),
has_todos=st.booleans()
)
@settings(max_examples=100)
def test_property_9_selective_data_persistence(
self, record, has_mood, has_inspirations, has_todos
):
"""
        Property 9: Data persistence integrity (数据持久化完整性) - Selective Persistence

        For any record, only the data types that exist should be persisted
to their respective files. Empty data should not create unnecessary entries.
**Validates: Requirements 7.1, 7.2, 7.3, 7.4**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
# Modify record based on flags
if not has_mood:
record.parsed_data.mood = None
if not has_inspirations:
record.parsed_data.inspirations = []
if not has_todos:
record.parsed_data.todos = []
# Save the record
record_id = storage_service.save_record(record)
            # Append mood/inspirations/todos when they are present
if record.parsed_data.mood is not None:
storage_service.append_mood(
record.parsed_data.mood,
record_id,
record.timestamp
)
if record.parsed_data.inspirations:
storage_service.append_inspirations(
record.parsed_data.inspirations,
record_id,
record.timestamp
)
if record.parsed_data.todos:
storage_service.append_todos(
record.parsed_data.todos,
record_id,
record.timestamp
)
# Verify records.json always exists
assert storage_service.records_file.exists()
# Verify mood file existence matches data presence
if has_mood and record.parsed_data.mood is not None:
assert storage_service.moods_file.exists(), \
"moods.json should exist when mood data is present"
# Verify inspirations file existence matches data presence
if has_inspirations and record.parsed_data.inspirations:
assert storage_service.inspirations_file.exists(), \
"inspirations.json should exist when inspiration data is present"
# Verify todos file existence matches data presence
if has_todos and record.parsed_data.todos:
assert storage_service.todos_file.exists(), \
"todos.json should exist when todo data is present"
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(
file_type=st.sampled_from(["records", "moods", "inspirations", "todos"])
)
@settings(max_examples=100)
def test_property_10_file_initialization(self, file_type):
"""
        Property 10: File initialization (文件初始化)

        For any non-existent JSON file, when first written to, the system should
create the file and initialize it as an empty array [].
**Validates: Requirements 7.5**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
# Map file type to file path
file_map = {
"records": storage_service.records_file,
"moods": storage_service.moods_file,
"inspirations": storage_service.inspirations_file,
"todos": storage_service.todos_file
}
target_file = file_map[file_type]
# Verify file doesn't exist initially
assert not target_file.exists(), \
f"{file_type}.json should not exist initially"
# Trigger file initialization by calling _ensure_file_exists
storage_service._ensure_file_exists(target_file)
# Property 1: File should now exist
assert target_file.exists(), \
f"{file_type}.json should be created"
# Property 2: File should be initialized as empty array
with open(target_file, 'r', encoding='utf-8') as f:
content = json.load(f)
assert isinstance(content, list), \
f"{file_type}.json should contain a list"
assert content == [], \
f"{file_type}.json should be initialized as empty array []"
# Property 3: File should be valid JSON
# (already verified by json.load above, but let's be explicit)
with open(target_file, 'r', encoding='utf-8') as f:
raw_content = f.read()
# Should be able to parse without error
parsed = json.loads(raw_content)
assert parsed == []
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(
operations=st.lists(
st.sampled_from(["records", "moods", "inspirations", "todos"]),
min_size=1,
max_size=10
)
)
@settings(max_examples=100)
def test_property_10_file_initialization_idempotent(self, operations):
"""
        Property 10: File initialization (文件初始化) - Idempotency

        For any sequence of file initialization operations, calling _ensure_file_exists
multiple times should be idempotent - it should not corrupt or overwrite
existing data.
**Validates: Requirements 7.5**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
file_map = {
"records": storage_service.records_file,
"moods": storage_service.moods_file,
"inspirations": storage_service.inspirations_file,
"todos": storage_service.todos_file
}
# Track which files have been initialized
initialized_files = set()
for file_type in operations:
target_file = file_map[file_type]
# Call _ensure_file_exists
storage_service._ensure_file_exists(target_file)
# File should exist
assert target_file.exists()
# Read current content
with open(target_file, 'r', encoding='utf-8') as f:
content = json.load(f)
if file_type not in initialized_files:
# First time - should be empty array
assert content == [], \
f"First initialization of {file_type}.json should create empty array"
initialized_files.add(file_type)
else:
# Subsequent calls - should preserve empty array
# (In real usage, data would be added between calls,
# but _ensure_file_exists should not overwrite)
assert isinstance(content, list), \
f"Subsequent calls should preserve list structure"
# Verify all unique files were created
unique_files = set(operations)
for file_type in unique_files:
target_file = file_map[file_type]
assert target_file.exists(), \
f"{file_type}.json should exist after operations"
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(record=record_data_strategy())
@settings(max_examples=100)
def test_property_10_file_initialization_on_first_write(self, record):
"""
        Property 10: File initialization (文件初始化) - First Write

        For any record being saved, if the JSON files don't exist, they should
be automatically created and initialized before writing data.
**Validates: Requirements 7.5**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
# Verify no files exist initially
assert not storage_service.records_file.exists()
assert not storage_service.moods_file.exists()
assert not storage_service.inspirations_file.exists()
assert not storage_service.todos_file.exists()
# Save a record (this should trigger file initialization)
record_id = storage_service.save_record(record)
# records.json should now exist and contain the record
assert storage_service.records_file.exists()
with open(storage_service.records_file, 'r', encoding='utf-8') as f:
records = json.load(f)
assert len(records) >= 1
assert any(r["record_id"] == record_id for r in records)
# If mood exists, save it and verify file initialization
if record.parsed_data.mood is not None:
storage_service.append_mood(
record.parsed_data.mood,
record_id,
record.timestamp
)
assert storage_service.moods_file.exists()
with open(storage_service.moods_file, 'r', encoding='utf-8') as f:
moods = json.load(f)
assert isinstance(moods, list)
assert len(moods) >= 1
# If inspirations exist, save them and verify file initialization
if record.parsed_data.inspirations:
storage_service.append_inspirations(
record.parsed_data.inspirations,
record_id,
record.timestamp
)
assert storage_service.inspirations_file.exists()
with open(storage_service.inspirations_file, 'r', encoding='utf-8') as f:
inspirations = json.load(f)
assert isinstance(inspirations, list)
assert len(inspirations) >= len(record.parsed_data.inspirations)
# If todos exist, save them and verify file initialization
if record.parsed_data.todos:
storage_service.append_todos(
record.parsed_data.todos,
record_id,
record.timestamp
)
assert storage_service.todos_file.exists()
with open(storage_service.todos_file, 'r', encoding='utf-8') as f:
todos = json.load(f)
assert isinstance(todos, list)
assert len(todos) >= len(record.parsed_data.todos)
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(records=st.lists(record_data_strategy(), min_size=2, max_size=20))
@settings(max_examples=100)
def test_property_11_unique_id_generation(self, records):
"""
        Property 11: Unique ID generation (唯一 ID 生成)

        For any two different records, the generated record_ids should be unique (non-repeating).
**Validates: Requirements 7.7**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
generated_ids = []
# Save all records and collect their IDs
for record in records:
# Clear the record_id to force generation of a new one
record.record_id = ""
# Save record and get the generated ID
record_id = storage_service.save_record(record)
generated_ids.append(record_id)
# Property 1: All IDs should be non-empty strings
for record_id in generated_ids:
assert record_id, "Generated record_id should not be empty"
assert isinstance(record_id, str), "Generated record_id should be a string"
# Property 2: All IDs should be unique (no duplicates)
unique_ids = set(generated_ids)
assert len(unique_ids) == len(generated_ids), \
f"All generated IDs should be unique. Generated {len(generated_ids)} IDs but only {len(unique_ids)} are unique. Duplicates found!"
# Property 3: IDs should be valid UUIDs (format check)
for record_id in generated_ids:
try:
# Try to parse as UUID - this will raise ValueError if invalid
uuid.UUID(record_id)
except ValueError:
pytest.fail(f"Generated ID '{record_id}' is not a valid UUID")
# Property 4: Verify all records are saved with their unique IDs
with open(storage_service.records_file, 'r', encoding='utf-8') as f:
saved_records = json.load(f)
saved_ids = [r["record_id"] for r in saved_records]
# All generated IDs should be in the saved records
for record_id in generated_ids:
assert record_id in saved_ids, \
f"Generated ID {record_id} should be found in saved records"
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(
num_records=st.integers(min_value=10, max_value=50)
)
@settings(max_examples=50, deadline=500)
def test_property_11_unique_id_generation_stress(self, num_records):
"""
        Property 11: Unique ID generation (唯一 ID 生成) - Stress Test

        For a large number of records saved in quick succession, all generated
record_ids should still be unique. This tests the robustness of UUID generation.
**Validates: Requirements 7.7**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
generated_ids = []
# Generate and save many records quickly
for i in range(num_records):
# Create a minimal record
record = RecordData(
record_id="", # Force generation
timestamp=f"2024-01-01T00:00:{i:02d}Z",
input_type="text",
original_text=f"Test record {i}",
parsed_data=ParsedData(mood=None, inspirations=[], todos=[])
)
record_id = storage_service.save_record(record)
generated_ids.append(record_id)
# All IDs should be unique
unique_ids = set(generated_ids)
assert len(unique_ids) == num_records, \
f"Expected {num_records} unique IDs, but got {len(unique_ids)}. " \
f"Found {num_records - len(unique_ids)} duplicates!"
# Verify all are valid UUIDs
for record_id in generated_ids:
try:
uuid.UUID(record_id)
except ValueError:
pytest.fail(f"Generated ID '{record_id}' is not a valid UUID")
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)
@given(record=record_data_strategy())
@settings(max_examples=100)
def test_property_11_unique_id_generation_preserves_existing(self, record):
"""
        Property 11: Unique ID generation (唯一 ID 生成) - Preserve Existing IDs

        If a record already has a record_id set, the save_record method should
preserve it and not generate a new one.
**Validates: Requirements 7.7**
"""
# Create a fresh temporary directory and storage service for each example
temp_dir = tempfile.mkdtemp()
try:
storage_service = StorageService(temp_dir)
# Use the record's existing ID
original_id = record.record_id
# Save the record
returned_id = storage_service.save_record(record)
# The returned ID should match the original
assert returned_id == original_id, \
"save_record should preserve existing record_id"
# Verify the record is saved with the original ID
with open(storage_service.records_file, 'r', encoding='utf-8') as f:
saved_records = json.load(f)
found_record = None
for r in saved_records:
if r["record_id"] == original_id:
found_record = r
break
assert found_record is not None, \
"Record should be saved with its original ID"
assert found_record["record_id"] == original_id
finally:
# Clean up temporary directory
shutil.rmtree(temp_dir)