"""Property-based tests for storage service.

This module uses hypothesis to verify that storage properties hold across
many random inputs, ensuring data persistence integrity.

Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.7
"""

import json
import pytest
import tempfile
import shutil
from pathlib import Path
from datetime import datetime

from hypothesis import given, strategies as st
from hypothesis import settings

from app.storage import StorageService, StorageError
from app.models import (
    RecordData,
    ParsedData,
    MoodData,
    InspirationData,
    TodoData
)


# Note: We don't use pytest fixtures with hypothesis tests because
# fixtures are not reset between examples. Instead, we create temp
# directories directly in the test methods.


# Custom strategies for generating valid model data
@st.composite
def mood_data_strategy(draw):
    """Build a MoodData instance from randomly drawn field values.

    ``type`` is either None or 1-20 characters of text, ``intensity`` is
    either None or an integer from 1 to 10, and ``keywords`` is a list of at
    most five strings of 1-15 characters each.
    """
    optional_type = st.one_of(st.none(), st.text(min_size=1, max_size=20))
    optional_intensity = st.one_of(
        st.none(), st.integers(min_value=1, max_value=10)
    )
    keyword_lists = st.lists(
        st.text(min_size=1, max_size=15), min_size=0, max_size=5
    )

    # Keyword arguments are evaluated left to right, so the fields are drawn
    # in the order type -> intensity -> keywords.
    return MoodData(
        type=draw(optional_type),
        intensity=draw(optional_intensity),
        keywords=draw(keyword_lists),
    )


@st.composite
def inspiration_data_strategy(draw):
    """Build an InspirationData instance from randomly drawn field values.

    ``core_idea`` is 1-20 characters of text, ``tags`` is a list of at most
    five strings of 1-10 characters each, and ``category`` is picked from a
    fixed set of four category labels.
    """
    idea_texts = st.text(min_size=1, max_size=20)
    tag_lists = st.lists(st.text(min_size=1, max_size=10), min_size=0, max_size=5)
    categories = st.sampled_from(["工作", "生活", "学习", "创意"])

    # Fields are drawn in the order core_idea -> tags -> category.
    return InspirationData(
        core_idea=draw(idea_texts),
        tags=draw(tag_lists),
        category=draw(categories),
    )


@st.composite
def todo_data_strategy(draw):
    """Build a TodoData instance from randomly drawn field values.

    ``task`` is 1-50 characters of text; ``time`` and ``location`` are each
    either None or 1-20 characters of text. ``status`` is never randomized:
    every generated todo is "pending".
    """
    optional_short_text = st.one_of(st.none(), st.text(min_size=1, max_size=20))

    # Fields are drawn in the order task -> time -> location.
    return TodoData(
        task=draw(st.text(min_size=1, max_size=50)),
        time=draw(optional_short_text),
        location=draw(optional_short_text),
        status="pending",
    )


@st.composite
def parsed_data_strategy(draw):
    """Build a ParsedData instance with an optional mood and short lists.

    A drawn boolean decides whether a mood is generated at all; when it is
    False the mood is None. Inspirations and todos are lists of zero to
    three generated items each.
    """
    mood = None
    if draw(st.booleans()):
        mood = draw(mood_data_strategy())

    inspiration_list = draw(
        st.lists(inspiration_data_strategy(), min_size=0, max_size=3)
    )
    todo_list = draw(st.lists(todo_data_strategy(), min_size=0, max_size=3))

    return ParsedData(mood=mood, inspirations=inspiration_list, todos=todo_list)


@st.composite
def record_data_strategy(draw):
    """Build a RecordData instance from randomly drawn field values.

    ``record_id`` (1-36 characters) and ``timestamp`` (10-30 characters) are
    arbitrary text of UUID-like / timestamp-like *length* only; they are not
    guaranteed to be well-formed UUIDs or ISO timestamps. ``input_type`` is
    "audio" or "text", ``original_text`` is 1-200 characters, and
    ``parsed_data`` comes from ``parsed_data_strategy``.
    """
    id_texts = st.text(min_size=1, max_size=36)
    timestamp_texts = st.text(min_size=10, max_size=30)
    input_types = st.sampled_from(["audio", "text"])
    body_texts = st.text(min_size=1, max_size=200)

    # Fields are drawn in declaration order, ending with the nested payload.
    return RecordData(
        record_id=draw(id_texts),
        timestamp=draw(timestamp_texts),
        input_type=draw(input_types),
        original_text=draw(body_texts),
        parsed_data=draw(parsed_data_strategy()),
    )


class TestStorageServiceProperties:
    """Property-based tests for StorageService.

    Every test creates its own temporary directory and ``StorageService``
    inside the test body, so each hypothesis example starts from an empty
    data directory and the directory is removed when the example finishes.

    **Validates: Requirements 7.1, 7.2, 7.3, 7.4, 7.5, 7.7**
    """

    # ------------------------------------------------------------------
    # Private helpers (leading underscore: not collected by pytest)
    # ------------------------------------------------------------------

    @staticmethod
    def _load_json(path):
        """Return the parsed JSON content of the UTF-8 encoded file at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @classmethod
    def _entries_for(cls, path, record_id):
        """Return a new list of the entries in ``path`` tagged with ``record_id``.

        A file that does not exist is treated as holding no entries.
        """
        if not path.exists():
            return []
        return [
            entry for entry in cls._load_json(path)
            if entry["record_id"] == record_id
        ]

    @staticmethod
    def _pop_first_match(entries, expected):
        """Remove and return the first entry whose fields all equal ``expected``.

        ``expected`` maps field names to required values. Returns None (and
        leaves ``entries`` untouched) when nothing matches. Removing the
        match lets duplicate items each consume a distinct stored entry.
        """
        for idx, entry in enumerate(entries):
            if all(entry[key] == value for key, value in expected.items()):
                return entries.pop(idx)
        return None

    @staticmethod
    def _append_parsed_data(storage_service, record, record_id):
        """Append the record's mood, inspirations and todos, skipping absent ones."""
        parsed = record.parsed_data
        if parsed.mood is not None:
            storage_service.append_mood(parsed.mood, record_id, record.timestamp)
        if parsed.inspirations:
            storage_service.append_inspirations(
                parsed.inspirations, record_id, record.timestamp
            )
        if parsed.todos:
            storage_service.append_todos(parsed.todos, record_id, record.timestamp)

    @staticmethod
    def _assert_valid_uuids(record_ids):
        """Fail the test if any of ``record_ids`` cannot be parsed as a UUID."""
        import uuid

        for record_id in record_ids:
            try:
                # uuid.UUID raises ValueError for a malformed string
                uuid.UUID(record_id)
            except ValueError:
                pytest.fail(f"Generated ID '{record_id}' is not a valid UUID")

    # ------------------------------------------------------------------
    # Property 9: data persistence integrity
    # ------------------------------------------------------------------

    @given(record=record_data_strategy())
    @settings(max_examples=100)
    def test_property_9_data_persistence_integrity(self, record):
        """
        Property 9: Data persistence integrity

        For any successfully processed record, it should be saved in records.json,
        and if it contains mood/inspiration/todo data, it should also be appended
        to the corresponding moods.json, inspirations.json, todos.json files.

        **Validates: Requirements 7.1, 7.2, 7.3, 7.4**
        """
        # Fresh temporary directory and storage service for each example;
        # the context manager removes the directory even if an assert fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            # Save the complete record
            returned_record_id = storage_service.save_record(record)
            parsed = record.parsed_data

            # Property 1: Record should be saved in records.json
            assert storage_service.records_file.exists()
            records = self._load_json(storage_service.records_file)

            assert len(records) >= 1
            saved_record = next(
                (r for r in records if r["record_id"] == returned_record_id),
                None,
            )

            assert saved_record is not None, "Record should be saved in records.json"
            assert saved_record["timestamp"] == record.timestamp
            assert saved_record["input_type"] == record.input_type
            assert saved_record["original_text"] == record.original_text

            # Property 2: If mood data exists, it should be in moods.json
            if parsed.mood is not None:
                storage_service.append_mood(
                    parsed.mood,
                    returned_record_id,
                    record.timestamp
                )

                assert storage_service.moods_file.exists()
                mood_entries = self._entries_for(
                    storage_service.moods_file, returned_record_id
                )
                assert len(mood_entries) >= 1, "Mood should be saved in moods.json"

                mood_entry = mood_entries[-1]  # The most recently appended one
                assert mood_entry["record_id"] == returned_record_id
                assert mood_entry["timestamp"] == record.timestamp
                assert mood_entry["type"] == parsed.mood.type
                assert mood_entry["intensity"] == parsed.mood.intensity
                assert mood_entry["keywords"] == parsed.mood.keywords

            # Property 3: If inspiration data exists, it should be in inspirations.json
            if parsed.inspirations:
                storage_service.append_inspirations(
                    parsed.inspirations,
                    returned_record_id,
                    record.timestamp
                )

                assert storage_service.inspirations_file.exists()
                remaining_entries = self._entries_for(
                    storage_service.inspirations_file, returned_record_id
                )
                assert len(remaining_entries) == len(parsed.inspirations), \
                    "All inspirations should be saved in inspirations.json"

                # Stored order is not assumed; each expected item must claim
                # its own stored entry.
                for inspiration in parsed.inspirations:
                    matching_entry = self._pop_first_match(remaining_entries, {
                        "core_idea": inspiration.core_idea,
                        "category": inspiration.category,
                        "tags": inspiration.tags,
                    })

                    assert matching_entry is not None, \
                        f"Could not find matching entry for inspiration: {inspiration}"
                    assert matching_entry["record_id"] == returned_record_id
                    assert matching_entry["timestamp"] == record.timestamp

            # Property 4: If todo data exists, it should be in todos.json
            if parsed.todos:
                storage_service.append_todos(
                    parsed.todos,
                    returned_record_id,
                    record.timestamp
                )

                assert storage_service.todos_file.exists()
                remaining_entries = self._entries_for(
                    storage_service.todos_file, returned_record_id
                )
                assert len(remaining_entries) == len(parsed.todos), \
                    "All todos should be saved in todos.json"

                # Stored order is not assumed; each expected item must claim
                # its own stored entry.
                for todo in parsed.todos:
                    matching_entry = self._pop_first_match(remaining_entries, {
                        "task": todo.task,
                        "time": todo.time,
                        "location": todo.location,
                        "status": todo.status,
                    })

                    assert matching_entry is not None, \
                        f"Could not find matching entry for todo: {todo}"
                    assert matching_entry["record_id"] == returned_record_id
                    assert matching_entry["timestamp"] == record.timestamp

    @given(records=st.lists(record_data_strategy(), min_size=1, max_size=5))
    @settings(max_examples=100)
    def test_property_9_multiple_records_persistence(self, records):
        """
        Property 9: Data persistence integrity - Multiple Records

        For any list of successfully processed records, all records should be
        saved and retrievable from their respective JSON files.

        **Validates: Requirements 7.1, 7.2, 7.3, 7.4**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            saved_record_ids = []

            # Save every record together with whatever parsed data it carries
            for record in records:
                record_id = storage_service.save_record(record)
                saved_record_ids.append(record_id)
                self._append_parsed_data(storage_service, record, record_id)

            # Verify all records are saved
            saved_records = self._load_json(storage_service.records_file)

            assert len(saved_records) >= len(records), \
                "All records should be saved in records.json"

            # Verify each record can be found
            stored_ids = {r["record_id"] for r in saved_records}
            for record_id in saved_record_ids:
                assert record_id in stored_ids, \
                    f"Record {record_id} should be in records.json"

            # Expected number of entries per auxiliary file
            expected_counts = (
                (
                    "moods",
                    storage_service.moods_file,
                    sum(1 for r in records if r.parsed_data.mood is not None),
                ),
                (
                    "inspirations",
                    storage_service.inspirations_file,
                    sum(len(r.parsed_data.inspirations) for r in records),
                ),
                (
                    "todos",
                    storage_service.todos_file,
                    sum(len(r.parsed_data.todos) for r in records),
                ),
            )

            # A file is only required to exist when something was appended to it
            for label, path, expected in expected_counts:
                if expected > 0:
                    assert path.exists()
                    found = len(self._load_json(path))
                    assert found >= expected, \
                        f"Expected at least {expected} {label}, found {found}"

    @given(
        record=record_data_strategy(),
        has_mood=st.booleans(),
        has_inspirations=st.booleans(),
        has_todos=st.booleans()
    )
    @settings(max_examples=100)
    def test_property_9_selective_data_persistence(
        self, record, has_mood, has_inspirations, has_todos
    ):
        """
        Property 9: Data persistence integrity - Selective Persistence

        For any record, only the data types that exist should be persisted
        to their respective files. Data that is absent from the record must
        not produce any entry for that record in the corresponding file.

        **Validates: Requirements 7.1, 7.2, 7.3, 7.4**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            # Strip the data types the flags switch off
            if not has_mood:
                record.parsed_data.mood = None
            if not has_inspirations:
                record.parsed_data.inspirations = []
            if not has_todos:
                record.parsed_data.todos = []

            # Save the record, then append whatever parsed data is left
            record_id = storage_service.save_record(record)
            self._append_parsed_data(storage_service, record, record_id)
            parsed = record.parsed_data

            # Verify records.json always exists
            assert storage_service.records_file.exists()

            # Mood: file must exist when present, no entry when absent
            if parsed.mood is not None:
                assert storage_service.moods_file.exists(), \
                    "moods.json should exist when mood data is present"
            else:
                assert not self._entries_for(storage_service.moods_file, record_id), \
                    "moods.json should hold no entry for a record without mood data"

            # Inspirations: file must exist when present, no entry when absent
            if parsed.inspirations:
                assert storage_service.inspirations_file.exists(), \
                    "inspirations.json should exist when inspiration data is present"
            else:
                assert not self._entries_for(
                    storage_service.inspirations_file, record_id
                ), "inspirations.json should hold no entry for a record without inspirations"

            # Todos: file must exist when present, no entry when absent
            if parsed.todos:
                assert storage_service.todos_file.exists(), \
                    "todos.json should exist when todo data is present"
            else:
                assert not self._entries_for(storage_service.todos_file, record_id), \
                    "todos.json should hold no entry for a record without todos"

    # ------------------------------------------------------------------
    # Property 10: file initialization
    # ------------------------------------------------------------------

    @given(
        file_type=st.sampled_from(["records", "moods", "inspirations", "todos"])
    )
    @settings(max_examples=100)
    def test_property_10_file_initialization(self, file_type):
        """
        Property 10: File initialization

        For any non-existent JSON file, when first written to, the system should
        create the file and initialize it as an empty array [].

        **Validates: Requirements 7.5**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            # Map file type to file path
            file_map = {
                "records": storage_service.records_file,
                "moods": storage_service.moods_file,
                "inspirations": storage_service.inspirations_file,
                "todos": storage_service.todos_file
            }

            target_file = file_map[file_type]

            # Verify file doesn't exist initially
            assert not target_file.exists(), \
                f"{file_type}.json should not exist initially"

            # Trigger file initialization by calling _ensure_file_exists
            storage_service._ensure_file_exists(target_file)

            # Property 1: File should now exist
            assert target_file.exists(), \
                f"{file_type}.json should be created"

            # Property 2: File should be initialized as empty array
            content = self._load_json(target_file)

            assert isinstance(content, list), \
                f"{file_type}.json should contain a list"
            assert content == [], \
                f"{file_type}.json should be initialized as empty array []"

            # Property 3: The raw text on disk should be valid JSON
            # (already implied by the load above; kept as an explicit check)
            with open(target_file, 'r', encoding='utf-8') as f:
                raw_content = f.read()

            parsed = json.loads(raw_content)
            assert parsed == []

    @given(
        operations=st.lists(
            st.sampled_from(["records", "moods", "inspirations", "todos"]),
            min_size=1,
            max_size=10
        )
    )
    @settings(max_examples=100)
    def test_property_10_file_initialization_idempotent(self, operations):
        """
        Property 10: File initialization - Idempotency

        For any sequence of file initialization operations, calling
        _ensure_file_exists multiple times should be idempotent: the first
        call creates an empty array and every repeated call leaves that
        content unchanged. (No data is written between calls here, so this
        verifies that the file is not corrupted or replaced with something
        else; it does not cover files that already hold entries.)

        **Validates: Requirements 7.5**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            file_map = {
                "records": storage_service.records_file,
                "moods": storage_service.moods_file,
                "inspirations": storage_service.inspirations_file,
                "todos": storage_service.todos_file
            }

            # Track which files have been initialized
            initialized_files = set()

            for file_type in operations:
                target_file = file_map[file_type]

                # Call _ensure_file_exists
                storage_service._ensure_file_exists(target_file)

                # File should exist
                assert target_file.exists()

                # Read current content
                content = self._load_json(target_file)

                if file_type not in initialized_files:
                    # First time - should be empty array
                    assert content == [], \
                        f"First initialization of {file_type}.json should create empty array"
                    initialized_files.add(file_type)
                else:
                    # Nothing is written between calls, so a repeated call
                    # must leave the empty array exactly as it was.
                    assert content == [], \
                        f"Repeated initialization of {file_type}.json should preserve its content"

            # Verify all unique files were created
            for file_type in set(operations):
                assert file_map[file_type].exists(), \
                    f"{file_type}.json should exist after operations"

    @given(record=record_data_strategy())
    @settings(max_examples=100)
    def test_property_10_file_initialization_on_first_write(self, record):
        """
        Property 10: File initialization - First Write

        For any record being saved, if the JSON files don't exist, they should
        be automatically created and initialized before writing data.

        **Validates: Requirements 7.5**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            # Verify no files exist initially
            assert not storage_service.records_file.exists()
            assert not storage_service.moods_file.exists()
            assert not storage_service.inspirations_file.exists()
            assert not storage_service.todos_file.exists()

            # Save a record (this should trigger file initialization)
            record_id = storage_service.save_record(record)
            parsed = record.parsed_data

            # records.json should now exist and contain the record
            assert storage_service.records_file.exists()
            records = self._load_json(storage_service.records_file)
            assert len(records) >= 1
            assert any(r["record_id"] == record_id for r in records)

            # If mood exists, save it and verify file initialization
            if parsed.mood is not None:
                storage_service.append_mood(
                    parsed.mood,
                    record_id,
                    record.timestamp
                )
                assert storage_service.moods_file.exists()
                moods = self._load_json(storage_service.moods_file)
                assert isinstance(moods, list)
                assert len(moods) >= 1

            # If inspirations exist, save them and verify file initialization
            if parsed.inspirations:
                storage_service.append_inspirations(
                    parsed.inspirations,
                    record_id,
                    record.timestamp
                )
                assert storage_service.inspirations_file.exists()
                inspirations = self._load_json(storage_service.inspirations_file)
                assert isinstance(inspirations, list)
                assert len(inspirations) >= len(parsed.inspirations)

            # If todos exist, save them and verify file initialization
            if parsed.todos:
                storage_service.append_todos(
                    parsed.todos,
                    record_id,
                    record.timestamp
                )
                assert storage_service.todos_file.exists()
                todos = self._load_json(storage_service.todos_file)
                assert isinstance(todos, list)
                assert len(todos) >= len(parsed.todos)

    # ------------------------------------------------------------------
    # Property 11: unique ID generation
    # ------------------------------------------------------------------

    @given(records=st.lists(record_data_strategy(), min_size=2, max_size=20))
    @settings(max_examples=100)
    def test_property_11_unique_id_generation(self, records):
        """
        Property 11: Unique ID generation

        For any two different records, the generated record_ids should be unique (non-repeating).

        **Validates: Requirements 7.7**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            generated_ids = []

            # Save all records and collect their IDs
            for record in records:
                # Clear the record_id to force generation of a new one
                record.record_id = ""

                # Save record and get the generated ID
                generated_ids.append(storage_service.save_record(record))

            # Property 1: All IDs should be non-empty strings
            for record_id in generated_ids:
                assert record_id, "Generated record_id should not be empty"
                assert isinstance(record_id, str), "Generated record_id should be a string"

            # Property 2: All IDs should be unique (no duplicates)
            unique_ids = set(generated_ids)
            assert len(unique_ids) == len(generated_ids), \
                f"All generated IDs should be unique. Generated {len(generated_ids)} IDs but only {len(unique_ids)} are unique. Duplicates found!"

            # Property 3: IDs should be valid UUIDs (format check)
            self._assert_valid_uuids(generated_ids)

            # Property 4: Verify all records are saved with their unique IDs
            saved_records = self._load_json(storage_service.records_file)
            saved_ids = [r["record_id"] for r in saved_records]

            # All generated IDs should be in the saved records
            for record_id in generated_ids:
                assert record_id in saved_ids, \
                    f"Generated ID {record_id} should be found in saved records"

    @given(
        num_records=st.integers(min_value=10, max_value=50)
    )
    @settings(max_examples=50, deadline=500)
    def test_property_11_unique_id_generation_stress(self, num_records):
        """
        Property 11: Unique ID generation - Stress Test

        For a large number of records saved in quick succession, all generated
        record_ids should still be unique. This tests the robustness of UUID generation.

        **Validates: Requirements 7.7**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            generated_ids = []

            # Generate and save many records quickly
            for i in range(num_records):
                # Create a minimal record
                record = RecordData(
                    record_id="",  # Force generation
                    timestamp=f"2024-01-01T00:00:{i:02d}Z",
                    input_type="text",
                    original_text=f"Test record {i}",
                    parsed_data=ParsedData(mood=None, inspirations=[], todos=[])
                )

                generated_ids.append(storage_service.save_record(record))

            # All IDs should be unique
            unique_ids = set(generated_ids)
            assert len(unique_ids) == num_records, \
                f"Expected {num_records} unique IDs, but got {len(unique_ids)}. " \
                f"Found {num_records - len(unique_ids)} duplicates!"

            # Verify all are valid UUIDs
            self._assert_valid_uuids(generated_ids)

    @given(record=record_data_strategy())
    @settings(max_examples=100)
    def test_property_11_unique_id_generation_preserves_existing(self, record):
        """
        Property 11: Unique ID generation - Preserve Existing IDs

        If a record already has a record_id set, the save_record method should
        preserve it and not generate a new one.

        **Validates: Requirements 7.7**
        """
        # Fresh temporary directory and storage service for each example
        with tempfile.TemporaryDirectory() as temp_dir:
            storage_service = StorageService(temp_dir)

            # Use the record's existing ID
            original_id = record.record_id

            # Save the record
            returned_id = storage_service.save_record(record)

            # The returned ID should match the original
            assert returned_id == original_id, \
                "save_record should preserve existing record_id"

            # Verify the record is saved with the original ID
            saved_records = self._load_json(storage_service.records_file)
            found_record = next(
                (r for r in saved_records if r["record_id"] == original_id),
                None,
            )

            assert found_record is not None, \
                "Record should be saved with its original ID"
            assert found_record["record_id"] == original_id