"""Tests for the Memory Bridge (markdown <-> Headroom bidirectional sync).

Parser tests are pure functions (no backend needed).
Bridge tests use a temp LocalBackend with a temporary database.

Run with: pytest tests/test_memory_bridge.py -v
"""

from __future__ import annotations

import json
import uuid

import pytest

from headroom.memory.bridge_config import BridgeConfig, MarkdownFormat
from headroom.memory.bridge_parsers import (
    ParsedSection,
    detect_format,
    extract_entities_from_text,
    extract_relationships_from_section,
    parse_chatgpt_facts,
    parse_claude_code_memory,
    parse_generic_markdown,
    parse_markdown,
)

# Sample content for testing

# Claude Code style memory file: one H1 title followed by three H2 sections,
# each holding two bullets. The "Project Overview" bullets start with a
# **bold** label followed by a colon.
CLAUDE_CODE_MEMORY = """\
# Project Memory

## Project Overview
- **Headroom**: Context optimization layer for LLM applications
- **Repos**: OSS at ~/claude-projects/headroom

## Key Architecture
- 186 Python files, 34 packages, 100K+ lines
- 6 compression algorithms: SmartCrusher, CacheAligner, ContentRouter

## Competitors
- Direct: Compresr (YC W26), Token Company
- Gateways: Portkey, Helicone, LiteLLM
"""

# ChatGPT style flat facts: no headings, three plain lines and one line with
# a "- " bullet prefix.
CHATGPT_FACTS = """\
User prefers Python over JavaScript
User works at Netflix
User likes dark mode
- User has a cat named Luna
"""

# Generic markdown: an H1 title and two H2 sections; "Architecture" mixes one
# prose line with two bullets, "TODO" holds two bullets.
GENERIC_MARKDOWN = """\
# Notes

## Architecture
The system uses FastAPI for the proxy layer.
- SQLite for storage
- HNSW for vector search

## TODO
- Add caching layer
- Improve error handling
"""


# =============================================================================
# Parser Tests (pure functions, no backend)
# =============================================================================


class TestClaudeCodeParser:
    """Tests for ``parse_claude_code_memory`` run against ``CLAUDE_CODE_MEMORY``."""

    def test_parse_sections(self):
        """Parsing yields at least three sections and tags the format."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        # The sample has an H1 title plus three H2 sections; only a lower
        # bound of three is pinned here.
        assert len(parsed.sections) >= 3
        assert parsed.format == "claude_code"

    def test_heading_levels(self):
        """Each ``##`` heading in the sample is reported at level 2."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        headings = {s.heading: s.heading_level for s in parsed.sections if s.heading}
        assert headings.get("Project Overview") == 2
        assert headings.get("Key Architecture") == 2
        assert headings.get("Competitors") == 2

    def test_bullets_become_facts(self):
        """Both bullets under "Project Overview" are captured as facts."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        overview = next(s for s in parsed.sections if s.heading == "Project Overview")
        assert len(overview.facts) == 2
        assert any("Headroom" in f for f in overview.facts)
        assert any("Repos" in f for f in overview.facts)

    def test_bold_text_extracted_as_entities(self):
        """Bold labels in the "Project Overview" bullets appear in ``entities``."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        overview = next(s for s in parsed.sections if s.heading == "Project Overview")
        assert "Headroom" in overview.entities
        assert "Repos" in overview.entities

    def test_content_hash_computed(self):
        """Every section that has content carries a 64-character content hash."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        for section in parsed.sections:
            if section.content:
                assert section.content_hash
                assert len(section.content_hash) == 64  # SHA-256

    def test_content_hash_deterministic(self):
        """Parsing the same text twice gives identical per-section hashes."""
        parsed1 = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        parsed2 = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        # zip() stops at the shorter input, so a differing (or zero) section
        # count would otherwise let this test pass without comparing anything.
        assert parsed1.sections
        assert len(parsed1.sections) == len(parsed2.sections)
        for s1, s2 in zip(parsed1.sections, parsed2.sections):
            assert s1.content_hash == s2.content_hash

    def test_file_hash_computed(self):
        """The parsed document exposes a 64-character whole-file hash."""
        parsed = parse_claude_code_memory(CLAUDE_CODE_MEMORY)
        assert parsed.file_hash
        assert len(parsed.file_hash) == 64


class TestChatGPTParser:
    """Exercise ``parse_chatgpt_facts`` on flat, heading-less fact lists."""

    def test_parse_flat_facts(self):
        """The four sample lines end up as four facts in a single section."""
        result = parse_chatgpt_facts(CHATGPT_FACTS)
        assert result.format == "chatgpt"
        sections = result.sections
        assert len(sections) == 1
        assert len(sections[0].facts) == 4

    def test_bullet_prefix_stripped(self):
        """A leading ``- `` on a line is not part of the stored fact."""
        first_section = parse_chatgpt_facts(CHATGPT_FACTS).sections[0]
        assert "User has a cat named Luna" in first_section.facts

    def test_empty_lines_skipped(self):
        """Blank lines between facts do not produce facts of their own."""
        text = "Fact 1\n\n\nFact 2\n\n"
        fact_list = parse_chatgpt_facts(text).sections[0].facts
        assert len(fact_list) == 2

    def test_empty_content(self):
        """An empty string parses to a document with no sections."""
        result = parse_chatgpt_facts("")
        assert len(result.sections) == 0


class TestGenericParser:
    """Exercise ``parse_generic_markdown`` on the ``GENERIC_MARKDOWN`` sample."""

    def test_parse_multi_level_headers(self):
        """Both H2 headings of the sample show up as section headings."""
        document = parse_generic_markdown(GENERIC_MARKDOWN)
        assert document.format == "generic"
        found = [section.heading for section in document.sections if section.heading]
        assert "Architecture" in found
        assert "TODO" in found

    def test_non_bullet_lines_are_facts(self):
        """Prose lines count as facts alongside bullets."""
        document = parse_generic_markdown(GENERIC_MARKDOWN)
        architecture = next(
            section for section in document.sections if section.heading == "Architecture"
        )
        # One prose sentence ("The system uses FastAPI...") plus two bullets.
        fact_count = len(architecture.facts)
        assert fact_count >= 3


class TestFormatDetection:
    """Check the format label ``detect_format`` assigns to each sample."""

    def test_detect_claude_code(self):
        """The headed, bulleted sample is labelled ``claude_code``."""
        detected = detect_format(CLAUDE_CODE_MEMORY)
        assert detected == "claude_code"

    def test_detect_chatgpt(self):
        """The flat fact list is labelled ``chatgpt``."""
        detected = detect_format(CHATGPT_FACTS)
        assert detected == "chatgpt"

    def test_detect_generic(self):
        """Two long prose lines are accepted as either generic or chatgpt."""
        content = (
            "Some long paragraph without headers or bullet points that goes on and on "
            "describing things in great detail.\n"
            "Another very long line that describes more things in this generic format."
        )
        detected = detect_format(content)
        assert detected in ("generic", "chatgpt")

    def test_empty_content(self):
        """An empty string is labelled ``generic``."""
        detected = detect_format("")
        assert detected == "generic"


class TestAutoParser:
    """Check the format that ``parse_markdown`` reports on its result."""

    def test_auto_parses_claude_code(self):
        """Without a format argument the Claude Code sample is tagged as such."""
        document = parse_markdown(CLAUDE_CODE_MEMORY)
        assert document.format == "claude_code"

    def test_auto_parses_chatgpt(self):
        """Without a format argument the flat fact list is tagged ``chatgpt``."""
        document = parse_markdown(CHATGPT_FACTS)
        assert document.format == "chatgpt"

    def test_force_format(self):
        """An explicit ``format`` argument decides the reported format."""
        forced = "generic"
        document = parse_markdown(CLAUDE_CODE_MEMORY, format=forced)
        assert document.format == "generic"


class TestEntityExtraction:
    """Exercise ``extract_entities_from_text`` on short one-line inputs."""

    def test_bold_text(self):
        """Words wrapped in ``**`` are returned as entities."""
        found = extract_entities_from_text("I use **Python** and **FastAPI**")
        for expected in ("Python", "FastAPI"):
            assert expected in found

    def test_camel_case(self):
        """CamelCase identifiers are returned as entities."""
        found = extract_entities_from_text("Using SmartCrusher and CacheAligner")
        for expected in ("SmartCrusher", "CacheAligner"):
            assert expected in found

    def test_no_false_positives_on_stop_words(self):
        """A capitalised stop word at sentence start is not an entity."""
        sentence = "The system is very important and useful"
        found = extract_entities_from_text(sentence)
        # "The" and other stop words should not appear
        assert "The" not in found

    def test_all_caps(self):
        """An all-caps token is returned as an entity."""
        sentence = "Using HNSW and SQLite"
        found = extract_entities_from_text(sentence)
        assert "HNSW" in found


class TestRelationshipExtraction:
    """Exercise ``extract_relationships_from_section`` on hand-built sections."""

    def test_bold_colon_pattern(self):
        """A ``**Name**: description`` fact yields an "is" relationship from Name."""
        fact = "**Headroom**: Context optimization layer"
        section = ParsedSection(
            heading="Test",
            heading_level=2,
            content="- " + fact,
            facts=[fact],
        )
        found = extract_relationships_from_section(section)
        assert len(found) >= 1
        first = found[0]
        assert first["source"] == "Headroom"
        assert first["relationship"] == "is"

    def test_verb_patterns(self):
        """A "<subject> uses <object>" fact yields a "uses" relationship."""
        fact = "Headroom uses SQLite for storage"
        section = ParsedSection(
            heading="Test",
            heading_level=2,
            content=fact,
            facts=[fact],
        )
        found = extract_relationships_from_section(section)
        matching = [rel for rel in found if rel["relationship"] == "uses"]
        assert len(matching) >= 1


# =============================================================================
# Bridge Tests (require backend)
# =============================================================================


@pytest.fixture
def tmp_dir(tmp_path):
    """Expose pytest's ``tmp_path`` directory under the name ``tmp_dir``."""
    scratch_dir = tmp_path
    return scratch_dir


@pytest.fixture
def user_id():
    """Return a fresh ``test_bridge_<8 hex chars>`` identifier for isolation."""
    suffix = uuid.uuid4().hex[:8]
    return "test_bridge_" + suffix


@pytest.fixture
def bridge_config(tmp_dir):
    """Build a ``BridgeConfig`` whose sync state file lives in ``tmp_dir``."""
    state_file = tmp_dir / "bridge_state.json"
    settings = {
        "user_id": "test_user",
        "sync_state_path": state_file,
        "dedup_similarity_threshold": 0.95,
    }
    return BridgeConfig(**settings)


@pytest.fixture
async def backend(tmp_dir):
    """Yield an initialised ``LocalBackend`` on a database file in ``tmp_dir``.

    The backend is closed once the test using it has finished.
    """
    # Imported here rather than at module level, as elsewhere in this file
    # for backend-dependent code.
    from headroom.memory.backends.local import LocalBackend, LocalBackendConfig

    db_file = tmp_dir / "test_memory.db"
    store = LocalBackend(LocalBackendConfig(db_path=str(db_file)))
    await store._ensure_initialized()
    yield store
    await store.close()


@pytest.fixture
def bridge(bridge_config, backend):
    """Return a ``MemoryBridge`` wired to the test config and backend fixtures."""
    from headroom.memory.bridge import MemoryBridge

    memory_bridge = MemoryBridge(bridge_config, backend)
    return memory_bridge


class TestMemoryBridgeImport:
    """Tests for ``MemoryBridge.import_from_markdown`` against a temp backend."""

    @pytest.mark.asyncio
    async def test_import_claude_code_memory(self, bridge, tmp_dir, backend):
        """Import a MEMORY.md file and verify memories are stored."""
        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

        assert stats.files_processed == 1
        assert stats.sections_imported > 0
        assert stats.total_facts > 0

        # Verify memories exist in backend
        memories = await backend.get_user_memories("test_user", limit=100)
        assert len(memories) > 0

    @pytest.mark.asyncio
    async def test_import_skips_unchanged_file(self, bridge, tmp_dir):
        """Second import of same file should skip (hash unchanged)."""
        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        stats1 = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
        assert stats1.sections_imported > 0

        stats2 = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
        assert stats2.files_skipped_unchanged == 1
        assert stats2.sections_imported == 0

    @pytest.mark.asyncio
    async def test_import_detects_changes(self, bridge, tmp_dir):
        """Modified file should re-import changed sections."""
        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

        # Modify file
        modified = CLAUDE_CODE_MEMORY + "\n## New Section\n- Brand new fact\n"
        md_path.write_text(modified, encoding="utf-8")

        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
        assert stats.files_processed == 1
        assert stats.sections_imported >= 1  # At least the new section

    @pytest.mark.asyncio
    async def test_import_force(self, bridge, tmp_dir):
        """Force import should re-import even if unchanged."""
        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user", force=True)
        # Force should process the file, though sections may be deduped by semantic search
        assert stats.files_processed == 1

    @pytest.mark.asyncio
    async def test_import_chatgpt_facts(self, bridge, tmp_dir, backend):
        """Import ChatGPT-style facts."""
        md_path = tmp_dir / "chatgpt.txt"
        md_path.write_text(CHATGPT_FACTS, encoding="utf-8")

        # Switch the bridge's configured format before importing.
        bridge._config.md_format = MarkdownFormat.CHATGPT
        stats = await bridge.import_from_markdown(paths=[md_path], user_id="test_user")
        assert stats.sections_imported > 0

    @pytest.mark.asyncio
    async def test_import_missing_file(self, bridge, tmp_dir):
        """Missing file should be skipped gracefully."""
        # tmp_dir is already a path object, so the joined result needs no
        # extra Path(...) wrapper.
        missing_path = tmp_dir / "nonexistent.md"

        stats = await bridge.import_from_markdown(paths=[missing_path], user_id="test_user")
        assert stats.files_processed == 0

    @pytest.mark.asyncio
    async def test_metadata_preserved(self, bridge, tmp_dir, backend):
        """Imported memories should have bridge metadata."""
        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        await bridge.import_from_markdown(paths=[md_path], user_id="test_user")

        memories = await backend.get_user_memories("test_user", limit=100)
        # Without this guard an empty result would skip the loop below and the
        # test would pass without checking any metadata.
        assert memories
        for memory in memories:
            metadata = memory.metadata or {}
            assert metadata.get("source") == "memory_bridge"
            assert "source_file" in metadata


class TestMemoryBridgeExport:
    """Tests for ``MemoryBridge.export_to_markdown`` in each output format."""

    @pytest.mark.asyncio
    async def test_export_claude_code_style(self, bridge, tmp_dir, backend):
        """Memories with section headings export as a headed markdown file."""
        # Seed two memories, each tagged with the section it belongs to.
        seeds = (
            ("Headroom is a context optimization layer", 0.8, "Overview"),
            ("Uses SQLite for storage", 0.7, "Architecture"),
        )
        for text, weight, heading in seeds:
            await backend.save_memory(
                content=text,
                user_id="test_user",
                importance=weight,
                metadata={"section_heading": heading},
            )

        target = tmp_dir / "export.md"
        rendered = await bridge.export_to_markdown(
            path=target,
            user_id="test_user",
            format=MarkdownFormat.CLAUDE_CODE,
        )

        for fragment in ("# Memory", "## Overview", "## Architecture", "Headroom"):
            assert fragment in rendered
        assert target.exists()

    @pytest.mark.asyncio
    async def test_export_chatgpt_style(self, bridge, backend):
        """The ChatGPT format lists the fact text without any H2 headers."""
        await backend.save_memory(
            content="User prefers Python",
            user_id="test_user",
            importance=0.7,
        )

        rendered = await bridge.export_to_markdown(
            user_id="test_user",
            format=MarkdownFormat.CHATGPT,
        )

        assert "User prefers Python" in rendered
        # Flat output: no "## " header marker anywhere.
        assert "## " not in rendered

    @pytest.mark.asyncio
    async def test_export_empty(self, bridge):
        """A user with no memories gets a "No memories" placeholder."""
        rendered = await bridge.export_to_markdown(user_id="nonexistent_user")
        assert "No memories" in rendered


class TestMemoryBridgeSync:
    """Tests for the combined import/export pass, ``MemoryBridge.sync``."""

    @pytest.mark.asyncio
    async def test_sync_imports_and_exports(self, bridge, tmp_dir, backend):
        """Sync imports the file, then exports a memory added outside the bridge."""
        memory_file = tmp_dir / "MEMORY.md"
        memory_file.write_text("## Facts\n- User likes Python\n", encoding="utf-8")
        bridge._config.md_paths = [memory_file]

        # Pass 1: the file's content is imported.
        first_pass = await bridge.sync(user_id="test_user")
        assert first_pass.import_stats.sections_imported > 0

        # Save a memory directly on the backend; empty metadata means it
        # carries no source tag, i.e. it did not come from the bridge.
        await backend.save_memory(
            content="User also likes Rust",
            user_id="test_user",
            importance=0.7,
            metadata={},
        )

        # Pass 2: the directly-saved memory is exported.
        second_pass = await bridge.sync(user_id="test_user")
        assert second_pass.memories_exported >= 1

        # The markdown file on disk now mentions the new memory.
        on_disk = memory_file.read_text(encoding="utf-8")
        assert "Rust" in on_disk

    @pytest.mark.asyncio
    async def test_source_tag_prevents_reexport(self, bridge, tmp_dir, backend):
        """Bridge-imported memories are not written back out on the next sync."""
        memory_file = tmp_dir / "MEMORY.md"
        memory_file.write_text("## Facts\n- Imported fact\n", encoding="utf-8")
        bridge._config.md_paths = [memory_file]

        # Initial sync performs the import.
        await bridge.sync(user_id="test_user")

        # A repeat sync has nothing to export: every memory has a source tag.
        repeat = await bridge.sync(user_id="test_user")
        assert repeat.memories_exported == 0


class TestSyncStatePersistence:
    """Tests that sync state written by one bridge is honoured by the next."""

    @pytest.mark.asyncio
    async def test_state_saved_and_loaded(self, tmp_dir, backend):
        """Sync state should persist across bridge instances."""
        from headroom.memory.bridge import MemoryBridge

        state_path = tmp_dir / "state.json"
        config = BridgeConfig(
            user_id="test_user",
            sync_state_path=state_path,
        )

        md_path = tmp_dir / "MEMORY.md"
        md_path.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        # First bridge instance: import
        bridge1 = MemoryBridge(config, backend)
        await bridge1.import_from_markdown(paths=[md_path], user_id="test_user")

        # Verify state file exists
        assert state_path.exists()
        # Read with an explicit encoding, like every other file read/write in
        # this module, so the result does not depend on the platform locale.
        state = json.loads(state_path.read_text(encoding="utf-8"))
        assert "files" in state
        assert str(md_path) in state["files"]

        # Second bridge instance: should detect unchanged file
        bridge2 = MemoryBridge(config, backend)
        stats = await bridge2.import_from_markdown(paths=[md_path], user_id="test_user")
        assert stats.files_skipped_unchanged == 1


class TestRoundTrip:
    """Import-then-export test checking that sample content survives."""

    @pytest.mark.asyncio
    async def test_import_export_preserves_facts(self, bridge, tmp_dir, backend):
        """Facts imported from MEMORY.md reappear in the exported markdown."""
        source_file = tmp_dir / "MEMORY.md"
        source_file.write_text(CLAUDE_CODE_MEMORY, encoding="utf-8")

        # Step 1: load the sample into the backend.
        await bridge.import_from_markdown(paths=[source_file], user_id="test_user")

        # Step 2: render everything back out as Claude Code style markdown.
        destination = tmp_dir / "exported.md"
        rendered = await bridge.export_to_markdown(
            path=destination,
            user_id="test_user",
            format=MarkdownFormat.CLAUDE_CODE,
        )

        # One representative term per sample section must still be present.
        assert "Headroom" in rendered
        has_architecture = "compression" in rendered.lower() or "SmartCrusher" in rendered
        assert has_architecture
        has_competitor = "Compresr" in rendered or "Portkey" in rendered
        assert has_competitor