"""Citation management for source tracking."""

from __future__ import annotations

import html
from dataclasses import dataclass, field
from typing import Any
from urllib.parse import urlparse


@dataclass
class Citation:
    """A single source citation.

    Attributes:
        title: Title of the source.
        url: URL of the source.
        snippet: Text excerpt from the source; empty when not supplied.
        accessed_at: Free-form string, empty by default (presumably an
            access timestamp; the format is not defined here).
        reliability_score: Numeric reliability score; defaults to 0.5.
    """

    # Required fields.
    title: str
    url: str

    # Optional fields, each with an explicit default.
    snippet: str = field(default="")
    accessed_at: str = field(default="")
    reliability_score: float = field(default=0.5)


class CitationManager:
    """Manages citations and source tracking.

    Citations are kept in insertion order and addressed by a 1-based index.
    Each distinct URL is stored once; adding a URL that is already present
    returns the index of the existing entry.
    """

    # Labels that mark a host as high reliability when they appear as the
    # top-level label ("nasa.gov") or directly before a two-letter country
    # code ("www.gov.uk", "unimelb.edu.au").
    _TRUSTED_SUFFIX_LABELS = frozenset({"gov", "edu"})

    # Registered domains scored as high reliability (subdomains included).
    _HIGH_RELIABILITY_DOMAINS = (
        "wikipedia.org",
        "bbc.com",
        "reuters.com",
        "apnews.com",
        "nature.com",
        "sciencedirect.com",
        "pubmed.gov",
        "nytimes.com",
        "wsj.com",
        "economist.com",
    )

    # Registered domains scored as medium reliability (subdomains included).
    _MEDIUM_RELIABILITY_DOMAINS = (
        "medium.com",
        "techcrunch.com",
        "wired.com",
        "arstechnica.com",
        "theverge.com",
        "forbes.com",
        "bloomberg.com",
    )

    def __init__(self) -> None:
        """Initialize the citation manager."""
        self._citations: list[Citation] = []
        # Maps each stored URL to its 1-based citation index.
        self._url_index: dict[str, int] = {}

    def add_citation(
        self,
        title: str,
        url: str,
        snippet: str = "",
    ) -> int:
        """Add a citation and return its index.

        If the URL was added before, the existing index is returned and the
        stored citation is left untouched (the new title and snippet are
        ignored).

        Args:
            title: Source title
            url: Source URL
            snippet: Relevant text snippet

        Returns:
            Citation index (1-based)
        """
        existing = self._url_index.get(url)
        if existing is not None:
            return existing

        citation = Citation(
            title=title,
            url=url,
            snippet=snippet,
            reliability_score=self._assess_reliability(url),
        )

        self._citations.append(citation)
        # Indices are 1-based, so the new index equals the new length.
        index = len(self._citations)
        self._url_index[url] = index

        return index

    def get_citation(self, index: int) -> Citation | None:
        """Get a citation by index.

        Args:
            index: Citation index (1-based)

        Returns:
            Citation or None if not found
        """
        if 1 <= index <= len(self._citations):
            return self._citations[index - 1]
        return None

    def get_all_citations(self) -> list[Citation]:
        """Get all citations.

        Returns:
            A new list holding all citations in insertion order
        """
        return list(self._citations)

    def format_inline(self, index: int) -> str:
        """Format an inline citation reference.

        The index is not checked against the stored citations.

        Args:
            index: Citation index

        Returns:
            Formatted inline citation [n]
        """
        return f"[{index}]"

    def format_bibliography(self, style: str = "markdown") -> str:
        """Format all citations as a bibliography.

        Args:
            style: Output style (markdown, plain, html). Any other value
                produces an empty string.

        Returns:
            Formatted bibliography, or an empty string when there are no
            citations
        """
        if not self._citations:
            return ""

        lines = []

        if style == "markdown":
            lines.append("**Sources:**")
            for i, cite in enumerate(self._citations, 1):
                lines.append(f"[{i}] [{cite.title}]({cite.url})")

        elif style == "plain":
            lines.append("Sources:")
            for i, cite in enumerate(self._citations, 1):
                lines.append(f"{i}. {cite.title}")
                lines.append(f"   {cite.url}")

        elif style == "html":
            lines.append("<div class='sources'>")
            lines.append("<h4>Sources:</h4>")
            lines.append("<ol>")
            for cite in self._citations:
                # Titles and URLs come from external sources; escape them so
                # they cannot inject markup or break out of the attribute.
                safe_url = html.escape(str(cite.url), quote=True)
                safe_title = html.escape(str(cite.title), quote=True)
                lines.append(
                    f'<li><a href="{safe_url}" target="_blank">{safe_title}</a></li>'
                )
            lines.append("</ol>")
            lines.append("</div>")

        return "\n".join(lines)

    def to_dict_list(self) -> list[dict[str, Any]]:
        """Convert citations to list of dictionaries.

        Returns:
            List of citation dictionaries with the keys title, url, snippet
            and reliability_score
        """
        return [
            {
                "title": cite.title,
                "url": cite.url,
                "snippet": cite.snippet,
                "reliability_score": cite.reliability_score,
            }
            for cite in self._citations
        ]

    def clear(self) -> None:
        """Clear all citations."""
        self._citations.clear()
        self._url_index.clear()

    @staticmethod
    def _host_matches(host: str, domains: tuple[str, ...]) -> bool:
        """Return True if host equals one of domains or is a subdomain of it."""
        return any(
            host == domain or host.endswith("." + domain) for domain in domains
        )

    def _assess_reliability(self, url: str) -> float:
        """Assess reliability of a source based on URL.

        Matching is done on whole host-name labels, so a listed domain only
        matches itself and its subdomains ("nature.com" does not match
        "signature.com").

        Args:
            url: Source URL

        Returns:
            Reliability score (0.0 - 1.0): 0.9 for high reliability hosts,
            0.7 for medium, 0.5 for unknown hosts, 0.3 when the URL cannot
            be parsed
        """
        if not isinstance(url, str):
            return 0.3

        try:
            # hostname is lowercased and excludes any port or credentials.
            host = urlparse(url).hostname
        except ValueError:
            return 0.3

        # A trailing dot (fully-qualified form) must not defeat matching.
        host = (host or "").rstrip(".")
        if not host:
            return 0.5

        labels = host.split(".")
        trusted = self._TRUSTED_SUFFIX_LABELS
        if labels[-1] in trusted:
            return 0.9
        # Heuristic for country-code variants such as ".gov.uk" / ".edu.au".
        if len(labels) >= 2 and len(labels[-1]) == 2 and labels[-2] in trusted:
            return 0.9

        if self._host_matches(host, self._HIGH_RELIABILITY_DOMAINS):
            return 0.9

        if self._host_matches(host, self._MEDIUM_RELIABILITY_DOMAINS):
            return 0.7

        # Default moderate reliability for unknown sources
        return 0.5

    def get_most_reliable(self, n: int = 3) -> list[Citation]:
        """Get the n most reliable citations.

        Citations with equal scores keep their insertion order.

        Args:
            n: Number of citations to return; zero or negative yields an
                empty list

        Returns:
            List of most reliable citations, highest score first
        """
        # Guard explicitly: a negative n would otherwise slice from the end
        # and return nearly every citation.
        if n <= 0:
            return []

        sorted_citations = sorted(
            self._citations,
            key=lambda c: c.reliability_score,
            reverse=True,
        )
        return sorted_citations[:n]