"""
arXiv API client wrapper with error handling and caching.
"""
import os
import logging
from typing import List, Optional
from pathlib import Path
import arxiv
from tenacity import retry, stop_after_attempt, wait_exponential

from utils.schemas import Paper

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def _extract_pdf_url(result: arxiv.Result) -> Optional[str]:
    """
    Extract PDF URL from arxiv.Result, handling arxiv library v2.2.0 breaking change.

    In arxiv v2.2.0+, pdf_url attribute is always None. PDF URL is now in links field.

    Args:
        result: arxiv.Result object

    Returns:
        PDF URL string or None if not found
    """
    # Try legacy pdf_url attribute first (backward compatibility)
    if result.pdf_url:
        return result.pdf_url

    # arxiv v2.2.0+: PDF URL is in links
    # Links typically have format:
    #   [0] abs URL (alternate)
    #   [1] pdf URL (alternate)
    #   [2] DOI URL (related)
    try:
        for link in result.links:
            if 'pdf' in link.href.lower():
                logger.debug(f"Extracted PDF URL from links: {link.href}")
                return link.href
    except (AttributeError, TypeError) as e:
        logger.warning(f"Error extracting PDF URL from links: {e}")

    # Fallback: construct URL from entry_id
    # entry_id format: http://arxiv.org/abs/2102.08370v2
    try:
        paper_id = result.entry_id.split('/')[-1]
        fallback_url = f"https://arxiv.org/pdf/{paper_id}"
        logger.warning(f"Using fallback PDF URL construction: {fallback_url}")
        return fallback_url
    except (AttributeError, IndexError) as e:
        logger.error(f"Failed to construct fallback PDF URL: {e}")
        return None


class ArxivClient:
    """Wrapper for arXiv API with error handling and caching."""

    def __init__(self, cache_dir: str = "data/papers"):
        """
        Initialize arXiv client.

        Args:
            cache_dir: Directory to cache downloaded papers
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def search_papers(
        self,
        query: str,
        max_results: int = 5,
        category: Optional[str] = None,
        sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
    ) -> List[Paper]:
        """
        Search for papers on arXiv.

        Args:
            query: Search query
            max_results: Maximum number of papers to return
            category: Optional arXiv category filter (e.g., 'cs.AI')
            sort_by: Sort criterion

        Returns:
            List of Paper objects

        Raises:
            Exception: If arXiv API fails after retries
        """
        try:
            # Build search query
            search_query = query
            if category:
                search_query = f"{query} AND cat:{category}"

            logger.info(f"Searching arXiv for: {search_query}")

            # Create search
            search = arxiv.Search(
                query=search_query,
                max_results=max_results,
                sort_by=sort_by
            )

            # Fetch results
            papers = []
            for result in search.results():
                paper = Paper(
                    arxiv_id=result.entry_id.split('/')[-1],
                    title=result.title,
                    authors=[author.name for author in result.authors],
                    abstract=result.summary,
                    pdf_url=_extract_pdf_url(result),
                    published=result.published,
                    categories=result.categories
                )
                papers.append(paper)

            logger.info(f"Found {len(papers)} papers")
            return papers

        except Exception as e:
            logger.error(f"Error searching arXiv: {str(e)}")
            raise

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def download_paper(self, paper: Paper) -> Optional[Path]:
        """
        Download paper PDF if not already cached.

        Args:
            paper: Paper object

        Returns:
            Path to downloaded PDF, or None if download fails
        """
        try:
            # Check if already cached
            pdf_path = self.cache_dir / f"{paper.arxiv_id}.pdf"
            if pdf_path.exists():
                logger.info(f"Paper {paper.arxiv_id} already cached")
                return pdf_path

            logger.info(f"Downloading paper {paper.arxiv_id}")

            # Download using arxiv library
            search = arxiv.Search(id_list=[paper.arxiv_id])
            result = next(search.results())
            result.download_pdf(dirpath=str(self.cache_dir), filename=f"{paper.arxiv_id}.pdf")

            logger.info(f"Downloaded paper to {pdf_path}")
            return pdf_path

        except Exception as e:
            logger.error(f"Error downloading paper {paper.arxiv_id}: {str(e)}")
            return None

    def download_papers(self, papers: List[Paper]) -> List[Path]:
        """
        Download multiple papers.

        Args:
            papers: List of Paper objects

        Returns:
            List of Paths to downloaded PDFs
        """
        paths = []
        for paper in papers:
            path = self.download_paper(paper)
            if path:
                paths.append(path)
        return paths

    def get_cached_papers(self) -> List[Path]:
        """
        Get list of cached paper PDFs.

        Returns:
            List of Paths to cached PDFs
        """
        return list(self.cache_dir.glob("*.pdf"))