"""
AI Core Module for GitHub Companion

Handles:
- Document embedding with ChromaDB
- Conversational RAG chain creation
- Context-aware query processing

Uses stable LangChain imports compatible with latest versions.
"""

import os
import tempfile
import pathlib
import logging
from typing import List

# LangChain imports - using stable paths for latest versions
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from shared import analysis_jobs, update_session, get_session

# Root logging setup: INFO and above, each record stamped with time, logger name and level
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# On-disk cache location for the embeddings model, kept under the system temp directory
CACHE_DIR = os.path.join(
    tempfile.gettempdir(),
    "huggingface_cache",
    "sentence_transformers",
)


def format_docs(docs):
    """Join the page content of the given documents, separated by blank lines."""
    separator = "\n\n"
    contents = [entry.page_content for entry in docs]
    return separator.join(contents)


def _load_initial_documents(file_paths):
    """Load each path with TextLoader as UTF-8, logging and skipping files that fail."""
    loaded = []
    for file_path in file_paths or []:
        try:
            loaded.extend(TextLoader(file_path, encoding='utf-8').load())
        except Exception as e:
            # Best-effort: one unreadable file must not abort chain creation.
            logger.warning("Skipping file %s: %s", file_path, e)
            continue
        logger.debug("Loaded file: %s", file_path)
    return loaded


def create_conversational_chain(file_paths: List[str], session_id: str):
    """
    Create a conversational RAG chain from the provided files.

    The files are loaded as UTF-8 text, split into overlapping chunks, embedded
    into a Chroma store persisted under the system temp directory, and wired
    into an LCEL chain (retriever -> prompt -> LLM -> string output). The
    chain's single input is used both as the retrieval query and as the
    ``{question}`` slot of the prompt.

    The vector store and retriever are also passed to ``update_session`` under
    the "vectorstore" and "retriever" keys.

    Args:
        file_paths: List of file paths to embed for initial context. Files that
            cannot be loaded are skipped; if none load, a single placeholder
            document is embedded instead.
        session_id: Unique session identifier; also used as the name of the
            per-session Chroma persistence directory.

    Returns:
        A runnable chain, or None if creation fails (the failure is logged
        together with its traceback).
    """
    try:
        logger.info("Creating conversational chain for session %s", session_id)

        chroma_db_path = os.path.join(tempfile.gettempdir(), "chroma_db_cache", session_id)

        # Load documents, falling back to a placeholder so the store is never empty
        documents = _load_initial_documents(file_paths)
        if not documents:
            documents = [Document(page_content="No text files were provided for initial analysis.")]
            logger.warning("No documents loaded, using fallback.")

        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
        texts = text_splitter.split_documents(documents)
        logger.info("Split into %d text chunks", len(texts))

        # Create embeddings
        embeddings = SentenceTransformerEmbeddings(
            model_name="all-MiniLM-L6-v2",
            cache_folder=CACHE_DIR
        )

        # Create vector store
        db = Chroma.from_documents(texts, embeddings, persist_directory=chroma_db_path)
        logger.info("Created ChromaDB at %s", chroma_db_path)

        # Create retriever (top 5 chunks per query)
        retriever = db.as_retriever(search_kwargs={"k": 5})

        # Create LLM
        llm = GoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)

        # System prompt template
        prompt = ChatPromptTemplate.from_template(
            """You are an expert software developer assistant. Your goal is to help users 
understand a GitHub repository. Use the following pieces of retrieved context 
to answer the question. If you don't know the answer, just say that you don't know. 
Keep your answers concise and informative. When providing code snippets, use markdown formatting.

Context:
{context}

Question: {question}

Answer:"""
        )

        # Create chain using LCEL (LangChain Expression Language).
        # The same input string feeds the retriever and the {question} slot.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        # Store vectorstore and retriever in session
        update_session(session_id, "vectorstore", db)
        update_session(session_id, "retriever", retriever)

        logger.info("✅ Conversational chain created for session %s", session_id)
        return rag_chain

    except Exception as e:
        # logger.exception keeps the traceback; callers only see None.
        logger.exception("❌ Error creating conversational chain: %s", e)
        return None


def embed_entire_repository(session_id: str, all_file_paths: List[str]):
    """
    Background task to embed all text files in the repository.

    Looks up the session's existing vector store (the "vectorstore" key of the
    session returned by ``get_session``), loads every given file as UTF-8 text,
    splits the result into overlapping chunks and adds them to that store.
    On success the session key "embedding_complete" is set to True. On failure
    the error is logged with its traceback and "embedding_complete" is not set.

    Args:
        session_id: Unique session identifier
        all_file_paths: List of all text file paths to embed. Files that cannot
            be loaded are logged and skipped.
    """
    # Number of chunks handed to the vector store per add_documents call.
    batch_size = 500

    try:
        paths = all_file_paths or []
        logger.info(f"Starting background embedding for session {session_id} ({len(paths)} files)")

        job = get_session(session_id)
        if not job or "vectorstore" not in job:
            logger.error(f"No vectorstore found for session {session_id}")
            return

        vectorstore = job["vectorstore"]

        # Load all documents; a single unreadable file must not stop the job,
        # but it is reported so that missing content can be diagnosed.
        documents = []
        skipped = 0
        for file_path in paths:
            try:
                documents.extend(TextLoader(file_path, encoding='utf-8').load())
            except Exception as e:
                skipped += 1
                logger.warning("Skipping file %s: %s", file_path, e)

        if skipped:
            logger.info("Skipped %d of %d files during background embedding", skipped, len(paths))

        if documents:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
            texts = text_splitter.split_documents(documents)
            # Insert in fixed-size slices rather than one call, so a large
            # repository is not sent to the vector store as a single oversized request.
            for start in range(0, len(texts), batch_size):
                vectorstore.add_documents(texts[start:start + batch_size])
            logger.info(f"Added {len(texts)} chunks to vectorstore")

        update_session(session_id, "embedding_complete", True)
        logger.info(f"✅ Background embedding complete for session {session_id}")

    except Exception as e:
        # logger.exception keeps the traceback of the background failure.
        logger.exception("❌ Error in background embedding for session %s: %s", session_id, e)

def query_with_context(rag_chain, chat_history: list, query: str, pinned_files: List[str], repo_path: str) -> str:
    """
    Query the RAG chain with additional context from pinned files.

    The content of each pinned file (capped per file) is prepended to the
    question and the combined string is passed to ``rag_chain.invoke``. Pinned
    paths are resolved against ``repo_path``; any path that resolves outside
    the repository root (via ``..``, an absolute path or a symlink) is ignored,
    so only files inside the repository can be included in the prompt.

    Args:
        rag_chain: The runnable chain; invoked with a single string
        chat_history: List of previous chat messages; the new question and
            answer are appended to it in place (skipped if None)
        query: The user's query
        pinned_files: List of file paths (relative to the repository root) the
            user has pinned for context
        repo_path: Path to the repository root

    Returns:
        The AI's response as a string, or an error message string if anything
        fails while building the query or invoking the chain
    """
    # Per-file cap on pinned content, to prevent token overflow.
    max_chars_per_file = 4000

    try:
        # Build context from pinned files
        parts = []
        if pinned_files:
            repo_root = pathlib.Path(repo_path).resolve()
            parts.append(
                "The user has pinned the following files for primary context. Prioritize information from these files:\n\n"
            )
            for file in pinned_files:
                file_p = (repo_root / file).resolve()

                # Containment check: pinned paths are user input and must not
                # escape the repository (e.g. "../../etc/passwd" or "/etc/passwd").
                try:
                    file_p.relative_to(repo_root)
                except ValueError:
                    logger.warning("Ignoring pinned file outside repository: %s", file)
                    continue

                if not file_p.is_file():
                    continue

                try:
                    # Read only the capped prefix instead of the whole file.
                    with file_p.open(encoding="utf-8") as fh:
                        content = fh.read(max_chars_per_file)
                except (OSError, UnicodeError):
                    content = "(Could not read file content)"

                parts.append(
                    f"--- START OF FILE: {file} ---\n"
                    f"{content}"
                    f"\n--- END OF FILE: {file} ---\n\n"
                )
        context_str = "".join(parts)

        # Build final query with pinned context
        # NOTE(review): the chain receives this whole string as its input; if the
        # chain also uses its input as the retrieval query, the pinned file text
        # will influence which chunks are retrieved - confirm this is intended.
        final_query = f"{context_str}Based on the context, answer the question: {query}"

        # Invoke the chain - LCEL chains take the input directly
        answer = rag_chain.invoke(final_query)

        # Update chat history (only the raw question is recorded, not the pinned context)
        if chat_history is not None:
            chat_history.extend([HumanMessage(content=query), AIMessage(content=answer)])

        return answer

    except Exception as e:
        # logger.exception keeps the traceback; the caller gets a message string.
        logger.exception("Error during query invocation: %s", e)
        return f"An error occurred while processing your request: {str(e)}"