import os
import json
import faiss
import numpy as np
from pathlib import Path
from typing import List, Dict, Any
from sentence_transformers import SentenceTransformer

# Shared embedding model, constructed once when this module is imported and
# reused by store_docs, update_docs and search_docs.
# NOTE(review): constructing the model at import time presumably loads (or
# downloads) the weights, so importing this module may be slow - confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Directory holding one '<slug>.index' FAISS file and one '<slug>_texts.json'
# sidecar per repository. The path is relative, so it is resolved against the
# current working directory; it is created at import time if missing.
INDEXES_DIR = Path('faiss_indexes')
INDEXES_DIR.mkdir(exist_ok=True)


def _sanitize_repo_url(repo_url: str) -> str:
    """Sanitize repository URL to create a valid filename."""
    return repo_url.replace('https://', '').replace('http://', '').replace('/', '_').replace('.', '_').replace(':', '_')[:63]


def store_docs(repo_url: str, docs_list: List[Dict[str, Any]]) -> None:
    """
    Embed generated documentation and persist it as a fresh FAISS index.

    Two files are written under INDEXES_DIR, both named after the sanitized
    repository URL: '<slug>.index' (a flat L2 FAISS index holding one vector
    per document) and '<slug>_texts.json' (the raw texts, per-document
    metadata and the repository URL). Existing files with the same names are
    overwritten. Nothing is written when docs_list is empty.

    Args:
        repo_url: The repository URL (used for index filename)
        docs_list: List of dicts containing 'filename' and 'documentation'

    Raises:
        KeyError: If an item lacks 'documentation' or 'filename'.
    """
    slug = _sanitize_repo_url(repo_url)
    print(f"[STORE_DOCS] Starting storage for repo: {repo_url}")
    print(f"[STORE_DOCS] Repo slug: {slug}")

    # Read both keys item by item ('documentation' first) so a malformed
    # entry fails before any embedding work starts.
    pairs = [(item['documentation'], item['filename']) for item in docs_list]
    corpus = [body for body, _ in pairs]
    records = [{'filename': name, 'repo_url': repo_url} for _, name in pairs]

    print(f"[STORE_DOCS] Extracted {len(corpus)} documents")

    if not corpus:
        print("[STORE_DOCS] No documents to store, returning early")
        return

    # One embedding row per document, in the same order as `corpus`.
    print(f"[STORE_DOCS] Generating embeddings for {len(corpus)} documents...")
    vectors = model.encode(corpus, convert_to_numpy=True)
    print(f"[STORE_DOCS] Embeddings shape: {vectors.shape}")

    # Build the flat L2 index; its width is the embedding dimension.
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors.astype(np.float32))
    print(f"[STORE_DOCS] FAISS index created with {flat_index.ntotal} vectors")

    # Persist the vectors.
    index_file = INDEXES_DIR / f'{slug}.index'
    faiss.write_index(flat_index, str(index_file))
    print(f"[STORE_DOCS] FAISS index saved to: {index_file}")

    # Persist texts and metadata; list position i corresponds to vector i.
    payload = {
        'texts': corpus,
        'metadata': records,
        'repo_url': repo_url,
    }
    sidecar_file = INDEXES_DIR / f'{slug}_texts.json'
    with open(sidecar_file, 'w') as handle:
        json.dump(payload, handle)
    print(f"[STORE_DOCS] Metadata saved to: {sidecar_file}")
    print("[STORE_DOCS] Storage complete!")


def update_docs(repo_url: str, updated_docs_list: List[Dict[str, Any]]) -> None:
    """
    Replace the stored documentation of specific files and rebuild the index.

    If either the FAISS index or its JSON sidecar is missing, this delegates
    to store_docs with the given documents. Otherwise every stored entry
    whose filename appears in updated_docs_list is dropped, the new entries
    are appended, all remaining texts are re-embedded, and both files are
    rewritten.

    Args:
        repo_url: The repository URL
        updated_docs_list: List of dicts with 'filename' and 'documentation' to update

    Raises:
        Exception: Any error raised while loading, rebuilding or saving is
            printed and then re-raised unchanged.
    """
    slug = _sanitize_repo_url(repo_url)
    print(f"[UPDATE_DOCS] Starting update for repo: {repo_url}")
    print(f"[UPDATE_DOCS] Updating {len(updated_docs_list)} files")

    index_file = INDEXES_DIR / f'{slug}.index'
    sidecar_file = INDEXES_DIR / f'{slug}_texts.json'

    # Without both files there is nothing to merge into: start from scratch.
    if not (index_file.exists() and sidecar_file.exists()):
        print("[UPDATE_DOCS] No existing index found, creating new one")
        store_docs(repo_url, updated_docs_list)
        return

    try:
        with open(sidecar_file, 'r') as handle:
            stored = json.load(handle)

        old_texts = stored['texts']
        old_meta = stored['metadata']
        print(f"[UPDATE_DOCS] Loaded {len(old_texts)} existing documents")

        # Filenames whose stored entries are superseded by this update.
        replaced = {entry['filename'] for entry in updated_docs_list}

        # Keep (text, metadata) pairs together so they cannot drift apart.
        merged = [
            (body, info)
            for body, info in zip(old_texts, old_meta)
            if info.get('filename') not in replaced
        ]
        print(f"[UPDATE_DOCS] After filtering: {len(merged)} documents remain")

        # Append the replacement entries after the surviving ones.
        merged.extend(
            (
                entry['documentation'],
                {'filename': entry['filename'], 'repo_url': repo_url},
            )
            for entry in updated_docs_list
        )
        print(f"[UPDATE_DOCS] After adding updates: {len(merged)} documents total")

        if not merged:
            print("[UPDATE_DOCS] No documents remaining, returning early")
            return

        new_texts = [body for body, _ in merged]
        new_meta = [info for _, info in merged]

        # Re-embed everything and build a brand-new flat L2 index.
        print("[UPDATE_DOCS] Rebuilding FAISS index...")
        vectors = model.encode(new_texts, convert_to_numpy=True)
        rebuilt = faiss.IndexFlatL2(vectors.shape[1])
        rebuilt.add(vectors.astype(np.float32))
        print(f"[UPDATE_DOCS] FAISS index rebuilt with {rebuilt.ntotal} vectors")

        faiss.write_index(rebuilt, str(index_file))
        print(f"[UPDATE_DOCS] Updated FAISS index saved to: {index_file}")

        payload = {
            'texts': new_texts,
            'metadata': new_meta,
            'repo_url': repo_url,
        }
        with open(sidecar_file, 'w') as handle:
            json.dump(payload, handle)
        print(f"[UPDATE_DOCS] Updated metadata saved to: {sidecar_file}")
        print("[UPDATE_DOCS] Update complete!")

    except Exception as e:
        # Report the failure, then let the caller decide how to handle it.
        print(f"Error updating index for {repo_url}: {e}")
        raise


def search_docs(repo_url: str, question: str, num_results: int = 3) -> List[Dict[str, Any]]:
    """
    Searches stored documentation using semantic similarity.

    Loads the FAISS index and the JSON sidecar written for this repository,
    embeds the question with the module-level model, and looks up the
    closest stored documents.

    Args:
        repo_url: The repository URL
        question: Natural language question to search for
        num_results: Number of top results to return (default: 3)

    Returns:
        List of dicts containing 'document', 'filename', and 'distance', in
        the order returned by the index search. The list is empty when no
        index exists for the repository, when the index or sidecar cannot be
        loaded, when num_results is not positive, or when nothing is stored.
    """
    repo_slug = _sanitize_repo_url(repo_url)
    print(f"[SEARCH_DOCS] Searching for: '{question}' in repo: {repo_url}")

    # Check if index exists
    index_path = INDEXES_DIR / f'{repo_slug}.index'
    texts_path = INDEXES_DIR / f'{repo_slug}_texts.json'

    if not index_path.exists() or not texts_path.exists():
        print("[SEARCH_DOCS] ERROR: No index found for repo. Please generate documentation first.")
        print(f"[SEARCH_DOCS] Expected paths: {index_path} and {texts_path}")
        return []

    try:
        # Load FAISS index
        print(f"[SEARCH_DOCS] Loading FAISS index from: {index_path}")
        index = faiss.read_index(str(index_path))
        print(f"[SEARCH_DOCS] FAISS index loaded with {index.ntotal} vectors")

        # Load text chunks
        with open(texts_path, 'r') as f:
            data = json.load(f)

        texts = data['texts']
        metadata = data['metadata']
        print(f"[SEARCH_DOCS] Loaded {len(texts)} text documents and metadata")
    except Exception as e:
        # Best effort: a missing key or unreadable file yields "no results".
        print(f"[SEARCH_DOCS] Error loading index for {repo_url}: {e}")
        return []

    # Only positions present in the index, the texts AND the metadata can be
    # returned, so never ask for more than the smallest of the three.
    usable = min(len(texts), len(metadata), index.ntotal)
    top_k = min(num_results, usable)
    if top_k <= 0:
        # The FAISS search wrapper rejects k <= 0, so bail out before
        # spending time on the question embedding.
        print(
            f"[SEARCH_DOCS] Nothing to search: requested {num_results} results, "
            f"{usable} documents available"
        )
        return []

    # Embed the question
    print("[SEARCH_DOCS] Encoding question embedding...")
    question_embedding = model.encode([question], convert_to_numpy=True)

    # Search the index
    print(f"[SEARCH_DOCS] Searching for top {top_k} results...")
    distances, indices = index.search(question_embedding.astype(np.float32), top_k)

    # Format results
    formatted_results = []
    for idx, distance in zip(indices[0], distances[0]):
        position = int(idx)
        # -1 marks "no result"; positions beyond `usable` mean the index and
        # the sidecar are out of sync, so there is no text to return.
        if position < 0 or position >= usable:
            continue
        formatted_results.append({
            'document': texts[position],
            'filename': metadata[position].get('filename', 'unknown'),
            'distance': float(distance)
        })

    print(f"[SEARCH_DOCS] Found {len(formatted_results)} results")
    return formatted_results