import glob
import json
import os
import random
from typing import Dict, List, Optional, Tuple

import pandas as pd

# Path of the fossils CSV, which sits in the same directory as this module.
# Its rows are searched to map a fossil name to an image path and family.
FOSSILS_CSV_PATH = os.path.join(
    os.path.dirname(__file__),
    "fossils_paths.csv",
)

def get_fossil_url_from_csv(
    fossil_name: str,
    csv_path: Optional[str] = None,
) -> Tuple[Optional[str], Optional[str]]:
    """
    Search the fossils CSV to find the public image URL and family for a fossil.

    The lookup is a case-insensitive, literal substring match against the
    ``file_name`` column. Several candidate patterns derived from the name are
    tried in order (full name, name without the "FLFO_"/"CU_" prefix, text
    after the last underscore); the first candidate that matches at least one
    row wins and only the first matching row is used.

    Args:
        fossil_name: Name of the fossil (e.g., "FLFO_002787A", "CU_0387cu")
        csv_path: Path of the CSV to search. Defaults to FOSSILS_CSV_PATH.

    Returns:
        Tuple of (URL to the fossil image, family name), or (None, None) when
        the name is empty or not a string, the CSV does not exist, no row
        matches, the matched path is not under a known image folder, or an
        error occurs while reading/searching the CSV. The family is "Unknown"
        when the CSV has no 'family' column or the cell is empty.
    """
    # A blank or non-string name cannot identify a fossil; bail out before
    # touching the CSV.
    if not isinstance(fossil_name, str) or not fossil_name.strip():
        return None, None

    if csv_path is None:
        csv_path = FOSSILS_CSV_PATH

    # (marker identifying the collection, path prefix stored in the CSV,
    #  public folder that replaces that prefix)
    path_rules = (
        (
            'Florissant_Fossil/512/full/jpg/',
            '/gpfs/data/tserre/irodri15/Fossils/new_data/leavesdb-v1_1/images/Fossil/Florissant_Fossil/512/full/jpg/',
            'https://storage.googleapis.com/serrelab/prj_fossils/2024/Florissant_Fossil_v2.0/',
        ),
        (
            'General_Fossil/512/full/jpg/',
            '/gpfs/data/tserre/irodri15/Fossils/new_data/leavesdb-v1_1/images/Fossil/General_Fossil/512/full/jpg/',
            'https://storage.googleapis.com/serrelab/prj_fossils/2024/General_Fossil_v2.0/',
        ),
    )

    try:
        if not os.path.exists(csv_path):
            return None, None

        # NOTE: the CSV is re-read on every call.
        df = pd.read_csv(csv_path)

        # CSV filenames may not include the full prefix
        # (e.g., "FLFO_002787A" -> "002787A"), so try progressively shorter forms.
        candidates = [
            fossil_name,  # Full name
            fossil_name.replace("FLFO_", "").replace("CU_", ""),  # Without prefix
            fossil_name.split("_")[-1],  # Last part after underscore
        ]
        # Drop duplicates (keeping order) and blank patterns: an empty
        # substring is contained in every file name and would wrongly match
        # the first row of the CSV.
        search_patterns = [p for p in dict.fromkeys(candidates) if p.strip()]

        matching_rows = None
        for pattern in search_patterns:
            mask = df['file_name'].str.contains(pattern, case=False, na=False, regex=False)
            if mask.any():
                matching_rows = df[mask]
                break

        if matching_rows is None:
            return None, None

        # Get the first match
        row = matching_rows.iloc[0]
        file_path = row['file_name']

        family = row.get('family', 'Unknown')
        if pd.isna(family):
            # Empty CSV cells are read as NaN; report them like a missing column.
            family = 'Unknown'

        # Convert to public URL
        for marker, source_prefix, public_folder in path_rules:
            if marker in file_path:
                return file_path.replace(source_prefix, public_folder), family

        return None, None
    except Exception as e:
        # Best-effort lookup: any read/parse/schema problem is reported and
        # treated as "not found" so callers can fall back to a placeholder.
        print(f"Error searching CSV for {fossil_name}: {e}")
        return None, None

def load_plausible_fossils(json_dir: Optional[str] = None) -> List[Dict]:
    """
    Load the JSON response files from a directory and extract entries marked as 'Plausible'.

    Only files named ``unidentified_fossil_responses*.json`` are read, in
    sorted filename order so the result is deterministic. Each file is
    expected to hold a JSON list of entry objects; files that cannot be read
    or parsed, or whose top-level value is not a list, are reported and
    skipped, and list items that are not objects are ignored.

    Args:
        json_dir: Directory containing the JSON response files (defaults to ../Unknown)

    Returns:
        List of dictionaries with the keys "Serial Number", "Fossil Name" and
        "Source File", one per distinct fossil name (the first occurrence wins).
    """
    if json_dir is None:
        # Default to ../Unknown relative to the directory containing this module
        json_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "Unknown")

    file_pattern = os.path.join(json_dir, "unidentified_fossil_responses*.json")

    seen = set()
    unique_fossils = []

    # glob returns files in filesystem order; sorting makes "first occurrence
    # wins" de-duplication reproducible across machines.
    for json_file in sorted(glob.glob(file_pattern)):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"Error loading {json_file}: {e}")
            continue

        if not isinstance(data, list):
            print(f"Error loading {json_file}: expected a JSON list of entries")
            continue

        source_file = os.path.basename(json_file)
        for entry in data:
            # A stray non-object item must not discard the rest of the file.
            if not isinstance(entry, dict):
                continue
            if entry.get("User Selection") != "Plausible":
                continue

            # Remove duplicates based on Fossil Name
            fossil_name = entry.get("Fossil Name")
            if fossil_name in seen:
                continue
            seen.add(fossil_name)
            unique_fossils.append({
                "Serial Number": entry.get("Serial Number"),
                "Fossil Name": fossil_name,
                "Source File": source_file,
            })

    return unique_fossils

def get_fossil_image_url(fossil_name: str) -> Tuple[str, str]:
    """
    Resolve the image URL and family of a fossil, with a placeholder fallback.

    The CSV lookup is tried first. When it yields no URL, a guessed URL under
    the Florissant folder (ending in "image.jpg") is returned together with
    the family "Unknown".

    Args:
        fossil_name: Name of the fossil (e.g., "FLFO_002787A", "CU_0387cu")

    Returns:
        Tuple of (URL to the fossil image, family name) or (placeholder URL, "Unknown")
    """
    csv_url, csv_family = get_fossil_url_from_csv(fossil_name)

    if not csv_url:
        # No CSV hit: build a best-guess URL (may not work for all fossils)
        base_florissant = "https://storage.googleapis.com/serrelab/prj_fossils/2024/Florissant_Fossil_v2.0/"
        placeholder_url = f"{base_florissant}{fossil_name}/image.jpg"
        return placeholder_url, "Unknown"

    return csv_url, csv_family

def get_random_plausible_fossils(count: int = 10, json_dir: str = None) -> List[Dict]:
    """
    Pick a random subset of the plausible fossils and attach image details.

    When there are no more than ``count`` plausible fossils, all of them are
    returned (in load order) instead of a random sample.

    Args:
        count: Number of fossils to return
        json_dir: Directory containing JSON files

    Returns:
        List of fossil dictionaries, each extended with "Image URL" and "Family"
    """
    candidates = load_plausible_fossils(json_dir)

    # Sample only when there are more candidates than requested.
    if len(candidates) > count:
        chosen = random.sample(candidates, count)
    else:
        chosen = candidates

    # Attach the image URL and family to every selected record.
    for record in chosen:
        record["Image URL"], record["Family"] = get_fossil_image_url(record["Fossil Name"])

    return chosen

def get_all_plausible_fossils(json_dir: str = None) -> List[Dict]:
    """
    Return every plausible fossil, each extended with its image details.

    Args:
        json_dir: Directory containing JSON files

    Returns:
        List of all fossil dictionaries, each with "Image URL" and "Family" added
    """
    records = load_plausible_fossils(json_dir)

    for record in records:
        image_url, family_name = get_fossil_image_url(record["Fossil Name"])
        # Insert the two keys in the same order: URL first, then family.
        record.update({"Image URL": image_url, "Family": family_name})

    return records

def format_fossil_html(fossil: Dict) -> str:
    """
    Format a fossil entry as an HTML card with a link to its image.

    All values taken from ``fossil`` are HTML-escaped before being inserted
    into the markup, so characters such as ``&``, ``<`` or quotes in a name,
    family or URL cannot break the card or the single-quoted ``href``.

    Args:
        fossil: Dictionary containing fossil information. The keys read are
            "Fossil Name", "Image URL", "Serial Number" and "Family"; a
            missing key or a None value falls back to "Unknown" (name,
            family) or an empty string (serial number, URL).

    Returns:
        HTML string for the fossil. The image link is shown only when the URL
        is a non-empty string that does not contain "image.jpg" (the marker
        of a placeholder URL); otherwise a "not available" note is shown.
    """
    # Imported locally under a distinct name so it cannot clash with local
    # variables holding HTML text.
    from html import escape

    def _field(key: str, default: str) -> str:
        """Return the escaped text for ``key``, treating None like a missing key."""
        value = fossil.get(key)
        return escape(str(default if value is None else value), quote=True)

    fossil_name = _field("Fossil Name", "Unknown")
    serial_num = _field("Serial Number", "")
    family = _field("Family", "Unknown")

    # Anything that is not a string cannot be a usable URL.
    raw_url = fossil.get("Image URL")
    if not isinstance(raw_url, str):
        raw_url = ""

    # Check if URL is valid (not a placeholder); done on the raw, unescaped URL.
    has_valid_url = bool(raw_url) and "image.jpg" not in raw_url

    if has_valid_url:
        image_url = escape(raw_url, quote=True)
        link_html = f"""
        <p style='margin: 10px 0 0 0;'>
            <a href='{image_url}' target='_blank' style='color: #0066cc; text-decoration: none; font-weight: bold; padding: 8px 15px; background-color: #e3f2fd; border-radius: 4px; display: inline-block;'>
                🔗 View Image →
            </a>
        </p>
        """
    else:
        link_html = """
        <p style='margin: 10px 0 0 0; color: #666; font-style: italic;'>
            Image URL not available in database
        </p>
        """

    card_html = f"""
    <div style='border: 1px solid #ddd; padding: 15px; margin: 10px; border-radius: 8px; background-color: #fafafa; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
        <h3 style='margin-top: 0; margin-bottom: 10px; color: #333;'>{fossil_name}</h3>
        <p style='margin: 5px 0;'><strong>Serial Number:</strong> {serial_num}</p>
        <p style='margin: 5px 0;'><strong>Family:</strong> {family}</p>
        {link_html}
    </div>
    """
    return card_html