import streamlit as st
import tempfile
import os
import pandas as pd
from src.extract_text.google_document_api import GoogleDocumentAPI
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont
from src.utils.image_utils import ImageUtils
import base64
from io import BytesIO
from src.utils.barcode import Barcode
import anthropic
import json

def load_client_artwork_files():
    """Collect every PDF found beneath the client requirements directory.

    Returns:
        A list with one dict per PDF, holding ``name`` (path relative to the
        library root), ``path`` (path that can be opened) and ``type``
        (always ``'artwork'``). The list is empty when the library
        directory does not exist.
    """
    library_root = "requirements_library/client-requirements"

    # Nothing to scan when the library has not been created.
    if not os.path.exists(library_root):
        return []

    found = []
    for folder, _subdirs, filenames in os.walk(library_root):
        for filename in filenames:
            # Extension match is case-insensitive; everything else is skipped.
            if not filename.lower().endswith('.pdf'):
                continue

            full_path = os.path.join(folder, filename)
            display_name = os.path.relpath(full_path, library_root)
            found.append({
                'name': f"{display_name}",
                'path': full_path,
                'type': 'artwork'
            })

    return found

def load_artwork_content(file_info):
    """Read the raw bytes of an artwork file.

    Args:
        file_info: Dict providing ``path`` (the file to read) and ``name``
            (the label used in the error message).

    Returns:
        The file contents as ``bytes``, or ``None`` when reading fails; the
        failure is reported through ``st.error``.
    """
    try:
        with open(file_info['path'], 'rb') as handle:
            payload = handle.read()
    except Exception as e:
        st.error(f"Error loading artwork file {file_info['name']}: {str(e)}")
        return None

    return payload

def extract_pdf_data(pdf_file, file_name):
    """Extract text, bounding boxes, a page image, and barcodes from a PDF.

    The PDF is written to a temporary file because both the Google Document
    API wrapper and ``convert_from_path`` are handed a filesystem path. The
    temporary file is removed before returning, including when extraction
    fails part-way through.

    Only the first page of the PDF is rendered and scanned.

    Args:
        pdf_file: Seekable binary file-like object containing the PDF.
        file_name: Display name; echoed in the result and in error messages.

    Returns:
        A dict with keys ``text_content``, ``bounding_boxes``, ``image``
        (the processed image), ``original_image``, ``image_base64``,
        ``barcode_results``, ``file_name``, ``image_quality`` and
        ``image_size_bytes``; or ``None`` when processing fails, in which
        case the error is reported through ``st.error``.
    """
    tmp_pdf_path = None
    try:
        # Create a temporary file to process the PDF
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_pdf_path = tmp_file.name
            pdf_file.seek(0)
            tmp_file.write(pdf_file.read())

        # Extract text and bounding boxes using Google Document API
        google_document_api = GoogleDocumentAPI(credentials_path="src/extract_text/photon-services-f0d3ec1417d0.json")
        document = google_document_api.process_document(tmp_pdf_path)
        text_content = google_document_api.extract_text_with_markdown_table(document)
        bounding_boxes = google_document_api.extract_text_with_bounding_boxes(document)

        # Convert PDF to image; a conversion failure is reported but does not
        # abort extraction, a placeholder page is used instead.
        try:
            images = convert_from_path(tmp_pdf_path)
            if not images:
                raise ValueError("No pages found in PDF")
            page_image = images[0]  # Assuming single page for now
        except Exception as e:
            st.error(f"Error converting PDF to image: {str(e)}")
            # Create a placeholder image
            page_image = Image.new('RGB', (800, 600), color='white')
            draw = ImageDraw.Draw(page_image)
            draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')

        # Process image for comparison: standardize size and optimize quality
        processed_image, quality, file_size = ImageUtils.process_image_for_comparison(
            page_image,
            target_size=(1200, 1600),  # Standard size for comparison
            max_size_bytes=1024 * 1024  # 1MB limit
        )

        # Base64 payload for the API, built with the same size limits.
        # NOTE(review): this is derived from the original page image rather
        # than from processed_image - presumably the helper resizes
        # internally; confirm against ImageUtils.
        image_base64 = ImageUtils.image_to_base64_optimized(
            page_image,
            target_size=(1200, 1600),
            max_size_bytes=1024 * 1024
        )

        # Scan for barcodes
        barcode = Barcode()
        barcode_results = barcode.scan_and_validate(page_image)

        return {
            'text_content': text_content,
            'bounding_boxes': bounding_boxes,
            'image': processed_image,  # Use the processed image
            'original_image': page_image,  # Keep original for reference
            'image_base64': image_base64,
            'barcode_results': barcode_results,
            'file_name': file_name,
            'image_quality': quality,
            'image_size_bytes': file_size
        }

    except Exception as e:
        st.error(f"Error processing PDF {file_name}: {str(e)}")
        return None

    finally:
        # delete=False means the file outlives the `with` block, so it must be
        # removed here on both the success and the failure path.
        if tmp_pdf_path and os.path.exists(tmp_pdf_path):
            try:
                os.unlink(tmp_pdf_path)
            except OSError as cleanup_error:
                # A failed cleanup should not discard an otherwise good result.
                st.warning(f"Could not remove temporary file {tmp_pdf_path}: {cleanup_error}")

def _fallback_comparison_results(response_text, summary):
    """Build the empty result structure used when Claude's reply cannot be parsed."""
    return {
        "overall_similarity": 0.5,
        "comparison_summary": summary,
        "raw_response": response_text,
        "text_differences": [],
        "layout_differences": [],
        "barcode_differences": [],
        "visual_differences": [],
        "compliance_impact": [],
        "recommendations": ["Review the raw analysis output for detailed insights"]
    }


def _parse_comparison_response(response_text):
    """Extract the JSON object embedded in Claude's reply, or return a fallback structure."""
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}')

    # find/rfind return -1 when the brace is absent. Requiring the closing
    # brace to come after the opening one also rejects replies where the only
    # '}' precedes the first '{'.
    if start_idx == -1 or end_idx <= start_idx:
        return _fallback_comparison_results(
            response_text, "Analysis completed but JSON parsing failed"
        )

    try:
        return json.loads(response_text[start_idx:end_idx + 1])
    except json.JSONDecodeError:
        return _fallback_comparison_results(
            response_text, "Analysis completed but structured parsing failed"
        )


def compare_artworks_with_claude(artwork1_data, artwork2_data, model="claude-sonnet-4-20250514"):
    """Compare two artworks using the Claude API.

    The prompt embeds each artwork's extracted text, its first ten bounding
    boxes and its barcode results; both page images are attached as base64.

    Args:
        artwork1_data: Dict for the first artwork; the keys ``file_name``,
            ``text_content``, ``bounding_boxes``, ``barcode_results`` and
            ``image_base64`` are read.
        artwork2_data: Dict for the second artwork, same keys.
        model: Claude model identifier passed to the API.

    Returns:
        The dict parsed from the JSON object in Claude's reply. When no JSON
        object can be parsed, a fallback dict with the same top-level keys,
        empty difference lists and the reply under ``raw_response``. ``None``
        when the API call raises; the error is reported through ``st.error``.
    """

    # Prepare the comparison prompt
    prompt = f"""
You are an expert packaging compliance analyzer. Compare these two artwork PDFs and provide a detailed analysis of their differences and similarities.

## Artwork 1: {artwork1_data['file_name']}
**Text Content:**
{artwork1_data['text_content']}

**Bounding Box Data:**
{json.dumps(artwork1_data['bounding_boxes'][:10], indent=2) if artwork1_data['bounding_boxes'] else "No text elements detected"}

**Barcode Data:**
{json.dumps(artwork1_data['barcode_results'], indent=2) if artwork1_data['barcode_results'] else "No barcodes detected"}

## Artwork 2: {artwork2_data['file_name']}
**Text Content:**
{artwork2_data['text_content']}

**Bounding Box Data:**
{json.dumps(artwork2_data['bounding_boxes'][:10], indent=2) if artwork2_data['bounding_boxes'] else "No text elements detected"}

**Barcode Data:**
{json.dumps(artwork2_data['barcode_results'], indent=2) if artwork2_data['barcode_results'] else "No barcodes detected"}

Please provide a comprehensive comparison analysis in the following JSON format:

{{
    "overall_similarity": 0.85,
    "comparison_summary": "Brief overview of the comparison results",
    "text_differences": [
        {{
            "category": "Missing Text",
            "artwork1_content": "Text found only in artwork 1",
            "artwork2_content": "Text found only in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Detailed explanation of the difference"
        }}
    ],
    "layout_differences": [
        {{
            "category": "Position Changes",
            "element": "Element that moved",
            "artwork1_position": "Description of position in artwork 1",
            "artwork2_position": "Description of position in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Impact of this change"
        }}
    ],
    "barcode_differences": [
        {{
            "category": "Barcode Changes",
            "artwork1_barcodes": "Description of barcodes in artwork 1",
            "artwork2_barcodes": "Description of barcodes in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Analysis of barcode differences"
        }}
    ],
    "visual_differences": [
        {{
            "category": "Visual Elements",
            "description": "Description of visual differences observed in the images",
            "significance": "HIGH/MEDIUM/LOW",
            "recommendation": "Suggested action or consideration"
        }}
    ],
    "compliance_impact": [
        {{
            "area": "Regulatory compliance area affected",
            "impact": "Description of potential compliance impact",
            "risk_level": "HIGH/MEDIUM/LOW",
            "recommendation": "Recommended action"
        }}
    ],
    "recommendations": [
        "List of actionable recommendations based on the comparison"
    ]
}}

Analyze both the textual content and visual elements. Pay special attention to:
1. Missing or changed text elements
2. Repositioned elements that might affect readability
3. Barcode differences that could impact functionality
4. Visual changes that might affect brand consistency or compliance
5. Any changes that could impact regulatory compliance

Provide specific, actionable insights that would be valuable for quality control and compliance verification.
"""

    try:
        # Initialize Anthropic client
        client = anthropic.Anthropic(api_key=os.getenv('CLAUDE_API_KEY'))

        # Create message with both images.
        # NOTE(review): media_type is hard-coded to PNG for both images -
        # confirm that ImageUtils.image_to_base64_optimized emits PNG data.
        message = client.messages.create(
            model=model,
            max_tokens=4000,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": artwork1_data['image_base64']
                            }
                        },
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": artwork2_data['image_base64']
                            }
                        }
                    ]
                }
            ]
        )

        # Concatenate every text block of the reply; other block types are ignored.
        response_text = "".join(
            content_block.text
            for content_block in message.content
            if getattr(content_block, 'type', None) == 'text'
        )

        return _parse_comparison_response(response_text)

    except Exception as e:
        st.error(f"Error calling Claude API: {str(e)}")
        return None

_SEVERITY_BADGES = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}


def _severity_title(entry, title_key, default_title, level_key, suffix):
    """Compose an expander title such as ``🔴 Missing Text - HIGH Impact``."""
    level = entry.get(level_key, 'MEDIUM')
    # Anything other than the three known labels falls back to the yellow badge.
    badge = _SEVERITY_BADGES.get(level, "🟡") if isinstance(level, str) else "🟡"
    return f"{badge} {entry.get(title_key, default_title)} - {level} {suffix}"


def _dict_entries(results, key):
    """Return the dict items stored under *key*, or an empty list if it is not a list."""
    value = results.get(key)
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _render_two_columns(left_heading, left_body, right_heading, right_body):
    """Render two heading/text pairs next to each other."""
    left_col, right_col = st.columns(2)
    with left_col:
        st.markdown(left_heading)
        st.text(left_body)
    with right_col:
        st.markdown(right_heading)
        st.text(right_body)


def display_comparison_results(results, artwork1_data, artwork2_data):
    """Display the comparison results in a structured format.

    The results originate from model-generated JSON, so missing keys,
    non-list sections, non-dict entries and a non-numeric similarity are
    tolerated instead of raising.

    Args:
        results: Comparison dict; the keys ``overall_similarity``,
            ``comparison_summary``, ``text_differences``,
            ``layout_differences``, ``barcode_differences``,
            ``visual_differences``, ``compliance_impact``,
            ``recommendations`` and ``raw_response`` are read when present.
        artwork1_data: Dict for the first artwork (``file_name`` is used as a
            label); a generic label is used when it is falsy.
        artwork2_data: Dict for the second artwork, same use.

    Returns:
        None. Output is written to the Streamlit page.
    """

    if not results:
        st.error("No comparison results to display")
        return

    name1 = artwork1_data['file_name'] if artwork1_data else "Artwork 1"
    name2 = artwork2_data['file_name'] if artwork2_data else "Artwork 2"

    text_diffs = _dict_entries(results, 'text_differences')
    layout_diffs = _dict_entries(results, 'layout_differences')
    barcode_diffs = _dict_entries(results, 'barcode_differences')
    visual_diffs = _dict_entries(results, 'visual_differences')
    compliance_impacts = _dict_entries(results, 'compliance_impact')

    recommendations = results.get('recommendations')
    if not isinstance(recommendations, list):
        recommendations = []

    # Overall Summary
    st.markdown("## 📊 Comparison Summary")

    col1, col2, col3 = st.columns(3)
    with col1:
        similarity = results.get('overall_similarity', 0.5)
        try:
            similarity_label = f"{float(similarity):.1%}"
        except (TypeError, ValueError):
            # The model may return null or free text instead of a number.
            similarity_label = "N/A"
        st.metric("Overall Similarity", similarity_label)

    with col2:
        total_differences = (
            len(text_diffs) + len(layout_diffs) + len(barcode_diffs) + len(visual_diffs)
        )
        st.metric("Total Differences", total_differences)

    with col3:
        st.metric("Compliance Impacts", len(compliance_impacts))

    # Summary description
    if 'comparison_summary' in results:
        st.markdown(f"**Summary:** {results['comparison_summary']}")

    # Create tabs for different types of differences
    tabs = st.tabs(["📝 Text Differences", "📐 Layout Changes", "📱 Barcode Changes", "🎨 Visual Differences", "⚖️ Compliance Impact", "💡 Recommendations"])

    with tabs[0]:  # Text Differences
        st.markdown("### Text Content Differences")
        if text_diffs:
            for diff in text_diffs:
                with st.expander(_severity_title(diff, 'category', 'Text Difference', 'significance', 'Impact')):
                    _render_two_columns(
                        f"**{name1}:**", diff.get('artwork1_content', 'N/A'),
                        f"**{name2}:**", diff.get('artwork2_content', 'N/A'),
                    )
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant text differences found")

    with tabs[1]:  # Layout Changes
        st.markdown("### Layout and Positioning Changes")
        if layout_diffs:
            for diff in layout_diffs:
                with st.expander(_severity_title(diff, 'category', 'Layout Change', 'significance', 'Impact')):
                    st.markdown(f"**Element:** {diff.get('element', 'Unknown element')}")
                    _render_two_columns(
                        f"**Position in {name1}:**", diff.get('artwork1_position', 'N/A'),
                        f"**Position in {name2}:**", diff.get('artwork2_position', 'N/A'),
                    )
                    st.markdown(f"**Impact:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant layout differences found")

    with tabs[2]:  # Barcode Changes
        st.markdown("### Barcode Differences")
        if barcode_diffs:
            for diff in barcode_diffs:
                with st.expander(_severity_title(diff, 'category', 'Barcode Change', 'significance', 'Impact')):
                    _render_two_columns(
                        f"**{name1} Barcodes:**", diff.get('artwork1_barcodes', 'N/A'),
                        f"**{name2} Barcodes:**", diff.get('artwork2_barcodes', 'N/A'),
                    )
                    st.markdown(f"**Analysis:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant barcode differences found")

    with tabs[3]:  # Visual Differences
        st.markdown("### Visual and Design Differences")
        if visual_diffs:
            for diff in visual_diffs:
                with st.expander(_severity_title(diff, 'category', 'Visual Change', 'significance', 'Impact')):
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
                    if 'recommendation' in diff:
                        st.markdown(f"**Recommendation:** {diff['recommendation']}")
        else:
            st.info("No significant visual differences found")

    with tabs[4]:  # Compliance Impact
        st.markdown("### Compliance and Regulatory Impact")
        if compliance_impacts:
            for impact in compliance_impacts:
                with st.expander(_severity_title(impact, 'area', 'Compliance Area', 'risk_level', 'Risk')):
                    st.markdown(f"**Impact:** {impact.get('impact', 'No description available')}")
                    st.markdown(f"**Recommendation:** {impact.get('recommendation', 'No recommendation provided')}")
        else:
            st.success("No compliance impacts identified")

    with tabs[5]:  # Recommendations
        st.markdown("### Action Items and Recommendations")
        if recommendations:
            for i, rec in enumerate(recommendations, 1):
                st.markdown(f"{i}. {rec}")
        else:
            st.info("No specific recommendations provided")

    # Raw response section (collapsible); only present on the parse-fallback path
    if 'raw_response' in results:
        with st.expander("🔍 Raw Analysis Output"):
            st.text(results['raw_response'])

def display_side_by_side_images(artwork1_data, artwork2_data):
    """Render both artworks next to each other, each with a short data summary.

    Args:
        artwork1_data: Extracted data for the left column; ``file_name``,
            ``image``, ``bounding_boxes`` and ``barcode_results`` are read,
            plus ``image_quality`` / ``image_size_bytes`` when both exist.
        artwork2_data: Extracted data for the right column, same keys.
    """
    st.markdown("## 🖼️ Side-by-Side Comparison")

    # Both columns show the same widgets, so render them from one loop.
    for column, artwork in zip(st.columns(2), (artwork1_data, artwork2_data)):
        with column:
            st.markdown(f"### {artwork['file_name']}")
            st.image(
                ImageUtils.crop_image(artwork['image']),
                caption=artwork['file_name'],
                use_container_width=True,
            )

            # Image processing info is shown only when both figures are available.
            if 'image_quality' in artwork and 'image_size_bytes' in artwork:
                quality = artwork['image_quality']
                size_mb = artwork['image_size_bytes'] / (1024 * 1024)
                st.info(f"📊 Image Quality: {quality}% | Size: {size_mb:.2f}MB")

            # Counts of what extraction found; empty or missing collections count as zero.
            with st.expander("📊 Extracted Data Summary"):
                box_count = len(artwork['bounding_boxes']) if artwork['bounding_boxes'] else 0
                barcode_count = len(artwork['barcode_results']) if artwork['barcode_results'] else 0
                st.metric("Text Elements", box_count)
                st.metric("Barcodes", barcode_count)

def _upload_source_id(uploaded_file):
    """Identify an uploaded PDF by its name and a digest of its bytes."""
    import hashlib  # function-scope import keeps this helper self-contained

    digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    return ("upload", uploaded_file.name, digest)


def _open_client_artwork(file_info):
    """Load a client-library PDF into an in-memory file object, or return None on failure."""
    content = load_artwork_content(file_info)
    if not content:
        return None
    pdf_buffer = BytesIO(content)
    pdf_buffer.name = file_info["name"]
    return pdf_buffer


def _ensure_artwork_loaded(slot, source_id, open_pdf, file_name):
    """Return the extracted data for *slot*, re-extracting only when its source changed.

    Streamlit re-executes the script on every interaction, so without this
    check each rerun would repeat the full PDF extraction. A successful
    extraction is remembered together with ``source_id``; a failed one is not,
    so it is retried on the next rerun. Whenever a slot is (re)extracted, any
    stored comparison results are cleared because they describe the previous
    pair of artworks.
    """
    data_key = f"artwork{slot}_data"
    source_key = f"artwork{slot}_source"

    cached = st.session_state.get(data_key)
    if cached and st.session_state.get(source_key) == source_id:
        return cached

    pdf_file = open_pdf()
    if pdf_file is None:
        # Reading failed (already reported); leave the previous state untouched.
        return None

    with st.spinner(f"Processing artwork {slot}..."):
        extracted = extract_pdf_data(pdf_file, file_name)

    st.session_state[data_key] = extracted
    st.session_state[source_key] = source_id if extracted else None
    st.session_state["comparison_results"] = None
    return extracted


def _render_artwork_picker(slot, client_artwork_files):
    """Render the client-file and upload selectors for one artwork slot and load the active source.

    An uploaded file takes precedence over a client-library selection.
    """
    st.markdown(f"### 🎨 Artwork {slot}")

    # Create tabs for client files vs upload
    client_tab, upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])

    # Both widgets are created before any loading so that only the source
    # that wins is processed.
    with upload_tab:
        uploaded = st.file_uploader(f"Upload Artwork {slot} (PDF)", type=["pdf"], key=f"art{slot}_upload")

    selected_info = None
    with client_tab:
        if client_artwork_files:
            placeholder = f"Select artwork {slot}..."
            options = [placeholder] + [f["name"] for f in client_artwork_files]
            choice = st.selectbox(f"Choose artwork {slot}:", options, key=f"art{slot}_select")
            if choice != placeholder:
                selected_info = next(
                    (f for f in client_artwork_files if f["name"] == choice), None
                )
        else:
            st.info("No client artwork files found")

    if uploaded is not None:
        with upload_tab:
            data = _ensure_artwork_loaded(
                slot, _upload_source_id(uploaded), lambda: uploaded, uploaded.name
            )
            if data:
                st.success(f"✅ Uploaded artwork {slot}: {uploaded.name}")
        if selected_info is not None:
            with client_tab:
                st.info(f"An uploaded file is in use for artwork {slot}; remove it to use the selected client file.")
    elif selected_info is not None:
        with client_tab:
            data = _ensure_artwork_loaded(
                slot,
                ("client", selected_info["path"]),
                lambda: _open_client_artwork(selected_info),
                selected_info["name"],
            )
            if data:
                st.success(f"✅ Loaded artwork {slot}: {selected_info['name']}")


def main():
    """Run the Streamlit artwork comparison page.

    Lets the user pick two artwork PDFs (from the client library or by
    upload), shows them side by side, runs the Claude comparison on demand
    and renders the stored results. Extracted data and results are kept in
    ``st.session_state`` so they survive Streamlit reruns.
    """
    st.set_page_config(layout="wide", page_title="Artwork Comparison Tool")

    # Load client artwork files
    client_artwork_files = load_client_artwork_files()

    # Initialize session state
    for state_key in ("artwork1_data", "artwork2_data", "comparison_results"):
        if state_key not in st.session_state:
            st.session_state[state_key] = None

    st.title("🎨 Artwork Comparison Tool")
    st.write("Compare two packaging artwork PDFs to identify differences in text, layout, barcodes, and visual elements.")

    # File selection section
    st.markdown("## 📁 Select Artworks to Compare")

    for slot, column in enumerate(st.columns(2), start=1):
        with column:
            _render_artwork_picker(slot, client_artwork_files)

    artwork1 = st.session_state.artwork1_data
    artwork2 = st.session_state.artwork2_data
    both_loaded = bool(artwork1 and artwork2)

    # Display images side by side if both are loaded
    if both_loaded:
        display_side_by_side_images(artwork1, artwork2)

    # Model selection
    model_option = "claude-sonnet-4-20250514"

    # Comparison button
    if st.button("🔍 Compare Artworks", type="primary"):
        if both_loaded:
            with st.spinner("Analyzing artworks with Claude..."):
                st.session_state.comparison_results = compare_artworks_with_claude(
                    artwork1,
                    artwork2,
                    model=model_option
                )

            if st.session_state.comparison_results:
                st.success("✅ Comparison analysis complete!")
            else:
                st.error("❌ Comparison analysis failed")
        else:
            st.warning("⚠️ Please select or upload both artworks before comparing")

    # Display comparison results; skipped when an artwork is missing because
    # the results view labels its columns with both file names.
    if st.session_state.comparison_results and both_loaded:
        display_comparison_results(
            st.session_state.comparison_results,
            artwork1,
            artwork2
        )

    # Add helpful information
    st.markdown("---")
    st.markdown("""
    ### 🛠️ How It Works
    1. **Extract Content**: The tool extracts text, bounding boxes, images, and barcodes from both PDFs
    2. **AI Analysis**: Claude analyzes the extracted data and visual elements to identify differences
    3. **Structured Results**: Differences are categorized by type (text, layout, barcode, visual) and significance
    4. **Compliance Assessment**: Potential compliance impacts are identified with risk levels and recommendations
    
    ### 🎯 Use Cases
    - **Quality Control**: Verify artwork changes between versions
    - **Brand Consistency**: Ensure visual elements remain consistent
    - **Compliance Review**: Identify changes that might affect regulatory compliance
    - **Change Documentation**: Track and document artwork modifications
    """)

# Launch the Streamlit page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()