"""Streamlit app that compares two packaging artwork PDFs.

Each PDF is run through text extraction (Google Document API wrapper),
rasterised to an image, scanned for barcodes, and then both artworks are sent
to Claude, which is asked for a structured JSON comparison that the app
renders in tabs.
"""

import base64
import json
import os
import tempfile
from io import BytesIO

import anthropic
import pandas as pd
import streamlit as st
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont

from src.extract_text.google_document_api import GoogleDocumentAPI
from src.utils.barcode import Barcode
from src.utils.image_utils import ImageUtils

# Directory scanned for client artwork PDFs.
_CLIENT_ARTWORK_DIR = "requirements_library/client-requirements"

# Service-account file handed to GoogleDocumentAPI.
_GOOGLE_CREDENTIALS_PATH = "src/extract_text/photon-services-f0d3ec1417d0.json"

# Target size / byte budget passed to the ImageUtils helpers.
_COMPARISON_IMAGE_SIZE = (1200, 1600)
_COMPARISON_IMAGE_MAX_BYTES = 1024 * 1024  # 1MB limit

# Only the first few bounding boxes are embedded in the prompt.
_MAX_BOUNDING_BOXES_IN_PROMPT = 10

_DEFAULT_MODEL = "claude-sonnet-4-20250514"

# Icon shown next to a HIGH/MEDIUM/LOW significance or risk level.
_LEVEL_ICONS = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}
_DEFAULT_LEVEL_ICON = "🟡"

_ARTWORK_SECTION_TEMPLATE = """## Artwork {index}: {file_name}
**Text Content:**
{text_content}

**Bounding Box Data:**
{bounding_boxes}

**Barcode Data:**
{barcodes}
"""

# NOTE: rendered with str.format, so literal braces of the JSON example are
# doubled.
_COMPARISON_PROMPT_TEMPLATE = """
You are an expert packaging compliance analyzer. Compare these two artwork PDFs and provide a detailed analysis of their differences and similarities.

{artwork1_section}
{artwork2_section}
Please provide a comprehensive comparison analysis in the following JSON format:

{{
    "overall_similarity": 0.85,
    "comparison_summary": "Brief overview of the comparison results",
    "text_differences": [
        {{
            "category": "Missing Text",
            "artwork1_content": "Text found only in artwork 1",
            "artwork2_content": "Text found only in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Detailed explanation of the difference"
        }}
    ],
    "layout_differences": [
        {{
            "category": "Position Changes",
            "element": "Element that moved",
            "artwork1_position": "Description of position in artwork 1",
            "artwork2_position": "Description of position in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Impact of this change"
        }}
    ],
    "barcode_differences": [
        {{
            "category": "Barcode Changes",
            "artwork1_barcodes": "Description of barcodes in artwork 1",
            "artwork2_barcodes": "Description of barcodes in artwork 2",
            "significance": "HIGH/MEDIUM/LOW",
            "description": "Analysis of barcode differences"
        }}
    ],
    "visual_differences": [
        {{
            "category": "Visual Elements",
            "description": "Description of visual differences observed in the images",
            "significance": "HIGH/MEDIUM/LOW",
            "recommendation": "Suggested action or consideration"
        }}
    ],
    "compliance_impact": [
        {{
            "area": "Regulatory compliance area affected",
            "impact": "Description of potential compliance impact",
            "risk_level": "HIGH/MEDIUM/LOW",
            "recommendation": "Recommended action"
        }}
    ],
    "recommendations": [
        "List of actionable recommendations based on the comparison"
    ]
}}

Analyze both the textual content and visual elements. Pay special attention to:
1. Missing or changed text elements
2. Repositioned elements that might affect readability
3. Barcode differences that could impact functionality
4. Visual changes that might affect brand consistency or compliance
5. Any changes that could impact regulatory compliance

Provide specific, actionable insights that would be valuable for quality control and compliance verification.
"""

_HELP_MARKDOWN = """
### 🛠️ How It Works

1. **Extract Content**: The tool extracts text, bounding boxes, images, and barcodes from both PDFs
2. **AI Analysis**: Claude analyzes the extracted data and visual elements to identify differences
3. **Structured Results**: Differences are categorized by type (text, layout, barcode, visual) and significance
4. **Compliance Assessment**: Potential compliance impacts are identified with risk levels and recommendations

### 🎯 Use Cases

- **Quality Control**: Verify artwork changes between versions
- **Brand Consistency**: Ensure visual elements remain consistent
- **Compliance Review**: Identify changes that might affect regulatory compliance
- **Change Documentation**: Track and document artwork modifications
"""


def load_client_artwork_files(base_path=_CLIENT_ARTWORK_DIR):
    """Collect every PDF found under the client artwork directory.

    Args:
        base_path: Directory to walk recursively. Defaults to the client
            requirements library.

    Returns:
        A list of dicts with keys ``name`` (path relative to ``base_path``),
        ``path`` (path as joined from ``base_path``) and ``type`` (always
        ``'artwork'``), sorted by ``name``. Empty when ``base_path`` does not
        exist.
    """
    artwork_files = []
    if not os.path.exists(base_path):
        return artwork_files

    # Walk through all subdirectories
    for root, _dirs, files in os.walk(base_path):
        for file_name in files:
            if not file_name.lower().endswith('.pdf'):
                continue
            file_path = os.path.join(root, file_name)
            artwork_files.append({
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
                'type': 'artwork',
            })

    # os.walk order is arbitrary; sort so the selectbox options are stable.
    artwork_files.sort(key=lambda info: info['name'])
    return artwork_files


def load_artwork_content(file_info):
    """Read an artwork file from disk.

    Args:
        file_info: Dict with at least ``path`` and ``name`` keys, as produced
            by ``load_client_artwork_files``.

    Returns:
        The file content as bytes, or ``None`` (after showing a Streamlit
        error) when the file cannot be read.
    """
    try:
        with open(file_info['path'], 'rb') as f:
            return f.read()
    except OSError as e:
        st.error(f"Error loading artwork file {file_info['name']}: {str(e)}")
        return None


def extract_pdf_data(pdf_file, file_name):
    """Extract text, bounding boxes, a page image and barcodes from a PDF.

    Args:
        pdf_file: Seekable binary file-like object holding the PDF.
        file_name: Name used in error messages and stored in the result.

    Returns:
        A dict with keys ``text_content``, ``bounding_boxes``, ``image``,
        ``original_image``, ``image_base64``, ``barcode_results``,
        ``file_name``, ``image_quality`` and ``image_size_bytes``; or ``None``
        (after showing a Streamlit error) when processing fails.
    """
    tmp_pdf_path = None
    try:
        # The extraction and rasterisation helpers take a file path, so the
        # upload is spooled to a temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_pdf_path = tmp_file.name
            pdf_file.seek(0)
            tmp_file.write(pdf_file.read())

        # Extract text and bounding boxes using Google Document API
        google_document_api = GoogleDocumentAPI(credentials_path=_GOOGLE_CREDENTIALS_PATH)
        document = google_document_api.process_document(tmp_pdf_path)
        text_content = google_document_api.extract_text_with_markdown_table(document)
        bounding_boxes = google_document_api.extract_text_with_bounding_boxes(document)

        page_image = _render_first_page(tmp_pdf_path)

        # Process image for comparison: standardize size and optimize quality
        processed_image, quality, file_size = ImageUtils.process_image_for_comparison(
            page_image,
            target_size=_COMPARISON_IMAGE_SIZE,
            max_size_bytes=_COMPARISON_IMAGE_MAX_BYTES,
        )

        # Base64 payload for the API, produced from the original page image.
        image_base64 = ImageUtils.image_to_base64_optimized(
            page_image,
            target_size=_COMPARISON_IMAGE_SIZE,
            max_size_bytes=_COMPARISON_IMAGE_MAX_BYTES,
        )

        # Scan for barcodes
        barcode_results = Barcode().scan_and_validate(page_image)

        return {
            'text_content': text_content,
            'bounding_boxes': bounding_boxes,
            'image': processed_image,  # Use the processed image
            'original_image': page_image,  # Keep original for reference
            'image_base64': image_base64,
            'barcode_results': barcode_results,
            'file_name': file_name,
            'image_quality': quality,
            'image_size_bytes': file_size,
        }
    except Exception as e:
        st.error(f"Error processing PDF {file_name}: {str(e)}")
        return None
    finally:
        # Runs on failure too, so a failed extraction no longer leaks the
        # temporary PDF (delete=False means nothing else removes it).
        if tmp_pdf_path and os.path.exists(tmp_pdf_path):
            try:
                os.unlink(tmp_pdf_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not discard
                # an otherwise successful extraction.
                pass


def _render_first_page(pdf_path):
    """Rasterise the first PDF page, or build a placeholder image on failure."""
    try:
        images = convert_from_path(pdf_path)
        if not images:
            raise ValueError("No pages found in PDF")
        return images[0]  # Assuming single page for now
    except Exception as e:
        st.error(f"Error converting PDF to image: {str(e)}")
        # Create a placeholder image
        placeholder = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(placeholder)
        # NOTE(review): anchor= may be rejected with the default font on some
        # Pillow versions - confirm against the pinned Pillow release.
        draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')
        return placeholder


def _artwork_prompt_section(index, artwork_data):
    """Format one artwork's extracted data as a prompt section."""
    bounding_boxes = artwork_data['bounding_boxes']
    barcode_results = artwork_data['barcode_results']
    return _ARTWORK_SECTION_TEMPLATE.format(
        index=index,
        file_name=artwork_data['file_name'],
        text_content=artwork_data['text_content'],
        bounding_boxes=(
            json.dumps(bounding_boxes[:_MAX_BOUNDING_BOXES_IN_PROMPT], indent=2)
            if bounding_boxes else "No text elements detected"
        ),
        barcodes=(
            json.dumps(barcode_results, indent=2)
            if barcode_results else "No barcodes detected"
        ),
    )


def _fallback_comparison(summary, raw_response):
    """Build a neutral result structure that carries the unparsed response."""
    return {
        "overall_similarity": 0.5,
        "comparison_summary": summary,
        "raw_response": raw_response,
        "text_differences": [],
        "layout_differences": [],
        "barcode_differences": [],
        "visual_differences": [],
        "compliance_impact": [],
        "recommendations": ["Review the raw analysis output for detailed insights"],
    }


def _parse_comparison_response(response_text):
    """Extract the JSON object embedded in the model's reply.

    The span from the first ``{`` to the last ``}`` is parsed. When no such
    span exists or it is not valid JSON, a fallback structure containing the
    raw text is returned instead.
    """
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}')
    # rfind() yields -1 on a miss; compare the raw index rather than index + 1
    # (which can never be -1).
    if start_idx == -1 or end_idx < start_idx:
        return _fallback_comparison(
            "Analysis completed but JSON parsing failed", response_text
        )
    try:
        return json.loads(response_text[start_idx:end_idx + 1])
    except json.JSONDecodeError:
        return _fallback_comparison(
            "Analysis completed but structured parsing failed", response_text
        )


def _image_block(image_base64):
    """Build a base64 image content block for the Messages API."""
    return {
        "type": "image",
        "source": {
            "type": "base64",
            # NOTE(review): assumes ImageUtils.image_to_base64_optimized
            # emits PNG data - confirm, a mismatch may be rejected by the API.
            "media_type": "image/png",
            "data": image_base64,
        },
    }


def compare_artworks_with_claude(artwork1_data, artwork2_data, model=_DEFAULT_MODEL):
    """Compare two artworks using the Claude API.

    Args:
        artwork1_data: Extraction result for the first artwork (see
            ``extract_pdf_data``).
        artwork2_data: Extraction result for the second artwork.
        model: Anthropic model identifier.

    Returns:
        The parsed comparison dict, a fallback dict with a ``raw_response``
        key when the reply could not be parsed, or ``None`` (after showing a
        Streamlit error) when the API call fails.
    """
    prompt = _COMPARISON_PROMPT_TEMPLATE.format(
        artwork1_section=_artwork_prompt_section(1, artwork1_data),
        artwork2_section=_artwork_prompt_section(2, artwork2_data),
    )

    try:
        client = anthropic.Anthropic(api_key=os.getenv('CLAUDE_API_KEY'))

        # One user message: the prompt followed by both page images.
        message = client.messages.create(
            model=model,
            max_tokens=4000,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        _image_block(artwork1_data['image_base64']),
                        _image_block(artwork2_data['image_base64']),
                    ],
                }
            ],
        )

        # Concatenate the text blocks of the reply.
        response_text = "".join(
            block.text
            for block in message.content
            if getattr(block, 'type', None) == 'text'
        )
        return _parse_comparison_response(response_text)
    except Exception as e:
        st.error(f"Error calling Claude API: {str(e)}")
        return None


def _level_icon(level):
    """Map a HIGH/MEDIUM/LOW level to its icon (case-insensitive)."""
    return _LEVEL_ICONS.get(str(level).upper(), _DEFAULT_LEVEL_ICON)


def _format_similarity(value):
    """Format a similarity score as a percentage, tolerating bad values."""
    try:
        return f"{float(value):.1%}"
    except (TypeError, ValueError):
        # The score comes from model output and may be missing or textual.
        return "N/A"


def _list_of(results, key):
    """Return ``results[key]`` when it is a list, otherwise an empty list."""
    value = results.get(key)
    return value if isinstance(value, list) else []


def _dict_entries(results, key):
    """Return only the dict entries of the list stored under ``key``."""
    return [entry for entry in _list_of(results, key) if isinstance(entry, dict)]


def _render_pair(left_heading, left_text, right_heading, right_text):
    """Show two heading/text pairs in side-by-side columns."""
    col1, col2 = st.columns(2)
    with col1:
        st.markdown(left_heading)
        st.text(left_text)
    with col2:
        st.markdown(right_heading)
        st.text(right_text)


def _impact_expander(entry, default_category):
    """Open an expander titled with the entry's icon, category and level."""
    level = entry.get('significance', 'MEDIUM')
    return st.expander(
        f"{_level_icon(level)} {entry.get('category', default_category)} - {level} Impact"
    )


def display_comparison_results(results, artwork1_data, artwork2_data):
    """Display the comparison results in a structured format.

    Args:
        results: Comparison dict as returned by
            ``compare_artworks_with_claude``.
        artwork1_data: Extraction result of the first artwork; its
            ``file_name`` labels the left-hand columns.
        artwork2_data: Extraction result of the second artwork.
    """
    if not results:
        st.error("No comparison results to display")
        return

    name1 = artwork1_data['file_name']
    name2 = artwork2_data['file_name']

    # The lists come from model output; anything that is not a list (or, for
    # the difference lists, not a dict entry) is ignored instead of crashing.
    text_diffs = _dict_entries(results, 'text_differences')
    layout_diffs = _dict_entries(results, 'layout_differences')
    barcode_diffs = _dict_entries(results, 'barcode_differences')
    visual_diffs = _dict_entries(results, 'visual_differences')
    compliance_impacts = _dict_entries(results, 'compliance_impact')
    recommendations = _list_of(results, 'recommendations')

    # Overall Summary
    st.markdown("## 📊 Comparison Summary")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(
            "Overall Similarity",
            _format_similarity(results.get('overall_similarity', 0.5)),
        )
    with col2:
        total_differences = (
            len(text_diffs) + len(layout_diffs) + len(barcode_diffs) + len(visual_diffs)
        )
        st.metric("Total Differences", total_differences)
    with col3:
        st.metric("Compliance Impacts", len(compliance_impacts))

    # Summary description
    if 'comparison_summary' in results:
        st.markdown(f"**Summary:** {results['comparison_summary']}")

    # Create tabs for different types of differences
    tabs = st.tabs([
        "📝 Text Differences",
        "📐 Layout Changes",
        "📱 Barcode Changes",
        "🎨 Visual Differences",
        "⚖️ Compliance Impact",
        "💡 Recommendations",
    ])

    with tabs[0]:  # Text Differences
        st.markdown("### Text Content Differences")
        if text_diffs:
            for diff in text_diffs:
                with _impact_expander(diff, 'Text Difference'):
                    _render_pair(
                        f"**{name1}:**", diff.get('artwork1_content', 'N/A'),
                        f"**{name2}:**", diff.get('artwork2_content', 'N/A'),
                    )
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant text differences found")

    with tabs[1]:  # Layout Changes
        st.markdown("### Layout and Positioning Changes")
        if layout_diffs:
            for diff in layout_diffs:
                with _impact_expander(diff, 'Layout Change'):
                    st.markdown(f"**Element:** {diff.get('element', 'Unknown element')}")
                    _render_pair(
                        f"**Position in {name1}:**", diff.get('artwork1_position', 'N/A'),
                        f"**Position in {name2}:**", diff.get('artwork2_position', 'N/A'),
                    )
                    st.markdown(f"**Impact:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant layout differences found")

    with tabs[2]:  # Barcode Changes
        st.markdown("### Barcode Differences")
        if barcode_diffs:
            for diff in barcode_diffs:
                with _impact_expander(diff, 'Barcode Change'):
                    _render_pair(
                        f"**{name1} Barcodes:**", diff.get('artwork1_barcodes', 'N/A'),
                        f"**{name2} Barcodes:**", diff.get('artwork2_barcodes', 'N/A'),
                    )
                    st.markdown(f"**Analysis:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant barcode differences found")

    with tabs[3]:  # Visual Differences
        st.markdown("### Visual and Design Differences")
        if visual_diffs:
            for diff in visual_diffs:
                with _impact_expander(diff, 'Visual Change'):
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
                    if 'recommendation' in diff:
                        st.markdown(f"**Recommendation:** {diff['recommendation']}")
        else:
            st.info("No significant visual differences found")

    with tabs[4]:  # Compliance Impact
        st.markdown("### Compliance and Regulatory Impact")
        if compliance_impacts:
            for impact in compliance_impacts:
                risk_level = impact.get('risk_level', 'MEDIUM')
                title = f"{_level_icon(risk_level)} {impact.get('area', 'Compliance Area')} - {risk_level} Risk"
                with st.expander(title):
                    st.markdown(f"**Impact:** {impact.get('impact', 'No description available')}")
                    st.markdown(f"**Recommendation:** {impact.get('recommendation', 'No recommendation provided')}")
        else:
            st.success("No compliance impacts identified")

    with tabs[5]:  # Recommendations
        st.markdown("### Action Items and Recommendations")
        if recommendations:
            for i, rec in enumerate(recommendations, 1):
                st.markdown(f"{i}. {rec}")
        else:
            st.info("No specific recommendations provided")

    # Raw response section (collapsible)
    if 'raw_response' in results:
        with st.expander("🔍 Raw Analysis Output"):
            st.text(results['raw_response'])


def _render_artwork_preview(artwork_data):
    """Render one artwork: heading, image, processing info, data summary."""
    st.markdown(f"### {artwork_data['file_name']}")
    st.image(
        ImageUtils.crop_image(artwork_data['image']),
        caption=artwork_data['file_name'],
        use_container_width=True,
    )

    # Display image processing info
    if 'image_quality' in artwork_data and 'image_size_bytes' in artwork_data:
        quality = artwork_data['image_quality']
        size_mb = artwork_data['image_size_bytes'] / (1024 * 1024)
        st.info(f"📊 Image Quality: {quality}% | Size: {size_mb:.2f}MB")

    # Display extracted data summary
    with st.expander("📊 Extracted Data Summary"):
        bounding_boxes = artwork_data['bounding_boxes']
        barcode_results = artwork_data['barcode_results']
        st.metric("Text Elements", len(bounding_boxes) if bounding_boxes else 0)
        st.metric("Barcodes", len(barcode_results) if barcode_results else 0)


def display_side_by_side_images(artwork1_data, artwork2_data):
    """Display the two artwork images side by side.

    Args:
        artwork1_data: Extraction result shown in the left column.
        artwork2_data: Extraction result shown in the right column.
    """
    st.markdown("## 🖼️ Side-by-Side Comparison")
    for column, artwork_data in zip(st.columns(2), (artwork1_data, artwork2_data)):
        with column:
            _render_artwork_preview(artwork_data)


def _file_mtime(path):
    """Return the file's modification time, or ``None`` if it is unavailable."""
    try:
        return os.path.getmtime(path)
    except OSError:
        return None


def _apply_artwork_source(slot, origin, source_id, open_pdf, file_name):
    """Load an artwork into session state, extracting only for a new source.

    Streamlit re-runs the script on every interaction. Without the cache each
    rerun (including the "Compare" click) would repeat the full extraction
    for every selected artwork.

    Args:
        slot: Artwork slot number (1 or 2).
        origin: ``"client"`` or ``"upload"``; each origin has its own cache
            entry per slot.
        source_id: Hashable value identifying the selected file.
        open_pdf: Zero-argument callable returning a binary file object, or
            ``None`` when the file could not be loaded.
        file_name: Name passed through to ``extract_pdf_data``.

    Returns:
        The extracted data, or ``None`` when loading or extraction failed.
        On a load failure the session state is left untouched.
    """
    cache_key = f"artwork{slot}_{origin}_cache"
    cached = st.session_state.get(cache_key)
    if cached is not None and cached[0] == source_id:
        data = cached[1]
    else:
        pdf_file = open_pdf()
        if pdf_file is None:
            return None
        with st.spinner(f"Processing artwork {slot}..."):
            data = extract_pdf_data(pdf_file, file_name)
        if data:
            # Failures are not cached so the next rerun retries.
            st.session_state[cache_key] = (source_id, data)

    st.session_state[f"artwork{slot}_data"] = data
    # Remember which source the data came from so stale comparison results
    # can be detected later.
    st.session_state[f"artwork{slot}_source"] = source_id if data else None
    return data


def _artwork_picker(slot, client_artwork_files):
    """Render the client-file / upload selector for one artwork slot.

    When both tabs provide a file, the upload is applied last and wins.
    """
    st.markdown(f"### 🎨 Artwork {slot}")

    # Create tabs for client files vs upload
    client_tab, upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])

    with client_tab:
        if client_artwork_files:
            placeholder = f"Select artwork {slot}..."
            options = [placeholder] + [f["name"] for f in client_artwork_files]
            selected = st.selectbox(
                f"Choose artwork {slot}:", options, key=f"art{slot}_select"
            )
            file_info = None
            if selected != placeholder:
                file_info = next(
                    (f for f in client_artwork_files if f["name"] == selected), None
                )
            if file_info is not None:

                def open_client_pdf():
                    content = load_artwork_content(file_info)
                    if not content:
                        return None
                    buffer = BytesIO(content)
                    buffer.name = file_info["name"]
                    return buffer

                path = file_info["path"]
                data = _apply_artwork_source(
                    slot,
                    "client",
                    ("client", path, _file_mtime(path)),
                    open_client_pdf,
                    file_info["name"],
                )
                if data:
                    st.success(f"✅ Loaded artwork {slot}: {selected}")
        else:
            st.info("No client artwork files found")

    with upload_tab:
        uploaded = st.file_uploader(
            f"Upload Artwork {slot} (PDF)", type=["pdf"], key=f"art{slot}_upload"
        )
        if uploaded is not None:
            source_id = (
                "upload",
                getattr(uploaded, "file_id", None),
                uploaded.name,
                getattr(uploaded, "size", None),
            )
            data = _apply_artwork_source(
                slot, "upload", source_id, lambda: uploaded, uploaded.name
            )
            if data:
                st.success(f"✅ Uploaded artwork {slot}: {uploaded.name}")


def main():
    """Run the Streamlit page: pick two artworks, compare them, show results."""
    st.set_page_config(layout="wide", page_title="Artwork Comparison Tool")

    # Load client artwork files
    client_artwork_files = load_client_artwork_files()

    # Initialize session state
    for state_key in ("artwork1_data", "artwork2_data", "comparison_results"):
        if state_key not in st.session_state:
            st.session_state[state_key] = None

    st.title("🎨 Artwork Comparison Tool")
    st.write("Compare two packaging artwork PDFs to identify differences in text, layout, barcodes, and visual elements.")

    # File selection section
    st.markdown("## 📁 Select Artworks to Compare")
    col1, col2 = st.columns(2)
    with col1:
        _artwork_picker(1, client_artwork_files)
    with col2:
        _artwork_picker(2, client_artwork_files)

    artwork1_data = st.session_state.artwork1_data
    artwork2_data = st.session_state.artwork2_data
    both_loaded = bool(artwork1_data and artwork2_data)
    current_sources = (
        st.session_state.get("artwork1_source"),
        st.session_state.get("artwork2_source"),
    )

    # Display images side by side if both are loaded
    if both_loaded:
        display_side_by_side_images(artwork1_data, artwork2_data)

    # Model selection
    model_option = _DEFAULT_MODEL

    # Comparison button
    if st.button("🔍 Compare Artworks", type="primary"):
        if both_loaded:
            with st.spinner("Analyzing artworks with Claude..."):
                st.session_state.comparison_results = compare_artworks_with_claude(
                    artwork1_data,
                    artwork2_data,
                    model=model_option,
                )
            if st.session_state.comparison_results:
                st.session_state["compared_sources"] = current_sources
                st.success("✅ Comparison analysis complete!")
            else:
                st.error("❌ Comparison analysis failed")
        else:
            st.warning("⚠️ Please select or upload both artworks before comparing")

    # Display comparison results, but only for the artworks they were computed
    # from; otherwise old findings would be labelled with the new file names.
    if st.session_state.comparison_results and both_loaded:
        if st.session_state.get("compared_sources") == current_sources:
            display_comparison_results(
                st.session_state.comparison_results,
                artwork1_data,
                artwork2_data,
            )
        else:
            st.info(
                "ℹ️ The selected artworks changed since the last comparison. "
                "Run the comparison again to refresh the results."
            )

    # Add helpful information
    st.markdown("---")
    st.markdown(_HELP_MARKDOWN)


if __name__ == "__main__":
    main()