"""Streamlit front end for the Packaging Compliance Checker.

The page lets a user pick or upload a requirements document and one or more
packaging PDFs, previews them, and runs the compliance analysis pipeline
(text extraction, barcode scan, metadata extraction, LLM verification),
rendering the results with per-requirement evidence overlays.

NOTE(review): this file was reconstructed from a copy whose indentation was
lost and whose HTML/CSS markup inside the ``unsafe_allow_html`` strings
appears to have been stripped. Those strings are reproduced exactly as found
(mostly a bare newline); restore the real markup from version control if it
is available. The "Analyze Compliance" section is placed at full width below
the two columns -- confirm that matches the intended layout.
"""

import base64
import glob
import io
import os
import shutil
import tempfile
from io import BytesIO

import pandas as pd
import streamlit as st
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont

from src.core.analysis import ComplianceAnalysis
from src.extract_text.extract_meta_data import PDFArtworkMetadataExtractor
from src.extract_text.google_document_api import GoogleDocumentAPI
from src.extract_text.ingest import RequirementsIngest
from src.utils.barcode import Barcode
from src.utils.image_utils import ImageUtils

CLIENT_REQUIREMENTS_DIR = "requirements_library/client-requirements"
COMPLIANCE_OUTLINE_PATH = "requirements_library/compliance_outline.txt"
# NOTE(review): a service-account key file referenced by a hard-coded path
# inside the source tree; consider loading it from configuration/secrets.
GOOGLE_CREDENTIALS_PATH = "src/extract_text/photon-services-f0d3ec1417d0.json"
DEFAULT_MODEL = "claude-sonnet-4-20250514"

# Outline colour used when drawing evidence boxes for each compliance status.
STATUS_COLORS = {
    "COMPLIANT": "green",
    "NON-COMPLIANT": "red",
    "PARTIALLY COMPLIANT": "orange",
    "ERROR": "purple",
    "UNKNOWN": "gray",
}


def _ensure_poppler_installed():
    """Install poppler-utils (needed by pdf2image) if it is not on PATH.

    Streamlit re-executes this script on every user interaction, so the
    install command is only run when the ``pdftoppm`` binary is missing
    instead of unconditionally on each rerun.
    """
    if shutil.which("pdftoppm") is None:
        os.system("apt-get update && apt-get install -y poppler-utils")


_ensure_poppler_installed()


def load_client_requirements_files():
    """Load all requirements and packaging files from client-requirements directory.

    Returns:
        A ``(requirements_files, packaging_files)`` tuple of lists. Each item
        is a dict with ``name`` (path relative to the base directory),
        ``path`` and ``type`` (``'requirements'`` or ``'packaging'``).
        Both lists are empty when the base directory does not exist.
    """
    base_path = CLIENT_REQUIREMENTS_DIR
    requirements_files = []
    packaging_files = []

    if not os.path.exists(base_path):
        return requirements_files, packaging_files

    # Walk through all subdirectories
    for root, _dirs, files in os.walk(base_path):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            lowered = file_name.lower()
            entry = {
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
            }
            # .txt/.pdf files mentioning "requirement" are requirements
            # documents; every other PDF is treated as packaging artwork.
            if lowered.endswith(('.txt', '.pdf')) and 'requirement' in lowered:
                requirements_files.append({**entry, 'type': 'requirements'})
            elif lowered.endswith('.pdf'):
                packaging_files.append({**entry, 'type': 'packaging'})

    return requirements_files, packaging_files


def load_file_content(file_info):
    """Load content from a file based on its type.

    Text requirements are returned as ``str``; packaging files and PDF
    requirements are returned as ``bytes`` (a PDF cannot be decoded as
    UTF-8 text). Returns ``None`` after reporting the error in the UI
    when the file cannot be read.
    """
    try:
        is_text = (
            file_info['type'] == 'requirements'
            and not file_info['path'].lower().endswith('.pdf')
        )
        if is_text:
            with open(file_info['path'], 'r', encoding='utf-8') as f:
                return f.read()
        with open(file_info['path'], 'rb') as f:
            return f.read()
    except Exception as e:
        st.error(f"Error loading file {file_info['name']}: {str(e)}")
        return None


def load_requirements_content(file_info):
    """Load requirements content as string; returns None on failure."""
    try:
        with open(file_info['path'], 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        st.error(f"Error loading requirements file {file_info['name']}: {str(e)}")
        return None


def load_packaging_content(file_info):
    """Load packaging content as bytes; returns None on failure."""
    try:
        with open(file_info['path'], 'rb') as f:
            return f.read()
    except Exception as e:
        st.error(f"Error loading packaging file {file_info['name']}: {str(e)}")
        return None


def _html(markup):
    """Emit *markup* through st.markdown with raw HTML enabled."""
    st.markdown(markup, unsafe_allow_html=True)


def _init_session_state(client_requirements_files, client_packaging_files):
    """Create the session-state keys used by the page if they are missing."""
    defaults = {
        "requirements_text": None,
        "analysis_results": None,
        "current_requirements_file": None,
        "uploaded_packaging_files": [],
        "selected_packaging_file": None,
        "client_requirements_files": client_requirements_files,
        "client_packaging_files": client_packaging_files,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_first_page(pdf_path):
    """Return the first page of *pdf_path* as a PIL image.

    Only page 1 is rendered because the app currently assumes single-page
    artwork. On failure the error is shown in the UI and a white
    placeholder image is returned so the rest of the flow can continue.
    """
    try:
        images = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not images:
            raise ValueError("No pages found in PDF")
        return images[0]
    except Exception as e:
        st.error(f"Error converting PDF to image: {str(e)}")
        placeholder = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(placeholder)
        draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')
        return placeholder


def _progress_value(confidence):
    """Coerce a confidence score into a value st.progress accepts.

    st.progress takes an int in [0, 100] or a float in [0.0, 1.0]. Valid
    inputs are passed through (clamped); floats above 1 are read as
    percentages; anything non-numeric yields None so the caller can skip
    the bar instead of aborting the whole report.
    """
    if isinstance(confidence, bool):
        return None
    if isinstance(confidence, int):
        return max(0, min(100, confidence))
    try:
        value = float(confidence)
    except (TypeError, ValueError):
        return None
    if value != value:  # NaN
        return None
    if value > 1.0:
        value /= 100.0
    return max(0.0, min(1.0, value))


def _evidence_kind(evidence):
    """Classify an evidence dict as 'text', 'barcode' or 'visual'."""
    if evidence.get("text_id") is not None:
        return "text"
    if evidence.get("barcode_id") is not None:
        return "barcode"
    return "visual"


def _render_client_requirements_tab():
    """Let the user pick a requirements document from the client library."""
    client_files = st.session_state.client_requirements_files
    if not client_files:
        st.info("No client requirements files found")
        return

    placeholder = "Select a requirements file..."
    req_options = [placeholder] + [f["name"] for f in client_files]
    selected_req_file = st.selectbox("Choose from client files:", req_options)
    if selected_req_file == placeholder:
        return

    selected_file_info = next(
        (f for f in client_files if f["name"] == selected_req_file), None
    )
    if not selected_file_info:
        return

    # PDFs are wrapped as bytes, text files as str, in a named file-like
    # object for RequirementsIngest.
    if selected_file_info["name"].lower().endswith('.pdf'):
        requirements_content = load_packaging_content(selected_file_info)
        make_buffer = io.BytesIO
    else:
        requirements_content = load_requirements_content(selected_file_info)
        make_buffer = io.StringIO

    if requirements_content is None:
        # The loader already reported the error; nothing to ingest.
        return
    if not requirements_content:
        st.warning(f"Requirements file {selected_req_file} is empty")
        return

    temp_file = make_buffer(requirements_content)
    temp_file.name = selected_file_info["name"]

    st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(temp_file)
    st.session_state.current_requirements_file = temp_file
    st.session_state.analysis_results = None  # Clear previous results

    # Display file type information
    requirements = st.session_state.requirements_text
    if isinstance(requirements, dict) and requirements.get('type', 'unknown') == 'pdf':
        st.success(f"✅ Loaded PDF requirements from: {selected_req_file}")
        st.info("📄 PDF will be processed natively by Claude for full visual analysis")
    else:
        st.success(f"✅ Loaded requirements from: {selected_req_file}")


def _render_upload_requirements_tab():
    """Let the user upload a requirements document (TXT or PDF)."""
    requirements_file = st.file_uploader(
        "Upload Requirements Document (TXT or PDF)", type=["txt", "pdf"]
    )

    # Only process requirements if a new file is uploaded
    if not requirements_file or requirements_file == st.session_state.current_requirements_file:
        return

    st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(requirements_file)
    st.session_state.current_requirements_file = requirements_file
    st.session_state.analysis_results = None  # Clear previous results

    # Display file type information
    requirements = st.session_state.requirements_text
    if isinstance(requirements, dict) and requirements.get('type', 'unknown') == 'pdf':
        file_size = requirements.get('file_size', 0)
        st.success(f"✅ Uploaded PDF requirements: {requirements_file.name} ({file_size:,} bytes)")
        st.info("📄 PDF will be processed natively by Claude for full visual analysis")
    else:
        st.success(f"✅ Uploaded requirements: {requirements_file.name}")


def _render_client_packaging_tab():
    """Let the user pick packaging PDFs from the client library."""
    client_files = st.session_state.client_packaging_files
    if not client_files:
        st.info("No client packaging files found")
        return

    selected_pkg_files = st.multiselect(
        "Choose from client files:", [f["name"] for f in client_files]
    )
    if not selected_pkg_files:
        return

    # Convert selected client files to named file-like objects
    client_file_objects = []
    for selected_file_name in selected_pkg_files:
        file_info = next(
            (f for f in client_files if f["name"] == selected_file_name), None
        )
        if file_info is None:
            continue
        file_content = load_packaging_content(file_info)
        if file_content:
            temp_file = io.BytesIO(file_content)
            temp_file.name = file_info["name"]
            client_file_objects.append(temp_file)

    st.session_state.uploaded_packaging_files = client_file_objects

    # Set the first file as selected if none is selected
    if not st.session_state.selected_packaging_file and client_file_objects:
        st.session_state.selected_packaging_file = client_file_objects[0]

    st.success(f"✅ Loaded {len(client_file_objects)} packaging files from client directory")


def _render_upload_packaging_tab():
    """Let the user upload one or more packaging PDFs."""
    packaging_files = st.file_uploader(
        "Upload Packaging PDFs", type=["pdf"], accept_multiple_files=True
    )

    if packaging_files:
        st.session_state.uploaded_packaging_files = packaging_files
        # Set the first file as selected if none is selected
        if not st.session_state.selected_packaging_file:
            st.session_state.selected_packaging_file = packaging_files[0]
        st.success(f"✅ Uploaded {len(packaging_files)} packaging files")
    elif not st.session_state.uploaded_packaging_files:
        # Only clear if no files are selected from client directory either
        st.session_state.selected_packaging_file = None


def _render_packaging_file_selector():
    """Show the selectbox that chooses which packaging file is previewed."""
    uploaded = st.session_state.uploaded_packaging_files
    if not uploaded:
        return

    _html('\n')
    file_names = [f.name for f in uploaded]
    current = st.session_state.selected_packaging_file
    # The remembered selection may no longer be in the list (the user
    # changed the upload / multiselect); fall back to the first entry
    # instead of letting list.index raise ValueError.
    if current and current.name in file_names:
        default_index = file_names.index(current.name)
    else:
        default_index = 0

    selected_file_name = st.selectbox(
        "Select packaging file to display:", file_names, index=default_index
    )

    # Update selected file
    for candidate in uploaded:
        if candidate.name == selected_file_name:
            st.session_state.selected_packaging_file = candidate
            break
    _html('\n')


def _render_requirements_preview():
    """Show the ingested requirements document in an expander."""
    requirements = st.session_state.requirements_text
    if not requirements:
        return

    _html('\n')
    with st.expander("📋 Requirements Document", expanded=True):
        if isinstance(requirements, dict):
            # PDF requirements
            file_type = requirements.get('type', 'unknown')
            filename = requirements.get('filename', 'Unknown')
            file_size = requirements.get('file_size', 0)

            st.markdown(f"**File Type:** {file_type.upper()}")
            st.markdown(f"**Filename:** {filename}")
            st.markdown(f"**File Size:** {file_size:,} bytes")

            if file_type == 'pdf':
                st.info("📄 This PDF will be processed natively by Claude for full visual analysis including charts, graphs, and visual layouts.")
                st.markdown("**Preview Text:**")
            st.text_area("Requirements Text", requirements.get('text_content', ''), height=200)
        else:
            # Text requirements (backward compatibility)
            st.text_area("Requirements Text", requirements, height=200)
    _html('\n')


def _render_artwork_preview():
    """Render the first page of the selected packaging PDF."""
    selected = st.session_state.selected_packaging_file
    if not selected:
        return

    _html('\n')
    with st.expander("🎨 Package Artwork", expanded=True):
        tmp_pdf_path = None
        try:
            # Reset file pointer to beginning
            selected.seek(0)

            # pdf2image needs a path, so spill the upload to a temp file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_pdf_path = tmp_file.name
                tmp_file.write(selected.read())

            page_image = _render_first_page(tmp_pdf_path)

            # Display the image with proportional sizing
            _html('\n')
            st.image(page_image, caption=f"Package: {selected.name}", use_container_width=True)
            _html('\n')
        except Exception as e:
            st.error(f"Error displaying package artwork: {str(e)}")
        finally:
            # Always remove the temp file, even when rendering failed.
            if tmp_pdf_path and os.path.exists(tmp_pdf_path):
                os.unlink(tmp_pdf_path)
    _html('\n')


def _render_input_column():
    """Render the left column: document pickers and previews."""
    # Stylish upload section with custom CSS
    _html(""" """)

    # Upload section container
    _html('\n')
    _html('\n📄 Document Upload\n')
    _html('\nUpload your requirements and packaging documents for compliance analysis\n')

    # Requirements file selection
    _html('\n')
    st.markdown("**📋 Requirements Document**")
    req_tab1, req_tab2 = st.tabs(["📁 Client Files", "📤 Upload New"])
    with req_tab1:
        _render_client_requirements_tab()
    with req_tab2:
        _render_upload_requirements_tab()
    _html('\n')

    # Packaging files selection
    _html('\n')
    st.markdown("**📦 Packaging PDFs**")
    pkg_tab1, pkg_tab2 = st.tabs(["📁 Client Files", "📤 Upload New"])
    with pkg_tab1:
        _render_client_packaging_tab()
    with pkg_tab2:
        _render_upload_packaging_tab()
    _html('\n')

    # File selector for multiple packaging files
    _render_packaging_file_selector()
    _html('\n')

    _render_requirements_preview()
    _render_artwork_preview()


def _render_compliance_guidelines():
    """Render the right column: the compliance outline read from disk."""
    _html(""" """)
    _html('\n')
    _html('\n📋 Compliance Guidelines\n')

    try:
        with open(COMPLIANCE_OUTLINE_PATH, "r", encoding="utf-8") as f:
            outline_content = f.read()

        _html('\n')

        # Parse and format the content for better display
        current_section = ""
        for raw_line in outline_content.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if line == "Compliance Outline":
                st.markdown("**📋 Compliance Outline**")
            elif line == "Compliant":
                _html('\n')
                st.markdown("🟢 **Compliant**")
                current_section = "compliant"
            elif line == "Partially Compliant":
                # Two markup calls: presumably close the previous status
                # container and open the next one -- TODO confirm.
                _html('\n')
                _html('\n')
                st.markdown("🟡 **Partially Compliant**")
                current_section = "partial"
            elif line == "Non-Compliant":
                _html('\n')
                _html('\n')
                st.markdown("🔴 **Non-Compliant**")
                current_section = "non_compliant"
            elif line.startswith("> "):
                # Description line
                st.markdown(f"*{line[2:]}*")
            elif line == "Example Criteria:":
                st.markdown("**Example Criteria:**")
            elif line.startswith("- "):
                # Criteria item
                st.markdown(f"• {line[2:]}")
            elif line and not line.startswith("Example Criteria:"):
                # Any other content
                st.markdown(line)

        # Close the last status div
        if current_section:
            _html('\n')
        _html('\n')
    except FileNotFoundError:
        st.error("Compliance outline file not found")
    except Exception as e:
        st.error(f"Error reading compliance outline: {e}")

    _html('\n')


def _stringify_color_keys(metadata_results):
    """Replace tuple colour keys with strings so the metadata is JSON-safe.

    Mutates ``metadata_results['text_colors']`` in place; does nothing when
    the metadata is empty, reports an error, or has no colour table.
    """
    if not metadata_results or metadata_results.get('error'):
        return
    if 'text_colors' not in metadata_results:
        return
    metadata_results['text_colors'] = {
        (f"RGB{color}" if isinstance(color, tuple) else str(color)): count
        for color, count in metadata_results['text_colors'].items()
    }


def _render_summary_tab(results):
    """Show status counts and the overall compliance report."""
    statuses = [v.get("compliance_status", "UNKNOWN") for v in results["verifications"]]
    compliant = statuses.count("COMPLIANT")
    non_compliant = statuses.count("NON-COMPLIANT")
    partial = statuses.count("PARTIALLY COMPLIANT")
    # Anything that is not one of the three known statuses counts as error.
    error = len(statuses) - compliant - non_compliant - partial

    metric_cols = st.columns(4)
    metric_cols[0].metric("Compliant", compliant)
    metric_cols[1].metric("Non-Compliant", non_compliant)
    metric_cols[2].metric("Partially Compliant", partial)
    metric_cols[3].metric("Errors", error)

    # Display the overall compliance report
    if "compliance_report" in results:
        st.markdown(results["compliance_report"])


def _render_barcode_results(results):
    """Show the barcode table and a valid/total summary."""
    st.markdown("### Barcode Scanning Results")
    barcode_data = results.get("barcode_data")
    if not barcode_data:
        st.info("No barcodes found in the packaging")
        return

    st.dataframe(pd.DataFrame(barcode_data))
    valid_barcodes = sum(1 for entry in barcode_data if entry.get("valid"))
    st.markdown(f"**Barcode Summary:** {valid_barcodes}/{len(barcode_data)} valid barcodes found")


def _render_top_counts(title, counts, label, empty_message, fmt=str):
    """Show the first ten entries of a ``value -> character count`` mapping."""
    with st.expander(title):
        if counts:
            rows = [
                {label: fmt(value), 'Character Count': count}
                for value, count in list(counts.items())[:10]  # Top 10
            ]
            st.dataframe(pd.DataFrame(rows))
        else:
            st.info(empty_message)


def _render_metadata_results(results):
    """Show typography / colour metadata extracted from the PDF."""
    st.markdown("### Typography and Design Metadata")
    metadata = results.get("metadata")
    if not metadata:
        st.info("No metadata available")
        return
    if metadata.get('error'):
        st.error(f"Metadata extraction error: {metadata['error']}")
        return

    info_col, dominant_col = st.columns(2)
    with info_col:
        st.markdown("**Extraction Info:**")
        st.write(f"**Method:** {metadata.get('extraction_method', 'Unknown')}")
        st.write(f"**Selectable Text:** {'Yes' if metadata.get('has_selectable_text') else 'No'}")
        st.write(f"**Pages Processed:** {metadata.get('pages_processed', 0)}")

    with dominant_col:
        st.markdown("**Dominant Elements:**")
        fonts = metadata.get('fonts')
        if fonts:
            st.write(f"**Font:** {max(fonts, key=fonts.get)}")
        font_sizes = metadata.get('font_sizes')
        if font_sizes:
            dominant_size = max(font_sizes, key=font_sizes.get)
            st.write(f"**Font Size:** {dominant_size:.1f}pt")
        text_colors = metadata.get('text_colors')
        if text_colors:
            st.write(f"**Text Color:** {max(text_colors, key=text_colors.get)}")

    # Display detailed metadata in expandable sections
    _render_top_counts(
        "📊 Detailed Font Analysis", metadata.get('fonts'),
        'Font', "No font data available", fmt=lambda font: font,
    )
    _render_top_counts(
        "📏 Font Size Distribution", metadata.get('font_sizes'),
        'Font Size (pt)', "No font size data available",
        fmt=lambda size: f"{size:.1f}",
    )
    _render_top_counts(
        "🎨 Text Color Analysis", metadata.get('text_colors'),
        'Color (RGB)', "No color data available",
    )


def _render_evidence_list(evidence_items):
    """List the evidence for one requirement, grouped by kind."""
    st.markdown("**Evidence Found:**")

    grouped = {"text": [], "barcode": [], "visual": []}
    for evidence in evidence_items:
        grouped[_evidence_kind(evidence)].append(evidence)

    if grouped["text"]:
        st.markdown("**Text Evidence:**")
        for evidence in grouped["text"]:
            text_id = evidence.get("text_id", "Unknown")
            evidence_text = evidence.get("evidence_text", "No description")
            st.markdown(f"- **Text ID {text_id}:** {evidence_text}")

    if grouped["barcode"]:
        st.markdown("**Barcode Evidence:**")
        for evidence in grouped["barcode"]:
            barcode_id = evidence.get("barcode_id", "Unknown")
            evidence_text = evidence.get("evidence_text", "No description")
            st.markdown(f"- **Barcode ID {barcode_id}:** {evidence_text}")

    if grouped["visual"]:
        st.markdown("**Visual Evidence (from image analysis):**")
        for number, evidence in enumerate(grouped["visual"], 1):
            evidence_text = evidence.get("evidence_text", "Visual element referenced by Claude")
            st.markdown(f"- **Visual {number}:** {evidence_text}")

    total_evidence = len(evidence_items)
    st.markdown(
        f"*Total evidence: {total_evidence} ({len(grouped['text'])} text, "
        f"{len(grouped['barcode'])} barcode, {len(grouped['visual'])} visual)*"
    )


def _annotate_text_evidence(draw, evidence, packaging_data, image_size, color, ordinal):
    """Draw (or describe) one text evidence item; return True if counted.

    Numeric text IDs are 1-based indexes into *packaging_data* whose
    bounding boxes hold normalised vertices; non-numeric IDs are only
    reported as an info message.
    """
    text_id = evidence["text_id"]
    try:
        is_numeric = isinstance(text_id, (int, float)) or (
            isinstance(text_id, str) and text_id.isdigit()
        )
        if not is_numeric:
            st.info(
                f"Text Evidence {ordinal}: "
                f"{evidence.get('evidence_text', 'Text element referenced by Claude')} (ID: {text_id})"
            )
            return True

        numeric_id = int(text_id)
        if numeric_id < 1:
            # Without this guard ID 0 would index [-1] and silently
            # highlight the last text block.
            raise IndexError("text IDs are 1-based")
        box = packaging_data[numeric_id - 1]["bounding_box"]

        # Denormalize vertices
        img_width, img_height = image_size
        points = [(v['x'] * img_width, v['y'] * img_height) for v in box]

        draw.polygon(points, outline=color, width=3)
        draw.text(points[0], f"Text Evidence {ordinal}", fill="white", stroke_width=2, stroke_fill="black")
        return True
    except (IndexError, KeyError, ValueError, TypeError) as e:
        st.warning(f"Could not find bounding box for Text ID {text_id}: {e}")
        return False


def _annotate_barcode_evidence(draw, evidence, results, color, ordinal):
    """Draw one barcode evidence box with labels; return True if drawn."""
    barcode_id = evidence["barcode_id"]
    try:
        match = next(
            (entry for entry in results.get("barcode_data", []) if entry["id"] == barcode_id),
            None,
        )
        if not match:
            st.warning(f"Could not find barcode data for Barcode ID {barcode_id}")
            return False

        pos = match["position"]
        x, y = pos["x"], pos["y"]
        w, h = pos["width"], pos["height"]

        draw.rectangle([x, y, x + w, y + h], outline=color, width=3)
        draw.text((x, y - 20), f"Barcode Evidence {ordinal}", fill="white", stroke_width=2, stroke_fill="black")
        draw.text((x, y - 40), f"{match['type']}: {match['data']}", fill="white", stroke_width=2, stroke_fill="black")
        return True
    except Exception as e:
        st.warning(f"Could not draw barcode bounding box for Barcode ID {barcode_id}: {e}")
        return False


def _summarise_counts(counts):
    """Format non-zero evidence counts as e.g. '2 text, 1 barcode'."""
    return ', '.join(
        f"{counts[kind]} {kind}"
        for kind in ("text", "barcode", "visual")
        if counts[kind] > 0
    )


def _render_evidence_visualization(verification, req_id, status, results, page_image):
    """Draw this requirement's evidence on a copy of the page image."""
    st.markdown(f"### Evidence Visualization for {req_id}")
    try:
        draw_image = page_image.copy()
        draw = ImageDraw.Draw(draw_image)
        color = STATUS_COLORS.get(status, "gray")

        st.markdown(f"**Status:** {status}", unsafe_allow_html=True)

        counts = {"text": 0, "barcode": 0, "visual": 0}
        evidence_items = verification["evidence_found"]

        if "packaging_data" in results:
            for evidence in evidence_items:
                kind = _evidence_kind(evidence)
                if kind == "text":
                    if _annotate_text_evidence(
                        draw, evidence, results["packaging_data"],
                        draw_image.size, color, counts["text"] + 1,
                    ):
                        counts["text"] += 1
                elif kind == "barcode":
                    if _annotate_barcode_evidence(
                        draw, evidence, results, color, counts["barcode"] + 1
                    ):
                        counts["barcode"] += 1
                else:
                    # Visual-only evidence has no box to draw.
                    counts["visual"] += 1
                    st.info(
                        f"Visual Evidence {counts['visual']}: "
                        f"{evidence.get('evidence_text', 'Visual element referenced by Claude')}"
                    )

            if any(counts.values()):
                st.markdown(f"**Evidence Count:** {_summarise_counts(counts)}")
                st.image(
                    ImageUtils.crop_image(draw_image),
                    caption=f"Evidence for {req_id} - {status}",
                    use_container_width=True,
                )
            else:
                st.info(f"No visual evidence found for {req_id}")
        else:
            # No bounding-box data: just count the evidence and show the
            # un-annotated page.
            for evidence in evidence_items:
                counts[_evidence_kind(evidence)] += 1

            if any(counts.values()):
                st.info(f"Evidence Count: {_summarise_counts(counts)} (no bounding box data available)")
                st.image(
                    ImageUtils.crop_image(page_image),
                    caption=f"Original image for {req_id} - {status}",
                    use_container_width=True,
                )
            else:
                st.info("No packaging data available for visualization")
    except Exception as e:
        st.error(f"Failed to generate visualization for {req_id}: {e}")


def _render_verification(index, verification, results, page_image):
    """Render one requirement's verification inside its own expander."""
    req_id = verification.get("requirement_id", f"REQ{index + 1}")
    status = verification.get("compliance_status", "UNKNOWN")

    # Non-compliant items start expanded so problems are visible at once.
    with st.expander(f"{req_id}: {status}", expanded=status != "COMPLIANT"):
        if "confidence" in verification:
            bar_value = _progress_value(verification["confidence"])
            if bar_value is not None:
                st.progress(bar_value)

        if "reasoning" in verification:
            st.markdown(f"**Reasoning:** {verification['reasoning']}")

        if verification.get("criteria"):
            st.markdown("**Criteria:**")
            for criterion in verification["criteria"]:
                st.markdown(f"- {criterion}")

        if verification.get("evidence_found"):
            _render_evidence_list(verification["evidence_found"])
            _render_evidence_visualization(verification, req_id, status, results, page_image)
        else:
            st.info(f"No evidence found for {req_id}")


def _render_analysis_results(results, page_image):
    """Render requirements, summary and detailed results for one file."""
    st.markdown("### Extracted Requirements")
    if "requirements" in results:
        st.dataframe(pd.DataFrame(results["requirements"]))

    st.markdown("### Verification Results")
    if "verifications" not in results:
        return

    summary_tab, details_tab = st.tabs(["Summary", "Detailed Results"])
    with summary_tab:
        _render_summary_tab(results)
    with details_tab:
        _render_barcode_results(results)
        _render_metadata_results(results)
        for index, verification in enumerate(results["verifications"]):
            _render_verification(index, verification, results, page_image)


def _analyze_packaging_file(packaging_file, model_option):
    """Run the full pipeline for one packaging PDF and render its results."""
    st.markdown(f"## Analyzing: {packaging_file.name}")
    progress_bar = st.progress(0)
    status_text = st.empty()

    # The extraction tools need a path, so save the upload temporarily.
    packaging_file.seek(0)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_pdf_path = tmp_file.name
        tmp_file.write(packaging_file.read())

    try:
        status_text.text("Extracting text from packaging PDF...")
        google_document_api = GoogleDocumentAPI(credentials_path=GOOGLE_CREDENTIALS_PATH)
        document = google_document_api.process_document(tmp_pdf_path)
        packaging_text = google_document_api.extract_text_with_markdown_table(document)
        packaging_data = google_document_api.extract_text_with_bounding_boxes(document)
        progress_bar.progress(25)

        # Render the page once; it feeds the LLM, the barcode scan and
        # the evidence overlays.
        status_text.text("Processing packaging image...")
        page_image = _render_first_page(tmp_pdf_path)
        buffer = BytesIO()
        page_image.save(buffer, format='PNG')
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        status_text.text("Scanning for barcodes...")
        barcode_results = Barcode().scan_and_validate(page_image)
        progress_bar.progress(40)

        status_text.text("Extracting metadata from packaging...")
        metadata_results = PDFArtworkMetadataExtractor().extract_metadata(tmp_pdf_path)
        _stringify_color_keys(metadata_results)
        progress_bar.progress(50)

        status_text.text("Analyzing requirements and compliance...")
        results = ComplianceAnalysis().analyze_compliance(
            st.session_state.requirements_text,
            packaging_text,
            packaging_data,
            image_base64,
            barcode_results,
            metadata_results,
            model=model_option,
        )
        st.session_state.analysis_results = results
        progress_bar.progress(100)
        status_text.text("Analysis complete!")

        _render_analysis_results(results, page_image)
    except Exception as e:
        st.error(f"Error analyzing {packaging_file.name}: {str(e)}")
    finally:
        # Clean up the temporary file
        if os.path.exists(tmp_pdf_path):
            os.unlink(tmp_pdf_path)


def main():
    """Build the page: inputs on the left, guidelines on the right, analysis below."""
    st.set_page_config(layout="wide", page_title="Packaging Compliance Checker")

    client_requirements_files, client_packaging_files = load_client_requirements_files()
    _init_session_state(client_requirements_files, client_packaging_files)

    st.title("Packaging Compliance Checker")
    st.write(
        "Upload a requirements document (plain text) that specifies requirements, "
        "and then upload one or more packaging PDFs to check for compliance."
    )

    # Create two columns for the layout
    input_col, guidelines_col = st.columns([1, 1])
    with input_col:
        _render_input_column()
    with guidelines_col:
        _render_compliance_guidelines()

    # The model is fixed for now (a selectbox used to offer alternatives).
    model_option = DEFAULT_MODEL

    # NOTE(review): rendered at full width below the columns; the original
    # nesting could not be recovered from the source -- confirm.
    if st.button("Analyze Compliance"):
        if st.session_state.requirements_text and st.session_state.uploaded_packaging_files:
            for packaging_file in st.session_state.uploaded_packaging_files:
                _analyze_packaging_file(packaging_file, model_option)
        else:
            st.warning("Please upload a requirements document and at least one packaging PDF.")

    # Add some helpful information at the bottom
    st.markdown("---")
    st.markdown("""
    ### How It Works
    1. **Upload Requirements**: The system extracts structured requirements from your document
    2. **Upload Packaging**: We extract text from PDFs and analyze them against requirements
    3. **Analysis**: Each requirement is verified using structured reasoning and semantic matching
    """)


if __name__ == "__main__":
    main()