import base64
import glob
import os
import shutil
import tempfile
from io import BytesIO

import pandas as pd
import streamlit as st
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont

from src.core.analysis import ComplianceAnalysis
from src.extract_text.extract_meta_data import PDFArtworkMetadataExtractor
from src.extract_text.google_document_api import GoogleDocumentAPI
from src.extract_text.ingest import RequirementsIngest
from src.utils.barcode import Barcode
from src.utils.image_utils import ImageUtils


# pdf2image's convert_from_path needs the Poppler command-line tools.
# Streamlit re-executes this script on every user interaction, so only invoke
# apt-get when the `pdftoppm` binary is actually missing instead of on each rerun.
if shutil.which("pdftoppm") is None:
    os.system("apt-get update && apt-get install -y poppler-utils")


def load_client_requirements_files(base_path="requirements_library/client-requirements"):
    """Catalogue requirements and packaging files found under ``base_path``.

    The directory tree is walked recursively. A file whose name contains
    ``requirement`` (case-insensitive) and ends in ``.txt`` or ``.pdf`` is a
    requirements document; any other ``.pdf`` is a packaging file; everything
    else is ignored.

    Args:
        base_path: Root directory to scan. Defaults to the client requirements
            library used by the app.

    Returns:
        A ``(requirements_files, packaging_files)`` tuple of lists. Each entry
        is a dict with ``name`` (path relative to ``base_path``), ``path``
        (joined from ``base_path``) and ``type`` (``'requirements'`` or
        ``'packaging'``). Both lists are empty when ``base_path`` is missing.
    """
    requirements_files = []
    packaging_files = []

    if not os.path.exists(base_path):
        return requirements_files, packaging_files

    for root, dirs, files in os.walk(base_path):
        # os.walk yields entries in filesystem order; sort so the pickers in
        # the UI list files identically on every platform and run.
        dirs.sort()
        for filename in sorted(files):
            lowered = filename.lower()
            if 'requirement' in lowered and lowered.endswith(('.txt', '.pdf')):
                file_type, bucket = 'requirements', requirements_files
            elif lowered.endswith('.pdf'):
                file_type, bucket = 'packaging', packaging_files
            else:
                continue

            file_path = os.path.join(root, filename)
            bucket.append({
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
                'type': file_type,
            })

    return requirements_files, packaging_files


def load_file_content(file_info):
    """Load a catalogued file, as text or bytes depending on what it is.

    Args:
        file_info: Dict with ``name``, ``path`` and ``type`` keys, as produced
            by ``load_client_requirements_files``.

    Returns:
        ``str`` for non-PDF requirements files (decoded as UTF-8), ``bytes``
        for everything else, or ``None`` if the file could not be read (the
        error is reported through ``st.error``).
    """
    try:
        # Requirements entries may be PDFs too; those are binary and must not
        # be decoded as UTF-8 text.
        is_text = (
            file_info['type'] == 'requirements'
            and not file_info['path'].lower().endswith('.pdf')
        )
        if is_text:
            with open(file_info['path'], 'r', encoding='utf-8') as f:
                return f.read()

        with open(file_info['path'], 'rb') as f:
            return f.read()
    except Exception as e:
        # Best-effort loader: surface the problem in the UI and let the caller
        # decide what to do with the missing content.
        st.error(f"Error loading file {file_info['name']}: {str(e)}")
        return None


def load_requirements_content(file_info):
    """Read a requirements file and return its contents as a string.

    ``file_info`` must provide ``path`` (read as UTF-8 text) and ``name``
    (used in the error message). Any failure is reported via ``st.error`` and
    yields ``None``.
    """
    try:
        handle = open(file_info['path'], 'r', encoding='utf-8')
        try:
            text = handle.read()
        finally:
            handle.close()
    except Exception as e:
        st.error(f"Error loading requirements file {file_info['name']}: {str(e)}")
        text = None
    return text


def load_packaging_content(file_info):
    """Read a packaging file and return its raw bytes.

    ``file_info`` must provide ``path`` (opened in binary mode) and ``name``
    (used in the error message). Any failure is reported via ``st.error`` and
    yields ``None``.
    """
    try:
        with open(file_info['path'], 'rb') as binary_file:
            payload = binary_file.read()
    except Exception as e:
        st.error(f"Error loading packaging file {file_info['name']}: {str(e)}")
        return None
    else:
        return payload


# First entry of the client requirements selectbox; means "nothing chosen".
_REQ_PLACEHOLDER = "Select a requirements file..."

_PDF_NATIVE_NOTE = "📄 PDF will be processed natively by Claude for full visual analysis"

# NOTE(review): the service-account key path is hard-coded; confirm whether it
# should come from configuration instead.
_GOOGLE_CREDENTIALS_PATH = "src/extract_text/photon-services-f0d3ec1417d0.json"

# Outline/bounding-box colour per compliance status.
_STATUS_COLORS = {
    "COMPLIANT": "green",
    "NON-COMPLIANT": "red",
    "PARTIALLY COMPLIANT": "orange",
    "ERROR": "purple",
    "UNKNOWN": "gray",
}

# Section headings recognised in compliance_outline.txt -> (CSS class, heading).
_OUTLINE_SECTIONS = {
    "Compliant": ("status-compliant", "🟢 **Compliant**"),
    "Partially Compliant": ("status-partial", "🟡 **Partially Compliant**"),
    "Non-Compliant": ("status-non-compliant", "🔴 **Non-Compliant**"),
}

_UPLOAD_CSS = """
<style>
.upload-section {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 15px;
    color: white;
    margin-bottom: 20px;
}
.upload-title {
    font-size: 24px;
    font-weight: bold;
    margin-bottom: 15px;
    text-align: center;
}
.upload-description {
    font-size: 14px;
    opacity: 0.9;
    margin-bottom: 20px;
    text-align: center;
}
.file-uploader {
    background: rgba(255, 255, 255, 0.1);
    border: 2px dashed rgba(255, 255, 255, 0.3);
    border-radius: 10px;
    padding: 15px;
    margin-bottom: 15px;
}
.requirements-display {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 10px;
    padding: 15px;
    margin-top: 15px;
}
.artwork-display {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 10px;
    padding: 15px;
    margin-top: 15px;
}
.image-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
</style>
"""

_COMPLIANCE_CSS = """
<style>
.compliance-section {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    padding: 20px;
    border-radius: 15px;
    color: white;
    height: 100%;
}
.compliance-title {
    font-size: 24px;
    font-weight: bold;
    margin-bottom: 15px;
    text-align: center;
}
.compliance-content {
    background: rgba(255, 255, 255, 0.1);
    border-radius: 10px;
    padding: 15px;
    margin-top: 15px;
}
.status-compliant {
    background: rgba(76, 175, 80, 0.2);
    border-left: 4px solid #4CAF50;
    padding: 10px;
    margin: 10px 0;
    border-radius: 5px;
}
.status-partial {
    background: rgba(255, 193, 7, 0.2);
    border-left: 4px solid #FFC107;
    padding: 10px;
    margin: 10px 0;
    border-radius: 5px;
}
.status-non-compliant {
    background: rgba(244, 67, 54, 0.2);
    border-left: 4px solid #F44336;
    padding: 10px;
    margin: 10px 0;
    border-radius: 5px;
}
</style>
"""


def _html(fragment):
    """Emit a raw HTML fragment through ``st.markdown``."""
    st.markdown(fragment, unsafe_allow_html=True)


def _init_session_state(defaults):
    """Set each session-state key in ``defaults`` that is not present yet."""
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _as_named_stream(content, name):
    """Wrap ``content`` in an in-memory file object carrying a ``name`` attribute."""
    import io

    stream = io.BytesIO(content) if isinstance(content, bytes) else io.StringIO(content)
    stream.name = name
    return stream


def _remove_if_exists(path):
    """Delete ``path`` when it is set and still exists."""
    if path and os.path.exists(path):
        os.unlink(path)


def _write_temp_pdf(file_obj):
    """Copy ``file_obj`` into a temporary ``.pdf`` file and return its path."""
    file_obj.seek(0)
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with tmp_file:
            tmp_file.write(file_obj.read())
    except Exception:
        # delete=False means nobody else will clean up a half-written file.
        _remove_if_exists(tmp_file.name)
        raise
    return tmp_file.name


def _first_page_image(pdf_path):
    """Return page one of the PDF as an image, or a placeholder on failure."""
    try:
        # Only the first page is ever displayed or analysed, so do not
        # rasterise the remaining pages.
        pages = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not pages:
            raise ValueError("No pages found in PDF")
        return pages[0]
    except Exception as e:
        st.error(f"Error converting PDF to image: {str(e)}")
        placeholder = Image.new('RGB', (800, 600), color='white')
        ImageDraw.Draw(placeholder).text(
            (400, 300), "PDF conversion failed", fill='black', anchor='mm'
        )
        return placeholder


def _ingest_requirements(document):
    """Parse ``document`` as the active requirements and reset previous results."""
    st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(document)
    st.session_state.current_requirements_file = document
    st.session_state.analysis_results = None


def _requirements_are_pdf():
    """Tell whether the ingested requirements are a dict tagged with type ``pdf``."""
    parsed = st.session_state.requirements_text
    return isinstance(parsed, dict) and parsed.get('type', 'unknown') == 'pdf'


def _render_requirements_picker():
    """Render the requirements chooser: client library tab plus upload tab."""
    _html('<div class="file-uploader">')
    st.markdown("**📋 Requirements Document**")

    library_tab, upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])

    with library_tab:
        library = st.session_state.client_requirements_files
        if library:
            selected_req_file = st.selectbox(
                "Choose from client files:",
                [_REQ_PLACEHOLDER] + [entry["name"] for entry in library],
            )
            entry = None
            if selected_req_file != _REQ_PLACEHOLDER:
                entry = next((e for e in library if e["name"] == selected_req_file), None)

            if entry:
                if entry["name"].lower().endswith('.pdf'):
                    content = load_packaging_content(entry)
                else:
                    content = load_requirements_content(entry)

                if content:
                    _ingest_requirements(_as_named_stream(content, entry["name"]))
                    if _requirements_are_pdf():
                        st.success(f"✅ Loaded PDF requirements from: {selected_req_file}")
                        st.info(_PDF_NATIVE_NOTE)
                    else:
                        st.success(f"✅ Loaded requirements from: {selected_req_file}")
                elif content is not None:
                    # A failed read was already reported by the loader; an
                    # empty file is not an error there, so say so here rather
                    # than ingesting nothing.
                    st.warning(f"Requirements file {entry['name']} is empty")
        else:
            st.info("No client requirements files found")

    with upload_tab:
        requirements_file = st.file_uploader(
            "Upload Requirements Document (TXT or PDF)", type=["txt", "pdf"]
        )
        if requirements_file and requirements_file != st.session_state.current_requirements_file:
            _ingest_requirements(requirements_file)
            if _requirements_are_pdf():
                file_size = st.session_state.requirements_text.get('file_size', 0)
                st.success(f"✅ Uploaded PDF requirements: {requirements_file.name} ({file_size:,} bytes)")
                st.info(_PDF_NATIVE_NOTE)
            else:
                st.success(f"✅ Uploaded requirements: {requirements_file.name}")

    _html('</div>')


def _render_packaging_picker():
    """Render the packaging chooser: client library tab plus upload tab."""
    _html('<div class="file-uploader">')
    st.markdown("**📦 Packaging PDFs**")

    library_tab, upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])

    with library_tab:
        library = st.session_state.client_packaging_files
        if library:
            selected_pkg_files = st.multiselect(
                "Choose from client files:", [entry["name"] for entry in library]
            )
            if selected_pkg_files:
                by_name = {}
                for entry in library:
                    by_name.setdefault(entry["name"], entry)  # first match wins

                client_file_objects = []
                for selected_name in selected_pkg_files:
                    entry = by_name[selected_name]
                    file_content = load_packaging_content(entry)
                    if file_content:
                        client_file_objects.append(_as_named_stream(file_content, entry["name"]))

                st.session_state.uploaded_packaging_files = client_file_objects
                if not st.session_state.selected_packaging_file and client_file_objects:
                    st.session_state.selected_packaging_file = client_file_objects[0]
                st.success(f"✅ Loaded {len(client_file_objects)} packaging files from client directory")
        else:
            st.info("No client packaging files found")

    with upload_tab:
        packaging_files = st.file_uploader(
            "Upload Packaging PDFs", type=["pdf"], accept_multiple_files=True
        )
        if packaging_files:
            st.session_state.uploaded_packaging_files = packaging_files
            if not st.session_state.selected_packaging_file:
                st.session_state.selected_packaging_file = packaging_files[0]
            st.success(f"✅ Uploaded {len(packaging_files)} packaging files")
        elif not st.session_state.uploaded_packaging_files:
            st.session_state.selected_packaging_file = None

    _html('</div>')


def _render_display_selector():
    """Let the user pick which loaded packaging file is previewed."""
    loaded_files = st.session_state.uploaded_packaging_files
    if not loaded_files:
        return

    _html('<div class="file-uploader">')
    file_names = [f.name for f in loaded_files]
    current = st.session_state.selected_packaging_file
    # The remembered selection can be stale (e.g. the file was deselected in
    # the multiselect); fall back to the first entry instead of raising.
    if current and current.name in file_names:
        default_index = file_names.index(current.name)
    else:
        default_index = 0
    selected_file_name = st.selectbox(
        "Select packaging file to display:", file_names, index=default_index
    )

    for candidate in loaded_files:
        if candidate.name == selected_file_name:
            st.session_state.selected_packaging_file = candidate
            break
    _html('</div>')


def _render_requirements_preview():
    """Show the ingested requirements document, if there is one."""
    parsed = st.session_state.requirements_text
    if not parsed:
        return

    _html('<div class="requirements-display">')
    with st.expander("📋 Requirements Document", expanded=True):
        if isinstance(parsed, dict):
            file_type = parsed.get('type', 'unknown')
            filename = parsed.get('filename', 'Unknown')
            file_size = parsed.get('file_size', 0)

            st.markdown(f"**File Type:** {file_type.upper()}")
            st.markdown(f"**Filename:** {filename}")
            st.markdown(f"**File Size:** {file_size:,} bytes")

            if file_type == 'pdf':
                st.info("📄 This PDF will be processed natively by Claude for full visual analysis including charts, graphs, and visual layouts.")
                st.markdown("**Preview Text:**")
            st.text_area("Requirements Text", parsed.get('text_content', ''), height=200)
        else:
            st.text_area("Requirements Text", parsed, height=200)
    _html('</div>')


def _render_artwork_preview():
    """Show the first page of the currently selected packaging file."""
    artwork = st.session_state.selected_packaging_file
    if not artwork:
        return

    _html('<div class="artwork-display">')
    with st.expander("🎨 Package Artwork", expanded=True):
        tmp_pdf_path = None
        try:
            tmp_pdf_path = _write_temp_pdf(artwork)
            page_image = _first_page_image(tmp_pdf_path)

            _html('<div class="image-container">')
            st.image(page_image, caption=f"Package: {artwork.name}", use_container_width=True)
            _html('</div>')
        except Exception as e:
            st.error(f"Error displaying package artwork: {str(e)}")
        finally:
            # Always remove the temp file, also when rendering failed.
            _remove_if_exists(tmp_pdf_path)
    _html('</div>')


def _render_compliance_guidelines():
    """Render requirements_library/compliance_outline.txt as styled guidelines."""
    _html('<div class="compliance-section">')
    _html('<div class="compliance-title">📋 Compliance Guidelines</div>')

    try:
        with open("requirements_library/compliance_outline.txt", "r", encoding="utf-8") as f:
            outline_content = f.read()

        _html('<div class="compliance-content">')

        section_open = False
        for raw_line in outline_content.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if line == "Compliance Outline":
                st.markdown("**📋 Compliance Outline**")
            elif line in _OUTLINE_SECTIONS:
                css_class, heading = _OUTLINE_SECTIONS[line]
                if section_open:
                    # Close the previous status box before opening the next.
                    _html('</div>')
                _html(f'<div class="{css_class}">')
                st.markdown(heading)
                section_open = True
            elif line.startswith("> "):
                st.markdown(f"*{line[2:]}*")
            elif line == "Example Criteria:":
                st.markdown("**Example Criteria:**")
            elif line.startswith("- "):
                st.markdown(f"• {line[2:]}")
            elif not line.startswith("Example Criteria:"):
                st.markdown(line)

        if section_open:
            _html('</div>')

        _html('</div>')

    except FileNotFoundError:
        st.error("Compliance outline file not found")
    except Exception as e:
        st.error(f"Error reading compliance outline: {e}")

    _html('</div>')


def _stringify_color_keys(metadata_results):
    """Replace the keys of ``text_colors`` with strings, in place.

    Tuple keys become ``RGB(...)``; any other key is passed through ``str``.
    Nothing happens when the metadata is empty or carries an ``error``.
    """
    if not metadata_results or metadata_results.get('error'):
        return
    if 'text_colors' not in metadata_results:
        return
    metadata_results['text_colors'] = {
        (f"RGB{color}" if isinstance(color, tuple) else str(color)): count
        for color, count in metadata_results['text_colors'].items()
    }


def _confidence_to_progress(confidence):
    """Map a confidence value onto what ``st.progress`` accepts, else ``None``.

    ``st.progress`` takes an int in 0..100 or a float in 0.0..1.0 and raises
    for anything else, which would abort rendering of the whole result.
    """
    if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
        return None
    if isinstance(confidence, int):
        return min(max(confidence, 0), 100)
    if confidence != confidence:  # NaN
        return None
    return min(max(confidence, 0.0), 1.0)


def _split_evidence(evidence_items):
    """Partition evidence dicts into ``(text, barcode, visual)`` lists.

    An item with a non-null ``text_id`` is text evidence, otherwise one with a
    non-null ``barcode_id`` is barcode evidence, otherwise it is visual.
    """
    text_items, barcode_items, visual_items = [], [], []
    for item in evidence_items:
        if item.get("text_id") is not None:
            text_items.append(item)
        elif item.get("barcode_id") is not None:
            barcode_items.append(item)
        else:
            visual_items.append(item)
    return text_items, barcode_items, visual_items


def _format_evidence_counts(text_count, barcode_count, visual_count):
    """Return e.g. ``"2 text, 1 visual"``, skipping categories that are zero."""
    parts = []
    if text_count > 0:
        parts.append(f"{text_count} text")
    if barcode_count > 0:
        parts.append(f"{barcode_count} barcode")
    if visual_count > 0:
        parts.append(f"{visual_count} visual")
    return ', '.join(parts)


def _render_count_table(title, counts, column, formatter, empty_message):
    """Show up to the first ten ``counts`` entries in an expander table."""
    with st.expander(title):
        if counts:
            st.dataframe(pd.DataFrame([
                {column: formatter(key), 'Character Count': count}
                for key, count in list(counts.items())[:10]
            ]))
        else:
            st.info(empty_message)


def _render_barcode_results(results):
    """Render the barcode table and the valid/total summary line."""
    st.markdown("### Barcode Scanning Results")
    barcodes = results.get("barcode_data")
    if not barcodes:
        st.info("No barcodes found in the packaging")
        return

    st.dataframe(pd.DataFrame(barcodes))
    valid_barcodes = sum(1 for entry in barcodes if entry["valid"])
    st.markdown(f"**Barcode Summary:** {valid_barcodes}/{len(barcodes)} valid barcodes found")


def _render_metadata(results):
    """Render extraction info, dominant font/size/colour and per-value tables."""
    st.markdown("### Typography and Design Metadata")
    metadata = results.get("metadata")
    if not metadata:
        st.info("No metadata available")
        return
    if metadata.get('error'):
        st.error(f"Metadata extraction error: {metadata['error']}")
        return

    info_col, dominant_col = st.columns(2)

    with info_col:
        st.markdown("**Extraction Info:**")
        st.write(f"**Method:** {metadata.get('extraction_method', 'Unknown')}")
        st.write(f"**Selectable Text:** {'Yes' if metadata.get('has_selectable_text') else 'No'}")
        st.write(f"**Pages Processed:** {metadata.get('pages_processed', 0)}")

    with dominant_col:
        st.markdown("**Dominant Elements:**")
        fonts = metadata.get('fonts')
        if fonts:
            st.write(f"**Font:** {max(fonts, key=fonts.get)}")
        font_sizes = metadata.get('font_sizes')
        if font_sizes:
            st.write(f"**Font Size:** {max(font_sizes, key=font_sizes.get):.1f}pt")
        text_colors = metadata.get('text_colors')
        if text_colors:
            st.write(f"**Text Color:** {max(text_colors, key=text_colors.get)}")

    _render_count_table(
        "📊 Detailed Font Analysis", metadata.get('fonts'),
        'Font', lambda font: font, "No font data available",
    )
    _render_count_table(
        "📏 Font Size Distribution", metadata.get('font_sizes'),
        'Font Size (pt)', lambda size: f"{size:.1f}", "No font size data available",
    )
    _render_count_table(
        "🎨 Text Color Analysis", metadata.get('text_colors'),
        'Color (RGB)', str, "No color data available",
    )


def _render_evidence_visualization(req_id, status, evidence_items, results, page_image):
    """Draw the located evidence for one requirement on a copy of the page image."""
    st.markdown(f"### Evidence Visualization for {req_id}")

    try:
        draw_image = page_image.copy()
        draw = ImageDraw.Draw(draw_image)
        img_width, img_height = draw_image.size
        color = _STATUS_COLORS.get(status, "gray")

        st.markdown(f"**Status:** <span style='color:{color}'>■</span> {status}", unsafe_allow_html=True)

        if "packaging_data" not in results:
            # No bounding boxes to draw: report counts next to the plain image.
            text_items, barcode_items, visual_items = _split_evidence(evidence_items)
            if text_items or barcode_items or visual_items:
                summary = _format_evidence_counts(len(text_items), len(barcode_items), len(visual_items))
                st.info(f"Evidence Count: {summary} (no bounding box data available)")
                st.image(ImageUtils.crop_image(page_image), caption=f"Original image for {req_id} - {status}", use_container_width=True)
            else:
                st.info("No packaging data available for visualization")
            return

        text_evidence_count = 0
        visual_evidence_count = 0
        barcode_evidence_count = 0

        for evidence in evidence_items:
            if evidence.get("text_id") is not None:
                text_id = evidence["text_id"]
                try:
                    if isinstance(text_id, (int, float)) or (isinstance(text_id, str) and text_id.isdigit()):
                        numeric_id = int(text_id)
                        # IDs are looked up as index + 1; reject 0/negatives so
                        # they cannot wrap around to the end of the list.
                        if numeric_id < 1:
                            raise IndexError("text IDs start at 1")
                        box = results["packaging_data"][numeric_id - 1]["bounding_box"]

                        # Vertices are scaled by the image size before drawing.
                        points = [(v['x'] * img_width, v['y'] * img_height) for v in box]
                        draw.polygon(points, outline=color, width=3)

                        text_evidence_count += 1
                        draw.text(points[0], f"Text Evidence {text_evidence_count}", fill="white", stroke_width=2, stroke_fill="black")
                    else:
                        text_evidence_count += 1
                        st.info(f"Text Evidence {text_evidence_count}: {evidence.get('evidence_text', 'Text element referenced by Claude')} (ID: {text_id})")
                except (IndexError, KeyError) as e:
                    st.warning(f"Could not find bounding box for Text ID {text_id}: {e}")
            elif evidence.get("barcode_id") is not None:
                barcode_id = evidence["barcode_id"]
                try:
                    barcode_found = next(
                        (entry for entry in results.get("barcode_data", []) if entry["id"] == barcode_id),
                        None,
                    )
                    if barcode_found:
                        pos = barcode_found["position"]
                        x, y = pos["x"], pos["y"]
                        w, h = pos["width"], pos["height"]

                        draw.rectangle([x, y, x + w, y + h], outline=color, width=3)

                        barcode_evidence_count += 1
                        draw.text((x, y - 20), f"Barcode Evidence {barcode_evidence_count}", fill="white", stroke_width=2, stroke_fill="black")

                        barcode_info = f"{barcode_found['type']}: {barcode_found['data']}"
                        draw.text((x, y - 40), barcode_info, fill="white", stroke_width=2, stroke_fill="black")
                    else:
                        st.warning(f"Could not find barcode data for Barcode ID {barcode_id}")
                except Exception as e:
                    st.warning(f"Could not draw barcode bounding box for Barcode ID {barcode_id}: {e}")
            else:
                visual_evidence_count += 1
                st.info(f"Visual Evidence {visual_evidence_count}: {evidence.get('evidence_text', 'Visual element referenced by Claude')}")

        if text_evidence_count > 0 or visual_evidence_count > 0 or barcode_evidence_count > 0:
            summary = _format_evidence_counts(text_evidence_count, barcode_evidence_count, visual_evidence_count)
            st.markdown(f"**Evidence Count:** {summary}")
            st.image(ImageUtils.crop_image(draw_image), caption=f"Evidence for {req_id} - {status}", use_container_width=True)
        else:
            st.info(f"No visual evidence found for {req_id}")

    except Exception as e:
        st.error(f"Failed to generate visualization for {req_id}: {e}")


def _render_verification(index, verification, results, page_image):
    """Render one verification entry inside its own expander."""
    req_id = verification.get("requirement_id", f"REQ{index+1}")
    status = verification.get("compliance_status", "UNKNOWN")

    with st.expander(f"{req_id}: {status}", expanded=status != "COMPLIANT"):
        progress_value = _confidence_to_progress(verification.get("confidence"))
        if progress_value is not None:
            st.progress(progress_value)

        if "reasoning" in verification:
            st.markdown(f"**Reasoning:** {verification['reasoning']}")

        if verification.get("criteria"):
            st.markdown("**Criteria:**")
            for criterion in verification["criteria"]:
                st.markdown(f"- {criterion}")

        evidence_items = verification.get("evidence_found")
        if not evidence_items:
            st.info(f"No evidence found for {req_id}")
            return

        st.markdown("**Evidence Found:**")
        text_evidence, barcode_evidence, visual_evidence = _split_evidence(evidence_items)

        if text_evidence:
            st.markdown("**Text Evidence:**")
            for evidence in text_evidence:
                text_id = evidence.get("text_id", "Unknown")
                evidence_text = evidence.get("evidence_text", "No description")
                st.markdown(f"- **Text ID {text_id}:** {evidence_text}")

        if barcode_evidence:
            st.markdown("**Barcode Evidence:**")
            for evidence in barcode_evidence:
                barcode_id = evidence.get("barcode_id", "Unknown")
                evidence_text = evidence.get("evidence_text", "No description")
                st.markdown(f"- **Barcode ID {barcode_id}:** {evidence_text}")

        if visual_evidence:
            st.markdown("**Visual Evidence (from image analysis):**")
            for number, evidence in enumerate(visual_evidence, 1):
                evidence_text = evidence.get("evidence_text", "Visual element referenced by Claude")
                st.markdown(f"- **Visual {number}:** {evidence_text}")

        total_evidence = len(evidence_items)
        st.markdown(f"*Total evidence: {total_evidence} ({len(text_evidence)} text, {len(barcode_evidence)} barcode, {len(visual_evidence)} visual)*")

        _render_evidence_visualization(req_id, status, evidence_items, results, page_image)


def _render_analysis_results(results, page_image):
    """Render extracted requirements plus the summary and detailed result tabs."""
    st.markdown("### Extracted Requirements")
    if "requirements" in results:
        st.dataframe(pd.DataFrame(results["requirements"]))

    st.markdown("### Verification Results")
    if "verifications" not in results:
        return
    verifications = results["verifications"]

    summary_tab, details_tab = st.tabs(["Summary", "Detailed Results"])

    with summary_tab:
        statuses = [v.get("compliance_status", "UNKNOWN") for v in verifications]
        compliant = statuses.count("COMPLIANT")
        non_compliant = statuses.count("NON-COMPLIANT")
        partial = statuses.count("PARTIALLY COMPLIANT")
        # Every status that is not one of the three known values counts as an error.
        error = len(statuses) - compliant - non_compliant - partial

        metric_cols = st.columns(4)
        metric_cols[0].metric("Compliant", compliant)
        metric_cols[1].metric("Non-Compliant", non_compliant)
        metric_cols[2].metric("Partially Compliant", partial)
        metric_cols[3].metric("Errors", error)

        if "compliance_report" in results:
            st.markdown(results["compliance_report"])

    with details_tab:
        _render_barcode_results(results)
        _render_metadata(results)
        for index, verification in enumerate(verifications):
            _render_verification(index, verification, results, page_image)


def _analyze_packaging_file(packaging_file, model):
    """Run the full pipeline for one packaging PDF and render its results.

    Steps: text extraction, first-page rasterisation, barcode scan, metadata
    extraction, compliance analysis. Any failure is reported via ``st.error``
    for this file only; the temporary PDF copy is always removed.
    """
    st.markdown(f"## Analyzing: {packaging_file.name}")

    progress_bar = st.progress(0)
    status_text = st.empty()

    tmp_pdf_path = None
    try:
        tmp_pdf_path = _write_temp_pdf(packaging_file)

        status_text.text("Extracting text from packaging PDF...")
        google_document_api = GoogleDocumentAPI(credentials_path=_GOOGLE_CREDENTIALS_PATH)
        document = google_document_api.process_document(tmp_pdf_path)
        packaging_text = google_document_api.extract_text_with_markdown_table(document)
        packaging_data = google_document_api.extract_text_with_bounding_boxes(document)
        progress_bar.progress(25)

        status_text.text("Processing packaging image...")
        page_image = _first_page_image(tmp_pdf_path)
        buffer = BytesIO()
        page_image.save(buffer, format='PNG')
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        status_text.text("Scanning for barcodes...")
        barcode_results = Barcode().scan_and_validate(page_image)
        progress_bar.progress(40)

        status_text.text("Extracting metadata from packaging...")
        metadata_results = PDFArtworkMetadataExtractor().extract_metadata(tmp_pdf_path)
        _stringify_color_keys(metadata_results)
        progress_bar.progress(50)

        status_text.text("Analyzing requirements and compliance...")
        results = ComplianceAnalysis().analyze_compliance(
            st.session_state.requirements_text,
            packaging_text,
            packaging_data,
            image_base64,
            barcode_results,
            metadata_results,
            model=model,
        )
        st.session_state.analysis_results = results
        progress_bar.progress(100)
        status_text.text("Analysis complete!")

        _render_analysis_results(results, page_image)

    except Exception as e:
        st.error(f"Error analyzing {packaging_file.name}: {str(e)}")

    finally:
        _remove_if_exists(tmp_pdf_path)


def main():
    """Build the Packaging Compliance Checker page.

    The left column holds the requirements/packaging pickers and previews, the
    right column the compliance guidelines. Pressing "Analyze Compliance" runs
    the analysis for every loaded packaging file and renders the results
    below the columns.
    """
    st.set_page_config(layout="wide", page_title="Packaging Compliance Checker")

    # The client library is only needed to seed the session; skip the
    # directory walk on reruns where it is already stored.
    if ("client_requirements_files" not in st.session_state
            or "client_packaging_files" not in st.session_state):
        client_requirements_files, client_packaging_files = load_client_requirements_files()
        _init_session_state({
            "client_requirements_files": client_requirements_files,
            "client_packaging_files": client_packaging_files,
        })

    _init_session_state({
        "requirements_text": None,
        "analysis_results": None,
        "current_requirements_file": None,
        "uploaded_packaging_files": [],
        "selected_packaging_file": None,
    })

    st.title("Packaging Compliance Checker")
    st.write(
        "Upload a requirements document (plain text) that specifies requirements, "
        "and then upload one or more packaging PDFs to check for compliance."
    )

    upload_col, guidelines_col = st.columns([1, 1])

    with upload_col:
        _html(_UPLOAD_CSS)

        _html('<div class="upload-section">')
        _html('<div class="upload-title">📄 Document Upload</div>')
        _html('<div class="upload-description">Upload your requirements and packaging documents for compliance analysis</div>')

        _render_requirements_picker()
        _render_packaging_picker()
        _render_display_selector()

        _html('</div>')

        _render_requirements_preview()
        _render_artwork_preview()

    with guidelines_col:
        _html(_COMPLIANCE_CSS)
        _render_compliance_guidelines()

    model_option = "claude-sonnet-4-20250514"

    if st.button("Analyze Compliance"):
        if st.session_state.requirements_text and st.session_state.uploaded_packaging_files:
            for packaging_file in st.session_state.uploaded_packaging_files:
                _analyze_packaging_file(packaging_file, model_option)
        else:
            st.warning("Please upload a requirements document and at least one packaging PDF.")

    st.markdown("---")
    st.markdown("""
    ### How It Works
    1. **Upload Requirements**: The system extracts structured requirements from your document
    2. **Upload Packaging**: We extract text from PDFs and analyze them against requirements
    3. **Analysis**: Each requirement is verified using structured reasoning and semantic matching
    """)


if __name__ == "__main__":
    # pandas is already imported at the top of the file, so the extra local
    # import that used to sit here was redundant.
    main()