# QC_Rules / main.py
# Jakecole1's picture
# Update main.py
# 67cedd5 verified
import streamlit as st
import tempfile
import os
import pandas as pd
from src.extract_text.ingest import RequirementsIngest
from src.extract_text.google_document_api import GoogleDocumentAPI
from src.extract_text.extract_meta_data import PDFArtworkMetadataExtractor
from src.core.analysis import ComplianceAnalysis
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont
from src.utils.image_utils import ImageUtils
import base64
from io import BytesIO
from src.utils.barcode import Barcode
import glob
import os
# pdf2image shells out to poppler's command-line tools (pdftoppm / pdfinfo).
# Streamlit re-executes this script on every user interaction, so only run the
# (slow, network-bound) apt-get install when the binary is actually missing.
import shutil

if shutil.which("pdftoppm") is None:
    os.system("apt-get update && apt-get install -y poppler-utils")
def load_client_requirements_files(base_path="requirements_library/client-requirements"):
    """Scan the client-requirements tree for requirements and packaging files.

    A file counts as a *requirements* document when its name contains
    "requirement" (case-insensitive) and it ends in ``.txt`` or ``.pdf``.
    Every other ``.pdf`` is treated as a *packaging* file. Anything else is
    ignored.

    Args:
        base_path: Directory to scan recursively. Defaults to the bundled
            client-requirements library.

    Returns:
        A ``(requirements_files, packaging_files)`` tuple of lists. Each entry
        is a dict with ``name`` (path relative to ``base_path``), ``path``
        (path as joined from ``base_path``) and ``type`` (``'requirements'``
        or ``'packaging'``). Both lists are empty when ``base_path`` does not
        exist.
    """
    requirements_files = []
    packaging_files = []
    if not os.path.exists(base_path):
        return requirements_files, packaging_files

    for root, dirs, files in os.walk(base_path):
        # os.walk yields entries in arbitrary filesystem order; sorting in
        # place keeps the selection lists stable between runs and machines.
        dirs.sort()
        for file_name in sorted(files):
            lowered = file_name.lower()
            is_pdf = lowered.endswith('.pdf')
            if (is_pdf or lowered.endswith('.txt')) and 'requirement' in lowered:
                file_type, bucket = 'requirements', requirements_files
            elif is_pdf:
                file_type, bucket = 'packaging', packaging_files
            else:
                continue

            file_path = os.path.join(root, file_name)
            bucket.append({
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
                'type': file_type,
            })

    return requirements_files, packaging_files
def load_file_content(file_info):
    """Load a client file's content, choosing text or binary mode as needed.

    Requirements documents are read as UTF-8 text, except PDF requirements
    (the directory scan tags ``*requirement*.pdf`` as ``'requirements'`` too),
    which are binary and are returned as bytes. Packaging files are always
    returned as bytes.

    Args:
        file_info: Dict with at least ``type``, ``path`` and ``name`` keys.

    Returns:
        ``str`` for text requirements, ``bytes`` for PDFs and packaging files,
        or ``None`` after reporting the problem via ``st.error`` when loading
        fails.
    """
    try:
        path = file_info['path']
        is_text = (
            file_info['type'] == 'requirements'
            and not path.lower().endswith('.pdf')
        )
        if is_text:
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        # PDFs are binary; decoding them as UTF-8 would raise or corrupt data.
        with open(path, 'rb') as f:
            return f.read()
    except Exception as e:
        st.error(f"Error loading file {file_info['name']}: {str(e)}")
        return None
def load_requirements_content(file_info):
    """Read a requirements file as UTF-8 text.

    Returns the decoded text, or ``None`` (after showing the failure with
    ``st.error``) when the file cannot be opened or decoded.
    """
    text = None
    try:
        with open(file_info['path'], mode='r', encoding='utf-8') as source:
            text = source.read()
    except Exception as exc:
        st.error(f"Error loading requirements file {file_info['name']}: {str(exc)}")
    return text
def load_packaging_content(file_info):
    """Read a packaging file as raw bytes.

    Returns the file's bytes, or ``None`` (after showing the failure with
    ``st.error``) when the file cannot be read.
    """
    payload = None
    try:
        with open(file_info['path'], mode='rb') as source:
            payload = source.read()
    except Exception as exc:
        st.error(f"Error loading packaging file {file_info['name']}: {str(exc)}")
    return payload
# ---------------------------------------------------------------------------
# Static page fragments and constants used by main() and its helpers.
# The multi-line strings are kept flush-left so the emitted text is exactly
# what the page rendered before.
# ---------------------------------------------------------------------------

_UPLOAD_SECTION_CSS = """
<style>
.upload-section {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 15px;
color: white;
margin-bottom: 20px;
}
.upload-title {
font-size: 24px;
font-weight: bold;
margin-bottom: 15px;
text-align: center;
}
.upload-description {
font-size: 14px;
opacity: 0.9;
margin-bottom: 20px;
text-align: center;
}
.file-uploader {
background: rgba(255, 255, 255, 0.1);
border: 2px dashed rgba(255, 255, 255, 0.3);
border-radius: 10px;
padding: 15px;
margin-bottom: 15px;
}
.requirements-display {
background: rgba(255, 255, 255, 0.05);
border-radius: 10px;
padding: 15px;
margin-top: 15px;
}
.artwork-display {
background: rgba(255, 255, 255, 0.05);
border-radius: 10px;
padding: 15px;
margin-top: 15px;
}
.image-container {
max-width: 100%;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
</style>
"""

_COMPLIANCE_SECTION_CSS = """
<style>
.compliance-section {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
padding: 20px;
border-radius: 15px;
color: white;
height: 100%;
}
.compliance-title {
font-size: 24px;
font-weight: bold;
margin-bottom: 15px;
text-align: center;
}
.compliance-content {
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
padding: 15px;
margin-top: 15px;
}
.status-compliant {
background: rgba(76, 175, 80, 0.2);
border-left: 4px solid #4CAF50;
padding: 10px;
margin: 10px 0;
border-radius: 5px;
}
.status-partial {
background: rgba(255, 193, 7, 0.2);
border-left: 4px solid #FFC107;
padding: 10px;
margin: 10px 0;
border-radius: 5px;
}
.status-non-compliant {
background: rgba(244, 67, 54, 0.2);
border-left: 4px solid #F44336;
padding: 10px;
margin: 10px 0;
border-radius: 5px;
}
</style>
"""

_HOW_IT_WORKS_MD = """
### How It Works
1. **Upload Requirements**: The system extracts structured requirements from your document
2. **Upload Packaging**: We extract text from PDFs and analyze them against requirements
3. **Analysis**: Each requirement is verified using structured reasoning and semantic matching
"""

# Model passed to ComplianceAnalysis.analyze_compliance (previously a
# commented-out selectbox offered a second model as well).
_DEFAULT_MODEL = "claude-sonnet-4-20250514"

# NOTE(review): a service-account key path inside the source tree; confirm the
# key file is not committed to a public repository.
_GOOGLE_CREDENTIALS_PATH = "src/extract_text/photon-services-f0d3ec1417d0.json"

_REQUIREMENTS_PLACEHOLDER = "Select a requirements file..."

# Outline heading -> (CSS class of the wrapping div, heading markdown).
_OUTLINE_STATUS_SECTIONS = {
    "Compliant": ("status-compliant", "🟢 **Compliant**"),
    "Partially Compliant": ("status-partial", "🟡 **Partially Compliant**"),
    "Non-Compliant": ("status-non-compliant", "🔴 **Non-Compliant**"),
}

# Outline / legend colour per compliance status.
_STATUS_COLORS = {
    "COMPLIANT": "green",
    "NON-COMPLIANT": "red",
    "PARTIALLY COMPLIANT": "orange",
    "ERROR": "purple",
    "UNKNOWN": "gray",
}


def _render_html(fragment):
    """Emit a raw HTML fragment through st.markdown."""
    st.markdown(fragment, unsafe_allow_html=True)


def _init_session_state(client_requirements_files, client_packaging_files):
    """Create every session-state key the page relies on, if not yet present."""
    defaults = {
        "requirements_text": None,
        "analysis_results": None,
        "current_requirements_file": None,
        "uploaded_packaging_files": [],
        "selected_packaging_file": None,
        "client_requirements_files": client_requirements_files,
        "client_packaging_files": client_packaging_files,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _remove_temp_file(path):
    """Delete a temporary file if it was created and still exists."""
    if path and os.path.exists(path):
        os.unlink(path)


def _write_temp_pdf(file_obj):
    """Copy a file-like object to a temporary .pdf and return its path.

    The caller owns the returned file and must remove it.
    """
    file_obj.seek(0)  # the object may have been read during an earlier rerun
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with tmp_file:
            tmp_file.write(file_obj.read())
    except Exception:
        _remove_temp_file(tmp_file.name)
        raise
    return tmp_file.name


def _first_page_image(pdf_path):
    """Render page 1 of a PDF, or a placeholder image if conversion fails."""
    try:
        # Only the first page is displayed/analyzed, so don't render the rest.
        images = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not images:
            raise ValueError("No pages found in PDF")
        return images[0]
    except Exception as e:
        st.error(f"Error converting PDF to image: {str(e)}")
        placeholder = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(placeholder)
        draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')
        return placeholder


def _is_pdf_requirements(requirements):
    """Tell whether ingested requirements describe a PDF document."""
    return isinstance(requirements, dict) and requirements.get('type', 'unknown') == 'pdf'


def _render_client_requirements_tab():
    """Let the user pick a requirements file from the client library."""
    client_files = st.session_state.client_requirements_files
    if not client_files:
        st.info("No client requirements files found")
        return

    options = [_REQUIREMENTS_PLACEHOLDER] + [f["name"] for f in client_files]
    selected_req_file = st.selectbox("Choose from client files:", options)
    if selected_req_file == _REQUIREMENTS_PLACEHOLDER:
        return

    selected_info = next((f for f in client_files if f["name"] == selected_req_file), None)
    if selected_info is None:
        return

    # Streamlit reruns the script on every interaction; re-ingest only when a
    # different client file is chosen so earlier results are not discarded.
    previous = st.session_state.current_requirements_file
    if getattr(previous, "client_path", None) != selected_info["path"]:
        import io

        if selected_info["name"].lower().endswith('.pdf'):
            content = load_packaging_content(selected_info)
            buffer_factory = io.BytesIO
        else:
            content = load_requirements_content(selected_info)
            buffer_factory = io.StringIO
        if not content:
            # Read errors were already reported by the loader; an empty file
            # has nothing to ingest either.
            st.warning(f"Requirements file {selected_req_file} is empty or could not be read")
            return

        # RequirementsIngest is handed a named file-like object.
        temp_file = buffer_factory(content)
        temp_file.name = selected_info["name"]
        temp_file.client_path = selected_info["path"]
        st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(temp_file)
        st.session_state.current_requirements_file = temp_file
        st.session_state.analysis_results = None  # Clear previous results

    if _is_pdf_requirements(st.session_state.requirements_text):
        st.success(f"✅ Loaded PDF requirements from: {selected_req_file}")
        st.info("📄 PDF will be processed natively by Claude for full visual analysis")
    else:
        st.success(f"✅ Loaded requirements from: {selected_req_file}")


def _render_uploaded_requirements_tab():
    """Let the user upload a requirements document (TXT or PDF)."""
    requirements_file = st.file_uploader("Upload Requirements Document (TXT or PDF)", type=["txt", "pdf"])
    # Only process requirements if a new file is uploaded
    if not requirements_file or requirements_file == st.session_state.current_requirements_file:
        return

    st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(requirements_file)
    st.session_state.current_requirements_file = requirements_file
    st.session_state.analysis_results = None  # Clear previous results

    requirements = st.session_state.requirements_text
    if _is_pdf_requirements(requirements):
        file_size = requirements.get('file_size', 0)
        st.success(f"✅ Uploaded PDF requirements: {requirements_file.name} ({file_size:,} bytes)")
        st.info("📄 PDF will be processed natively by Claude for full visual analysis")
    else:
        st.success(f"✅ Uploaded requirements: {requirements_file.name}")


def _render_client_packaging_tab():
    """Let the user pick packaging PDFs from the client library."""
    client_files = st.session_state.client_packaging_files
    if not client_files:
        st.info("No client packaging files found")
        return

    selected_pkg_files = st.multiselect("Choose from client files:", [f["name"] for f in client_files])
    if not selected_pkg_files:
        return

    # Wrap each selected file in a named in-memory buffer so it can be handled
    # exactly like an uploaded file further down.
    client_file_objects = []
    for selected_name in selected_pkg_files:
        file_info = next((f for f in client_files if f["name"] == selected_name), None)
        if file_info is None:
            continue
        file_content = load_packaging_content(file_info)
        if file_content:
            buffer = BytesIO(file_content)
            buffer.name = file_info["name"]
            client_file_objects.append(buffer)

    st.session_state.uploaded_packaging_files = client_file_objects
    # Set the first file as selected if none is selected
    if not st.session_state.selected_packaging_file and client_file_objects:
        st.session_state.selected_packaging_file = client_file_objects[0]
    st.success(f"✅ Loaded {len(client_file_objects)} packaging files from client directory")


def _render_uploaded_packaging_tab():
    """Let the user upload one or more packaging PDFs."""
    packaging_files = st.file_uploader("Upload Packaging PDFs", type=["pdf"], accept_multiple_files=True)
    if packaging_files:
        st.session_state.uploaded_packaging_files = packaging_files
        # Set the first file as selected if none is selected
        if not st.session_state.selected_packaging_file:
            st.session_state.selected_packaging_file = packaging_files[0]
        st.success(f"✅ Uploaded {len(packaging_files)} packaging files")
    elif not st.session_state.uploaded_packaging_files:
        # Only clear if no files are selected from client directory either
        st.session_state.selected_packaging_file = None


def _render_packaging_selector():
    """Choose which of the loaded packaging files is previewed."""
    uploaded = st.session_state.uploaded_packaging_files
    if not uploaded:
        return

    _render_html('<div class="file-uploader">')
    file_names = [f.name for f in uploaded]
    # The remembered file may no longer be in the list (e.g. after switching
    # from client files to uploads); fall back to the first entry.
    current_name = getattr(st.session_state.selected_packaging_file, "name", None)
    default_index = file_names.index(current_name) if current_name in file_names else 0
    selected_file_name = st.selectbox(
        "Select packaging file to display:",
        file_names,
        index=default_index,
    )
    for candidate in uploaded:
        if candidate.name == selected_file_name:
            st.session_state.selected_packaging_file = candidate
            break
    _render_html('</div>')


def _render_requirements_preview():
    """Show the ingested requirements document, if any."""
    requirements = st.session_state.requirements_text
    if not requirements:
        return

    _render_html('<div class="requirements-display">')
    with st.expander("📋 Requirements Document", expanded=True):
        if isinstance(requirements, dict):
            file_type = requirements.get('type', 'unknown')
            filename = requirements.get('filename', 'Unknown')
            file_size = requirements.get('file_size', 0)
            st.markdown(f"**File Type:** {file_type.upper()}")
            st.markdown(f"**Filename:** {filename}")
            st.markdown(f"**File Size:** {file_size:,} bytes")
            if file_type == 'pdf':
                st.info("📄 This PDF will be processed natively by Claude for full visual analysis including charts, graphs, and visual layouts.")
                st.markdown("**Preview Text:**")
            st.text_area("Requirements Text", requirements.get('text_content', ''), height=200)
        else:
            # Text requirements (backward compatibility)
            st.text_area("Requirements Text", requirements, height=200)
    _render_html('</div>')


def _render_artwork_preview():
    """Show the first page of the currently selected packaging PDF."""
    packaging_file = st.session_state.selected_packaging_file
    if not packaging_file:
        return

    _render_html('<div class="artwork-display">')
    with st.expander("🎨 Package Artwork", expanded=True):
        tmp_pdf_path = None
        try:
            tmp_pdf_path = _write_temp_pdf(packaging_file)
            page_image = _first_page_image(tmp_pdf_path)
            _render_html('<div class="image-container">')
            st.image(page_image, caption=f"Package: {packaging_file.name}", use_container_width=True)
            _render_html('</div>')
        except Exception as e:
            st.error(f"Error displaying package artwork: {str(e)}")
        finally:
            # Runs on the error path too, so the temp file never leaks.
            _remove_temp_file(tmp_pdf_path)
    _render_html('</div>')


def _render_upload_column():
    """Render the left column: file selection plus document/artwork previews."""
    _render_html(_UPLOAD_SECTION_CSS)
    _render_html('<div class="upload-section">')
    _render_html('<div class="upload-title">📄 Document Upload</div>')
    _render_html('<div class="upload-description">Upload your requirements and packaging documents for compliance analysis</div>')

    _render_html('<div class="file-uploader">')
    st.markdown("**📋 Requirements Document**")
    req_client_tab, req_upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])
    with req_client_tab:
        _render_client_requirements_tab()
    with req_upload_tab:
        _render_uploaded_requirements_tab()
    _render_html('</div>')

    _render_html('<div class="file-uploader">')
    st.markdown("**📦 Packaging PDFs**")
    pkg_client_tab, pkg_upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])
    with pkg_client_tab:
        _render_client_packaging_tab()
    with pkg_upload_tab:
        _render_uploaded_packaging_tab()
    _render_html('</div>')

    _render_packaging_selector()
    _render_html('</div>')

    _render_requirements_preview()
    _render_artwork_preview()


def _render_outline_lines(outline_content):
    """Render the compliance outline text line by line as styled markdown."""
    section_open = False
    for raw_line in outline_content.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line == "Compliance Outline":
            st.markdown("**📋 Compliance Outline**")
        elif line in _OUTLINE_STATUS_SECTIONS:
            css_class, heading = _OUTLINE_STATUS_SECTIONS[line]
            if section_open:
                _render_html('</div>')
            _render_html(f'<div class="{css_class}">')
            st.markdown(heading)
            section_open = True
        elif line.startswith("> "):
            # Description line
            st.markdown(f"*{line[2:]}*")
        elif line == "Example Criteria:":
            st.markdown("**Example Criteria:**")
        elif line.startswith("- "):
            # Criteria item
            st.markdown(f"• {line[2:]}")
        elif not line.startswith("Example Criteria:"):
            # Any other content
            st.markdown(line)
    # Close the last status div
    if section_open:
        _render_html('</div>')


def _render_compliance_guidelines():
    """Render the right column: the compliance outline reference."""
    _render_html(_COMPLIANCE_SECTION_CSS)
    _render_html('<div class="compliance-section">')
    _render_html('<div class="compliance-title">📋 Compliance Guidelines</div>')
    try:
        with open("requirements_library/compliance_outline.txt", "r", encoding="utf-8") as f:
            outline_content = f.read()
        _render_html('<div class="compliance-content">')
        _render_outline_lines(outline_content)
        _render_html('</div>')
    except FileNotFoundError:
        st.error("Compliance outline file not found")
    except Exception as e:
        st.error(f"Error reading compliance outline: {e}")
    _render_html('</div>')


def _format_font_size(size):
    """Format a font size with one decimal, tolerating non-numeric keys."""
    try:
        return f"{float(size):.1f}"
    except (TypeError, ValueError):
        return str(size)


def _show_confidence(confidence):
    """Render a confidence bar, clamped to the range st.progress accepts."""
    if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
        return
    if isinstance(confidence, float):
        st.progress(min(max(confidence, 0.0), 1.0))
    else:
        st.progress(min(max(confidence, 0), 100))


def _split_evidence(evidence_found):
    """Partition evidence into (text, barcode, visual) lists.

    An item with a non-None ``text_id`` is text evidence; otherwise one with a
    non-None ``barcode_id`` is barcode evidence; everything else is visual.
    """
    text_evidence, barcode_evidence, visual_evidence = [], [], []
    for evidence in evidence_found:
        if evidence.get("text_id") is not None:
            text_evidence.append(evidence)
        elif evidence.get("barcode_id") is not None:
            barcode_evidence.append(evidence)
        else:
            visual_evidence.append(evidence)
    return text_evidence, barcode_evidence, visual_evidence


def _evidence_summary(text_count, barcode_count, visual_count):
    """Build the "N text, N barcode, N visual" summary, omitting zero counts."""
    labelled = ((text_count, "text"), (barcode_count, "barcode"), (visual_count, "visual"))
    return ', '.join(f"{count} {label}" for count, label in labelled if count > 0)


def _render_summary_tab(results):
    """Show per-status counts and the overall compliance report."""
    statuses = [v.get("compliance_status", "UNKNOWN") for v in results["verifications"]]
    compliant = statuses.count("COMPLIANT")
    non_compliant = statuses.count("NON-COMPLIANT")
    partial = statuses.count("PARTIALLY COMPLIANT")
    error = len(statuses) - compliant - non_compliant - partial

    compliant_col, non_compliant_col, partial_col, error_col = st.columns(4)
    compliant_col.metric("Compliant", compliant)
    non_compliant_col.metric("Non-Compliant", non_compliant)
    partial_col.metric("Partially Compliant", partial)
    error_col.metric("Errors", error)

    if "compliance_report" in results:
        st.markdown(results["compliance_report"])


def _render_barcode_results(results):
    """Show the barcode scan table and a valid/total summary."""
    st.markdown("### Barcode Scanning Results")
    barcode_data = results.get("barcode_data")
    if not barcode_data:
        st.info("No barcodes found in the packaging")
        return
    st.dataframe(pd.DataFrame(barcode_data))
    valid_barcodes = sum(1 for entry in barcode_data if entry["valid"])
    total_barcodes = len(barcode_data)
    st.markdown(f"**Barcode Summary:** {valid_barcodes}/{total_barcodes} valid barcodes found")


def _render_metadata(results):
    """Show typography/design metadata extracted from the PDF."""
    st.markdown("### Typography and Design Metadata")
    metadata = results.get("metadata")
    if not metadata:
        st.info("No metadata available")
        return
    if metadata.get('error'):
        st.error(f"Metadata extraction error: {metadata['error']}")
        return

    info_col, dominant_col = st.columns(2)
    with info_col:
        st.markdown("**Extraction Info:**")
        st.write(f"**Method:** {metadata.get('extraction_method', 'Unknown')}")
        st.write(f"**Selectable Text:** {'Yes' if metadata.get('has_selectable_text') else 'No'}")
        st.write(f"**Pages Processed:** {metadata.get('pages_processed', 0)}")
    with dominant_col:
        st.markdown("**Dominant Elements:**")
        if metadata.get('fonts'):
            dominant_font = max(metadata['fonts'].items(), key=lambda x: x[1])[0]
            st.write(f"**Font:** {dominant_font}")
        if metadata.get('font_sizes'):
            dominant_size = max(metadata['font_sizes'].items(), key=lambda x: x[1])[0]
            st.write(f"**Font Size:** {_format_font_size(dominant_size)}pt")
        if metadata.get('text_colors'):
            dominant_color = max(metadata['text_colors'].items(), key=lambda x: x[1])[0]
            st.write(f"**Text Color:** {dominant_color}")

    # Each table lists the first ten entries in the order the extractor
    # returned them.
    with st.expander("📊 Detailed Font Analysis"):
        if metadata.get('fonts'):
            st.dataframe(pd.DataFrame([
                {'Font': font, 'Character Count': count}
                for font, count in list(metadata['fonts'].items())[:10]
            ]))
        else:
            st.info("No font data available")
    with st.expander("📏 Font Size Distribution"):
        if metadata.get('font_sizes'):
            st.dataframe(pd.DataFrame([
                {'Font Size (pt)': _format_font_size(size), 'Character Count': count}
                for size, count in list(metadata['font_sizes'].items())[:10]
            ]))
        else:
            st.info("No font size data available")
    with st.expander("🎨 Text Color Analysis"):
        if metadata.get('text_colors'):
            st.dataframe(pd.DataFrame([
                {'Color (RGB)': str(color), 'Character Count': count}
                for color, count in list(metadata['text_colors'].items())[:10]
            ]))
        else:
            st.info("No color data available")


def _render_evidence_listing(evidence_found):
    """List a requirement's evidence grouped by text / barcode / visual."""
    st.markdown("**Evidence Found:**")
    text_evidence, barcode_evidence, visual_evidence = _split_evidence(evidence_found)

    if text_evidence:
        st.markdown("**Text Evidence:**")
        for evidence in text_evidence:
            text_id = evidence.get("text_id", "Unknown")
            evidence_text = evidence.get("evidence_text", "No description")
            st.markdown(f"- **Text ID {text_id}:** {evidence_text}")
    if barcode_evidence:
        st.markdown("**Barcode Evidence:**")
        for evidence in barcode_evidence:
            barcode_id = evidence.get("barcode_id", "Unknown")
            evidence_text = evidence.get("evidence_text", "No description")
            st.markdown(f"- **Barcode ID {barcode_id}:** {evidence_text}")
    if visual_evidence:
        st.markdown("**Visual Evidence (from image analysis):**")
        for number, evidence in enumerate(visual_evidence, 1):
            evidence_text = evidence.get("evidence_text", "Visual element referenced by Claude")
            st.markdown(f"- **Visual {number}:** {evidence_text}")

    total_evidence = len(evidence_found)
    st.markdown(f"*Total evidence: {total_evidence} ({len(text_evidence)} text, {len(barcode_evidence)} barcode, {len(visual_evidence)} visual)*")


def _draw_text_evidence(draw, evidence, packaging_data, img_size, color, ordinal):
    """Outline one piece of text evidence; return True if it was counted.

    Numeric text IDs are looked up (1-based) in ``packaging_data`` and drawn
    as a polygon; non-numeric IDs are only reported as an info message.
    """
    text_id = evidence["text_id"]
    img_width, img_height = img_size
    try:
        is_numeric = isinstance(text_id, (int, float)) or (isinstance(text_id, str) and text_id.isdigit())
        if not is_numeric:
            # Handle non-numeric text IDs (like barcode references)
            st.info(f"Text Evidence {ordinal}: {evidence.get('evidence_text', 'Text element referenced by Claude')} (ID: {text_id})")
            return True

        # Text ID is 1-based, list is 0-based. Reject 0/negatives explicitly:
        # a negative index would silently pick a box from the end of the list.
        numeric_id = int(text_id)
        if not 1 <= numeric_id <= len(packaging_data):
            raise IndexError(f"text ID {numeric_id} is out of range")
        box = packaging_data[numeric_id - 1]["bounding_box"]
        # Vertices are scaled by the image size (treated as normalized).
        points = [(v['x'] * img_width, v['y'] * img_height) for v in box]
        draw.polygon(points, outline=color, width=3)
        draw.text(points[0], f"Text Evidence {ordinal}", fill="white", stroke_width=2, stroke_fill="black")
        return True
    except (IndexError, KeyError) as e:
        st.warning(f"Could not find bounding box for Text ID {text_id}: {e}")
        return False


def _draw_barcode_evidence(draw, evidence, results, color, ordinal):
    """Outline one piece of barcode evidence; return True if it was drawn."""
    barcode_id = evidence["barcode_id"]
    try:
        barcode_found = next(
            (entry for entry in results.get("barcode_data", []) if entry["id"] == barcode_id),
            None,
        )
        if not barcode_found:
            st.warning(f"Could not find barcode data for Barcode ID {barcode_id}")
            return False

        pos = barcode_found["position"]
        x, y = pos["x"], pos["y"]
        w, h = pos["width"], pos["height"]
        draw.rectangle([x, y, x + w, y + h], outline=color, width=3)
        draw.text((x, y - 20), f"Barcode Evidence {ordinal}", fill="white", stroke_width=2, stroke_fill="black")
        barcode_info = f"{barcode_found['type']}: {barcode_found['data']}"
        draw.text((x, y - 40), barcode_info, fill="white", stroke_width=2, stroke_fill="black")
        return True
    except Exception as e:
        st.warning(f"Could not draw barcode bounding box for Barcode ID {barcode_id}: {e}")
        return False


def _render_evidence_visualization(req_id, status, evidence_found, results, page_image):
    """Draw a requirement's evidence on a copy of the page image."""
    st.markdown(f"### Evidence Visualization for {req_id}")
    try:
        draw_image = page_image.copy()
        draw = ImageDraw.Draw(draw_image)
        color = _STATUS_COLORS.get(status, "gray")
        st.markdown(f"**Status:** <span style='color:{color}'>■</span> {status}", unsafe_allow_html=True)

        if "packaging_data" not in results:
            # No bounding boxes available: report counts and show the plain image.
            text_evidence, barcode_evidence, visual_evidence = _split_evidence(evidence_found)
            summary = _evidence_summary(len(text_evidence), len(barcode_evidence), len(visual_evidence))
            if summary:
                st.info(f"Evidence Count: {summary} (no bounding box data available)")
                st.image(ImageUtils.crop_image(page_image), caption=f"Original image for {req_id} - {status}", use_container_width=True)
            else:
                st.info("No packaging data available for visualization")
            return

        packaging_data = results["packaging_data"]
        text_count = barcode_count = visual_count = 0
        for evidence in evidence_found:
            if evidence.get("text_id") is not None:
                if _draw_text_evidence(draw, evidence, packaging_data, draw_image.size, color, text_count + 1):
                    text_count += 1
            elif evidence.get("barcode_id") is not None:
                if _draw_barcode_evidence(draw, evidence, results, color, barcode_count + 1):
                    barcode_count += 1
            else:
                # Visual-only evidence (no text_id or barcode_id)
                visual_count += 1
                st.info(f"Visual Evidence {visual_count}: {evidence.get('evidence_text', 'Visual element referenced by Claude')}")

        summary = _evidence_summary(text_count, barcode_count, visual_count)
        if summary:
            st.markdown(f"**Evidence Count:** {summary}")
            st.image(ImageUtils.crop_image(draw_image), caption=f"Evidence for {req_id} - {status}", use_container_width=True)
        else:
            st.info(f"No visual evidence found for {req_id}")
    except Exception as e:
        st.error(f"Failed to generate visualization for {req_id}: {e}")


def _render_verification(index, verification, results, page_image):
    """Render one requirement's verification inside its own expander."""
    req_id = verification.get("requirement_id", f"REQ{index+1}")
    status = verification.get("compliance_status", "UNKNOWN")

    # Anything that is not fully compliant starts expanded.
    with st.expander(f"{req_id}: {status}", expanded=status != "COMPLIANT"):
        if "confidence" in verification:
            _show_confidence(verification["confidence"])
        if "reasoning" in verification:
            st.markdown(f"**Reasoning:** {verification['reasoning']}")
        if verification.get("criteria"):
            st.markdown("**Criteria:**")
            for criterion in verification["criteria"]:
                st.markdown(f"- {criterion}")

        evidence_found = verification.get("evidence_found")
        if not evidence_found:
            st.info(f"No evidence found for {req_id}")
            return
        _render_evidence_listing(evidence_found)
        _render_evidence_visualization(req_id, status, evidence_found, results, page_image)


def _render_analysis_results(results, page_image):
    """Render extracted requirements and the summary / detailed result tabs."""
    st.markdown("### Extracted Requirements")
    if "requirements" in results:
        st.dataframe(pd.DataFrame(results["requirements"]))

    st.markdown("### Verification Results")
    if "verifications" not in results:
        return

    summary_tab, details_tab = st.tabs(["Summary", "Detailed Results"])
    with summary_tab:
        _render_summary_tab(results)
    with details_tab:
        _render_barcode_results(results)
        _render_metadata(results)
        for index, verification in enumerate(results["verifications"]):
            _render_verification(index, verification, results, page_image)


def _analyze_packaging_file(packaging_file, model_option):
    """Run the full extraction + compliance pipeline for one packaging PDF.

    Results are stored in ``st.session_state.analysis_results`` and rendered.
    Any failure is reported with ``st.error`` instead of being raised.
    """
    st.markdown(f"## Analyzing: {packaging_file.name}")
    progress_bar = st.progress(0)
    status_text = st.empty()

    tmp_pdf_path = None
    try:
        # The extractors below work on a file path, not a file object.
        tmp_pdf_path = _write_temp_pdf(packaging_file)

        status_text.text("Extracting text from packaging PDF...")
        google_document_api = GoogleDocumentAPI(credentials_path=_GOOGLE_CREDENTIALS_PATH)
        document = google_document_api.process_document(tmp_pdf_path)
        packaging_text = google_document_api.extract_text_with_markdown_table(document)
        packaging_data = google_document_api.extract_text_with_bounding_boxes(document)
        progress_bar.progress(25)

        status_text.text("Processing packaging image...")
        page_image = _first_page_image(tmp_pdf_path)
        # Convert to base64 once for analysis
        buffer = BytesIO()
        page_image.save(buffer, format='PNG')
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        status_text.text("Scanning for barcodes...")
        barcode_results = Barcode().scan_and_validate(page_image)
        progress_bar.progress(40)

        status_text.text("Extracting metadata from packaging...")
        metadata_results = PDFArtworkMetadataExtractor().extract_metadata(tmp_pdf_path)
        # Convert tuple keys to strings for JSON serialization
        if metadata_results and not metadata_results.get('error') and 'text_colors' in metadata_results:
            metadata_results['text_colors'] = {
                (f"RGB{color}" if isinstance(color, tuple) else str(color)): count
                for color, count in metadata_results['text_colors'].items()
            }
        progress_bar.progress(50)

        status_text.text("Analyzing requirements and compliance...")
        results = ComplianceAnalysis().analyze_compliance(
            st.session_state.requirements_text,
            packaging_text,
            packaging_data,
            image_base64,
            barcode_results,
            metadata_results,
            model=model_option,
        )
        st.session_state.analysis_results = results
        progress_bar.progress(100)
        status_text.text("Analysis complete!")

        _render_analysis_results(results, page_image)
    except Exception as e:
        st.error(f"Error analyzing {packaging_file.name}: {str(e)}")
    finally:
        # Clean up the temporary file
        _remove_temp_file(tmp_pdf_path)


def main():
    """Build the Packaging Compliance Checker page.

    Left column: requirements / packaging selection and previews. Right
    column: the compliance outline. Below: the "Analyze Compliance" button,
    which processes every loaded packaging file against the requirements.

    NOTE(review): the original indentation of this file was lost; the analysis
    section is rendered at page level (below both columns) here. Confirm that
    it was not meant to live inside the right-hand column.
    """
    st.set_page_config(layout="wide", page_title="Packaging Compliance Checker")

    client_requirements_files, client_packaging_files = load_client_requirements_files()
    _init_session_state(client_requirements_files, client_packaging_files)

    st.title("Packaging Compliance Checker")
    st.write(
        "Upload a requirements document (plain text) that specifies requirements, "
        "and then upload one or more packaging PDFs to check for compliance."
    )

    upload_col, guidelines_col = st.columns([1, 1])
    with upload_col:
        _render_upload_column()
    with guidelines_col:
        _render_compliance_guidelines()

    model_option = _DEFAULT_MODEL
    if st.button("Analyze Compliance"):
        if st.session_state.requirements_text and st.session_state.uploaded_packaging_files:
            for packaging_file in st.session_state.uploaded_packaging_files:
                _analyze_packaging_file(packaging_file, model_option)
        else:
            st.warning("Please upload a requirements document and at least one packaging PDF.")

    # Add some helpful information at the bottom
    st.markdown("---")
    st.markdown(_HOW_IT_WORKS_MD)
# Script entry point. pandas is already imported at the top of the module, so
# no additional import is needed before building the page.
if __name__ == "__main__":
    main()