File size: 31,182 Bytes
863cb78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
import os
import anthropic
import requests
import streamlit as st
import numpy as np
import json
import re
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from src.extract_text.google_document_api import GoogleDocumentAPI

# Anthropic Messages API endpoint; the LLM class POSTs every text and vision request to this URL.
CLAUDE_API_URL = "https://api.anthropic.com/v1/messages"



class LLM:
    """Small client for the Anthropic Messages API built on a retrying ``requests`` session.

    The three public ``call_claude_*`` methods never raise on transport or API
    failures: they report the problem through Streamlit (``st.warning`` /
    ``st.error``) and return an empty string instead.
    """

    # Application-level attempts made by ``_post_messages``; this is in
    # addition to the transport-level ``Retry`` policy mounted on the session.
    _MAX_ATTEMPTS = 3

    def __init__(self):
        """Read the API key from the environment and build the HTTP session.

        Raises:
            ValueError: If the ``CLAUDE_API_KEY`` environment variable is unset or empty.
        """
        self.claude_api_key = os.getenv('CLAUDE_API_KEY')
        if not self.claude_api_key:
            raise ValueError("Please set the CLAUDE_API_KEY environment variable.")

        # Transport-level retries: rate limiting (429), server errors (5xx)
        # and 529 overload responses, honouring any Retry-After header.
        retry_strategy = Retry(
            total=5,
            backoff_factor=2,
            status_forcelist=[429, 500, 502, 503, 504, 529],
            allowed_methods=["POST"],
            respect_retry_after_header=True,
        )

        self.session = requests.Session()
        self.session.mount("https://", HTTPAdapter(max_retries=retry_strategy))

    @staticmethod
    def _first_text(response_data):
        """Return the text of the first text-bearing content block, or ``None`` if there is none."""
        if not isinstance(response_data, dict):
            return None
        for block in response_data.get("content") or []:
            if isinstance(block, dict) and isinstance(block.get("text"), str):
                return block["text"]
        return None

    @staticmethod
    def _append_document_text(prompt, document_text) -> str:
        """Return ``prompt`` with ``document_text`` appended, unless it is blank or already included."""
        text = (document_text or "").strip()
        if not text or text in prompt:
            return prompt
        return f"{prompt}\n\nText extracted from the PDF document:\n{text}"

    @staticmethod
    def _pdf_to_text(pdf_base64) -> str:
        """Decode a base64-encoded PDF and return the text of all pages joined by newlines.

        Raises:
            ImportError: If the optional PyPDF2 dependency is not installed.
        """
        import base64
        import io

        from PyPDF2 import PdfReader  # optional dependency, imported lazily

        reader = PdfReader(io.BytesIO(base64.b64decode(pdf_base64)))
        # extract_text() may yield None for a page; treat that as an empty page.
        return "\n".join((page.extract_text() or "") for page in reader.pages)

    def _post_messages(self, payload, timeout, api_label) -> str:
        """POST ``payload`` to the Messages API and return the first text block.

        Args:
            payload: JSON-serialisable request body.
            timeout: Per-request timeout in seconds passed to ``requests``.
            api_label: Name used in user-facing messages (e.g. ``"Claude API"``).

        Returns:
            The response text, or ``""`` after reporting the failure via Streamlit.
        """
        import time  # local import: this module does not import time at file level

        headers = {
            "x-api-key": self.claude_api_key,
            "anthropic-version": "2023-06-01",
            "Content-Type": "application/json"
        }

        for attempt in range(self._MAX_ATTEMPTS):
            is_last_attempt = attempt == self._MAX_ATTEMPTS - 1
            try:
                response = self.session.post(
                    CLAUDE_API_URL,
                    headers=headers,
                    json=payload,
                    verify=True,  # keep TLS certificate verification explicit
                    timeout=timeout
                )

                if response.status_code == 529:
                    st.warning(f"Server overload (529) on attempt {attempt + 1}/{self._MAX_ATTEMPTS}. Retrying with exponential backoff...")
                    if is_last_attempt:
                        st.error("Server overload after all retries. Please try again later.")
                        return ""
                    # Back off 1s, then 2s, before the remaining attempts.
                    time.sleep(2 ** attempt)
                    continue

                response.raise_for_status()

                text = self._first_text(response.json())
                if text is None:
                    st.error(f"Unexpected response format from {api_label}")
                    return ""
                return text

            except requests.exceptions.SSLError as ssl_err:
                st.error(f"SSL Error when calling {api_label}. Please check your SSL certificates and network connection. Error: {ssl_err}")
                return ""
            except requests.exceptions.Timeout:
                # Timeouts are retried immediately until the attempts run out.
                st.warning(f"Timeout on attempt {attempt + 1}/{self._MAX_ATTEMPTS}. Retrying...")
                if is_last_attempt:
                    st.error("Request timed out after all retries")
                    return ""
            except json.JSONDecodeError as json_err:
                # Listed before RequestException: in recent requests releases the
                # error raised by response.json() appears to derive from both, and
                # this branch carries the more specific message.
                st.error(f"Invalid JSON response from {api_label}: {json_err}")
                return ""
            except requests.exceptions.RequestException as e:
                st.error(f"Error calling {api_label}: {str(e)}")
                return ""

        return ""

    def call_claude_api(self, prompt, system_prompt, model="claude-sonnet-4-20250514", max_tokens=2000) -> str:
        """Send a text-only prompt to Claude and return the reply text.

        Args:
            prompt: User message content.
            system_prompt: System prompt for the request.
            model: Model identifier sent in the payload.
            max_tokens: Maximum number of tokens requested for the reply.

        Returns:
            The reply text, or ``""`` if the call failed (the failure is shown via Streamlit).
        """
        payload = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": 0.1,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "system": system_prompt
        }
        return self._post_messages(payload, timeout=60, api_label="Claude API")

    def call_claude_vision_api(self, prompt, system_prompt, image_base64, model="claude-sonnet-4-20250514", max_tokens=2000) -> str:
        """Send a prompt together with one base64 image to Claude and return the reply text.

        Args:
            prompt: User message text.
            system_prompt: System prompt for the request.
            image_base64: Base64-encoded image data; it is sent with media type ``image/png``.
            model: Model identifier sent in the payload.
            max_tokens: Maximum number of tokens requested for the reply.

        Returns:
            The reply text, or ``""`` if the call failed (the failure is shown via Streamlit).
        """
        content = [
            {
                "type": "text",
                "text": prompt
            },
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": image_base64
                }
            }
        ]

        payload = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": 0,
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ],
            "system": system_prompt
        }
        # Vision requests get a longer timeout than text-only ones.
        return self._post_messages(payload, timeout=90, api_label="Claude Vision API")

    def call_claude_pdf_api(self, prompt, system_prompt, pdf_base64, model="claude-sonnet-4-20250514", max_tokens=4000) -> str:
        """Query Claude about a PDF by falling back to text extraction.

        The PDF is not sent natively. Its text is extracted with PyPDF2 (when
        available) and appended to ``prompt`` unless the prompt already contains
        it; the request then goes through ``call_claude_api``. If extraction is
        unavailable or fails, the prompt is sent unchanged.

        Args:
            prompt: User message content.
            system_prompt: System prompt for the request.
            pdf_base64: Base64-encoded PDF document.
            model: Model identifier sent in the payload.
            max_tokens: Maximum number of tokens requested for the reply.

        Returns:
            The reply text, or ``""`` if the API call failed.
        """
        st.info("📄 PDF requirements detected. Using text-based processing for now.")
        st.info("💡 For full visual PDF analysis, consider using the Converse API with citations enabled.")

        try:
            document_text = self._pdf_to_text(pdf_base64)
        except ImportError:
            st.warning("PyPDF2 not available. Using basic text processing for PDF.")
            document_text = ""
        except Exception as e:
            # Best effort: a broken or unreadable PDF must not abort the analysis.
            st.warning(f"PDF text extraction failed: {e}")
            st.warning("Falling back to basic text processing")
            document_text = ""

        full_prompt = self._append_document_text(prompt, document_text)
        return self.call_claude_api(full_prompt, system_prompt, model=model, max_tokens=max_tokens)

class ComplianceAnalysis:
    """Multi-step packaging compliance analysis driven by Claude.

    The workflow is: extract structured requirements from a requirements
    document, verify each requirement against the packaging content, then ask
    the model for a formatted summary report. Progress and problems are
    reported through Streamlit.
    """

    def __init__(self):
        """Create the ``LLM`` client used for every model call."""
        self.llm = LLM()

    @staticmethod
    def _extract_json_payload(response):
        """Return the JSON text embedded in a model response, cleaned for ``json.loads``.

        Prefers a ```json fenced block, then any fenced block, then the whole
        response. An unclosed fence runs to the end of the response. Control
        characters other than tab/newline/carriage return are removed, and raw
        newlines, carriage returns and tabs inside double-quoted strings are
        escaped.
        """
        fenced = re.search(r"```json(.*?)(?:```|\Z)", response, re.DOTALL)
        if fenced is None:
            fenced = re.search(r"```(.*?)(?:```|\Z)", response, re.DOTALL)
        json_content = fenced.group(1).strip() if fenced else response

        # Drop control characters that are never valid in JSON text.
        json_content = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', json_content)
        # Escape raw line breaks and tabs that appear inside string literals.
        json_content = re.sub(
            r'(?<!\\)"(?:[^"\\]|\\.)*?(?<!\\)"',
            lambda m: m.group(0).replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t'),
            json_content,
        )
        return json_content

    @staticmethod
    def _infer_compliance_status(response):
        """Guess a compliance status from free text, checking the most specific label first.

        "NON-COMPLIANT" and "PARTIALLY COMPLIANT" both contain "COMPLIANT", so
        the plain "COMPLIANT" test has to come last.
        """
        upper = response.upper()
        if "NON-COMPLIANT" in upper:
            return "NON-COMPLIANT"
        if "PARTIALLY" in upper:
            return "PARTIALLY COMPLIANT"
        if "COMPLIANT" in upper:
            return "COMPLIANT"
        return "UNKNOWN"

    @staticmethod
    def _error_verification(requirement_id, reasoning):
        """Build the verification dict returned when a requirement could not be verified."""
        return {
            "requirement_id": requirement_id,
            "evidence_found": [],
            "compliance_status": "ERROR",
            "reasoning": reasoning,
            "confidence": 0
        }

    def _parse_verification_json(self, json_content, response, requirement_id):
        """Parse a verification reply, falling back step by step.

        1. Parse ``json_content`` directly.
        2. Parse the outermost ``{...}`` span found in it.
        3. Build a low-confidence result whose status is inferred from the raw text.
        """
        # Strategy 1: direct parsing
        try:
            return json.loads(json_content)
        except json.JSONDecodeError as direct_err:
            st.warning(f"Initial JSON parsing failed: {direct_err}")

        # Strategy 2: pull a JSON object out of surrounding text
        json_match = re.search(r'\{.*\}', json_content, re.DOTALL)
        if json_match:
            try:
                extracted = json.loads(json_match.group(0))
            except json.JSONDecodeError as extract_err:
                st.warning(f"JSON extraction failed: {extract_err}")
            else:
                st.info("Successfully extracted JSON from malformed response")
                return extracted

        # Strategy 3: minimal structure built from the raw response text
        fallback = {
            "requirement_id": requirement_id,
            "criteria": ["Unable to parse criteria"],
            "evidence_found": [],
            "compliance_status": self._infer_compliance_status(response),
            "reasoning": f"Response parsing failed. Raw response: {response[:200]}...",
            "confidence": 0.1
        }
        st.warning("Created fallback JSON structure due to parsing errors")
        return fallback

    def extract_structured_requirements(self, requirements_data) -> list[dict]:
        """
        Use Claude to extract structured requirements from the requirements document.

        Args:
            requirements_data: Either a string (text requirements) or a dict. For a dict,
                ``text_content`` holds the text, ``type`` the kind (``'pdf'`` selects the
                PDF path) and ``content`` the base64 PDF.

        Returns:
            A list of requirement dicts as returned by the model (the prompt asks for
            ``id``, ``description``, ``category`` and ``source_reference``). Returns an
            empty list when the input is invalid or empty, or the reply cannot be parsed
            as a JSON array; the reason is shown via ``st.error``.
        """
        pdf_base64 = None
        if isinstance(requirements_data, str):
            requirements_text = requirements_data
            requirements_type = "text"
        elif isinstance(requirements_data, dict):
            requirements_text = requirements_data.get('text_content', '')
            requirements_type = requirements_data.get('type', 'text')
            if requirements_type == 'pdf':
                pdf_base64 = requirements_data.get('content', '')
        else:
            st.error("Invalid requirements data format. Please upload a valid requirements document.")
            return []

        if not requirements_text or not requirements_text.strip():
            st.error("Requirements text is empty. Please upload a valid requirements document.")
            return []

        system_prompt = """You are an expert requirements analyst. Extract clear, structured requirements from documents. You must always return valid JSON, even if no specific requirements are found."""

        extraction_prompt = f"""
        Extract all requirements from this document (not just allergen requirements):
        
        {requirements_text}
        
        For each requirement found, provide:
        1. Unique ID (REQ001, REQ002, etc.)
        2. Description (verbatim from the document)
        3. Category (Font Size, Allergen List, Formatting, Placement, Barcode, Organic, Promotional, etc.)
        4. Source reference (section/paragraph or line number)
        
        If no requirements are found, return an empty array: []
        
        Return as JSON array with fields: id, description, category, source_reference.
        
        Example:
        ```json
        [
        {{
            "id": "REQ001", 
            "description": "IF the product is labeled as organic, THEN a certified organic seal must be visible", 
            "category": "Organic",
            "source_reference": "Line 1"
        }},
        {{
            "id": "REQ002", 
            "description": "IF there is a promotional offer mentioned, THEN include the offer expiry date", 
            "category": "Promotional",
            "source_reference": "Line 2"
        }}
        ]
        ```
        
        IMPORTANT: Always return valid JSON. If you cannot extract any requirements, return an empty array: []
        """

        # PDF input goes through the PDF helper; everything else uses the text API.
        if requirements_type == 'pdf' and pdf_base64:
            response = self.llm.call_claude_pdf_api(extraction_prompt, system_prompt, pdf_base64, model='claude-sonnet-4-20250514')
        else:
            response = self.llm.call_claude_api(extraction_prompt, system_prompt, model='claude-3-5-haiku-20241022')

        try:
            requirements = json.loads(self._extract_json_payload(response))
            if not isinstance(requirements, list):
                raise ValueError("expected a JSON array of requirements")
        except Exception as e:
            # Best effort: any parsing problem yields an empty result, not a crash.
            st.error(f"Error parsing extracted requirements: {e}")
            st.error(f"Raw response: {response}")
            return []

        # Later steps call .get() on each entry, so keep only JSON objects.
        return [item for item in requirements if isinstance(item, dict)]

    def verify_individual_requirement(self, requirement, markdown_table, image=None, barcode_data=None, metadata=None, requirements_data=None):
        """
        Use structured reasoning to verify if a specific requirement is met in the packaging text.

        Args:
            requirement: A dictionary containing requirement details (``id``,
                ``description``, ``category``); missing keys fall back to placeholders.
            markdown_table: The markdown table extracted from the packaging PDF
            image: The image of the packaging document (optional); when given, the
                vision API is used and the value is passed as the base64 image.
            barcode_data: List of barcode dicts (optional); ``id``, ``type``, ``data``
                and ``valid`` are forwarded to the model.
            metadata: Dictionary containing font, font size, and color metadata (optional);
                ignored when it has a truthy ``error`` entry.
            requirements_data: Original requirements data; accepted for interface
                compatibility and not used by this method.
        Returns:
            A dictionary with verification results including reasoning and compliance
            status. On an empty or unparseable reply the status is ``"ERROR"``.
        """
        req_id = requirement.get('id', 'Unknown')
        req_description = requirement.get('description', '')
        req_category = requirement.get('category', 'Unknown')

        system_prompt = """You are a regulatory compliance expert. Provide detailed, objective compliance reports."""

        verification_prompt = f"""
        You are a regulatory compliance expert. Provide detailed, objective compliance reports.
        I need to verify if the following specific requirement is met in the packaging text:
        
        Requirement ID: {req_id}
        Requirement Description: {req_description}
        Requirement Category: {req_category}
        
        Here is the packaging text to analyze:
        
        {markdown_table}
        """

        if barcode_data:
            # Forward only the fields the model needs, to save tokens.
            barcode_summary = [
                {
                    'id': barcode['id'],
                    'type': barcode['type'],
                    'data': barcode['data'],
                    'valid': barcode['valid']
                }
                for barcode in barcode_data
            ]

            verification_prompt += f"""
        
        Barcode Information Found:
        {json.dumps(barcode_summary, indent=2)}
        
        When analyzing barcode-related requirements, consider:
        - Barcode ID for evidence reference
        - Barcode type and validation status
        """

        if metadata and not metadata.get('error'):
            # Condensed typography/design metadata, again to save tokens.
            metadata_summary = {
                'extraction_method': metadata.get('extraction_method', 'unknown'),
                'has_selectable_text': metadata.get('has_selectable_text', False),
                'pages_processed': metadata.get('pages_processed', 0),
                'dominant_font': metadata.get('fonts', {}),
                'dominant_font_size': metadata.get('font_sizes', {}),
                'dominant_text_color': metadata.get('text_colors', {})
            }

            verification_prompt += f"""
        
        Typography and Design Metadata:
        {json.dumps(metadata_summary, indent=2)}
        
        When analyzing typography and design requirements, consider:
        - Font types and their usage frequency
        - Font sizes and their distribution
        - Text colors and their application
        - Whether text is selectable or requires OCR
        """

        verification_prompt += f"""
        
        Verify this requirement using these steps:
        1. Break down into checkable criteria
        2. Search for evidence in packaging text (provide Text ID)
        3. For visual elements not in text, describe clearly (text_id = null)
        4. For barcode evidence, use Barcode ID (text_id = null)
        5. Provide specific examples/quotes
        6. Determine: COMPLIANT/NON-COMPLIANT/PARTIALLY COMPLIANT
        - Compliant: All applicable rules are fully met without any deviation.
        - Partially Compliant: Some rules are met, but minor issues/omissions that don't constitute a full failure but need attention.
        - Non-Compliant: One or more critical rules are violated or omitted, posing a regulatory, safety, or logistical risk.
        7. Explain reasoning
        
        For visual evidence, describe:
        - Location (e.g., "top right corner", "bottom section")
        - Visual characteristics (e.g., "large bold text", "red warning box")
        - Content description (e.g., "allergen warning in red box")
        
        If there is barcode evidence, include:
        - Barcode ID
        - Barcode type and validation status
        
        Return JSON with structure:
        ```json
        {{
        "requirement_id": "{req_id}",
        "criteria": ["criterion 1", "criterion 2"],
        "evidence_found": [
            {{"text_id": <Text ID or null>, "evidence_text": "<description>", "barcode_id": "<Barcode ID ONLY if applicable>"}}
        ],
        "compliance_status": "COMPLIANT/NON-COMPLIANT/PARTIALLY COMPLIANT",
        "reasoning": "Detailed explanation",
        "confidence": 0.95
        }}
        ```
        """

        # Use the vision API when an image is supplied, otherwise the text API.
        if image:
            response = self.llm.call_claude_vision_api(verification_prompt, system_prompt, image)
        else:
            response = self.llm.call_claude_api(verification_prompt, system_prompt)

        if not response or not response.strip():
            st.error("Empty response received from Claude API")
            return self._error_verification(req_id, "Empty response received from Claude API")

        try:
            json_content = self._extract_json_payload(response)
            verification_result = self._parse_verification_json(json_content, response, req_id)
            # Callers read the result with .get(), so anything but a non-empty
            # JSON object counts as a failed parse.
            if not isinstance(verification_result, dict) or not verification_result:
                raise ValueError("All JSON parsing strategies failed")
            return verification_result
        except Exception as e:
            # Best effort: one bad reply must not abort the whole analysis run.
            st.error(f"Error parsing verification result: {e}")
            st.error(f"Raw response: {response}")
            return self._error_verification(
                req_id,
                f"Failed to verify requirement due to parsing error: {str(e)}"
            )

    def analyze_compliance(self, requirements_data, packaging_text, packaging_data, image=None, barcode_data=None, metadata=None, model="claude-sonnet-4-20250514"):
        """
        Analyze packaging compliance through a multi-step process:
        1. Extract structured requirements
        2. Verify each requirement with structured reasoning
        3. Generate a final compliance summary report

        Args:
            requirements_data: The requirements data (text string or PDF dict)
            packaging_text: Markdown table extracted from the packaging PDF
            packaging_data: Structured text with bounding boxes; returned unchanged in the result
            image: The image of the packaging document
            barcode_data: List of barcode objects with position data
            metadata: Dictionary containing font, font size, and color metadata
            model: Accepted for interface compatibility; this method does not use it
                (each step passes its own model or relies on the LLM defaults).

        Returns:
            A dictionary with ``requirements``, ``verifications``, ``compliance_report``,
            ``packaging_data``, ``barcode_data`` and ``metadata``. When no requirements
            are extracted, a dictionary with ``error`` and empty ``requirements`` /
            ``verifications`` lists is returned instead.
        """
        # Step 1: Extract structured requirements
        st.info("Extracting structured requirements...")
        requirements = self.extract_structured_requirements(requirements_data)

        if not requirements:
            st.warning("No requirements found in the document. Please check that your requirements file contains valid requirement statements.")
            return {"error": "No requirements found", "requirements": [], "verifications": []}

        st.success(f"Extracted {len(requirements)} requirements")

        # Step 2: Verify each requirement with structured reasoning
        st.info("Verifying requirements...")
        verifications = []
        total = len(requirements)
        for position, req in enumerate(requirements, start=1):
            st.text(f"Verifying requirement {position}/{total}: {req.get('id', 'Unknown')}")
            verifications.append(
                self.verify_individual_requirement(req, packaging_text, image, barcode_data, metadata, requirements_data)
            )

        # Step 3: Generate final compliance report
        system_prompt = """You are a regulatory compliance expert. Provide detailed, objective compliance reports."""

        # Only the fields the summary needs are sent to the model, to save tokens.
        requirements_summary = [
            {
                'id': req.get('id', 'Unknown'),
                'description': req.get('description', ''),
                'category': req.get('category', 'Unknown')
            }
            for req in requirements
        ]
        compliance_summary = [
            {
                'requirement_id': verification.get('requirement_id', 'Unknown'),
                'compliance_status': verification.get('compliance_status', 'UNKNOWN'),
                'confidence': verification.get('confidence', 0),
                'evidence_count': len(verification.get('evidence_found') or [])
            }
            for verification in verifications
        ]

        summary_prompt = f"""
        Based on the verification of {len(requirements)} requirements,
        please provide a final compliance summary report.
        
        Requirements Summary:
        {json.dumps(requirements_summary, indent=2)}
        
        Compliance Results Summary:
        {json.dumps(compliance_summary, indent=2)}
        
        Format your response in the following template:

        ## 🎯 **Analysis Requirements**

        Summarize the overall compliance status with focus on:

        1. **Quantitative Metrics**: Count of fully compliant, partially compliant, and non-compliant requirements
        2. **Critical Issues**: Most urgent compliance gaps requiring immediate attention  
        3. **Strategic Recommendations**: Actionable steps for the artwork designer to fix the compliance issues

        ---

        ## 📋 **Response Template**

        ### 🔍 **Executive Summary**
        Provide a single, clear statement of overall compliance status
        *Example: "Organization achieved 70% compliance (14/20 requirements); moderate risk profile with 3 critical gaps identified."*

        ---

        ### 📈 **Compliance Statistics**

        | **Metric** | **Count** | **Percentage** |
        |------------|-----------|----------------|
        | **Total Requirements** | `[total]` | `100%` |
        | ✅ **Fully Compliant** | `[count]` | `[%]` |
        | ⚠️ **Partially Compliant** | `[count]` | `[%]` |
        | ❌ **Non-Compliant** | `[count]` | `[%]` |

        ---

        ### 🚨 **Priority Findings**

        List 3-5 highest-severity issues in order of criticality:

        1. **[REQ-ID]** - [Brief description of critical issue]
        2. **[REQ-ID]** - [Brief description of high-priority gap]
        3. **[REQ-ID]** - [Brief description of moderate-priority concern]

        ---

        ### 💡 **Targeted Recommendations**

        For each Priority Finding, provide specific corrective actions:

        | **Finding** | **Recommended Action** | **Priority** |
        |-------------|------------------------|--------------|
        | **[REQ-ID]** | [Specific artwork designer action] | 🔴 **Critical** |
        | **[REQ-ID]** | [Specific artwork designer action] | 🟡 **High** |
        | **[REQ-ID]** | [Specific artwork designer action] | 🟢 **Medium** |

        ---

        ### 📝 **Detailed Assessment Results**

        *[Provide comprehensive breakdown of each requirement with status and supporting details]*

        ---

        ### 📊 **Supporting Evidence**

        *[Include relevant data, metrics, or documentation that supports the compliance assessment]*


        """

        compliance_report = self.llm.call_claude_api(summary_prompt, system_prompt, model='claude-3-5-haiku-20241022')

        # Compile all results
        return {
            "requirements": requirements,
            "verifications": verifications,
            "compliance_report": compliance_report,
            "packaging_data": packaging_data,
            "barcode_data": barcode_data,
            "metadata": metadata
        }