"""Create simplified flow diagrams for TRACE metrics."""
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Rectangle
import matplotlib.patches as mpatches

# Diagram 1 canvas: a 12x10 inch figure used as a bare 10 x 12 drawing board
# (the axis frame, ticks and labels are hidden).
fig, ax = plt.subplots(figsize=(12, 10))
ax.set(xlim=(0, 10), ylim=(0, 12))
ax.set_axis_off()

# Box fill colours, named after the role of the boxes they are used for.
COLOR_INPUT = '#E3F2FD'
COLOR_PROCESS = '#BBDEFB'
COLOR_DATA = '#81D4FA'
COLOR_METRIC = '#FFE0B2'
COLOR_OUTPUT = '#C8E6C9'

def draw_box(ax, x, y, w, h, text, color, size=9, bold=False):
    """Draw a rounded, outlined box centred on (x, y) with a centred label.

    Args:
        ax: Matplotlib axes to draw on.
        x: Horizontal centre of the box, in data coordinates.
        y: Vertical centre of the box, in data coordinates.
        w: Box width (the rounded outline adds a 0.05 pad on every side).
        h: Box height (same pad applies).
        text: Label drawn at the box centre; may contain newlines.
        color: Face colour of the box.
        size: Font size of the label.
        bold: When true the label is drawn bold. Defaults to False, which
            keeps the 'normal' weight used for every existing 8-argument
            call. Several calls in this file pass a ninth positional
            boolean, which this parameter accepts.
    """
    box = FancyBboxPatch((x - w / 2, y - h / 2), w, h, boxstyle="round,pad=0.05",
                         edgecolor='#333', facecolor=color, linewidth=1.5)
    ax.add_patch(box)
    ax.text(x, y, text, ha='center', va='center', fontsize=size,
            weight='bold' if bold else 'normal')

def draw_arrow(ax, x1, y1, x2, y2, color='#333'):
    """Draw a straight '->' arrow from (x1, y1) to (x2, y2).

    Args:
        ax: Matplotlib axes to draw on.
        x1: Horizontal start of the arrow, in data coordinates.
        y1: Vertical start of the arrow, in data coordinates.
        x2: Horizontal position of the arrow head.
        y2: Vertical position of the arrow head.
        color: Line and head colour. Defaults to '#333', the colour that was
            previously hard-coded, so existing five-argument calls are
            unchanged. Some calls in this file pass `color=` explicitly.
    """
    arrow = FancyArrowPatch((x1, y1), (x2, y2), arrowstyle='->',
                            mutation_scale=20, color=color, linewidth=2)
    ax.add_patch(arrow)

# Title
ax.text(5, 11.5, 'GPT Labeling Response → TRACE Metrics',
        ha='center', fontsize=13, weight='bold')

# Pipeline steps from top to bottom: (centre y, label, fill colour, font size).
# Every step box is 3 wide and 0.6 tall and shares one vertical centre line,
# so the connectors are derived from the box positions. Previously they were
# typed by hand at x=3.5, which is the right edge of these boxes, not the
# centre.
flow_x = 2
flow_steps = [
    (10.5, 'Query + Response\n+ Documents', COLOR_INPUT, 9),
    (9.3, 'Sentencize\n(Get keyed sentences)', COLOR_PROCESS, 9),
    (8.1, 'Generate GPT\nLabeling Prompt', COLOR_PROCESS, 9),
    (6.9, 'Call Groq LLM API', COLOR_PROCESS, 9),
    (5.7, 'LLM Returns JSON\nwith sentence mapping', COLOR_DATA, 9),
    (4.5, 'Extract Key Data:\n- relevant_keys\n- utilized_keys\n- support_info', COLOR_DATA, 8),
]
for step_y, step_label, step_fill, step_font in flow_steps:
    draw_box(ax, flow_x, step_y, 3, 0.6, step_label, step_fill, step_font)

# Connect each step to the next one: the arrow leaves the bottom edge of the
# box (centre - 0.3) and stops just above the following box (centre - 0.8).
for step_y, _, _, _ in flow_steps[:-1]:
    draw_arrow(ax, flow_x, step_y - 0.3, flow_x, step_y - 0.8)

# TRACE Metrics: (centre x, centre y, title, formula, fill colour)
metrics = [
    (0.8, 3, 'Relevance\n(R)', 'len(relevant)\n/ 20', COLOR_METRIC),
    (2.2, 3, 'Utilization\n(T)', 'len(used) /\nlen(relevant)', COLOR_METRIC),
    (3.6, 3, 'Completeness\n(C)', 'len(R∩T) /\nlen(R)', COLOR_METRIC),
    (5, 3, 'Adherence\n(A)', 'All fully\nsupported?', COLOR_METRIC),
]

# Fan out from the bottom of the last step to every metric: exactly one arrow
# per metric, ending above that metric's own box. The hand-typed version drew
# five arrows whose end points did not line up with the four boxes.
last_step_bottom = flow_steps[-1][0] - 0.3
for metric_x, metric_y, _, _, _ in metrics:
    draw_arrow(ax, flow_x, last_step_bottom, metric_x, metric_y + 0.5)

# Each metric is a title box with a smaller formula box directly underneath.
for x, y, title, formula, color in metrics:
    draw_box(ax, x, y, 1.2, 0.5, title, color, 7)
    draw_box(ax, x, y - 0.6, 1.2, 0.4, formula, '#FFF9C4', 6)

# Final output: every metric column converges on the result box.
for metric_x, _, _, _, _ in metrics:
    draw_arrow(ax, metric_x, 1.9, 2.5, 1.5)

draw_box(ax, 2.5, 0.8, 3.5, 0.6, 'AdvancedTRACEScores\n(R, T, C, A + metadata)', COLOR_OUTPUT, 9)

# Worked example, drawn to the right of the flow as a monospace text panel.
ax.text(7, 10.5, 'Example:', fontsize=11, weight='bold')
example = '''
Inputs:
• relevant sentences: 3
• utilized sentences: 2
• all fully supported: Yes

Results:
R = 3/20 = 0.15
T = 2/3 = 0.67
C = 2/3 = 0.67
A = 1.0 (no hallucinations)
Avg = 0.62
'''
example_panel = {'boxstyle': 'round', 'facecolor': '#F5F5F5', 'alpha': 0.8}
ax.text(7.2, 7.5, example, fontsize=8, family='monospace',
        bbox=example_panel, va='top')

# Write the first diagram to disk, then release the figure.
flow_png = 'TRACE_Metrics_Flow.png'
plt.tight_layout()
plt.savefig(flow_png, dpi=300, bbox_inches='tight', facecolor='white')
print("✅ Created: TRACE_Metrics_Flow.png")
plt.close()

# Diagram 2 (sentence mapping): a 12x8 inch figure used as a bare 12 x 9
# drawing board with the axis decorations hidden.
fig, ax = plt.subplots(figsize=(12, 8))
ax.set(xlim=(0, 12), ylim=(0, 9))
ax.set_axis_off()

# Centred headline across the top of the board.
ax.text(6, 8.5, 'Sentence Support Mapping from GPT Response',
        ha='center', fontsize=13, weight='bold')

# Left column: one box per retrieved document sentence as
# (key, text, is_relevant). Rows are stacked downwards from y=7.2 in steps of
# 0.6; green marks a relevant sentence, red one that is not.
ax.text(1.5, 7.8, 'Retrieved Documents', fontsize=10, weight='bold', color='#1976D2')
docs = [
    ('doc_0_s0', 'COVID-19 is respiratory disease', True),
    ('doc_0_s1', 'caused by virus', True),
    ('doc_1_s0', 'Spreads via droplets', True),
]
for row, (doc_key, doc_text, is_relevant) in enumerate(docs):
    row_y = 7.2 - row*0.6
    if is_relevant:
        row_fill = '#C8E6C9'
    else:
        row_fill = '#FFCDD2'
    draw_box(ax, 1.5, row_y, 2.5, 0.5, f'{doc_key}\n{doc_text}', row_fill, 7)

# Middle and right columns: each response sentence as
# (key, text, supporting keys, fully_supported), with its support summary
# drawn next to it on the same row.
ax.text(6, 7.8, 'Response + Support Info', fontsize=10, weight='bold', color='#1976D2')
responses = [
    ('resp_s0', 'COVID-19 is respiratory', 'doc_0_s0,s1', True),
    ('resp_s1', 'Spreads person-to-person', 'doc_1_s0', True),
]
for row, (resp_key, resp_text, support, full) in enumerate(responses):
    row_y = 7.2 - row*0.6
    if full:
        sentence_fill, support_fill, mark = '#C8E6C9', '#FFF9C4', "✓"
    else:
        sentence_fill, support_fill, mark = '#FFCDD2', '#FFE0B2', "✗"
    draw_box(ax, 6, row_y, 2.5, 0.5, f'{resp_key}: {resp_text}', sentence_fill, 7)
    draw_box(ax, 9.5, row_y, 2, 0.5, f'Support: {support}\nFull: {mark}',
             support_fill, 6)

# Monospace panel spelling out the metric arithmetic for the rows above.
calc_text = '''
Metric Calculations:
────────────────────
Relevant count = 3
  [doc_0_s0, doc_0_s1, doc_1_s0]

Utilized count = 3
  [doc_0_s0, doc_0_s1, doc_1_s0]

Fully supported = 2/2 responses

Relevance = 3/20 = 0.15
Utilization = 3/3 = 1.00
Completeness = 3/3 = 1.00
Adherence = 1.0 (no hallucinations)

Average Score = 0.79
'''

# The panel is anchored by its top edge at (1, 4).
calc_panel = {'boxstyle': 'round', 'facecolor': '#F5F5F5', 'edgecolor': '#666'}
ax.text(1, 4, calc_text, fontsize=8, family='monospace',
        bbox=calc_panel, va='top')

# Colour key: one swatch and caption per row, stacked downwards from y=3.2 in
# steps of 0.4.
ax.text(7, 4, 'Legend:', fontsize=10, weight='bold', color='#1976D2')
legend_items = [
    ('#C8E6C9', 'Relevant/Supported'),
    ('#FFCDD2', 'Not relevant/unsupported'),
    ('#FFF9C4', 'Fully supported'),
    ('#FFE0B2', 'Partially supported'),
]
for row, (swatch_fill, caption) in enumerate(legend_items):
    row_y = 3.2 - row*0.4
    ax.add_patch(Rectangle((6.5, row_y-0.12), 0.25, 0.25,
                           facecolor=swatch_fill, edgecolor='#333'))
    ax.text(7, row_y, caption, fontsize=8, va='center')

# Write the second diagram to disk, then release the figure.
mapping_png = 'Sentence_Mapping_Example.png'
plt.tight_layout()
plt.savefig(mapping_png, dpi=300, bbox_inches='tight', facecolor='white')
print("✅ Created: Sentence_Mapping_Example.png")
plt.close()

# Closing summary on stdout: a banner followed by the generated file names.
banner = "="*50
summary_lines = (
    "\n" + banner,
    "Flow Diagrams Created Successfully!",
    banner,
    "\nGenerated files:",
    "  1. TRACE_Metrics_Flow.png - 8-step process flow",
    "  2. Sentence_Mapping_Example.png - Sentence mapping details",
)
for summary_line in summary_lines:
    print(summary_line)
# NOTE(review): the statements that originally opened this third figure
# (figure creation, axis limits, the Phase 1 heading, the first `y_pos` and
# the query box) are missing from the file. As a result `y_pos` was undefined
# here (NameError) and `ax` still referred to the already-closed second
# figure. Everything down to the first two original draw_box calls is
# reconstructed from the coordinates used further down; confirm the figure
# size, limits and wording against the intended layout.
fig, ax = plt.subplots(1, 1, figsize=(14, 18))
ax.set_xlim(0, 14)     # the side panels below extend to x = 13.75
ax.set_ylim(-1, 18)    # the output box reaches y = -0.8; side heading is at 17.3
ax.axis('off')

# ============================================================================
# PHASE 1: Input
# ============================================================================

# 1.8 above the Phase 2 heading. With the 0.05 outline pad this puts the
# bottom of the input boxes at y = 15.4, where the Phase 2 arrows start.
y_pos = 16.6
ax.text(1, y_pos, 'PHASE 1: Input', fontsize=12, weight='bold', color='#1976D2')

# Reconstructed: the Phase 2 arrow leaving (1.5, 15.4) needs a source box in
# the first column.
draw_box(ax, 1.5, y_pos - 0.8, 2, 0.7, 'User Query', COLOR_INPUT, 8)
draw_box(ax, 4.5, y_pos - 0.8, 2, 0.7, 'LLM Response\n"COVID-19 is..."', COLOR_INPUT, 8)
draw_box(ax, 7.5, y_pos - 0.8, 2.5, 0.7, 'Retrieved Documents\n[Doc1, Doc2, Doc3]', COLOR_INPUT, 8)

# ============================================================================
# PHASE 2: Sentencization
# ============================================================================

y_pos = 14.8
ax.text(1, y_pos, 'PHASE 2: Sentencization', fontsize=12, weight='bold', color='#1976D2')

# One short downward connector per column.
for column_x in (1.5, 4.5, 7.5):
    draw_arrow(ax, column_x, 15.4, column_x, 15.0)

# (centre x, width, label) of the three sentence boxes, all drawn 0.8 below
# the phase heading with height 1.
sentence_boxes = (
    (1.5, 2.5, 'Query Sentences\n(Usually 1 sentence)'),
    (4.5, 2.5, 'Response Sentences\nresp_s0, resp_s1\nresp_s2...'),
    (7.5, 2.8, 'Document Sentences\ndoc_0_s0, doc_0_s1\ndoc_1_s0, doc_1_s1...'),
)
for column_x, box_width, box_label in sentence_boxes:
    draw_box(ax, column_x, y_pos - 0.8, box_width, 1, box_label, COLOR_PROCESS, 8)

# ============================================================================
# PHASE 3: Prompt Generation
# ============================================================================

y_pos = 13
ax.text(1, y_pos, 'PHASE 3: GPT Labeling Prompt Generation', fontsize=12, weight='bold', color='#1976D2')

# The three sentence columns converge on the prompt generator box.
for source_x, target_x in ((1.5, 2.5), (4.5, 3.5), (7.5, 4.5)):
    draw_arrow(ax, source_x, 14.0, target_x, 13.5)

# NOTE(review): both boxes below pass a trailing boolean as a ninth
# positional argument; confirm draw_box accepts it.
generator_label = (
    'GPTLabelingPromptGenerator.generate_labeling_prompt()\n\n'
    'Creates:\n'
    '- ROLE section\n'
    '- TASK OVERVIEW\n'
    '- INPUT DATA (with keys)\n'
    '- OUTPUT REQUIREMENTS\n'
    '- JSON SCHEMA'
)
draw_box(ax, 3.5, y_pos - 0.9, 5.5, 1.5, generator_label, COLOR_PROCESS, 8, True)

draw_arrow(ax, 3.5, y_pos - 1.4, 3.5, 12)

prompt_label = 'Structured Prompt with Sentencized Data\n(Ready to send to LLM)'
draw_box(ax, 3.5, y_pos - 2.3, 5.8, 0.9, prompt_label, COLOR_DATA, 8, True)

# ============================================================================
# PHASE 4: LLM Call
# ============================================================================

y_pos = 11
ax.text(1, y_pos, 'PHASE 4: LLM API Call (Groq)', fontsize=12, weight='bold', color='#1976D2')

# Connector coming down into the LLM box.
draw_arrow(ax, 3.5, 11.7, 3.5, 11.4)

# NOTE(review): the trailing True is a ninth positional argument; confirm
# draw_box accepts it.
llm_label = 'Groq LLM\n(llm_client.generate)'
llm_fill = '#C5CAE9'
draw_box(ax, 3.5, y_pos - 0.7, 5, 0.9, llm_label, llm_fill, 9, True)

# Connector leaving the LLM box towards the next phase.
draw_arrow(ax, 3.5, y_pos - 1.1, 3.5, 9.5)

# ============================================================================
# PHASE 5: JSON Response
# ============================================================================

y_pos = 9
ax.text(1, y_pos, 'PHASE 5: JSON Response Parsing', fontsize=12, weight='bold', color='#1976D2')

# Sample of the JSON structure shown inside the box.
json_text = '''LLM Response (JSON):
{
  "relevance_explanation": "...",
  "all_relevant_sentence_keys": ["doc_0_s0", "doc_0_s1"],
  "overall_supported": true,
  "sentence_support_information": [
    {"response_sentence_key": "resp_s0", "fully_supported": true,
     "supporting_sentence_keys": ["doc_0_s0"]},
    {"response_sentence_key": "resp_s1", "fully_supported": true,
     "supporting_sentence_keys": ["doc_0_s1"]}
  ],
  "all_utilized_sentence_keys": ["doc_0_s0", "doc_0_s1"]
}'''

# NOTE(review): the trailing False is a ninth positional argument; confirm
# draw_box accepts it.
json_box_width, json_box_height = 6.2, 3.2
draw_box(ax, 3.5, y_pos - 2.2, json_box_width, json_box_height,
         json_text, COLOR_DATA, 7, False)

# ============================================================================
# PHASE 6: Extract Key Data
# ============================================================================

y_pos = 4.5
ax.text(1, y_pos, 'PHASE 6: Extract Data from JSON', fontsize=12, weight='bold', color='#1976D2')

draw_arrow(ax, 3.5, 5.8, 3.5, 5.2)

# One box per field pulled out of the JSON, as (centre x, label). All four
# share the same row, size, colour and font.
extracted_fields = (
    (1, 'Relevant Sentences\nall_relevant_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]'),
    (3.5, 'Utilized Sentences\nall_utilized_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]'),
    (6, 'Support Info\nsentence_\nsupport_\ninformation\n\n[{...}, {...}]'),
    (8.5, 'Overall Support\noverall_\nsupported\n\ntrue/false'),
)
for field_x, field_label in extracted_fields:
    draw_box(ax, field_x, y_pos - 0.8, 2.2, 0.9, field_label, COLOR_METRIC, 7)

# ============================================================================
# PHASE 7: Calculate TRACE Metrics
# ============================================================================

y_pos = 2.2
ax.text(1, y_pos, 'PHASE 7: Calculate TRACE Metrics', fontsize=12, weight='bold', color='#1976D2')

# Arrows from the extracted-data row (y=3.7) down to y=2.9, given as
# (start x, end x).
for start_x, end_x in ((1, 1.5), (3.5, 3.5), (6, 5.5), (8.5, 7)):
    draw_arrow(ax, start_x, 3.7, end_x, 2.9)

# The four TRACE metrics as (label, centre x, fill colour).
metrics = [
    ('Relevance (R)\nlen(relevant)/20', 1.5, '#FF6B6B'),
    ('Utilization (T)\nlen(used)/\nlen(relevant)', 4, '#4ECDC4'),
    ('Completeness (C)\nlen(R∩T)/\nlen(R)', 6.5, '#45B7D1'),
    ('Adherence (A)\nall fully_\nsupported?', 9, '#FFA07A'),
]

# NOTE(review): the trailing True is a ninth positional argument; confirm
# draw_box accepts it.
for metric_label, metric_x, metric_fill in metrics:
    draw_box(ax, metric_x, y_pos - 0.8, 1.8, 1.1, metric_label, metric_fill, 8, True)

# ============================================================================
# PHASE 8: Output
# ============================================================================

y_pos = 0.2
ax.text(1, y_pos, 'PHASE 8: Final Output', fontsize=12, weight='bold', color='#1976D2')

# Arrows from the metric row (y=1.4) to the output box (y=0.9), given as
# (start x, end x).
for start_x, end_x in ((1.5, 3), (4, 5), (6.5, 7), (9, 8.5)):
    draw_arrow(ax, start_x, 1.4, end_x, 0.9)

# NOTE(review): the trailing True is a ninth positional argument; confirm
# draw_box accepts it.
output_label = 'AdvancedTRACEScores Object\n(R, T, C, A values + metadata)'
draw_box(ax, 5.5, y_pos - 0.6, 4.5, 0.8, output_label, COLOR_OUTPUT, 9, True)

# ============================================================================
# Side Panel: Example Values
# ============================================================================

# Heading for the worked example shown in the right-hand column.
panel_x = 11.5
ax.text(panel_x, 17.3, 'Example Calculation', fontsize=12, weight='bold', color='#1976D2')

example_text = '''Given:
• Relevant sentences: 2
  all_relevant_sentence_keys:
  ["doc_0_s0", "doc_0_s1"]

• Utilized sentences: 2
  all_utilized_sentence_keys:
  ["doc_0_s0", "doc_0_s1"]

• Supported sentences: 2/2
  All with fully_supported=true

TRACE Metrics:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
R = 2 / 20 = 0.10
  → 10% of docs relevant

T = 2 / 2 = 1.00
  → 100% of relevant used

C = 2 / 2 = 1.00
  → 100% relevant info used

A = 1.00
  → No hallucinations

Average = (0.10+1+1+1)/4 = 0.775
'''

# NOTE(review): the trailing False is a ninth positional argument; confirm
# draw_box accepts it.
example_fill = '#F5F5F5'
draw_box(ax, panel_x, 13.5, 4.5, 6.5, example_text, example_fill, 7, False)

# ============================================================================
# Key Formula Reference
# ============================================================================

# Heading for the formula summary in the right-hand column.
ax.text(11.5, 6.5, 'Key Formulas', fontsize=12, weight='bold', color='#1976D2')

formulas_text = '''Relevance (R):
R = |relevant_sentences| / 20

Utilization (T):
T = |utilized_sentences| / |relevant_sentences|

Completeness (C):
C = |relevant ∩ utilized| / |relevant|

Adherence (A):
A = 1.0 if all fully_supported
    else 0.0
'''

# NOTE(review): the trailing False is a ninth positional argument; confirm
# draw_box accepts it.
formulas_fill = '#F5F5F5'
draw_box(ax, 11.5, 4.2, 4.5, 3.8, formulas_text, formulas_fill, 8, False)

# Save the flow diagram, report on stdout, then release the current figure.
save_options = {
    'dpi': 300,
    'bbox_inches': 'tight',
    'facecolor': 'white',
    'edgecolor': 'none',
}
plt.tight_layout()
plt.savefig('TRACE_Metrics_Calculation_Flow.png', **save_options)
for status_line in (
    "✅ Flow diagram created: TRACE_Metrics_Calculation_Flow.png",
    "📊 Shows 8-phase process from input to TRACE metrics",
):
    print(status_line)

plt.close()

# Create a second diagram showing the detailed sentence mapping
fig2, ax2 = plt.subplots(1, 1, figsize=(14, 10))
ax2.set_xlim(0, 14)
ax2.set_ylim(0, 11)
ax2.axis('off')

ax2.text(7, 10.5, 'Sentence Mapping & Support Detection',
         ha='center', fontsize=14, weight='bold', color='#212121')

# Document sentences as (key, text, is_relevant). Rows are stacked downwards
# from y=9.2 in steps of 0.6; green marks relevant, red not relevant.
ax2.text(1, 9.8, 'Retrieved Documents (Sentencized)', fontsize=11, weight='bold', color='#1976D2')
doc_sentences = [
    ('doc_0_s0', 'COVID-19 is a respiratory disease', True),
    ('doc_0_s1', 'caused by SARS-CoV-2', True),
    ('doc_1_s0', 'The virus spreads via droplets', True),
    ('doc_2_s0', 'Vaccines prevent infection', False),
]

for i, (key, text, relevant) in enumerate(doc_sentences):
    y = 9.2 - (i * 0.6)
    color = '#C8E6C9' if relevant else '#FFCDD2'
    draw_box(ax2, 1, y, 2.5, 0.5, f'{key}\n{text}', color, 7)

# One arrow per document row. Iterating over the list rather than a
# hard-coded count keeps the arrows in step with the rows if a sentence is
# added or removed.
for i in range(len(doc_sentences)):
    y = 9.2 - (i * 0.6)
    draw_arrow(ax2, 2.8, y, 4.2, y - 2.5, color='#1976D2')

# Response sentences with support mapping:
# (key, text, supporting keys, fully_supported).
ax2.text(7, 9.8, 'Response Sentences (with Support)', fontsize=11, weight='bold', color='#1976D2')
response_sentences = [
    ('resp_s0', 'COVID-19 is a respiratory disease', 'doc_0_s0, doc_0_s1', True),
    ('resp_s1', 'It spreads through droplets', 'doc_1_s0', True),
]

# Geometry of the two boxes on each response row. The connector is derived
# from it so it always spans the gap between them.
resp_x, resp_w = 7, 2.8
support_x, support_w = 10, 2.5

for i, (key, text, support, fully_supported) in enumerate(response_sentences):
    y = 9.2 - (i * 1.2)
    color = '#C8E6C9' if fully_supported else '#FFCDD2'

    # Response sentence box
    draw_box(ax2, resp_x, y, resp_w, 0.5, f'{key}: {text}', color, 7)

    # Support information
    draw_box(ax2, support_x, y, support_w, 0.5,
             f'Supports: {support}\nFully: {"✓" if fully_supported else "✗"}',
             '#FFF9C4' if fully_supported else '#FFE0B2', 7)

    # Connect the right edge of the sentence box to the left edge of its
    # support box. The previous call started and ended at the same point
    # (8.8, y), which gives a zero-length arrow.
    draw_arrow(ax2, resp_x + resp_w / 2, y, support_x - support_w / 2, y, color='#757575')

# Metric arithmetic for the rows above, shown in a single box.
ax2.text(1, 5.8, 'Metric Calculations', fontsize=11, weight='bold', color='#1976D2')

stats_text = '''Relevant Sentences:
doc_0_s0 ✓, doc_0_s1 ✓, doc_1_s0 ✓
Count: 3
Relevance (R) = 3/20 = 0.15

Utilized Sentences:
doc_0_s0, doc_0_s1, doc_1_s0
Count: 3
Utilization (T) = 3/3 = 1.00

Completeness (C) = 3/3 = 1.00

Adherence (A) = 1.0
(All 2 sentences fully supported)

Average = (0.15 + 1.0 + 1.0 + 1.0) / 4 = 0.79
'''

# Box centred at (3.5, 3.5), 5.2 wide and 4.2 tall.
stats_center = (3.5, 3.5)
stats_size = (5.2, 4.2)
draw_box(ax2, *stats_center, *stats_size, stats_text, '#E3F2FD', 8)

# Legend: one colour swatch and caption per row, stacked downwards from
# y=5.2 in steps of 0.5.
ax2.text(9.5, 5.8, 'Legend', fontsize=11, weight='bold', color='#1976D2')

legend_items = [
    ('#C8E6C9', 'Relevant / Fully Supported'),
    ('#FFCDD2', 'Not Relevant / Not Supported'),
    ('#FFF9C4', 'Fully Supported'),
    ('#FFE0B2', 'Partially Supported'),
]

for i, (color, label) in enumerate(legend_items):
    y = 5.2 - (i * 0.5)
    rect = mpatches.Rectangle((9.2, y - 0.15), 0.3, 0.3, facecolor=color, edgecolor='#424242')
    ax2.add_patch(rect)
    ax2.text(9.7, y, label, fontsize=8, va='center')

# Save the mapping diagram and report on stdout.
plt.tight_layout()
plt.savefig('Sentence_Support_Mapping.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("✅ Mapping diagram created: Sentence_Support_Mapping.png")
print("📊 Shows sentence-level support detection and metric calculation")

# Release the figure once it is on disk, as is done for every other diagram
# in this script; it was previously left open.
plt.close(fig2)

# Closing summary of the files written by this part of the script.
print("\n" + "="*60)
print("Flow Diagrams Created Successfully!")
print("="*60)
print("\nFiles generated:")
print("  1. TRACE_Metrics_Calculation_Flow.png")
print("  2. Sentence_Support_Mapping.png")