Spaces:
Sleeping
Sleeping
| """Create simplified flow diagrams for TRACE metrics.""" | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Rectangle | |
| import matplotlib.patches as mpatches | |
# Shared fill palette, keyed by the role a box plays in a diagram.
COLOR_INPUT = '#E3F2FD'
COLOR_PROCESS = '#BBDEFB'
COLOR_DATA = '#81D4FA'
COLOR_METRIC = '#FFE0B2'
COLOR_OUTPUT = '#C8E6C9'

# Canvas for the first diagram: a 10 x 12 data-unit drawing area with the
# axes frame hidden so that only the patches and text are rendered.
fig, ax = plt.subplots(figsize=(12, 10))
ax.set(xlim=(0, 10), ylim=(0, 12))
ax.set_axis_off()
def draw_box(ax, x, y, w, h, text, color, size=9, bold=False):
    """Draw a rounded, outlined box centred on (x, y) with a centred label.

    Args:
        ax: Matplotlib axes the patch and text are added to.
        x, y: Centre of the box in data coordinates.
        w, h: Width and height of the box (the rounded style adds a 0.05 pad).
        text: Label drawn at the centre of the box.
        color: Fill colour of the box.
        size: Font size of the label.
        bold: When true the label is drawn in bold. Defaults to False, which
            reproduces the previous fixed ``weight='normal'``.
            NOTE(review): several call sites in this script pass a ninth
            positional True/False; it is interpreted here as a bold flag --
            confirm against the original helper.
    """
    box = FancyBboxPatch(
        (x - w / 2, y - h / 2), w, h,
        boxstyle="round,pad=0.05",
        edgecolor='#333', facecolor=color, linewidth=1.5,
    )
    ax.add_patch(box)
    ax.text(x, y, text, ha='center', va='center', fontsize=size,
            weight='bold' if bold else 'normal')
def draw_arrow(ax, x1, y1, x2, y2, color='#333'):
    """Draw a straight '->' arrow from (x1, y1) to (x2, y2).

    Args:
        ax: Matplotlib axes the arrow patch is added to.
        x1, y1: Tail of the arrow in data coordinates.
        x2, y2: Head of the arrow in data coordinates.
        color: Arrow colour. Defaults to the dark grey that was previously
            hard-coded, so existing five-argument calls are unchanged; call
            sites in this script that pass ``color=...`` now work.
    """
    arrow = FancyArrowPatch(
        (x1, y1), (x2, y2),
        arrowstyle='->', mutation_scale=20, color=color, linewidth=2,
    )
    ax.add_patch(arrow)
# ---------------------------------------------------------------------------
# Diagram 1: pipeline from labeling inputs to the four TRACE metrics.
# Non-ASCII glyphs in the labels (arrow, bullets, intersection sign, check
# mark) were mis-decoded in the previous text and are restored here.
# ---------------------------------------------------------------------------
ax.text(5, 11.5, 'GPT Labeling Response → TRACE Metrics',
        ha='center', fontsize=13, weight='bold')

# Vertical pipeline: every step box shares one centre line, and connectors are
# drawn on that same line (they were previously at x=3.5, i.e. on the right
# edge of boxes centred at x=2).
FLOW_X = 2
flow_steps = [
    (10.5, 'Query + Response\n+ Documents', COLOR_INPUT, 9),
    (9.3, 'Sentencize\n(Get keyed sentences)', COLOR_PROCESS, 9),
    (8.1, 'Generate GPT\nLabeling Prompt', COLOR_PROCESS, 9),
    (6.9, 'Call Groq LLM API', COLOR_PROCESS, 9),
    (5.7, 'LLM Returns JSON\nwith sentence mapping', COLOR_DATA, 9),
    (4.5, 'Extract Key Data:\n- relevant_keys\n- utilized_keys\n- support_info',
     COLOR_DATA, 8),
]
for step_y, label, fill, font_size in flow_steps:
    draw_box(ax, FLOW_X, step_y, 3, 0.6, label, fill, font_size)

# Connect consecutive steps: leave the bottom edge of the upper box
# (centre - 0.3) and stop just above the lower box (centre + 0.4).
for upper, lower in zip(flow_steps, flow_steps[1:]):
    draw_arrow(ax, FLOW_X, upper[0] - 0.3, FLOW_X, lower[0] + 0.4)

# TRACE metrics: (x, y, title, formula, fill colour).
metrics = [
    (0.8, 3, 'Relevance\n(R)', 'len(relevant)\n/ 20', COLOR_METRIC),
    (2.2, 3, 'Utilization\n(T)', 'len(used) /\nlen(relevant)', COLOR_METRIC),
    (3.6, 3, 'Completeness\n(C)', 'len(R∩T) /\nlen(R)', COLOR_METRIC),
    (5, 3, 'Adherence\n(A)', 'All fully\nsupported?', COLOR_METRIC),
]
for x, y, title, formula, color in metrics:
    # One fan-out arrow per metric, aimed at that metric's column (previously
    # five hand-placed arrows that did not line up with the four boxes).
    draw_arrow(ax, FLOW_X, 4.2, x, y + 0.5)
    draw_box(ax, x, y, 1.2, 0.5, title, color, 7)
    draw_box(ax, x, y - 0.6, 1.2, 0.4, formula, '#FFF9C4', 6)
    # Every metric feeds the single result box at the bottom.
    draw_arrow(ax, x, 1.9, 2.5, 1.5)

# Final output
draw_box(ax, 2.5, 0.8, 3.5, 0.6,
         'AdvancedTRACEScores\n(R, T, C, A + metadata)', COLOR_OUTPUT, 9)

# Worked example shown to the right of the pipeline.
ax.text(7, 10.5, 'Example:', fontsize=11, weight='bold')
example = '''
Inputs:
• relevant sentences: 3
• utilized sentences: 2
• all fully supported: Yes
Results:
R = 3/20 = 0.15
T = 2/3 = 0.67
C = 2/3 = 0.67
A = 1.0 (no hallucinations)
Avg = 0.62
'''
ax.text(7.2, 7.5, example, fontsize=8, family='monospace',
        bbox=dict(boxstyle='round', facecolor='#F5F5F5', alpha=0.8),
        verticalalignment='top')

plt.tight_layout()
plt.savefig('TRACE_Metrics_Flow.png', dpi=300, bbox_inches='tight', facecolor='white')
# NOTE(review): leading glyph restored from mojibake; presumed to be a check mark.
print("✓ Created: TRACE_Metrics_Flow.png")
plt.close()
# ---------------------------------------------------------------------------
# Diagram 2: which document sentences support which response sentences.
# The supported/unsupported markers, the rule line and the status glyph were
# mis-decoded in the previous text (both markers rendered as the same
# character); they are restored here.
# ---------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_xlim(0, 12)
ax.set_ylim(0, 9)
ax.axis('off')
ax.text(6, 8.5, 'Sentence Support Mapping from GPT Response',
        ha='center', fontsize=13, weight='bold')

# Documents: (sentence key, text, is_relevant). Green = relevant, red = not.
ax.text(1.5, 7.8, 'Retrieved Documents', fontsize=10, weight='bold', color='#1976D2')
docs = [
    ('doc_0_s0', 'COVID-19 is respiratory disease', True),
    ('doc_0_s1', 'caused by virus', True),
    ('doc_1_s0', 'Spreads via droplets', True),
]
for i, (key, text, rel) in enumerate(docs):
    y = 7.2 - i * 0.6
    color = '#C8E6C9' if rel else '#FFCDD2'
    draw_box(ax, 1.5, y, 2.5, 0.5, f'{key}\n{text}', color, 7)

# Response: (sentence key, text, supporting keys, fully_supported).
ax.text(6, 7.8, 'Response + Support Info', fontsize=10, weight='bold', color='#1976D2')
responses = [
    ('resp_s0', 'COVID-19 is respiratory', 'doc_0_s0,s1', True),
    ('resp_s1', 'Spreads person-to-person', 'doc_1_s0', True),
]
for i, (key, text, support, full) in enumerate(responses):
    y = 7.2 - i * 0.6
    color = '#C8E6C9' if full else '#FFCDD2'
    draw_box(ax, 6, y, 2.5, 0.5, f'{key}: {text}', color, 7)
    # Distinct glyphs for the two outcomes.
    mark = '✓' if full else '✗'
    draw_box(ax, 9.5, y, 2, 0.5, f'Support: {support}\nFull: {mark}',
             '#FFF9C4' if full else '#FFE0B2', 6)

# Calculations
calc_text = '''
Metric Calculations:
────────────────────
Relevant count = 3
[doc_0_s0, doc_0_s1, doc_1_s0]
Utilized count = 3
[doc_0_s0, doc_0_s1, doc_1_s0]
Fully supported = 2/2 responses
Relevance = 3/20 = 0.15
Utilization = 3/3 = 1.00
Completeness = 3/3 = 1.00
Adherence = 1.0 (no hallucinations)
Average Score = 0.79
'''
ax.text(1, 4, calc_text, fontsize=8, family='monospace',
        bbox=dict(boxstyle='round', facecolor='#F5F5F5', edgecolor='#666'),
        verticalalignment='top')

# Legend: one colour swatch plus label per row.
ax.text(7, 4, 'Legend:', fontsize=10, weight='bold', color='#1976D2')
legend_items = [
    ('#C8E6C9', 'Relevant/Supported'),
    ('#FFCDD2', 'Not relevant/unsupported'),
    ('#FFF9C4', 'Fully supported'),
    ('#FFE0B2', 'Partially supported'),
]
for i, (color, label) in enumerate(legend_items):
    y = 3.2 - i * 0.4
    rect = Rectangle((6.5, y - 0.12), 0.25, 0.25, facecolor=color, edgecolor='#333')
    ax.add_patch(rect)
    ax.text(7, y, label, fontsize=8, va='center')

plt.tight_layout()
plt.savefig('Sentence_Mapping_Example.png', dpi=300, bbox_inches='tight', facecolor='white')
# NOTE(review): leading glyph restored from mojibake; presumed to be a check mark.
print("✓ Created: Sentence_Mapping_Example.png")
plt.close()
# Console summary listing the two images written above.
banner_rule = "=" * 50
print(f"\n{banner_rule}")
print("Flow Diagrams Created Successfully!")
print(banner_rule)
print("\nGenerated files:")
for summary_line in (
    " 1. TRACE_Metrics_Flow.png - 8-step process flow",
    " 2. Sentence_Mapping_Example.png - Sentence mapping details",
):
    print(summary_line)
# ============================================================================
# Diagram: 8-phase TRACE metrics calculation flow
# ============================================================================
# NOTE(review): the beginning of this section (figure creation, the PHASE 1
# heading and apparently a query input box at x=1.5) is missing from this
# file -- restore it from version control. Until then the section gets its
# own figure so it no longer draws onto the figure closed above, and `y_pos`
# is defined so the first draw_box call does not raise NameError.
# Figure size and axis limits are chosen to enclose the coordinates used
# below; the original values are not visible.
fig, ax = plt.subplots(figsize=(16, 18))
ax.set_xlim(0, 14)
ax.set_ylim(-1, 18)
ax.axis('off')

# Phase 1 baseline. 16.5 places the bottom of the input boxes where the
# phase-2 arrows start (y = 15.4). TODO confirm against the original.
y_pos = 16.5
draw_box(ax, 4.5, y_pos - 0.8, 2, 0.7, 'LLM Response\n"COVID-19 is..."', COLOR_INPUT, 8)
draw_box(ax, 7.5, y_pos - 0.8, 2.5, 0.7, 'Retrieved Documents\n[Doc1, Doc2, Doc3]', COLOR_INPUT, 8)

# ============================================================================
# PHASE 2: Sentencization
# ============================================================================
y_pos = 14.8
ax.text(1, y_pos, 'PHASE 2: Sentencization', fontsize=12, weight='bold', color='#1976D2')
draw_arrow(ax, 1.5, 15.4, 1.5, 15.0)
draw_arrow(ax, 4.5, 15.4, 4.5, 15.0)
draw_arrow(ax, 7.5, 15.4, 7.5, 15.0)
draw_box(ax, 1.5, y_pos - 0.8, 2.5, 1,
         'Query Sentences\n(Usually 1 sentence)', COLOR_PROCESS, 8)
draw_box(ax, 4.5, y_pos - 0.8, 2.5, 1,
         'Response Sentences\nresp_s0, resp_s1\nresp_s2...', COLOR_PROCESS, 8)
draw_box(ax, 7.5, y_pos - 0.8, 2.8, 1,
         'Document Sentences\ndoc_0_s0, doc_0_s1\ndoc_1_s0, doc_1_s1...', COLOR_PROCESS, 8)

# ============================================================================
# PHASE 3: Prompt Generation
# ============================================================================
y_pos = 13
ax.text(1, y_pos, 'PHASE 3: GPT Labeling Prompt Generation', fontsize=12, weight='bold', color='#1976D2')
draw_arrow(ax, 1.5, 14.0, 2.5, 13.5)
draw_arrow(ax, 4.5, 14.0, 3.5, 13.5)
draw_arrow(ax, 7.5, 14.0, 4.5, 13.5)
# The trailing positional True/False on draw_box calls in this section is a
# ninth argument; it requires a draw_box that accepts it.
draw_box(ax, 3.5, y_pos - 0.9, 5.5, 1.5,
         'GPTLabelingPromptGenerator.generate_labeling_prompt()\n\nCreates:\n- ROLE section\n- TASK OVERVIEW\n- INPUT DATA (with keys)\n- OUTPUT REQUIREMENTS\n- JSON SCHEMA',
         COLOR_PROCESS, 8, True)
draw_arrow(ax, 3.5, y_pos - 1.4, 3.5, 12)
draw_box(ax, 3.5, y_pos - 2.3, 5.8, 0.9,
         'Structured Prompt with Sentencized Data\n(Ready to send to LLM)', COLOR_DATA, 8, True)

# ============================================================================
# PHASE 4: LLM Call
# ============================================================================
y_pos = 11
ax.text(1, y_pos, 'PHASE 4: LLM API Call (Groq)', fontsize=12, weight='bold', color='#1976D2')
draw_arrow(ax, 3.5, 11.7, 3.5, 11.4)
draw_box(ax, 3.5, y_pos - 0.7, 5, 0.9,
         'Groq LLM\n(llm_client.generate)',
         '#C5CAE9', 9, True)
draw_arrow(ax, 3.5, y_pos - 1.1, 3.5, 9.5)

# ============================================================================
# PHASE 5: JSON Response
# ============================================================================
y_pos = 9
ax.text(1, y_pos, 'PHASE 5: JSON Response Parsing', fontsize=12, weight='bold', color='#1976D2')
# Show the JSON response structure
json_text = '''LLM Response (JSON):
{
"relevance_explanation": "...",
"all_relevant_sentence_keys": ["doc_0_s0", "doc_0_s1"],
"overall_supported": true,
"sentence_support_information": [
{"response_sentence_key": "resp_s0", "fully_supported": true,
"supporting_sentence_keys": ["doc_0_s0"]},
{"response_sentence_key": "resp_s1", "fully_supported": true,
"supporting_sentence_keys": ["doc_0_s1"]}
],
"all_utilized_sentence_keys": ["doc_0_s0", "doc_0_s1"]
}'''
draw_box(ax, 3.5, y_pos - 2.2, 6.2, 3.2, json_text, COLOR_DATA, 7, False)

# ============================================================================
# PHASE 6: Extract Key Data
# ============================================================================
y_pos = 4.5
ax.text(1, y_pos, 'PHASE 6: Extract Data from JSON', fontsize=12, weight='bold', color='#1976D2')
draw_arrow(ax, 3.5, 5.8, 3.5, 5.2)
# One box per field pulled out of the JSON response.
draw_box(ax, 1, y_pos - 0.8, 2.2, 0.9,
         'Relevant Sentences\nall_relevant_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]',
         COLOR_METRIC, 7)
draw_box(ax, 3.5, y_pos - 0.8, 2.2, 0.9,
         'Utilized Sentences\nall_utilized_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]',
         COLOR_METRIC, 7)
draw_box(ax, 6, y_pos - 0.8, 2.2, 0.9,
         'Support Info\nsentence_\nsupport_\ninformation\n\n[{...}, {...}]',
         COLOR_METRIC, 7)
draw_box(ax, 8.5, y_pos - 0.8, 2.2, 0.9,
         'Overall Support\noverall_\nsupported\n\ntrue/false',
         COLOR_METRIC, 7)

# ============================================================================
# PHASE 7: Calculate TRACE Metrics
# ============================================================================
y_pos = 2.2
ax.text(1, y_pos, 'PHASE 7: Calculate TRACE Metrics', fontsize=12, weight='bold', color='#1976D2')
# Draw arrows from extracted data to metrics
draw_arrow(ax, 1, 3.7, 1.5, 2.9)
draw_arrow(ax, 3.5, 3.7, 3.5, 2.9)
draw_arrow(ax, 6, 3.7, 5.5, 2.9)
draw_arrow(ax, 8.5, 3.7, 7, 2.9)
# Four TRACE metrics: (label, x centre, fill colour).
metrics = [
    ('Relevance (R)\nlen(relevant)/20', 1.5, '#FF6B6B'),
    ('Utilization (T)\nlen(used)/\nlen(relevant)', 4, '#4ECDC4'),
    ('Completeness (C)\nlen(R∩T)/\nlen(R)', 6.5, '#45B7D1'),
    ('Adherence (A)\nall fully_\nsupported?', 9, '#FFA07A'),
]
for name, x, color in metrics:
    draw_box(ax, x, y_pos - 0.8, 1.8, 1.1, name, color, 8, True)

# ============================================================================
# PHASE 8: Output
# ============================================================================
y_pos = 0.2
ax.text(1, y_pos, 'PHASE 8: Final Output', fontsize=12, weight='bold', color='#1976D2')
# Draw arrows from metrics to output
draw_arrow(ax, 1.5, 1.4, 3, 0.9)
draw_arrow(ax, 4, 1.4, 5, 0.9)
draw_arrow(ax, 6.5, 1.4, 7, 0.9)
draw_arrow(ax, 9, 1.4, 8.5, 0.9)
draw_box(ax, 5.5, y_pos - 0.6, 4.5, 0.8,
         'AdvancedTRACEScores Object\n(R, T, C, A values + metadata)',
         COLOR_OUTPUT, 9, True)

# ============================================================================
# Side Panel: Example Values
# ============================================================================
ax.text(11.5, 17.3, 'Example Calculation', fontsize=12, weight='bold', color='#1976D2')
# Bullets, arrows and the rule line below are restored from mis-decoded text.
example_text = '''Given:
• Relevant sentences: 2
all_relevant_sentence_keys:
["doc_0_s0", "doc_0_s1"]
• Utilized sentences: 2
all_utilized_sentence_keys:
["doc_0_s0", "doc_0_s1"]
• Supported sentences: 2/2
All with fully_supported=true
TRACE Metrics:
─────────────────────────────
R = 2 / 20 = 0.10
→ 10% of docs relevant
T = 2 / 2 = 1.00
→ 100% of relevant used
C = 2 / 2 = 1.00
→ 100% relevant info used
A = 1.00
→ No hallucinations
Average = (0.10+1+1+1)/4 = 0.775
'''
draw_box(ax, 11.5, 13.5, 4.5, 6.5, example_text, '#F5F5F5', 7, False)

# ============================================================================
# Key Formula Reference
# ============================================================================
ax.text(11.5, 6.5, 'Key Formulas', fontsize=12, weight='bold', color='#1976D2')
formulas_text = '''Relevance (R):
R = |relevant_sentences| / 20
Utilization (T):
T = |utilized_sentences| / |relevant_sentences|
Completeness (C):
C = |relevant ∩ utilized| / |relevant|
Adherence (A):
A = 1.0 if all fully_supported
else 0.0
'''
draw_box(ax, 11.5, 4.2, 4.5, 3.8, formulas_text, '#F5F5F5', 8, False)

plt.tight_layout()
plt.savefig('TRACE_Metrics_Calculation_Flow.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')
# NOTE(review): leading glyphs restored from mojibake; presumed to be a check
# mark and a chart emoji -- confirm against the original file.
print("✓ Flow diagram created: TRACE_Metrics_Calculation_Flow.png")
print("📊 Shows 8-phase process from input to TRACE metrics")
plt.close()
# ---------------------------------------------------------------------------
# Diagram: detailed sentence mapping and support detection.
# Non-ASCII glyphs (check/cross marks, status icons) were mis-decoded in the
# previous text and are restored here.
# ---------------------------------------------------------------------------
fig2, ax2 = plt.subplots(1, 1, figsize=(14, 10))
ax2.set_xlim(0, 14)
ax2.set_ylim(0, 11)
ax2.axis('off')
ax2.text(7, 10.5, 'Sentence Mapping & Support Detection',
         ha='center', fontsize=14, weight='bold', color='#212121')

# Document sentences: (key, text, is_relevant). Green = relevant, red = not.
ax2.text(1, 9.8, 'Retrieved Documents (Sentencized)', fontsize=11, weight='bold', color='#1976D2')
doc_sentences = [
    ('doc_0_s0', 'COVID-19 is a respiratory disease', True),
    ('doc_0_s1', 'caused by SARS-CoV-2', True),
    ('doc_1_s0', 'The virus spreads via droplets', True),
    ('doc_2_s0', 'Vaccines prevent infection', False),
]
for i, (key, text, relevant) in enumerate(doc_sentences):
    y = 9.2 - (i * 0.6)
    color = '#C8E6C9' if relevant else '#FFCDD2'
    draw_box(ax2, 1, y, 2.5, 0.5, f'{key}\n{text}', color, 7)
    # One connector per document row. The row count now follows the list
    # instead of a hard-coded range(4). The color keyword requires a
    # draw_arrow that accepts it.
    draw_arrow(ax2, 2.8, y, 4.2, y - 2.5, color='#1976D2')

# Response sentences with support mapping:
# (key, text, supporting document keys, fully_supported).
ax2.text(7, 9.8, 'Response Sentences (with Support)', fontsize=11, weight='bold', color='#1976D2')
response_sentences = [
    ('resp_s0', 'COVID-19 is a respiratory disease', 'doc_0_s0, doc_0_s1', True),
    ('resp_s1', 'It spreads through droplets', 'doc_1_s0', True),
]
for i, (key, text, support, fully_supported) in enumerate(response_sentences):
    y = 9.2 - (i * 1.2)
    color = '#C8E6C9' if fully_supported else '#FFCDD2'
    # Response sentence box
    draw_box(ax2, 7, y, 2.8, 0.5, f'{key}: {text}', color, 7)
    # Support information, with distinct glyphs for the two outcomes.
    mark = '✓' if fully_supported else '✗'
    draw_box(ax2, 10, y, 2.5, 0.5, f'Supports: {support}\nFully: {mark}',
             '#FFF9C4' if fully_supported else '#FFE0B2', 7)
    # Connect the two boxes. The previous call used the same point for both
    # ends, giving a zero-length arrow; span the gap between the padded right
    # edge of the response box (7 + 1.4 + 0.05) and the padded left edge of
    # the support box (10 - 1.25 - 0.05) instead.
    draw_arrow(ax2, 8.45, y, 8.7, y, color='#757575')

# Summary stats
ax2.text(1, 5.8, 'Metric Calculations', fontsize=11, weight='bold', color='#1976D2')
stats_text = '''Relevant Sentences:
doc_0_s0 ✓, doc_0_s1 ✓, doc_1_s0 ✓
Count: 3
Relevance (R) = 3/20 = 0.15
Utilized Sentences:
doc_0_s0, doc_0_s1, doc_1_s0
Count: 3
Utilization (T) = 3/3 = 1.00
Completeness (C) = 3/3 = 1.00
Adherence (A) = 1.0
(All 2 sentences fully supported)
Average = (0.15 + 1.0 + 1.0 + 1.0) / 4 = 0.79
'''
draw_box(ax2, 3.5, 3.5, 5.2, 4.2, stats_text, '#E3F2FD', 8)

# Legend: one colour swatch plus label per row.
ax2.text(9.5, 5.8, 'Legend', fontsize=11, weight='bold', color='#1976D2')
legend_items = [
    ('#C8E6C9', 'Relevant / Fully Supported'),
    ('#FFCDD2', 'Not Relevant / Not Supported'),
    ('#FFF9C4', 'Fully Supported'),
    ('#FFE0B2', 'Partially Supported'),
]
for i, (color, label) in enumerate(legend_items):
    y = 5.2 - (i * 0.5)
    rect = mpatches.Rectangle((9.2, y - 0.15), 0.3, 0.3, facecolor=color, edgecolor='#424242')
    ax2.add_patch(rect)
    ax2.text(9.7, y, label, fontsize=8, va='center')

plt.tight_layout()
plt.savefig('Sentence_Support_Mapping.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')
# Release the figure once it is on disk, as the other diagrams do.
plt.close(fig2)
# NOTE(review): leading glyphs restored from mojibake; presumed to be a check
# mark and a chart emoji -- confirm against the original file.
print("✓ Mapping diagram created: Sentence_Support_Mapping.png")
print("📊 Shows sentence-level support detection and metric calculation")
print("\n" + "="*60)
print("Flow Diagrams Created Successfully!")
print("="*60)
print("\nFiles generated:")
print(" 1. TRACE_Metrics_Calculation_Flow.png")
print(" 2. Sentence_Support_Mapping.png")