Spaces:

gopikrishnait
/

CapStoneRAG10

Sleeping

CapStoneRAG10 / archived_scripts /create_trace_flow_diagrams.py

Developer

Initial commit for HuggingFace Spaces - RAG Capstone Project with Qdrant Cloud

1d10b0a about 2 months ago

16.8 kB

	"""Create simplified flow diagrams for TRACE metrics."""
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Rectangle
	import matplotlib.patches as mpatches

	# Canvas for the first diagram: a 10 x 12 data-unit drawing area. The axes
	# frame is switched off so that only the hand-placed boxes, arrows and text
	# are visible in the saved image.
	fig, ax = plt.subplots(figsize=(12, 10))
	ax.set(xlim=(0, 10), ylim=(0, 12))
	ax.axis('off')

	# Fill colours used by the box-drawing calls in this script.
	COLOR_INPUT = '#E3F2FD'    # raw inputs (query / response / documents)
	COLOR_PROCESS = '#BBDEFB'  # processing steps
	COLOR_DATA = '#81D4FA'     # data produced by a step (LLM JSON, extracted keys)
	COLOR_METRIC = '#FFE0B2'   # TRACE metric boxes
	COLOR_OUTPUT = '#C8E6C9'   # final result box

	def draw_box(ax, x, y, w, h, text, color, size=9, bold=False):
	    """Draw a rounded, outlined box centred on (x, y) with a centred label.

	    Args:
	        ax: Matplotlib axes the box and label are added to.
	        x, y: Centre of the box in data coordinates.
	        w, h: Width and height of the box in data coordinates (the rounded
	            outline is padded by a further 0.05 on every side).
	        text: Label drawn at the centre of the box; may contain newlines.
	        color: Face colour of the box.
	        size: Font size of the label.
	        bold: When true the label is drawn in bold, otherwise in normal
	            weight. Several call sites in this file pass a ninth positional
	            True/False argument; this parameter accepts it.
	            NOTE(review): "bold" is inferred from those call sites -- confirm
	            against the version of the helper they were written for.
	    """
	    # FancyBboxPatch is anchored at its lower-left corner, so shift by half the
	    # width/height to make (x, y) the centre.
	    box = FancyBboxPatch(
	        (x - w / 2, y - h / 2), w, h,
	        boxstyle="round,pad=0.05",
	        edgecolor='#333', facecolor=color, linewidth=1.5,
	    )
	    ax.add_patch(box)
	    ax.text(x, y, text, ha='center', va='center', fontsize=size,
	            weight='bold' if bold else 'normal')

	def draw_arrow(ax, x1, y1, x2, y2, color='#333'):
	    """Draw a '->' arrow from (x1, y1) to (x2, y2) on *ax*.

	    Args:
	        ax: Matplotlib axes the arrow is added to.
	        x1, y1: Tail of the arrow in data coordinates.
	        x2, y2: Head of the arrow in data coordinates.
	        color: Colour of the arrow. Defaults to the dark grey used for the
	            box outlines; call sites in this file that pass ``color=...``
	            override it.
	    """
	    arrow = FancyArrowPatch((x1, y1), (x2, y2), arrowstyle='->',
	                            mutation_scale=20, color=color, linewidth=2)
	    ax.add_patch(arrow)

	# ---- Diagram 1 content: GPT labeling response -> TRACE metrics ----

	# Heading, centred horizontally above the flow.
	ax.text(5, 11.5, 'GPT Labeling Response → TRACE Metrics',
	        ha='center', fontsize=13, weight='bold')

	# Steps 1-5: a vertical chain of boxes in the left column. Each row holds the
	# label, the box centre y, the fill colour, and the start/end y of the arrow
	# drawn right after the box.
	flow_steps = [
	    ('Query + Response\n+ Documents', 10.5, COLOR_INPUT, 10.2, 9.7),
	    ('Sentencize\n(Get keyed sentences)', 9.3, COLOR_PROCESS, 9.0, 8.5),
	    ('Generate GPT\nLabeling Prompt', 8.1, COLOR_PROCESS, 7.8, 7.3),
	    ('Call Groq LLM API', 6.9, COLOR_PROCESS, 6.6, 6.1),
	    ('LLM Returns JSON\nwith sentence mapping', 5.7, COLOR_DATA, 5.4, 4.9),
	]
	for step_label, box_y, fill, arrow_top, arrow_bottom in flow_steps:
	    draw_box(ax, 2, box_y, 3, 0.6, step_label, fill, 9)
	    draw_arrow(ax, 3.5, arrow_top, 3.5, arrow_bottom)

	# Step 6: the extraction box, followed by arrows fanning out towards the
	# metric row below. Pairs are (tail x, head x); all run from y=4.2 to y=3.5.
	draw_box(ax, 2, 4.5, 3, 0.6,
	         'Extract Key Data:\n- relevant_keys\n- utilized_keys\n- support_info',
	         COLOR_DATA, 8)
	fan_out = ((2, 1.2), (2.5, 2.5), (3, 2.8), (3.5, 3.5), (4, 4.2))
	for tail_x, head_x in fan_out:
	    draw_arrow(ax, tail_x, 4.2, head_x, 3.5)

	# TRACE metrics: each entry is (x, y, title, formula, colour). The title box
	# is drawn at (x, y) and a smaller pale-yellow formula box 0.6 below it.
	metrics = [
	    (0.8, 3, 'Relevance\n(R)', 'len(relevant)\n/ 20', COLOR_METRIC),
	    (2.2, 3, 'Utilization\n(T)', 'len(used) /\nlen(relevant)', COLOR_METRIC),
	    (3.6, 3, 'Completeness\n(C)', 'len(R∩T) /\nlen(R)', COLOR_METRIC),
	    (5, 3, 'Adherence\n(A)', 'All fully\nsupported?', COLOR_METRIC),
	]
	for metric_x, metric_y, heading, formula_text, fill in metrics:
	    draw_box(ax, metric_x, metric_y, 1.2, 0.5, heading, fill, 7)
	    draw_box(ax, metric_x, metric_y - 0.6, 1.2, 0.4, formula_text, '#FFF9C4', 6)

	# Final output: one arrow per metric column, all converging on the same
	# point above the result box.
	for metric_x, *_ in metrics:
	    draw_arrow(ax, metric_x, 1.9, 2.5, 1.5)

	draw_box(ax, 2.5, 0.8, 3.5, 0.6,
	         'AdvancedTRACEScores\n(R, T, C, A + metadata)', COLOR_OUTPUT, 9)

	# Worked example shown in a monospace panel on the right-hand side.
	ax.text(7, 10.5, 'Example:', fontsize=11, weight='bold')
	example = '''
	Inputs:
	• relevant sentences: 3
	• utilized sentences: 2
	• all fully supported: Yes

	Results:
	R = 3/20 = 0.15
	T = 2/3 = 0.67
	C = 2/3 = 0.67
	A = 1.0 (no hallucinations)
	Avg = 0.62
	'''
	example_panel = {'boxstyle': 'round', 'facecolor': '#F5F5F5', 'alpha': 0.8}
	ax.text(7.2, 7.5, example, fontsize=8, family='monospace',
	        bbox=example_panel, verticalalignment='top')

	# Write the image to the working directory, then release the figure.
	plt.tight_layout()
	plt.savefig('TRACE_Metrics_Flow.png', dpi=300, bbox_inches='tight', facecolor='white')
	print("✅ Created: TRACE_Metrics_Flow.png")
	plt.close()

	# ---- Diagram 2: sentence-level support mapping ----
	# Fresh 12 x 9 canvas with the axes frame hidden.
	fig, ax = plt.subplots(figsize=(12, 8))
	ax.set(xlim=(0, 12), ylim=(0, 9))
	ax.axis('off')

	ax.text(6, 8.5, 'Sentence Support Mapping from GPT Response',
	        ha='center', fontsize=13, weight='bold')

	# Left column: document sentences as (key, text, is_relevant). Relevant rows
	# are filled green, others red; rows are stacked downwards 0.6 apart.
	ax.text(1.5, 7.8, 'Retrieved Documents', fontsize=10, weight='bold', color='#1976D2')
	docs = [
	    ('doc_0_s0', 'COVID-19 is respiratory disease', True),
	    ('doc_0_s1', 'caused by virus', True),
	    ('doc_1_s0', 'Spreads via droplets', True),
	]
	for row, (doc_key, doc_text, is_relevant) in enumerate(docs):
	    row_y = 7.2 - row*0.6
	    if is_relevant:
	        fill = '#C8E6C9'
	    else:
	        fill = '#FFCDD2'
	    draw_box(ax, 1.5, row_y, 2.5, 0.5, f'{doc_key}\n{doc_text}', fill, 7)

	# Middle/right columns: each response sentence as
	# (key, text, supporting keys, fully_supported), with a companion box that
	# lists its support and a tick/cross for full support.
	ax.text(6, 7.8, 'Response + Support Info', fontsize=10, weight='bold', color='#1976D2')
	responses = [
	    ('resp_s0', 'COVID-19 is respiratory', 'doc_0_s0,s1', True),
	    ('resp_s1', 'Spreads person-to-person', 'doc_1_s0', True),
	]
	for row, (resp_key, resp_text, support, is_full) in enumerate(responses):
	    row_y = 7.2 - row*0.6
	    if is_full:
	        sentence_fill, support_fill, mark = '#C8E6C9', '#FFF9C4', "✓"
	    else:
	        sentence_fill, support_fill, mark = '#FFCDD2', '#FFE0B2', "✗"
	    draw_box(ax, 6, row_y, 2.5, 0.5, f'{resp_key}: {resp_text}', sentence_fill, 7)
	    draw_box(ax, 9.5, row_y, 2, 0.5, f'Support: {support}\nFull: {mark}',
	             support_fill, 6)

	# Worked metric calculation for the sentences above, in a monospace panel.
	calc_text = '''
	Metric Calculations:
	────────────────────
	Relevant count = 3
	[doc_0_s0, doc_0_s1, doc_1_s0]

	Utilized count = 3
	[doc_0_s0, doc_0_s1, doc_1_s0]

	Fully supported = 2/2 responses

	Relevance = 3/20 = 0.15
	Utilization = 3/3 = 1.00
	Completeness = 3/3 = 1.00
	Adherence = 1.0 (no hallucinations)

	Average Score = 0.79
	'''

	calc_panel = {'boxstyle': 'round', 'facecolor': '#F5F5F5', 'edgecolor': '#666'}
	ax.text(1, 4, calc_text, fontsize=8, family='monospace',
	        bbox=calc_panel, verticalalignment='top')

	# Colour legend: one small swatch plus label per entry, 0.4 apart.
	ax.text(7, 4, 'Legend:', fontsize=10, weight='bold', color='#1976D2')
	legend_items = [
	    ('#C8E6C9', 'Relevant/Supported'),
	    ('#FFCDD2', 'Not relevant/unsupported'),
	    ('#FFF9C4', 'Fully supported'),
	    ('#FFE0B2', 'Partially supported'),
	]
	for row, (swatch_color, swatch_label) in enumerate(legend_items):
	    row_y = 3.2 - row*0.4
	    ax.add_patch(Rectangle((6.5, row_y-0.12), 0.25, 0.25,
	                           facecolor=swatch_color, edgecolor='#333'))
	    ax.text(7, row_y, swatch_label, fontsize=8, va='center')

	# Write the image to the working directory, then release the figure.
	plt.tight_layout()
	plt.savefig('Sentence_Mapping_Example.png', dpi=300, bbox_inches='tight', facecolor='white')
	print("✅ Created: Sentence_Mapping_Example.png")
	plt.close()

	# Console summary of the two files written so far.
	banner = "="*50
	print("\n" + banner)
	print("Flow Diagrams Created Successfully!")
	print(banner)
	print("\nGenerated files:")
	print(" 1. TRACE_Metrics_Flow.png - 8-step process flow")
	print(" 2. Sentence_Mapping_Example.png - Sentence mapping details")
	# ============================================================================
	# Diagram 3: phase-by-phase TRACE metrics calculation flow
	# ============================================================================
	# NOTE(review): the statements that originally opened this diagram are missing
	# from the file. As found, the code below read `y_pos` before any assignment
	# (NameError) and drew on the axes of a figure that had already been closed.
	# The figure, axis limits, `y_pos` value and first input box below are
	# reconstructed from the coordinates used in the rest of this section; the
	# diagram title and the Phase 1 heading could not be recovered. Confirm
	# against the original script.
	# NOTE(review): several draw_box calls below pass a ninth positional
	# True/False argument, so the helper must accept one.
	fig, ax = plt.subplots(figsize=(16, 20))
	ax.set_xlim(0, 14)    # side panels reach x = 11.5 + 4.5 / 2 = 13.75
	ax.set_ylim(-1, 18)   # content spans roughly y = -0.8 .. 17.3
	ax.axis('off')

	# Phase 1 input row. y_pos is chosen so the bottoms of these 0.7-tall boxes
	# (y_pos - 0.8 - 0.35) meet the Phase 2 arrows, which start at y = 15.4.
	y_pos = 16.55

	# NOTE(review): reconstructed box -- the Phase 2 arrows start at x = 1.5, 4.5
	# and 7.5, but only the 4.5 and 7.5 boxes survived. Label text is a guess.
	draw_box(ax, 1.5, y_pos - 0.8, 2, 0.7, 'Query', COLOR_INPUT, 8)
	draw_box(ax, 4.5, y_pos - 0.8, 2, 0.7, 'LLM Response\n"COVID-19 is..."', COLOR_INPUT, 8)
	draw_box(ax, 7.5, y_pos - 0.8, 2.5, 0.7, 'Retrieved Documents\n[Doc1, Doc2, Doc3]', COLOR_INPUT, 8)

	# ============================================================================
	# PHASE 2: Sentencization
	# ============================================================================

	y_pos = 14.8
	ax.text(1, y_pos, 'PHASE 2: Sentencization', fontsize=12, weight='bold', color='#1976D2')

	# One arrow per input column, from the Phase 1 row down to this row.
	draw_arrow(ax, 1.5, 15.4, 1.5, 15.0)
	draw_arrow(ax, 4.5, 15.4, 4.5, 15.0)
	draw_arrow(ax, 7.5, 15.4, 7.5, 15.0)

	draw_box(ax, 1.5, y_pos - 0.8, 2.5, 1,
	         'Query Sentences\n(Usually 1 sentence)', COLOR_PROCESS, 8)
	draw_box(ax, 4.5, y_pos - 0.8, 2.5, 1,
	         'Response Sentences\nresp_s0, resp_s1\nresp_s2...', COLOR_PROCESS, 8)
	draw_box(ax, 7.5, y_pos - 0.8, 2.8, 1,
	         'Document Sentences\ndoc_0_s0, doc_0_s1\ndoc_1_s0, doc_1_s1...', COLOR_PROCESS, 8)

	# ============================================================================
	# PHASE 3: Prompt Generation
	# ============================================================================

	y_pos = 13
	ax.text(1, y_pos, 'PHASE 3: GPT Labeling Prompt Generation', fontsize=12, weight='bold', color='#1976D2')

	# The three sentence groups converge on the prompt-generator box.
	draw_arrow(ax, 1.5, 14.0, 2.5, 13.5)
	draw_arrow(ax, 4.5, 14.0, 3.5, 13.5)
	draw_arrow(ax, 7.5, 14.0, 4.5, 13.5)

	draw_box(ax, 3.5, y_pos - 0.9, 5.5, 1.5,
	         'GPTLabelingPromptGenerator.generate_labeling_prompt()\n\nCreates:\n- ROLE section\n- TASK OVERVIEW\n- INPUT DATA (with keys)\n- OUTPUT REQUIREMENTS\n- JSON SCHEMA',
	         COLOR_PROCESS, 8, True)

	draw_arrow(ax, 3.5, y_pos - 1.4, 3.5, 12)

	draw_box(ax, 3.5, y_pos - 2.3, 5.8, 0.9,
	         'Structured Prompt with Sentencized Data\n(Ready to send to LLM)', COLOR_DATA, 8, True)

	# ============================================================================
	# PHASE 4: LLM Call
	# ============================================================================

	y_pos = 11
	ax.text(1, y_pos, 'PHASE 4: LLM API Call (Groq)', fontsize=12, weight='bold', color='#1976D2')

	draw_arrow(ax, 3.5, 11.7, 3.5, 11.4)

	draw_box(ax, 3.5, y_pos - 0.7, 5, 0.9,
	         'Groq LLM\n(llm_client.generate)',
	         '#C5CAE9', 9, True)

	draw_arrow(ax, 3.5, y_pos - 1.1, 3.5, 9.5)

	# ============================================================================
	# PHASE 5: JSON Response
	# ============================================================================

	y_pos = 9
	ax.text(1, y_pos, 'PHASE 5: JSON Response Parsing', fontsize=12, weight='bold', color='#1976D2')

	# Show the JSON response structure
	json_text = '''LLM Response (JSON):
	{
	"relevance_explanation": "...",
	"all_relevant_sentence_keys": ["doc_0_s0", "doc_0_s1"],
	"overall_supported": true,
	"sentence_support_information": [
	{"response_sentence_key": "resp_s0", "fully_supported": true,
	"supporting_sentence_keys": ["doc_0_s0"]},
	{"response_sentence_key": "resp_s1", "fully_supported": true,
	"supporting_sentence_keys": ["doc_0_s1"]}
	],
	"all_utilized_sentence_keys": ["doc_0_s0", "doc_0_s1"]
	}'''

	draw_box(ax, 3.5, y_pos - 2.2, 6.2, 3.2, json_text, COLOR_DATA, 7, False)

	# ============================================================================
	# PHASE 6: Extract Key Data
	# ============================================================================

	y_pos = 4.5
	ax.text(1, y_pos, 'PHASE 6: Extract Data from JSON', fontsize=12, weight='bold', color='#1976D2')

	draw_arrow(ax, 3.5, 5.8, 3.5, 5.2)

	# Extract different data points: one box per JSON field.
	draw_box(ax, 1, y_pos - 0.8, 2.2, 0.9,
	         'Relevant Sentences\nall_relevant_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]',
	         COLOR_METRIC, 7)

	draw_box(ax, 3.5, y_pos - 0.8, 2.2, 0.9,
	         'Utilized Sentences\nall_utilized_\nsentence_keys\n\n["doc_0_s0",\n "doc_0_s1"]',
	         COLOR_METRIC, 7)

	draw_box(ax, 6, y_pos - 0.8, 2.2, 0.9,
	         'Support Info\nsentence_\nsupport_\ninformation\n\n[{...}, {...}]',
	         COLOR_METRIC, 7)

	draw_box(ax, 8.5, y_pos - 0.8, 2.2, 0.9,
	         'Overall Support\noverall_\nsupported\n\ntrue/false',
	         COLOR_METRIC, 7)

	# ============================================================================
	# PHASE 7: Calculate TRACE Metrics
	# ============================================================================

	y_pos = 2.2
	ax.text(1, y_pos, 'PHASE 7: Calculate TRACE Metrics', fontsize=12, weight='bold', color='#1976D2')

	# Draw arrows from extracted data to metrics
	draw_arrow(ax, 1, 3.7, 1.5, 2.9)
	draw_arrow(ax, 3.5, 3.7, 3.5, 2.9)
	draw_arrow(ax, 6, 3.7, 5.5, 2.9)
	draw_arrow(ax, 8.5, 3.7, 7, 2.9)

	# Four TRACE metrics as (label, x, colour).
	metrics = [
	    ('Relevance (R)\nlen(relevant)/20', 1.5, '#FF6B6B'),
	    ('Utilization (T)\nlen(used)/\nlen(relevant)', 4, '#4ECDC4'),
	    ('Completeness (C)\nlen(R∩T)/\nlen(R)', 6.5, '#45B7D1'),
	    ('Adherence (A)\nall fully_\nsupported?', 9, '#FFA07A'),
	]

	for name, x, color in metrics:
	    draw_box(ax, x, y_pos - 0.8, 1.8, 1.1, name, color, 8, True)

	# ============================================================================
	# PHASE 8: Output
	# ============================================================================

	y_pos = 0.2
	ax.text(1, y_pos, 'PHASE 8: Final Output', fontsize=12, weight='bold', color='#1976D2')

	# Draw arrows from metrics to output
	draw_arrow(ax, 1.5, 1.4, 3, 0.9)
	draw_arrow(ax, 4, 1.4, 5, 0.9)
	draw_arrow(ax, 6.5, 1.4, 7, 0.9)
	draw_arrow(ax, 9, 1.4, 8.5, 0.9)

	draw_box(ax, 5.5, y_pos - 0.6, 4.5, 0.8,
	         'AdvancedTRACEScores Object\n(R, T, C, A values + metadata)',
	         COLOR_OUTPUT, 9, True)

	# ============================================================================
	# Side Panel: Example Values
	# ============================================================================

	ax.text(11.5, 17.3, 'Example Calculation', fontsize=12, weight='bold', color='#1976D2')

	example_text = '''Given:
	• Relevant sentences: 2
	all_relevant_sentence_keys:
	["doc_0_s0", "doc_0_s1"]

	• Utilized sentences: 2
	all_utilized_sentence_keys:
	["doc_0_s0", "doc_0_s1"]

	• Supported sentences: 2/2
	All with fully_supported=true

	TRACE Metrics:
	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
	R = 2 / 20 = 0.10
	→ 10% of docs relevant

	T = 2 / 2 = 1.00
	→ 100% of relevant used

	C = 2 / 2 = 1.00
	→ 100% relevant info used

	A = 1.00
	→ No hallucinations

	Average = (0.10+1+1+1)/4 = 0.775
	'''

	draw_box(ax, 11.5, 13.5, 4.5, 6.5, example_text, '#F5F5F5', 7, False)

	# ============================================================================
	# Key Formula Reference
	# ============================================================================

	ax.text(11.5, 6.5, 'Key Formulas', fontsize=12, weight='bold', color='#1976D2')

	# The cardinality bars were previously written as backslash-pipe, which is an
	# invalid escape sequence in a normal string and rendered a literal backslash.
	formulas_text = '''Relevance (R):
	R = |relevant_sentences| / 20

	Utilization (T):
	T = |utilized_sentences| / |relevant_sentences|

	Completeness (C):
	C = |relevant ∩ utilized| / |relevant|

	Adherence (A):
	A = 1.0 if all fully_supported
	else 0.0
	'''

	draw_box(ax, 11.5, 4.2, 4.5, 3.8, formulas_text, '#F5F5F5', 8, False)

	# Write the image to the working directory, then release the figure.
	plt.tight_layout()
	plt.savefig('TRACE_Metrics_Calculation_Flow.png', dpi=300, bbox_inches='tight',
	            facecolor='white', edgecolor='none')
	print("✅ Flow diagram created: TRACE_Metrics_Calculation_Flow.png")
	print("📊 Shows 8-phase process from input to TRACE metrics")

	plt.close()

	# Create a second diagram showing the detailed sentence mapping.
	# It uses its own figure/axes pair (fig2, ax2) on a 14 x 11 canvas with the
	# axes frame hidden.
	fig2, ax2 = plt.subplots(1, 1, figsize=(14, 10))
	ax2.set_xlim(0, 14)
	ax2.set_ylim(0, 11)
	ax2.axis('off')

	ax2.text(7, 10.5, 'Sentence Mapping & Support Detection',
	         ha='center', fontsize=14, weight='bold', color='#212121')

	# Document sentences as (key, text, is_relevant); relevant rows are green,
	# non-relevant rows red, stacked downwards 0.6 apart.
	ax2.text(1, 9.8, 'Retrieved Documents (Sentencized)', fontsize=11, weight='bold', color='#1976D2')
	doc_sentences = [
	    ('doc_0_s0', 'COVID-19 is a respiratory disease', True),
	    ('doc_0_s1', 'caused by SARS-CoV-2', True),
	    ('doc_1_s0', 'The virus spreads via droplets', True),
	    ('doc_2_s0', 'Vaccines prevent infection', False),
	]

	for i, (key, text, relevant) in enumerate(doc_sentences):
	    y = 9.2 - (i * 0.6)
	    color = '#C8E6C9' if relevant else '#FFCDD2'
	    draw_box(ax2, 1, y, 2.5, 0.5, f'{key}\n{text}', color, 7)

	# One arrow per document row. The count follows doc_sentences rather than a
	# hard-coded 4, so adding or removing a row keeps the arrows in step.
	for i, _ in enumerate(doc_sentences):
	    y = 9.2 - (i * 0.6)
	    draw_arrow(ax2, 2.8, y, 4.2, y - 2.5, color='#1976D2')

	# Response sentences with support mapping, as
	# (key, text, supporting keys, fully_supported), stacked 1.2 apart.
	ax2.text(7, 9.8, 'Response Sentences (with Support)', fontsize=11, weight='bold', color='#1976D2')
	response_sentences = [
	    ('resp_s0', 'COVID-19 is a respiratory disease', 'doc_0_s0, doc_0_s1', True),
	    ('resp_s1', 'It spreads through droplets', 'doc_1_s0', True),
	]

	# Horizontal gap between the response box (centre 7, width 2.8) and the
	# support box (centre 10, width 2.5); the connector arrow spans this gap.
	response_right_edge = 7 + 2.8 / 2
	support_left_edge = 10 - 2.5 / 2

	for i, (key, text, support, fully_supported) in enumerate(response_sentences):
	    y = 9.2 - (i * 1.2)
	    color = '#C8E6C9' if fully_supported else '#FFCDD2'

	    # Response sentence box
	    draw_box(ax2, 7, y, 2.8, 0.5, f'{key}: {text}', color, 7)

	    # Support information
	    draw_box(ax2, 10, y, 2.5, 0.5, f'Supports: {support}\nFully: {"✓" if fully_supported else "✗"}',
	             '#FFF9C4' if fully_supported else '#FFE0B2', 7)

	    # Connect the two boxes. Previously the arrow started and ended at the
	    # same point (8.8, y), i.e. it had zero length and showed no direction.
	    draw_arrow(ax2, response_right_edge, y, support_left_edge, y, color='#757575')

	# Summary stats
	ax2.text(1, 5.8, 'Metric Calculations', fontsize=11, weight='bold', color='#1976D2')

	stats_text = '''Relevant Sentences:
	doc_0_s0 ✓, doc_0_s1 ✓, doc_1_s0 ✓
	Count: 3
	Relevance (R) = 3/20 = 0.15

	Utilized Sentences:
	doc_0_s0, doc_0_s1, doc_1_s0
	Count: 3
	Utilization (T) = 3/3 = 1.00

	Completeness (C) = 3/3 = 1.00

	Adherence (A) = 1.0
	(All 2 sentences fully supported)

	Average = (0.15 + 1.0 + 1.0 + 1.0) / 4 = 0.79
	'''

	draw_box(ax2, 3.5, 3.5, 5.2, 4.2, stats_text, '#E3F2FD', 8)

	# Legend: one colour swatch plus label per entry, 0.5 apart.
	ax2.text(9.5, 5.8, 'Legend', fontsize=11, weight='bold', color='#1976D2')

	legend_items = [
	    ('#C8E6C9', 'Relevant / Fully Supported'),
	    ('#FFCDD2', 'Not Relevant / Not Supported'),
	    ('#FFF9C4', 'Fully Supported'),
	    ('#FFE0B2', 'Partially Supported'),
	]

	for i, (color, label) in enumerate(legend_items):
	    y = 5.2 - (i * 0.5)
	    rect = mpatches.Rectangle((9.2, y - 0.15), 0.3, 0.3, facecolor=color, edgecolor='#424242')
	    ax2.add_patch(rect)
	    ax2.text(9.7, y, label, fontsize=8, va='center')

	# Write the image to the working directory.
	plt.tight_layout()
	plt.savefig('Sentence_Support_Mapping.png', dpi=300, bbox_inches='tight',
	            facecolor='white', edgecolor='none')
	print("✅ Mapping diagram created: Sentence_Support_Mapping.png")
	print("📊 Shows sentence-level support detection and metric calculation")

	# Release the figure, as is done after the other savefig calls in this file.
	plt.close(fig2)

	print("\n" + "="*60)
	print("Flow Diagrams Created Successfully!")
	print("="*60)
	print("\nFiles generated:")
	print(" 1. TRACE_Metrics_Calculation_Flow.png")
	print(" 2. Sentence_Support_Mapping.png")