CodeReviewBench

Sleeping

CodeReviewBench / src /display /formatting.py

Alex

zalupa1

982b341 8 months ago

6.15 kB

	"""
	Formatting utilities for display components
	"""

	import html
	import re
	from datetime import datetime, timezone
	from typing import List, Dict, Any, Optional

	def format_score(score: float, precision: int = 3) -> str:
	    """Render a numeric score as fixed-point text.

	    Args:
	        score: Value to format. ``int`` and ``float`` values are formatted
	            numerically; anything else is converted with ``str``.
	        precision: Number of digits after the decimal point.

	    Returns:
	        The formatted score.
	    """
	    is_numeric = isinstance(score, (int, float))
	    if not is_numeric:
	        # Non-numeric placeholders are shown verbatim.
	        return str(score)
	    return format(score, f".{precision}f")

	def format_percentage(score: float, precision: int = 1) -> str:
	    """Render a fractional score as a percentage string.

	    Args:
	        score: Fraction to display; it is multiplied by 100 before
	            formatting. Non-numeric values are converted with ``str``.
	        precision: Number of digits after the decimal point.

	    Returns:
	        The percentage text with a trailing ``%`` for numeric input, or
	        ``str(score)`` otherwise.
	    """
	    if not isinstance(score, (int, float)):
	        return str(score)
	    percent = score * 100
	    return f"{percent:.{precision}f}%"

	def format_model_name(name: str) -> str:
	    """Format a model name as an HTML snippet for display.

	    Surrounding whitespace is stripped. A name of the form ``org/model`` is
	    split at the first ``/``: the organisation part is rendered in a muted
	    ``<span>`` and the model part in ``<strong>``. A name without ``/`` is
	    rendered entirely in ``<strong>``.

	    Args:
	        name: Raw model name.

	    Returns:
	        An HTML fragment. The name parts are HTML-escaped so that a name
	        containing markup characters cannot inject tags into the page.
	    """
	    name = name.strip()
	    org, slash, model = name.partition("/")
	    if slash:
	        # Escape each part separately; the markup around them is ours.
	        return f"<span style='color: var(--text-secondary); font-size: 0.9em;'>{html.escape(org)}/</span><strong>{html.escape(model)}</strong>"
	    return f"<strong>{html.escape(name)}</strong>"

	def format_timestamp(timestamp: str) -> str:
	    """Format an ISO-8601 timestamp as ``YYYY-MM-DD HH:MM UTC``.

	    A trailing ``Z`` designator is accepted. Timestamps that carry a UTC
	    offset are converted to UTC before formatting, so the ``UTC`` label in
	    the output is accurate. Timestamps without an offset are formatted
	    as-is (i.e. treated as already being in UTC).

	    Args:
	        timestamp: ISO-8601 timestamp string.

	    Returns:
	        The formatted timestamp, or ``timestamp`` unchanged when it cannot
	        be parsed (best-effort display, never raises for bad input).
	    """
	    try:
	        dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
	        if dt.tzinfo is not None:
	            # Normalise aware datetimes so the "UTC" suffix is truthful.
	            dt = dt.astimezone(timezone.utc)
	        return dt.strftime("%Y-%m-%d %H:%M UTC")
	    except (AttributeError, TypeError, ValueError, OverflowError):
	        # Non-string input or an unparseable/out-of-range value: show raw.
	        return timestamp

	def format_ip_address(ip: str) -> str:
	    """Partially mask an IP address for display.

	    Args:
	        ip: Address string; a falsy value yields ``"Unknown"``.

	    Returns:
	        For a value with exactly four dot-separated parts, the first three
	        parts followed by ``.xxx``. Any other non-empty value is fully
	        masked as ``xxx.xxx.xxx.xxx``.
	    """
	    if not ip:
	        return "Unknown"
	    octets = ip.split(".")
	    if len(octets) != 4:
	        # Not a dotted quad: hide everything.
	        return "xxx.xxx.xxx.xxx"
	    visible = ".".join(octets[:3])
	    return f"{visible}.xxx"

	def format_metric_score(score: int, metric_name: str) -> str:
	    """Wrap a metric score in a colour-coded HTML ``<span>``.

	    Args:
	        score: Numeric score. Non-numeric values are returned via ``str``
	            without any markup.
	        metric_name: Name of the metric; not used by this function.

	    Returns:
	        A ``<span>`` whose colour is green for scores >= 8, yellow for
	        >= 6, orange for >= 4 and red otherwise.
	    """
	    if not isinstance(score, (int, float)):
	        return str(score)

	    # (lower bound, colour) pairs, checked from the highest band down.
	    bands = (
	        (8, "#28a745"),  # Green
	        (6, "#ffc107"),  # Yellow
	        (4, "#fd7e14"),  # Orange
	    )
	    color = "#dc3545"  # Red
	    for lower_bound, shade in bands:
	        if score >= lower_bound:
	            color = shade
	            break

	    return f"<span style='color: {color}; font-weight: 600;'>{score}</span>"

	def format_language_badge(language: str) -> str:
	    """Format a programming language name as a coloured HTML badge.

	    Args:
	        language: Language name. A falsy value or the literal ``"All"`` is
	            returned unchanged, without badge markup.

	    Returns:
	        A pill-shaped ``<span>`` whose background is the language's colour
	        (grey for languages not in the table). The label is HTML-escaped so
	        an unexpected value cannot inject markup.
	    """
	    if not language or language == "All":
	        return language

	    # Language-specific colors
	    colors = {
	        "Python": "#3776ab",
	        "JavaScript": "#f7df1e",
	        "Java": "#ed8b00",
	        "C++": "#00599c",
	        "C#": "#239120",
	        "Go": "#00add8",
	        "Rust": "#ce422b",
	        "TypeScript": "#3178c6",
	        "PHP": "#777bb4",
	        "Ruby": "#cc342d",
	        "Swift": "#fa7343",
	        "Kotlin": "#7f52ff",
	        "Scala": "#dc322f",
	        "R": "#276dc3",
	        "MATLAB": "#e16737",
	    }

	    color = colors.get(language, "#6c757d")
	    label = html.escape(language)
	    return f"<span style='background: {color}; color: white; padding: 2px 8px; border-radius: 12px; font-size: 0.8em; font-weight: 500;'>{label}</span>"

	def format_taxonomy_badge(category: str) -> str:
	    """Format a taxonomy category as a coloured HTML badge.

	    Args:
	        category: Category name. A falsy value or the literal ``"All"`` is
	            returned unchanged, without badge markup.

	    Returns:
	        A pill-shaped ``<span>`` whose background is the category's colour
	        (grey for categories not in the table). The label is HTML-escaped
	        so an unexpected value cannot inject markup.
	    """
	    if not category or category == "All":
	        return category

	    # Category-specific colors
	    colors = {
	        "Bug Detection": "#dc3545",
	        "Code Style": "#6f42c1",
	        "Performance": "#fd7e14",
	        "Security": "#e83e8c",
	        "Maintainability": "#20c997",
	        "Documentation": "#17a2b8",
	        "Testing": "#28a745",
	        "Architecture": "#6c757d",
	        "Best Practices": "#007bff",
	        "Refactoring": "#ffc107",
	    }

	    color = colors.get(category, "#6c757d")
	    label = html.escape(category)
	    return f"<span style='background: {color}; color: white; padding: 2px 8px; border-radius: 12px; font-size: 0.8em; font-weight: 500;'>{label}</span>"

	def format_comment_language_flag(language: str) -> str:
	    """Prefix a comment (natural) language name with a flag emoji.

	    Args:
	        language: Language name. A falsy value or the literal ``"All"`` is
	            returned unchanged.

	    Returns:
	        ``"<flag> <language>"``; languages missing from the table get a
	        globe emoji instead of a flag.
	    """
	    if language == "All" or not language:
	        return language

	    # Flag emoji keyed by language name.
	    flag_by_language = dict(
	        English="🇺🇸",
	        Chinese="🇨🇳",
	        Spanish="🇪🇸",
	        French="🇫🇷",
	        German="🇩🇪",
	        Japanese="🇯🇵",
	        Korean="🇰🇷",
	        Russian="🇷🇺",
	        Portuguese="🇵🇹",
	        Italian="🇮🇹",
	        Dutch="🇳🇱",
	    )

	    emoji = flag_by_language.get(language, "🌐")
	    return " ".join((emoji, language))

	def sanitize_html(text: str) -> str:
	    """Strip the most common XSS vectors from a piece of text.

	    Removes ``<script>...</script>`` and ``<iframe>...</iframe>`` elements
	    (case-insensitively, across line breaks) and inline ``on*=`` event
	    handler attributes, whether double-quoted, single-quoted or unquoted.

	    This is a best-effort regex filter, not a full HTML sanitizer.

	    Args:
	        text: Content to clean. Non-string values are converted with
	            ``str`` and then cleaned the same way.

	    Returns:
	        The text with the constructs above removed.
	    """
	    if not isinstance(text, str):
	        text = str(text)

	    flags = re.DOTALL | re.IGNORECASE

	    # Remove potentially dangerous HTML elements together with their content.
	    text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=flags)
	    text = re.sub(r'<iframe[^>]*>.*?</iframe>', '', text, flags=flags)

	    # Remove inline event handlers. The leading \b keeps ordinary attributes
	    # that merely contain "on" (e.g. content="...", font="...") intact.
	    text = re.sub(
	        r'''\bon\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)''',
	        '',
	        text,
	        flags=re.IGNORECASE,
	    )

	    return text

	def truncate_text(text: str, max_length: int = 50) -> str:
	    """Shorten text to at most ``max_length`` characters.

	    Args:
	        text: Text to shorten; non-string values are converted with ``str``.
	        max_length: Maximum length of the result, including the ellipsis.

	    Returns:
	        ``text`` unchanged when it already fits. Otherwise the leading
	        characters followed by ``"..."``, with a total length of
	        ``max_length``. When ``max_length`` is below 3 there is no room for
	        the ellipsis, so the text is simply cut to ``max_length`` characters
	        (an empty string for zero or negative limits).
	    """
	    if not isinstance(text, str):
	        text = str(text)

	    if len(text) <= max_length:
	        return text

	    if max_length < 3:
	        # A negative slice end would keep almost the whole text and then
	        # append "...", overshooting the limit; hard-cut instead.
	        return text[:max(max_length, 0)]

	    return text[:max_length - 3] + "..."

	def format_table_cell(value: Any, column_name: str) -> str:
	    """Format a table cell according to the column it belongs to.

	    The column name is matched case-insensitively:

	    * ``bleu`` / ``pass@1`` / ``pass@5`` / ``pass@10`` -> ``format_percentage``
	    * ``model`` -> ``format_model_name``
	    * ``programming language`` -> ``format_language_badge``
	    * ``comment language`` -> ``format_comment_language_flag``
	    * ``taxonomy`` -> ``format_taxonomy_badge``
	    * the ten review-quality metric columns -> ``format_metric_score``
	    * anything else -> ``sanitize_html`` on the stringified value

	    Args:
	        value: Raw cell value; ``None`` is rendered as ``"N/A"``.
	        column_name: Header of the column the value belongs to.

	    Returns:
	        The display string for the cell.
	    """
	    if value is None:
	        return "N/A"

	    column = column_name.lower()

	    percentage_columns = {"bleu", "pass@1", "pass@5", "pass@10"}
	    if column in percentage_columns:
	        return format_percentage(value)

	    # Columns whose formatter takes the stringified value.
	    text_formatters = {
	        "model": format_model_name,
	        "programming language": format_language_badge,
	        "comment language": format_comment_language_flag,
	        "taxonomy": format_taxonomy_badge,
	    }
	    formatter = text_formatters.get(column)
	    if formatter is not None:
	        return formatter(str(value))

	    metric_columns = {
	        "readability", "relevance", "explanation clarity",
	        "problem identification", "actionability", "completeness",
	        "specificity", "contextual adequacy", "consistency", "brevity",
	    }
	    if column in metric_columns:
	        return format_metric_score(value, column)

	    return sanitize_html(str(value))