Spaces:
Running
Running
File size: 7,198 Bytes
b67578f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
"""Formatting utilities for MCP tool outputs."""
import json
from typing import Any, Dict, List
def format_dataset_list(datasets: List[Dict[str, Any]]) -> str:
    """Format a list of datasets as a Markdown listing.

    Args:
        datasets: Dataset records. Each record must contain ``id``;
            ``downloads``, ``likes`` and ``tags`` are optional.

    Returns:
        Markdown text with one numbered section per dataset, or
        ``"No datasets found."`` when ``datasets`` is empty.

    Raises:
        KeyError: If a record has no ``id`` key.
    """
    if not datasets:
        return "No datasets found."

    lines = ["## Datasets Found\n"]
    for i, ds in enumerate(datasets, 1):
        # The "," format spec is only valid for numbers, so a missing or
        # null download count must be rendered without it.
        downloads = ds.get('downloads')
        if downloads is None:
            downloads_text = "N/A"
        elif isinstance(downloads, (int, float)):
            downloads_text = f"{downloads:,}"
        else:
            downloads_text = str(downloads)

        lines.append(f"### {i}. {ds['id']}")
        lines.append(f"- Downloads: {downloads_text}")
        lines.append(f"- Likes: {ds.get('likes', 'N/A')}")
        if ds.get('tags'):
            # Only the first five tags are shown.
            lines.append(f"- Tags: {', '.join(ds['tags'][:5])}")
        lines.append("")  # blank line between entries
    return "\n".join(lines)
def format_dataset_info(info: Dict[str, Any]) -> str:
    """Format a single dataset's metadata as Markdown.

    Args:
        info: Dataset record. Must contain ``id``; ``author``,
            ``downloads``, ``likes``, ``license``, ``tags`` and
            ``card_summary`` are optional.

    Returns:
        Markdown text with a bullet list of metadata, followed by the card
        summary (cut to 1500 characters plus ``...``) when one is present.

    Raises:
        KeyError: If ``info`` has no ``id`` key.
    """
    # A key that is present but null would crash the "," format spec, so
    # only numbers are formatted with a thousands separator.
    downloads = info.get('downloads', 0)
    if downloads is None:
        downloads_text = "N/A"
    elif isinstance(downloads, (int, float)):
        downloads_text = f"{downloads:,}"
    else:
        downloads_text = str(downloads)

    lines = [f"## Dataset: {info['id']}\n"]
    lines.append(f"- **Author**: {info.get('author', 'N/A')}")
    lines.append(f"- **Downloads**: {downloads_text}")
    lines.append(f"- **Likes**: {info.get('likes', 0)}")
    lines.append(f"- **License**: {info.get('license', 'N/A')}")

    if info.get('tags'):
        # Only the first ten tags are shown.
        lines.append(f"- **Tags**: {', '.join(info['tags'][:10])}")

    summary = info.get('card_summary')
    if summary:
        lines.append("\n### Dataset Card (Summary)")
        if len(summary) > 1500:
            summary = summary[:1500] + "..."
        lines.append(summary)

    return "\n".join(lines)
def format_schema(schema: Dict[str, Any]) -> str:
    """Render a dataset schema as a Markdown table.

    Args:
        schema: Mapping that may carry ``num_columns`` and a ``features``
            mapping of column name to type. If it has an ``error`` key,
            only that error is reported.

    Returns:
        ``"Error: <message>"`` when ``schema`` contains ``error``; otherwise
        Markdown with the column count and one table row per feature.
    """
    if "error" in schema:
        return f"Error: {schema['error']}"

    heading = [
        "## Dataset Schema\n",
        f"**Number of columns**: {schema.get('num_columns', 'N/A')}\n",
        "### Columns\n",
        "| Column | Type |",
        "|--------|------|",
    ]
    table_rows = [
        f"| `{name}` | {kind} |"
        for name, kind in schema.get('features', {}).items()
    ]
    return "\n".join(heading + table_rows)
def format_sample(samples: List[Dict[str, Any]], dataset_id: str) -> str:
    """Render sample rows as Markdown with one JSON code fence per row.

    Args:
        samples: Rows to display. If the first row has an ``error`` key,
            only that error is reported.
        dataset_id: Identifier shown in the heading.

    Returns:
        ``"No samples available."`` for an empty list, an error line when
        the first row carries ``error``, otherwise the Markdown rendering.
        Each row's JSON text is cut to at most 1000 characters.
    """
    if not samples:
        return "No samples available."

    first_row = samples[0]
    if "error" in first_row:
        return f"Error loading samples: {first_row['error']}"

    parts = [
        f"## Sample from `{dataset_id}`\n",
        f"Showing {len(samples)} row(s):\n",
    ]
    for number, record in enumerate(samples, 1):
        # default=str stringifies values json cannot serialize natively.
        rendered = json.dumps(record, indent=2, default=str, ensure_ascii=False)
        parts.extend([
            f"### Row {number}",
            "```json",
            rendered[:1000],
            "```\n",
        ])
    return "\n".join(parts)
def format_statistics(stats: Dict[str, Any]) -> str:
    """Format dataset statistics as Markdown.

    Args:
        stats: Mapping that may carry ``total_rows`` and ``column_stats``
            (column name -> mapping of statistic name to value). If it has
            an ``error`` key, only that error is reported.

    Returns:
        ``"Error: <message>"`` when ``stats`` contains ``error``; otherwise
        Markdown with the total row count and, when present, one section
        per column. Float statistics are shown with two decimals.
    """
    if "error" in stats:
        return f"Error: {stats['error']}"

    # The "," format spec is only valid for numbers, so a missing or null
    # row count must be rendered without it.
    total_rows = stats.get('total_rows')
    if total_rows is None:
        total_text = "N/A"
    elif isinstance(total_rows, (int, float)):
        total_text = f"{total_rows:,}"
    else:
        total_text = str(total_rows)

    lines = ["## Dataset Statistics\n"]
    lines.append(f"**Total rows**: {total_text}\n")

    if stats.get('column_stats'):
        lines.append("### Column Statistics\n")
        for col, col_stats in stats['column_stats'].items():
            lines.append(f"#### `{col}`")
            for key, value in col_stats.items():
                if isinstance(value, float):
                    lines.append(f"- {key}: {value:.2f}")
                else:
                    lines.append(f"- {key}: {value}")
            lines.append("")  # blank line between columns

    return "\n".join(lines)
def format_quality_report(report: Dict[str, Any]) -> str:
    """Format a data quality report as Markdown.

    Args:
        report: Mapping that may carry ``overall_score``, ``issues`` (an
            iterable of messages) and ``column_quality`` (column name ->
            mapping with ``missing_pct``, ``unique_pct`` and ``issues``).
            If it has an ``error`` key, only that error is reported.

    Returns:
        ``"Error: <message>"`` when ``report`` contains ``error``; otherwise
        Markdown with the sections whose data is present.
    """
    if "error" in report:
        return f"Error: {report['error']}"

    lines = ["## Data Quality Report\n"]

    # Overall score
    if "overall_score" in report:
        score = report['overall_score']
        # Each tier gets its own marker. Escape sequences are used so the
        # markers survive tooling that strips non-ASCII characters.
        if score >= 80:
            emoji = "\U0001F7E2"  # green circle
        elif score >= 60:
            emoji = "\U0001F7E1"  # yellow circle
        else:
            emoji = "\U0001F534"  # red circle
        lines.append(f"**Overall Quality Score**: {emoji} {score}/100\n")

    # Issues
    if report.get('issues'):
        lines.append("### Issues Found\n")
        for issue in report['issues']:
            lines.append(f"- {issue}")
        lines.append("")

    # Column-level quality
    if report.get('column_quality'):
        lines.append("### Column Quality\n")
        lines.append("| Column | Missing % | Unique % | Issues |")
        lines.append("|--------|-----------|----------|--------|")
        for col, quality in report['column_quality'].items():
            missing = quality.get('missing_pct', 0)
            unique = quality.get('unique_pct', 0)
            issues = quality.get('issues', '-')
            # Join a collection of issues so the cell does not show a
            # Python list repr; an empty collection shows the placeholder.
            if isinstance(issues, (list, tuple)):
                issues = ", ".join(str(item) for item in issues) or '-'
            lines.append(f"| `{col}` | {missing:.1f}% | {unique:.1f}% | {issues} |")

    return "\n".join(lines)
def format_comparison(comparison: Dict[str, Any]) -> str:
    """Render a two-dataset comparison as Markdown.

    Args:
        comparison: Mapping with ``dataset_a`` and ``dataset_b`` names and,
            optionally, ``comparison`` (aspect -> mapping with ``a`` and
            ``b`` values), ``common_columns``, ``unique_to_a`` and
            ``unique_to_b``. If it has an ``error`` key, only that error
            is reported.

    Returns:
        ``"Error: <message>"`` when ``comparison`` contains ``error``;
        otherwise a Markdown table followed by the column lists that are
        present and non-empty.

    Raises:
        KeyError: If ``dataset_a`` or ``dataset_b`` is missing.
    """
    if "error" in comparison:
        return f"Error: {comparison['error']}"

    out = [
        "## Dataset Comparison\n",
        f"Comparing **{comparison['dataset_a']}** vs **{comparison['dataset_b']}**\n",
        "| Aspect | Dataset A | Dataset B |",
        "|--------|-----------|-----------|",
    ]
    out.extend(
        f"| {aspect} | {pair.get('a', 'N/A')} | {pair.get('b', 'N/A')} |"
        for aspect, pair in comparison.get('comparison', {}).items()
    )

    # The first label starts with a newline to separate the lists from
    # the table.
    column_sections = (
        ('common_columns', "\n**Common columns**: "),
        ('unique_to_a', "**Unique to A**: "),
        ('unique_to_b', "**Unique to B**: "),
    )
    for key, label in column_sections:
        names = comparison.get(key)
        if names:
            out.append(label + ', '.join(names))

    return "\n".join(out)
def format_similar_datasets(similar: List[Dict[str, Any]]) -> str:
    """Format a list of similar datasets as Markdown.

    Args:
        similar: Dataset records. Each record must contain ``id``;
            ``similarity_score``, ``downloads`` and ``reason`` are
            optional.

    Returns:
        Markdown text with one numbered section per dataset, or
        ``"No similar datasets found."`` when ``similar`` is empty.

    Raises:
        KeyError: If a record has no ``id`` key.
    """
    if not similar:
        return "No similar datasets found."

    lines = ["## Similar Datasets\n"]
    for i, ds in enumerate(similar, 1):
        score = ds.get('similarity_score', 0)

        # The "," format spec is only valid for numbers, so a missing or
        # null download count must be rendered without it.
        downloads = ds.get('downloads')
        if downloads is None:
            downloads_text = "N/A"
        elif isinstance(downloads, (int, float)):
            downloads_text = f"{downloads:,}"
        else:
            downloads_text = str(downloads)

        lines.append(f"### {i}. {ds['id']} (similarity: {score:.2f})")
        lines.append(f"- Downloads: {downloads_text}")
        if ds.get('reason'):
            lines.append(f"- Why similar: {ds['reason']}")
        lines.append("")  # blank line between entries
    return "\n".join(lines)
def format_task_suggestions(suggestions: Dict[str, Any]) -> str:
    """Format suggested ML tasks as Markdown.

    Args:
        suggestions: Mapping that may carry ``dataset_id`` and ``tasks``.
            Each task must contain ``name``; ``confidence``, ``reason``,
            ``target_column`` and ``feature_columns`` are optional. If the
            mapping has an ``error`` key, only that error is reported.

    Returns:
        ``"Error: <message>"`` when ``suggestions`` contains ``error``;
        otherwise a heading followed by one numbered section per task.

    Raises:
        KeyError: If a task has no ``name`` key.
    """
    if "error" in suggestions:
        return f"Error: {suggestions['error']}"

    lines = [f"## Suggested ML Tasks for `{suggestions.get('dataset_id', 'dataset')}`\n"]

    if suggestions.get('tasks'):
        for i, task in enumerate(suggestions['tasks'], 1):
            confidence = task.get('confidence', 'medium')
            # Each confidence level gets its own marker; anything other
            # than 'high' or 'medium' is shown as low. Escape sequences
            # are used so the markers survive tooling that strips
            # non-ASCII characters.
            if confidence == 'high':
                emoji = "\U0001F7E2"  # green circle
            elif confidence == 'medium':
                emoji = "\U0001F7E1"  # yellow circle
            else:
                emoji = "\U0001F534"  # red circle

            lines.append(f"### {i}. {task['name']} {emoji}")
            lines.append(f"- **Confidence**: {confidence}")
            lines.append(f"- **Reason**: {task.get('reason', 'Based on dataset structure')}")
            if task.get('target_column'):
                lines.append(f"- **Target column**: `{task['target_column']}`")
            if task.get('feature_columns'):
                # Only the first five feature columns are shown.
                features = ', '.join(f"`{c}`" for c in task['feature_columns'][:5])
                lines.append(f"- **Feature columns**: {features}")
            lines.append("")  # blank line between tasks

    return "\n".join(lines)
|