"""
Data Visualization Helper.

Prepares chart-ready data (label/value series, per-column statistics and
histogram distributions) for frontend visualization.
"""
|
|
import math
from collections import Counter
from typing import Any, Dict, List
|
|
class DataVisualizer:
    """Prepare tabular data (a list of row dicts) for charts and graphs.

    Every public method returns a dict carrying an ``'ok'`` flag. Expected
    failure cases (empty input, no usable values, invalid bin count) are
    reported as ``{'ok': False, 'error': <message>}``.
    """

    def prepare_chart_data(self, data: List[Dict], x_column: str, y_column: str, chart_type: str = 'bar') -> Dict:
        """Build the label/value series for a chart.

        Args:
            data: Rows to plot, one dict per row.
            x_column: Key whose values become the labels. Values are
                stringified; a missing key yields ``''``.
            y_column: Key whose values become the plotted values. A missing
                key yields ``0``.
            chart_type: Chart kind, passed through unchanged.

        Returns:
            On success a dict with ``ok``, ``chart_type``, ``labels``,
            ``values``, ``x_label`` and ``y_label``. On failure
            ``{'ok': False, 'error': <message>}``.
        """
        if not data:
            return {'ok': False, 'error': 'No data provided'}

        # Only the row access can fail (e.g. a row that is not a mapping), so
        # the try body is limited to it. The broad catch is deliberate: this
        # method reports problems in the result dict rather than raising.
        try:
            labels = [str(row.get(x_column, '')) for row in data]
            values = [row.get(y_column, 0) for row in data]
        except Exception as e:
            return {'ok': False, 'error': str(e)}

        return {
            'ok': True,
            'chart_type': chart_type,
            'labels': labels,
            'values': values,
            'x_label': x_column,
            'y_label': y_column,
        }

    @staticmethod
    def _non_null_values(data: List[Dict], column: str) -> List[Any]:
        """Return the values of *column* across *data*, skipping missing/None."""
        candidates = (row.get(column) for row in data)
        return [value for value in candidates if value is not None]

    @staticmethod
    def _all_numeric(values: List[Any]) -> bool:
        """Return True when every value is an int or float (bool counts as int)."""
        return all(isinstance(value, (int, float)) for value in values)

    def analyze_column(self, data: List[Dict], column: str) -> Dict:
        """Compute summary statistics for one column.

        A column is treated as numeric only when *every* non-null value is an
        int or float; otherwise it is treated as categorical. Deciding from
        the first value alone would raise ``TypeError`` on mixed columns.

        Args:
            data: Rows to analyze, one dict per row.
            column: Key of the column to analyze.

        Returns:
            On success a dict with ``ok``, ``column``, ``count`` (non-null
            values), ``null_count``, ``unique_count`` (distinct string
            forms) and ``type``. Numeric columns add ``min``, ``max``,
            ``avg`` and ``sum``; categorical columns add ``most_common``
            (up to 5 ``(value, count)`` pairs). On failure
            ``{'ok': False, 'error': <message>}``.
        """
        if not data:
            return {'ok': False, 'error': 'No data'}

        values = self._non_null_values(data, column)
        if not values:
            return {'ok': False, 'error': 'No values in column'}

        result = {
            'ok': True,
            'column': column,
            'count': len(values),
            'null_count': len(data) - len(values),
            'unique_count': len({str(v) for v in values}),
        }

        if self._all_numeric(values):
            total = sum(values)
            result.update({
                'type': 'numeric',
                'min': min(values),
                'max': max(values),
                'avg': total / len(values),
                'sum': total,
            })
        else:
            counter = Counter(str(v) for v in values)
            result.update({
                'type': 'categorical',
                'most_common': counter.most_common(5),
            })

        return result

    def get_distribution(self, data: List[Dict], column: str, bins: int = 10) -> Dict:
        """Compute a histogram (numeric) or frequency table (categorical).

        Numeric columns are split into ``bins`` equal-width intervals between
        the minimum and maximum. Intervals are half-open except the last,
        which also includes the maximum, so every finite value is counted
        exactly once. NaN and infinite values are excluded from the
        histogram. When all values are equal a single bin holding every
        value is returned.

        Args:
            data: Rows to analyze, one dict per row.
            column: Key of the column to analyze.
            bins: Number of histogram bins for numeric columns, or the
                maximum number of categories for categorical ones. Must be
                at least 1.

        Returns:
            On success a dict with ``ok``, ``type`` and ``distribution``
            (label -> count); numeric results also carry ``bins``. On
            failure ``{'ok': False, 'error': <message>}``.
        """
        if not data:
            return {'ok': False, 'error': 'No data'}
        if bins < 1:
            # bins == 0 used to raise ZeroDivisionError; negative values
            # silently produced an empty histogram.
            return {'ok': False, 'error': 'bins must be at least 1'}

        values = self._non_null_values(data, column)
        if not values:
            return {'ok': False, 'error': 'No values'}

        if not self._all_numeric(values):
            counter = Counter(str(v) for v in values)
            return {
                'ok': True,
                'type': 'categorical',
                'distribution': dict(counter.most_common(bins)),
            }

        # NaN/inf would poison min/max and the bin arithmetic below.
        finite = [v for v in values if math.isfinite(v)]
        if not finite:
            return {'ok': False, 'error': 'No finite numeric values'}

        low = min(finite)
        high = max(finite)
        width = (high - low) / bins

        if width == 0:
            # Zero range: equal-width bins are undefined, so report one bin.
            return {
                'ok': True,
                'type': 'numeric',
                'distribution': {f"{low:.2f}-{high:.2f}": len(finite)},
                'bins': 1,
            }

        labels = []
        for i in range(bins):
            start = low + i * width
            labels.append(f"{start:.2f}-{start + width:.2f}")

        # Labels can collide after rounding to two decimals; counting through
        # the label accumulates into the shared key instead of overwriting.
        distribution = dict.fromkeys(labels, 0)
        last = bins - 1
        for v in finite:
            # Clamp so the maximum lands in the last bin, not one past it.
            index = min(int((v - low) / width), last)
            distribution[labels[index]] += 1

        return {
            'ok': True,
            'type': 'numeric',
            'distribution': distribution,
            'bins': bins,
        }


# Module-level DataVisualizer instance, constructed when the module is imported.
data_visualizer = DataVisualizer()
|
|