""" Table Generator - Generate tables from text data """ import re from typing import Dict, List, Optional, Tuple import logging logger = logging.getLogger(__name__) class TableGenerator: """ Automatically generate tables from text content. """ def __init__(self): """Initialize table generator.""" pass def extract_table_data(self, text: str) -> List[List[str]]: """ Extract potential table data from text. Args: text: Input text Returns: List of rows (each row is list of cells) """ # Look for tabular patterns or lists lines = text.split("\n") table_data = [] for line in lines: # Split by multiple spaces or commas cells = re.split(r"\s{2,}|,\s*", line.strip()) if len(cells) > 1 and all(cell.strip() for cell in cells): table_data.append([cell.strip() for cell in cells]) return table_data if table_data else self._create_default_table() def _create_default_table(self) -> List[List[str]]: """Create a default example table.""" return [ ["Item", "Description", "Value"], ["Example 1", "Sample data", "100"], ["Example 2", "Sample data", "200"], ] def generate_summary_table(self, text: str) -> List[List[str]]: """ Generate summary table from text content. Args: text: Text to summarize in table format Returns: Table data """ # Extract key points and create summary table sentences = re.split(r"(?<=[.!?])\s+", text)[:5] # First 5 sentences table = [["Point", "Description"]] for i, sentence in enumerate(sentences, 1): # Truncate long sentences desc = sentence[:50] + "..." if len(sentence) > 50 else sentence table.append([f"Point {i}", desc]) return table def generate_comparison_table(self, items: List[str], attributes: List[str], data: Dict) -> List[List[str]]: """ Generate comparison table. Args: items: Items to compare attributes: Comparison attributes data: Data dictionary {item: {attribute: value}} Returns: Comparison table """ table = [["Item"] + attributes] for item in items: row = [item] for attr in attributes: value = data.get(item, {}).get(attr, "-") row.append(str(value)) table.append(row) return table def generate_statistics_table(self, data_points: List[float]) -> List[List[str]]: """ Generate statistics summary table. Args: data_points: List of numerical data points Returns: Statistics table """ if not data_points: return [["Metric", "Value"], ["Average", "N/A"], ["Min", "N/A"], ["Max", "N/A"]] avg = sum(data_points) / len(data_points) min_val = min(data_points) max_val = max(data_points) med_val = sorted(data_points)[len(data_points) // 2] return [ ["Metric", "Value"], ["Count", str(len(data_points))], ["Average", f"{avg:.2f}"], ["Minimum", f"{min_val:.2f}"], ["Maximum", f"{max_val:.2f}"], ["Median", f"{med_val:.2f}"], ] def format_as_markdown(self, table: List[List[str]]) -> str: """ Format table as Markdown. Args: table: Table data Returns: Markdown table string """ if not table: return "" # Create header md_table = "| " + " | ".join(table[0]) + " |\n" md_table += "|" + "|".join(["---" for _ in table[0]]) + "|\n" # Add rows for row in table[1:]: md_table += "| " + " | ".join(str(cell) for cell in row) + " |\n" return md_table def format_as_html(self, table: List[List[str]]) -> str: """ Format table as HTML. Args: table: Table data Returns: HTML table string """ if not table: return "
" html = "\n" # Header row html += "" for cell in table[0]: html += f"" html += "\n" # Body rows html += "\n" for row in table[1:]: html += "" for cell in row: html += f"" html += "\n" html += "\n" html += "
{cell}
{cell}
" return html def format_as_csv(self, table: List[List[str]]) -> str: """ Format table as CSV. Args: table: Table data Returns: CSV string """ import csv import io output = io.StringIO() writer = csv.writer(output) for row in table: writer.writerow(row) return output.getvalue() def generate_from_dataframe(self, df_dict: Dict) -> List[List[str]]: """ Generate table from dataframe-like dictionary. Args: df_dict: Dictionary with column names as keys and data lists as values Returns: Table data """ if not df_dict: return [] # Create header headers = list(df_dict.keys()) table = [headers] # Get number of rows num_rows = max(len(v) for v in df_dict.values()) if df_dict.values() else 0 # Create rows for i in range(num_rows): row = [] for col_name in headers: value = df_dict[col_name][i] if i < len(df_dict[col_name]) else "-" row.append(str(value)) table.append(row) return table