""" Sistema de exportacao de dados em multiplos formatos. Suporta: - JSON - CSV - Markdown - PDF (requer reportlab) """ from typing import List, Dict, Any, Optional import json import csv from io import StringIO, BytesIO from datetime import datetime class DataExporter: """Exportador de dados em multiplos formatos.""" @staticmethod def export_to_json( data: List[Dict[str, Any]], pretty: bool = True ) -> str: """ Exporta dados para JSON. Args: data: Dados a exportar pretty: Se True, formata JSON (indentacao) Returns: String JSON """ if pretty: return json.dumps(data, indent=2, ensure_ascii=False) return json.dumps(data, ensure_ascii=False) @staticmethod def export_to_csv( data: List[Dict[str, Any]], columns: Optional[List[str]] = None ) -> str: """ Exporta dados para CSV. Args: data: Dados a exportar columns: Colunas a incluir (opcional, usa todas se None) Returns: String CSV """ if not data: return "" # Determinar colunas if columns is None: columns = list(data[0].keys()) # Criar CSV output = StringIO() writer = csv.DictWriter(output, fieldnames=columns, extrasaction='ignore') writer.writeheader() for row in data: writer.writerow(row) return output.getvalue() @staticmethod def export_to_markdown( data: List[Dict[str, Any]], title: Optional[str] = None, columns: Optional[List[str]] = None ) -> str: """ Exporta dados para Markdown (tabela). Args: data: Dados a exportar title: Titulo do documento (opcional) columns: Colunas a incluir (opcional) Returns: String Markdown """ if not data: return "# Sem dados\n" # Determinar colunas if columns is None: columns = list(data[0].keys()) # Construir markdown md = [] # Titulo if title: md.append(f"# {title}\n") md.append(f"*Gerado em: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n") # Cabecalho da tabela header = "| " + " | ".join(columns) + " |" separator = "|" + "|".join(["---" for _ in columns]) + "|" md.append(header) md.append(separator) # Linhas for row in data: values = [] for col in columns: value = row.get(col, "") # Escapar pipes e newlines value_str = str(value).replace("|", "\\|").replace("\n", " ") values.append(value_str) line = "| " + " | ".join(values) + " |" md.append(line) return "\n".join(md) @staticmethod def export_to_pdf( data: List[Dict[str, Any]], title: Optional[str] = None, columns: Optional[List[str]] = None ) -> bytes: """ Exporta dados para PDF. Requer reportlab instalado. Args: data: Dados a exportar title: Titulo do documento (opcional) columns: Colunas a incluir (opcional) Returns: Bytes do PDF """ try: from reportlab.lib.pagesizes import letter, A4 from reportlab.lib import colors from reportlab.lib.units import inch from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle except ImportError: raise ImportError("reportlab nao instalado. Instale com: pip install reportlab") if not data: return b"" # Determinar colunas if columns is None: columns = list(data[0].keys()) # Criar PDF buffer = BytesIO() doc = SimpleDocTemplate(buffer, pagesize=A4) elements = [] styles = getSampleStyleSheet() # Titulo if title: title_style = ParagraphStyle( 'CustomTitle', parent=styles['Heading1'], fontSize=24, textColor=colors.HexColor('#1f77b4'), spaceAfter=30 ) elements.append(Paragraph(title, title_style)) elements.append(Spacer(1, 0.2*inch)) # Timestamp timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') elements.append(Paragraph(f"Gerado em: {timestamp}", styles['Normal'])) elements.append(Spacer(1, 0.3*inch)) # Preparar dados da tabela table_data = [columns] # Cabecalho for row in data: row_data = [] for col in columns: value = row.get(col, "") # Truncar valores longos value_str = str(value) if len(value_str) > 50: value_str = value_str[:47] + "..." row_data.append(value_str) table_data.append(row_data) # Criar tabela table = Table(table_data) # Estilo da tabela table.setStyle(TableStyle([ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1f77b4')), ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), ('ALIGN', (0, 0), (-1, -1), 'LEFT'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), ('FONTSIZE', (0, 0), (-1, 0), 12), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), ('BACKGROUND', (0, 1), (-1, -1), colors.beige), ('GRID', (0, 0), (-1, -1), 1, colors.black), ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'), ('FONTSIZE', (0, 1), (-1, -1), 10), ])) elements.append(table) # Build PDF doc.build(elements) return buffer.getvalue() class ConversationExporter: """Exportador especializado para conversas RAG.""" @staticmethod def export_conversation_to_markdown( messages: List[Dict[str, str]], title: str = "Conversa RAG", include_contexts: bool = True ) -> str: """ Exporta conversa para Markdown. Args: messages: Lista de mensagens (role, content, contexts) title: Titulo da conversa include_contexts: Se True, inclui contextos recuperados Returns: String Markdown """ md = [] # Cabecalho md.append(f"# {title}\n") md.append(f"*Exportado em: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n") md.append("---\n") # Mensagens for i, msg in enumerate(messages, 1): role = msg.get('role', 'user') content = msg.get('content', '') contexts = msg.get('contexts', []) # Formato da mensagem if role == 'user': md.append(f"## {i}. Voce\n") else: md.append(f"## {i}. Assistente\n") md.append(f"{content}\n") # Contextos (se for resposta do assistente) if include_contexts and role == 'assistant' and contexts: md.append("\n### Contextos Utilizados\n") for j, ctx in enumerate(contexts, 1): similarity = ctx.get('similarity', 0) ctx_content = ctx.get('content', '') md.append(f"{j}. **Similaridade: {similarity:.3f}**\n") md.append(f" > {ctx_content[:200]}...\n") md.append("\n---\n") return "\n".join(md) @staticmethod def export_conversation_to_json( messages: List[Dict[str, str]], metadata: Optional[Dict[str, Any]] = None ) -> str: """ Exporta conversa para JSON. Args: messages: Lista de mensagens metadata: Metadata adicional (opcional) Returns: String JSON """ data = { 'conversation': messages, 'exported_at': datetime.now().isoformat(), 'message_count': len(messages) } if metadata: data['metadata'] = metadata return json.dumps(data, indent=2, ensure_ascii=False) # Funcoes de conveniencia def export_documents_to_csv(documents: List[Dict[str, Any]]) -> str: """ Exporta lista de documentos para CSV. Args: documents: Lista de documentos Returns: String CSV """ exporter = DataExporter() columns = ['id', 'title', 'chunk_count', 'created_at'] return exporter.export_to_csv(documents, columns=columns) def export_search_results_to_markdown( results: List[Dict[str, Any]], query: str ) -> str: """ Exporta resultados de busca para Markdown. Args: results: Resultados da busca query: Query original Returns: String Markdown """ exporter = DataExporter() title = f"Resultados para: {query}" columns = ['content', 'similarity', 'document_id'] return exporter.export_to_markdown(results, title=title, columns=columns)