# rag_template / src /export.py
# Guilherme Favaron
# Sync: Complete project update (Phase 6) - API, Metadata, Eval, Docs
# a686b1b
"""
Sistema de exportacao de dados em multiplos formatos.
Suporta:
- JSON
- CSV
- Markdown
- PDF (requer reportlab)
"""
from typing import List, Dict, Any, Optional
import json
import csv
from io import StringIO, BytesIO
from datetime import datetime
class DataExporter:
    """Export tabular data (a list of dict rows) to several formats.

    Every public method is static. When ``columns`` is omitted, the exported
    columns are the union of the keys of all rows, in first-seen order.
    """

    @staticmethod
    def _resolve_columns(
        data: List[Dict[str, Any]],
        columns: Optional[List[str]]
    ) -> List[str]:
        """Return the columns to export for ``data``.

        An explicit ``columns`` value is returned untouched. Otherwise the
        keys of every row are collected (not only those of the first row),
        so a key that is missing from row 0 is not silently dropped.
        """
        if columns is not None:
            return columns
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(key for row in data for key in row))

    @staticmethod
    def _markdown_cell(value: Any) -> str:
        """Render ``value`` as text that is safe inside one Markdown table cell."""
        text = str(value).replace("|", "\\|")
        # Handle CRLF first so a Windows line break becomes a single space.
        return text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

    @staticmethod
    def export_to_json(
        data: List[Dict[str, Any]],
        pretty: bool = True
    ) -> str:
        """Serialize ``data`` to a JSON string.

        Args:
            data: Rows to export.
            pretty: When True, indent the output by two spaces.

        Returns:
            The JSON document; non-ASCII characters are kept as-is.
        """
        return json.dumps(data, indent=2 if pretty else None, ensure_ascii=False)

    @staticmethod
    def export_to_csv(
        data: List[Dict[str, Any]],
        columns: Optional[List[str]] = None
    ) -> str:
        """Serialize ``data`` to CSV text with a header row.

        Args:
            data: Rows to export.
            columns: Columns to include; all keys found in ``data`` if None.

        Returns:
            The CSV text, or an empty string when ``data`` is empty.
        """
        if not data:
            return ""

        fieldnames = DataExporter._resolve_columns(data, columns)
        buffer = StringIO()
        # extrasaction='ignore' lets an explicit ``columns`` act as a filter.
        writer = csv.DictWriter(buffer, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(data)
        return buffer.getvalue()

    @staticmethod
    def export_to_markdown(
        data: List[Dict[str, Any]],
        title: Optional[str] = None,
        columns: Optional[List[str]] = None
    ) -> str:
        """Render ``data`` as a Markdown table.

        Args:
            data: Rows to export.
            title: Optional document title placed above the table.
            columns: Columns to include; all keys found in ``data`` if None.

        Returns:
            The Markdown text; a "no data" heading when ``data`` is empty.
        """
        if not data:
            return "# Sem dados\n"

        columns = DataExporter._resolve_columns(data, columns)
        cell = DataExporter._markdown_cell
        lines = []

        if title:
            lines.append(f"# {title}\n")
            # NOTE(review): the generation timestamp is emitted together with
            # the title only — confirm against the intended layout.
            lines.append(f"*Gerado em: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n")

        # Header cells are escaped like body cells so a "|" in a column name
        # cannot break the table.
        lines.append("| " + " | ".join(cell(col) for col in columns) + " |")
        lines.append("|" + "|".join("---" for _ in columns) + "|")

        for row in data:
            rendered = (cell(row.get(col, "")) for col in columns)
            lines.append("| " + " | ".join(rendered) + " |")

        return "\n".join(lines)

    @staticmethod
    def export_to_pdf(
        data: List[Dict[str, Any]],
        title: Optional[str] = None,
        columns: Optional[List[str]] = None
    ) -> bytes:
        """Render ``data`` as a PDF table (A4 page).

        Requires the optional ``reportlab`` package.

        Args:
            data: Rows to export.
            title: Optional document title placed above the table.
            columns: Columns to include; all keys found in ``data`` if None.

        Returns:
            The PDF bytes, or ``b""`` when ``data`` is empty.

        Raises:
            ImportError: If reportlab is not installed.
        """
        try:
            from reportlab.lib.pagesizes import A4
            from reportlab.lib import colors
            from reportlab.lib.units import inch
            from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
            from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        except ImportError as exc:
            raise ImportError(
                "reportlab nao instalado. Instale com: pip install reportlab"
            ) from exc

        if not data:
            return b""

        columns = DataExporter._resolve_columns(data, columns)

        buffer = BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
        elements = []
        styles = getSampleStyleSheet()

        if title:
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                textColor=colors.HexColor('#1f77b4'),
                spaceAfter=30
            )
            # NOTE(review): Paragraph interprets its text as reportlab markup,
            # so a title containing "<" or "&" may need escaping — confirm.
            elements.append(Paragraph(title, title_style))
            elements.append(Spacer(1, 0.2*inch))

        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        elements.append(Paragraph(f"Gerado em: {timestamp}", styles['Normal']))
        elements.append(Spacer(1, 0.3*inch))

        # First row is the header; long cell values are shortened to 50
        # characters (47 + "...") before being placed in the table.
        table_data = [columns]
        for row in data:
            row_data = []
            for col in columns:
                value_str = str(row.get(col, ""))
                if len(value_str) > 50:
                    value_str = value_str[:47] + "..."
                row_data.append(value_str)
            table_data.append(row_data)

        table = Table(table_data)
        table.setStyle(TableStyle([
            # Header row.
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1f77b4')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            # Body rows.
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 1), (-1, -1), 10),
        ]))
        elements.append(table)

        doc.build(elements)
        return buffer.getvalue()
class ConversationExporter:
    """Exporter specialised for RAG conversations (lists of message dicts)."""

    # Number of characters of each retrieved context shown in the Markdown export.
    _CONTEXT_PREVIEW_CHARS = 200

    @staticmethod
    def export_conversation_to_markdown(
        messages: List[Dict[str, Any]],
        title: str = "Conversa RAG",
        include_contexts: bool = True
    ) -> str:
        """Render a conversation as a Markdown document.

        Args:
            messages: Message dicts; the keys read are ``role``, ``content``
                and ``contexts`` (a list of dicts with ``similarity`` and
                ``content``).
            title: Heading of the document.
            include_contexts: When True, list the contexts attached to
                assistant messages below the message text.

        Returns:
            The Markdown text.
        """
        limit = ConversationExporter._CONTEXT_PREVIEW_CHARS
        md = [
            f"# {title}\n",
            f"*Exportado em: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n",
            "---\n",
        ]

        for i, msg in enumerate(messages, 1):
            role = msg.get('role', 'user')
            content = msg.get('content', '')
            contexts = msg.get('contexts', [])

            # Any role other than 'user' is labelled as the assistant.
            speaker = "Voce" if role == 'user' else "Assistente"
            md.append(f"## {i}. {speaker}\n")
            md.append(f"{content}\n")

            if include_contexts and role == 'assistant' and contexts:
                md.append("\n### Contextos Utilizados\n")
                for j, ctx in enumerate(contexts, 1):
                    similarity = ctx.get('similarity', 0)
                    ctx_content = ctx.get('content', '')
                    preview = ctx_content[:limit]
                    # Mark the preview with an ellipsis only when text was
                    # actually cut off.
                    if len(ctx_content) > limit:
                        preview += "..."
                    md.append(f"{j}. **Similaridade: {similarity:.3f}**\n")
                    md.append(f" > {preview}\n")

            md.append("\n---\n")

        return "\n".join(md)

    @staticmethod
    def export_conversation_to_json(
        messages: List[Dict[str, Any]],
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Serialize a conversation to an indented JSON document.

        Args:
            messages: Message dicts, stored under ``conversation``.
            metadata: Optional extra data, stored under ``metadata`` when
                it is non-empty.

        Returns:
            The JSON text, including ``exported_at`` (ISO timestamp) and
            ``message_count``.
        """
        data = {
            'conversation': messages,
            'exported_at': datetime.now().isoformat(),
            'message_count': len(messages)
        }
        if metadata:
            data['metadata'] = metadata
        return json.dumps(data, indent=2, ensure_ascii=False)
# Funcoes de conveniencia
def export_documents_to_csv(documents: List[Dict[str, Any]]) -> str:
    """Export a list of documents to CSV with a fixed set of columns.

    Args:
        documents: Document dicts to export.

    Returns:
        CSV text with the columns id, title, chunk_count and created_at.
    """
    return DataExporter.export_to_csv(
        documents,
        columns=['id', 'title', 'chunk_count', 'created_at'],
    )
def export_search_results_to_markdown(
    results: List[Dict[str, Any]],
    query: str
) -> str:
    """Export search results to a Markdown table titled with the query.

    Args:
        results: Search result dicts to export.
        query: The original query, shown in the document title.

    Returns:
        Markdown text with the columns content, similarity and document_id.
    """
    return DataExporter.export_to_markdown(
        results,
        title=f"Resultados para: {query}",
        columns=['content', 'similarity', 'document_id'],
    )