Spaces:
Paused
Paused
"""
Trace Converter CLI

Command-line interface for converting agent traces from various formats
to Potato's canonical JSONL format.

Usage:
    python -m potato.trace_converter --input traces.json --input-format react --output data.jsonl
    python -m potato.trace_converter --input traces.json --auto-detect --output data.jsonl
    python -m potato.trace_converter --list-formats
"""
| import argparse | |
| import json | |
| import logging | |
| import sys | |
| from pathlib import Path | |
| from .registry import converter_registry | |
| logger = logging.getLogger(__name__) | |
def parse_args(args=None):
    """Parse command-line options for the trace converter.

    Args:
        args: Optional list of argument strings. When ``None``, argparse
            reads the options from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``input``, ``input_format`` and
        ``output`` (each a string or ``None``) plus the boolean switches
        ``auto_detect``, ``list_formats``, ``pretty`` and ``verbose``.
    """
    cli = argparse.ArgumentParser(
        prog="potato-trace-convert",
        description="Convert agent traces from various formats to Potato's canonical JSONL format.",
    )

    # Options that carry a value: (flag spellings, help text).
    valued_options = (
        (("--input", "-i"), "Input file path (JSON, JSONL, or Parquet)"),
        (
            ("--input-format", "-f"),
            "Input format name (e.g., react, langchain, langfuse, atif, webarena, openai, anthropic, swebench, otel, multi_agent, mcp)",
        ),
        (("--output", "-o"), "Output file path (JSONL). Defaults to stdout."),
    )
    for flags, help_text in valued_options:
        cli.add_argument(*flags, help=help_text)

    # Boolean switches, registered after the valued options so that the
    # ordering shown by --help stays the same.
    switches = (
        (("--auto-detect",), "Auto-detect the input format"),
        (("--list-formats",), "List all supported formats and exit"),
        (("--pretty",), "Pretty-print JSON output (one object per line, indented)"),
        (("--verbose", "-v"), "Enable verbose logging"),
    )
    for flags, help_text in switches:
        cli.add_argument(*flags, action="store_true", help=help_text)

    return cli.parse_args(args)
def load_input(file_path: str):
    """Load input data from a JSON, JSONL, or Parquet file.

    Files with a ``.parquet`` suffix (case-insensitive) are read with
    pyarrow and returned as a list of row dicts. Any other file is read as
    UTF-8 text and parsed as a single JSON document; if that fails, it is
    parsed as JSONL (one JSON value per non-blank line).

    Args:
        file_path: Path of the file to load.

    Returns:
        The decoded JSON value (whatever type the document holds) for a
        JSON file, or a list of decoded values/rows for JSONL and Parquet.
        An empty or whitespace-only text file yields an empty list.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If a non-blank line is not valid JSON; the message
            carries the 1-based line number within the file.
        ImportError: If a Parquet file is given and pyarrow cannot be
            imported.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Input file not found: {file_path}")

    # Handle Parquet files. The import is local so that pyarrow is only
    # needed when a Parquet file is actually loaded.
    if path.suffix.lower() == ".parquet":
        import pyarrow.parquet as pq
        table = pq.read_table(str(path))
        return table.to_pandas().to_dict("records")

    # "utf-8-sig" decodes plain UTF-8 unchanged and drops a leading BOM,
    # which json.loads would otherwise reject.
    text = path.read_text(encoding="utf-8-sig")

    # Try parsing as a single JSON document first. json.loads tolerates
    # surrounding whitespace, so the text does not need to be stripped.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Fall back to JSONL. The unstripped text is enumerated so that reported
    # line numbers match the file even when it starts with blank lines.
    records = []
    for line_num, line in enumerate(text.splitlines(), 1):
        line = line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON on line {line_num}: {e}") from e
    return records
def main(args=None):
    """Run the trace converter CLI and return a process exit code.

    Steps: parse options, configure logging, then either list the
    registered formats or load the input, resolve the format (explicit or
    auto-detected), convert, and write one JSON object per trace to the
    output file or stdout.

    Args:
        args: Optional list of argument strings forwarded to
            ``parse_args``; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        0 on success (including ``--list-formats``); 1 on a usage, load,
        conversion, or output-write error. Error messages go to stderr.
    """
    parsed = parse_args(args)

    logging.basicConfig(level=logging.DEBUG if parsed.verbose else logging.WARNING)

    # List formats
    if parsed.list_formats:
        print("Supported trace formats:")
        print()
        for info in converter_registry.list_converters():
            print(f" {info['format_name']:15s} {info['description']}")
            if info.get('file_extensions'):
                print(f" {'':15s} Extensions: {', '.join(info['file_extensions'])}")
        print()
        return 0

    # Validate arguments
    if not parsed.input:
        print("Error: --input is required (or use --list-formats)", file=sys.stderr)
        return 1

    # Load input. OSError covers FileNotFoundError as well as unreadable
    # paths (directories, permission errors); ImportError covers a Parquet
    # input when pyarrow is unavailable. All are reported as a clean
    # one-line error instead of a traceback.
    try:
        data = load_input(parsed.input)
    except (OSError, ValueError, ImportError) as e:
        logger.debug("Failed to load %s", parsed.input, exc_info=True)
        print(f"Error loading input: {e}", file=sys.stderr)
        return 1

    # Determine format: an explicit --input-format wins over --auto-detect.
    format_name = parsed.input_format
    if not format_name:
        if not parsed.auto_detect:
            print("Error: --input-format or --auto-detect is required", file=sys.stderr)
            return 1
        format_name = converter_registry.detect_format(data)
        if not format_name:
            print("Error: Could not auto-detect input format. "
                  "Please specify with --input-format.", file=sys.stderr)
            return 1
        print(f"Auto-detected format: {format_name}", file=sys.stderr)

    # Convert. ValueError is reported verbatim; anything else is labelled
    # as a conversion error. The traceback is only visible with --verbose.
    try:
        traces = converter_registry.convert(format_name, data)
    except ValueError as e:
        logger.debug("Conversion rejected the input", exc_info=True)
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except Exception as e:
        logger.debug("Unexpected conversion failure", exc_info=True)
        print(f"Conversion error: {e}", file=sys.stderr)
        return 1

    # Output. Each serialized trace is terminated by its own newline, so an
    # empty result produces empty output rather than a stray blank line.
    indent = 2 if parsed.pretty else None
    output_text = "".join(
        json.dumps(trace.to_dict(), ensure_ascii=False, indent=indent) + "\n"
        for trace in traces
    )

    if not parsed.output:
        sys.stdout.write(output_text)
        return 0

    out_path = Path(parsed.output)
    try:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(output_text, encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to write %s", parsed.output, exc_info=True)
        print(f"Error writing output: {e}", file=sys.stderr)
        return 1
    print(f"Converted {len(traces)} traces to {parsed.output}", file=sys.stderr)
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())