"""
Trace Converter CLI

Command-line interface for converting agent traces from various formats
to Potato's canonical JSONL format.

Usage:
    python -m potato.trace_converter --input traces.json --input-format react --output data.jsonl
    python -m potato.trace_converter --input traces.json --auto-detect --output data.jsonl
    python -m potato.trace_converter --list-formats
"""
import argparse
import json
import logging
import sys
from pathlib import Path
from .registry import converter_registry
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def parse_args(args=None):
    """Parse the command-line options of the trace converter.

    Args:
        args: Sequence of argument strings to parse. ``None`` lets argparse
            fall back to ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with attributes ``input``,
        ``input_format``, ``output`` (strings or ``None``) and the boolean
        switches ``auto_detect``, ``list_formats``, ``pretty``, ``verbose``.
    """
    cli = argparse.ArgumentParser(
        prog="potato-trace-convert",
        description="Convert agent traces from various formats to Potato's canonical JSONL format.",
    )

    # Options that take a value: (flag spellings, help text).
    valued_options = (
        (("--input", "-i"), "Input file path (JSON, JSONL, or Parquet)"),
        (
            ("--input-format", "-f"),
            "Input format name (e.g., react, langchain, langfuse, atif, webarena, openai, anthropic, swebench, otel, multi_agent, mcp)",
        ),
        (("--output", "-o"), "Output file path (JSONL). Defaults to stdout."),
    )
    # Boolean switches that default to False: (flag spellings, help text).
    switches = (
        (("--auto-detect",), "Auto-detect the input format"),
        (("--list-formats",), "List all supported formats and exit"),
        (("--pretty",), "Pretty-print JSON output (one object per line, indented)"),
        (("--verbose", "-v"), "Enable verbose logging"),
    )

    # Registration order is kept (valued options first, then switches) so the
    # generated --help text lists the options in the same sequence.
    for flags, help_text in valued_options:
        cli.add_argument(*flags, help=help_text)
    for flags, help_text in switches:
        cli.add_argument(*flags, action="store_true", help=help_text)

    return cli.parse_args(args)
def load_input(file_path: str):
    """Load trace data from a JSON, JSONL, or Parquet file.

    Files whose suffix is ``.parquet`` (case-insensitive) are read with
    ``pyarrow`` and returned as a list of row dicts. Any other file is read
    as UTF-8 text (a leading byte-order mark is tolerated) and parsed first
    as a single JSON document; if that fails, it is parsed as JSONL with one
    JSON value per non-blank line.

    Args:
        file_path: Path of the input file.

    Returns:
        The decoded JSON document as-is when the whole file is valid JSON,
        otherwise a list of records (JSONL or Parquet). An empty or
        whitespace-only file yields an empty list.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the text is neither valid JSON nor valid JSONL. The
            message carries the 1-based line number within the file, and the
            underlying ``json.JSONDecodeError`` is chained as the cause.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Input file not found: {file_path}")

    if path.suffix.lower() == ".parquet":
        # Imported inside the branch so pyarrow is only needed for Parquet.
        import pyarrow.parquet as pq

        table = pq.read_table(str(path))
        return table.to_pandas().to_dict("records")

    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
    # json.loads would otherwise reject.
    text = path.read_text(encoding="utf-8-sig")

    # Try the whole file as one JSON document first.
    try:
        return json.loads(text.strip())
    except json.JSONDecodeError:
        pass

    # Fall back to JSONL. Iterate over the unstripped text so that reported
    # line numbers match the file even when it starts with blank lines.
    records = []
    for line_num, raw_line in enumerate(text.splitlines(), 1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON on line {line_num}: {e}") from e
    return records
def main(args=None):
    """Run the trace-converter CLI and return a process exit code.

    Steps: parse the options, configure logging, then either list the
    registered formats or load the input file, resolve the input format
    (explicit ``--input-format`` or ``--auto-detect``), convert the data via
    ``converter_registry`` and write one JSON object per trace to the output
    file or to stdout. Diagnostics go to stderr.

    Args:
        args: Sequence of argument strings; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``0`` on success (including ``--list-formats``), ``1`` on any
        reported error.
    """
    parsed = parse_args(args)
    logging.basicConfig(level=logging.DEBUG if parsed.verbose else logging.WARNING)

    # List formats
    if parsed.list_formats:
        print("Supported trace formats:")
        print()
        for info in converter_registry.list_converters():
            print(f" {info['format_name']:15s} {info['description']}")
            if info.get('file_extensions'):
                print(f" {'':15s} Extensions: {', '.join(info['file_extensions'])}")
        # NOTE(review): the source's indentation was lost; this trailing blank
        # line is assumed to follow the whole listing, not each entry.
        print()
        return 0

    # Validate arguments
    if not parsed.input:
        print("Error: --input is required (or use --list-formats)", file=sys.stderr)
        return 1

    # Load input. OSError covers a missing file as well as directories and
    # permission problems; ImportError covers a missing Parquet reader. All
    # are reported as a one-line error instead of a traceback.
    try:
        data = load_input(parsed.input)
    except (OSError, ValueError, ImportError) as e:
        print(f"Error loading input: {e}", file=sys.stderr)
        return 1

    # Determine format
    format_name = parsed.input_format
    if not format_name:
        if not parsed.auto_detect:
            print("Error: --input-format or --auto-detect is required", file=sys.stderr)
            return 1
        format_name = converter_registry.detect_format(data)
        if not format_name:
            print("Error: Could not auto-detect input format. "
                  "Please specify with --input-format.", file=sys.stderr)
            return 1
        print(f"Auto-detected format: {format_name}", file=sys.stderr)

    # Convert
    try:
        traces = converter_registry.convert(format_name, data)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except Exception as e:
        # Keep the traceback available under --verbose instead of losing it.
        logger.debug("Conversion failed", exc_info=True)
        print(f"Conversion error: {e}", file=sys.stderr)
        return 1

    # Output
    indent = 2 if parsed.pretty else None
    output_lines = [
        json.dumps(trace.to_dict(), ensure_ascii=False, indent=indent)
        for trace in traces
    ]
    # Terminate every record with a newline; with no records this produces an
    # empty output rather than a single blank line.
    output_text = "".join(f"{line}\n" for line in output_lines)

    if parsed.output:
        out_path = Path(parsed.output)
        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(output_text, encoding="utf-8")
        except OSError as e:
            print(f"Error writing output: {e}", file=sys.stderr)
            return 1
        # Count what was actually serialized, which also works when the
        # converter returned an iterable without a length.
        print(f"Converted {len(output_lines)} traces to {parsed.output}", file=sys.stderr)
    else:
        sys.stdout.write(output_text)
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|