davidjurgens's picture
Deploy: Potato — Codebook Annotation
aceb1b2 verified
Raw
History Blame Contribute Delete
5.27 kB
"""
Trace Converter CLI
Command-line interface for converting agent traces from various formats
to Potato's canonical JSONL format.
Usage:
python -m potato.trace_converter --input traces.json --input-format react --output data.jsonl
python -m potato.trace_converter --input traces.json --auto-detect --output data.jsonl
python -m potato.trace_converter --list-formats
"""
import argparse
import json
import logging
import sys
from pathlib import Path
from .registry import converter_registry
logger = logging.getLogger(__name__)
def parse_args(args=None):
    """Build the CLI argument parser and parse ``args``.

    Args:
        args: Sequence of argument strings to parse. When ``None``,
            argparse falls back to the process command line.

    Returns:
        The ``argparse.Namespace`` holding ``input``, ``input_format``,
        ``output``, ``auto_detect``, ``list_formats``, ``pretty`` and
        ``verbose``.
    """
    cli = argparse.ArgumentParser(
        prog="potato-trace-convert",
        description="Convert agent traces from various formats to Potato's canonical JSONL format.",
    )

    # Options that take a value, listed in the order they appear in --help.
    value_options = (
        (("--input", "-i"), "Input file path (JSON, JSONL, or Parquet)"),
        (
            ("--input-format", "-f"),
            "Input format name (e.g., react, langchain, langfuse, atif, webarena, openai, anthropic, swebench, otel, multi_agent, mcp)",
        ),
        (("--output", "-o"), "Output file path (JSONL). Defaults to stdout."),
    )
    for flags, help_text in value_options:
        cli.add_argument(*flags, help=help_text)

    # Boolean switches; each defaults to False and flips to True when given.
    switches = (
        (("--auto-detect",), "Auto-detect the input format"),
        (("--list-formats",), "List all supported formats and exit"),
        (("--pretty",), "Pretty-print JSON output (one object per line, indented)"),
        (("--verbose", "-v"), "Enable verbose logging"),
    )
    for flags, help_text in switches:
        cli.add_argument(*flags, action="store_true", help=help_text)

    return cli.parse_args(args)
def load_input(file_path: str):
    """Load input data from a JSON, JSONL, or Parquet file.

    Files with a ``.parquet`` suffix (case-insensitive) are read with
    pyarrow and returned as a list of row dicts. Any other file is decoded
    as UTF-8 (a leading byte-order mark is ignored) and parsed as a single
    JSON document; if that fails it is parsed as JSONL, one JSON value per
    non-blank line.

    Args:
        file_path: Path of the file to load.

    Returns:
        The decoded JSON value for a JSON document, or a list of decoded
        values for JSONL / Parquet input. An empty file yields ``[]``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the content is neither valid JSON nor valid JSONL;
            the message names the first offending line.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Input file not found: {file_path}")

    # Handle Parquet files
    if path.suffix.lower() == ".parquet":
        # Imported lazily so pyarrow is only needed for Parquet input.
        import pyarrow.parquet as pq
        table = pq.read_table(str(path))
        return table.to_pandas().to_dict("records")

    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # json.loads would otherwise reject.
    content = path.read_text(encoding="utf-8-sig").strip()

    # Try parsing as JSON first
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass

    # Try parsing as JSONL (one JSON object per line).
    # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029/U+0085,
    # which may legally appear unescaped inside JSON strings. A trailing "\r"
    # from CRLF files is removed by the per-line strip() below.
    records = []
    for line_num, line in enumerate(content.split("\n"), 1):
        line = line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON on line {line_num}: {e}") from e
    return records
def main(args=None):
    """Run the trace-converter CLI and return a process exit code.

    Args:
        args: Optional argument list forwarded to ``parse_args``; ``None``
            means the process command line is used.

    Returns:
        ``0`` on success (including ``--list-formats``); ``1`` when a
        required option is missing, the input cannot be loaded, the format
        cannot be determined, conversion fails, or the output file cannot
        be written. Diagnostics go to stderr.
    """
    parsed = parse_args(args)

    log_level = logging.DEBUG if parsed.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    # List formats
    if parsed.list_formats:
        print("Supported trace formats:")
        print()
        for info in converter_registry.list_converters():
            print(f" {info['format_name']:15s} {info['description']}")
            if info.get('file_extensions'):
                print(f" {'':15s} Extensions: {', '.join(info['file_extensions'])}")
        print()
        return 0

    # Validate arguments
    if not parsed.input:
        print("Error: --input is required (or use --list-formats)", file=sys.stderr)
        return 1

    # Load input. OSError covers a missing file as well as directories and
    # permission problems; ImportError covers a missing optional dependency
    # for the lazily imported Parquet reader.
    try:
        data = load_input(parsed.input)
    except (OSError, ValueError, ImportError) as e:
        print(f"Error loading input: {e}", file=sys.stderr)
        return 1

    # Determine format: an explicit --input-format wins over --auto-detect.
    format_name = parsed.input_format
    if not format_name:
        if not parsed.auto_detect:
            print("Error: --input-format or --auto-detect is required", file=sys.stderr)
            return 1
        format_name = converter_registry.detect_format(data)
        if not format_name:
            print("Error: Could not auto-detect input format. "
                  "Please specify with --input-format.", file=sys.stderr)
            return 1
        print(f"Auto-detected format: {format_name}", file=sys.stderr)

    # Convert
    try:
        traces = converter_registry.convert(format_name, data)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except Exception as e:
        # Top-level boundary: report any converter failure instead of a traceback.
        print(f"Conversion error: {e}", file=sys.stderr)
        return 1

    # Output
    indent = 2 if parsed.pretty else None
    output_lines = [
        json.dumps(trace.to_dict(), ensure_ascii=False, indent=indent)
        for trace in traces
    ]
    # Terminate each record on its own so that an empty result produces empty
    # output rather than a single blank line.
    output_text = "".join(f"{line}\n" for line in output_lines)

    if parsed.output:
        out_path = Path(parsed.output)
        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(output_text, encoding="utf-8")
        except OSError as e:
            print(f"Error writing output: {e}", file=sys.stderr)
            return 1
        # Count the serialized records so this works even when the converter
        # returned an iterable without len().
        print(f"Converted {len(output_lines)} traces to {parsed.output}", file=sys.stderr)
    else:
        sys.stdout.write(output_text)
    return 0
if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as exit status.
    exit_code = main()
    sys.exit(exit_code)