#!/usr/bin/env python3
"""
LegitData CLI - Command line interface for realistic data generation.

Usage:
    legitdata generate --ddl schema.sql --url https://example.com \\
        --use-case "Retail Analytics" --connection "snowflake://..." --size medium

    legitdata preview --ddl schema.sql --url https://example.com \\
        --use-case "Retail Analytics" --rows 5

    legitdata cache --stats
    legitdata cache --clear --type search

Normal output (results, preview rows, cache statistics) is written to stdout;
warnings and errors are written to stderr so that stdout stays pipeable.
"""

import argparse
import json
import os
import sys
import traceback
from typing import Any, Dict, Optional, Sequence


def _eprint(message: str) -> None:
    """Write a diagnostic (warning or error) line to stderr."""
    print(message, file=sys.stderr)


def _add_context_arguments(
    subparser: argparse.ArgumentParser, use_case_help: str
) -> None:
    """Add the --ddl/--url/--use-case options shared by generate and preview."""
    subparser.add_argument(
        '--ddl', '-d',
        required=True,
        help='Path to DDL file or DDL string'
    )
    subparser.add_argument(
        '--url', '-u',
        required=True,
        help='Company website URL for context'
    )
    subparser.add_argument(
        '--use-case', '-c',
        required=True,
        help=use_case_help
    )


def _add_anthropic_key_argument(subparser: argparse.ArgumentParser) -> None:
    """Add the optional --anthropic-key option."""
    subparser.add_argument(
        '--anthropic-key',
        type=str,
        default=None,
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )


def create_parser() -> argparse.ArgumentParser:
    """Create the argument parser.

    Returns:
        A parser with three sub-commands -- ``generate``, ``preview`` and
        ``cache`` -- whose name is stored in ``args.command`` (``None`` when
        no sub-command is given).
    """
    parser = argparse.ArgumentParser(
        prog='legitdata',
        description='Generate realistic synthetic data for analytics warehouses'
    )
    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # Generate command
    gen_parser = subparsers.add_parser('generate', help='Generate and insert data')
    _add_context_arguments(
        gen_parser, 'Analytics use case (e.g., "Retail Analytics")'
    )
    gen_parser.add_argument(
        '--connection', '-n',
        required=True,
        help='Database connection string'
    )
    gen_parser.add_argument(
        '--size', '-s',
        default='medium',
        choices=['small', 'medium', 'large', 'xl'],
        help='Size preset (default: medium)'
    )
    gen_parser.add_argument(
        '--rows', '-r',
        type=str,
        default=None,
        help='Custom row counts as JSON (e.g., \'{"fact_sales": 5000}\')'
    )
    gen_parser.add_argument(
        '--no-truncate',
        action='store_true',
        help='Do not truncate tables before insert'
    )
    gen_parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview operations without writing to database'
    )
    gen_parser.add_argument(
        '--no-cache',
        action='store_true',
        help='Disable caching'
    )
    _add_anthropic_key_argument(gen_parser)

    # Preview command
    preview_parser = subparsers.add_parser('preview', help='Preview generated data')
    _add_context_arguments(preview_parser, 'Analytics use case')
    preview_parser.add_argument(
        '--table', '-t',
        type=str,
        default=None,
        help='Specific table to preview'
    )
    preview_parser.add_argument(
        '--rows', '-r',
        type=int,
        default=5,
        help='Number of rows to preview (default: 5)'
    )
    _add_anthropic_key_argument(preview_parser)

    # Cache command
    cache_parser = subparsers.add_parser('cache', help='Manage cache')
    cache_parser.add_argument(
        '--clear',
        action='store_true',
        help='Clear all cached data'
    )
    cache_parser.add_argument(
        '--stats',
        action='store_true',
        help='Show cache statistics'
    )
    cache_parser.add_argument(
        '--type',
        type=str,
        default=None,
        choices=['context', 'search', 'classification', 'generation'],
        help='Specific cache type to clear'
    )

    return parser


def get_anthropic_client(api_key: Optional[str] = None):
    """Get Anthropic client.

    Args:
        api_key: Explicit API key. When empty or ``None`` the
            ``ANTHROPIC_API_KEY`` environment variable is used instead.

    Returns:
        An ``anthropic.Anthropic`` instance, or ``None`` (after printing a
        warning to stderr) when no key is available or the ``anthropic``
        package is not installed.
    """
    key = api_key or os.environ.get('ANTHROPIC_API_KEY')

    if not key:
        _eprint("Warning: No Anthropic API key provided. AI features will be limited.")
        return None

    try:
        # Imported lazily: the package is optional.
        from anthropic import Anthropic
    except ImportError:
        _eprint("Warning: anthropic package not installed. Install with: pip install anthropic")
        return None

    return Anthropic(api_key=key)


def _parse_row_counts(raw: str) -> Dict[str, Any]:
    """Decode the ``--rows`` JSON value of the generate command.

    Args:
        raw: JSON text supplied on the command line.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If ``raw`` is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass) or does not decode to a JSON object.
    """
    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        raise ValueError(
            "expected a JSON object mapping table names to row counts, "
            f"got {type(parsed).__name__}"
        )
    return parsed


def cmd_generate(args) -> int:
    """Execute generate command.

    Args:
        args: Parsed arguments of the ``generate`` sub-command.

    Returns:
        0 on success, 1 when the row counts, the DDL or the generation fails.
    """
    # Parse custom row counts first so malformed input fails fast, before the
    # generator (and its project imports) are touched.
    row_counts = None
    if args.rows:
        try:
            row_counts = _parse_row_counts(args.rows)
        except ValueError as e:
            _eprint(f"Error parsing row counts JSON: {e}")
            return 1

    from legitdata import LegitGenerator

    # Get Anthropic client
    anthropic_client = get_anthropic_client(args.anthropic_key)

    # Create generator
    gen = LegitGenerator(
        url=args.url,
        use_case=args.use_case,
        connection_string=args.connection,
        anthropic_client=anthropic_client,
        cache_enabled=not args.no_cache,
        dry_run=args.dry_run
    )

    # Load DDL
    try:
        gen.load_ddl(args.ddl)
    except Exception as e:
        _eprint(f"Error loading DDL: {e}")
        return 1

    # Generate
    try:
        results = gen.generate(
            size=args.size,
            row_counts=row_counts,
            truncate_first=not args.no_truncate
        )

        print("\nResults:")
        for table, count in results.items():
            print(f"  {table}: {count} rows")

        return 0

    except Exception as e:
        # Top-level CLI boundary: report the failure and its traceback
        # together on stderr, then signal failure through the exit code.
        _eprint(f"Error generating data: {e}")
        traceback.print_exc()
        return 1


def cmd_preview(args) -> int:
    """Execute preview command.

    Args:
        args: Parsed arguments of the ``preview`` sub-command.

    Returns:
        0 on success, 1 when loading the DDL or building the preview fails.
    """
    from legitdata import LegitGenerator

    # Get Anthropic client
    anthropic_client = get_anthropic_client(args.anthropic_key)

    # Create generator (no connection needed for preview)
    gen = LegitGenerator(
        url=args.url,
        use_case=args.use_case,
        connection_string="",  # Not needed for preview
        anthropic_client=anthropic_client,
        dry_run=True
    )

    # Load DDL
    try:
        gen.load_ddl(args.ddl)
    except Exception as e:
        _eprint(f"Error loading DDL: {e}")
        return 1

    # Preview
    try:
        preview_data = gen.preview(
            table_name=args.table,
            num_rows=args.rows
        )

        print("\n" + "=" * 60)
        print("PREVIEW DATA")
        print("=" * 60)

        for table_name, rows in preview_data.items():
            print(f"\n{table_name}:")
            print("-" * 40)

            for i, row in enumerate(rows):
                print(f"Row {i + 1}:")
                for key, value in row.items():
                    # None-valued columns are omitted from the listing.
                    if value is not None:
                        print(f"  {key}: {value}")
                print()

        return 0

    except Exception as e:
        # Top-level CLI boundary: report and convert to an exit code.
        _eprint(f"Error previewing data: {e}")
        traceback.print_exc()
        return 1


def cmd_cache(args) -> int:
    """Execute cache command.

    ``--clear`` and ``--stats`` may be combined; the cache is cleared first
    and the statistics printed afterwards.

    Args:
        args: Parsed arguments of the ``cache`` sub-command.

    Returns:
        0 when at least one action ran, 1 when neither ``--clear`` nor
        ``--stats`` was given.
    """
    # Reject a no-op invocation before the cache object is even created.
    if not (args.clear or args.stats):
        _eprint("Use --clear or --stats")
        return 1

    from legitdata.cache import FileCache

    cache = FileCache()

    if args.clear:
        count = cache.clear(args.type)
        print(f"Cleared {count} cache entries")

    if args.stats:
        stats = cache.get_stats()
        print("Cache Statistics:")
        print(f"  Total entries: {stats['total']}")
        for cache_type, count in stats['by_type'].items():
            print(f"  {cache_type}: {count}")

    return 0


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Main entry point.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        The process exit code of the selected sub-command; 0 after printing
        the help text when no sub-command was given.
    """
    parser = create_parser()
    args = parser.parse_args(argv)

    handlers = {
        'generate': cmd_generate,
        'preview': cmd_preview,
        'cache': cmd_cache,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return 0
    return handler(args)


if __name__ == '__main__':
    sys.exit(main())