"""
Example scripts for the Synthetic Dataset Generator.

Demonstrates various dataset generation scenarios.
"""
|
|
| from dataset_generator import SyntheticDatasetGenerator |
| import os |
|
|
|
|
def example_1_simple_image_classification():
    """Example 1: generate a simple image classification dataset.

    Builds a ``SyntheticDatasetGenerator`` (``use_cuda=True``) that writes to
    ``./examples_output``, requests 10 photorealistic samples for each of the
    classes apple/orange/banana, and prints the dataset path, the
    ``labels_count`` value and the class list taken from the returned result.
    """
    rule = "=" * 60
    title = "Example 1: Simple Image Classification Dataset"
    # One write that yields the same bytes as the three-line banner.
    print(f"\n{rule}\n{title}\n{rule}\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output", use_cuda=True)

    # Request parameters are collected first, then forwarded as keywords.
    request = {
        "classes": ["apple", "orange", "banana"],
        "samples_per_class": 10,
        "image_size": (512, 512),
        "style": "photorealistic",
        "guidance_scale": 7.5,
        "num_inference_steps": 30,
    }
    summary = gen.generate_image_classification_dataset(**request)

    print(f"\n✅ Dataset created at: {summary['dataset_path']}")
    print(f" Total images: {summary['labels_count']}")
    print(f" Classes: {summary['metadata']['classes']}")
|
|
|
|
def example_2_object_detection():
    """Example 2: generate an object detection dataset in COCO format.

    Requests 20 images of size (1024, 1024) containing between 1 and 3 of the
    objects person/car/dog with ``annotation_format="coco"``, then prints the
    dataset path and the object list reported in the result metadata.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 2: Object Detection Dataset (COCO format)")
    print(divider + "\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    outcome = gen.generate_object_detection_dataset(
        objects=["person", "car", "dog"],
        num_images=20,
        objects_per_image=(1, 3),
        image_size=(1024, 1024),
        annotation_format="coco",
    )

    print(f"\n✅ Dataset created at: {outcome['dataset_path']}")
    # Constant line: no interpolation needed.
    print(" Format: COCO JSON")
    print(f" Object classes: {outcome['metadata']['objects']}")
|
|
|
|
def example_3_text_classification():
    """Example 3: generate a text classification dataset for sentiment analysis.

    Requests 50 sentence-type samples (``min_length=10``, ``max_length=50``)
    for each of the categories positive/negative/neutral, then prints the
    dataset path, the ``samples_count`` value, the category list from the
    result metadata, and a fixed line naming the split files.
    """
    rule = "=" * 60
    heading = "Example 3: Text Classification (Sentiment Analysis)"
    print(f"\n{rule}\n{heading}\n{rule}\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    sentiments = ["positive", "negative", "neutral"]
    report = gen.generate_text_classification_dataset(
        categories=sentiments,
        samples_per_category=50,
        text_type="sentence",
        min_length=10,
        max_length=50,
    )

    print(f"\n✅ Dataset created at: {report['dataset_path']}")
    print(f" Total samples: {report['samples_count']}")
    print(f" Categories: {report['metadata']['categories']}")
    print(" Splits: train.json, validation.json, test.json")
|
|
|
|
def example_4_ner_dataset():
    """Example 4: generate a Named Entity Recognition dataset.

    Requests 100 samples with ``entities_per_sample=(2, 4)`` over the entity
    types PERSON/ORGANIZATION/LOCATION, then prints the dataset path, the
    ``samples_count`` value and the entity types from the result metadata.
    """
    bar = "=" * 60
    print("\n" + bar)
    print("Example 4: Named Entity Recognition (NER)")
    print(bar + "\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    ner_request = {
        "entity_types": ["PERSON", "ORGANIZATION", "LOCATION"],
        "num_samples": 100,
        "entities_per_sample": (2, 4),
    }
    outcome = gen.generate_ner_dataset(**ner_request)

    print(f"\n✅ Dataset created at: {outcome['dataset_path']}")
    print(f" Total samples: {outcome['samples_count']}")
    print(f" Entity types: {outcome['metadata']['entity_types']}")
|
|
|
|
def example_5_code_generation():
    """Example 5: generate a code dataset for bug detection.

    Requests 50 samples for ``task_type="bug_detection"`` in Python and
    JavaScript, then prints the dataset path, the ``samples_count`` value, and
    the ``languages`` and ``dataset_type`` entries of the result metadata.
    """
    rule = "=" * 60
    heading = "Example 5: Code Generation (Bug Detection)"
    print(f"\n{rule}\n{heading}\n{rule}\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    languages = ["python", "javascript"]
    report = gen.generate_code_dataset(
        task_type="bug_detection",
        programming_languages=languages,
        num_samples=50,
    )

    print(f"\n✅ Dataset created at: {report['dataset_path']}")
    print(f" Total samples: {report['samples_count']}")
    print(f" Languages: {report['metadata']['languages']}")
    print(f" Task: {report['metadata']['dataset_type']}")
|
|
|
|
def example_6_custom_style_images():
    """Example 6: generate images in several artistic styles.

    Reuses one generator to request, for each of four style names, five
    (512, 512) samples of the single class "landscape", printing the dataset
    path returned for every style.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Example 6: Custom Artistic Styles")
    print(divider + "\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    # Settings shared by every style; only ``style`` varies per iteration.
    shared = {
        "classes": ["landscape"],
        "samples_per_class": 5,
        "image_size": (512, 512),
    }

    for look in ("photorealistic", "cartoon", "3d render", "artistic"):
        print(f"\nGenerating {look} style images...")

        outcome = gen.generate_image_classification_dataset(
            **shared,
            style=look,
            guidance_scale=8.0,
            num_inference_steps=30,
        )

        print(f" ✅ {look}: {outcome['dataset_path']}")
|
|
|
|
def example_7_large_scale_dataset():
    """Example 7: generate a large-scale image classification dataset.

    Builds the generator with ``use_cuda=True`` and ``use_fp16=True`` and
    requests 100 photorealistic samples for each of six classes, then prints
    the dataset path, the ``labels_count`` value and a fixed time estimate.
    """
    rule = "=" * 60
    heading = "Example 7: Large-Scale Dataset Generation"
    print(f"\n{rule}\n{heading}\n{rule}\n")

    gen = SyntheticDatasetGenerator(
        output_dir="./examples_output",
        use_cuda=True,
        use_fp16=True,
    )

    labels = ["cat", "dog", "bird", "car", "tree", "building"]
    report = gen.generate_image_classification_dataset(
        classes=labels,
        samples_per_class=100,
        image_size=(512, 512),
        style="photorealistic",
        guidance_scale=7.0,
        num_inference_steps=30,
    )

    print(f"\n✅ Large dataset created: {report['dataset_path']}")
    print(f" Total images: {report['labels_count']}")
    print(" Estimated time: ~30-60 minutes on RTX 4090")
|
|
|
|
def example_8_yolo_format():
    """Example 8: generate an object detection dataset in YOLO format.

    Requests 50 images of size (640, 640) containing between 1 and 5 of the
    objects person/bicycle/car/motorcycle with ``annotation_format="yolo"``,
    then prints the dataset path followed by two fixed lines describing the
    annotation layout.
    """
    bar = "=" * 60
    print("\n" + bar)
    print("Example 8: Object Detection (YOLO format)")
    print(bar + "\n")

    gen = SyntheticDatasetGenerator(output_dir="./examples_output")

    yolo_request = {
        "objects": ["person", "bicycle", "car", "motorcycle"],
        "num_images": 50,
        "objects_per_image": (1, 5),
        "image_size": (640, 640),
        "annotation_format": "yolo",
    }
    outcome = gen.generate_object_detection_dataset(**yolo_request)

    print(f"\n✅ YOLO dataset created at: {outcome['dataset_path']}")
    print(" Each image has a corresponding .txt annotation file")
    print(" Format: class_id x_center y_center width height (normalized)")
|
|
|
|
def run_all_examples():
    """Run all examples sequentially, asking before the image-generation ones.

    Examples 3, 4 and 5 run unconditionally. The user is then asked whether to
    run the image examples (1, 2, 6, 8) and, separately, the large-scale
    example (7). An answer counts as "yes" only when it is ``y`` in either
    case, ignoring surrounding whitespace.

    ``KeyboardInterrupt`` and any other exception raised along the way are
    reported on stdout and not propagated, so the caller regains control.
    """

    def _confirmed(prompt):
        """Return True when the user answers *prompt* with 'y' (any case)."""
        # Strip the reply so answers such as "y " or " Y" are accepted, the
        # same way the main menu strips its choice.
        return input(prompt).strip().lower() == 'y'

    print("\n" + "=" * 60)
    print("Running All Examples")
    print("=" * 60)
    print("\nNOTE: This will take a while and requires GPU!")
    print("Press Ctrl+C to cancel\n")

    try:
        # These three examples run first, without any confirmation prompt.
        example_3_text_classification()
        example_4_ner_dataset()
        example_5_code_generation()

        # Image generation runs only after explicit confirmation.
        if _confirmed("\nRun image generation examples? (y/n): "):
            example_1_simple_image_classification()
            example_2_object_detection()
            example_6_custom_style_images()
            example_8_yolo_format()

        if _confirmed("\nRun large-scale example? (600 images, ~30-60 min): (y/n): "):
            example_7_large_scale_dataset()

        print("\n" + "=" * 60)
        print("✅ All examples completed!")
        print("=" * 60)

    except KeyboardInterrupt:
        print("\n\n❌ Examples cancelled by user")
    except Exception as e:
        # Top-level boundary for the batch run: report and hand control back.
        print(f"\n\n❌ Error: {e}")
|
|
|
|
def main():
    """Show the example menu and run the selected examples until the user exits.

    The menu is printed once. The function then repeatedly prompts for a
    choice and calls the matching example function ("1"-"8") or
    ``run_all_examples`` ("9"). The loop ends when the user enters "0",
    presses Ctrl+C, or standard input reaches end-of-file. Any other exception
    raised while handling a choice is reported and the prompt is shown again.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Synthetic Dataset Generator - Examples")
    print(rule)
    print("\nAvailable examples:")
    menu_lines = (
        " 1. Simple Image Classification",
        " 2. Object Detection (COCO)",
        " 3. Text Classification (Sentiment)",
        " 4. Named Entity Recognition",
        " 5. Code Generation (Bug Detection)",
        " 6. Custom Artistic Styles",
        " 7. Large-Scale Dataset",
        " 8. Object Detection (YOLO)",
        " 9. Run all examples",
        " 0. Exit",
    )
    for line in menu_lines:
        print(line)

    while True:
        try:
            choice = input("\nSelect example (0-9): ").strip()

            if choice == '0':
                print("Goodbye!")
                break
            elif choice == '1':
                example_1_simple_image_classification()
            elif choice == '2':
                example_2_object_detection()
            elif choice == '3':
                example_3_text_classification()
            elif choice == '4':
                example_4_ner_dataset()
            elif choice == '5':
                example_5_code_generation()
            elif choice == '6':
                example_6_custom_style_images()
            elif choice == '7':
                example_7_large_scale_dataset()
            elif choice == '8':
                example_8_yolo_format()
            elif choice == '9':
                run_all_examples()
            else:
                print("Invalid choice. Please select 0-9.")

        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError once stdin is exhausted (Ctrl+D, piped
            # or closed input). Letting the generic handler below catch it
            # would re-prompt forever, so end-of-input exits like Ctrl+C.
            print("\n\nGoodbye!")
            break
        except Exception as e:
            # Keep the menu alive when a single example fails.
            print(f"\n❌ Error: {e}")
            print("Please try again or select a different example.")
|
|
|
|
# Launch the interactive menu only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|
|