Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import threading | |
| from src.synth_data_gen import SynthDataGen | |
# Module-level SynthDataGen instance shared by this file: update_pipeline
# calls its generate_dataset() and build_ui returns its output_dir.
| generator = SynthDataGen() | |
def update_output_format(dataset_type):
    """Return a Gradio update for the output-format dropdown.

    The available formats depend on the selected dataset type:
    "Tabular" and "Time-series" offer JSON, csv and Parquet, while "Text"
    offers JSON and Markdown. JSON is preselected in both cases. Any other
    value falls through and returns None.
    """
    preselected = "JSON"

    if dataset_type == "Text":
        return gr.update(choices=["JSON", "Markdown"], value=preselected)

    if dataset_type in ("Tabular", "Time-series"):
        return gr.update(choices=["JSON", "csv", "Parquet"], value=preselected)

    return None
def _remove_if_present(path):
    """Delete *path*; do nothing if it has already been removed."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def update_pipeline(business_problem, dataset_type, output_format, num_samples, model):
    """Generate a dataset and stream UI updates for the click handler.

    This is a generator function. Every yielded value is a three-item list
    in the order [file component update, run-button update, status text].

    Args:
        business_problem: Free-text description of the dataset to generate.
            None, empty or whitespace-only input is rejected with a status
            message and nothing is generated.
        dataset_type: Forwarded to ``generator.generate_dataset``.
        output_format: Forwarded to ``generator.generate_dataset``.
        num_samples: Forwarded to ``generator.generate_dataset``.
        model: Forwarded to ``generator.generate_dataset``.

    Yields:
        First a "generating" state with both the file component and the
        button hidden, then exactly one final state: success (file shown,
        button shown) or an error message (file hidden, button shown).
    """
    # Guard against None as well as blank text, so a missing value produces
    # the validation message instead of an AttributeError on .strip().
    if not business_problem or not business_problem.strip():
        yield [gr.update(visible=False), gr.update(visible=True), "❌ Please enter a business problem before generating."]
        return

    # Initial feedback while generating
    yield [gr.update(visible=False), gr.update(visible=False), "⏳ Generating dataset..."]

    try:
        # Generate dataset file
        file_path = generator.generate_dataset(
            business_problem=business_problem,
            dataset_type=dataset_type,
            output_format=output_format,
            num_samples=num_samples,
            model=model,
        )
        print("🧪 File result returned:", file_path)

        # Only a string path that exists on disk counts as success
        if isinstance(file_path, str) and os.path.exists(file_path):
            # Auto-delete after 60s. The tolerant helper keeps the timer
            # thread from raising if the file was already removed.
            threading.Timer(60, _remove_if_present, args=[file_path]).start()
            yield [gr.update(value=file_path, visible=True), gr.update(visible=True), "✅ Dataset ready for download."]
        else:
            # Handle invalid or missing file
            yield [gr.update(visible=False), gr.update(visible=True), "❌ Error: File not created or path invalid."]
    except Exception as e:
        # UI boundary: surface any pipeline failure as a status message
        yield [gr.update(visible=False), gr.update(visible=True), f"❌ Pipeline error: {e}"]
# Build the Gradio Blocks interface for the app.
#
# css_path: path of the stylesheet; its text is passed to gr.Blocks(css=...).
# Returns a tuple (ui, generator.output_dir): the Blocks object and the
# output_dir attribute of the module-level generator.
| def build_ui(css_path="assets/styles.css"): | |
# NOTE(review): open() is called without an explicit encoding, so the
# stylesheet is decoded with the platform default; consider encoding="utf-8".
| with open(css_path, "r") as f: | |
| css = f.read() | |
| with gr.Blocks(css=css, title="🧬SynthDataGen") as ui: | |
| with gr.Column(elem_id="app-container"): | |
# Page header: title and subtitle.
| gr.Markdown("<h1 id='app-title'>SynthDataGen 🧬 </h1>") | |
| gr.Markdown("<h2 id='app-subtitle'>AI-Powered Synthetic Dataset Generator</h2>") | |
# Introductory text with a three-step "How It Works" list.
| gr.HTML(""" | |
| <div id="intro-text"> | |
| <p>With SynthDataGen, easily generate <strong>diverse datasets in different formats</strong> for testing, development, and AI training.</p> | |
| <h4>🎯 How It Works:</h4> | |
| <ol> | |
| <li>1️⃣ Define your business problem or dataset topic.</li> | |
| <li>2️⃣ Select the dataset type, output format, model, and number of samples.</li> | |
| <li>3️⃣ Receive your synthetic dataset — ready to download and use!</li> | |
| </ol> | |
| </div> | |
| """) | |
# "Learn More" link to the project README on GitHub (opens in a new tab).
| gr.HTML(""" | |
| <div id="learn-more-button"> | |
| <a href="https://github.com/lisek75/synthdatagen_app/blob/main/README.md" class="button-link" target="_blank">Learn More</a> | |
| </div> | |
| """) | |
# Example prompts the user can try.
| gr.Markdown(""" | |
| <p><strong>🧠 Need inspiration?</strong> Try one of these examples:</p> | |
| <ul> | |
| <li>Movie summaries for genre classification.</li> | |
| <li>Generate customer chats with realistic dialogue, chat_id, timestamp, names, sentiment label, and aligned transcript.</li> | |
| <li>Create daily stock prices for 2 companies with typical fields like date, ticker, open, close, high, low, and volume.</li> | |
| </ul> | |
| """) | |
| gr.Markdown("<p><strong>Start generating your synthetic datasets now!</strong> 🗂️✨</p>") | |
| with gr.Group(elem_id="input-container"): | |
# Free-text description of the dataset to generate.
| business_problem = gr.Textbox( | |
| placeholder="Describe the dataset you want (e.g., Job postings, Customer reviews, Sensor data, Movie titles)", | |
| lines=2, | |
| label="📌 Business Problem", | |
| elem_classes=["label-box"], | |
| elem_id="business-problem-box" | |
| ) | |
# Dataset type and output format selectors.
# NOTE(review): the dataset-type, output-format and model dropdowns all use
# elem_id="custom-dropdown"; element ids are normally expected to be unique.
# Confirm against the stylesheet before changing.
| with gr.Row(elem_classes="column-gap"): | |
| with gr.Column(scale=1): | |
| dataset_type = gr.Dropdown( | |
| ["Tabular", "Time-series", "Text"], | |
| value="Tabular", | |
| label="📊 Dataset Type", | |
| elem_classes=["label-box"], | |
| elem_id="custom-dropdown" | |
| ) | |
| with gr.Column(scale=1): | |
| output_format = gr.Dropdown( | |
| choices=["JSON", "csv", "Parquet"], | |
| value="JSON", | |
| label="📁 Output Format", | |
| elem_classes=["label-box"], | |
| elem_id="custom-dropdown" | |
| ) | |
| # Bind the update function to the dataset type dropdown | |
| dataset_type.change( | |
| update_output_format, | |
| inputs=[dataset_type], | |
| outputs=[output_format] | |
| ) | |
# Model choice and sample count (slider from 10 to 1000, step 1, default 10).
| with gr.Row(elem_classes="row-spacer column-gap"): | |
| with gr.Column(scale=1): | |
| model = gr.Dropdown( | |
| ["GPT", "Claude"], | |
| value="GPT", | |
| label="🤖 Model", | |
| elem_classes=["label-box"], | |
| elem_id="custom-dropdown" | |
| ) | |
| with gr.Column(scale=1): | |
| num_samples = gr.Slider( | |
| minimum=10, | |
| maximum=1000, | |
| value=10, | |
| step=1, | |
| interactive=True, | |
| label="🔢 Number of Samples", | |
| elem_classes=["label-box"] | |
| ) | |
| # Hidden file component for dataset download | |
| file_download = gr.File(visible=False, elem_id="download-box", label=None) | |
| # Component to display status messages | |
| status_message = gr.Markdown("", label="Status") | |
| # Button to trigger dataset generation | |
| run_btn = gr.Button("Create a dataset", elem_id="run-btn") | |
# The outputs order below matches the three-item lists yielded by
# update_pipeline: file component, run button, status text.
| run_btn.click( | |
| update_pipeline, | |
| inputs=[business_problem, dataset_type, output_format, num_samples, model], | |
| outputs=[file_download, run_btn, status_message] | |
| ) | |
| return ui, generator.output_dir | |