File size: 7,630 Bytes
db17eb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849fb14
db17eb5
ffa80b4
849fb14
db17eb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849fb14
 
 
ffa80b4
 
849fb14
 
 
 
 
db17eb5
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""Gradio web interface for synthetic data generation."""

import logging
import gradio as gr
from src.pipeline import DatasetPipeline
from src.constants import PROJECT_NAME, VERSION

# Set up logger
# Module-level logger named after this module. basicConfig configures the
# root logger at INFO level with a "time - name - level - message" format.
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

# Single DatasetPipeline instance created when this module is imported;
# build_ui wires its ``generate`` method to the "Create a dataset" button.
pipeline = DatasetPipeline()

# Capitalized project name, used by build_ui in the page title and intro text.
PROJECT_NAME_CAP = PROJECT_NAME.capitalize()
# Repository URL; build_ui appends the CHANGELOG path for the version link.
REPO_URL = f"https://github.com/lisekarimi/{PROJECT_NAME}"


def update_output_format(dataset_type):
    """Update output format choices based on selected dataset type.

    Args:
        dataset_type: The value currently selected in the dataset-type
            dropdown ("Tabular", "Time-series" or "Text").

    Returns:
        A ``gr.update`` payload for the output-format dropdown. Tabular and
        time-series data offer JSON/csv/Parquet, text data offers
        JSON/Markdown; both reset the selection to "JSON". Any other value
        yields an empty update so the dropdown is left unchanged.
    """
    if dataset_type in ("Tabular", "Time-series"):
        return gr.update(choices=["JSON", "csv", "Parquet"], value="JSON")
    if dataset_type == "Text":
        return gr.update(choices=["JSON", "Markdown"], value="JSON")
    # Unrecognised type: return a no-op update instead of falling through to
    # an implicit None, which Gradio would apply as the dropdown's new value.
    return gr.update()


def _load_css(css_path):
    """Return the stylesheet text at ``css_path``, or "" if it cannot be read."""
    try:
        with open(css_path, encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        # Styling is optional: any failure here (missing file, bad path,
        # undecodable bytes) degrades to an unstyled UI instead of aborting.
        logger.warning("⚠️ Failed to load CSS: %s", e)
        return ""


def build_ui(css_path="assets/styles.css"):
    """Build and return the complete Gradio user interface with error handling.

    Args:
        css_path: Path to the stylesheet passed to ``gr.Blocks``. If it
            cannot be read, a warning is logged and the UI is built without
            custom CSS.

    Returns:
        The assembled ``gr.Blocks`` application. If building it raises, the
        error is logged with its traceback and a minimal ``gr.Blocks`` that
        displays the error message is returned instead.
    """
    css = _load_css(css_path)

    # Building the UI with error handling
    try:
        with gr.Blocks(css=css, title=f"🧬{PROJECT_NAME_CAP}") as ui:
            with gr.Column(elem_id="app-container"):
                gr.Markdown(f"<h1 id='app-title'>🏷️ {PROJECT_NAME_CAP} </h1>")
                gr.Markdown(
                    "<h2 id='app-subtitle'>AI-Powered Synthetic Dataset Generator</h2>"
                )

                # Intro copy; an f-string so the project name is interpolated
                intro_html = f"""
                <div id="intro-text">
                    <p>With {PROJECT_NAME_CAP}, easily generate
                    <strong>diverse datasets</strong>
                    for testing, development, and AI training.</p>

                    <h4>🎯 How It Works:</h4>
                        <p>1️⃣ Define your business problem.</p>
                        <p>2️⃣ Select dataset type, format, model, and samples.</p>
                        <p>3️⃣ Download your synthetic dataset!</p>
                </div>
                """
                gr.HTML(intro_html)

                learn_more_html = """
                    <div id="learn-more-button">
                        <a href="https://datagen.lisekarimi.com/docs"
                           class="button-link">Documentation</a>
                    </div>
                    """
                gr.HTML(learn_more_html)

                examples_md = """
                    <p><strong>🧠 Need inspiration?</strong> Try these examples:</p>
                    <ul>
                    <li>Movie summaries for genre classification.</li>
                    <li>Customer chats with dialogue and sentiment labels.</li>
                    <li>Stock prices with date, ticker, open, close, volume.</li>
                    </ul>
                    """
                gr.Markdown(examples_md)

                gr.Markdown("<p><strong>Start generating now!</strong> 🗂️✨</p>")

                with gr.Group(elem_id="input-container"):
                    business_problem = gr.Textbox(
                        placeholder=(
                            "Describe the dataset you want "
                            "(e.g., Job postings, Customer reviews)"
                        ),
                        lines=2,
                        label="📌 Business Problem",
                        elem_classes=["label-box"],
                        elem_id="business-problem-box",
                    )

                    with gr.Row(elem_classes="column-gap"):
                        with gr.Column(scale=1):
                            dataset_type = gr.Dropdown(
                                ["Tabular", "Time-series", "Text"],
                                value="Tabular",
                                label="📊 Dataset Type",
                                elem_classes=["label-box"],
                                elem_id="custom-dropdown",
                            )

                        with gr.Column(scale=1):
                            # NOTE(review): the label's emoji was mis-encoded
                            # and not fully recoverable; 📁 is a best guess.
                            output_format = gr.Dropdown(
                                choices=["JSON", "csv", "Parquet"],
                                value="JSON",
                                label="📁 Output Format",
                                elem_classes=["label-box"],
                                elem_id="custom-dropdown",
                            )

                        # Bind the update function to the dataset type dropdown
                        dataset_type.change(
                            update_output_format,
                            inputs=[dataset_type],
                            outputs=[output_format],
                        )

                    with gr.Row(elem_classes="row-spacer column-gap"):
                        with gr.Column(scale=1):
                            model = gr.Dropdown(
                                ["GPT", "Claude"],
                                value="GPT",
                                label="🤖 Model",
                                elem_classes=["label-box"],
                                elem_id="custom-dropdown",
                            )

                        with gr.Column(scale=1):
                            num_samples = gr.Slider(
                                minimum=10,
                                maximum=1000,
                                value=10,
                                step=1,
                                interactive=True,
                                label="🔢 Number of Samples",
                                elem_classes=["label-box"],
                            )

                # Hidden file component for dataset download
                file_download = gr.File(
                    visible=False, elem_id="download-box", label=None
                )

                # Component to display status messages
                status_message = gr.Markdown("", label="Status")

                # Button to trigger dataset generation; the pipeline's outputs
                # are routed to the file box, the button itself and the
                # status line.
                run_btn = gr.Button("Create a dataset", elem_id="run-btn")
                run_btn.click(
                    pipeline.generate,
                    inputs=[
                        business_problem,
                        dataset_type,
                        output_format,
                        num_samples,
                        model,
                    ],
                    outputs=[file_download, run_btn, status_message],
                )

            # Bottom: version info
            gr.Markdown(
                f"""
                <p class="version-banner">
                    🔖 <strong>
                    <a href="{REPO_URL}/blob/main/CHANGELOG.md"
                    target="_blank">Version {VERSION}</a>
                    </strong>
                </p>
                """
            )

            # Floating chat button
            gr.HTML(
                """
                <a href="https://datagen.lisekarimi.com/docs" class="floating-chat-btn"
                    target="_blank">
                    💬 Chat with AI Assistant
                </a>
            """
            )

        return ui

    except Exception as e:
        # Top-level boundary: log with traceback, then fall back to a page
        # that surfaces the failure instead of crashing the caller.
        logger.exception("❌ Error building UI: %s", e)
        # Return a minimal error UI
        with gr.Blocks() as error_ui:
            gr.Markdown("# Error Loading Application")
            gr.Markdown(f"An error occurred: {e}")
        return error_ui