Spaces:
Sleeping
Sleeping
edbeeching
committed on
Commit
·
7580ee9
1
Parent(s):
08390cd
fix slider
Browse files
app.py
CHANGED
|
@@ -198,7 +198,7 @@ def validate_request(request: GenerationRequest, oauth_token: Optional[Union[gr.
|
|
| 198 |
return request
|
| 199 |
|
| 200 |
|
| 201 |
-
def load_dataset_info(dataset_name, dataset_token=None):
|
| 202 |
"""Load dataset information and return choices for dropdowns"""
|
| 203 |
if not dataset_name.strip():
|
| 204 |
return (
|
|
@@ -237,23 +237,41 @@ def load_dataset_info(dataset_name, dataset_token=None):
|
|
| 237 |
break
|
| 238 |
if not default_column and column_choices:
|
| 239 |
default_column = column_choices[0]
|
|
|
|
|
|
|
|
|
|
| 240 |
else:
|
| 241 |
split_choices = []
|
| 242 |
column_choices = []
|
| 243 |
default_split = None
|
| 244 |
default_column = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# Generate a suggested output dataset name
|
| 247 |
dataset_base_name = dataset_name.split('/')[-1] if '/' in dataset_name else dataset_name
|
| 248 |
suggested_output_name = f"{dataset_base_name}-synthetic"
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
return (
|
| 251 |
gr.update(choices=config_choices, value=default_config, interactive=True), # config
|
| 252 |
gr.update(choices=split_choices, value=default_split, interactive=True), # split
|
| 253 |
gr.update(choices=column_choices, value=default_column, interactive=True), # prompt_column
|
| 254 |
gr.update(value=suggested_output_name, interactive=True), # output_dataset_name
|
| 255 |
-
gr.update(interactive=True), # num_output_samples
|
| 256 |
-
|
| 257 |
)
|
| 258 |
|
| 259 |
except Exception as e:
|
|
@@ -592,7 +610,7 @@ def main():
|
|
| 592 |
# Wire up the Load Dataset Info button
|
| 593 |
load_info_btn.click(
|
| 594 |
load_dataset_info,
|
| 595 |
-
inputs=[input_dataset_name],
|
| 596 |
outputs=[input_dataset_config, input_dataset_split, prompt_column, output_dataset_name, num_output_samples, load_info_status]
|
| 597 |
)
|
| 598 |
|
|
|
|
| 198 |
return request
|
| 199 |
|
| 200 |
|
| 201 |
+
def load_dataset_info(dataset_name, dataset_token=None, oauth_token=None):
|
| 202 |
"""Load dataset information and return choices for dropdowns"""
|
| 203 |
if not dataset_name.strip():
|
| 204 |
return (
|
|
|
|
| 237 |
break
|
| 238 |
if not default_column and column_choices:
|
| 239 |
default_column = column_choices[0]
|
| 240 |
+
|
| 241 |
+
# Get sample count for the default split
|
| 242 |
+
dataset_sample_count = config_info.splits[default_split].num_examples if default_split else 0
|
| 243 |
else:
|
| 244 |
split_choices = []
|
| 245 |
column_choices = []
|
| 246 |
default_split = None
|
| 247 |
default_column = None
|
| 248 |
+
dataset_sample_count = 0
|
| 249 |
+
|
| 250 |
+
# Determine user limits
|
| 251 |
+
is_pro = verify_pro_status(oauth_token) if oauth_token else False
|
| 252 |
+
user_max_samples = MAX_SAMPLES_PRO if is_pro else MAX_SAMPLES_FREE
|
| 253 |
+
|
| 254 |
+
# Set slider maximum to the minimum of dataset samples and user limit
|
| 255 |
+
slider_max = min(dataset_sample_count, user_max_samples) if dataset_sample_count > 0 else user_max_samples
|
| 256 |
|
| 257 |
# Generate a suggested output dataset name
|
| 258 |
dataset_base_name = dataset_name.split('/')[-1] if '/' in dataset_name else dataset_name
|
| 259 |
suggested_output_name = f"{dataset_base_name}-synthetic"
|
| 260 |
|
| 261 |
+
status_msg = f"✅ Dataset info loaded successfully! Found {len(config_choices)} config(s), {len(split_choices)} split(s), and {len(column_choices)} column(s)."
|
| 262 |
+
if dataset_sample_count > 0:
|
| 263 |
+
status_msg += f" Dataset has {dataset_sample_count:,} samples."
|
| 264 |
+
if dataset_sample_count > user_max_samples:
|
| 265 |
+
user_tier = "PRO/Enterprise" if is_pro else "free tier"
|
| 266 |
+
status_msg += f" Limited to {user_max_samples:,} samples for {user_tier} users."
|
| 267 |
+
|
| 268 |
return (
|
| 269 |
gr.update(choices=config_choices, value=default_config, interactive=True), # config
|
| 270 |
gr.update(choices=split_choices, value=default_split, interactive=True), # split
|
| 271 |
gr.update(choices=column_choices, value=default_column, interactive=True), # prompt_column
|
| 272 |
gr.update(value=suggested_output_name, interactive=True), # output_dataset_name
|
| 273 |
+
gr.update(interactive=True, maximum=slider_max, value=0), # num_output_samples
|
| 274 |
+
status_msg
|
| 275 |
)
|
| 276 |
|
| 277 |
except Exception as e:
|
|
|
|
| 610 |
# Wire up the Load Dataset Info button
|
| 611 |
load_info_btn.click(
|
| 612 |
load_dataset_info,
|
| 613 |
+
inputs=[input_dataset_name, gr.State(None), current_oauth_token],
|
| 614 |
outputs=[input_dataset_config, input_dataset_split, prompt_column, output_dataset_name, num_output_samples, load_info_status]
|
| 615 |
)
|
| 616 |
|