Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -361,7 +361,8 @@ def generate_synthetic_inputs(template_spec, num_samples=1, max_retries=3):
|
|
| 361 |
f"- {var['name']}: {var['description']} (Type: {var['type']})"
|
| 362 |
+ (
|
| 363 |
f", Min: {var.get('min', 'N/A')}, Max: {var.get('max', 'N/A')}"
|
| 364 |
-
if var["type"]
|
|
|
|
| 365 |
else ""
|
| 366 |
)
|
| 367 |
+ (f", Options: {var['options']}" if var.get("options") else "")
|
|
@@ -370,26 +371,27 @@ def generate_synthetic_inputs(template_spec, num_samples=1, max_retries=3):
|
|
| 370 |
)
|
| 371 |
|
| 372 |
prompt = f"""
|
| 373 |
-
You are a synthetic data generator. Generate {num_samples} realistic sample(s) for the following input variables:
|
| 374 |
|
| 375 |
-
{input_vars_text}
|
| 376 |
-
|
| 377 |
-
Return the data as a JSON array of objects, where each object contains values for all input variables.
|
| 378 |
-
Each object should follow this structure:
|
| 379 |
-
{{
|
| 380 |
-
"variable_name_1": value1,
|
| 381 |
-
"variable_name_2": value2,
|
| 382 |
-
...
|
| 383 |
-
}}
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
for attempt in range(max_retries):
|
| 395 |
try:
|
|
@@ -671,7 +673,11 @@ with tab1:
|
|
| 671 |
# Add option to either upload a template or create a new one
|
| 672 |
setup_option = st.radio(
|
| 673 |
"Choose how to start your project",
|
| 674 |
-
options=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
index=1,
|
| 676 |
)
|
| 677 |
|
|
@@ -702,12 +708,8 @@ with tab1:
|
|
| 702 |
"Template loaded! Go to the 'Edit Template' tab to customize it."
|
| 703 |
)
|
| 704 |
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
or setup_option == "Upload existing template"
|
| 708 |
-
and not uploaded_template
|
| 709 |
-
):
|
| 710 |
-
# Step 1: Upload Knowledge Base (existing code)
|
| 711 |
st.subheader("Step 1: Upload Knowledge Base")
|
| 712 |
uploaded_files = st.file_uploader(
|
| 713 |
"Upload documents to use as knowledge base",
|
|
@@ -762,6 +764,54 @@ with tab1:
|
|
| 762 |
else:
|
| 763 |
st.warning("Please provide instructions first")
|
| 764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 765 |
with tab2:
|
| 766 |
if st.session_state.show_template_editor and st.session_state.template_spec:
|
| 767 |
st.header("Template Editor")
|
|
@@ -891,6 +941,23 @@ with tab2:
|
|
| 891 |
if opt.strip()
|
| 892 |
]
|
| 893 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
with col3:
|
| 895 |
if st.button("Remove", key=f"remove_input_{i}"):
|
| 896 |
st.session_state.template_spec["input"].pop(i)
|
|
@@ -975,6 +1042,23 @@ with tab2:
|
|
| 975 |
if opt.strip()
|
| 976 |
]
|
| 977 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
with col3:
|
| 979 |
if st.button("Remove", key=f"remove_output_{i}"):
|
| 980 |
st.session_state.template_spec["output"].pop(i)
|
|
@@ -1048,12 +1132,29 @@ with tab3:
|
|
| 1048 |
|
| 1049 |
elif var_type == "categorical":
|
| 1050 |
options = input_var.get("options", [])
|
|
|
|
|
|
|
|
|
|
| 1051 |
if options:
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1057 |
else:
|
| 1058 |
st.warning(f"No options defined for {var_name}")
|
| 1059 |
|
|
|
|
| 361 |
f"- {var['name']}: {var['description']} (Type: {var['type']})"
|
| 362 |
+ (
|
| 363 |
f", Min: {var.get('min', 'N/A')}, Max: {var.get('max', 'N/A')}"
|
| 364 |
+
if var["type"]
|
| 365 |
+
in ["string", "int", "float", "categorical"] # Added categorical here
|
| 366 |
else ""
|
| 367 |
)
|
| 368 |
+ (f", Options: {var['options']}" if var.get("options") else "")
|
|
|
|
| 371 |
)
|
| 372 |
|
| 373 |
prompt = f"""
|
| 374 |
+
You are a synthetic data generator. Generate {num_samples} realistic sample(s) for the following input variables:
|
| 375 |
|
| 376 |
+
{input_vars_text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
+
Return the data as a JSON array of objects, where each object contains values for all input variables.
|
| 379 |
+
Each object should follow this structure:
|
| 380 |
+
{{
|
| 381 |
+
"variable_name_1": value1,
|
| 382 |
+
"variable_name_2": value2,
|
| 383 |
+
...
|
| 384 |
+
}}
|
| 385 |
+
|
| 386 |
+
Make sure to:
|
| 387 |
+
1. Use appropriate data types (strings, numbers, booleans)
|
| 388 |
+
2. Stay within min/max constraints
|
| 389 |
+
3. Only use provided options for categorical variables
|
| 390 |
+
4. For categorical variables with min > 1 or max > 1, return an array of selected options with length between min and max
|
| 391 |
+
5. Generate realistic and diverse values
|
| 392 |
+
6. Return ONLY the JSON array with no additional text or explanation
|
| 393 |
+
7. The response must be valid JSON that can be parsed directly
|
| 394 |
+
"""
|
| 395 |
|
| 396 |
for attempt in range(max_retries):
|
| 397 |
try:
|
|
|
|
| 673 |
# Add option to either upload a template or create a new one
|
| 674 |
setup_option = st.radio(
|
| 675 |
"Choose how to start your project",
|
| 676 |
+
options=[
|
| 677 |
+
"Upload existing template",
|
| 678 |
+
"Create new template from documents",
|
| 679 |
+
"Create an empty template",
|
| 680 |
+
],
|
| 681 |
index=1,
|
| 682 |
)
|
| 683 |
|
|
|
|
| 708 |
"Template loaded! Go to the 'Edit Template' tab to customize it."
|
| 709 |
)
|
| 710 |
|
| 711 |
+
elif setup_option == "Create new template from documents":
|
| 712 |
+
# Step 1: Upload Knowledge Base
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
st.subheader("Step 1: Upload Knowledge Base")
|
| 714 |
uploaded_files = st.file_uploader(
|
| 715 |
"Upload documents to use as knowledge base",
|
|
|
|
| 764 |
else:
|
| 765 |
st.warning("Please provide instructions first")
|
| 766 |
|
| 767 |
+
elif setup_option == "Create an empty template":
|
| 768 |
+
st.subheader("Create Empty Template")
|
| 769 |
+
st.info(
|
| 770 |
+
"This option creates a minimal template that you can customize in the 'Edit Template' tab."
|
| 771 |
+
)
|
| 772 |
+
|
| 773 |
+
# Optional: Allow setting a name and description for the template
|
| 774 |
+
template_name = st.text_input("Template Name", value="Custom Template")
|
| 775 |
+
template_description = st.text_area(
|
| 776 |
+
"Template Description", value="A custom template created from scratch"
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
if st.button("Create Empty Template"):
|
| 780 |
+
# Create a minimal template structure
|
| 781 |
+
st.session_state.template_spec = {
|
| 782 |
+
"name": template_name,
|
| 783 |
+
"version": "1.0.0",
|
| 784 |
+
"description": template_description,
|
| 785 |
+
"input": [
|
| 786 |
+
{
|
| 787 |
+
"name": "input_1",
|
| 788 |
+
"description": "First input variable",
|
| 789 |
+
"type": "string",
|
| 790 |
+
"min": 1,
|
| 791 |
+
"max": 100,
|
| 792 |
+
}
|
| 793 |
+
],
|
| 794 |
+
"output": [
|
| 795 |
+
{
|
| 796 |
+
"name": "output_1",
|
| 797 |
+
"description": "Generated output",
|
| 798 |
+
"type": "string",
|
| 799 |
+
"min": 10,
|
| 800 |
+
"max": 1000,
|
| 801 |
+
}
|
| 802 |
+
],
|
| 803 |
+
"prompt": "Based on the following information:\n{input_1}\n\nGenerate the following output.",
|
| 804 |
+
}
|
| 805 |
+
|
| 806 |
+
st.session_state.show_template_editor = True
|
| 807 |
+
st.success(
|
| 808 |
+
"Empty template created! Go to the 'Edit Template' tab to customize it."
|
| 809 |
+
)
|
| 810 |
+
|
| 811 |
+
# Optional: Initialize an empty knowledge base
|
| 812 |
+
if "knowledge_base" not in st.session_state:
|
| 813 |
+
st.session_state.knowledge_base = ""
|
| 814 |
+
|
| 815 |
with tab2:
|
| 816 |
if st.session_state.show_template_editor and st.session_state.template_spec:
|
| 817 |
st.header("Template Editor")
|
|
|
|
| 941 |
if opt.strip()
|
| 942 |
]
|
| 943 |
|
| 944 |
+
# Add min and max for categorical variables
|
| 945 |
+
col_min, col_max = st.columns(2)
|
| 946 |
+
with col_min:
|
| 947 |
+
input_var["min"] = st.number_input(
|
| 948 |
+
"Min selections",
|
| 949 |
+
value=int(input_var.get("min", 0)),
|
| 950 |
+
min_value=0,
|
| 951 |
+
key=f"input_cat_min_{i}",
|
| 952 |
+
)
|
| 953 |
+
with col_max:
|
| 954 |
+
input_var["max"] = st.number_input(
|
| 955 |
+
"Max selections",
|
| 956 |
+
value=int(input_var.get("max", 1)),
|
| 957 |
+
min_value=1,
|
| 958 |
+
key=f"input_cat_max_{i}",
|
| 959 |
+
)
|
| 960 |
+
|
| 961 |
with col3:
|
| 962 |
if st.button("Remove", key=f"remove_input_{i}"):
|
| 963 |
st.session_state.template_spec["input"].pop(i)
|
|
|
|
| 1042 |
if opt.strip()
|
| 1043 |
]
|
| 1044 |
|
| 1045 |
+
# Add min and max for categorical variables
|
| 1046 |
+
col_min, col_max = st.columns(2)
|
| 1047 |
+
with col_min:
|
| 1048 |
+
output_var["min"] = st.number_input(
|
| 1049 |
+
"Min selections",
|
| 1050 |
+
value=int(output_var.get("min", 0)),
|
| 1051 |
+
min_value=0,
|
| 1052 |
+
key=f"output_cat_min_{i}",
|
| 1053 |
+
)
|
| 1054 |
+
with col_max:
|
| 1055 |
+
output_var["max"] = st.number_input(
|
| 1056 |
+
"Max selections",
|
| 1057 |
+
value=int(output_var.get("max", 1)),
|
| 1058 |
+
min_value=1,
|
| 1059 |
+
key=f"output_cat_max_{i}",
|
| 1060 |
+
)
|
| 1061 |
+
|
| 1062 |
with col3:
|
| 1063 |
if st.button("Remove", key=f"remove_output_{i}"):
|
| 1064 |
st.session_state.template_spec["output"].pop(i)
|
|
|
|
| 1132 |
|
| 1133 |
elif var_type == "categorical":
|
| 1134 |
options = input_var.get("options", [])
|
| 1135 |
+
min_selections = input_var.get("min", 1)
|
| 1136 |
+
max_selections = input_var.get("max", 1)
|
| 1137 |
+
|
| 1138 |
if options:
|
| 1139 |
+
if min_selections == 1 and max_selections == 1:
|
| 1140 |
+
# Single selection
|
| 1141 |
+
st.session_state.user_inputs[var_name] = st.selectbox(
|
| 1142 |
+
f"Select value for {var_name}",
|
| 1143 |
+
options=options,
|
| 1144 |
+
key=f"use_{var_name}",
|
| 1145 |
+
)
|
| 1146 |
+
else:
|
| 1147 |
+
# Multi-selection
|
| 1148 |
+
st.session_state.user_inputs[var_name] = st.multiselect(
|
| 1149 |
+
f"Select {min_selections}-{max_selections} values for {var_name}",
|
| 1150 |
+
options=options,
|
| 1151 |
+
default=(
|
| 1152 |
+
options[:min_selections]
|
| 1153 |
+
if len(options) >= min_selections
|
| 1154 |
+
else options
|
| 1155 |
+
),
|
| 1156 |
+
key=f"use_{var_name}",
|
| 1157 |
+
)
|
| 1158 |
else:
|
| 1159 |
st.warning(f"No options defined for {var_name}")
|
| 1160 |
|