Spaces:

rawpowertools
/

Template_Generator

Running

App Files Files

DJHumanRPT commited on Mar 18, 2025

Commit

297b883

verified ·

1 Parent(s): e9c8d9a

Update app.py

Browse files

Files changed (1) hide show

app.py +503 -59

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import json
 import PyPDF2
 import re
 from io import BytesIO
 import openai
@@ -26,36 +27,57 @@ def get_openai_client():
     return None
-# Define helper functions for PDF parsing
-def parse_pdf(file):
-    """Extract text from a PDF file."""
-    try:
-        pdf_reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page_num in range(len(pdf_reader.pages)):
-            text += pdf_reader.pages[page_num].extract_text() or ""
-        return text
-    except Exception as e:
-        st.error(f"Error parsing PDF: {str(e)}")
-        return ""
 def parse_documents(uploaded_files):
     """Parse multiple document files and extract their text content."""
     content = ""
     for file in uploaded_files:
         try:
             file_type = file.name.split(".")[-1].lower()
-            if file_type == "pdf":
-                # Create a copy of the file to avoid buffer issues
-                file_copy = BytesIO(file.getvalue())
-                content += parse_pdf(file_copy) + "\n\n"
-            elif file_type == "txt":
-                content += file.getvalue().decode("utf-8") + "\n\n"
             else:
                 st.warning(f"Unsupported file type: {file.name}")
         except Exception as e:
             st.error(f"Error processing file {file.name}: {str(e)}")
     return content
@@ -243,7 +265,7 @@ If document content was provided, design the template to effectively use that in
         response = client.chat.completions.create(
             model=st.session_state.model,
             messages=[{"role": "user", "content": prompt}],
-            max_completion_tokens=4096,
             temperature=0.7,
         )
@@ -340,7 +362,7 @@ Return ONLY the revised prompt template text, with no additional explanations.
         response = client.chat.completions.create(
             model=st.session_state.model,
             messages=[{"role": "user", "content": prompt}],
-            max_completion_tokens=4096,
             temperature=0.7,
         )
@@ -473,25 +495,35 @@ def generate_categorical_permutations(categorical_vars, target_count):
         min_sel = var.get("min", 1)
         max_sel = var.get("max", 1)
         # Single selection case
         if min_sel == 1 and max_sel == 1:
-            option_sets.append([(var_name, opt) for opt in options])
         else:
             # Multi-selection case - generate varied selection sizes
             var_options = []
             # Include min selections
-            for combo in itertools.combinations(options, min_sel):
                 var_options.append((var_name, list(combo)))
             # Include max selections if different from min
             if max_sel != min_sel:
-                for combo in itertools.combinations(options, max_sel):
                     var_options.append((var_name, list(combo)))
             # Include some intermediate selections if applicable
             for size in range(min_sel + 1, max_sel):
-                combos = list(itertools.combinations(options, size))
                 if combos:
                     sample_size = min(3, len(combos))  # Take up to 3 samples
                     for combo in random.sample(combos, sample_size):
@@ -519,12 +551,18 @@ def generate_categorical_permutations(categorical_vars, target_count):
             var = random.choice(categorical_vars)
             var_name = var["name"]
             options = var.get("options", [])
-            if options and len(options) > 1:
                 if var.get("min", 1) == 1 and var.get("max", 1) == 1:
                     # For single selection, choose a different option
                     current = new_perm[var_name]
-                    other_options = [opt for opt in options if opt != current]
                     if other_options:
                         new_perm[var_name] = random.choice(other_options)
                 else:
@@ -537,7 +575,9 @@ def generate_categorical_permutations(categorical_vars, target_count):
                     if len(current_selection) < max_sel and random.random() > 0.5:
                         # Add an item not already in the selection
                         available = [
-                            opt for opt in options if opt not in current_selection
                         ]
                         if available:
                             current_selection.append(random.choice(available))
@@ -893,6 +933,157 @@ The response must be valid JSON that can be parsed directly.
     return results
 # Initialize session state
 if "template_spec" not in st.session_state:
     st.session_state.template_spec = None
@@ -977,7 +1168,7 @@ with tab1:
         uploaded_files = st.file_uploader(
             "Upload documents to use as knowledge base",
             accept_multiple_files=True,
-            type=["pdf", "txt"],
         )
         # Rest of your existing code for document processing...
@@ -992,8 +1183,7 @@ with tab1:
             with st.expander("Preview extracted content"):
                 st.text_area(
                     "Extracted Text",
-                    value=st.session_state.knowledge_base[:10000]
-                    + ("..." if len(st.session_state.knowledge_base) > 1000 else ""),
                     height=200,
                     disabled=True,
                 )
@@ -1079,6 +1269,14 @@ with tab2:
     if st.session_state.show_template_editor and st.session_state.template_spec:
         st.header("Template Editor")
         # Basic template information
         with st.expander("Template Information", expanded=True):
             col1, col2 = st.columns(2)
@@ -1097,6 +1295,87 @@ with tab2:
                 height=100,
             )
         # Prompt Template Section
         with st.expander("Prompt Template", expanded=True):
             st.info("Use {variable_name} to refer to input variables in your template")
@@ -1131,17 +1410,26 @@ with tab2:
         with st.expander("Input Variables", expanded=True):
             st.subheader("Input Variables")
-            # Add input variable button
-            if st.button("Add Input Variable"):
-                new_var = {
-                    "name": f"new_input_{len(st.session_state.template_spec['input']) + 1}",
-                    "description": "New input variable",
-                    "type": "string",
-                    "min": 1,
-                    "max": 100,
-                }
-                st.session_state.template_spec["input"].append(new_var)
-                st.rerun()
             # Display input variables
             for i, input_var in enumerate(st.session_state.template_spec["input"]):
@@ -1192,6 +1480,42 @@ with tab2:
                                 )
                         if var_type == "categorical":
                             options = input_var.get("options", [])
                             options_str = st.text_area(
                                 "Options (one per line)",
@@ -1224,7 +1548,7 @@ with tab2:
                     with col3:
                         if st.button("Remove", key=f"remove_input_{i}"):
                             st.session_state.template_spec["input"].pop(i)
-                            st.rerun()
                     st.divider()
@@ -1232,17 +1556,26 @@ with tab2:
         with st.expander("Output Variables", expanded=True):
             st.subheader("Output Variables")
-            # Add output variable button
-            if st.button("Add Output Variable"):
-                new_var = {
-                    "name": f"new_output_{len(st.session_state.template_spec['output']) + 1}",
-                    "description": "New output variable",
-                    "type": "string",
-                    "min": 1,
-                    "max": 100,
-                }
-                st.session_state.template_spec["output"].append(new_var)
-                st.rerun()
             # Display output variables
             for i, output_var in enumerate(st.session_state.template_spec["output"]):
@@ -1293,6 +1626,42 @@ with tab2:
                                 )
                         if var_type == "categorical":
                             options = output_var.get("options", [])
                             options_str = st.text_area(
                                 "Options (one per line)",
@@ -1325,7 +1694,7 @@ with tab2:
                     with col3:
                         if st.button("Remove", key=f"remove_output_{i}"):
                             st.session_state.template_spec["output"].pop(i)
-                            st.rerun()
                     st.divider()
@@ -1566,17 +1935,92 @@ with tab4:
         if "selected_samples" not in st.session_state:
             st.session_state.selected_samples = []
         # Generate inputs button
         if st.button("Generate Synthetic Inputs"):
             if not st.session_state.get("api_key"):
                 st.error("Please provide an OpenAI API key in the sidebar.")
             else:
                 with st.spinner(f"Generating {num_samples} synthetic input samples..."):
-                    st.session_state.synthetic_inputs = (
-                        generate_synthetic_inputs_hybrid(
-                            st.session_state.template_spec, num_samples=num_samples
                         )
-                    )
                 if st.session_state.synthetic_inputs:
                     st.success(
@@ -1864,4 +2308,4 @@ with tab4:
     else:
         st.info(
             "No template has been generated yet. Go to the 'Setup' tab to create one."
-        )

 import streamlit as st
 import json
 import PyPDF2
+from docling.document_converter import DocumentConverter
 import re
 from io import BytesIO
 import openai
     return None
+@st.cache_resource
+def get_document_converter():
+    """Cache the DocumentConverter to prevent reloading on each interaction"""
+    return None  # Return None initially
+def get_or_create_document_converter():
+    """Get existing converter or create a new one only when needed"""
+    converter = get_document_converter()
+    if converter is None:
+        converter = DocumentConverter()
+        # Update the cached value
+        get_document_converter._cached_obj = converter
+    return converter
+@st.cache_data
 def parse_documents(uploaded_files):
     """Parse multiple document files and extract their text content."""
+    if not uploaded_files:
+        return ""
+    import tempfile
+    import os
+    converter = get_or_create_document_converter()
     content = ""
     for file in uploaded_files:
         try:
             file_type = file.name.split(".")[-1].lower()
+            if file_type in ["pdf", "txt", "docx", "html"]:
+                # Create a temporary file with the correct extension
+                with tempfile.NamedTemporaryFile(
+                    delete=False, suffix=f".{file_type}"
+                ) as tmp_file:
+                    # Write the uploaded file content to the temp file
+                    tmp_file.write(file.getvalue())
+                    tmp_path = tmp_file.name
+                # Convert using the file path instead of the UploadedFile object
+                source = converter.convert(tmp_path)
+                content += source.document.export_to_markdown()
+                # Clean up the temporary file
+                os.unlink(tmp_path)
             else:
                 st.warning(f"Unsupported file type: {file.name}")
         except Exception as e:
             st.error(f"Error processing file {file.name}: {str(e)}")
     return content
         response = client.chat.completions.create(
             model=st.session_state.model,
             messages=[{"role": "user", "content": prompt}],
+            max_tokens=4096,
             temperature=0.7,
         )
         response = client.chat.completions.create(
             model=st.session_state.model,
             messages=[{"role": "user", "content": prompt}],
+            max_tokens=4096,
             temperature=0.7,
         )
         min_sel = var.get("min", 1)
         max_sel = var.get("max", 1)
+        # Get selected options if they exist
+        selected_options = var.get("selected_options", options)
+        # Use only selected options for permutation
+        options_to_use = [opt for opt in options if opt in selected_options]
+        # If no options selected, use all options
+        if not options_to_use:
+            options_to_use = options
         # Single selection case
         if min_sel == 1 and max_sel == 1:
+            option_sets.append([(var_name, opt) for opt in options_to_use])
         else:
             # Multi-selection case - generate varied selection sizes
             var_options = []
             # Include min selections
+            for combo in itertools.combinations(options_to_use, min_sel):
                 var_options.append((var_name, list(combo)))
             # Include max selections if different from min
             if max_sel != min_sel:
+                for combo in itertools.combinations(options_to_use, max_sel):
                     var_options.append((var_name, list(combo)))
             # Include some intermediate selections if applicable
             for size in range(min_sel + 1, max_sel):
+                combos = list(itertools.combinations(options_to_use, size))
                 if combos:
                     sample_size = min(3, len(combos))  # Take up to 3 samples
                     for combo in random.sample(combos, sample_size):
             var = random.choice(categorical_vars)
             var_name = var["name"]
             options = var.get("options", [])
+            selected_options = var.get("selected_options", options)
+            # Use only selected options for variation
+            options_to_use = [opt for opt in options if opt in selected_options]
+            if not options_to_use:
+                options_to_use = options
+            if options_to_use and len(options_to_use) > 1:
                 if var.get("min", 1) == 1 and var.get("max", 1) == 1:
                     # For single selection, choose a different option
                     current = new_perm[var_name]
+                    other_options = [opt for opt in options_to_use if opt != current]
                     if other_options:
                         new_perm[var_name] = random.choice(other_options)
                 else:
                     if len(current_selection) < max_sel and random.random() > 0.5:
                         # Add an item not already in the selection
                         available = [
+                            opt
+                            for opt in options_to_use
+                            if opt not in current_selection
                         ]
                         if available:
                             current_selection.append(random.choice(available))
     return results
+def suggest_variable_values_from_kb(
+    variable_name, variable_type, knowledge_base, client, model="gpt-3.5-turbo"
+):
+    """
+    Use LLM to suggest possible values for a variable based on the knowledge base content.
+    Especially useful for categorical variables to extract options from documents.
+    """
+    if not knowledge_base or not client:
+        return None
+    # Truncate knowledge base if it's too long
+    kb_excerpt = (
+        knowledge_base[:100000] + "..."
+        if len(knowledge_base) > 100000
+        else knowledge_base
+    )
+    prompt = f"""
+    Based on the following knowledge base content, suggest appropriate values for a variable named "{variable_name}" of type "{variable_type}".
+    KNOWLEDGE BASE EXCERPT:
+    {kb_excerpt}
+    TASK:
+    Extract or suggest appropriate values for this variable from the knowledge base.
+    If the variable type is "categorical", return a list of possible options found in the knowledge base.
+    If the variable type is "string", suggest a few example values.
+    If the variable type is "int" or "float", suggest appropriate min/max ranges.
+    If the variable type is "bool", suggest appropriate true/false conditions.
+    Return your response as a JSON object with the following structure:
+    For categorical: {{"options": ["option1", "option2", ...]}}
+    For string: {{"examples": ["example1", "example2", ...], "min": min_length, "max": max_length}}
+    For int/float: {{"min": minimum_value, "max": maximum_value, "examples": [value1, value2, ...]}}
+    For bool: {{"examples": ["condition for true", "condition for false"]}}
+    Only include values that are actually present or strongly implied in the knowledge base.
+    """
+    try:
+        response = client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=1000,
+            temperature=0.3,
+        )
+        result = response.choices[0].message.content
+        # Extract JSON from the response
+        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
+        json_match = re.search(json_pattern, result)
+        if json_match:
+            json_str = json_match.group(1) if json_match.group(1) else result
+            json_str = re.sub(r"```.*|```", "", json_str).strip()
+            try:
+                suggestions = json.loads(json_str)
+                return suggestions
+            except:
+                pass
+        else:
+            try:
+                suggestions = json.loads(result)
+                return suggestions
+            except:
+                pass
+        return None
+    except Exception as e:
+        print(f"Error suggesting variable values: {str(e)}")
+        return None
+@st.cache_data
+def analyze_knowledge_base(knowledge_base, _client, model="gpt-4o-mini"):
+    """
+    Analyze the knowledge base to extract potential variable names and values.
+    This can be used to suggest variables when creating a new template.
+    """
+    if not knowledge_base or not client:
+        return None
+    # Truncate knowledge base if it's too long
+    kb_excerpt = (
+        knowledge_base[:100000] + "..."
+        if len(knowledge_base) > 100000
+        else knowledge_base
+    )
+    prompt = f"""
+    Analyze the following knowledge base content and identify potential variables that could be used in a template.
+    KNOWLEDGE BASE EXCERPT:
+    {kb_excerpt}
+    TASK:
+    1. Identify key entities, attributes, or concepts that could be used as variables
+    2. For each variable, suggest an appropriate type (string, int, float, bool, categorical)
+    3. For categorical variables, suggest possible options
+    Return your analysis as a JSON array with the following structure:
+    [
+      {{
+        "name": "variable_name",
+        "description": "what this variable represents",
+        "type": "string/int/float/bool/categorical",
+        "options": ["option1", "option2", ...] (only for categorical type)
+      }},
+      ...
+    ]
+    Focus on extracting variables that appear frequently or seem important in the knowledge base.
+    """
+    try:
+        response = _client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=2000,
+            temperature=0.3,
+        )
+        result = response.choices[0].message.content
+        # Extract JSON from the response
+        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\[[\s\S]*\]\s*$"
+        json_match = re.search(json_pattern, result)
+        if json_match:
+            json_str = json_match.group(1) if json_match.group(1) else result
+            json_str = re.sub(r"```.*|```", "", json_str).strip()
+            try:
+                suggestions = json.loads(json_str)
+                return suggestions
+            except:
+                pass
+        else:
+            try:
+                suggestions = json.loads(result)
+                return suggestions
+            except:
+                pass
+        return None
+    except Exception as e:
+        print(f"Error analyzing knowledge base: {str(e)}")
+        return None
 # Initialize session state
 if "template_spec" not in st.session_state:
     st.session_state.template_spec = None
         uploaded_files = st.file_uploader(
             "Upload documents to use as knowledge base",
             accept_multiple_files=True,
+            type=["pdf", "txt", "html"],
         )
         # Rest of your existing code for document processing...
             with st.expander("Preview extracted content"):
                 st.text_area(
                     "Extracted Text",
+                    value=st.session_state.knowledge_base,
                     height=200,
                     disabled=True,
                 )
     if st.session_state.show_template_editor and st.session_state.template_spec:
         st.header("Template Editor")
+        # Initialize suggested variables in session state if not present
+        if "suggested_variables" not in st.session_state:
+            st.session_state.suggested_variables = []
+        # Initialize a tracking variable for added suggestions
+        if "added_suggestions" not in st.session_state:
+            st.session_state.added_suggestions = set()
         # Basic template information
         with st.expander("Template Information", expanded=True):
             col1, col2 = st.columns(2)
                 height=100,
             )
+        # Knowledge Base Analysis
+        with st.expander("Knowledge Base Analysis", expanded=True):
+            if st.session_state.knowledge_base:
+                st.info("Analyze the knowledge base to suggest variables and values")
+                if st.button(
+                    "Analyze Knowledge Base for Variables", key="analyze_kb_button"
+                ):
+                    client = get_openai_client()
+                    if not client:
+                        st.error(
+                            "Please provide an OpenAI API key to analyze the knowledge base."
+                        )
+                    else:
+                        with st.spinner("Analyzing knowledge base..."):
+                            suggested_vars = analyze_knowledge_base(
+                                st.session_state.knowledge_base, client
+                            )
+                            if suggested_vars:
+                                st.session_state.suggested_variables = suggested_vars
+                                st.success(
+                                    f"Found {len(suggested_vars)} potential variables in the knowledge base"
+                                )
+                            else:
+                                st.warning(
+                                    "Could not extract variables from the knowledge base"
+                                )
+                # Display suggested variables if they exist
+                if st.session_state.suggested_variables:
+                    st.subheader("Suggested Variables")
+                    # Create a container for the variables
+                    for i, var in enumerate(st.session_state.suggested_variables):
+                        # Generate a unique ID for this variable
+                        var_id = f"{var['name']}_{i}"
+                        # Check if this variable has already been added
+                        if var_id in st.session_state.added_suggestions:
+                            continue
+                        with st.container():
+                            col1, col2 = st.columns([3, 1])
+                            with col1:
+                                st.markdown(
+                                    f"**{var['name']}** ({var['type']}): {var['description']}"
+                                )
+                                if var.get("options"):
+                                    st.markdown(f"Options: {', '.join(var['options'])}")
+                            with col2:
+                                # Use a unique key for each button
+                                if st.button("Add", key=f"add_suggested_{var_id}"):
+                                    # Add this variable to the template
+                                    new_var = {
+                                        "name": var["name"],
+                                        "description": var["description"],
+                                        "type": var["type"],
+                                    }
+                                    if var.get("options"):
+                                        new_var["options"] = var["options"]
+                                    if var["type"] in ["string", "int", "float"]:
+                                        new_var["min"] = 1
+                                        new_var["max"] = 100
+                                    # Add to input variables
+                                    st.session_state.template_spec["input"].append(
+                                        new_var
+                                    )
+                                    # Mark this variable as added
+                                    st.session_state.added_suggestions.add(var_id)
+                                    # Show success message
+                                    st.success(
+                                        f"Added {var['name']} to input variables!"
+                                    )
+            else:
+                st.warning(
+                    "No knowledge base available. Please upload documents in the Setup tab first."
+                )
         # Prompt Template Section
         with st.expander("Prompt Template", expanded=True):
             st.info("Use {variable_name} to refer to input variables in your template")
         with st.expander("Input Variables", expanded=True):
             st.subheader("Input Variables")
+            # Add input variable button with smart functionality
+            col1, col2 = st.columns([3, 1])
+            with col1:
+                new_input_name = st.text_input(
+                    "New input variable name", key="new_input_name"
+                )
+            with col2:
+                if st.button("Add Input Variable"):
+                    new_var = {
+                        "name": (
+                            new_input_name
+                            if new_input_name
+                            else f"new_input_{len(st.session_state.template_spec['input']) + 1}"
+                        ),
+                        "description": "New input variable",
+                        "type": "string",
+                        "min": 1,
+                        "max": 100,
+                    }
+                    st.session_state.template_spec["input"].append(new_var)
             # Display input variables
             for i, input_var in enumerate(st.session_state.template_spec["input"]):
                                 )
                         if var_type == "categorical":
+                            # Add a button to suggest options from knowledge base
+                            kb_button_key = f"suggest_input_{i}_{input_var['name']}"
+                            if st.button("Suggest Options from KB", key=kb_button_key):
+                                client = get_openai_client()
+                                if not client:
+                                    st.error(
+                                        "Please provide an OpenAI API key to suggest options."
+                                    )
+                                elif not st.session_state.knowledge_base:
+                                    st.warning(
+                                        "No knowledge base available. Please upload documents first."
+                                    )
+                                else:
+                                    with st.spinner(
+                                        f"Suggesting options for {input_var['name']}..."
+                                    ):
+                                        suggestions = suggest_variable_values_from_kb(
+                                            input_var["name"],
+                                            "categorical",
+                                            st.session_state.knowledge_base,
+                                            client,
+                                        )
+                                        if suggestions and "options" in suggestions:
+                                            # Update the options
+                                            input_var["options"] = suggestions[
+                                                "options"
+                                            ]
+                                            st.success(
+                                                f"Found {len(suggestions['options'])} options"
+                                            )
+                                        else:
+                                            st.warning(
+                                                "Could not find suitable options in the knowledge base"
+                                            )
+                            # Display and edit options
                             options = input_var.get("options", [])
                             options_str = st.text_area(
                                 "Options (one per line)",
                     with col3:
                         if st.button("Remove", key=f"remove_input_{i}"):
                             st.session_state.template_spec["input"].pop(i)
+                            st.rerun()  # Only use rerun for removal
                     st.divider()
         with st.expander("Output Variables", expanded=True):
             st.subheader("Output Variables")
+            # Add output variable button with smart functionality
+            col1, col2 = st.columns([3, 1])
+            with col1:
+                new_output_name = st.text_input(
+                    "New output variable name", key="new_output_name"
+                )
+            with col2:
+                if st.button("Add Output Variable"):
+                    new_var = {
+                        "name": (
+                            new_output_name
+                            if new_output_name
+                            else f"new_output_{len(st.session_state.template_spec['output']) + 1}"
+                        ),
+                        "description": "New output variable",
+                        "type": "string",
+                        "min": 1,
+                        "max": 100,
+                    }
+                    st.session_state.template_spec["output"].append(new_var)
             # Display output variables
             for i, output_var in enumerate(st.session_state.template_spec["output"]):
                                 )
                         if var_type == "categorical":
+                            # Add a button to suggest options from knowledge base
+                            kb_button_key = f"suggest_output_{i}_{output_var['name']}"
+                            if st.button("Suggest Options from KB", key=kb_button_key):
+                                client = get_openai_client()
+                                if not client:
+                                    st.error(
+                                        "Please provide an OpenAI API key to suggest options."
+                                    )
+                                elif not st.session_state.knowledge_base:
+                                    st.warning(
+                                        "No knowledge base available. Please upload documents first."
+                                    )
+                                else:
+                                    with st.spinner(
+                                        f"Suggesting options for {output_var['name']}..."
+                                    ):
+                                        suggestions = suggest_variable_values_from_kb(
+                                            output_var["name"],
+                                            "categorical",
+                                            st.session_state.knowledge_base,
+                                            client,
+                                        )
+                                        if suggestions and "options" in suggestions:
+                                            # Update the options
+                                            output_var["options"] = suggestions[
+                                                "options"
+                                            ]
+                                            st.success(
+                                                f"Found {len(suggestions['options'])} options"
+                                            )
+                                        else:
+                                            st.warning(
+                                                "Could not find suitable options in the knowledge base"
+                                            )
+                            # Display and edit options
                             options = output_var.get("options", [])
                             options_str = st.text_area(
                                 "Options (one per line)",
                     with col3:
                         if st.button("Remove", key=f"remove_output_{i}"):
                             st.session_state.template_spec["output"].pop(i)
+                            st.rerun()  # Only use rerun for removal
                     st.divider()
         if "selected_samples" not in st.session_state:
             st.session_state.selected_samples = []
+        # Add option selection for categorical variables
+        categorical_vars = [
+            var
+            for var in st.session_state.template_spec["input"]
+            if var["type"] == "categorical" and var.get("options")
+        ]
+        if categorical_vars:
+            st.subheader("Categorical Variable Options")
+            st.info(
+                "Select which options to include in the permutations for each categorical variable."
+            )
+            # Create a copy of the template spec for modification
+            template_spec_copy = st.session_state.template_spec.copy()
+            template_spec_copy["input"] = st.session_state.template_spec["input"].copy()
+            # For each categorical variable, allow selecting options
+            for i, var in enumerate(
+                [
+                    v
+                    for v in template_spec_copy["input"]
+                    if v["type"] == "categorical" and v.get("options")
+                ]
+            ):
+                with st.expander(
+                    f"{var['name']} - {var['description']}", expanded=False
+                ):
+                    options = var.get("options", [])
+                    # Initialize selected_options if not present
+                    if "selected_options" not in var:
+                        var["selected_options"] = options.copy()
+                    # Add "Select All" and "Clear All" buttons
+                    col1, col2 = st.columns([1, 1])
+                    with col1:
+                        if st.button(
+                            f"Select All Options for {var['name']}",
+                            key=f"select_all_{i}",
+                        ):
+                            var["selected_options"] = options.copy()
+                    with col2:
+                        if st.button(
+                            f"Clear All Options for {var['name']}", key=f"clear_all_{i}"
+                        ):
+                            var["selected_options"] = []
+                    # Create multiselect for options
+                    var["selected_options"] = st.multiselect(
+                        f"Select options to include for {var['name']}",
+                        options=options,
+                        default=var.get("selected_options", options),
+                        key=f"options_select_{i}",
+                    )
+                    # Show selected count
+                    st.write(
+                        f"Selected {len(var['selected_options'])} out of {len(options)} options"
+                    )
+                    # Update the template spec with the selected options
+                    for j, input_var in enumerate(template_spec_copy["input"]):
+                        if input_var["name"] == var["name"]:
+                            template_spec_copy["input"][j] = var
+                            break
         # Generate inputs button
         if st.button("Generate Synthetic Inputs"):
             if not st.session_state.get("api_key"):
                 st.error("Please provide an OpenAI API key in the sidebar.")
             else:
                 with st.spinner(f"Generating {num_samples} synthetic input samples..."):
+                    # template_spec_copy (carrying the selected options) is only built when categorical_vars is non-empty
+                    if categorical_vars:
+                        st.session_state.synthetic_inputs = (
+                            generate_synthetic_inputs_hybrid(
+                                template_spec_copy, num_samples=num_samples
+                            )
+                        )
+                    else:  # no categorical inputs with options: generate from the session-state spec as-is
+                        st.session_state.synthetic_inputs = (
+                            generate_synthetic_inputs_hybrid(
+                                st.session_state.template_spec, num_samples=num_samples
+                            )
                         )
                 if st.session_state.synthetic_inputs:
                     st.success(
     else:
         st.info(
             "No template has been generated yet. Go to the 'Setup' tab to create one."
+        )