Spaces:

rawpowertools
/

Template_Generator

Running

App Files Files

Template_Generator / app.py

DJHumanRPT

Anna's changes to numeric logic

1e4a486 verified 10 months ago

raw

history blame

151 kB

	import streamlit as st
	import json
	import PyPDF2
	from docling.document_converter import DocumentConverter
	import re
	from io import BytesIO
	import openai
	import anthropic # Add import for Anthropic's Claude models
	import pandas as pd
	import itertools
	import random
	import math
	from tqdm import tqdm

	# Configure the Streamlit page before any other UI is drawn: page title,
	# wide layout, and a sidebar that starts out expanded.
	st.set_page_config(
	    page_title="Template Generator",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)


	# Initialize OpenAI client (you'll need to provide your API key)
	def get_openai_client():
	    """Build an OpenAI client from the API key held in the session.

	    Returns:
	        An ``openai.OpenAI`` instance when ``st.session_state["api_key"]``
	        is set to a non-empty value, otherwise None.
	    """
	    key = st.session_state.get("api_key", "")
	    return openai.OpenAI(api_key=key) if key else None


	def get_anthropic_client():
	    """Build an Anthropic client from the API key held in the session.

	    Returns:
	        An ``anthropic.Anthropic`` instance when
	        ``st.session_state["anthropic_api_key"]`` is set to a non-empty
	        value, otherwise None.
	    """
	    key = st.session_state.get("anthropic_api_key", "")
	    if not key:
	        return None
	    return anthropic.Anthropic(api_key=key)


	def call_model_api(prompt, model, temperature=0.7, max_tokens=1000):
	    """Send a single user message to the provider implied by the model name.

	    Model names starting with "claude" are routed to the Anthropic client;
	    every other name is routed to the OpenAI client. Problems are reported
	    through the return value rather than raised: a missing client or an
	    exception from the API call yields a string beginning with "Error".

	    Args:
	        prompt (str): The prompt to send to the model
	        model (str): The model name (e.g., "gpt-4", "claude-3-opus-latest")
	        temperature (float): Sampling temperature passed to the API
	        max_tokens (int): Maximum number of tokens to generate

	    Returns:
	        str: The generated text, or an error message
	    """
	    conversation = [{"role": "user", "content": prompt}]

	    # Anthropic branch
	    if model.startswith("claude"):
	        anthropic_client = get_anthropic_client()
	        if not anthropic_client:
	            return "Error: No Anthropic API key provided."
	        try:
	            reply = anthropic_client.messages.create(
	                model=model,
	                messages=conversation,
	                max_tokens=max_tokens,
	                temperature=temperature,
	            )
	            return reply.content[0].text
	        except Exception as exc:
	            return f"Error calling Anthropic API: {str(exc)}"

	    # Everything else goes to OpenAI
	    openai_client = get_openai_client()
	    if not openai_client:
	        return "Error: No OpenAI API key provided."
	    try:
	        reply = openai_client.chat.completions.create(
	            model=model,
	            messages=conversation,
	            max_tokens=max_tokens,
	            temperature=temperature,
	        )
	        return reply.choices[0].message.content
	    except Exception as exc:
	        return f"Error calling OpenAI API: {str(exc)}"


	# @st.cache_resource
	def get_document_converter():
	    """Return the DocumentConverter cached on this function, if there is one.

	    The cache slot is the ``_cached_obj`` attribute of this function object,
	    which get_or_create_document_converter() assigns after building a
	    converter. Previously this function returned None unconditionally, so
	    the stored converter was never reused.

	    Returns:
	        The cached converter, or None when nothing has been stored yet.
	    """
	    return getattr(get_document_converter, "_cached_obj", None)


	def get_or_create_document_converter():
	    """Return the converter from get_document_converter(), building one if absent.

	    When get_document_converter() yields None, a new DocumentConverter is
	    constructed, stored on ``get_document_converter._cached_obj`` and
	    returned.
	    """
	    existing = get_document_converter()
	    if existing is not None:
	        return existing

	    fresh = DocumentConverter()
	    # Remember the new instance on the getter function
	    get_document_converter._cached_obj = fresh
	    return fresh


	def create_example_templates():
	    """Return the built-in example template specifications.

	    Three templates are produced, in this order: "Character Generator",
	    "Recipe Generator" and "Product Description". Each has three
	    single-choice categorical inputs, a list of string outputs with
	    min/max bounds, and a prompt with ``{variable}`` placeholders.

	    Returns:
	        list[dict]: Freshly built template dictionaries.
	    """

	    def single_choice(name, description, options):
	        # Categorical input with min == max == 1
	        return {
	            "name": name,
	            "description": description,
	            "type": "categorical",
	            "options": options,
	            "min": 1,
	            "max": 1,
	        }

	    def text_output(name, description, lower, upper):
	        # String output bounded by the given min/max values
	        return {
	            "name": name,
	            "description": description,
	            "type": "string",
	            "min": lower,
	            "max": upper,
	        }

	    character_template = {
	        "name": "Character Generator",
	        "description": "Generate fantasy character descriptions based on selected traits",
	        "version": "1.0.0",
	        "input": [
	            single_choice(
	                "race",
	                "Character's fantasy race",
	                ["Human", "Elf", "Dwarf", "Orc", "Halfling"],
	            ),
	            single_choice(
	                "class",
	                "Character's profession or class",
	                ["Warrior", "Mage", "Rogue", "Cleric", "Ranger"],
	            ),
	            single_choice(
	                "alignment",
	                "Character's moral alignment",
	                [
	                    "Lawful Good",
	                    "Neutral",
	                    "Chaotic Evil",
	                    "Lawful Evil",
	                    "Chaotic Good",
	                ],
	            ),
	        ],
	        "output": [
	            text_output("character_name", "Generated character name", 3, 30),
	            text_output("background", "Character background story", 100, 500),
	        ],
	        "prompt": "Create a fantasy character with the following traits:\nRace: {race}\nClass: {class}\nAlignment: {alignment}\n\nGenerate a suitable name and background story for this character.",
	    }

	    recipe_template = {
	        "name": "Recipe Generator",
	        "description": "Generate cooking recipes based on ingredients and cuisine",
	        "version": "1.0.0",
	        "input": [
	            single_choice(
	                "cuisine",
	                "Style of cooking",
	                ["Italian", "Mexican", "Chinese", "Indian", "French"],
	            ),
	            single_choice(
	                "main_ingredient",
	                "Primary ingredient",
	                ["Chicken", "Beef", "Fish", "Tofu", "Vegetables"],
	            ),
	            single_choice(
	                "dietary_restriction",
	                "Dietary requirements",
	                [
	                    "None",
	                    "Vegetarian",
	                    "Vegan",
	                    "Gluten-free",
	                    "Dairy-free",
	                ],
	            ),
	        ],
	        "output": [
	            text_output("recipe_name", "Name of the recipe", 5, 50),
	            text_output("ingredients", "List of ingredients needed", 50, 300),
	            text_output("instructions", "Cooking instructions", 100, 500),
	        ],
	        "prompt": "Create a {cuisine} recipe using {main_ingredient} as the main ingredient. The recipe should be {dietary_restriction}.\n\nProvide a recipe name, list of ingredients, and cooking instructions.",
	    }

	    product_template = {
	        "name": "Product Description",
	        "description": "Generate marketing descriptions for products",
	        "version": "1.0.0",
	        "input": [
	            single_choice(
	                "product_type",
	                "Type of product",
	                [
	                    "Smartphone",
	                    "Laptop",
	                    "Headphones",
	                    "Smartwatch",
	                    "Camera",
	                ],
	            ),
	            single_choice(
	                "target_audience",
	                "Target customer demographic",
	                [
	                    "Students",
	                    "Professionals",
	                    "Gamers",
	                    "Creatives",
	                    "Seniors",
	                ],
	            ),
	            single_choice(
	                "price_tier",
	                "Price category",
	                [
	                    "Budget",
	                    "Mid-range",
	                    "Premium",
	                    "Luxury",
	                    "Enterprise",
	                ],
	            ),
	        ],
	        "output": [
	            text_output("product_name", "Generated product name", 5, 30),
	            text_output("tagline", "Short marketing tagline", 10, 100),
	            text_output("description", "Full product description", 100, 500),
	        ],
	        "prompt": "Create a marketing description for a {price_tier} {product_type} targeted at {target_audience}.\n\nProvide a product name, catchy tagline, and compelling product description.",
	    }

	    return [character_template, recipe_template, product_template]


	# Create a function to display example outputs
	def create_example_outputs(template):
	    """Return canned example outputs for one of the built-in templates.

	    Args:
	        template (dict): Template specification; only ``template["name"]``
	            is read.

	    Returns:
	        dict: Maps a space-joined combination of input values to a dict of
	        output values. Empty when the template name is not one of
	        "Character Generator", "Recipe Generator" or "Product Description".
	    """
	    template_name = template["name"]

	    if template_name == "Character Generator":
	        return {
	            "Human Warrior Lawful Good": {
	                "character_name": "Sir Galahad Ironheart",
	                "background": "Born to a noble family in the kingdom of Valorhaven, Sir Galahad trained from childhood in the arts of combat. After saving the king's daughter from bandits, he was knighted and now serves as captain of the royal guard. His unwavering dedication to justice and honor has made him a legend throughout the realm, though his strict adherence to the code of chivalry sometimes puts him at odds with more pragmatic allies.",
	            },
	            "Elf Mage Chaotic Good": {
	                "character_name": "Lyraniel Starweaver",
	                "background": "Raised in the ancient forest of Eldrath, Lyraniel discovered her affinity for arcane magic when she accidentally set a tree ablaze during an argument. Rather than follow the structured magical traditions of her people, she left to study diverse magical practices across the continent. She now uses her considerable powers to protect the innocent and fight tyranny, though her methods are often unpredictable and sometimes cause as much chaos as they resolve.",
	            },
	            "Dwarf Rogue Neutral": {
	                "character_name": "Grimble Lockpick",
	                "background": "Once a respected jeweler in the mountain halls of Karak-Dûm, Grimble's curiosity about the perfect lock led him down a different path. Neither malicious nor heroic, he sees himself as a professional who offers specialized services for the right price. His reputation for being able to open any lock or disarm any trap has made him sought after by adventurers and nobles alike, though he remains careful to avoid political entanglements that might limit his freedom.",
	            },
	        }

	    if template_name == "Recipe Generator":
	        return {
	            "Italian Chicken None": {
	                "recipe_name": "Tuscan Herb-Roasted Chicken",
	                "ingredients": "- 4 chicken breasts\n- 3 tbsp olive oil\n- 4 cloves garlic, minced\n- 1 tbsp fresh rosemary, chopped\n- 1 tbsp fresh thyme, chopped\n- 1 lemon, zested and juiced\n- 1 cup cherry tomatoes, halved\n- 1/2 cup chicken broth\n- 1/4 cup dry white wine\n- Salt and pepper to taste\n- Fresh basil for garnish",
	                "instructions": "1. Preheat oven to 375°F (190°C).\n2. Season chicken breasts with salt and pepper.\n3. In a large oven-safe skillet, heat olive oil over medium-high heat.\n4. Sear chicken breasts for 3-4 minutes per side until golden brown.\n5. Add garlic, rosemary, and thyme to the pan and cook for 1 minute until fragrant.\n6. Add lemon zest, lemon juice, cherry tomatoes, chicken broth, and white wine.\n7. Transfer skillet to the oven and roast for 20-25 minutes until chicken is cooked through.\n8. Garnish with fresh basil before serving.",
	            },
	            "Mexican Vegetables Vegetarian": {
	                "recipe_name": "Roasted Vegetable Enchiladas Verde",
	                "ingredients": "- 2 zucchini, diced\n- 1 red bell pepper, diced\n- 1 yellow bell pepper, diced\n- 1 red onion, sliced\n- 2 cups mushrooms, sliced\n- 3 tbsp olive oil\n- 2 tsp cumin\n- 1 tsp chili powder\n- 1 tsp oregano\n- 8 corn tortillas\n- 2 cups salsa verde\n- 1 1/2 cups shredded Monterey Jack cheese\n- 1 avocado, sliced\n- 1/4 cup cilantro, chopped\n- Lime wedges for serving",
	                "instructions": "1. Preheat oven to 425°F (220°C).\n2. Toss zucchini, bell peppers, onion, and mushrooms with olive oil, cumin, chili powder, oregano, salt, and pepper.\n3. Spread vegetables on a baking sheet and roast for 20 minutes, stirring halfway through.\n4. Reduce oven temperature to 375°F (190°C).\n5. Warm tortillas slightly to make them pliable.\n6. Fill each tortilla with roasted vegetables and roll up.\n7. Place enchiladas seam-side down in a baking dish.\n8. Pour salsa verde over enchiladas and sprinkle with cheese.\n9. Bake for 20-25 minutes until cheese is melted and bubbly.\n10. Garnish with avocado slices and cilantro. Serve with lime wedges.",
	            },
	        }

	    if template_name == "Product Description":
	        return {
	            "Smartphone Professionals Premium": {
	                "product_name": "ExecuTech Pro X9",
	                "tagline": "Seamless productivity meets uncompromising elegance.",
	                "description": 'The ExecuTech Pro X9 redefines what a business smartphone can be. Crafted with aerospace-grade materials and featuring our revolutionary 6.7" CrystalClear AMOLED display, the Pro X9 ensures your presentations and video conferences look impeccable in any lighting condition. The advanced 5-lens camera system with AI enhancement captures professional-quality images for your reports and social media, while the dedicated security co-processor keeps your sensitive data protected with military-grade encryption. With an impressive 36-hour battery life and our proprietary RapidCharge technology, the Pro X9 keeps pace with your demanding schedule. Experience the perfect balance of performance and sophistication that successful professionals deserve.',
	            },
	            "Headphones Gamers Mid-range": {
	                "product_name": "SonicStrike GT-500",
	                "tagline": "Hear every move. Dominate every game.",
	                "description": "Level up your gaming experience with the SonicStrike GT-500 gaming headset. Engineered specifically for competitive gamers, these headphones feature our proprietary 50mm UltraBass drivers that deliver thunderous lows while maintaining crystal-clear highs, allowing you to hear enemy footsteps with pinpoint accuracy. The detachable boom microphone with noise-cancellation ensures your teammates hear your callouts clearly, even in the heat of battle. With memory foam ear cushions wrapped in breathable mesh fabric, the GT-500 remains comfortable during marathon gaming sessions. Compatible with all major gaming platforms and featuring customizable RGB lighting through our GameSync app, the SonicStrike GT-500 offers premium features at a price that won't break the bank. Your gaming advantage starts here.",
	            },
	        }

	    # Unknown template: nothing predefined
	    return {}


	def calculate_cartesian_product_size(categorical_vars):
	    """Estimate how many value combinations the categorical variables yield.

	    For each variable the usable options are those in ``options`` that also
	    appear in ``selected_options`` (all options when that leaves nothing).
	    A single-choice variable (min == max == 1) contributes one value per
	    usable option. Otherwise the count is C(n, min), plus C(n, max) when
	    max differs from min, plus at most 3 per intermediate selection size.

	    Args:
	        categorical_vars (list[dict]): Variable specifications.

	    Returns:
	        tuple: ``(product_size, var_counts)`` where ``var_counts`` is a list
	        of ``{"name", "count"}`` dicts. ``(0, [])`` for empty input. A
	        variable with a count of zero is treated as 1 in the product.
	    """
	    from math import comb

	    if not categorical_vars:
	        return 0, []

	    total = 1
	    per_variable = []

	    for spec in categorical_vars:
	        every_option = spec.get("options", [])
	        chosen = spec.get("selected_options", every_option)
	        lowest = spec.get("min", 1)
	        highest = spec.get("max", 1)

	        # Restrict to the selected options, falling back to all of them
	        usable = [opt for opt in every_option if opt in chosen] or every_option
	        n = len(usable)

	        if lowest == 1 and highest == 1:
	            # Exactly one option is picked
	            count = n
	        else:
	            count = 0
	            if n >= lowest:
	                count += comb(n, lowest)
	            if highest != lowest and n >= highest:
	                count += comb(n, highest)
	            # Intermediate sizes contribute at most 3 samples each
	            count += sum(
	                min(3, comb(n, size))
	                for size in range(lowest + 1, highest)
	                if n >= size
	            )

	        per_variable.append({"name": spec["name"], "count": count})
	        total *= max(count, 1)  # a zero count must not wipe out the product

	    return total, per_variable


	@st.cache_data
	def parse_documents(uploaded_files):
	    """Parse multiple document files and extract their text content.

	    ``.txt`` files are decoded as UTF-8 directly. ``.pdf``, ``.docx`` and
	    ``.html`` files are written to a temporary file and run through the
	    document converter, whose result is exported as Markdown. Other
	    extensions produce a Streamlit warning. A failure on one file is
	    reported with ``st.error`` and does not stop the remaining files.

	    Args:
	        uploaded_files: Iterable of uploaded-file objects exposing ``name``
	            and ``getvalue()``.

	    Returns:
	        str: The concatenated text of every successfully parsed file, or an
	        empty string when no files were supplied.
	    """
	    if not uploaded_files:
	        return ""

	    import os
	    import tempfile

	    converter = get_or_create_document_converter()
	    parts = []

	    for file in uploaded_files:
	        try:
	            file_type = file.name.split(".")[-1].lower()

	            # Handle text files directly
	            if file_type == "txt":
	                parts.append(file.getvalue().decode("utf-8"))
	            # Use converter for other supported file types
	            elif file_type in ["pdf", "docx", "html"]:
	                tmp_path = None
	                try:
	                    # The converter is given a path, so the upload is
	                    # spilled to a temp file carrying the right extension.
	                    with tempfile.NamedTemporaryFile(
	                        delete=False, suffix=f".{file_type}"
	                    ) as tmp_file:
	                        tmp_path = tmp_file.name
	                        tmp_file.write(file.getvalue())

	                    source = converter.convert(tmp_path)
	                    parts.append(source.document.export_to_markdown())
	                finally:
	                    # Remove the temp file even when conversion fails;
	                    # delete=False means nothing else will clean it up.
	                    if tmp_path is not None:
	                        os.unlink(tmp_path)
	            else:
	                st.warning(f"Unsupported file type: {file.name}")
	        except Exception as e:
	            st.error(f"Error processing file {file.name}: {str(e)}")

	    return "".join(parts)


	# Add this function after parse_documents function
	def parse_template_file(uploaded_template):
	    """Load a template specification from an uploaded ``.json`` file.

	    The file content is decoded as UTF-8, parsed as JSON, passed through
	    sanitize_template_spec() and then checked for the required top-level
	    keys, list-typed ``input``/``output`` and the required fields on every
	    variable.

	    Args:
	        uploaded_template: Uploaded-file object exposing ``name`` and
	            ``getvalue()``.

	    Returns:
	        tuple: ``(template_spec, None)`` on success, or ``(None, message)``
	        describing the first problem found.
	    """
	    top_level_fields = ["name", "version", "description", "input", "output", "prompt"]
	    variable_fields = ["name", "description", "type"]

	    try:
	        if not uploaded_template.name.endswith(".json"):
	            return None, "Uploaded file must be a JSON file"

	        raw_text = uploaded_template.getvalue().decode("utf-8")
	        parsed = json.loads(raw_text)

	        # Drop UI-only keys before validating
	        spec = sanitize_template_spec(parsed)

	        for field in top_level_fields:
	            if field not in spec:
	                return None, f"Invalid template: Missing '{field}' field"

	        if not isinstance(spec["input"], list):
	            return None, "Invalid template: 'input' must be an array"
	        if not isinstance(spec["output"], list):
	            return None, "Invalid template: 'output' must be an array"

	        # Inputs are checked before outputs; the first incomplete entry wins
	        for label, variables in (("Input", spec["input"]), ("Output", spec["output"])):
	            for idx, variable in enumerate(variables):
	                if any(field not in variable for field in variable_fields):
	                    return (
	                        None,
	                        f"Invalid template: {label} variable at index {idx} is missing required fields",
	                    )

	        return spec, None
	    except json.JSONDecodeError:
	        return None, "Invalid JSON format in the uploaded template file"
	    except Exception as exc:
	        return None, f"Error parsing template file: {str(exc)}"


	def sanitize_template_spec(template_spec):
	    """
	    Remove UI-specific keys from template specification that shouldn't be part of the template.

	    The keys ``previous_options`` and ``selected_options`` are stripped from
	    every variable in the ``input`` and ``output`` lists. The caller's
	    specification is left untouched: the top-level dict is copied and the
	    two variable lists are rebuilt rather than edited in place. (The earlier
	    shallow ``.copy()`` shared those lists, so the original was modified.)

	    Args:
	        template_spec (dict): The template specification to sanitize

	    Returns:
	        dict: Sanitized template specification. A falsy argument is returned
	        as-is.
	    """
	    if not template_spec:
	        return template_spec

	    # Keys that only exist to drive the UI
	    ui_specific_keys = ("previous_options", "selected_options")

	    # Copy the top level; values other than input/output are shared
	    sanitized_spec = template_spec.copy()

	    for section in ("input", "output"):
	        if section in sanitized_spec and isinstance(sanitized_spec[section], list):
	            # A new list of new dicts, so the caller's data is not mutated
	            sanitized_spec[section] = [
	                {k: v for k, v in var.items() if k not in ui_specific_keys}
	                for var in sanitized_spec[section]
	            ]

	    return sanitized_spec


	# LLM call function
	def call_llm(prompt, model="gpt-3.5-turbo"):
	    """Call the LLM API to generate text based on the prompt.

	    When the template editor is shown and the session's template spec
	    declares output variables, the prompt is extended with a description of
	    those variables and a request for a JSON-only answer, and the reply is
	    then parsed as JSON. The JSON is taken from a ```json fenced block if
	    one is present, otherwise from the whole reply.

	    Args:
	        prompt (str): The prompt to send.
	        model (str): Model name forwarded to call_model_api().

	    Returns:
	        The parsed JSON value when parsing succeeds (also stored in
	        ``st.session_state.json_output``), otherwise the raw reply text.
	        If an exception escapes, it is reported with ``st.error`` and a
	        string of the form ``"Error: ..."`` is returned.
	    """
	    try:
	        # Output variables only matter while the template editor is active
	        output_vars = []
	        if st.session_state.show_template_editor and st.session_state.template_spec:
	            output_vars = st.session_state.template_spec.get("output", [])

	        if output_vars:
	            spec_lines = [
	                "Please generate output with the following specifications in JSON format:\n"
	            ]
	            for var in output_vars:
	                line = f"- {var['name']}: {var['description']} (Type: {var['type']})"
	                if var.get("options"):
	                    line += f", Options: {var['options']}"
	                spec_lines.append(line + "\n")
	            output_specs = "".join(spec_lines)

	            # Add the output specs to the prompt
	            prompt = f"{prompt}\n\n{output_specs}\n\nReturn ONLY a JSON object with the output variables, with no additional text or explanation."

	        result = call_model_api(
	            model=model,
	            prompt=prompt,
	            max_tokens=1000,
	            temperature=st.session_state.get("temperature", 0.7),
	        )

	        if output_vars:
	            # Either a ```json fenced block (captured in group 1) or a reply
	            # that is nothing but a JSON object. The previous pattern had no
	            # quantifiers and an escaped "|", so it could never match.
	            json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
	            json_match = re.search(json_pattern, result)

	            if json_match:
	                json_str = json_match.group(1) if json_match.group(1) else result
	                # Clean up any remaining markdown fences
	                json_str = re.sub(r"```.*|```", "", json_str).strip()
	            else:
	                json_str = result

	            try:
	                output_data = json.loads(json_str)
	            except (ValueError, TypeError):
	                # Not valid JSON: fall through and return the raw reply
	                pass
	            else:
	                # Store the parsed JSON in session state for proper rendering
	                st.session_state.json_output = output_data
	                return output_data

	        # If we couldn't parse as JSON or it's not meant to be JSON, return as is
	        return result
	    except Exception as e:
	        st.error(f"Error calling LLM API: {str(e)}")
	        return f"Error: {str(e)}"


	# Function to generate a template based on instructions and documents
	def generate_template_from_instructions(instructions, document_content=""):
	    """
	    Use LLM to generate a template specification based on user instructions
	    and document content.

	    The model named in ``st.session_state.model`` is asked for a JSON
	    template. The JSON is taken from a ```json fenced block when present,
	    otherwise from the whole reply.

	    Args:
	        instructions (str): What the template should do.
	        document_content (str): Optional document text embedded in the
	            prompt.

	    Returns:
	        dict: The parsed template specification, or the result of
	        create_fallback_template(instructions) when the reply is not valid
	        JSON or any error occurs (reported with ``st.warning`` /
	        ``st.error``).
	    """

	    # Prepare the prompt for the LLM
	    prompt = f"""
	You are a template designer for an LLM-powered content generation system.
	Create a template specification based on the following instructions:

	INSTRUCTIONS:
	{instructions}

	{"DOCUMENT CONTENT (EXCERPT):" + document_content + "..." if document_content else "NO DOCUMENTS PROVIDED"}

	Generate a JSON template specification with the following structure:
	{{
	"name": "A descriptive name for the template",
	"version": "1.0.0",
	"description": "A brief description of what this template does",
	"input": [
	{{
	"name": "variable_name",
	"description": "What this variable represents",
	"type": "string/int/float/bool/categorical",
	"min": minimum_value_or_length,
	"max": maximum_value_or_length,
	"options": ["option1", "option2"] (only for categorical type)
	}},
	... more input variables
	],
	"output": [
	{{
	"name": "output_variable_name",
	"description": "What this output represents",
	"type": "string/int/float/bool/categorical"
	}},
	... more output variables
	],
	"prompt": "A template string with {{variable_name}} placeholders that will be replaced with actual values"
	}}

	Make sure the prompt includes all input variables and is designed to produce the expected outputs.
	The prompt should address an LLM as if it was a combination of a system prompt and user input, and must contain information around formatting,
	structure and context for the LLM to generate the desired content as derived from these instructions and/or documents.
	If a 'lore' or 'knowledge_base' should be incorporated, include {{lore}} in the prompt template.
	If document content was provided, design the template to effectively use that information.
	"""

	    try:
	        # Call the LLM to generate the template
	        template_text = call_model_api(
	            model=st.session_state.model,
	            prompt=prompt,
	            max_tokens=4096,
	            temperature=0.7,
	        )

	        # Either a ```json fenced block (captured in group 1) or a reply
	        # that is nothing but a JSON object. The previous pattern had no
	        # quantifiers and an escaped "|", so it could never match.
	        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
	        json_match = re.search(json_pattern, template_text)

	        if json_match:
	            json_str = json_match.group(1) if json_match.group(1) else template_text
	            # Clean up any remaining markdown fences
	            json_str = re.sub(r"```.*|```", "", json_str).strip()
	            # A parse failure here is handled by the outer except
	            return json.loads(json_str, strict=False)

	        # If no JSON format found, try to parse the entire response
	        try:
	            return json.loads(template_text, strict=False)
	        except (ValueError, TypeError):
	            st.warning("LLM didn't return valid JSON. Using fallback template.")
	            return create_fallback_template(instructions)

	    except Exception as e:
	        st.error(f"Error generating template: {str(e)}")
	        return create_fallback_template(instructions)


	# Add these functions after the generate_template_from_instructions function


	def generate_improved_prompt_template(template_spec, knowledge_base=""):
	    """
	    Use LLM to generate an improved prompt template based on current template variables.

	    Args:
	        template_spec (dict): Template specification; its ``input``,
	            ``output``, ``description`` and ``prompt`` entries are read.
	        knowledge_base (str): Optional background text embedded in the
	            request.

	    Returns:
	        str: The rewritten prompt with Markdown code fences removed. The
	        current ``template_spec["prompt"]`` is returned unchanged when no
	        API key is configured or when an exception is raised (both are
	        reported with ``st.error``).
	    """
	    if not st.session_state.get("api_key") and not st.session_state.get(
	        "anthropic_api_key"
	    ):
	        st.error("Please provide an OpenAI or Anthropic API key to rewrite the prompt.")
	        return template_spec["prompt"]

	    # Extract template information for context
	    input_vars = template_spec["input"]
	    output_vars = template_spec["output"]
	    template_description = template_spec["description"]

	    # Format variable information for the prompt
	    input_vars_text = "\n".join(
	        f"- {var['name']}: {var['description']} (Type: {var['type']})"
	        + (f", Options: {var['options']}" if var.get("options") else "")
	        for var in input_vars
	    )

	    output_vars_text = "\n".join(
	        f"- {var['name']}: {var['description']} (Type: {var['type']})"
	        for var in output_vars
	    )

	    # Prepare the prompt for the LLM
	    prompt = f"""
	You are an expert at designing effective prompts for LLMs. Rewrite the prompt template based on the following details:

	TEMPLATE PURPOSE:
	{template_description}

	INPUT VARIABLES:
	{input_vars_text}

	OUTPUT VARIABLES:
	{output_vars_text}

	{"KNOWLEDGE BASE AVAILABLE:" if knowledge_base else "NO KNOWLEDGE BASE AVAILABLE."}
	{knowledge_base if knowledge_base else ""}

	Current prompt template:
	{template_spec["prompt"]}

	Please create an improved prompt template that:
	1. Uses all input variables (in curly braces like {{variable_name}})
	2. Is designed to generate the specified outputs
	3. Includes {{lore}} where background information or context should be inserted
	4. Is clear, specific, and well-structured
	5. Provides enough guidance to the LLM to generate high-quality results

	Return ONLY the revised prompt template text, with no additional explanations.
	"""

	    try:
	        # Call the LLM to generate the improved prompt template
	        improved_template = call_model_api(
	            model=st.session_state.model,
	            prompt=prompt,
	            max_tokens=4096,
	            temperature=0.7,
	        )

	        # Strip Markdown fences: an opening fence together with the rest of
	        # its line, or a bare closing fence. The previous pattern escaped
	        # the "|" and therefore never removed anything.
	        improved_template = re.sub(r"```.*\n|```", "", improved_template)

	        return improved_template
	    except Exception as e:
	        st.error(f"Error generating improved prompt: {str(e)}")
	        return template_spec["prompt"]


	# Fallback template if generation fails
	def create_fallback_template(instructions=""):
	    """Build the minimal template used when generation fails.

	    Args:
	        instructions (str): Stored verbatim as the template description.

	    Returns:
	        dict: A template with one string input (``input_1``), one string
	        output (``output_1``) and a prompt referencing ``{input_1}`` and
	        ``{lore}``.
	    """
	    placeholder_input = {
	        "name": "input_1",
	        "description": "First input variable",
	        "type": "string",
	        "min": 1,
	        "max": 100,
	    }
	    placeholder_output = {
	        "name": "output_1",
	        "description": "Generated output",
	        "type": "string",
	        "min": 10,
	        "max": 1000,
	    }

	    fallback = {
	        "name": "Generated Template",
	        "version": "1.0.0",
	        "description": instructions,
	    }
	    fallback["input"] = [placeholder_input]
	    fallback["output"] = [placeholder_output]
	    fallback["prompt"] = "Based on the following information:\n{input_1}\n\nAnd considering this additional context:\n{lore}\n\nGenerate the following output."
	    return fallback


	def generate_synthetic_inputs_hybrid(template_spec, num_samples=10, max_retries=3):
	    """
	    Generate synthetic input data using a hybrid approach:
	    - Programmatically generate combinations of categorical variables
	    - Use LLM to fill in non-categorical variables
	    - Process row by row for resilience

	    Args:
	        template_spec (dict): Template specification; ``template_spec["input"]``
	            lists the variables to generate values for.
	        num_samples (int): Number of rows requested.
	        max_retries (int): Forwarded to the row/value generators; also used
	            to bound the number of top-up attempts.

	    Returns:
	        list[dict]: At most ``num_samples`` rows mapping variable name to
	        value. Empty when no API key is configured. Fewer rows than
	        requested are returned (with a warning) if row generation keeps
	        failing.
	    """
	    if not st.session_state.get("api_key") and not st.session_state.get(
	        "anthropic_api_key"
	    ):
	        # Either provider's key is accepted by the check above
	        st.error(
	            "Please provide an OpenAI or Anthropic API key to generate synthetic data."
	        )
	        return []

	    # Extract all variables from the template
	    input_vars = template_spec["input"]

	    # Separate categorical and non-categorical variables
	    categorical_vars = [
	        var for var in input_vars if var["type"] == "categorical" and var.get("options")
	    ]
	    non_categorical_vars = [var for var in input_vars if var not in categorical_vars]

	    default_value_vars = [var for var in input_vars if "default_value" in var]

	    # Non-categorical variables without a default still need generated
	    # values; this does not change per row, so compute it once.
	    remaining_non_cat_vars = [
	        var for var in non_categorical_vars if var not in default_value_vars
	    ]

	    # Process in batches and show progress
	    with st.spinner(f"Generating {num_samples} synthetic inputs..."):
	        progress_bar = st.progress(0)
	        results = []

	        # If we have categorical variables, use them to create base permutations
	        if categorical_vars:
	            st.info(
	                f"Generating permutations for {len(categorical_vars)} categorical variables"
	            )
	            # Create permutations of categorical values
	            permutations = generate_categorical_permutations(
	                categorical_vars, num_samples
	            )

	            # For each permutation, fill in non-categorical variables
	            for i, perm in enumerate(permutations):
	                # Update progress
	                progress_bar.progress(min((i + 1) / len(permutations), 1.0))

	                # Create a complete row by adding non-categorical values
	                row = perm.copy()

	                # Add default values first
	                for var in default_value_vars:
	                    row[var["name"]] = var["default_value"]

	                # Generate values for remaining non-categorical variables
	                if remaining_non_cat_vars:
	                    non_cat_values = generate_non_categorical_values(
	                        remaining_non_cat_vars, perm, max_retries
	                    )
	                    row.update(non_cat_values)

	                results.append(row)

	                # Stop if we have enough samples
	                if len(results) >= num_samples:
	                    break
	        else:
	            # No categorical variables, generate each row individually
	            for i in range(num_samples):
	                # Update progress
	                progress_bar.progress(min((i + 1) / num_samples, 1.0))

	                # Generate a complete row of values
	                row = generate_single_row(input_vars, max_retries)
	                if row:
	                    results.append(row)

	        # Top up to the requested number of samples, but give up after a
	        # bounded number of attempts: if row generation keeps returning
	        # nothing, an unbounded loop would never finish.
	        attempts_left = max(num_samples, 1) * max(max_retries, 1)
	        while len(results) < num_samples and attempts_left > 0:
	            attempts_left -= 1
	            row = generate_single_row(input_vars, max_retries)
	            if row:
	                results.append(row)

	        if len(results) < num_samples:
	            st.warning(
	                f"Only generated {len(results)} of {num_samples} requested samples."
	            )

	        # Ensure progress bar completes
	        progress_bar.progress(1.0)

	    return results[:num_samples]


	def generate_categorical_permutations(categorical_vars, target_count):
	    """Generate combinations of categorical values, sized to ``target_count``.

	    For each variable the usable options are those in ``options`` that also
	    appear in ``selected_options`` (all options when that leaves nothing).
	    Single-choice variables (min == max == 1) contribute one value per
	    option. Multi-choice variables contribute every combination of size
	    ``min``, every combination of size ``max`` (when different), and up to
	    three random combinations for each size in between; their values are
	    lists. The Cartesian product across variables is then either randomly
	    sampled down to ``target_count`` or padded with randomly varied clones.

	    Every returned permutation owns its selection lists, so varying a clone
	    never alters another permutation. (Previously the lists were shared and
	    modified in place.)

	    Args:
	        categorical_vars (list[dict]): Variable specs with ``name`` and
	            ``options``, optionally ``selected_options``, ``min``, ``max``.
	        target_count (int): Number of permutations wanted.

	    Returns:
	        list[dict]: ``target_count`` dicts mapping variable name to a value
	        (single choice) or list of values (multi choice). An empty list is
	        returned when no combination exists at all, e.g. a variable without
	        options.
	    """

	    def usable_options(var):
	        # Selected options in declaration order, else every option
	        options = var.get("options", [])
	        selected = var.get("selected_options", options)
	        return [opt for opt in options if opt in selected] or options

	    def is_single_choice(var):
	        return var.get("min", 1) == 1 and var.get("max", 1) == 1

	    multi_choice_names = {
	        var["name"] for var in categorical_vars if not is_single_choice(var)
	    }

	    def own_copy(pairs):
	        # Build a permutation whose multi-choice lists are private copies
	        return {
	            name: list(value) if name in multi_choice_names else value
	            for name, value in pairs
	        }

	    # Build option sets for each categorical variable
	    option_sets = []
	    for var in categorical_vars:
	        var_name = var["name"]
	        options_to_use = usable_options(var)

	        if is_single_choice(var):
	            option_sets.append([(var_name, opt) for opt in options_to_use])
	            continue

	        min_sel = var.get("min", 1)
	        max_sel = var.get("max", 1)

	        # Every combination at the minimum size ...
	        var_options = [
	            (var_name, list(combo))
	            for combo in itertools.combinations(options_to_use, min_sel)
	        ]
	        # ... and at the maximum size when it differs
	        if max_sel != min_sel:
	            var_options.extend(
	                (var_name, list(combo))
	                for combo in itertools.combinations(options_to_use, max_sel)
	            )
	        # Up to 3 random combinations for each intermediate size
	        for size in range(min_sel + 1, max_sel):
	            combos = list(itertools.combinations(options_to_use, size))
	            if combos:
	                for combo in random.sample(combos, min(3, len(combos))):
	                    var_options.append((var_name, list(combo)))

	        option_sets.append(var_options)

	    # Generate permutations
	    all_permutations = [own_copy(combo) for combo in itertools.product(*option_sets)]

	    # If we have too many permutations, sample a subset
	    if len(all_permutations) > target_count:
	        return random.sample(all_permutations, target_count)

	    # Nothing to clone from (some variable produced no choices)
	    if not all_permutations:
	        return []

	    # If we don't have enough, duplicate with variations
	    while len(all_permutations) < target_count:
	        # Clone an existing permutation, including its selection lists
	        new_perm = own_copy(random.choice(all_permutations).items())

	        # Modify a random categorical value if possible
	        if categorical_vars:
	            var = random.choice(categorical_vars)
	            var_name = var["name"]
	            options_to_use = usable_options(var)

	            if len(options_to_use) > 1:
	                if is_single_choice(var):
	                    # For single selection, choose a different option
	                    current = new_perm[var_name]
	                    other_options = [opt for opt in options_to_use if opt != current]
	                    if other_options:
	                        new_perm[var_name] = random.choice(other_options)
	                else:
	                    # For multi-selection, grow or shrink the clone's own list
	                    current_selection = new_perm[var_name]
	                    min_sel = var.get("min", 1)
	                    max_sel = var.get("max", 1)

	                    # Decide whether to add or remove an item
	                    if len(current_selection) < max_sel and random.random() > 0.5:
	                        # Add an item not already in the selection
	                        available = [
	                            opt
	                            for opt in options_to_use
	                            if opt not in current_selection
	                        ]
	                        if available:
	                            current_selection.append(random.choice(available))
	                    elif len(current_selection) > min_sel:
	                        # Remove a random item
	                        current_selection.pop(random.randrange(len(current_selection)))

	        all_permutations.append(new_perm)

	    return all_permutations


	def generate_non_categorical_values(non_cat_vars, existing_values, max_retries):
	    """Generate values for non-categorical variables given existing categorical values.

	    Numeric (``int``/``float``) variables are sampled locally from their
	    ``min``/``max`` range. ``string`` variables are requested from the LLM in
	    one prompt that carries the already known values as context.

	    Args:
	        non_cat_vars: Variable spec dicts; ``name`` and ``type`` are read for
	            every variable, ``description`` for string variables and
	            ``min``/``max`` for numeric ones.
	        existing_values: Mapping of values that were already chosen; it is
	            merged with the sampled numeric values and shown to the LLM.
	            ``None`` is treated as an empty mapping.
	        max_retries: Maximum number of LLM calls before the string variables
	            fall back to their default value.

	    Returns:
	        dict: Variable name -> generated value (``{}`` when ``non_cat_vars``
	        is empty).
	    """
	    if not non_cat_vars:
	        return {}

	    # Separate string and numeric variables
	    llm_vars = [var for var in non_cat_vars if var["type"] == "string"]
	    numeric_vars = [var for var in non_cat_vars if var["type"] in ["int", "float"]]

	    # Sample numeric values within the specified range.
	    # Only names and values reach the LLM, so numeric variable names should
	    # carry their unit (e.g. price_in_euros instead of price).
	    result_values = {}
	    for var in numeric_vars:
	        name = var["name"]
	        var_min = var.get("min")
	        var_max = var.get("max")

	        if var_min is None or var_max is None:
	            result_values[name] = get_default_value(var)
	            continue

	        try:
	            if var["type"] == "int":
	                result_values[name] = random.randint(int(var_min), int(var_max))
	            elif var["type"] == "float":
	                result_values[name] = round(
	                    random.uniform(float(var_min), float(var_max)), 2
	                )
	        except (TypeError, ValueError, OverflowError):
	            # Non-numeric bounds or an empty range (min > max)
	            result_values[name] = get_default_value(var)

	    # Format the string variables for the prompt
	    if llm_vars:
	        vars_text = "\n".join(
	            f"- {var['name']}: {var['description']} (Type: string)" for var in llm_vars
	        )
	        # Combine categorical and numeric values for LLM context. The two
	        # dicts must be merged; a set literal of dicts is unhashable.
	        context_values = {**(existing_values or {}), **result_values}

	        # Create prompt with existing categorical and numerical values as context
	        prompt = f"""
	As a synthetic data generator, create values for these variables:

	{vars_text}

	These values should be coherent with the existing categorical and/or numerical values:
	{json.dumps(context_values, indent=2)}

	Return ONLY a JSON object with the new variable values:
	{{
	"variable_name_1": value1,
	"variable_name_2": value2
	}}
	"""

	        # Either a ```json fenced block (captured in group 1) or a reply that
	        # is a bare JSON object from start to end.
	        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"

	        for attempt in range(max_retries):
	            try:
	                response = call_model_api(
	                    model=st.session_state.model,
	                    prompt=prompt,
	                    max_tokens=1000,
	                    temperature=st.session_state.temperature,
	                )

	                result = response.strip()

	                # Extract JSON
	                json_str = result
	                json_match = re.search(json_pattern, result)
	                if json_match:
	                    json_str = json_match.group(1) or result
	                    # Drop any leftover fence markers
	                    json_str = re.sub(r"```.*|```", "", json_str).strip()

	                try:
	                    values = json.loads(json_str, strict=False)
	                except json.JSONDecodeError:
	                    continue  # unparsable reply: try again

	                if isinstance(values, dict):
	                    result_values.update(values)
	                    return result_values

	            except Exception as e:
	                if attempt == max_retries - 1:
	                    st.warning(f"Failed to generate string values: {str(e)}")

	    # Fallback: generate empty values for all string variables
	    for var in llm_vars:
	        result_values[var["name"]] = get_default_value(var)
	    return result_values


	def generate_single_row(all_vars, max_retries):
	    """Generate a complete row of data using hybrid logic:
	    - Use LLM for string/categorical vars
	    - Sample int/float within range

	    Args:
	        all_vars: Variable spec dicts; ``name`` and ``type`` are read for every
	            variable, ``min``/``max`` for numeric ones and ``description`` /
	            ``options`` for the ones sent to the LLM.
	        max_retries: Maximum number of LLM calls for the string/categorical
	            variables.

	    Returns:
	        dict | None: The generated row, or ``None`` when no value at all could
	        be produced. When every LLM attempt fails the row only holds the
	        sampled numeric values.
	    """
	    numeric_vars = [var for var in all_vars if var["type"] in ["int", "float"]]
	    llm_vars = [var for var in all_vars if var["type"] in ["string", "categorical"]]

	    row = {}

	    # Sample numeric vars
	    for var in numeric_vars:
	        name = var["name"]
	        var_min = var.get("min")
	        var_max = var.get("max")
	        if var_min is None or var_max is None:
	            row[name] = get_default_value(var)
	            continue

	        try:
	            if var["type"] == "int":
	                row[name] = random.randint(int(var_min), int(var_max))
	            elif var["type"] == "float":
	                row[name] = round(random.uniform(float(var_min), float(var_max)), 2)
	        except (TypeError, ValueError, OverflowError):
	            # Non-numeric bounds or an empty range (min > max)
	            row[name] = get_default_value(var)

	    # Generate string and categorical via LLM
	    if llm_vars:
	        vars_text = "\n".join(
	            f"- {var['name']}: {var['description']} (Type: {var['type']})"
	            + (
	                f", Options: {var['options']}"
	                if var["type"] == "categorical" and var.get("options")
	                else ""
	            )
	            for var in llm_vars
	        )

	        prompt = f"""
	You are a synthetic data generator. Generate values for the following variables:

	{vars_text}

	Based on this partial row:
	{json.dumps(row, indent=2)}

	Return ONLY a JSON object with the new values:
	{{
	"var_name_1": value1,
	"var_name_2": value2
	}}

	For categorical variables that allow multiple selections, return a list of values.
	"""

	        # Either a ```json fenced block (captured in group 1) or a reply that
	        # is a bare JSON object from start to end.
	        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"

	        for attempt in range(max_retries):
	            try:
	                response = call_model_api(
	                    model=st.session_state.model,
	                    prompt=prompt,
	                    max_tokens=1000,
	                    temperature=st.session_state.temperature,
	                )

	                result = response.strip()
	                json_str = result
	                json_match = re.search(json_pattern, result)
	                if json_match:
	                    json_str = json_match.group(1) or result
	                    # Drop any leftover fence markers
	                    json_str = re.sub(r"```.*|```", "", json_str).strip()

	                # A parse error is handled below like any other failed attempt
	                values = json.loads(json_str, strict=False)
	                if isinstance(values, dict):
	                    row.update(values)
	                    break

	            except Exception as e:
	                if attempt == max_retries - 1:
	                    st.warning(f"Failed to generate string/categorical values: {str(e)}")

	    return row if row else None


	def get_default_value(var):
	    """Generate a default value for a variable based on its type.

	    The function is used as a fallback when sampling or LLM generation fails,
	    so it never raises on a missing, ``None`` or non-numeric ``min``.

	    Args:
	        var: Variable spec dict. ``type`` is required; ``min``, ``max`` and
	            ``options`` are read when present.

	    Returns:
	        - ``"N/A"`` for ``string``
	        - ``min`` (or ``0`` when unset) for ``int``
	        - ``float(min)`` (or ``0.0`` when unset or not convertible) for ``float``
	        - ``False`` for ``bool``
	        - for ``categorical``: the first option for single selection, the
	          first ``min`` options for multi selection, ``None`` without options
	        - ``None`` for any other type
	    """
	    var_type = var["type"]

	    if var_type == "string":
	        return "N/A"

	    if var_type == "int":
	        min_val = var.get("min")
	        return 0 if min_val is None else min_val

	    if var_type == "float":
	        try:
	            return float(var.get("min", 0))
	        except (TypeError, ValueError):
	            # min is None or not numeric
	            return 0.0

	    if var_type == "bool":
	        return False

	    if var_type == "categorical":
	        options = var.get("options") or []
	        if not options:
	            return None

	        min_sel = var.get("min", 1)
	        if min_sel == 1 and var.get("max", 1) == 1:
	            return options[0]
	        return options[:min_sel]

	    return None


	def generate_synthetic_outputs(
	    template_spec, input_data, knowledge_base="", max_retries=3
	):
	    """Generate synthetic output data based on template and input data with retry logic.

	    For every input row the prompt template is filled in, the LLM is asked for
	    a JSON object holding all output variables, and the reply is validated.
	    Invalid replies are retried up to ``max_retries`` times per row.

	    Args:
	        template_spec: Template dict; its ``input``, ``output`` and ``prompt``
	            entries are read.
	        input_data: Sequence of dicts with input variable values. Keys that
	            are not declared in ``template_spec["input"]`` are dropped.
	        knowledge_base: Text substituted for ``{lore}`` in the prompt template.
	        max_retries: Maximum number of LLM calls per input row.

	    Returns:
	        list[dict]: One dict per input row: the filtered input values merged
	        with the generated output values, or with an ``error`` entry when no
	        valid output could be produced.
	    """

	    output_vars = template_spec["output"]
	    prompt_template = template_spec["prompt"]
	    required_vars = [var["name"] for var in output_vars]

	    # Format output variable information for the prompt
	    output_vars_text = "\n".join(
	        f"- {var['name']}: {var['description']} (Type: {var['type']}) {'Options: '+str(var['options']) if var.get('options') else ''}"
	        for var in output_vars
	    )

	    input_vars = template_spec["input"]
	    input_vars_text = "\n".join(
	        f"- {var['name']}: {var['description']} (Type: {var['type']})"
	        for var in input_vars
	    )

	    # Skeleton of the expected reply, e.g. {"a": output, "b": output}
	    output_format = (
	        "{" + ", ".join(f'"{name}": output' for name in required_vars) + "}"
	    )

	    # Either a ```json fenced block (captured in group 1) or a reply that is
	    # a bare JSON object from start to end.
	    json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"

	    results = []

	    # Create a progress bar
	    progress_bar = st.progress(0)

	    try:
	        input_var_names = [var["name"] for var in input_vars]

	        for i, input_item in enumerate(input_data):
	            # Filter out variables not defined in the template spec
	            input_item = {k: v for k, v in input_item.items() if k in input_var_names}
	            # Fill the prompt template with input values
	            filled_prompt = prompt_template
	            for var_name, var_value in input_item.items():
	                filled_prompt = filled_prompt.replace(f"{{{var_name}}}", str(var_value))

	            # Replace {lore} with knowledge base if present
	            if "{lore}" in filled_prompt:
	                filled_prompt = filled_prompt.replace("{lore}", knowledge_base)

	            # Create a prompt for generating synthetic output
	            generation_prompt = f"""
	You are generating synthetic output data based on the following input:

	DEFINITION OF INPUT VARIABLES:
	{input_vars_text}

	INPUT DATA:
	{json.dumps(input_item, indent=2)}

	PROMPT USED:
	{filled_prompt}

	REQUIRED OUTPUT VARIABLES:
	{output_vars_text}

	Generate realistic output data for these variables. Return ONLY a JSON object with the below format, using the names of the required output variables as keys:
	{output_format}

	Use appropriate data types for each variable. Return ONLY the JSON object with no additional text or explanation.
	The response must be valid JSON that can be parsed directly.
	"""
	            output_data = None
	            for attempt in range(max_retries):
	                try:
	                    response = call_model_api(
	                        model=st.session_state.model,
	                        prompt=generation_prompt,
	                        max_tokens=2000,
	                        temperature=st.session_state.temperature,
	                    )

	                    result = response.strip()

	                    # Extract JSON from the response; without a match the
	                    # whole reply is parsed as JSON
	                    json_str = result
	                    json_match = re.search(json_pattern, result)
	                    if json_match:
	                        json_str = json_match.group(1) or result
	                        # Clean up any remaining markdown or comments
	                        json_str = re.sub(r"```.*|```", "", json_str).strip()

	                    try:
	                        output_data = json.loads(json_str, strict=False)
	                    except json.JSONDecodeError:
	                        st.warning(
	                            f"Attempt {attempt+1} for input {i+1}: Failed to parse JSON. Retrying..."
	                        )
	                    else:
	                        # Validate that we got a dictionary
	                        if not isinstance(output_data, dict):
	                            st.warning(
	                                f"Attempt {attempt+1} for input {i+1}: Generated output is not a dictionary. Retrying..."
	                            )
	                        else:
	                            # Check if all required output variables are present
	                            missing_vars = [
	                                var for var in required_vars if var not in output_data
	                            ]
	                            if not missing_vars:
	                                break  # Valid output, exit retry loop
	                            st.warning(
	                                f"Attempt {attempt+1} for input {i+1}: Missing output variables: {missing_vars}. Retrying..."
	                            )

	                except Exception as e:
	                    st.warning(
	                        f"Attempt {attempt+1} for input {i+1}: Error generating output: {str(e)}. Retrying..."
	                    )

	                # Only reached when this attempt failed. If we've reached the
	                # max retries, log the error and discard any partial output.
	                if attempt == max_retries - 1:
	                    st.error(
	                        f"Failed to generate valid output for input {i+1} after {max_retries} attempts."
	                    )
	                    output_data = {
	                        "error": f"Failed to generate valid output after {max_retries} attempts"
	                    }

	            # Combine input and output data (dict merge; output keys win on
	            # a name clash)
	            if output_data:
	                results.append({**input_item, **output_data})
	            else:
	                results.append({**input_item, "error": "Failed to generate output"})

	            # Update progress bar
	            progress_bar.progress((i + 1) / len(input_data))

	    finally:
	        # Ensure progress bar reaches 100% when done
	        if len(input_data) > 0:
	            progress_bar.progress(1.0)

	    return results


	def suggest_variable_values_from_kb(
	    variable_name, variable_type, knowledge_base, model="gpt-3.5-turbo"
	):
	    """
	    Use LLM to suggest possible values for a variable based on the knowledge base content.
	    Especially useful for categorical variables to extract options from documents.

	    Args:
	        variable_name: Name of the variable to find values for.
	        variable_type: Declared type of the variable; it is inserted into the
	            prompt, which describes "categorical", "string", "int", "float"
	            and "bool".
	        knowledge_base: Text to search; only the first 100000 characters are
	            sent to the model.
	        model: Model name handed to ``call_model_api``.

	    Returns:
	        The parsed JSON reply of the model, or ``None`` when the knowledge
	        base is empty, the call fails or the reply is not valid JSON.
	    """
	    if not knowledge_base:
	        return None

	    # Truncate knowledge base if it's too long
	    kb_excerpt = (
	        knowledge_base[:100000] + "..."
	        if len(knowledge_base) > 100000
	        else knowledge_base
	    )

	    prompt = f"""
	Based on the following knowledge base content, suggest appropriate values for a variable named "{variable_name}" of type "{variable_type}".

	KNOWLEDGE BASE EXCERPT:
	{kb_excerpt}

	TASK:
	Extract or suggest appropriate values for this variable from the knowledge base.

	If the variable type is "categorical", return a list of possible options found in the knowledge base.
	If the variable type is "string", suggest a few example values.
	If the variable type is "int" or "float", suggest appropriate min/max ranges.
	If the variable type is "bool", suggest appropriate true/false conditions.

	Return your response as a JSON object with the following structure:
	For categorical: {{"options": ["option1", "option2", ...]}}
	For string: {{"examples": ["example1", "example2", ...], "min": min_length, "max": max_length}}
	For int/float: {{"min": minimum_value, "max": maximum_value, "examples": [value1, value2, ...]}}
	For bool: {{"examples": ["condition for true", "condition for false"]}}

	Only include values that are actually present or strongly implied in the knowledge base.
	"""

	    try:
	        result = call_model_api(
	            model=model,
	            prompt=prompt,
	            max_tokens=1000,
	            temperature=0.3,
	        )

	        # Extract JSON from the response: either a ```json fenced block
	        # (captured in group 1) or a reply that is a bare JSON object.
	        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
	        json_str = result
	        json_match = re.search(json_pattern, result)
	        if json_match:
	            json_str = json_match.group(1) or result
	            # Drop any leftover fence markers
	            json_str = re.sub(r"```.*|```", "", json_str).strip()

	        try:
	            return json.loads(json_str, strict=False)
	        except json.JSONDecodeError:
	            return None
	    except Exception as e:
	        print(f"Error suggesting variable values: {str(e)}")
	        return None


	@st.cache_data
	def analyze_knowledge_base(knowledge_base, model="gpt-4o-mini"):
	    """
	    Analyze the knowledge base to extract potential variable names and values.
	    This can be used to suggest variables when creating a new template.

	    Args:
	        knowledge_base: Text to analyze; only the first 100000 characters are
	            sent to the model.
	        model: Model name handed to ``call_model_api``.

	    Returns:
	        The parsed JSON reply of the model (the prompt asks for an array of
	        variable descriptions), or ``None`` when the knowledge base is empty,
	        the call fails or the reply is not valid JSON.
	    """
	    if not knowledge_base:
	        return None

	    # Truncate knowledge base if it's too long
	    kb_excerpt = (
	        knowledge_base[:100000] + "..."
	        if len(knowledge_base) > 100000
	        else knowledge_base
	    )

	    prompt = f"""
	Analyze the following knowledge base content and identify potential variables that could be used in a template.

	KNOWLEDGE BASE EXCERPT:
	{kb_excerpt}

	TASK:
	1. Identify key entities, attributes, or concepts that could be used as variables
	2. For each variable, suggest an appropriate type (string, int, float, bool, categorical)
	3. For categorical variables, suggest possible options

	Return your analysis as a JSON array with the following structure:
	[
	{{
	"name": "variable_name",
	"description": "what this variable represents",
	"type": "string/int/float/bool/categorical",
	"options": ["option1", "option2", ...] (only for categorical type)
	}},
	...
	]

	Focus on extracting variables that appear frequently or seem important in the knowledge base.
	"""

	    try:
	        result = call_model_api(
	            model=model,
	            prompt=prompt,
	            max_tokens=2000,
	            temperature=0.3,
	        )

	        # Extract JSON from the response: either a ```json fenced block
	        # (captured in group 1) or a reply that is a bare JSON array.
	        json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\[[\s\S]*\]\s*$"
	        json_str = result
	        json_match = re.search(json_pattern, result)
	        if json_match:
	            json_str = json_match.group(1) or result
	            # Drop any leftover fence markers
	            json_str = re.sub(r"```.*|```", "", json_str).strip()

	        try:
	            return json.loads(json_str, strict=False)
	        except json.JSONDecodeError:
	            return None
	    except Exception as e:
	        print(f"Error analyzing knowledge base: {str(e)}")
	        return None


	# Seed every session-state key the app reads with its initial value.
	# Keys that already exist keep their current value across reruns.
	for _state_key, _state_default in (
	    ("template_spec", None),
	    ("knowledge_base", ""),
	    ("show_template_editor", False),
	    ("user_inputs", {}),
	    ("generated_output", ""),
	    ("uploaded_filenames", []),
	    ("kb_cleared", False),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# Sidebar: API credentials and model choice
	with st.sidebar:
	    st.title("Template Generator")
	    st.write("Create templates for generating content with LLMs.")

	    # API keys are copied into session state only when a value was entered
	    st.subheader("API Keys")
	    api_key = st.text_input("OpenAI API Key", type="password")
	    if api_key:
	        st.session_state.api_key = api_key

	    anthropic_api_key = st.text_input("Anthropic API Key", type="password")
	    if anthropic_api_key:
	        st.session_state.anthropic_api_key = anthropic_api_key

	    # Provider first, then the model list that belongs to it
	    st.subheader("Model Selection")
	    model_provider = st.radio(
	        "Select Model Provider",
	        options=["OpenAI", "Anthropic"],
	        index=0,
	    )

	    if model_provider == "OpenAI":
	        _model_label = "Select OpenAI Model"
	        _model_options = [
	            "gpt-4o-mini",
	            "gpt-4.1-mini",
	            "gpt-4.1",
	            "gpt-4o",
	            "gpt-4.1-nano",
	        ]
	    else:  # Anthropic
	        _model_label = "Select Claude Model"
	        _model_options = [
	            "claude-3-7-sonnet-latest",
	            "claude-3-5-haiku-latest",
	            "claude-3-5-sonnet-latest",
	            "claude-3-opus-latest",
	        ]

	    # Both lists preselect their second entry
	    # (gpt-4.1-mini / claude-3-5-haiku-latest)
	    st.session_state.model = st.selectbox(
	        _model_label,
	        options=_model_options,
	        index=1,
	    )

	# Main application layout
	st.title("Template Generator")

	# Create tabs for workflow
	tab1, tab2, tab3 = st.tabs(["Setup", "Edit and Use Template", "Generate Data"])

	# Setup tab: pick how the project starts (example, uploaded template,
	# template generated from documents, or empty template). Every path ends by
	# storing a template in st.session_state.template_spec and setting
	# st.session_state.show_template_editor.
	# NOTE(review): the success messages below refer to an 'Edit Template' tab,
	# while the tab created above is labelled "Edit and Use Template".
	with tab1:
	st.header("Project Setup")

	# Add option to either upload a template or create a new one
	setup_option = st.radio(
	"Choose how to start your project",
	options=[
	"Create new template from documents",
	"Upload existing template",
	"Create an empty template",
	],
	index=0,
	)

	# The example gallery is shown for both "create" options, not for upload
	if (
	setup_option == "Create new template from documents"
	or setup_option == "Create an empty template"
	):
	# Add Examples section
	st.markdown("---")
	st.subheader("Or try one of our examples")

	# Get example templates
	example_templates = create_example_templates()

	# Create columns for example cards
	cols = st.columns(len(example_templates))

	# Display each example in a card
	for i, (col, template) in enumerate(zip(cols, example_templates)):
	with col:
	st.markdown(f"#### {template['name']}")
	st.markdown(f"{template['description']}")

	# Show input variables
	with st.expander("Inputs and Outputs", expanded=False):
	st.markdown("Inputs:")
	for inp in template["input"]:
	st.markdown(f"- {inp['name']}: {inp['type']}")

	# Show output variables
	st.markdown("Outputs:")
	for out in template["output"]:
	st.markdown(f"- {out['name']}: {out['type']}")

	# Button to use this example (key made unique per card via the index)
	if st.button(f"Use this example", key=f"use_example_{i}"):
	st.session_state.template_spec = template
	st.session_state.show_template_editor = True

	# Create some example outputs to show
	example_outputs = create_example_outputs(template)

	# Store example outputs in session state
	st.session_state.example_outputs = example_outputs

	# Success message
	st.success(
	f"Example template loaded! Go to the 'Edit Template' tab to see it in action."
	)

	# Rerun to update the UI
	# st.rerun()

	if setup_option == "Upload existing template":
	st.subheader("Upload Template File")
	uploaded_template = st.file_uploader(
	"Upload a template JSON file",
	type=["json"],
	help="Upload a previously created template file (.json)",
	)

	if uploaded_template:
	# parse_template_file returns a (template, error) pair
	template_spec, error = parse_template_file(uploaded_template)
	if error:
	st.error(error)
	else:
	# Sanitize the template to remove UI-specific keys
	template_spec = sanitize_template_spec(template_spec)
	st.success(f"Successfully loaded template: {template_spec['name']}")

	# Show template preview
	with st.expander("Template Preview", expanded=False):
	st.json(template_spec)

	# Button to use this template
	if st.button("Use This Template"):
	st.session_state.template_spec = template_spec
	st.session_state.show_template_editor = True
	st.success(
	"Template loaded! Go to the 'Edit Template' tab to customize it."
	)

	elif setup_option == "Create new template from documents":
	# Step 1: Upload Knowledge Base
	st.subheader("Step 1: Upload Knowledge Base")
	uploaded_files = st.file_uploader(
	"Upload documents to use as knowledge base",
	accept_multiple_files=True,
	type=["pdf", "txt", "html"],
	)

	# Parse the uploads into the knowledge base; skipped once kb_cleared has
	# been set (it is set by the knowledge base management code in tab2)
	if uploaded_files and not st.session_state.kb_cleared:
	# Track filenames for UI feedback
	st.session_state.uploaded_filenames = [file.name for file in uploaded_files]

	with st.spinner("Processing documents..."):
	st.session_state.knowledge_base = parse_documents(uploaded_files)
	st.success(f"Processed {len(uploaded_files)} documents")

	with st.expander("Preview extracted content"):
	st.text_area(
	"Extracted Text",
	value=st.session_state.knowledge_base,
	height=200,
	disabled=True,
	)

	# Step 2: Provide Instructions
	st.subheader("Step 2: Provide Instructions")
	instructions = st.text_area(
	"Describe what you want to create",
	placeholder="Describe what you want to create (e.g., 'Create a character background generator with name, faction, and race as inputs...')",
	height=150,
	)

	# Generate Template button
	if st.button("Generate Template"):
	# NOTE(review): either an OpenAI or an Anthropic key satisfies this check,
	# but the error text only mentions OpenAI.
	if not st.session_state.get("api_key") and not st.session_state.get(
	"anthropic_api_key"
	):
	st.error(
	"Please provide an OpenAI API key in the sidebar before generating a template."
	)
	elif instructions:
	with st.spinner("Analyzing instructions and generating template..."):
	# Generate template based on instructions and document content
	st.session_state.template_spec = (
	generate_template_from_instructions(
	instructions, st.session_state.knowledge_base
	)
	)
	st.session_state.show_template_editor = True
	st.success(
	"Template generated! Go to the 'Edit Template' tab to customize it."
	)
	else:
	st.warning("Please provide instructions first")

	elif setup_option == "Create an empty template":
	st.subheader("Create Empty Template")
	st.info(
	"This option creates a minimal template that you can customize in the 'Edit Template' tab."
	)

	# Optional: Allow setting a name and description for the template
	template_name = st.text_input("Template Name", value="Custom Template")
	template_description = st.text_area(
	"Template Description", value="A custom template created from scratch"
	)

	if st.button("Create Empty Template"):
	# Create a minimal template structure: one string input, one string
	# output and a prompt that references the input as {input_1}
	st.session_state.template_spec = {
	"name": template_name,
	"version": "1.0.0",
	"description": template_description,
	"input": [
	{
	"name": "input_1",
	"description": "First input variable",
	"type": "string",
	"min": 1,
	"max": 100,
	}
	],
	"output": [
	{
	"name": "output_1",
	"description": "Generated output",
	"type": "string",
	"min": 10,
	"max": 1000,
	}
	],
	"prompt": "Based on the following information:\n{input_1}\n\nGenerate the following output.",
	}

	st.session_state.show_template_editor = True
	st.success(
	"Empty template created! Go to the 'Edit Template' tab to customize it."
	)

	# Optional: Initialize an empty knowledge base
	if "knowledge_base" not in st.session_state:
	st.session_state.knowledge_base = ""

	with tab2:
	if st.session_state.show_template_editor and st.session_state.template_spec:
	st.header("Template Editor")
	st.subheader(st.session_state.template_spec["name"])

	# Initialize session state variables
	if "suggested_variables" not in st.session_state:
	st.session_state.suggested_variables = []
	if "added_suggestions" not in st.session_state:
	st.session_state.added_suggestions = set()
	if (
	"last_template" not in st.session_state
	or st.session_state.last_template != st.session_state.template_spec
	):
	st.session_state.user_inputs = {}
	st.session_state.last_template = st.session_state.template_spec
	if "show_variable_editor" not in st.session_state:
	st.session_state.show_variable_editor = None
	if "show_output_editor" not in st.session_state:
	st.session_state.show_output_editor = None
	if "show_suggested_vars" not in st.session_state:
	st.session_state.show_suggested_vars = False

	# Create main layout with left (settings) and right (generation) columns
	left_col, right_col = st.columns([3, 2])

	# LEFT COLUMN - Settings
	with left_col:
	# Basic template information
	with st.expander("Template Information (Metadata)", expanded=False):
	col1, col2 = st.columns(2)
	with col1:
	st.session_state.template_spec["name"] = st.text_input(
	"Template Name", value=st.session_state.template_spec["name"]
	)
	with col2:
	st.session_state.template_spec["version"] = st.text_input(
	"Version", value=st.session_state.template_spec["version"]
	)

	st.session_state.template_spec["description"] = st.text_area(
	"Description",
	value=st.session_state.template_spec["description"],
	height=100,
	)

	# Prompt Template Section
	with st.expander("Prompt Template", expanded=True):
	st.info(
	"Use {variable_name} to refer to input variables in your template"
	)

	# Add buttons for prompt management
	col1, col2 = st.columns([1, 1])
	with col1:
	rewrite_prompt = st.button("AI Rewrite Prompt")
	with col2:
	reroll_prompt = st.button("Reroll Prompt Variation")

	# Handle prompt rewriting
	if rewrite_prompt or reroll_prompt:
	with st.spinner("Generating improved prompt template..."):
	improved_template = generate_improved_prompt_template(
	st.session_state.template_spec,
	st.session_state.knowledge_base,
	)
	# Only update if we got a valid result back
	if improved_template and len(improved_template) > 10:
	st.session_state.template_spec["prompt"] = improved_template
	st.success("Prompt template updated!")

	# Display the prompt template
	prompt_template = st.text_area(
	"Edit the prompt template",
	value=st.session_state.template_spec["prompt"],
	height=200,
	)
	st.session_state.template_spec["prompt"] = prompt_template

	# Knowledge Base Management Section
	with st.expander("Knowledge Base Management", expanded=False):
	st.info("Upload and manage documents to use as knowledge base")

	# Upload interface
	uploaded_files = st.file_uploader(
	"Upload documents",
	accept_multiple_files=True,
	type=["pdf", "txt", "docx", "html"],
	)

	# Handle document processing
	if uploaded_files:
	# Choose how to handle new uploads
	handle_method = st.radio(
	"How to handle new documents?",
	["Replace existing", "Append to existing"],
	horizontal=True,
	)

	if st.button("Process Documents"):
	parse_documents.clear()
	analyze_knowledge_base.clear()
	st.session_state.kb_cleared = True
	with st.spinner("Processing documents..."):

	if handle_method == "Replace existing":
	new_content = parse_documents(uploaded_files)
	st.session_state.knowledge_base = new_content
	st.session_state.uploaded_filenames = [
	file.name for file in uploaded_files
	]
	else: # Append
	# Find new files by comparing filenames
	new_files = []
	duplicate_files = []

	for file in uploaded_files:
	if file.name in st.session_state.uploaded_filenames:
	duplicate_files.append(file.name)
	else:
	new_files.append(file)
	st.session_state.uploaded_filenames.append(
	file.name
	)

	# Process only new files
	if new_files:
	new_content = parse_documents(new_files)
	st.session_state.knowledge_base += (
	"\n\n" + new_content
	)

	# Provide feedback about duplicates
	if duplicate_files:
	st.info(
	f"Skipped {len(duplicate_files)} duplicate files: {', '.join(duplicate_files)}"
	)

	# Reset any analysis that depends on knowledge base
	if "suggested_variables" in st.session_state:
	st.session_state.suggested_variables = []
	st.session_state.show_suggested_vars = False

	st.success(f"Processed {len(uploaded_files)} documents")
	st.rerun()

	# Display knowledge base information
	if st.session_state.knowledge_base:
	st.write(
	f"Knowledge base size: {len(st.session_state.knowledge_base)} characters"
	)

	# Clear knowledge base button
	# Display uploaded filenames
	if st.session_state.uploaded_filenames:
	st.write("Uploaded files:")
	for filename in st.session_state.uploaded_filenames:
	st.write(f"- {filename}")

	if st.button("Clear Knowledge Base"):
	analyze_knowledge_base.clear()
	st.session_state.knowledge_base = ""
	st.session_state.kb_cleared = True
	st.session_state.uploaded_filenames = []
	if "suggested_variables" in st.session_state:
	st.session_state.suggested_variables = []
	st.session_state.show_suggested_vars = False
	st.success("Knowledge base cleared")
	st.rerun()

	# Option to edit knowledge base directly
	edit_kb = st.checkbox("Edit knowledge base directly")
	if edit_kb:
	new_content = st.text_area(
	"Edit knowledge base content",
	value=st.session_state.knowledge_base,
	height=300,
	)
	if st.button("Update Knowledge Base"):
	analyze_knowledge_base.clear()
	st.session_state.knowledge_base = new_content
	if "suggested_variables" in st.session_state:
	st.session_state.suggested_variables = []
	st.session_state.show_suggested_vars = False
	st.success("Knowledge base updated")
	st.rerun()

	# Add knowledge base as input variable option
	if st.session_state.knowledge_base:
	kb_var_option = st.checkbox(
	"Create input variable from knowledge base"
	)

	if kb_var_option:
	# Allow editing the content to include as variable
	kb_content = st.text_area(
	"Edit knowledge base content for input variable",
	value=st.session_state.knowledge_base,
	height=300,
	)

	# Create input variable name
	kb_var_name = st.text_input(
	"Input variable name", value="kb_content"
	)

	# Add button to create the input variable
	if st.button("Add as input variable"):
	# Check if variable already exists
	var_exists = False
	for var in st.session_state.template_spec["input"]:
	if var["name"] == kb_var_name:
	var_exists = True
	var["description"] = "Knowledge base content"
	var["type"] = "string"
	var["default_value"] = kb_content
	st.success(
	f"Updated existing input variable '{kb_var_name}'"
	)
	break

	if not var_exists:
	# Create new input variable
	new_var = {
	"name": kb_var_name,
	"description": "Knowledge base content",
	"type": "string",
	"min": len(kb_content),
	"max": len(kb_content) * 2,
	"default_value": kb_content,
	}
	st.session_state.template_spec["input"].append(
	new_var
	)
	st.success(
	f"Added new input variable '{kb_var_name}'"
	)

	# Remind user to update prompt template
	st.info(
	f"Remember to use {{{kb_var_name}}} in your prompt template"
	)

	# Knowledge Base Analysis Section
	if st.session_state.knowledge_base:
	with st.expander("Knowledge Base Analysis", expanded=False):
	st.info(
	"Analyze the knowledge base to suggest variables and values"
	)

	if st.button(
	"Analyze Knowledge Base for Variables",
	key="analyze_kb_button_input",
	):
	client = get_openai_client()
	if not client:
	st.error(
	"Please provide an OpenAI API key to analyze the knowledge base."
	)
	else:
	with st.spinner("Analyzing knowledge base..."):
	suggested_vars = analyze_knowledge_base(
	st.session_state.knowledge_base
	)
	if suggested_vars:
	st.session_state.suggested_variables = (
	suggested_vars
	)
	st.session_state.show_suggested_vars = True
	st.success(
	f"Found {len(suggested_vars)} potential variables in the knowledge base"
	)
	else:
	st.warning(
	"Could not extract variables from the knowledge base"
	)

	# Display suggested variables if they exist
	if (
	st.session_state.suggested_variables
	and st.session_state.show_suggested_vars
	):
	st.subheader("Suggested Variables")

	for i, var in enumerate(st.session_state.suggested_variables):
	# Generate a unique ID for this variable
	var_id = f"{var['name']}_{i}"

	# Check if this variable has already been added
	if var_id in st.session_state.added_suggestions:
	continue

	col1, col2 = st.columns([4, 1])
	with col1:
	st.markdown(
	f"{var['name']} ({var['type']}): {var['description']}"
	)
	if var.get("options"):
	st.markdown(f"Options: {', '.join(var['options'])}")
	with col2:
	if st.button("Add", key=f"add_suggested_{var_id}"):
	# Add this variable to the template
	new_var = {
	"name": var["name"],
	"description": var["description"],
	"type": var["type"],
	}
	if var.get("options"):
	new_var["options"] = var["options"]
	if var["type"] in ["string", "int", "float"]:
	new_var["min"] = 1
	new_var["max"] = 100

	# Add to input variables
	st.session_state.template_spec["input"].append(
	new_var
	)

	# Mark this variable as added
	st.session_state.added_suggestions.add(var_id)

	# Show success message
	st.success(
	f"Added {var['name']} to input variables!"
	)

	# Input Variables Section
	with st.expander("Input Variables", expanded=True):
	# Add input variable button
	col1, col2 = st.columns([3, 1])
	with col1:
	new_input_name = st.text_input(
	"New input variable name", key="new_input_name"
	)
	with col2:
	if st.button("Add Input Variable"):
	new_var = {
	"name": (
	new_input_name
	if new_input_name
	else f"new_input_{len(st.session_state.template_spec['input']) + 1}"
	),
	"description": "New input variable",
	"type": "string",
	"min": 1,
	"max": 100,
	}
	st.session_state.template_spec["input"].append(new_var)

	# Display input variables with integrated input fields
	st.subheader("Input Variables")

	# Create a container for the variables
	for i, input_var in enumerate(st.session_state.template_spec["input"]):
	var_name = input_var["name"]
	var_type = input_var["type"]
	var_desc = input_var["description"]

	with st.container():
	# Variable header with description
	st.markdown(f"##### {var_name}\n###### {var_desc}")

	# Create columns for the variable controls
	col1, col2, col3 = st.columns([3, 1, 1])

	with col1:
	# Create the appropriate input field based on variable type
	if var_type == "string":
	# Check if this is a knowledge base variable with default value
	if "default_value" in input_var:
	use_default = st.checkbox(
	f"Use default value for {var_name}",
	value=True,
	key=f"use_default_{var_name}",
	)
	if use_default:
	st.session_state.user_inputs[var_name] = (
	input_var["default_value"]
	)
	st.text_area(
	f"Default value for {var_name}",
	value=input_var["default_value"][:500]
	+ (
	"..."
	if len(input_var["default_value"]) > 500
	else ""
	),
	height=150,
	disabled=True,
	key=f"preview_{var_name}",
	)
	else:
	st.session_state.user_inputs[var_name] = (
	st.text_area(
	f"Enter value for {var_name}",
	value=input_var["default_value"],
	height=150,
	key=f"use_{var_name}",
	)
	)
	else:
	st.session_state.user_inputs[var_name] = (
	st.text_input(
	f"Enter value for {var_name}",
	key=f"use_{var_name}",
	)
	)
	elif var_type == "int":
	st.session_state.user_inputs[var_name] = (
	st.number_input(
	f"Enter value for {var_name}",
	min_value=input_var.get("min", None),
	max_value=input_var.get("max", None),
	step=1,
	key=f"use_{var_name}",
	)
	)
	elif var_type == "float":
	st.session_state.user_inputs[var_name] = (
	st.number_input(
	f"Enter value for {var_name}",
	min_value=float(input_var.get("min", 0)),
	max_value=float(input_var.get("max", 100)),
	key=f"use_{var_name}",
	)
	)
	elif var_type == "bool":
	st.session_state.user_inputs[var_name] = st.checkbox(
	f"Select value for {var_name}",
	key=f"use_{var_name}",
	)
	elif var_type == "categorical":
	options = input_var.get("options", [])
	min_selections = input_var.get("min", 1)
	max_selections = input_var.get("max", 1)

	if options:
	if min_selections == 1 and max_selections == 1:
	# Single selection
	st.session_state.user_inputs[var_name] = (
	st.selectbox(
	f"Select value for {var_name}",
	options=options,
	key=f"use_{var_name}",
	)
	)
	else:
	# Multi-selection
	st.session_state.user_inputs[var_name] = (
	st.multiselect(
	f"Select {min_selections}-{max_selections} values for {var_name}",
	options=options,
	default=(
	options[:min_selections]
	if len(options) >= min_selections
	else options
	),
	key=f"use_{var_name}",
	)
	)
	else:
	st.warning(f"No options defined for {var_name}")

	with col2:
	# Button to edit this variable
	if st.button("Edit Settings", key=f"edit_input_{i}"):
	st.session_state.show_variable_editor = i

	with col3:
	# Button to remove this variable
	if st.button("Remove", key=f"remove_input_{i}"):
	st.session_state.template_spec["input"].pop(i)
	st.rerun()

	# Show editor if this variable is selected
	if st.session_state.show_variable_editor == i:
	with st.container():
	st.markdown("---")
	st.markdown(
	f"##### Variable Settings: {input_var['name']}"
	)

	# Name and description
	input_var["name"] = st.text_input(
	"Name",
	value=input_var["name"],
	key=f"input_name_{i}",
	)
	input_var["description"] = st.text_input(
	"Description",
	value=input_var["description"],
	key=f"input_desc_{i}",
	)

	# Type selection
	var_type = st.selectbox(
	"Type",
	options=[
	"string",
	"int",
	"float",
	"bool",
	"categorical",
	],
	index=[
	"string",
	"int",
	"float",
	"bool",
	"categorical",
	].index(input_var["type"]),
	key=f"input_type_{i}",
	)
	input_var["type"] = var_type

	# Type-specific settings
	if var_type in ["string", "int", "float"]:
	col1, col2 = st.columns(2)
	with col1:
	input_var["min"] = st.number_input(
	"Min",
	value=int(input_var.get("min", 0)),
	key=f"input_min_{i}",
	)
	with col2:
	input_var["max"] = st.number_input(
	"Max",
	value=int(input_var.get("max", 100)),
	key=f"input_max_{i}",
	)

	if var_type == "categorical":
	# Suggest options from KB button
	if st.button(
	"Suggest Options from KB",
	key=f"suggest_input_{i}",
	):
	client = get_openai_client()
	if not client:
	st.error(
	"Please provide an OpenAI API key to suggest options."
	)
	elif not st.session_state.knowledge_base:
	st.warning(
	"No knowledge base available. Please upload documents first."
	)
	else:
	with st.spinner(
	f"Suggesting options for {input_var['name']}..."
	):
	suggestions = (
	suggest_variable_values_from_kb(
	input_var["name"],
	"categorical",
	st.session_state.knowledge_base,
	)
	)
	if (
	suggestions
	and "options" in suggestions
	):
	input_var["options"] = suggestions[
	"options"
	]
	st.success(
	f"Found {len(suggestions['options'])} options"
	)
	else:
	st.warning(
	"Could not find suitable options in the knowledge base"
	)

	# Options editor
	options = input_var.get("options", [])
	options_str = st.text_area(
	"Options (one per line)",
	value="\n".join(options),
	key=f"input_options_{i}",
	)
	input_var["options"] = [
	opt.strip()
	for opt in options_str.split("\n")
	if opt.strip()
	]

	# Min/max selections
	col1, col2 = st.columns(2)
	with col1:
	input_var["min"] = st.number_input(
	"Min selections",
	value=int(input_var.get("min", 1)),
	min_value=0,
	key=f"input_cat_min_{i}",
	)
	with col2:
	input_var["max"] = st.number_input(
	"Max selections",
	value=int(input_var.get("max", 1)),
	min_value=1,
	key=f"input_cat_max_{i}",
	)

	# Close editor button
	if st.button("Done Editing", key=f"done_input_{i}"):
	st.session_state.show_variable_editor = None
	st.rerun()

	st.markdown("---")

	st.divider()

	# Output Variables Section
	with st.expander("Output Variables", expanded=True):
	# Add output variable button
	col1, col2 = st.columns([3, 1])
	with col1:
	new_output_name = st.text_input(
	"New output variable name", key="new_output_name"
	)
	with col2:
	if st.button("Add Output Variable"):
	new_var = {
	"name": (
	new_output_name
	if new_output_name
	else f"new_output_{len(st.session_state.template_spec['output']) + 1}"
	),
	"description": "New output variable",
	"type": "string",
	"min": 1,
	"max": 100,
	}
	st.session_state.template_spec["output"].append(new_var)

	# Display output variables in a table-like format
	st.subheader("Output Variables")

	# Create a container for the variables
	for i, output_var in enumerate(
	st.session_state.template_spec["output"]
	):
	col1, col2, col3 = st.columns([3, 1, 1])

	with col1:
	st.markdown(
	f"{output_var['name']} - {output_var['description']}"
	)

	with col2:
	# Button to edit this variable
	if st.button("Edit", key=f"edit_output_{i}"):
	st.session_state.show_output_editor = i

	with col3:
	# Button to remove this variable
	if st.button("Remove", key=f"remove_output_{i}"):
	st.session_state.template_spec["output"].pop(i)
	st.rerun()

	# Show editor if this variable is selected
	if st.session_state.show_output_editor == i:
	with st.container():
	st.markdown("---")
	st.markdown(
	f"##### Edit Output Variable: {output_var['name']}"
	)

	# Name and description
	output_var["name"] = st.text_input(
	"Name", value=output_var["name"], key=f"output_name_{i}"
	)
	output_var["description"] = st.text_input(
	"Description",
	value=output_var["description"],
	key=f"output_desc_{i}",
	)

	# Type selection
	var_type = st.selectbox(
	"Type",
	options=[
	"string",
	"int",
	"float",
	"bool",
	"categorical",
	],
	index=[
	"string",
	"int",
	"float",
	"bool",
	"categorical",
	].index(output_var["type"]),
	key=f"output_type_{i}",
	)
	output_var["type"] = var_type

	# Type-specific settings
	if var_type in ["string", "int", "float"]:
	col1, col2 = st.columns(2)
	with col1:
	output_var["min"] = st.number_input(
	"Min",
	value=int(output_var.get("min", 0)),
	key=f"output_min_{i}",
	)
	with col2:
	output_var["max"] = st.number_input(
	"Max",
	value=int(output_var.get("max", 100)),
	key=f"output_max_{i}",
	)

	if var_type == "categorical":
	# Suggest options from KB button
	if st.button(
	"Suggest Options from KB", key=f"suggest_output_{i}"
	):
	client = get_openai_client()
	if not client:
	st.error(
	"Please provide an OpenAI API key to suggest options."
	)
	elif not st.session_state.knowledge_base:
	st.warning(
	"No knowledge base available. Please upload documents first."
	)
	else:
	with st.spinner(
	f"Suggesting options for {output_var['name']}..."
	):
	suggestions = (
	suggest_variable_values_from_kb(
	output_var["name"],
	"categorical",
	st.session_state.knowledge_base,
	)
	)
	if suggestions and "options" in suggestions:
	output_var["options"] = suggestions[
	"options"
	]
	st.success(
	f"Found {len(suggestions['options'])} options"
	)
	else:
	st.warning(
	"Could not find suitable options in the knowledge base"
	)

	# Options editor
	options = output_var.get("options", [])
	options_str = st.text_area(
	"Options (one per line)",
	value="\n".join(options),
	key=f"output_options_{i}",
	)
	output_var["options"] = [
	opt.strip()
	for opt in options_str.split("\n")
	if opt.strip()
	]

	# Min/max selections
	col1, col2 = st.columns(2)
	with col1:
	output_var["min"] = st.number_input(
	"Min selections",
	value=int(output_var.get("min", 1)),
	min_value=0,
	key=f"output_cat_min_{i}",
	)
	with col2:
	output_var["max"] = st.number_input(
	"Max selections",
	value=int(output_var.get("max", 1)),
	min_value=1,
	key=f"output_cat_max_{i}",
	)

	# Close editor button
	if st.button("Done Editing", key=f"done_output_{i}"):
	st.session_state.show_output_editor = None
	st.rerun()

	st.markdown("---")

	# Template JSON
	with st.expander("Template JSON", expanded=False):
	st.json(st.session_state.template_spec)

	# Download button
	template_json = json.dumps(st.session_state.template_spec, indent=2)
	st.download_button(
	label="Download Template JSON",
	data=template_json,
	file_name="template_spec.json",
	mime="application/json",
	)

	# RIGHT COLUMN - Generation
	with right_col:
	st.header("Generation")

	# Handle the lore/knowledge base as a special variable
	prompt_template = st.session_state.template_spec["prompt"]
	if "{lore}" in prompt_template:
	with st.expander("Document Knowledge Base", expanded=False):
	st.markdown("##### Document Knowledge Base")

	# Display info about the knowledge base
	if st.session_state.knowledge_base:
	st.success(
	f"Using content from {len(st.session_state.uploaded_filenames) if 'uploaded_filenames' in st.session_state else 'uploaded'} documents as knowledge base"
	)

	# Use a button to toggle knowledge base content view instead of an expander
	if st.button(
	"View/Hide Knowledge Base Content", key="toggle_kb_view"
	):
	st.session_state.show_kb_content = not st.session_state.get(
	"show_kb_content", False
	)

	if st.session_state.get("show_kb_content", False):
	st.text_area(
	"Knowledge base content",
	value=st.session_state.knowledge_base[:2000]
	+ (
	"..."
	if len(st.session_state.knowledge_base) > 2000
	else ""
	),
	height=200,
	disabled=True,
	)

	# Add option to edit if needed
	use_edited_lore = st.checkbox("Edit knowledge base content")
	if use_edited_lore:
	st.session_state.user_inputs["lore"] = st.text_area(
	"Edit knowledge base for this generation",
	value=st.session_state.knowledge_base,
	height=300,
	)
	else:
	st.session_state.user_inputs["lore"] = (
	st.session_state.knowledge_base
	)
	else:
	st.warning(
	"No documents uploaded. You can provide custom lore below."
	)
	st.session_state.user_inputs["lore"] = st.text_area(
	"Enter background information or context",
	placeholder="Enter custom lore or background information here...",
	height=150,
	)
	# Temperature control slider
	st.session_state.temperature = st.slider(
	"Temperature (creativity level)", min_value=0.0, max_value=1.0, value=0.7, step=0.05
	)
	# Generate Output button
	if st.button("Generate Output", key="generate_button"):
	# Check if API key is provided
	if not st.session_state.get("api_key") and not st.session_state.get(
	"anthropic_api_key"
	):
	st.error(
	"Please provide an OpenAI or Anthropic API key in the sidebar before generating output."
	)
	else:
	# Fill the prompt template with user-provided values
	filled_prompt = prompt_template
	for var_name, var_value in st.session_state.user_inputs.items():
	filled_prompt = filled_prompt.replace(
	f"{{{var_name}}}", str(var_value)
	)

	# Show the filled prompt
	with st.expander("View populated prompt"):
	st.text_area(
	"Prompt sent to LLM",
	value=filled_prompt,
	height=200,
	disabled=True,
	)

	# Call LLM with the filled prompt
	# Create a single input data item from user inputs
	input_data = [st.session_state.user_inputs.copy()]

	# Create a copy of the template spec
	template_spec_copy = st.session_state.template_spec.copy()

	# Call generate_synthetic_outputs with the input data
	with st.spinner("Generating output..."):
	model_selected = st.session_state.model
	generated_outputs = generate_synthetic_outputs(
	template_spec_copy,
	input_data,
	st.session_state.knowledge_base,
	max_retries=3,
	)

	# Extract the first output (since we only have one input)
	if generated_outputs and len(generated_outputs) > 0:
	# The output contains both input and output fields
	# We only want to display the output fields
	output_vars = [
	var["name"] for var in template_spec_copy["output"]
	]
	output_data = {
	k: v
	for k, v in generated_outputs[0].items()
	if k in output_vars
	}
	st.session_state.generated_output = output_data
	else:
	st.session_state.generated_output = {
	"error": "Failed to generate output"
	}

	# Display generated output
	if (
	"generated_output" in st.session_state
	and st.session_state.generated_output
	):
	st.header("Generated Output")

	# Check if the output is a dictionary (JSON)
	if isinstance(st.session_state.generated_output, dict):
	# Display as JSON
	st.json(st.session_state.generated_output)

	# Option to save the output as JSON
	output_json = json.dumps(
	st.session_state.generated_output, indent=2
	)
	st.download_button(
	label="Download Output (JSON)",
	data=output_json,
	file_name="generated_output.json",
	mime="application/json",
	)
	else:
	# Display as text
	st.write(st.session_state.generated_output)

	# Option to save the output as text
	st.download_button(
	label="Download Output",
	data=str(st.session_state.generated_output),
	file_name="generated_output.txt",
	mime="text/plain",
	)
	else:
	st.info(
	"No template has been generated yet. Go to the 'Setup' tab to create one."
	)

	with tab3:
	if st.session_state.show_template_editor and st.session_state.template_spec:
	st.header("Generate Synthetic Data")

	with st.expander("Template Information", expanded=False):
	st.json(st.session_state.template_spec)

	# Data generation controls
	st.subheader("Generation Settings")

	col1, col2 = st.columns(2)
	with col1:
	num_samples = st.number_input(
	"Number of samples to generate", min_value=1, max_value=100, value=5
	)
	with col2:
	# Store the temperature value in session state
	st.session_state.temperature = st.slider(
	"Temperature (creativity)",
	min_value=0.1,
	max_value=1.0,
	value=0.7,
	step=0.1,
	)

	# Initialize containers for generated data
	if "synthetic_inputs" not in st.session_state:
	st.session_state.synthetic_inputs = []
	if "synthetic_outputs" not in st.session_state:
	st.session_state.synthetic_outputs = []
	if "combined_data" not in st.session_state:
	st.session_state.combined_data = []
	if "show_json_columns" not in st.session_state:
	st.session_state.show_json_columns = False
	if "modified_prompt_template" not in st.session_state:
	st.session_state.modified_prompt_template = ""
	if "selected_samples" not in st.session_state:
	st.session_state.selected_samples = []

	# Add option selection for categorical variables
	categorical_vars = [
	var
	for var in st.session_state.template_spec["input"]
	if var["type"] == "categorical" and var.get("options")
	]

	# Let the user pick which options of each categorical variable are included in the permutations
	if categorical_vars:
	st.subheader("Categorical Variable Options")
	st.info(
	"Select which options to include in the permutations for each categorical variable."
	)

	# Create a copy of the template spec for modification
	template_spec_copy = st.session_state.template_spec.copy()
	template_spec_copy["input"] = st.session_state.template_spec["input"].copy()

	# Initialize UI state for categorical variables if not present
	if "categorical_ui_state" not in st.session_state:
	st.session_state.categorical_ui_state = {}

	# For each categorical variable, allow selecting options
	for i, var in enumerate(
	[
	v
	for v in template_spec_copy["input"]
	if v["type"] == "categorical" and v.get("options")
	]
	):
	var_name = var["name"]

	# Initialize UI state for this variable if not present
	if var_name not in st.session_state.categorical_ui_state:
	st.session_state.categorical_ui_state[var_name] = {
	"selected_options": var.get("options", []).copy(),
	"previous_options": var.get("options", []).copy(),
	}

	with st.expander(
	f"{var['name']} - {var['description']}", expanded=False
	):
	options = var.get("options", [])

	# Get UI state for this variable
	ui_state = st.session_state.categorical_ui_state[var_name]

	# Filter selected_options to only include valid options
	ui_state["selected_options"] = [
	opt for opt in ui_state["selected_options"] if opt in options
	]

	# Check for new options that need to be automatically selected
	previous_options = ui_state["previous_options"]

	# Find new options that weren't in the previous options list
	new_options = [
	opt for opt in options if opt not in previous_options
	]

	# Add new options to selected_options
	if new_options:
	ui_state["selected_options"].extend(new_options)

	# Store current options for future comparison
	ui_state["previous_options"] = options.copy()

	# Add "Select All" and "Clear All" buttons
	col1, col2 = st.columns([1, 1])
	with col1:
	if st.button(
	f"Select All Options for {var['name']}",
	key=f"select_all_{i}",
	):
	ui_state["selected_options"] = options.copy()
	with col2:
	if st.button(
	f"Clear All Options for {var['name']}", key=f"clear_all_{i}"
	):
	ui_state["selected_options"] = []

	# Create multiselect for options
	ui_state["selected_options"] = st.multiselect(
	f"Select options to include for {var['name']}",
	options=options,
	default=ui_state["selected_options"],
	key=f"options_select_{i}",
	)

	# Show selected count
	st.write(
	f"Selected {len(ui_state['selected_options'])} out of {len(options)} options"
	)

	# Create a temporary copy of the variable with selected_options for the calculation
	# but don't modify the actual template
	var_copy = var.copy()
	var_copy["selected_options"] = ui_state["selected_options"]

	# Update the template spec copy with the selected options for calculation purposes only
	for j, input_var in enumerate(template_spec_copy["input"]):
	if input_var["name"] == var["name"]:
	template_spec_copy["input"][j] = var_copy
	break

	# Calculate and display Cartesian product size
	product_size, var_counts = calculate_cartesian_product_size(
	[v for v in template_spec_copy["input"] if v["type"] == "categorical"]
	)

	st.subheader("Combination Analysis")
	st.info(f"Total number of possible combinations: {product_size:,}")

	# Display breakdown of combinations
	st.write("Breakdown by variable:")
	for var in var_counts:
	st.write(f"- {var['name']}: {var['count']:,} possible values")

	if product_size > num_samples:
	st.warning(
	f"Note: Only {num_samples} samples will be generated from the {product_size:,} possible combinations"
	)
	elif product_size < num_samples:
	st.warning(
	f"Note: Some combinations will be repeated to reach {num_samples} samples (only {product_size:,} unique combinations possible)"
	)

	# Generate inputs button
	if st.button("Generate Synthetic Inputs"):
	if not st.session_state.get("api_key") and not st.session_state.get(
	"anthropic_api_key"
	):
	st.error(
	"Please provide an OpenAI or Anthropic API key in the sidebar."
	)
	else:
	with st.spinner(f"Generating {num_samples} synthetic input samples..."):
	# Create a clean template spec without UI state variables
	clean_template_spec = st.session_state.template_spec.copy()
	clean_template_spec["input"] = st.session_state.template_spec[
	"input"
	].copy()

	# If we have categorical variables, apply the selected options from UI state
	if categorical_vars:
	for i, var in enumerate(clean_template_spec["input"]):
	if (
	var["type"] == "categorical"
	and var.get("options")
	and var["name"] in st.session_state.categorical_ui_state
	):
	# Create a copy of the variable with selected_options for generation
	var_copy = var.copy()
	var_copy["selected_options"] = (
	st.session_state.categorical_ui_state[var["name"]][
	"selected_options"
	]
	)
	clean_template_spec["input"][i] = var_copy

	st.session_state.synthetic_inputs = (
	generate_synthetic_inputs_hybrid(
	clean_template_spec, num_samples=num_samples
	)
	)
	else:
	st.session_state.synthetic_inputs = (
	generate_synthetic_inputs_hybrid(
	clean_template_spec, num_samples=num_samples
	)
	)

	if st.session_state.synthetic_inputs:
	st.success(
	f"Generated {len(st.session_state.synthetic_inputs)} input samples"
	)
	# Reset selected samples when new inputs are generated
	st.session_state.selected_samples = []
	# Reset modified prompt when new inputs are generated
	st.session_state.modified_prompt_template = (
	st.session_state.template_spec["prompt"]
	)

	# Display generated inputs if available
	if st.session_state.synthetic_inputs:
	st.subheader("Generated Input Data")

	# Show data in a table
	input_df = pd.DataFrame(st.session_state.synthetic_inputs)
	st.dataframe(input_df)

	# Download button for inputs
	input_csv = input_df.to_csv(index=False)
	st.download_button(
	label="Download Input Data (CSV)",
	data=input_csv,
	file_name="synthetic_inputs.csv",
	mime="text/csv",
	)

	# Sample selection for output generation
	st.subheader("Generate Outputs")

	# Initialize the modified prompt template if not already done
	if not st.session_state.modified_prompt_template:
	st.session_state.modified_prompt_template = (
	st.session_state.template_spec["prompt"]
	)

	# Allow editing the prompt template
	with st.expander("View/Edit Prompt Template", expanded=False):
	st.info(
	"You can modify the prompt template used for generating outputs. Use {variable_name} to refer to input variables."
	)

	st.session_state.modified_prompt_template = st.text_area(
	"Prompt Template",
	value=st.session_state.modified_prompt_template,
	height=200,
	)

	# Button to reset to original template
	if st.button("Reset to Original Template"):
	st.session_state.modified_prompt_template = (
	st.session_state.template_spec["prompt"]
	)
	st.success("Prompt template reset to original")

	# Sample selection options
	selection_method = st.radio(
	"Select samples for output generation",
	options=["Generate for all samples", "Select specific samples"],
	index=0,
	)

	if selection_method == "Select specific samples":
	# Create a list of sample indices for selection
	sample_options = [
	f"Sample {i+1}"
	for i in range(len(st.session_state.synthetic_inputs))
	]

	# Allow multi-selection of samples
	selected_indices = st.multiselect(
	"Select samples to generate outputs for",
	options=range(len(sample_options)),
	format_func=lambda i: sample_options[i],
	)

	# Store selected samples
	st.session_state.selected_samples = selected_indices

	# Preview selected samples
	if selected_indices:
	st.write(f"Selected {len(selected_indices)} samples:")
	selected_df = pd.DataFrame(
	[st.session_state.synthetic_inputs[i] for i in selected_indices]
	)
	st.dataframe(selected_df)
	else:
	# Use all samples
	st.session_state.selected_samples = list(
	range(len(st.session_state.synthetic_inputs))
	)

	# Preview the prompt for a selected sample
	if st.session_state.selected_samples:
	with st.expander("Preview Prompt for Sample", expanded=False):
	# Let user select which sample to preview
	preview_index = st.selectbox(
	"Select a sample to preview prompt",
	options=st.session_state.selected_samples,
	format_func=lambda i: f"Sample {i+1}",
	)

	# Get the selected sample
	sample = st.session_state.synthetic_inputs[preview_index]

	# Fill the prompt template with sample values
	filled_prompt = st.session_state.modified_prompt_template
	for var_name, var_value in sample.items():
	filled_prompt = filled_prompt.replace(
	f"{{{var_name}}}", str(var_value)
	)

	# Replace {lore} with knowledge base if present
	if "{lore}" in filled_prompt:
	filled_prompt = filled_prompt.replace(
	"{lore}", st.session_state.knowledge_base
	)

	# Show the filled prompt
	st.text_area(
	"Filled Prompt", value=filled_prompt, height=300, disabled=True
	)

	# Advanced output generation options
	with st.expander("Advanced Output Generation Options", expanded=False):
	st.info("Configure options for generating multiple outputs per input")

	# Option to generate multiple outputs for some inputs
	enable_multiple_outputs = st.checkbox(
	"Generate multiple outputs for some inputs",
	help="Enable generating multiple variations of outputs for selected inputs",
	)

	if enable_multiple_outputs:
	# Proportion of inputs to duplicate
	duplicate_proportion = st.slider(
	"Proportion of inputs to generate multiple outputs for",
	min_value=0.0,
	max_value=1.0,
	value=0.2,
	step=0.1,
	help="What fraction of the input samples should have multiple outputs",
	)

	# Number of outputs per duplicated input
	outputs_per_input = st.number_input(
	"Number of outputs per selected input",
	min_value=2,
	max_value=5,
	value=2,
	help="How many different outputs to generate for each selected input",
	)

	# Preview the effect
	if st.session_state.selected_samples:
	num_selected = len(st.session_state.selected_samples)
	num_to_duplicate = math.ceil(
	num_selected * duplicate_proportion
	)
	total_outputs = (num_selected - num_to_duplicate) + (
	num_to_duplicate * outputs_per_input
	)

	st.write(
	f"This will result in approximately {total_outputs} total outputs:"
	)
	st.write(
	f"- {num_selected - num_to_duplicate} inputs with 1 output"
	)
	st.write(
	f"- {num_to_duplicate} inputs with {outputs_per_input} outputs each"
	)

	# Generate outputs button
	if st.button("Generate Outputs for Selected Samples"):
	if not st.session_state.get("api_key") and not st.session_state.get(
	"anthropic_api_key"
	):
	st.error(
	"Please provide an OpenAI or Anthropic API key in the sidebar."
	)
	elif not st.session_state.selected_samples:
	st.error("No samples selected for output generation.")
	else:
	# Create a copy of the template spec with the modified prompt
	modified_template = st.session_state.template_spec.copy()
	modified_template["prompt"] = (
	st.session_state.modified_prompt_template
	)

	# Get only the selected samples
	selected_inputs = [
	st.session_state.synthetic_inputs[i]
	for i in st.session_state.selected_samples
	]

	# Handle multiple outputs if enabled
	if enable_multiple_outputs:
	# Calculate how many inputs should have multiple outputs
	num_to_duplicate = math.ceil(
	len(selected_inputs) * duplicate_proportion
	)

	# Randomly select inputs for multiple outputs
	duplicate_indices = random.sample(
	range(len(selected_inputs)), num_to_duplicate
	)

	# Create the expanded input list
	expanded_inputs = []
	for i, input_data in enumerate(selected_inputs):
	if i in duplicate_indices:
	# Add multiple copies for selected inputs
	expanded_inputs.extend([input_data] * outputs_per_input)
	else:
	# Add single copy for other inputs
	expanded_inputs.append(input_data)

	# Update selected_inputs with the expanded list
	selected_inputs = expanded_inputs

	with st.spinner(
	f"Generating outputs for {len(selected_inputs)} samples..."
	):
	generated_outputs = generate_synthetic_outputs(
	modified_template,
	selected_inputs,
	st.session_state.knowledge_base,
	)

	if generated_outputs:
	# If we're generating for all samples, replace the combined data
	if selection_method == "Generate for all samples":
	st.session_state.combined_data = generated_outputs
	else:
	# For specific samples, we need to handle the case of multiple outputs
	if enable_multiple_outputs:
	# Simply use all generated outputs as the combined data
	st.session_state.combined_data = generated_outputs
	else:
	# Handle single outputs as before
	if not st.session_state.combined_data or len(
	st.session_state.combined_data
	) != len(st.session_state.synthetic_inputs):
	st.session_state.combined_data = [None] * len(
	st.session_state.synthetic_inputs
	)

	# Update only the selected samples
	for i, output_idx in enumerate(
	st.session_state.selected_samples
	):
	if i < len(generated_outputs):
	st.session_state.combined_data[output_idx] = (
	generated_outputs[i]
	)

	# Remove any None values (samples that haven't been generated yet)
	st.session_state.combined_data = [
	item
	for item in st.session_state.combined_data
	if item is not None
	]

	st.success(f"Generated {len(generated_outputs)} outputs")

	# Display combined data if available
	if st.session_state.combined_data:
	st.subheader("Complete Dataset (Inputs + Outputs)")
	# Get all available column names from the data
	all_columns = pd.DataFrame(st.session_state.combined_data).columns.tolist()

	# Let the user select columns to exclude from input JSON
	st.session_state.columns_to_drop = st.multiselect(
	"Select input variables to exclude:",
	options=all_columns,
	default=st.session_state.get("columns_to_drop", []),
	)

	# Helper used by the Parquet download: converts DataFrame columns to Parquet-compatible types

	def prepare_dataframe_for_parquet(df):
	    """
	    Convert DataFrame columns to types compatible with Parquet format.

	    Each column is processed independently on a copy of ``df``:

	    1. ``list`` and ``dict`` values are serialized to JSON strings.
	    2. If the remaining non-missing values are not all of one kind
	       (bool, number, or str), or contain any other object type, every
	       non-missing value is converted with ``str`` so the column ends up
	       homogeneous.
	    3. Missing values (``None``/NaN) are left untouched instead of being
	       turned into the literal strings ``"None"``/``"nan"``.

	    Args:
	        df (pd.DataFrame): Input DataFrame (not modified)

	    Returns:
	        pd.DataFrame: Copy of ``df`` with converted column values
	    """

	    def _is_missing(value):
	        # is_scalar guards pd.isna, which returns an array for containers.
	        return pd.api.types.is_scalar(value) and bool(pd.isna(value))

	    def _kind(value):
	        # bool is tested first because bool is a subclass of int.
	        if isinstance(value, bool):
	            return "bool"
	        if isinstance(value, (int, float)):
	            return "number"
	        if isinstance(value, str):
	            return "str"
	        return "other"

	    df_copy = df.copy()

	    for col in df_copy.columns:
	        # Nested containers have no flat column representation, so they
	        # are stored as JSON text.
	        column = df_copy[col].apply(
	            lambda x: json.dumps(x) if isinstance(x, (list, dict)) else x
	        )

	        kinds = {_kind(value) for value in column if not _is_missing(value)}

	        # A column mixing kinds (e.g. ints and strings) or holding
	        # arbitrary objects is unified as strings; a column of a single
	        # primitive kind is kept as is.
	        if "other" in kinds or len(kinds) > 1:
	            column = column.apply(lambda x: x if _is_missing(x) else str(x))

	        df_copy[col] = column

	    return df_copy

	# Create a function to prepare the dataframe with JSON columns
	def prepare_dataframe_with_json_columns(
	    data, template_spec, show_json_columns=False, columns_to_drop=None
	):
	    """
	    Build the dataset DataFrame and add ``input``/``output`` JSON columns.

	    Args:
	        data: Records (list of dicts) passed to ``pd.DataFrame``
	        template_spec (dict): Template with ``"input"`` and ``"output"``
	            lists of variable dicts; only each variable's ``"name"`` is read
	        show_json_columns (bool): Whether the display frame keeps the
	            ``input``/``output`` JSON columns
	        columns_to_drop (list | None): Column names removed from the frame
	            and excluded from the ``input`` JSON

	    Returns:
	        tuple: ``(full_df, display_df)``. ``full_df`` always contains the
	        ``input`` and ``output`` columns. ``display_df`` is the same frame
	        when ``show_json_columns`` is true, otherwise a copy without them.
	    """

	    def _json_default(value):
	        # numpy scalars (int64, float64, bool_) are not JSON serializable;
	        # their .item() method returns the equivalent Python value.
	        item = getattr(value, "item", None)
	        if callable(item):
	            return item()
	        raise TypeError(
	            f"Object of type {type(value).__name__} is not JSON serializable"
	        )

	    def _json_column(frame, names):
	        # Records are taken per column, so a row mixing int and float
	        # columns does not have its ints upcast to floats.
	        return [
	            json.dumps(
	                {name: record[name] for name in names if name in record},
	                default=_json_default,
	            )
	            for record in frame.to_dict(orient="records")
	        ]

	    df = pd.DataFrame(data)

	    # Drop specified columns from the dataframe
	    columns_to_drop = list(columns_to_drop) if columns_to_drop else []
	    if columns_to_drop:
	        df = df.drop(
	            columns=[col for col in columns_to_drop if col in df.columns]
	        )

	    input_vars = [
	        var["name"]
	        for var in template_spec["input"]
	        if var["name"] not in columns_to_drop
	    ]
	    output_vars = [var["name"] for var in template_spec["output"]]

	    # The output column is computed after the input column is attached,
	    # matching the original evaluation order.
	    df["input"] = _json_column(df, input_vars)
	    df["output"] = _json_column(df, output_vars)

	    # The JSON columns are hidden for display only; the export frame keeps them.
	    if show_json_columns:
	        display_df = df
	    else:
	        display_df = df.drop(columns=["input", "output"])

	    return df, display_df

	# Toggle for showing JSON columns
	# The checkbox is seeded from, and written back to, st.session_state.show_json_columns
	# (read by attribute access here, so it is presumably initialized elsewhere — verify).
	st.session_state.show_json_columns = st.checkbox(
	"Show input/output JSON columns",
	value=st.session_state.show_json_columns,
	)

	# Prepare dataframe with JSON columns
	# full_df always carries the "input"/"output" JSON columns and is used for exports;
	# display_df is what gets rendered in the table below.
	full_df, display_df = prepare_dataframe_with_json_columns(
	st.session_state.combined_data,
	st.session_state.template_spec,
	st.session_state.show_json_columns,
	columns_to_drop=st.session_state.columns_to_drop,
	)

	# Show data in a table
	st.dataframe(display_df)

	# Download buttons for different formats
	col1, col2, col3 = st.columns(3)

	with col1:
	# CSV download
	# Exports full_df as-is, i.e. including the "input" and "output" JSON columns.
	combined_csv = full_df.to_csv(index=False)
	st.download_button(
	label="Download Dataset (CSV)",
	data=combined_csv,
	file_name="synthetic_dataset.csv",
	mime="text/csv",
	)

	with col2:
	# JSON download using cleaned dataframe
	# The "input"/"output" JSON string columns are dropped first, so each exported
	# record contains only the remaining plain columns.
	json_ready_df = full_df.drop(columns=["input", "output"])
	combined_json = json.dumps(
	json_ready_df.to_dict(orient="records"), indent=2
	)
	st.download_button(
	label="Download Dataset (JSON)",
	data=combined_json,
	file_name="synthetic_dataset.json",
	mime="application/json",
	)

	with col3:
	# Parquet download
	try:
	# Create a BytesIO object to hold the Parquet file
	parquet_buffer = BytesIO()
	# Convert DataFrame to Parquet-compatible types
	parquet_df = prepare_dataframe_for_parquet(full_df)
	# Write the DataFrame to the BytesIO object in Parquet format
	parquet_df.to_parquet(parquet_buffer, index=False)
	# Reset the buffer's position to the beginning
	parquet_buffer.seek(0)

	st.download_button(
	label="Download Dataset (Parquet)",
	data=parquet_buffer,
	file_name="synthetic_dataset.parquet",
	mime="application/octet-stream",
	)
	# NOTE(review): this catches every exception from the conversion, the write and
	# the button, so the pyarrow install hint below is shown even when the failure
	# has a different cause.
	except Exception as e:
	st.error(f"Error creating Parquet file: {str(e)}")
	st.info(
	"To use Parquet format, install pyarrow with: pip install pyarrow"
	)
	else:
	st.info(
	"No template has been generated yet. Go to the 'Setup' tab to create one."
	)