Spaces:

danidanidani
/

GRDN.AI.3

Sleeping

App Files Files Community

GRDN.AI.3 / src /backend /optimization_algo.py

danidanidani

fix: Color scheme (teal+magenta), LLM prompt, algorithm quality

fa0da9f about 2 months ago

raw

history blame contribute delete

18 kB

	import random
	import numpy as np
	import streamlit as st
	from concurrent.futures import ThreadPoolExecutor
	from functools import lru_cache

	# import all functions from src.backend.chatbot
	from src.backend.chatbot import *


	def genetic_algorithm_plants(model, demo_lite):
	# Define the compatibility matrix
	compatibility_matrix = st.session_state.full_mat
	# Define the list of plants
	plant_list = st.session_state.plant_list

	# Define the user-selected plants, number of plant beds, and constraints
	user_plants = st.session_state.input_plants_raw
	num_plant_beds = st.session_state.n_plant_beds
	# 1 <= min_species_per_bed <= max_species_per_bed <= len(user_plants)
	min_species_per_bed = st.session_state.min_species
	# max_species_per_bed >= floor(length(user_plants)-(min_species_per_bed*num_plant_beds-1) & max_species_per_bed <= len(user_plants)
	max_species_per_bed = st.session_state.max_species

	# Genetic Algorithm parameters
	population_size = st.session_state.population_size
	num_generations = st.session_state.num_generations
	tournament_size = st.session_state.tournament_size
	crossover_rate = st.session_state.crossover_rate
	mutation_rate = st.session_state.mutation_rate
	seed_population_rate = st.session_state.seed_population_rate

	# OPTIMIZATION: Create plant name to index mapping for O(1) lookups
	plant_to_index = {plant: idx for idx, plant in enumerate(plant_list)}

	# OPTIMIZATION: Convert compatibility matrix to numpy array ONCE (not in loop)
	compat_array = np.array(compatibility_matrix)

	# OPTIMIZATION: Fitness cache to avoid recalculating fitness for the same grouping
	fitness_cache = {}

	def generate_initial_population(model, demo_lite):
	population = []

	# OPTIMIZATION: Only call LLM if seed_population_rate > 0 and not demo_lite
	# This was a MAJOR bottleneck - LLM initialization and inference is very slow
	num_seeds = int(population_size * st.session_state.seed_population_rate)

	if num_seeds > 0 and not demo_lite and model is not None:
	# we generate just one seed grouping for this beta language model suggestion feature
	seed_grouping = get_language_model_suggestions(model, demo_lite)
	if seed_grouping != "no response yet":
	valid_seed_grouping = validate_and_replace(seed_grouping)
	population.append(valid_seed_grouping)
	print(f" Added 1 LLM-generated seed to population")

	# Fill the rest of the population with random groupings
	# IMPORTANT: Validate all initial groupings for best solution quality
	# A good initial population is critical for finding optimal solutions
	while len(population) < population_size:
	random_grouping = generate_random_grouping()
	# Always validate to ensure we start with high-quality individuals
	valid_grouping = validate_and_replace(random_grouping)
	population.append(valid_grouping)

	return population

	def generate_random_grouping():
	random.shuffle(user_plants)
	remaining_plants = user_plants.copy()
	grouping = []

	total_plants = len(user_plants)
	plants_per_bed = total_plants // num_plant_beds
	extra_plants = total_plants % num_plant_beds

	for bed_index in range(num_plant_beds):
	if bed_index < extra_plants:
	# Distribute extra plants among the first few beds
	num_species_in_bed = plants_per_bed + 1
	else:
	num_species_in_bed = plants_per_bed

	# Ensure the bed size is within the min and max constraints
	num_species_in_bed = max(
	min_species_per_bed, min(num_species_in_bed, max_species_per_bed)
	)

	bed = remaining_plants[:num_species_in_bed]
	remaining_plants = remaining_plants[num_species_in_bed:]
	grouping.append(bed)

	return grouping

	# Perform crossover between two parents, preserving at least one occurrence of each plant
	def crossover(parent1, parent2):
	if random.random() < crossover_rate:
	crossover_point = random.randint(1, num_plant_beds - 1)
	child1 = parent1[:crossover_point] + parent2[crossover_point:]
	child2 = parent2[:crossover_point] + parent1[crossover_point:]

	# Ensure each plant appears at least once in the offspring
	for plant in user_plants:
	if all(plant not in bed for bed in child1):
	# Find a bed with fewer species and add the missing plant
	min_bed_index = min(
	range(len(child1)), key=lambda i: len(child1[i])
	)
	child1[min_bed_index].append(plant)
	if all(plant not in bed for bed in child2):
	# Find a bed with fewer species and add the missing plant
	min_bed_index = min(
	range(len(child2)), key=lambda i: len(child2[i])
	)
	child2[min_bed_index].append(plant)

	return child1, child2
	else:
	return parent1, parent2

	# Perform mutation on an individual, ensuring no bed exceeds the maximum species constraint
	def mutate(individual):
	if random.random() < mutation_rate:
	mutated_bed = random.randint(0, num_plant_beds - 1)
	species_in_bed = individual[mutated_bed]

	# Remove excess species if there are more than the maximum constraint
	if len(species_in_bed) > max_species_per_bed:
	species_in_bed = random.sample(species_in_bed, max_species_per_bed)

	# Add missing plants by performing swaps between current species and missing plants
	missing_plants = [
	plant for plant in user_plants if plant not in species_in_bed
	]
	num_missing_plants = min(
	len(missing_plants), max_species_per_bed - len(species_in_bed)
	)
	for _ in range(num_missing_plants):
	swap_species = random.choice(missing_plants)
	missing_plants.remove(swap_species)
	species_in_bed.append(swap_species)
	species_in_bed.remove(random.choice(species_in_bed))

	individual[mutated_bed] = species_in_bed

	return individual

	# calculate the fitness score of the grouping
	def calculate_fitness(grouping):
	# OPTIMIZATION: Create a hashable key for caching
	grouping_key = tuple(tuple(sorted(bed)) for bed in grouping)
	if grouping_key in fitness_cache:
	return fitness_cache[grouping_key]

	positive_reward_factor = 1000
	negative_penalty_factor = 2000

	# define penalties for not meeting constraints
	penalty_for_exceeding_max = 500
	penalty_for_not_meeting_min = 500
	penalty_for_not_having_all_plants = 1000

	score = 0
	# VECTORIZED FITNESS CALCULATION - Much faster with numpy
	for bed in grouping:
	if len(bed) < 2:
	continue

	# Convert plant names to indices in bulk
	bed_indices = np.array([plant_to_index[plant] for plant in bed])

	# Get all pairwise compatibility scores using numpy advanced indexing
	# This avoids nested loops and is 10-100x faster
	n = len(bed_indices)
	i_indices, j_indices = np.triu_indices(n, k=1)

	# Vectorized compatibility score extraction (using pre-converted array)
	compat_scores = compat_array[bed_indices[i_indices], bed_indices[j_indices]]

	# Vectorized reward/penalty calculation
	positive_scores = compat_scores[compat_scores > 0].sum() * positive_reward_factor
	negative_scores = compat_scores[compat_scores < 0].sum() * negative_penalty_factor

	score += positive_scores + negative_scores

	# apply penalties for not meeting constraints (vectorized)
	bed_sizes = np.array([len(bed) for bed in grouping])
	score -= np.sum(bed_sizes > max_species_per_bed) * penalty_for_exceeding_max
	score -= np.sum(bed_sizes < min_species_per_bed) * penalty_for_not_meeting_min

	if len(set(plant for bed in grouping for plant in bed)) < len(user_plants):
	score -= penalty_for_not_having_all_plants

	# OPTIMIZATION: Cache the result
	fitness_cache[grouping_key] = score
	return score

	# Perform tournament selection
	def tournament_selection(population, population_fitness):
	# OPTIMIZATION: Use pre-calculated fitness scores
	selected = []
	for _ in range(population_size):
	participants_idx = random.sample(range(len(population)), tournament_size)
	winner_idx = max(participants_idx, key=lambda idx: population_fitness[idx])
	selected.append(population[winner_idx])
	return selected

	# OPTIMIZATION: Parallel fitness calculation for speed
	def calculate_fitness_parallel(individuals):
	"""Calculate fitness for multiple individuals in parallel"""
	if len(individuals) <= 10:
	# For small populations, parallel overhead isn't worth it
	return [calculate_fitness(ind) for ind in individuals]

	# Use ThreadPoolExecutor for parallel computation
	with ThreadPoolExecutor(max_workers=4) as executor:
	return list(executor.map(calculate_fitness, individuals))

	# Perform replacement of the population with the offspring, ensuring maximum species constraint is met
	def replacement(population, offspring, population_fitness):
	# OPTIMIZATION: Use pre-calculated fitness and avoid re-sorting
	# Calculate fitness for offspring in parallel
	offspring_fitness = calculate_fitness_parallel(offspring)

	# Adjust the offspring to meet the maximum species constraint
	adjusted_offspring = []
	adjusted_fitness = []
	for idx, individual in enumerate(offspring):
	for bed_idx in range(num_plant_beds):
	species_in_bed = individual[bed_idx]
	if len(species_in_bed) > max_species_per_bed:
	species_in_bed = random.sample(species_in_bed, max_species_per_bed)
	individual[bed_idx] = species_in_bed
	adjusted_offspring.append(individual)
	adjusted_fitness.append(offspring_fitness[idx])

	# Combine population and offspring with their fitness scores
	combined = list(zip(population + adjusted_offspring, population_fitness + adjusted_fitness))
	# Sort by fitness and take top population_size individuals
	combined.sort(key=lambda x: x[1], reverse=True)

	new_population = [ind for ind, _ in combined[:population_size]]
	new_fitness = [fit for _, fit in combined[:population_size]]

	return new_population, new_fitness

	# Genetic Algorithm main function
	def genetic_algorithm(model, demo_lite):
	population = generate_initial_population(model, demo_lite)

	# OPTIMIZATION: Calculate fitness once for initial population (in parallel)
	population_fitness = calculate_fitness_parallel(population)

	for generation in range(num_generations):
	print(f"Generation {generation + 1}")

	selected_population = tournament_selection(population, population_fitness)
	offspring = []

	for _ in range(population_size // 2):
	parent1 = random.choice(selected_population)
	parent2 = random.choice(selected_population)
	child1, child2 = crossover(parent1, parent2)
	child1 = mutate(child1)
	child2 = mutate(child2)
	offspring.extend([child1, child2])

	# OPTIMIZATION: Pass fitness and get updated fitness back
	population, population_fitness = replacement(population, offspring, population_fitness)

	# OPTIMIZATION: Validate periodically to ensure quality (every 10 generations)
	# Too frequent = slow, too rare = poor quality. Every 10 is a good balance.
	if generation % 10 == 0 or generation == num_generations - 1:
	# Only validate if needed - most individuals are valid
	validated_count = 0
	invalid_indices = []

	# Quick check which individuals need validation
	for i in range(len(population)):
	plants_in_grouping = set(plant for bed in population[i] for plant in bed)
	if len(plants_in_grouping) != len(user_plants):
	invalid_indices.append(i)

	# Parallel validation for invalid individuals
	if invalid_indices:
	invalid_individuals = [population[i] for i in invalid_indices]
	validated_individuals = [validate_and_replace(ind) for ind in invalid_individuals]

	# Update population and recalculate fitness
	for idx, validated_ind in zip(invalid_indices, validated_individuals):
	population[idx] = validated_ind
	population_fitness[idx] = calculate_fitness(validated_ind)

	validated_count = len(invalid_indices)
	if validated_count > 0:
	print(f" Validated {validated_count} individuals")

	# Find best solution
	best_idx = max(range(len(population)), key=lambda i: population_fitness[i])
	best_grouping = population[best_idx]

	# Final validation of best solution only
	best_grouping = validate_and_replace(best_grouping)
	best_fitness = calculate_fitness(best_grouping)

	print(f"Best Grouping: {best_grouping}")
	print(f"Fitness Score: {best_fitness}")
	st.session_state.best_grouping = best_grouping
	st.session_state.best_fitness = best_fitness
	# st.write(f"Best Grouping: {best_grouping}")
	# st.write(f"Fitness Score: {best_fitness}")
	return best_grouping

	# def validate_and_replace(grouping):
	# print("Grouping structure before validation:", grouping)
	# all_plants = set(user_plants)
	# for bed in grouping:
	# all_plants -= set(bed)

	# # Replace missing plants
	# for missing_plant in all_plants:
	# replaced = False
	# for bed in grouping:
	# if len(set(bed)) != len(bed): # Check for duplicates
	# for i, plant in enumerate(bed):
	# if bed.count(plant) > 1: # Found a duplicate
	# bed[i] = missing_plant
	# replaced = True
	# break
	# if replaced:
	# break

	# # If no duplicates were found, replace a random plant
	# if not replaced:
	# random_bed = random.choice(grouping)
	# random_bed[random.randint(0, len(random_bed) - 1)] = missing_plant

	# return grouping

	############
	############ experimental

	def adjust_grouping(grouping):
	# Determine the plants that are missing in the grouping
	plants_in_grouping = set(plant for bed in grouping for plant in bed)
	missing_plants = set(user_plants) - plants_in_grouping

	for missing_plant in missing_plants:
	# Find a bed that can accommodate the missing plant without exceeding max_species_per_bed
	suitable_bed = next(
	(bed for bed in grouping if len(bed) < max_species_per_bed), None
	)
	if suitable_bed is not None:
	suitable_bed.append(missing_plant)
	else:
	# If no suitable bed is found, replace a random plant in a random bed
	random_bed = random.choice(grouping)
	random_bed[random.randint(0, len(random_bed) - 1)] = missing_plant

	# Ensure min_species_per_bed and max_species_per_bed constraints
	for bed in grouping:
	while len(bed) < min_species_per_bed:
	additional_plant = random.choice(
	[plant for plant in user_plants if plant not in bed]
	)
	bed.append(additional_plant)
	while len(bed) > max_species_per_bed:
	bed.remove(random.choice(bed))

	return grouping

	def validate_and_replace(grouping):
	# Try multiple configurations to find the best valid grouping
	# This is important for solution quality - don't skip this!
	best_grouping = None
	best_fitness = float("-inf")

	# Try 3 different configurations (balanced between speed and quality)
	for _ in range(3):
	temp_grouping = [bed.copy() for bed in grouping]
	temp_grouping = adjust_grouping(temp_grouping)
	current_fitness = calculate_fitness(temp_grouping)

	if current_fitness > best_fitness:
	best_fitness = current_fitness
	best_grouping = temp_grouping

	return best_grouping

	############
	def get_language_model_suggestions(model, demo_lite):
	# This returns a list of seed groupings based on the compatibility matrix
	st.session_state.seed_groupings = get_seed_groupings_from_LLM(model, demo_lite)
	return st.session_state.seed_groupings

	# Run the genetic algorithm

	best_grouping = genetic_algorithm(model, demo_lite)
	return best_grouping