Spaces:

CUHKLPL
/

stimuli_generator

Running

App Files Community

kan0621 committed on Dec 11, 2025

Commit

6b699cc

verified ·

1 Parent(s): 839e4d6

Update backend.py

Browse files

Files changed (1) hide show

backend.py +238 -74

backend.py CHANGED Viewed

@@ -1,17 +1,16 @@
-import openai
-import pandas as pd
 import json
-import time
 import random
-import requests
-from flask import request, jsonify
 from abc import ABC, abstractmethod
-import threading
-import multiprocessing
 from multiprocessing import Process, Queue
-import queue
-import traceback
 # Set OpenAI API key
 # openai.api_key = ""
@@ -66,9 +65,19 @@ Please help me construct one item as stimuli for a psycholinguistic experiment b
 Experimental stimuli design: {experiment_design}
-Existing stimuli: {previous_stimuli}
-Requirement: {generation_requirements} Please return in JSON format.
 """
 # ---- Agent 2 Prompt ----
@@ -132,7 +141,7 @@ class OpenAIClient(ModelClient):
         self.api_key = api_key
         if api_key:
             openai.api_key = api_key
-            print(f"OpenAI API key set successfully, length: {len(api_key)}")
         else:
             print("Warning: No OpenAI API key provided!")
@@ -140,8 +149,6 @@ class OpenAIClient(ModelClient):
         """API call function, will be called by multiprocessing"""
         # set API key in subprocess
         openai.api_key = api_key
-        print(
-            f"OpenAI API key in subprocess: {api_key[:10]}..." if api_key else "None")
         return openai.ChatCompletion.create(
             model=params["model"],
@@ -181,12 +188,18 @@ class OpenAIClient(ModelClient):
                 return json.loads(response['choices'][0]['message']['content'])
             except json.JSONDecodeError as e:
                 print(f"Failed to parse OpenAI JSON response: {e}")
-                return {"error": "Failed to parse response"}
-            except Exception as e:
                 print(f"OpenAI API error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
-                    return {"error": f"API error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
     def get_default_params(self):
         return {"model": "gpt-4o"}
@@ -273,21 +286,29 @@ class CustomModelClient(ModelClient):
     def _api_call(self, request_data, headers):
         """API call function, will be called by multiprocessing"""
-        response = requests.post(
-            self.api_url,
-            headers=headers,
-            json=request_data,
-            timeout=60  # timeout for requests
-        )
-        response.raise_for_status()
-        return response.json()
     def generate_completion(self, prompt, properties, params=None):
         is_deepseek = self.api_url.strip().startswith("https://api.deepseek.com")
         if is_deepseek:
-            import time
             rand_stamp = int(time.time())
             # Generate field list
             field_list = ', '.join([f'"{k}"' for k in properties.keys()])
@@ -347,7 +368,7 @@ class CustomModelClient(ModelClient):
                       json.dumps(request_data, indent=2))
                 result = call_with_timeout(
-                    self._api_call, (request_data, headers), {}, 60)
                 if isinstance(result, dict) and "error" in result:
                     print(f"Custom API timeout attempt {attempt + 1}/3")
@@ -361,15 +382,28 @@ class CustomModelClient(ModelClient):
                 content = result["choices"][0]["message"]["content"]
                 return json.loads(content)
-            except (json.JSONDecodeError, KeyError) as e:
-                print(f"Custom API parsing error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
-                    return {"error": f"API parsing error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
-            except Exception as e:
-                print(f"Custom API error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
-                    return {"error": f"API error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
     def get_default_params(self):
@@ -384,7 +418,6 @@ def create_model_client(model_choice, settings=None):
     """Factory function to create appropriate model client"""
     if model_choice == 'GPT-4o':
         api_key = settings.get('api_key') if settings else None
-        print(f"OpenAI API key length: {len(api_key) if api_key else 0}")
         return OpenAIClient(api_key)
     elif model_choice == 'custom':
         if not settings:
@@ -411,8 +444,15 @@ def check_stimulus_repetition(new_stimulus_dict, previous_stimuli_list):
     for existing_stimulus in previous_stimuli_list:
         for key, new_value in new_stimulus_dict.items():
             # If the key exists in existing_stimulus and the values are the same, it is considered a repetition
-            if key in existing_stimulus and existing_stimulus[key].lower() == str(new_value.lower()):
-                return True
     return False
@@ -422,6 +462,7 @@ def agent_1_generate_stimulus(
         experiment_design,
         previous_stimuli,
         properties,
         prompt_template=AGENT_1_PROMPT_TEMPLATE,
         params=None,
         stop_event=None):
@@ -433,11 +474,15 @@ def agent_1_generate_stimulus(
         return {"stimulus": "STOPPED"}
     # Use fixed generation_requirements
-    generation_requirements = "Please generate a new stimulus in the same format as the existing stimuli, and ensure that the new stimulus is different from those in the existing stimuli."
     prompt = prompt_template.format(
         experiment_design=experiment_design,
         previous_stimuli=previous_stimuli,
         generation_requirements=generation_requirements
     )
@@ -454,8 +499,11 @@ def agent_1_generate_stimulus(
             return {"stimulus": "ERROR/ERROR"}
         return result
-    except Exception as e:
-        print(f"Error in agent_1_generate_stimulus: {e}")
         return {"stimulus": "ERROR/ERROR"}
@@ -498,9 +546,12 @@ def agent_2_validate_stimulus(
             return {"error": f"Failed to validate stimulus: {result.get('error', 'Unknown error')}"}
         return result
-    except Exception as e:
-        print(f"Error in agent_2_validate_stimulus: {e}")
-        return {"error": "Failed to validate stimulus"}
 def agent_2_validate_stimulus_individual(
@@ -614,9 +665,12 @@ Please return in JSON format with only one field: "{property_name}" (boolean: tr
                 "validator", "All criteria passed successfully!")
         return validation_results
-    except Exception as e:
-        print(f"Error in agent_2_validate_stimulus_individual: {e}")
-        return {"error": "Failed to validate stimulus individually"}
 def generate_scoring_requirements(properties):
@@ -677,8 +731,11 @@ def agent_3_score_stimulus(
             return {field: 0 for field in properties.keys()}
         return result
-    except Exception as e:
-        print(f"Error in agent_3_score_stimulus: {e}")
         return {field: 0 for field in properties.keys()}
@@ -810,8 +867,11 @@ Please return in JSON format with only one field: "{aspect_name}" (integer score
                 "scorer", f"Individual scoring completed! Total: {total_score}/{max_possible}")
         return scoring_results
-    except Exception as e:
-        print(f"Error in agent_3_score_stimulus_individual: {e}")
         return {field: 0 for field in properties.keys()}
@@ -860,11 +920,47 @@ def generate_stimuli(settings):
             return True
         return False
     # Immediately check if stopped
     if check_stop("Generation stopped before starting."):
         return None, None
     record_list = []
     agent_1_properties = settings.get('agent_1_properties', {})
     print("Agent 1 Properties:", agent_1_properties)
     if websocket_callback:
@@ -923,8 +1019,9 @@ def generate_stimuli(settings):
     # Get actual total iterations
     total_iter_value = total_iterations.value
     for iteration_num in range(total_iter_value):
-        if check_stop():
-            return None, None
         round_message = f"=== No. {iteration_num + 1} Round ==="
         print(round_message)
@@ -932,9 +1029,11 @@ def generate_stimuli(settings):
             websocket_callback("all", round_message)
         # Step 1: Generate stimulus
         while True:
-            if check_stop():
-                return None, None
             try:
                 stimuli = agent_1_generate_stimulus(
@@ -942,27 +1041,52 @@ def generate_stimuli(settings):
                     experiment_design=experiment_design,
                     previous_stimuli=previous_stimuli,
                     properties=agent_1_properties,
                     prompt_template=AGENT_1_PROMPT_TEMPLATE,
                     params=custom_params,
                     stop_event=stop_event
                 )
                 if isinstance(stimuli, dict) and stimuli.get('stimulus') == 'STOPPED':
-                    if check_stop("Generation stopped after 'Generator'."):
-                        return None, None
                 print("Agent 1 Output:", stimuli)
                 if websocket_callback:
                     websocket_callback(
                         "generator", f"Generator's Output: {json.dumps(stimuli, indent=2)}")
-                if check_stop("Generation stopped after 'Generator'."):
-                    return None, None
                 # Step 1.5: Check if stimulus already exists
                 if check_stimulus_repetition(stimuli, previous_stimuli):
                     repetition_count += 1
                     if ablation["use_agent_2"]:
                         print("Detected repeated stimulus, regenerating...")
@@ -977,8 +1101,9 @@ def generate_stimuli(settings):
                             websocket_callback(
                                 "generator", "Ablation: Skipping Agent 2 (Repetition Check)")
-                if check_stop():
-                    return None, None
                 # Step 2: Validate stimulus
                 # Check if individual validation is enabled
@@ -1011,16 +1136,20 @@ def generate_stimuli(settings):
                     )
                 if isinstance(validation_result, dict) and validation_result.get('error') == 'Stopped by user':
-                    if check_stop("Generation stopped after 'Validator'."):
-                        return None, None
                 print("Agent 2 Output:", validation_result)
                 if websocket_callback:
                     websocket_callback(
                         "validator", f"Validator's Output: {json.dumps(validation_result, indent=2)}")
-                if check_stop("Generation stopped after 'Validator'."):
-                    return None, None
                 # Check if there was an error first
                 if 'error' in validation_result:
@@ -1037,12 +1166,41 @@ def generate_stimuli(settings):
                 if failed_fields:
                     # Some fields failed validation
                     validation_fails += 1
                     print(
                         f"Failed validation for fields: {failed_fields}, regenerating...")
                     if websocket_callback:
                         websocket_callback(
                             "validator", f"Failed validation for fields: {failed_fields}, regenerating...")
                     if ablation["use_agent_2"]:
                         continue  # Regenerate
                     else:
@@ -1080,8 +1238,6 @@ def generate_stimuli(settings):
                     df['error_occurred'] = True
                     df['error_message'] = str(e)
-                    import os
                     os.makedirs("outputs", exist_ok=True)
                     suggested_filename = os.path.join(
                         "outputs", f"experiment_stimuli_results_{session_id}_{timestamp}_{unique_id}.csv")
@@ -1090,12 +1246,16 @@ def generate_stimuli(settings):
                 else:
                     raise e
-        if check_stop("Generation stopped after 'Validator'."):
-            return None, None
         try:
-            if check_stop("Generation stopped before Scorer."):
-                return None, None
             # Step 3: Score
             if ablation["use_agent_3"]:
@@ -1130,16 +1290,20 @@ def generate_stimuli(settings):
                 if isinstance(scores, dict) and all(v == 0 for v in scores.values()):
                     if stop_event.is_set():
-                        if check_stop("Generation stopped after 'Scorer'."):
-                            return None, None
                 print("Agent 3 Output:", scores)
                 if websocket_callback:
                     websocket_callback(
                         "scorer", f"Scorer's Output: {json.dumps(scores, indent=2)}")
-                if check_stop("Generation stopped after 'Scorer'."):
-                    return None, None
             else:
                 print("Ablation: Skipping Agent 3 (Scoring)")
                 if websocket_callback:

 import json
+import os
+import queue
 import random
+import time
+import traceback
 from abc import ABC, abstractmethod
 from multiprocessing import Process, Queue
+import openai
+import pandas as pd
+import requests
+from requests.exceptions import RequestException, Timeout, ConnectionError as RequestsConnectionError
 # Set OpenAI API key
 # openai.api_key = ""
 Experimental stimuli design: {experiment_design}
+Existing stimuli (DO NOT repeat any of these): {previous_stimuli}
+Previously rejected stimuli with validation feedback (learn from these failures and avoid similar issues):
+{rejected_stimuli}
+CRITICAL REQUIREMENTS:
+1. Generate a COMPLETELY NEW and UNIQUE stimulus that is DIFFERENT from ALL existing stimuli above.
+2. Do NOT repeat or slightly modify any existing stimulus - create something entirely original.
+3. Avoid any content that overlaps with existing or rejected stimuli.
+4. Learn from the rejected stimuli above - understand why they failed validation and avoid making similar mistakes.
+{generation_requirements}
+Please return in JSON format.
 """
 # ---- Agent 2 Prompt ----
         self.api_key = api_key
         if api_key:
             openai.api_key = api_key
+            print("OpenAI API key configured successfully")
         else:
             print("Warning: No OpenAI API key provided!")
         """API call function, will be called by multiprocessing"""
         # set API key in subprocess
         openai.api_key = api_key
         return openai.ChatCompletion.create(
             model=params["model"],
                 return json.loads(response['choices'][0]['message']['content'])
             except json.JSONDecodeError as e:
                 print(f"Failed to parse OpenAI JSON response: {e}")
+                return {"error": f"Failed to parse response: {str(e)}"}
+            except (openai.error.APIError, openai.error.RateLimitError) as e:
                 print(f"OpenAI API error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
+                    return {"error": f"OpenAI API error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
+            except openai.error.AuthenticationError as e:
+                print(f"OpenAI authentication error: {e}")
+                return {"error": f"Authentication failed: {str(e)}"}
+            except openai.error.InvalidRequestError as e:
+                print(f"OpenAI invalid request: {e}")
+                return {"error": f"Invalid request: {str(e)}"}
     def get_default_params(self):
         return {"model": "gpt-4o"}
     def _api_call(self, request_data, headers):
         """API call function, will be called by multiprocessing"""
+        try:
+            response = requests.post(
+                self.api_url,
+                headers=headers,
+                json=request_data,
+                timeout=60  # timeout for requests
+            )
+            response.raise_for_status()
+            return response.json()
+        except Timeout:
+            raise Timeout(
+                f"Request to {self.api_url} timed out after 60 seconds")
+        except RequestsConnectionError as e:
+            raise RequestsConnectionError(
+                f"Failed to connect to {self.api_url}: {str(e)}")
+        except RequestException as e:
+            raise RequestException(f"Request failed: {str(e)}")
     def generate_completion(self, prompt, properties, params=None):
         is_deepseek = self.api_url.strip().startswith("https://api.deepseek.com")
         if is_deepseek:
             rand_stamp = int(time.time())
             # Generate field list
             field_list = ', '.join([f'"{k}"' for k in properties.keys()])
                       json.dumps(request_data, indent=2))
                 result = call_with_timeout(
+                    self._api_call, (request_data, headers), {}, 600)
                 if isinstance(result, dict) and "error" in result:
                     print(f"Custom API timeout attempt {attempt + 1}/3")
                 content = result["choices"][0]["message"]["content"]
                 return json.loads(content)
+            except json.JSONDecodeError as e:
+                print(
+                    f"Custom API JSON parsing error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
+                    return {"error": f"API JSON parsing error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
+            except KeyError as e:
+                print(
+                    f"Custom API response missing expected key attempt {attempt + 1}/3: {e}")
+                if attempt == 2:
+                    return {"error": f"API response missing expected key after 3 attempts: {str(e)}"}
+                time.sleep(2 ** attempt)
+            except (Timeout, RequestsConnectionError) as e:
+                print(
+                    f"Custom API connection error attempt {attempt + 1}/3: {e}")
                 if attempt == 2:
+                    return {"error": f"API connection error after 3 attempts: {str(e)}"}
+                time.sleep(2 ** attempt)
+            except RequestException as e:
+                print(f"Custom API request error attempt {attempt + 1}/3: {e}")
+                if attempt == 2:
+                    return {"error": f"API request error after 3 attempts: {str(e)}"}
                 time.sleep(2 ** attempt)
     def get_default_params(self):
     """Factory function to create appropriate model client"""
     if model_choice == 'GPT-4o':
         api_key = settings.get('api_key') if settings else None
         return OpenAIClient(api_key)
     elif model_choice == 'custom':
         if not settings:
     for existing_stimulus in previous_stimuli_list:
         for key, new_value in new_stimulus_dict.items():
             # If the key exists in existing_stimulus and the values are the same, it is considered a repetition
+            if key in existing_stimulus:
+                try:
+                    existing_val = str(existing_stimulus[key]).lower()
+                    new_val = str(new_value).lower()
+                    if existing_val == new_val:
+                        return True
+                except (AttributeError, TypeError):
+                    # Skip comparison if values can't be converted to string
+                    continue
     return False
         experiment_design,
         previous_stimuli,
         properties,
+        rejected_stimuli=None,
         prompt_template=AGENT_1_PROMPT_TEMPLATE,
         params=None,
         stop_event=None):
         return {"stimulus": "STOPPED"}
     # Use fixed generation_requirements
+    generation_requirements = "5. Follow the same JSON format as the existing stimuli."
+    if rejected_stimuli is None:
+        rejected_stimuli = []
     prompt = prompt_template.format(
         experiment_design=experiment_design,
         previous_stimuli=previous_stimuli,
+        rejected_stimuli=rejected_stimuli,
         generation_requirements=generation_requirements
     )
             return {"stimulus": "ERROR/ERROR"}
         return result
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        print(f"Error parsing response in agent_1_generate_stimulus: {e}")
+        return {"stimulus": "ERROR/ERROR"}
+    except (RequestException, Timeout) as e:
+        print(f"Network error in agent_1_generate_stimulus: {e}")
         return {"stimulus": "ERROR/ERROR"}
             return {"error": f"Failed to validate stimulus: {result.get('error', 'Unknown error')}"}
         return result
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        print(f"Error parsing validation response: {e}")
+        return {"error": f"Failed to parse validation response: {str(e)}"}
+    except (RequestException, Timeout) as e:
+        print(f"Network error in validation: {e}")
+        return {"error": f"Network error during validation: {str(e)}"}
 def agent_2_validate_stimulus_individual(
                 "validator", "All criteria passed successfully!")
         return validation_results
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        print(f"Error parsing individual validation response: {e}")
+        return {"error": f"Failed to parse validation response: {str(e)}"}
+    except (RequestException, Timeout) as e:
+        print(f"Network error in individual validation: {e}")
+        return {"error": f"Network error during validation: {str(e)}"}
 def generate_scoring_requirements(properties):
             return {field: 0 for field in properties.keys()}
         return result
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        print(f"Error parsing scoring response: {e}")
+        return {field: 0 for field in properties.keys()}
+    except (RequestException, Timeout) as e:
+        print(f"Network error in scoring: {e}")
         return {field: 0 for field in properties.keys()}
                 "scorer", f"Individual scoring completed! Total: {total_score}/{max_possible}")
         return scoring_results
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        print(f"Error parsing individual scoring response: {e}")
+        return {field: 0 for field in properties.keys()}
+    except (RequestException, Timeout) as e:
+        print(f"Network error in individual scoring: {e}")
         return {field: 0 for field in properties.keys()}
             return True
         return False
+    # Helper function to create partial result when error or stop occurs
+    def create_partial_result(record_list, message, is_error=True):
+        nonlocal total_iterations
+        if len(record_list) > 0:
+            df = pd.DataFrame(record_list)
+            session_id = settings.get('session_id', 'default')
+            timestamp = int(time.time())
+            unique_id = ''.join(random.choice('0123456789abcdef')
+                                for _ in range(6))
+            suffix = "_partial" if is_error else "_stopped"
+            suggested_filename = f"experiment_stimuli_results_{session_id}_{timestamp}_{unique_id}{suffix}.csv"
+            df['generation_timestamp'] = timestamp
+            df['batch_id'] = unique_id
+            df['total_iterations'] = total_iterations.value
+            df['stopped_by_user'] = not is_error
+            df['error_occurred'] = is_error
+            df['message'] = message
+            df['completed_iterations'] = len(record_list)
+            os.makedirs("outputs", exist_ok=True)
+            suggested_filename = os.path.join("outputs", suggested_filename)
+            return df, suggested_filename
+        return None, None
+    # Helper function to check stop and return partial data if available
+    def check_stop_and_return(message="Generation stopped by user."):
+        if stop_event.is_set():
+            print(message)
+            if websocket_callback:
+                websocket_callback("all", message)
+            return True, create_partial_result(record_list, message, is_error=False)
+        return False, (None, None)
     # Immediately check if stopped
     if check_stop("Generation stopped before starting."):
         return None, None
     record_list = []
+    rejected_stimuli_memory = []
     agent_1_properties = settings.get('agent_1_properties', {})
     print("Agent 1 Properties:", agent_1_properties)
     if websocket_callback:
     # Get actual total iterations
     total_iter_value = total_iterations.value
     for iteration_num in range(total_iter_value):
+        stopped, partial_result = check_stop_and_return()
+        if stopped:
+            return partial_result
         round_message = f"=== No. {iteration_num + 1} Round ==="
         print(round_message)
             websocket_callback("all", round_message)
         # Step 1: Generate stimulus
+        current_retry_count = 0  # Retry counter for this iteration
         while True:
+            stopped, partial_result = check_stop_and_return()
+            if stopped:
+                return partial_result
             try:
                 stimuli = agent_1_generate_stimulus(
                     experiment_design=experiment_design,
                     previous_stimuli=previous_stimuli,
                     properties=agent_1_properties,
+                    rejected_stimuli=rejected_stimuli_memory,
                     prompt_template=AGENT_1_PROMPT_TEMPLATE,
                     params=custom_params,
                     stop_event=stop_event
                 )
                 if isinstance(stimuli, dict) and stimuli.get('stimulus') == 'STOPPED':
+                    stopped, partial_result = check_stop_and_return(
+                        "Generation stopped after 'Generator'.")
+                    if stopped:
+                        return partial_result
+                # Skip validation if Agent 1 returned an error
+                if isinstance(stimuli, dict) and stimuli.get('stimulus') == 'ERROR/ERROR':
+                    print("Agent 1 returned ERROR, regenerating...")
+                    if websocket_callback:
+                        websocket_callback(
+                            "generator", "Generator returned ERROR, regenerating...")
+                    continue
                 print("Agent 1 Output:", stimuli)
                 if websocket_callback:
                     websocket_callback(
                         "generator", f"Generator's Output: {json.dumps(stimuli, indent=2)}")
+                stopped, partial_result = check_stop_and_return(
+                    "Generation stopped after 'Generator'.")
+                if stopped:
+                    return partial_result
                 # Step 1.5: Check if stimulus already exists
                 if check_stimulus_repetition(stimuli, previous_stimuli):
                     repetition_count += 1
+                    current_retry_count += 1
+                    # Add retry limit to avoid infinite loops (but never accept duplicates)
+                    max_repetition_retries = 50
+                    if current_retry_count > max_repetition_retries:
+                        error_msg = f"Failed to generate unique stimulus after {max_repetition_retries} attempts. Consider adjusting experiment design or reducing target count."
+                        print(error_msg)
+                        if websocket_callback:
+                            websocket_callback("generator", error_msg)
+                        # Return partial results instead of raising exception
+                        return create_partial_result(record_list, error_msg)
                     if ablation["use_agent_2"]:
                         print("Detected repeated stimulus, regenerating...")
                             websocket_callback(
                                 "generator", "Ablation: Skipping Agent 2 (Repetition Check)")
+                stopped, partial_result = check_stop_and_return()
+                if stopped:
+                    return partial_result
                 # Step 2: Validate stimulus
                 # Check if individual validation is enabled
                     )
                 if isinstance(validation_result, dict) and validation_result.get('error') == 'Stopped by user':
+                    stopped, partial_result = check_stop_and_return(
+                        "Generation stopped after 'Validator'.")
+                    if stopped:
+                        return partial_result
                 print("Agent 2 Output:", validation_result)
                 if websocket_callback:
                     websocket_callback(
                         "validator", f"Validator's Output: {json.dumps(validation_result, indent=2)}")
+                stopped, partial_result = check_stop_and_return(
+                    "Generation stopped after 'Validator'.")
+                if stopped:
+                    return partial_result
                 # Check if there was an error first
                 if 'error' in validation_result:
                 if failed_fields:
                     # Some fields failed validation
                     validation_fails += 1
+                    current_retry_count += 1
+                    # Add to rejected memory (only if it's a valid stimulus, not an error)
+                    is_error_stimulus = (
+                        isinstance(stimuli, dict) and
+                        stimuli.get('stimulus') in ['ERROR/ERROR', 'STOPPED']
+                    )
+                    if not is_error_stimulus:
+                        rejected_item = {
+                            "stimulus": stimuli,
+                            "validation_result": validation_result,
+                            "failed_fields": failed_fields
+                        }
+                        rejected_stimuli_memory.append(rejected_item)
+                        # Limit memory size to prevent unbounded growth
+                        MAX_REJECTED_MEMORY = 20
+                        if len(rejected_stimuli_memory) > MAX_REJECTED_MEMORY:
+                            rejected_stimuli_memory = rejected_stimuli_memory[-MAX_REJECTED_MEMORY:]
                     print(
                         f"Failed validation for fields: {failed_fields}, regenerating...")
                     if websocket_callback:
                         websocket_callback(
                             "validator", f"Failed validation for fields: {failed_fields}, regenerating...")
+                    # Check retry limit to avoid infinite loops
+                    max_retries = 50
+                    if current_retry_count > max_retries:
+                        error_msg = f"Failed to generate valid stimulus after {max_retries} attempts. Consider adjusting validation criteria."
+                        print(error_msg)
+                        if websocket_callback:
+                            websocket_callback("validator", error_msg)
+                        # Return partial results instead of raising exception
+                        return create_partial_result(record_list, error_msg)
                     if ablation["use_agent_2"]:
                         continue  # Regenerate
                     else:
                     df['error_occurred'] = True
                     df['error_message'] = str(e)
                     os.makedirs("outputs", exist_ok=True)
                     suggested_filename = os.path.join(
                         "outputs", f"experiment_stimuli_results_{session_id}_{timestamp}_{unique_id}.csv")
                 else:
                     raise e
+        stopped, partial_result = check_stop_and_return(
+            "Generation stopped after 'Validator'.")
+        if stopped:
+            return partial_result
         try:
+            stopped, partial_result = check_stop_and_return(
+                "Generation stopped before Scorer.")
+            if stopped:
+                return partial_result
             # Step 3: Score
             if ablation["use_agent_3"]:
                 if isinstance(scores, dict) and all(v == 0 for v in scores.values()):
                     if stop_event.is_set():
+                        stopped, partial_result = check_stop_and_return(
+                            "Generation stopped after 'Scorer'.")
+                        if stopped:
+                            return partial_result
                 print("Agent 3 Output:", scores)
                 if websocket_callback:
                     websocket_callback(
                         "scorer", f"Scorer's Output: {json.dumps(scores, indent=2)}")
+                stopped, partial_result = check_stop_and_return(
+                    "Generation stopped after 'Scorer'.")
+                if stopped:
+                    return partial_result
             else:
                 print("Ablation: Skipping Agent 3 (Scoring)")
                 if websocket_callback: