prthm11 committed on
Commit
fa61cae
·
verified ·
1 Parent(s): 005564d

Update utils/block_relation_builder.py

Browse files
Files changed (1) hide show
  1. utils/block_relation_builder.py +64 -38
utils/block_relation_builder.py CHANGED
@@ -3252,49 +3252,75 @@ def analyze_opcode_counts(pseudo_code: str) -> list[dict]:
3252
  #--------------------------------------------------[Helper function to separate and correct the json]-------------------------------------------------------------
3253
  #################################################################################################################################################################
3254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3255
  def separate_scripts(pseudocode_string):
3256
  """
3257
- Separates a block of Scratch pseudocode into a list of individual scripts.
 
 
 
 
 
 
 
 
 
 
 
 
3258
 
3259
- This function finds the start of each "hat" block and slices the
3260
- original string to capture the full code block for each script,
3261
- providing a more robust and reliable separation.
3262
 
3263
- Args:
3264
- pseudocode_string (str): A string containing Scratch pseudocode.
 
 
 
3265
 
3266
- Returns:
3267
- list: A list of strings, where each string is a complete,
3268
- separated script.
3269
- """
3270
- # Define the "hat" block patterns with more robust regex.
3271
- # We use a non-capturing group (?:...) for the patterns.
3272
- # We use a logical OR (|) to combine them into a single pattern.
3273
- delimiter_patterns = (
3274
- r"when green flag clicked|when flag clicked|when \S+ key pressed|"
3275
- r"when this sprite clicked|when backdrop switches to \[.*?\]|"
3276
- r"when I receive \[.*?\]|when \[.*?\] > \[.*?\]"
3277
- )
3278
-
3279
- # Use re.finditer to get an iterator of all hat block matches.
3280
- # The `re.DOTALL` flag allows the '.' to match newlines.
3281
- matches = list(re.finditer(delimiter_patterns, pseudocode_string, flags=re.DOTALL | re.IGNORECASE))
3282
-
3283
- scripts = []
3284
- # If no matches are found, return an empty list.
3285
- if not matches:
3286
- return []
3287
-
3288
- # Iterate through the matches to slice the original string.
3289
- for i in range(len(matches)):
3290
- start = matches[i].start()
3291
- end = matches[i+1].start() if i + 1 < len(matches) else len(pseudocode_string)
3292
-
3293
- # Slice the pseudocode string from the start of one match to the start
3294
- # of the next, or to the end of the string.
3295
- script = pseudocode_string[start:end]
3296
- scripts.append(script.strip())
3297
-
3298
  return scripts
3299
 
3300
  def transform_logic_to_action_flow(source_data, description=""):
 
3252
  #--------------------------------------------------[Helper function to separate and correct the json]-------------------------------------------------------------
3253
  #################################################################################################################################################################
3254
 
3255
+ # def separate_scripts(pseudocode_string):
3256
+ # """
3257
+ # Separates a block of Scratch pseudocode into a list of individual scripts.
3258
+
3259
+ # This function finds the start of each "hat" block and slices the
3260
+ # original string to capture the full code block for each script,
3261
+ # providing a more robust and reliable separation.
3262
+
3263
+ # Args:
3264
+ # pseudocode_string (str): A string containing Scratch pseudocode.
3265
+
3266
+ # Returns:
3267
+ # list: A list of strings, where each string is a complete,
3268
+ # separated script.
3269
+ # """
3270
+ # # Define the "hat" block patterns with more robust regex.
3271
+ # # We use a non-capturing group (?:...) for the patterns.
3272
+ # # We use a logical OR (|) to combine them into a single pattern.
3273
+ # delimiter_patterns = (
3274
+ # r"when green flag clicked|when flag clicked|when \S+ key pressed|"
3275
+ # r"when this sprite clicked|when backdrop switches to \[.*?\]|"
3276
+ # r"when I receive \[.*?\]|when \[.*?\] > \[.*?\]"
3277
+ # )
3278
+
3279
+ # # Use re.finditer to get an iterator of all hat block matches.
3280
+ # # The `re.DOTALL` flag allows the '.' to match newlines.
3281
+ # matches = list(re.finditer(delimiter_patterns, pseudocode_string, flags=re.DOTALL | re.IGNORECASE))
3282
+
3283
+ # scripts = []
3284
+ # # If no matches are found, return an empty list.
3285
+ # if not matches:
3286
+ # return []
3287
+
3288
+ # # Iterate through the matches to slice the original string.
3289
+ # for i in range(len(matches)):
3290
+ # start = matches[i].start()
3291
+ # end = matches[i+1].start() if i + 1 < len(matches) else len(pseudocode_string)
3292
+
3293
+ # # Slice the pseudocode string from the start of one match to the start
3294
+ # # of the next, or to the end of the string.
3295
+ # script = pseudocode_string[start:end]
3296
+ # scripts.append(script.strip())
3297
+
3298
+ # return scripts
3299
  def separate_scripts(pseudocode_string):
3300
  """
3301
+ Split a block of Scratch pseudocode into individual scripts.
3302
+ Each script starts at a 'when ...' hat block that appears at the start
3303
+ of a line (leading whitespace allowed).
3304
+ """
3305
+ # Robust, non-capturing hat-block patterns (no capturing groups!)
3306
+ patterns = [
3307
+ r"when\s+(?:green\s+flag\s+)?click(?:ed)?\b", # when flag clicked / when green flag clicked
3308
+ r"when\s+(?:\S+(?:\s+\S+)*)\s+key\s+press(?:ed)?\b", # when space key pressed / when up arrow key press
3309
+ r"when\s+this\s+sprite\s+click(?:ed)?\b", # when this sprite clicked
3310
+ r"when\s+backdrop\s+switch(?:es|ed)?\s+to\s*\[[^\]]*\]", # when backdrop switches to [..]
3311
+ r"when\s+I\s+receive(?:d)?\s*\[[^\]]*\]", # when I receive [..] / when I received [..]
3312
+ r"when\s*\[[^\]]*\]\s*>\s*\[[^\]]*\]" # when [sensor] > [value]
3313
+ ]
3314
 
3315
+ # Build a lookahead that finds positions just before a hat block at line-start
3316
+ lookahead = r"(?=^\s*(?:{}))".format("|".join(patterns))
 
3317
 
3318
+ # Use MULTILINE so ^ matches start of lines, DOTALL to allow patterns that may include brackets/newlines as needed
3319
+ parts = re.split(lookahead, pseudocode_string, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
3320
+
3321
+ # Filter out empties and strip leading/trailing whitespace/newlines
3322
+ scripts = [p.strip() for p in parts if p and p.strip()]
3323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3324
  return scripts
3325
 
3326
  def transform_logic_to_action_flow(source_data, description=""):