Spaces:
Running
Running
Update utils/block_relation_builder.py
Browse files- utils/block_relation_builder.py +64 -38
utils/block_relation_builder.py
CHANGED
|
@@ -3252,49 +3252,75 @@ def analyze_opcode_counts(pseudo_code: str) -> list[dict]:
|
|
| 3252 |
#--------------------------------------------------[Helper function to separate and correct the json]-------------------------------------------------------------
|
| 3253 |
#################################################################################################################################################################
|
| 3254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3255 |
def separate_scripts(pseudocode_string):
    """
    Cut Scratch pseudocode into one string per script.

    Every occurrence of a known "hat" block (matched case-insensitively,
    anywhere in the text) marks the beginning of a script; the script runs
    up to the next hat block or to the end of the input.

    Args:
        pseudocode_string (str): A string containing Scratch pseudocode.

    Returns:
        list: The scripts in order of appearance, each stripped of
              surrounding whitespace. Text preceding the first hat block is
              discarded, and an empty list is returned when no hat block
              is found.
    """
    # Alternation of all recognised hat-block openers.
    hat_regex = (
        r"when green flag clicked|when flag clicked|when \S+ key pressed|"
        r"when this sprite clicked|when backdrop switches to \[.*?\]|"
        r"when I receive \[.*?\]|when \[.*?\] > \[.*?\]"
    )

    # Offsets at which each script begins.
    begin_offsets = [
        hit.start()
        for hit in re.finditer(hat_regex, pseudocode_string, flags=re.DOTALL | re.IGNORECASE)
    ]

    # Each script stops where the following one begins; the last one stops
    # at the end of the input. With no hat blocks the zip below is empty.
    stop_offsets = begin_offsets[1:] + [len(pseudocode_string)]

    return [
        pseudocode_string[lo:hi].strip()
        for lo, hi in zip(begin_offsets, stop_offsets)
    ]
|
| 3299 |
|
| 3300 |
def transform_logic_to_action_flow(source_data, description=""):
|
|
|
|
| 3252 |
#--------------------------------------------------[Helper function to separate and correct the json]-------------------------------------------------------------
|
| 3253 |
#################################################################################################################################################################
|
| 3254 |
|
| 3255 |
+
# def separate_scripts(pseudocode_string):
|
| 3256 |
+
# """
|
| 3257 |
+
# Separates a block of Scratch pseudocode into a list of individual scripts.
|
| 3258 |
+
|
| 3259 |
+
# This function finds the start of each "hat" block and slices the
|
| 3260 |
+
# original string to capture the full code block for each script,
|
| 3261 |
+
# providing a more robust and reliable separation.
|
| 3262 |
+
|
| 3263 |
+
# Args:
|
| 3264 |
+
# pseudocode_string (str): A string containing Scratch pseudocode.
|
| 3265 |
+
|
| 3266 |
+
# Returns:
|
| 3267 |
+
# list: A list of strings, where each string is a complete,
|
| 3268 |
+
# separated script.
|
| 3269 |
+
# """
|
| 3270 |
+
# # Define the "hat" block patterns with more robust regex.
|
| 3271 |
+
# # We use a non-capturing group (?:...) for the patterns.
|
| 3272 |
+
# # We use a logical OR (|) to combine them into a single pattern.
|
| 3273 |
+
# delimiter_patterns = (
|
| 3274 |
+
# r"when green flag clicked|when flag clicked|when \S+ key pressed|"
|
| 3275 |
+
# r"when this sprite clicked|when backdrop switches to \[.*?\]|"
|
| 3276 |
+
# r"when I receive \[.*?\]|when \[.*?\] > \[.*?\]"
|
| 3277 |
+
# )
|
| 3278 |
+
|
| 3279 |
+
# # Use re.finditer to get an iterator of all hat block matches.
|
| 3280 |
+
# # The `re.DOTALL` flag allows the '.' to match newlines.
|
| 3281 |
+
# matches = list(re.finditer(delimiter_patterns, pseudocode_string, flags=re.DOTALL | re.IGNORECASE))
|
| 3282 |
+
|
| 3283 |
+
# scripts = []
|
| 3284 |
+
# # If no matches are found, return an empty list.
|
| 3285 |
+
# if not matches:
|
| 3286 |
+
# return []
|
| 3287 |
+
|
| 3288 |
+
# # Iterate through the matches to slice the original string.
|
| 3289 |
+
# for i in range(len(matches)):
|
| 3290 |
+
# start = matches[i].start()
|
| 3291 |
+
# end = matches[i+1].start() if i + 1 < len(matches) else len(pseudocode_string)
|
| 3292 |
+
|
| 3293 |
+
# # Slice the pseudocode string from the start of one match to the start
|
| 3294 |
+
# # of the next, or to the end of the string.
|
| 3295 |
+
# script = pseudocode_string[start:end]
|
| 3296 |
+
# scripts.append(script.strip())
|
| 3297 |
+
|
| 3298 |
+
# return scripts
|
| 3299 |
def separate_scripts(pseudocode_string):
    """
    Split a block of Scratch pseudocode into individual scripts.

    A new script begins wherever a recognised 'when ...' hat block opens a
    line (leading whitespace allowed). Matching is case-insensitive.

    Args:
        pseudocode_string (str): A string containing Scratch pseudocode.

    Returns:
        list: Non-empty chunks in source order, each stripped of surrounding
              whitespace. Any text before the first hat block (or the whole
              input when no hat block is found) is returned as its own chunk.
    """
    # Non-capturing hat-block patterns. They must not contain capturing
    # groups, because re.split() would insert the captured text into its result.
    hat_patterns = [
        # when clicked / when flag clicked / when green flag clicked
        r"when\s+(?:(?:green\s+)?flag\s+)?click(?:ed)?\b",
        # when space key pressed / when up arrow key press
        # (the key name has to sit on the same line as "when")
        r"when\s+\S+(?:[^\S\n]+\S+)*[^\S\n]+key\s+press(?:ed)?\b",
        # when this sprite clicked
        r"when\s+this\s+sprite\s+click(?:ed)?\b",
        # when backdrop switches to [..]
        r"when\s+backdrop\s+switch(?:es|ed)?\s+to\s*\[[^\]]*\]",
        # when I receive [..] / when I received [..]
        r"when\s+I\s+receive(?:d)?\s*\[[^\]]*\]",
        # when [sensor] > [value]
        r"when\s*\[[^\]]*\]\s*>\s*\[[^\]]*\]",
    ]

    # Zero-width lookahead: succeeds at a line start that is followed by
    # optional whitespace and then one of the hat blocks.
    lookahead = r"(?=^\s*(?:{}))".format("|".join(hat_patterns))

    # MULTILINE lets ^ match at the start of every line. Splitting on this
    # empty match keeps each hat block at the head of its own chunk.
    parts = re.split(lookahead, pseudocode_string, flags=re.IGNORECASE | re.MULTILINE)

    # Strip each chunk and drop the ones that were only whitespace.
    stripped_parts = (part.strip() for part in parts)
    scripts = [part for part in stripped_parts if part]

    return scripts
|
| 3325 |
|
| 3326 |
def transform_logic_to_action_flow(source_data, description=""):
|