prthm11 committed on
Commit
b027234
·
verified ·
1 Parent(s): 93a76be

Update utils/block_relation_builder.py

Browse files
Files changed (1) hide show
  1. utils/block_relation_builder.py +291 -21
utils/block_relation_builder.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  import copy
3
  import re
4
- from collections import defaultdict
5
  import secrets
6
  import string
7
  from typing import Dict, Any, TypedDict,Tuple
@@ -1797,7 +1797,8 @@ def classify(line):
1797
  if l.startswith("//"): return None, None
1798
 
1799
  # Hat Blocks (most specific first)
1800
- if re.match(r"when green flag click(ed)?", l): return "event_whenflagclicked", "hat"
 
1801
  if re.match(r"when (.+?) key press(ed)?", l): return "event_whenkeypressed", "hat"
1802
  if re.match(r"when this sprite click(ed)?", l): return "event_whenthisspriteclicked", "hat"
1803
  if l.startswith("when backdrop switches to"): return "event_whenbackdropswitchesto", "hat"
@@ -2353,34 +2354,34 @@ def generate_plan(generated_input, opcode_keys, pseudo_code):
2353
  info["fields"]["VARIABLE"] = [var_name, None]
2354
  if "LIST" in info["fields"]:
2355
  m = re.search(r"(?:to|of|in)\s*\[([^\]]+)\s*v\]", stmt_for_parse)
2356
- if m: info["fields"]["LIST"] = [m.group(1), None]
2357
  if "STOP_OPTION" in info["fields"]:
2358
  m = re.search(r"stop \[([^\]]+)\s*v\]", stmt_for_parse)
2359
  if m: info["fields"]["STOP_OPTION"] = [m.group(1).strip(), None]
2360
  if "STYLE" in info["fields"]:
2361
  m = re.search(r"set rotation style \[([^\]]+)\s*v\]", stmt_for_parse)
2362
- if m: info["fields"]["STYLE"] = [m.group(1), None]
2363
  if "DRAG_MODE" in info["fields"]:
2364
  m = re.search(r"set drag mode \[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2365
- if m: info["fields"]["DRAG_MODE"] = [m.group(1), None]
2366
  if "EFFECT" in info["fields"] and opcode in ["looks_changeeffectby", "looks_seteffectto", "sound_changeeffectby", "sound_seteffectto"]:
2367
  m = re.search(r"(?:change|set)\s*\[([^\]]+)\s*v\] effect", stmt_for_parse, re.IGNORECASE)
2368
- if m: info["fields"]["EFFECT"] = [m.group(1).upper(), None]
2369
  if "NUMBER_NAME" in info["fields"] and opcode in ["looks_costumenumbername", "looks_backdropnumbername"]:
2370
  m = re.search(r"(?:costume|backdrop)\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2371
- if m: info["fields"]["NUMBER_NAME"] = [m.group(1), None]
2372
  if "FRONT_BACK" in info["fields"] and opcode == "looks_gotofrontback":
2373
  m = re.search(r"go to\s*\[([^\]]+)\s*v\] layer", stmt_for_parse, re.IGNORECASE)
2374
- if m: info["fields"]["FRONT_BACK"] = [m.group(1), None]
2375
  if "FORWARD_BACKWARD" in info["fields"] and opcode == "looks_goforwardbackwardlayers":
2376
  m = re.search(r"go\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2377
- if m: info["fields"]["FORWARD_BACKWARD"] = [m.group(1), None]
2378
  if "OPERATOR" in info["fields"] and opcode == "operator_mathop":
2379
  m = re.search(r"\[([^\]]+)\s*v\] of", stmt_for_parse, re.IGNORECASE)
2380
- if m: info["fields"]["OPERATOR"] = [m.group(1).upper(), None]
2381
  if "CURRENTMENU" in info["fields"] and opcode == "sensing_current":
2382
  m = re.search(r"current\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2383
- if m: info["fields"]["CURRENTMENU"] = [m.group(1).upper(), None]
2384
  if "PROPERTY" in info["fields"] and opcode == "sensing_of":
2385
  m = re.search(r"\((.+?)\) of", stmt_for_parse, re.IGNORECASE)
2386
  if m:
@@ -2393,16 +2394,16 @@ def generate_plan(generated_input, opcode_keys, pseudo_code):
2393
  info["fields"]["PROPERTY"] = [prop_map.get(prop, prop), None]
2394
  if "WHENGREATERTHANMENU" in info["fields"] and opcode == "event_whengreaterthan":
2395
  m = re.search(r"when\s*\[([^\]]+)\s*v\] >", stmt_for_parse, re.IGNORECASE)
2396
- if m: info["fields"]["WHENGREATERTHANMENU"] = [m.group(1).upper(), None]
2397
  if "KEY_OPTION" in info["fields"] and opcode == "event_whenkeypressed": # For event_whenkeypressed hat block's field
2398
  m = re.search(r"when\s*\[([^\]]+)\s*v\] key pressed", stmt_for_parse, re.IGNORECASE)
2399
- if m: info["fields"]["KEY_OPTION"] = [m.group(1), None]
2400
  if "BACKDROP" in info["fields"] and opcode == "event_whenbackdropswitchesto": # For event_whenbackdropswitchesto hat block's field
2401
  m = re.search(r"when backdrop switches to\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2402
- if m: info["fields"]["BACKDROP"] = [m.group(1), None]
2403
  if "BROADCAST_OPTION" in info["fields"] and opcode == "event_whenbroadcastreceived": # For event_whenbroadcastreceived hat block's field
2404
  m = re.search(r"when i receive\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2405
- if m: info["fields"]["BROADCAST_OPTION"] = [m.group(1), None]
2406
 
2407
  # Custom block specific parsing
2408
  if opcode == "procedures_definition":
@@ -2458,7 +2459,6 @@ def generate_plan(generated_input, opcode_keys, pseudo_code):
2458
  info["mutation"]["argumentnames"].append(f"arg{idx+1}") # Placeholder name for mutation
2459
 
2460
  info["inputs"][arg_input_name] = parse_reporter_or_value(arg_val_str, key, pick_key, all_generated_blocks) # Pass current block's key
2461
-
2462
  i += 1 # Move to the next line
2463
 
2464
  # Final pass to ensure last blocks have next: None (already handled by stack pops)
@@ -2513,8 +2513,8 @@ def process_scratch_blocks(all_generated_blocks, generated_output_json):
2513
 
2514
  # Initialize dictionaries to store and reuse generated unique IDs
2515
  # This prevents creating multiple unique IDs for the same variable/broadcast across different blocks
2516
- variable_id_map = defaultdict(lambda: generate_secure_token(20))
2517
- broadcast_id_map = defaultdict(lambda: generate_secure_token(20))
2518
 
2519
  # Define the mapping for input field names to their required integer types for shadows
2520
  input_type_mapping = {
@@ -2603,7 +2603,7 @@ def process_scratch_blocks(all_generated_blocks, generated_output_json):
2603
  else:
2604
  # Fallback: try original generated_output_json value if present, else synthesize
2605
  fallback = gen_block_data.get("inputs", {}).get(input_name,
2606
- [1, [11, "message1", generate_secure_token(20)]])
2607
  processed_block["inputs"][input_name] = fallback
2608
  continue
2609
 
@@ -3066,6 +3066,275 @@ def variable_adder_main(project_data):
3066
  return processed_json
3067
  except Exception as e:
3068
  print(f"Error error in the variable initialization opcodes: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3069
  #################################################################################################################################################################
3070
  #--------------------------------------------------[Helper main function]----------------------------------------------------------------------------------------
3071
  #################################################################################################################################################################
@@ -3115,11 +3384,12 @@ when I receive [Game Start v]
3115
  end
3116
  """
3117
  # print(pseudo_code)
3118
- # generated_output_json, initial_opcode_occurrences = generate_blocks_from_opcodes(initial_opcode_counts, all_block_definitions)
 
3119
  # all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
3120
  # processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
3121
  # renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
3122
- # #print(all_generated_blocks)
3123
  # print("--------------\n\n")
3124
  # print(processed_blocks)
3125
  # print("--------------\n\n")
 
1
  import json
2
  import copy
3
  import re
4
+ from collections import defaultdict, Counter
5
  import secrets
6
  import string
7
  from typing import Dict, Any, TypedDict,Tuple
 
1797
  if l.startswith("//"): return None, None
1798
 
1799
  # Hat Blocks (most specific first)
1800
+ # if re.match(r"when green flag click(ed)?", l): return "event_whenflagclicked", "hat"
1801
+ if re.fullmatch(r"when (green )?flag click(ed)?", l.strip(), re.IGNORECASE): return "event_whenflagclicked", "hat"
1802
  if re.match(r"when (.+?) key press(ed)?", l): return "event_whenkeypressed", "hat"
1803
  if re.match(r"when this sprite click(ed)?", l): return "event_whenthisspriteclicked", "hat"
1804
  if l.startswith("when backdrop switches to"): return "event_whenbackdropswitchesto", "hat"
 
2354
  info["fields"]["VARIABLE"] = [var_name, None]
2355
  if "LIST" in info["fields"]:
2356
  m = re.search(r"(?:to|of|in)\s*\[([^\]]+)\s*v\]", stmt_for_parse)
2357
+ if m: info["fields"]["LIST"] = [m.group(1).strip(), None]
2358
  if "STOP_OPTION" in info["fields"]:
2359
  m = re.search(r"stop \[([^\]]+)\s*v\]", stmt_for_parse)
2360
  if m: info["fields"]["STOP_OPTION"] = [m.group(1).strip(), None]
2361
  if "STYLE" in info["fields"]:
2362
  m = re.search(r"set rotation style \[([^\]]+)\s*v\]", stmt_for_parse)
2363
+ if m: info["fields"]["STYLE"] = [m.group(1).strip(), None]
2364
  if "DRAG_MODE" in info["fields"]:
2365
  m = re.search(r"set drag mode \[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2366
+ if m: info["fields"]["DRAG_MODE"] = [m.group(1).strip(), None]
2367
  if "EFFECT" in info["fields"] and opcode in ["looks_changeeffectby", "looks_seteffectto", "sound_changeeffectby", "sound_seteffectto"]:
2368
  m = re.search(r"(?:change|set)\s*\[([^\]]+)\s*v\] effect", stmt_for_parse, re.IGNORECASE)
2369
+ if m: info["fields"]["EFFECT"] = [m.group(1).upper().strip(), None]
2370
  if "NUMBER_NAME" in info["fields"] and opcode in ["looks_costumenumbername", "looks_backdropnumbername"]:
2371
  m = re.search(r"(?:costume|backdrop)\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2372
+ if m: info["fields"]["NUMBER_NAME"] = [m.group(1).strip(), None]
2373
  if "FRONT_BACK" in info["fields"] and opcode == "looks_gotofrontback":
2374
  m = re.search(r"go to\s*\[([^\]]+)\s*v\] layer", stmt_for_parse, re.IGNORECASE)
2375
+ if m: info["fields"]["FRONT_BACK"] = [m.group(1).strip(), None]
2376
  if "FORWARD_BACKWARD" in info["fields"] and opcode == "looks_goforwardbackwardlayers":
2377
  m = re.search(r"go\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2378
+ if m: info["fields"]["FORWARD_BACKWARD"] = [m.group(1).strip(), None]
2379
  if "OPERATOR" in info["fields"] and opcode == "operator_mathop":
2380
  m = re.search(r"\[([^\]]+)\s*v\] of", stmt_for_parse, re.IGNORECASE)
2381
+ if m: info["fields"]["OPERATOR"] = [m.group(1).upper().strip(), None]
2382
  if "CURRENTMENU" in info["fields"] and opcode == "sensing_current":
2383
  m = re.search(r"current\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2384
+ if m: info["fields"]["CURRENTMENU"] = [m.group(1).upper().strip(), None]
2385
  if "PROPERTY" in info["fields"] and opcode == "sensing_of":
2386
  m = re.search(r"\((.+?)\) of", stmt_for_parse, re.IGNORECASE)
2387
  if m:
 
2394
  info["fields"]["PROPERTY"] = [prop_map.get(prop, prop), None]
2395
  if "WHENGREATERTHANMENU" in info["fields"] and opcode == "event_whengreaterthan":
2396
  m = re.search(r"when\s*\[([^\]]+)\s*v\] >", stmt_for_parse, re.IGNORECASE)
2397
+ if m: info["fields"]["WHENGREATERTHANMENU"] = [m.group(1).upper().strip(), None]
2398
  if "KEY_OPTION" in info["fields"] and opcode == "event_whenkeypressed": # For event_whenkeypressed hat block's field
2399
  m = re.search(r"when\s*\[([^\]]+)\s*v\] key pressed", stmt_for_parse, re.IGNORECASE)
2400
+ if m: info["fields"]["KEY_OPTION"] = [m.group(1).strip(), None]
2401
  if "BACKDROP" in info["fields"] and opcode == "event_whenbackdropswitchesto": # For event_whenbackdropswitchesto hat block's field
2402
  m = re.search(r"when backdrop switches to\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2403
+ if m: info["fields"]["BACKDROP"] = [m.group(1).strip(), None]
2404
  if "BROADCAST_OPTION" in info["fields"] and opcode == "event_whenbroadcastreceived": # For event_whenbroadcastreceived hat block's field
2405
  m = re.search(r"when i receive\s*\[([^\]]+)\s*v\]", stmt_for_parse, re.IGNORECASE)
2406
+ if m: info["fields"]["BROADCAST_OPTION"] = [m.group(1).strip(), None]
2407
 
2408
  # Custom block specific parsing
2409
  if opcode == "procedures_definition":
 
2459
  info["mutation"]["argumentnames"].append(f"arg{idx+1}") # Placeholder name for mutation
2460
 
2461
  info["inputs"][arg_input_name] = parse_reporter_or_value(arg_val_str, key, pick_key, all_generated_blocks) # Pass current block's key
 
2462
  i += 1 # Move to the next line
2463
 
2464
  # Final pass to ensure last blocks have next: None (already handled by stack pops)
 
2513
 
2514
  # Initialize dictionaries to store and reuse generated unique IDs
2515
  # This prevents creating multiple unique IDs for the same variable/broadcast across different blocks
2516
+ variable_id_map = defaultdict(lambda: generate_secure_token())
2517
+ broadcast_id_map = defaultdict(lambda: generate_secure_token())
2518
 
2519
  # Define the mapping for input field names to their required integer types for shadows
2520
  input_type_mapping = {
 
2603
  else:
2604
  # Fallback: try original generated_output_json value if present, else synthesize
2605
  fallback = gen_block_data.get("inputs", {}).get(input_name,
2606
+ [1, [11, "message1", generate_secure_token()]])
2607
  processed_block["inputs"][input_name] = fallback
2608
  continue
2609
 
 
3066
  return processed_json
3067
  except Exception as e:
3068
  print(f"Error error in the variable initialization opcodes: {e}")
3069
+
3070
+ #################################################################################################################################################################
3071
+ #--------------------------------------------------[Helper function to generate Opcode]--------------------------------------------------------------------------
3072
+ #################################################################################################################################################################
3073
+
3074
+ def _find_all_opcodes(code_block: str) -> list[str]:
3075
+ """
3076
+ Finds all Scratch opcodes in a given code block using a series of
3077
+ regex patterns. This function is designed to handle multi-line blocks
3078
+ by processing the entire code block and finding all matches. The
3079
+ patterns are ordered from most specific to least specific to prevent
3080
+ misclassification.
3081
+
3082
+ Args:
3083
+ code_block: A string containing the entire pseudo-code.
3084
+
3085
+ Returns:
3086
+ A list of all detected opcode strings.
3087
+ """
3088
+ opcodes = []
3089
+
3090
+ # Define a list of regex patterns and their corresponding opcodes,
3091
+ # ordered from most specific to least specific. The re.DOTALL flag
3092
+ # allows '.' to match newlines, which is crucial for multi-line blocks.
3093
+ patterns = [
3094
+ # --- Multi-line Control Blocks (most specific, non-greedy) ---
3095
+ (r"if <.+?> then(?:.|\n)+?else(?:.|\n)+?end", "control_if_else"), #(to test muliple stack)
3096
+ (r"forever", "control_forever"),
3097
+ (r"if <.+?> then", "control_if"),
3098
+ (r"repeat until <.+?>", "control_repeat_until"),
3099
+ (r"repeat\s+(?:\(.+?\)|\[.+?(?:\s+v)?\]|\S+)", "control_repeat"),
3100
+ (r"stop\s+(?:all|this script|other scripts in sprite|\[(?:all|this script|other scripts in sprite)(?:\s+v)?\])(?!\s+sounds)", "control_stop"),
3101
+ (r"when I start as a clone", "control_start_as_clone"),
3102
+ (r"create clone of \[.+?(?:\s+v)?\]", "control_create_clone_of"),
3103
+ (r"delete this clone", "control_delete_this_clone"),
3104
+ (r"wait\s+(?:\(.+?\)|\[.+?(?:\s+v)?\]|\S+)\s+sec(?:ond)?s?", "control_wait"),
3105
+ (r"wait until <.+?>", "control_wait_until"),
3106
+
3107
+ # --- Event Blocks (most specific) ---
3108
+ # (r"when green flag clicked", "event_whenflagclicked"),
3109
+ (r"when (green )?flag click(ed)?", "event_whenflagclicked"),
3110
+ (r"when\s+(?:key\s+\[(.+?)(?:\s+v)?\]|\[(.+?)(?:\s+v)?\]\s+key)\s+pressed", "event_whenkeypressed"),
3111
+ (r"when this sprite clicked", "event_whenthisspriteclicked"),
3112
+ (r"when backdrop switches to \[.+?(?:\s+v)?\]", "event_whenbackdropswitchesto"),
3113
+ (r"when I receive \[.+?(?:\s+v)?\]", "event_whenbroadcastreceived"),
3114
+ (r"when \[.+?(?:\s+v)?\] > (.+)", "event_whengreaterthan"),
3115
+ (r"broadcast \[.+?(?:\s+v)?\] and wait", "event_broadcastandwait"),
3116
+ (r"broadcast \[.+?(?:\s+v)?\]", "event_broadcast"),
3117
+
3118
+ # --- Data Blocks (Variables and Lists) - specific block types first ---
3119
+ (r"set\s*\[\s*.+?(?:\s+v)?\s*\]\s*to\s*\(?\s*.+?\s*\)?", "data_setvariableto"),
3120
+ (r"change\s*\[\s*.+?(?:\s+v)?\s*\]\s*by\s*\(?\s*.+?\s*\)?", "data_changevariableby"),
3121
+ (r"show variable \[.+?(?:\s+v)?\]", "data_showvariable"),
3122
+ (r"hide variable \[.+?(?:\s+v)?\]", "data_hidevariable"),
3123
+ (r"show list \[.+?(?:\s+v)?\]", "data_showlist"),
3124
+ (r"hide list \[.+?(?:\s+v)?\]", "data_hidelist"),
3125
+ (r"add\s+(?:\[.+?\]|\(.+?\)|\w+)\s+to\s+\[.+?(?:\s+v)?\]", "data_addtolist"),
3126
+ (r"delete\s*\((?!all\)).+?\)\s*of\s*\[.+?(?:\s+v)?\]", "data_deleteoflist"),
3127
+ (r"delete\s*\(all\)\s*of\s*\[.+?(?:\s+v)?\]", "data_deletealloflist"),
3128
+ (r"insert\s+(\(.+?\)|\[.+?\]|\(\[.+?\]\)|[^\s]+)\s+at\s+(\(.+?\)|\[.+?\]|\(\[.+?\]\)|\d+)\s+of\s+\[.+?(?:\s+v)?\]", "data_insertatlist"),
3129
+ (r"replace\s+item\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|[^\s]+)\s+of\s+\[.+?(?:\s+v)?\]\s+with\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|.+)","data_replaceitemoflist"),
3130
+ (r"[<(]\s*\[[^\]]+?\s+v\]\s*contains\s*\[[^\]]+?\]\s*\??\s*[)>]", "data_listcontainsitem"),
3131
+ (r"\(item\s+#\s+of\s+\(?(.+?)\)?\s+in\s+\[.+?(?:\s+v)?\]\)", "data_itemnumoflist"),
3132
+ (r"(?<!replace\s)\(?item(?!\s+#)\s+(\(.+?\)|\[\s*.+?\s*(?:v)?\]|[^\s]+)\s+of\s+\[.+?(?:\s+v)?\]\)?", "data_itemoflist"),
3133
+ (r"\(length of \[.+?(?:\s+v)?\]\)", "data_lengthoflist"),
3134
+
3135
+ # --- Sensing Blocks ---
3136
+ (r"ask \[.+?\] and wait", "sensing_askandwait"),
3137
+ (r"(?<!when\s)key\s+\[.+?(?:\s+v)?\]\s+pressed\??", "sensing_keypressed"),
3138
+ (r"mouse down\??", "sensing_mousedown"),
3139
+ (r"""\s*<?\s*color\s*\[?\s*(#[0-9A-Fa-f]{6})\s*\]?\s*is\s+touching\s*\[?\s*(#[0-9A-Fa-f]{6})\s*\]?\s*\??\s*>?\s*""", "sensing_coloristouchingcolor"),
3140
+ (r"(?<!is\s)touching\s*(?:color\s*)?\[?\s*#([0-9A-Fa-f]{6})\s*\]?\??", "sensing_touchingcolor"),
3141
+ (r"touching \[.+? v\]\??", "sensing_touchingobject"),
3142
+ (r"set drag mode \[.+? v\]", "sensing_setdragmode"),
3143
+ (r"reset timer", "sensing_resettimer"),
3144
+ (r"(?i)(?<!\bat\s)(?<!\bdelete\s)(?<!\binsert\s)(?<!\breplace\s)(?<!\bcreate\s)(?<!\bitem\s)(?<!\bletter\s)"r"(?:"r"\(\s*(?!(?:item\b|\d+|\#|length\b|insert\b|delete\b|replace\b|create\b|letter\b))[^()]{1,200}?\)\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r"|"r"\(\s*(?!(?:item\b|\d+|\#|length\b|insert\b|delete\b|replace\b|create\b|letter\b)).*?of\s*\[[^\]]+?(?:\s+v)?\].*?\)"r"|"r"\[\s*(?!(?:item\b|\d+|\#|length\b|letter\b))[^\]]+?\]\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r"|"r"\b(?:backdrop|costume|x\s+position|y\s+position|direction|size|volume|loudness|answer|day|month|year|username|timer|mouse\s+x|mouse\s+y)\b\s*of\s*\[[^\]]+?(?:\s+v)?\](?!\s*in\s*\[)"r")","sensing_of"),
3145
+ (r"\(current \[.+? v]\)", "sensing_current"),
3146
+ (r"\(?answer\)?", "sensing_answer"), #(to test muliple bracket and alone should treet as the keyword)
3147
+ (r"\(?username\)?", "sensing_username"), #(to test muliple bracket and alone should treet as the keyword)
3148
+
3149
+ # --- Sound Blocks ---
3150
+ (r"play sound \[.+? v\] until done", "sound_playuntildone"),
3151
+ (r"start sound \[.+? v\]", "sound_play"),
3152
+ (r"stop all sounds", "sound_stopallsounds"),
3153
+ (r"change volume by\s*(?:\((.+?)\)|\[(.+?)\]|(.+))", "sound_changevolumeby"),
3154
+ (r"""set\ volume\ to\s+\(?\s*(?:-?\d+(?:\.\d+)?|\[?[a-zA-Z_][\w\s]*\]?(?:\ v)?)\s*\)?\s*%?""", "sound_setvolumeto"),
3155
+
3156
+ (r"\(volume\)", "sound_volume"),
3157
+
3158
+ # --- Motion Blocks ---
3159
+ (r"go to x:\s*\(?(.+?)\)?\s*y:\s*\(?(.+?)\)?", "motion_gotoxy"),
3160
+ (r"set x to (.+)", "motion_setx"),
3161
+ (r"set y to (.+)", "motion_sety"),
3162
+ (r"move\s*\(?(.+?)\)?\s*(?:steps?)?", "motion_movesteps"),
3163
+ (r"turn right\s*\(?(.+?)\)?\s*(?:degrees?)?", "motion_turnright"),
3164
+ (r"turn left\s*\(?(.+?)\)?\s*(?:degrees?)?", "motion_turnleft"),
3165
+ (r"go to\s*(?:random position|mouse-pointer|\[.*?\]|.+)", "motion_goto"), #(to mouse-pointer is not include here for now)
3166
+ (r"point in direction\s*\(?(.+?)\)?", "motion_pointindirection"),
3167
+ (r"point towards \[.+? v\]", "motion_pointtowards"),
3168
+ (r"change x by\s*\(?(.+?)\)?", "motion_changexby"),
3169
+ (r"change y by\s*\(?(.+?)\)?", "motion_changeyby"),
3170
+ (r"glide\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)\s*to\s*x:\s*\(?(.+?)\)?\s*y:\s*\(?(.+?)\)?", "motion_glidesecstoxy"),
3171
+ (r"glide\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)\s*to\s*\[.*?\]", "motion_glideto"),
3172
+ (r"if on edge, bounce", "motion_ifonedgebounce"),
3173
+ (r"set rotation style\s*\[(?:left-right|all around|don't rotate)(?:\s*v)?\]", "motion_setrotationstyle"),
3174
+ (r"\(?x position\)?", "motion_xposition"), #(to x positon may detect where var is used)
3175
+ (r"\(?y position\)?", "motion_yposition"), #(to y position may detect where var is used)
3176
+ (r"\(?direction\)?", "motion_direction"), #(to direction may detect where var is used)
3177
+
3178
+ # --- Looks Blocks ---
3179
+ (r"switch costume to \[.+? v\]", "looks_switchcostumeto"),
3180
+ (r"next costume", "looks_nextcostume"),
3181
+ (r"switch backdrop to \[.+? v\] and wait", "looks_switchbackdroptowait"),
3182
+ (r"switch backdrop to \[.+? v\]", "looks_switchbackdropto"),
3183
+ (r"next backdrop", "looks_nextbackdrop"),
3184
+ (r"^\s*show\s*$", "looks_show"),
3185
+ (r"^\s*hide\s*$", "looks_hide"),
3186
+ (r"say\s+(?:\[.+?\]|\(.+?\)|.+?)\s*for\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)", "looks_sayforsecs"),
3187
+ (r"say\s+(?!.*\bfor\b\s*\(?\d+\)?\s*(?:sec|secs|second|seconds))(?:\[.+?\]|\(.+?\)|.+?)", "looks_say"),
3188
+ (r"think\s*\[.+?\]\s*for\s*\(?(.+?)\)?\s*(?:sec|secs|second|seconds)", "looks_thinkforsecs"),
3189
+ (r"think\s*\[.+?\]", "looks_think"),
3190
+ (r"change size by\s*\(?(.+?)\)?", "looks_changesizeby"),
3191
+ (r"set size to\s*\(?(.+?)\)?\s*%?", "looks_setsizeto"),
3192
+ (r"change\s*\[(.+?)(?:\s*v)?\]\s*effect by\s*\(?(.+?)\)?", "looks_changeeffectby"),
3193
+ (r"set\s*\[(.+?)(?:\s*v)?\]\s*effect to\s*\(?(.+?)\)?", "looks_seteffectto"),
3194
+ (r"clear graphic effects", "looks_cleargraphiceffects"),
3195
+ (r"\(costume \[.+? v\]\)", "looks_costumenumbername"),
3196
+ (r"\(backdrop \[.+? v\]\)", "looks_backdropnumbername"),
3197
+
3198
+ # --- Operators ---
3199
+ (r"<\s*[^<>?]+\s*<\s*[^<>?]+\s*>", "operator_lt"),
3200
+ (r"<\s*[^<>?]+\s*=\s*[^<>?]+\s*>", "operator_equals"),
3201
+ (r"<\s*[^<>?]+\s*>\s*[^<>?]+\s*>", "operator_gt"),
3202
+ (r"<\s*.*?\s+and\s+.*?\s*>", "operator_and"),
3203
+ (r"<\s*.*?\s+or\s+.*?\s*>", "operator_or"),
3204
+ (r"<\s*not\s+.*?\s*>", "operator_not"),
3205
+ (r"(?:\(join\s+(.+?)\s+(.+?)\)|join\s+(.+?)\s+(.+?))", "operator_join"),
3206
+ (r"\(\s*.+?\s*\+\s*.+?\s*\)", "operator_add"),
3207
+ (r"\(\s*(?!-\s*\d+(?:\.\d+)?\s*\))(.+?)\s+-\s+(.+?)\)", "operator_subtract"),
3208
+ (r"\(\s*.+?\s*\*\s*.+?\s*\)", "operator_multiply"),
3209
+ (r"\(\s*.+?\s*/\s*.+?\s*\)", "operator_divide"),
3210
+ (r"\(pick random\s+(.+?)\s+to\s+(.+?)\)", "operator_random"),
3211
+ (r"\(letter\s+(.+?)\s+of\s+(.+?)\)", "operator_letterof"),
3212
+ (r"\(length of\s+(.+?)\)", "operator_length"),
3213
+ (r"\(\s*.+?\s+mod\s+.+?\s*\)", "operator_mod"),
3214
+ (r"\(round\s+(.+?)\)", "operator_round"),
3215
+ (r"[<(]\s*\[(?![^\]]*\s+v\])[^\]]+?\]\s*contains\s*\[[^\]]+?\]\s*\??\s*[)>]", "operator_contains"),
3216
+ (r"\(\s*\[?(abs|floor|ceiling|sqrt|sin|cos|tan|asin|acos|atan|ln|log|e \^|10 \^)\s*(?:v)?\]?\s+of\s+.+?\)", "operator_mathop"),
3217
+
3218
+ ]
3219
+
3220
+ for pattern, opcode in patterns:
3221
+ for match in re.finditer(pattern, code_block, re.DOTALL):
3222
+ opcodes.append(opcode)
3223
+
3224
+ return opcodes
3225
+
3226
def analyze_opcode_counts(pseudo_code: str) -> list[dict]:
    """
    Tally how many times each opcode is detected in Scratch-like pseudo-code.

    Detection is delegated to ``_find_all_opcodes``; this function only
    aggregates its output.

    Args:
        pseudo_code: A string containing the pseudo-code.

    Returns:
        A list of ``{"opcode": <name>, "count": <int>}`` dictionaries,
        sorted alphabetically by opcode name so the output is stable.
    """
    tally = Counter(_find_all_opcodes(pseudo_code))

    # Opcode names are unique keys of the tally, so walking them in sorted
    # order yields the list already ordered by "opcode".
    return [{"opcode": name, "count": tally[name]} for name in sorted(tally)]
3250
+
3251
+ #################################################################################################################################################################
3252
+ #--------------------------------------------------[Helper function to separate and correct the json]------------------------------------------------------------
3253
+ #################################################################################################################################################################
3254
+
3255
def separate_scripts(pseudocode_string):
    """
    Split a block of Scratch pseudocode into one string per script.

    The start of every "hat" line is located with a regex, and the input
    is sliced from each hat to the next one (or to the end of the input).
    Any text before the first hat is discarded.

    Args:
        pseudocode_string (str): A string containing Scratch pseudocode.

    Returns:
        list: The separated scripts, each stripped of surrounding
              whitespace, in their original order. Empty when the input
              is empty/None or contains no hat line.
    """
    if not pseudocode_string:
        return []

    # Only the *start* offset of each hat is used, so every alternative is
    # kept to a single line ([^\]\n]) and cannot swallow a later hat.
    hat_pattern = "|".join((
        r"when (?:green )?flag click(?:ed)?",
        # Accept both the dropdown form "[space v]" and a bare key name.
        r"when (?:\[[^\]\n]*\]|\S+) key press(?:ed)?",
        r"when this sprite click(?:ed)?",
        r"when backdrop switches to \[[^\]\n]*\]",
        r"when I receive \[[^\]\n]*\]",
        # The threshold may be written as (10), [10] or a bare value.
        r"when \[[^\]\n]*\]\s*>",
        r"when I start as a clone",
    ))

    starts = [
        hit.start()
        for hit in re.finditer(hat_pattern, pseudocode_string, flags=re.IGNORECASE)
    ]
    # Each script ends where the next one starts; the last runs to the end.
    ends = starts[1:] + [len(pseudocode_string)]

    return [pseudocode_string[begin:stop].strip() for begin, stop in zip(starts, ends)]
3299
+
3300
def transform_logic_to_action_flow(source_data, description=""):
    """
    Reshape a 'refined_logic' structure into an 'action_overall_flow' one.

    Args:
        source_data (dict): Must contain a 'refined_logic' entry which in
                            turn holds 'name_variable' and 'pseudocode'.
        description (str): Text stored under the 'description' key of the
                           output.

    Returns:
        dict: ``{"action_overall_flow": {<name_variable>: {"description":
              ..., "plans": [{"logic": <pseudocode item>}, ...]}}}``.

    Raises:
        ValueError: If 'refined_logic', 'name_variable' or 'pseudocode'
                    is missing.
    """
    has_required_keys = (
        "refined_logic" in source_data
        and "name_variable" in source_data["refined_logic"]
        and "pseudocode" in source_data["refined_logic"]
    )
    if not has_required_keys:
        raise ValueError("Input dictionary is missing required keys: 'refined_logic', 'name_variable', or 'pseudocode'.")

    logic_section = source_data["refined_logic"]
    sprite_name = logic_section["name_variable"]

    # Wrap every pseudocode entry in its own {"logic": ...} plan.
    plans = []
    for snippet in logic_section["pseudocode"]:
        plans.append({"logic": snippet})

    sprite_entry = {"description": description, "plans": plans}
    return {"action_overall_flow": {sprite_name: sprite_entry}}
3337
+
3338
  #################################################################################################################################################################
3339
  #--------------------------------------------------[Helper main function]----------------------------------------------------------------------------------------
3340
  #################################################################################################################################################################
 
3384
  end
3385
  """
3386
  # print(pseudo_code)
3387
+ # opcode_counts_result = analyze_opcode_counts(pseudo_code)
3388
+ # generated_output_json, initial_opcode_occurrences = generate_blocks_from_opcodes(opcode_counts_result, all_block_definitions)
3389
  # all_generated_blocks = generate_plan(generated_output_json, initial_opcode_occurrences, pseudo_code)
3390
  # processed_blocks= process_scratch_blocks(all_generated_blocks, generated_output_json)
3391
  # renamed_blocks, renamed_counts = rename_blocks(processed_blocks, initial_opcode_occurrences)
3392
+ # print(opcode_counts_result)
3393
  # print("--------------\n\n")
3394
  # print(processed_blocks)
3395
  # print("--------------\n\n")