Spaces:
Runtime error
Runtime error
Shunfeng Zheng
committed on
Delete 题目生成.py
Browse files
题目生成.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import random
|
| 3 |
-
from collections import defaultdict, deque
|
| 4 |
-
|
| 5 |
-
def _is_all_steps_connected(steps):
    """Return True when every step contributes to the final step.

    A step "contributes" if the last step in ``steps`` consumes it directly
    or transitively through integer references in ``inputs``.

    Args:
        steps: List of step dicts, each with an ``"id"`` and an ``"inputs"``
            list. Integer entries in ``inputs`` are treated as references to
            other step ids; all other entries are ignored.

    Returns:
        bool: True if a backwards walk from the last step reaches every step
        id; False otherwise (including for an empty ``steps`` list).
    """
    if not steps:
        return False

    # Dependency graph: step id -> ids of the steps it consumes.
    parents = defaultdict(list)
    for step in steps:
        parents[step["id"]].extend(
            ref for ref in step["inputs"] if isinstance(ref, int)
        )

    # Walk backwards from the final step. Ids are marked as visited when they
    # are enqueued so that a shared parent is never queued twice.
    last_id = steps[-1]["id"]
    visited = {last_id}
    queue = deque([last_id])
    while queue:
        current = queue.popleft()
        for parent in parents.get(current, ()):
            if parent not in visited:
                visited.add(parent)
                queue.append(parent)

    return visited.issuperset(parents)


def _generate_candidate_steps():
    """Randomly build one candidate plan, without validating it.

    Returns:
        tuple: ``(locations, used_locations, step_definitions)`` where
        ``locations`` is the list of available ``LOC_i`` names,
        ``used_locations`` is the set of those names referenced as a base by
        at least one step, and ``step_definitions`` is the list of generated
        step dicts (possibly empty).
    """
    num_locations = random.randint(1, 3)
    locations = [f"LOC_{i + 1}" for i in range(num_locations)]
    used_locations = set()
    # A step may be anchored on a location name or on an earlier step id.
    all_refs = list(locations)
    step_definitions = []

    num_steps = random.randint(2, 5)
    for _ in range(num_steps):
        func = random.choice(["Relative", "Azimuth", "Between"])

        if func == "Between":
            if len(all_refs) < 2:
                # Not enough anchors yet; this attempt produces no step.
                continue
            bases = random.sample(all_refs, 2)
            inputs = list(bases)
        else:
            bases = [random.choice(all_refs)]
            if func == "Relative":
                direction = random.choice([
                    "north", "south", "east", "west",
                    "northeast", "northwest", "southeast", "southwest",
                ])
                distance = f"{random.randint(1, 10)} km"
                inputs = [bases[0], direction, distance]
            else:
                angle = f"{random.randint(0, 359)}°"
                distance = f"{random.randint(1, 10)} km"
                inputs = [bases[0], angle, distance]

        # Only the anchors count as "used" locations; direction/angle/distance
        # are also strings but must not be recorded here.
        used_locations.update(b for b in bases if isinstance(b, str))

        # Step ids are consecutive, starting at 1.
        step_id = len(step_definitions) + 1
        step_definitions.append(
            {"id": step_id, "function": func, "inputs": inputs}
        )
        all_refs.append(step_id)

    return locations, used_locations, step_definitions


def generate_localization_samples(n):
    """Generate ``n`` random localization plans by rejection sampling.

    Candidates are drawn repeatedly and kept only when (a) at least one step
    was produced, (b) every available location is used as a base by some
    step, and (c) every step feeds into the final step.

    Args:
        n: Number of samples to produce. Values <= 0 yield an empty list.

    Returns:
        list[dict]: Samples shaped as
        ``{"index": int, "instruction": "", "steps": [...]}`` with ``index``
        running from 1 to ``n``.
    """
    all_data = []

    while len(all_data) < n:
        locations, used_locations, step_definitions = _generate_candidate_steps()

        if not step_definitions:
            continue  # no valid step was produced; draw again
        if not used_locations.issuperset(locations):
            continue  # some location is never referenced; draw again
        if not _is_all_steps_connected(step_definitions):
            continue  # some step does not contribute to the result; draw again

        all_data.append({
            "index": len(all_data) + 1,
            "instruction": "",
            "steps": step_definitions,
        })

    return all_data
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
def write_custom_json(data, filename):
    """Write samples to ``filename`` as JSON with one step per line.

    The layout is hand-built (rather than produced by ``json.dump``) so that
    each step object stays on a single line.

    Args:
        data: Sequence of sample dicts. Each must provide ``"index"`` and
            ``"steps"``; ``"instruction"`` is optional and defaults to an
            empty string. Each step must provide ``"id"``, ``"function"``
            and ``"inputs"``.
        filename: Path of the file to create or overwrite (UTF-8).
    """

    def format_step(step):
        # Serialise every field with json.dumps so that quotes, backslashes
        # or non-ASCII text in any value cannot break the output.
        step_id = json.dumps(step["id"])
        function = json.dumps(step["function"], ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

    last_position = len(data) - 1

    with open(filename, "w", encoding="utf-8") as f:
        f.write("[\n")
        for i, item in enumerate(data):
            index = json.dumps(item["index"])
            # Emit the sample's own instruction instead of discarding it.
            instruction = json.dumps(item.get("instruction", ""), ensure_ascii=False)
            step_lines = [f" {format_step(step)}" for step in item["steps"]]

            f.write(" {\n")
            f.write(f' "index": {index},\n')
            f.write(f' "instruction": {instruction},\n')
            f.write(' "steps": [\n')
            f.write(",\n".join(step_lines))
            f.write("\n ]\n")
            # No trailing comma after the final sample.
            f.write(" }" + (",\n" if i < last_position else "\n"))
        f.write("]\n")
|
| 128 |
-
|
| 129 |
-
# Run: generate 100 samples and save them next to the script.
if __name__ == "__main__":
    output_path = "localization_samples.json"
    write_custom_json(generate_localization_samples(100), output_path)
    print("✅ Saved to localization_samples.json with all steps contributing.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|