"""
build_tree_training_data.py - Build TREE-structured training data.
Lets the AI navigate: Root -> Device -> Package -> Bank -> Pin
And in reverse: Pin -> find the way back to Root

Run: python build_tree_training_data.py
Output: fpga_tree_training_data.jsonl
"""
|
|
| import json |
| import random |
| from collections import defaultdict |
|
|
# Load the pin database that the rest of the script iterates over.
# The encoding is passed explicitly so the read does not depend on the
# platform's locale default (the output file is written as UTF-8 as well).
with open("pin_database.json", "r", encoding="utf-8") as f:
    PIN_DB = json.load(f)
|
|
| |
| |
| |
|
|
# Skeleton of the hierarchy: a root node holding a single family node.
# The family's "children" mapping starts empty; device nodes are attached
# to it by the tree-population loop further down the file.
FPGA_TREE = dict(
    name="Xilinx Integrated Circuits",
    type="root",
    children={
        "Spartan-6 FPGA": dict(
            type="family",
            description="Low-cost FPGA family for high-volume applications",
            children={},
        ),
    },
)
|
|
| |
# Per-device metadata keyed by device name. Each row below is
# (name, has transceiver, description, packages) and is expanded into the
# {"transceiver", "description", "packages"} record the rest of the file reads.
DEVICE_INFO = {
    name: {"transceiver": has_gtp, "description": text, "packages": list(pkgs)}
    for name, has_gtp, text, pkgs in (
        ("LX4", False, "Smallest Spartan-6, 3,840 logic cells",
         ("TQG144", "CPG196", "CSG225")),
        ("LX9", False, "9,152 logic cells",
         ("TQG144", "CPG196", "CSG225", "FT(G)256", "CSG324")),
        ("LX16", False, "15,282 logic cells",
         ("CPG196", "CSG225", "FT(G)256", "CSG324")),
        ("LX25", False, "24,051 logic cells",
         ("FT(G)256", "CSG324")),
        ("LX25T", True, "24,051 logic cells + GTP transceivers",
         ("CSG324", "FG(G)484")),
        ("LX45", False, "43,661 logic cells",
         ("CSG324", "FG(G)484", "CS(G)484", "FG(G)676")),
        ("LX45T", True, "43,661 logic cells + GTP transceivers",
         ("CSG324", "FG(G)484", "CS(G)484")),
        ("LX75", False, "74,681 logic cells",
         ("FG(G)484", "CS(G)484", "FG(G)676")),
        ("LX75T", True, "74,681 logic cells + GTP transceivers",
         ("FG(G)484", "CS(G)484", "FG(G)676")),
        ("LX100", False, "101,261 logic cells",
         ("FG(G)484", "CS(G)484", "FG(G)676")),
        ("LX100T", True, "101,261 logic cells + GTP transceivers",
         ("FG(G)484", "CS(G)484", "FG(G)676", "FG(G)900")),
        ("LX150", False, "147,443 logic cells",
         ("FG(G)484", "CS(G)484", "FG(G)676", "FG(G)900")),
        ("LX150T", True, "147,443 logic cells + GTP transceivers",
         ("FG(G)484", "CS(G)484", "FG(G)676", "FG(G)900")),
    )
}
|
|
| |
# Per-package metadata keyed by package name. Each row carries the values
# for the fields named in the zip() call, in that order.
PACKAGE_INFO = {
    name: dict(zip(("type", "pitch", "size", "pins", "description"), row))
    for name, row in (
        ("TQG144", ("TQFP", "0.5mm", "20x20mm", 144, "Thin Quad Flat Pack")),
        ("CPG196", ("CSBGA", "0.5mm", "8x8mm", 196, "Chip Scale BGA")),
        ("CSG225", ("CSBGA", "0.8mm", "13x13mm", 225, "Chip Scale BGA")),
        ("FT(G)256", ("BGA", "1.0mm", "17x17mm", 256, "Fine-pitch Thin BGA")),
        ("CSG324", ("CSBGA", "0.8mm", "15x15mm", 324, "Chip Scale BGA")),
        ("FG(G)484", ("BGA", "1.0mm", "23x23mm", 484, "Fine-pitch BGA")),
        ("CS(G)484", ("CSBGA", "0.8mm", "19x19mm", 484, "Chip Scale BGA")),
        ("FG(G)676", ("BGA", "1.0mm", "27x27mm", 676, "Fine-pitch BGA")),
        ("FG(G)900", ("BGA", "1.0mm", "31x31mm", 900, "Fine-pitch BGA")),
    )
}
|
|
| |
# Human-readable location label for each bank identifier. Keys are strings,
# including the non-numeric "NA" bucket.
BANK_LOCATION = dict((
    ("0", "Top-Left"),
    ("1", "Right-Top"),
    ("2", "Bottom"),
    ("3", "Left"),
    ("4", "Left-Top (LX75T/LX100T/LX150T only)"),
    ("5", "Right-Top extra (LX75T/LX100T/LX150T only)"),
    ("101", "GTP Transceiver Top-Left"),
    ("123", "GTP Transceiver Top-Right"),
    ("NA", "Dedicated pins (JTAG, Config, Power)"),
))
|
|
| |
# Fill FPGA_TREE below the "Spartan-6 FPGA" family node, one level per step:
# device -> package -> bank -> pin. Device, package and bank nodes that
# already exist are reused as they are; pin leaves are written on every pass.
family_children = FPGA_TREE["children"]["Spartan-6 FPGA"]["children"]

for entry in PIN_DB.values():
    pkg_name = entry["package"]
    device_names = entry["devices"]

    for dev_name in device_names:
        # Device level: metadata comes from DEVICE_INFO, with neutral
        # fallbacks for devices that table does not list.
        if dev_name not in family_children:
            dev_meta = DEVICE_INFO.get(dev_name, {})
            family_children[dev_name] = {
                "type": "device",
                "transceiver": dev_meta.get("transceiver", False),
                "description": dev_meta.get("description", ""),
                "children": {},
            }
        package_nodes = family_children[dev_name]["children"]

        # Package level.
        if pkg_name not in package_nodes:
            pkg_meta = PACKAGE_INFO.get(pkg_name, {})
            package_nodes[pkg_name] = {
                "type": "package",
                "pins": pkg_meta.get("pins", 0),
                "description": pkg_meta.get("description", ""),
                "children": {},
            }
        bank_nodes = package_nodes[pkg_name]["children"]

        # Group this entry's pins by bank, keeping first-seen bank order.
        pins_by_bank = {}
        for pin_entry in entry["pins"]:
            pins_by_bank.setdefault(pin_entry["bank"], []).append(pin_entry)

        for bank_id, bank_members in pins_by_bank.items():
            # Bank level: pin_count is recorded only when the node is created.
            if bank_id not in bank_nodes:
                bank_nodes[bank_id] = {
                    "type": "bank",
                    "location": BANK_LOCATION.get(bank_id, "Unknown"),
                    "pin_count": len(bank_members),
                    "children": {},
                }
            leaf_nodes = bank_nodes[bank_id]["children"]

            # Leaf level: one node per pin, keyed by its pin number.
            for pin_entry in bank_members:
                ball = pin_entry["pin_number"]
                leaf_nodes[ball] = {
                    "type": "pin",
                    "description": pin_entry["pin_description"],
                    "bufio2": pin_entry.get("bufio2_region", ""),
                    "no_connect": pin_entry.get("no_connect", False),
                }
|
|
| |
| |
| |
|
|
def create_tree_path(device, package, bank, pin_num, pin_desc):
    """Describe the route from the tree root down to a single pin leaf.

    Args:
        device: Device name, looked up in ``DEVICE_INFO``.
        package: Package name, looked up in ``PACKAGE_INFO``.
        bank: Bank identifier, looked up in ``BANK_LOCATION``.
        pin_num: Identifier of the pin (used as the last path segment).
        pin_desc: Description text of the pin.

    Returns:
        A ``(path, explanation)`` tuple: the slash-separated path string and
        a multi-line bullet list that walks every level of that path.
        Unknown devices, packages or banks fall back to placeholder values
        instead of raising.
    """
    device_meta = DEVICE_INFO.get(device, {})
    package_meta = PACKAGE_INFO.get(package, {})
    bank_label = BANK_LOCATION.get(bank, "Unknown")
    transceiver_flag = "Yes" if device_meta.get("transceiver") else "No"

    segments = ("Root", "Xilinx", "Spartan-6", device, package, f"Bank_{bank}", pin_num)
    path = "/".join(map(str, segments))

    # One entry per output line; the text ends with a trailing newline.
    lines = [
        "Đường đi trong cây phân cấp:",
        "- **Root**: Xilinx Integrated Circuits",
        "- **Family**: Spartan-6 FPGA (low-cost, high-volume)",
        f"- **Device**: {device} ({device_meta.get('description', '')})",
        f"- Transceiver: {transceiver_flag}",
        f"- **Package**: {package} ({package_meta.get('description', '')})",
        f"- Type: {package_meta.get('type', 'N/A')}",
        f"- Pitch: {package_meta.get('pitch', 'N/A')}",
        f"- Size: {package_meta.get('size', 'N/A')}",
        f"- Total pins: {package_meta.get('pins', 'N/A')}",
        f"- **Bank**: {bank} ({bank_label})",
        f"- **Pin (Leaf Node)**: Ball {pin_num}",
        f"- **Định nghĩa**: {pin_desc}",
    ]
    explanation = "\n".join(lines) + "\n"
    return path, explanation
|
|
| |
# Accumulates every generated chat sample; later sections append to it.
samples = []


def _build_top_down_samples(pin_db, limit=3000):
    """Build root-to-leaf navigation samples, stopping once *limit* is reached.

    For every database entry and each device it lists, up to three randomly
    chosen pins per bank are turned into four question phrasings that all
    share the explanation produced by ``create_tree_path``.

    Args:
        pin_db: Mapping whose values provide "package", "devices" and "pins".
        limit: Maximum number of samples to produce.

    Returns:
        A list of chat samples, each a dict with a two-entry "messages" list.
    """
    built = []
    for db in pin_db.values():
        package = db["package"]

        # The bank grouping depends only on the entry, so build it once
        # instead of once per device.
        bank_pins = defaultdict(list)
        for pin in db["pins"]:
            bank_pins[pin["bank"]].append(pin)

        for device in db["devices"]:
            for bank, pins in bank_pins.items():
                for pin in random.sample(pins, min(3, len(pins))):
                    pin_num = pin["pin_number"]
                    pin_desc = pin["pin_description"]
                    _path, explanation = create_tree_path(
                        device, package, bank, pin_num, pin_desc
                    )

                    questions = [
                        f"Đi từ gốc Root xuống lá: Spartan-6 -> {device} -> {package} -> Bank {bank} -> Pin {pin_num}. Node lá này là gì?",
                        f"Trong cây phân cấp FPGA, tìm đường đến ball {pin_num} của {device} {package} Bank {bank}",
                        f"Navigate tree: Root/Xilinx/Spartan-6/{device}/{package}/Bank_{bank}/{pin_num}",
                        f"Từ Spartan-6 đi xuống {device} package {package} bank {bank}, ball {pin_num} là gì?",
                    ]

                    for q in questions:
                        # Leave all loops at once when the cap is hit; a bare
                        # `break` would only exit this innermost loop and keep
                        # sampling pins whose samples are then discarded.
                        if len(built) >= limit:
                            return built
                        built.append({
                            "messages": [
                                {"role": "user", "content": q},
                                {"role": "assistant", "content": explanation},
                            ]
                        })
    return built


print("[1/6] Generating TOP-DOWN tree path samples...")
samples.extend(_build_top_down_samples(PIN_DB))
|
|
| |
def _build_bottom_up_samples(pin_db, limit=2000):
    """Build leaf-to-root navigation samples, stopping once *limit* is reached.

    For every database entry and each device it lists, up to ten randomly
    chosen pins are turned into four question phrasings that share one
    answer walking from the pin back up to the root.

    Args:
        pin_db: Mapping whose values provide "package", "devices" and "pins".
        limit: Maximum number of samples to produce.

    Returns:
        A list of chat samples, each a dict with a two-entry "messages" list.
    """
    built = []
    for db in pin_db.values():
        package = db["package"]
        all_pins = db["pins"]
        # Package metadata does not change per device or pin; look it up once.
        pkg_info = PACKAGE_INFO.get(package, {})

        for device in db["devices"]:
            dev_info = DEVICE_INFO.get(device, {})

            # A fresh random draw per device, as each device gets its own samples.
            for pin in random.sample(all_pins, min(10, len(all_pins))):
                pin_num = pin["pin_number"]
                pin_desc = pin["pin_description"]
                bank = pin["bank"]

                questions = [
                    f"Tôi có node lá {pin_desc} tại ball {pin_num}. Tìm đường đi ngược về gốc.",
                    f"Pin {pin_num} ({pin_desc}) thuộc device nào, package nào, bank nào?",
                    f"Navigate up: {pin_desc}@{pin_num} -> ? -> ? -> Root",
                    f"Ball {pin_num} trong {package} là {pin_desc}. Nó nằm ở đâu trong cây FPGA?",
                ]

                answer_lines = [
                    "Đường đi ngược từ lá về gốc:",
                    f"- **Node lá**: Ball {pin_num} = **{pin_desc}**",
                    f"- **Bank**: {bank} ({BANK_LOCATION.get(bank, 'Unknown')})",
                    f"- **Package**: {package} ({pkg_info.get('description', '')}, {pkg_info.get('pins', 'N/A')} pins)",
                    f"- **Device**: {device} ({dev_info.get('description', '')})",
                    "- **Family**: Spartan-6 FPGA",
                    "- **Root**: Xilinx Integrated Circuits",
                    "",
                    f"Full path: `{pin_num}/{pin_desc} -> Bank {bank} -> {package} -> {device} -> Spartan-6 -> Xilinx`",
                ]
                answer = "\n".join(answer_lines) + "\n"

                for q in questions:
                    # Leave all loops at once when the cap is hit; a bare
                    # `break` would only exit this innermost loop.
                    if len(built) >= limit:
                        return built
                    built.append({
                        "messages": [
                            {"role": "user", "content": q},
                            {"role": "assistant", "content": answer},
                        ]
                    })
    return built


print("[2/6] Generating BOTTOM-UP samples...")
samples.extend(_build_bottom_up_samples(PIN_DB))
|
|
| |
def _pin_function_label(desc):
    """Return the function label shown for a pin description.

    Checks run in a fixed order and the first match wins; descriptions that
    match nothing are labelled "User I/O".
    """
    if "GCLK" in desc:
        return "Global Clock"
    if "VREF" in desc:
        return "Voltage Reference"
    if "HSWAPEN" in desc:
        return "Hot Swap Enable"
    # JTAG pins are matched by exact name, not by substring.
    if desc in ("TCK", "TDI", "TDO", "TMS"):
        return "JTAG"
    if "CCLK" in desc:
        return "Config Clock"
    if "PROGRAM" in desc:
        return "Config Reset"
    if "DONE" in desc:
        return "Config Done"
    if "GND" in desc:
        return "Ground"
    if "VCC" in desc:
        return "Power"
    return "User I/O"


def _build_sibling_samples(pin_db, limit=1000):
    """Build one "list the sibling pins" sample per bank with at least 3 pins.

    Only the first device of each database entry is used. Each sample lists
    up to five randomly chosen pins of the bank in a Markdown table followed
    by the bank's total pin count.

    Args:
        pin_db: Mapping whose values provide "package", "devices" and "pins".
        limit: Maximum number of samples to produce.

    Returns:
        A list of chat samples, each a dict with a two-entry "messages" list.
    """
    built = []
    for db in pin_db.values():
        package = db["package"]

        for device in db["devices"][:1]:
            bank_pins = defaultdict(list)
            for pin in db["pins"]:
                bank_pins[pin["bank"]].append(pin)

            for bank, pins in bank_pins.items():
                if len(pins) < 3:
                    continue
                if len(built) >= limit:
                    return built

                sampled = random.sample(pins, min(5, len(pins)))

                q = f"Trong cây {device}/{package}/Bank_{bank}, liệt kê các node lá (siblings)"

                parts = [
                    f"Các node lá (pins) trong nhánh **{device}/{package}/Bank {bank}** (siblings):\n",
                    "\n",
                    "| Ball | Pin Description | Chức năng |\n",
                    "|------|----------------|-----------|\n",
                ]
                for p in sampled:
                    desc = p["pin_description"]
                    parts.append(
                        f"| {p['pin_number']} | {desc} | {_pin_function_label(desc)} |\n"
                    )
                parts.append(
                    f"\nTổng: {len(pins)} pins trong Bank {bank} ({BANK_LOCATION.get(bank, '')})"
                )
                a = "".join(parts)

                built.append({
                    "messages": [
                        {"role": "user", "content": q},
                        {"role": "assistant", "content": a},
                    ]
                })
    return built


print("[3/6] Generating SIBLING relationship samples...")
samples.extend(_build_sibling_samples(PIN_DB))
|
|
| |
print("[4/6] Generating CROSS-BRANCH comparison samples...")
count = 0

# Database entries that list LX150T, keyed by package. When several entries
# share a package the last one wins.
lx150t_by_package = {
    entry["package"]: entry
    for entry in PIN_DB.values()
    if "LX150T" in entry["devices"]
}

packages_to_compare = [("FG(G)484", "CS(G)484"), ("FG(G)484", "FG(G)676"), ("CS(G)484", "FG(G)676")]
for pkg1, pkg2 in packages_to_compare:
    db1 = lx150t_by_package.get(pkg1)
    db2 = lx150t_by_package.get(pkg2)

    # Both packages must be present in the database to compare them.
    if not (db1 and db2):
        continue

    # Number of pins each package places in bank "0".
    bank0_count1 = sum(1 for p in db1["pins"] if p["bank"] == "0")
    bank0_count2 = sum(1 for p in db2["pins"] if p["bank"] == "0")
    meta1 = PACKAGE_INFO.get(pkg1, {})
    meta2 = PACKAGE_INFO.get(pkg2, {})

    q = f"So sánh cây LX150T {pkg1} và LX150T {pkg2}, Bank 0 khác nhau thế nào?"

    answer_lines = [
        f"So sánh nhánh **LX150T/{pkg1}/Bank_0** vs **LX150T/{pkg2}/Bank_0**:",
        "",
        f"| Thuộc tính | {pkg1} | {pkg2} |",
        "|------------|--------|--------|",
        f"| Tổng pins | {bank0_count1} | {bank0_count2} |",
        f"| Package type | {meta1.get('type', 'N/A')} | {meta2.get('type', 'N/A')} |",
        f"| Package size | {meta1.get('size', 'N/A')} | {meta2.get('size', 'N/A')} |",
        f"| Pitch | {meta1.get('pitch', 'N/A')} | {meta2.get('pitch', 'N/A')} |",
        "",
        "**Giống nhau**: Cùng Device (LX150T), cùng Bank 0 (Top-Left region), cùng các chức năng pin (IO_LXXY, GCLK, VREF).",
        "**Khác nhau**: Số lượng pin khác do package size khác. Ball assignments (vị trí vật lý) khác nhau hoàn toàn.",
    ]
    a = "\n".join(answer_lines) + "\n"

    if count < 800:
        samples.append({
            "messages": [
                {"role": "user", "content": q},
                {"role": "assistant", "content": a},
            ]
        })
        count += 1
|
|
| |
print("[5/6] Generating Part Number -> Tree Path samples...")

# Example part numbers with the fields each one decodes to.
PART_NUMBERS = {
    "XC6SLX150T-2FGG484C": {"device": "LX150T", "package": "FG(G)484", "speed": "2", "temp": "Commercial"},
    "XC6SLX150T-2CSG484C": {"device": "LX150T", "package": "CS(G)484", "speed": "2", "temp": "Commercial"},
    "XC6SLX150T-2FGG676C": {"device": "LX150T", "package": "FG(G)676", "speed": "2", "temp": "Commercial"},
    "XC6SLX45-3CSG324I": {"device": "LX45", "package": "CSG324", "speed": "3", "temp": "Industrial"},
    "XC6SLX9-2TQG144C": {"device": "LX9", "package": "TQG144", "speed": "2", "temp": "Commercial"},
}

# One sample per part number: decode the part number and map it to its
# device/package branch of the tree.
for pn, info in PART_NUMBERS.items():
    dev = info["device"]
    pkg = info["package"]
    speed = info["speed"]
    temp = info["temp"]
    dev_info = DEVICE_INFO.get(dev, {})
    pkg_info = PACKAGE_INFO.get(pkg, {})
    has_transceiver = bool(dev_info.get("transceiver"))

    # Pin total of the first database entry matching this device/package;
    # stays 0 when the database has no such entry.
    total_pins = next(
        (
            db["total_pins"]
            for db in PIN_DB.values()
            if db["package"] == pkg and dev in db["devices"]
        ),
        0,
    )

    # The temperature letter is the last character of the part number
    # ("C" or "I" in the table above); it must not be hard-coded to "C".
    temp_code = pn[-1]
    package_code = pkg.replace("(G)", "G")

    # Banks 101/123 are labelled as GTP transceiver banks in BANK_LOCATION,
    # so they are only listed for devices that have transceivers.
    bank_summary = "Bank 0-3, NA, 101, 123" if has_transceiver else "Bank 0-3, NA"

    q = f"Part number {pn} ánh xạ vào cây phân cấp như thế nào?"

    answer_lines = [
        f"Part Number **{pn}** -> Tree Path:",
        "",
        "```",
        f"Root/Xilinx/Spartan-6/{dev}/{pkg}",
        "```",
        "",
        "**Phân tích Part Number:**",
        "- **XC6S**: Xilinx Spartan-6 Family",
        f"- **{dev}**: Device {dev} ({dev_info.get('description', '')})",
        f"- **-{speed}**: Speed Grade {speed}",
        f"- **{package_code}**: Package {pkg} ({pkg_info.get('description', '')})",
        f"- **{temp_code}**: {temp} Temperature",
        "",
        "**Thuộc tính trong cây:**",
        f"- Node: Device ({dev})",
        f"- Transceiver: {'Yes' if has_transceiver else 'No'}",
        f"- Node con: Package ({pkg})",
        f"- Type: {pkg_info.get('type', 'N/A')}",
        f"- Pins: {total_pins}",
        f"- Size: {pkg_info.get('size', 'N/A')}",
        f"- Các lá: {total_pins} pin balls (chia thành các {bank_summary})",
    ]
    a = "\n".join(answer_lines) + "\n"

    samples.append({
        "messages": [
            {"role": "user", "content": q},
            {"role": "assistant", "content": a},
        ]
    })
|
|
| |
print("[6/6] Generating Pin Function -> Tree Search samples...")

# Keyword searched for inside pin descriptions -> label used in the prompt.
PIN_FUNCTIONS = {
    "GCLK": "Global Clock pins",
    "VREF": "Voltage Reference pins",
    "HSWAPEN": "Hot Swap Enable pin",
    "CCLK": "Configuration Clock",
    "PROGRAM_B": "Program/Reset configuration",
    "DONE": "Configuration Done indicator",
    "TCK": "JTAG Test Clock",
    "TDI": "JTAG Test Data In",
    "TDO": "JTAG Test Data Out",
    "TMS": "JTAG Test Mode Select",
}

# One sample per keyword: a table of up to eight matching pins with their
# tree paths.
for func_keyword, func_desc in PIN_FUNCTIONS.items():
    found_pins = []
    for db in PIN_DB.values():
        package = db["package"]
        devices = db["devices"]
        # Matches are attributed to the entry's first device.
        first_device = devices[0] if devices else "Unknown"

        found_pins.extend(
            {
                "device": first_device,
                "package": package,
                "bank": pin["bank"],
                "ball": pin["pin_number"],
                "desc": pin["pin_description"],
            }
            for pin in db["pins"]
            if func_keyword in pin["pin_description"]
        )

        # NOTE(review): the source's indentation was lost; the cap is applied
        # after each database entry here (stop scanning further entries once
        # 20 matches are collected) - confirm against the original script.
        if len(found_pins) >= 20:
            break

    if not found_pins:
        continue

    sampled = random.sample(found_pins, min(8, len(found_pins)))

    q = f"Tìm tất cả node lá có chức năng {func_keyword} ({func_desc}) trong cây Spartan-6"

    parts = [
        f"Tìm kiếm trong cây phân cấp: nodes có chức năng **{func_desc}** ({func_keyword})\n",
        "\n",
        "| Device | Package | Bank | Ball | Pin Description | Path |\n",
        "|--------|---------|------|------|-----------------|------|\n",
    ]
    for p in sampled:
        # Use the same "Root/Xilinx/Spartan-6/..." prefix as every other
        # path emitted by this script, so the tree paths stay consistent.
        path = f"Root/Xilinx/Spartan-6/{p['device']}/{p['package']}/Bank_{p['bank']}/{p['ball']}"
        parts.append(
            f"| {p['device']} | {p['package']} | {p['bank']} | {p['ball']} | {p['desc']} | {path} |\n"
        )

    # NOTE(review): this closing remark is fixed text; it is not derived from
    # the banks of the pins actually found.
    parts.append(
        "\n"
        f"**Pattern trong cây**: Các node {func_keyword} thường nằm rải rác ở nhiều Bank khác nhau,\n"
        "nhưng tập trung nhiều ở Bank 0 và Bank 2 (Top và Bottom regions).\n"
    )
    a = "".join(parts)

    samples.append({
        "messages": [
            {"role": "user", "content": q},
            {"role": "assistant", "content": a},
        ]
    })
|
|
| |
print(f"\nTotal tree-structured samples: {len(samples)}")
random.shuffle(samples)

# Write one JSON object per line; ensure_ascii=False keeps the Vietnamese
# text readable in the output file.
output_file = "fpga_tree_training_data.jsonl"
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(s, ensure_ascii=False) + "\n" for s in samples)

print(f"Saved to {output_file}")


def _first_question_match(all_samples, matches):
    """Return the first sample whose user question satisfies *matches*, else None."""
    for sample in all_samples:
        if matches(sample["messages"][0]["content"]):
            return sample
    return None


# Preview one sample of each direction. The list has just been shuffled, so
# samples[0] is an arbitrary sample (and does not exist when nothing was
# generated); pick the previews by the wording of their questions instead.
_TOP_DOWN_MARKERS = ("Đi từ gốc", "tìm đường đến ball", "Navigate tree", "đi xuống")

print("\n=== Sample 1: TOP-DOWN ===")
top_down = _first_question_match(
    samples, lambda text: any(marker in text for marker in _TOP_DOWN_MARKERS)
)
if top_down is not None:
    print(json.dumps(top_down, indent=2, ensure_ascii=False)[:1500])

print("\n=== Sample 2: BOTTOM-UP ===")
bottom_up = _first_question_match(
    samples, lambda text: "ngược" in text or "up" in text.lower()
)
if bottom_up is not None:
    print(json.dumps(bottom_up, indent=2, ensure_ascii=False)[:1500])
|
|