Spaces:
Paused
Paused
File size: 5,026 Bytes
1684743 ba7e5cb a4421ce 4fa7df9 ba7e5cb 4fa7df9 ba7e5cb 1684743 ba7e5cb 1684743 ba7e5cb 1684743 ba7e5cb 1684743 ba7e5cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import ast
import hashlib
def get_category_id(category):
    """Return the integer code used for *category* in node vectors.

    Codes are the positions of the names in the table below; any value
    that is not listed maps to 0, the same code as 'unknown'.
    """
    # Order matters: the index of each name is its category code.
    category_names = (
        'unknown', 'import', 'function', 'class',
        'if', 'while', 'for', 'try', 'expression',
        'spacer', 'elif', 'else', 'except',
        'return', 'assigned_variable', 'variable_def',
    )
    codes = {label: code for code, label in enumerate(category_names)}
    return codes.get(category, 0)
def create_vector(category, level, location, total_lines, parent_path):
    """
    Build the 6-element feature vector for one graph node:
    [Category, Depth, RelativeCenter, Density, ParentDepth, AncestryWeight]

    category     -- category name, converted to its integer code
    level        -- nesting depth, stored unchanged
    location     -- (start_line, end_line) pair
    total_lines  -- number of lines in the source; values below 1 are treated as 1
    parent_path  -- sequence of ancestor node-id strings, outermost first

    RelativeCenter, Density and AncestryWeight are rounded to 4 decimals;
    Category, Depth and ParentDepth are passed through as given.
    """
    category_code = get_category_id(category)
    first_line, last_line = location
    line_count = max(1, total_lines)  # guards the divisions below

    def four_decimals(value):
        # Round by formatting to 4 decimal places and parsing back.
        return float(format(value, ".4f"))

    # Midpoint of the node and the share of lines it covers, both relative
    # to the length of the whole source.
    relative_center = ((first_line + last_line) / 2) / line_count
    density = (last_line - first_line + 1) / line_count

    # Ancestry weight: MD5 of the concatenated ancestor ids, reduced modulo
    # 100 and scaled to 0.00-0.99, so the same ancestor path always yields
    # the same value.
    digest = hashlib.md5("".join(parent_path).encode()).digest()
    ancestry_weight = (int.from_bytes(digest, "big") % 100) / 100.0

    return [
        category_code,
        level,
        four_decimals(relative_center),
        four_decimals(density),
        len(parent_path),
        four_decimals(ancestry_weight),
    ]
def parse_source_to_graph(code):
    """Parse Python source into a flat list of structural nodes plus edges.

    Returns ``{"error": "..."}`` when *code* does not parse. Otherwise
    returns ``{"nodes": [...], "connections": [...]}`` where each node is a
    dict with the keys ``id``, ``label``, ``type``, ``source``, ``vector``,
    ``level``, ``lineno`` and ``parent_id``; nodes are ordered by line
    number and the connections come from ``generate_connections``.

    Only categorised AST nodes (imports, functions, classes, if/for/while,
    return, assignments, expression statements, try, except handlers) are
    emitted. Every other AST node is transparent: it produces no entry and
    its children are attached to the nearest emitted ancestor.
    """
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        return {"error": f"Syntax Error on line {e.lineno}: {e.msg}"}

    lines = code.splitlines(keepends=True)
    total_lines = len(lines)
    nodes = []

    # ast.TryStar (try/except*) only exists on Python 3.11+.
    try_types = (ast.Try,)
    if hasattr(ast, 'TryStar'):
        try_types += (ast.TryStar,)

    # (node types, category, fixed label). A fixed label of None keeps the
    # node's own ``name`` attribute (functions and classes).
    rules = (
        ((ast.Import, ast.ImportFrom), 'import', "import"),
        ((ast.FunctionDef, ast.AsyncFunctionDef), 'function', None),
        (ast.ClassDef, 'class', None),
        (ast.If, 'if', "if"),
        ((ast.For, ast.AsyncFor), 'for', "for"),
        (ast.While, 'while', "while"),
        (ast.Return, 'return', "return"),
        ((ast.Assign, ast.AnnAssign), 'assigned_variable', "assignment"),
        (ast.Expr, 'expression', "expr"),
        (try_types, 'try', "try"),
        (ast.ExceptHandler, 'except', "except"),
    )

    # Recursive visitor
    def traverse(node, parent_path=None, level=0, parent_id=None):
        if parent_path is None:
            parent_path = []

        category = 'other'
        name = getattr(node, 'name', None)
        for node_types, rule_category, fixed_name in rules:
            if isinstance(node, node_types):
                category = rule_category
                if fixed_name is not None:
                    name = fixed_name
                break

        lineno = getattr(node, 'lineno', 0)

        # Uncategorised nodes and nodes without a line number are
        # transparent: emit nothing, but keep descending with the same
        # context. Location-less containers such as ast.match_case hold
        # statements, so returning here would drop whole case bodies.
        if category == 'other' or not lineno:
            for child in ast.iter_child_nodes(node):
                traverse(child, parent_path, level, parent_id)
            return

        end_lineno = getattr(node, 'end_lineno', lineno)

        # Unique Node ID based on position to ensure consistency
        node_id = f"{type(node).__name__}_{lineno}_{getattr(node, 'col_offset', 0)}"

        # Full source lines spanned by the node
        source_segment = "".join(lines[lineno - 1:end_lineno])

        # Label: fixed or own name, except simple assignments show "<target> ="
        label = name if name else category
        if category == 'assigned_variable':
            # Assign has ``targets`` (list); AnnAssign has a single ``target``.
            targets = getattr(node, 'targets', []) or [getattr(node, 'target', None)]
            if targets and isinstance(targets[0], ast.Name):
                label = f"{targets[0].id} ="

        nodes.append({
            "id": node_id,
            "label": label,
            "type": category,
            "source": source_segment.strip(),
            "vector": create_vector(
                category, level, (lineno, end_lineno), total_lines, parent_path
            ),
            "level": level,
            "lineno": lineno,
            "parent_id": parent_id,
        })

        # Children of an emitted node sit one level deeper and record this
        # node as their parent. A new list is built so the caller's path is
        # never mutated.
        child_path = parent_path + [node_id]
        for child in ast.iter_child_nodes(node):
            traverse(child, child_path, level + 1, node_id)

    for node in tree.body:
        traverse(node)

    # Sort by line number for linear visual flow (stable, so nodes on the
    # same line keep their traversal order).
    nodes.sort(key=lambda x: x['lineno'])
    return {"nodes": nodes, "connections": generate_connections(nodes)}
def generate_connections(nodes):
    """Return the parent-to-child edges implied by each node's ``parent_id``.

    *nodes* is a list of dicts, each with an ``id`` and a ``parent_id`` key.
    One ``{"from": parent_id, "to": id, "type": "hierarchy"}`` edge is
    produced, in node order, for every node whose ``parent_id`` is truthy
    and matches the ``id`` of some node in the list. Nodes whose parent is
    missing from the list get no edge.
    """
    # Only membership is needed, so a set of ids is enough.
    known_ids = {n['id'] for n in nodes}

    # 1. Structural Hierarchy (Tree)
    return [
        {"from": n['parent_id'], "to": n['id'], "type": "hierarchy"}
        for n in nodes
        if n['parent_id'] and n['parent_id'] in known_ids
    ]