lafifi-24 commited on
Commit
8d40657
·
1 Parent(s): 933c2fa
pyan_insperation/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # this module gets most of its code
2
+ # from https://github.com/Technologicat/pyan/
pyan_insperation/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (153 Bytes). View file
 
pyan_insperation/__pycache__/analyzer.cpython-312.pyc ADDED
Binary file (72.6 kB). View file
 
pyan_insperation/__pycache__/anutils.cpython-312.pyc ADDED
Binary file (12.9 kB). View file
 
pyan_insperation/__pycache__/node.cpython-312.pyc ADDED
Binary file (8.14 kB). View file
 
pyan_insperation/analyzer.py ADDED
@@ -0,0 +1,1749 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
+ from structlog import get_logger
4
+ import symtable
5
+ from typing import Union
6
+
7
+ from .anutils import (
8
+ ExecuteInInnerScope,
9
+ Scope,
10
+ UnresolvedSuperCallError,
11
+ format_alias,
12
+ get_ast_node_name,
13
+ get_module_name,
14
+ resolve_method_resolution_order,
15
+ sanitize_exprs,
16
+ tail,
17
+ )
18
+ from .node import Flavor, Node
19
+
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
+
25
+
26
+ class CallGraphVisitor(ast.NodeVisitor):
27
+ """A visitor that can be walked over a Python AST, and will derive
28
+ information about the objects in the AST and how they use each other.
29
+
30
+ A single CallGraphVisitor object can be run over several ASTs (from a
31
+ set of source files). The resulting information is the aggregate from
32
+ all files. This way use information between objects in different files
33
+ can be gathered."""
34
+
35
def __init__(self, files: dict, root: Union[str, None] = None):
    """Set up the analysis state and immediately analyze all given files.

    Args:
        files: mapping of filename to the source of that file; the values
            are later handed to ``ast.parse`` by ``process_one``.
        root: optional root forwarded to ``get_module_name`` whenever a
            module name is derived from a filename.
    """
    self.logger = logger

    self.root = root
    self.files = files
    self.filenames = files.keys()

    # Full module names for all given files: inverse mapping for recording
    # which file each AST node came from.
    #
    # The same ``root`` that process_one() uses is passed here, so that the
    # keys agree with the ``self.module_name`` values that visit_ImportFrom()
    # combines and looks up in this mapping.
    self.module_to_filename = {}
    for filename in files:
        mod_name = get_module_name(filename, files=files, root=root)
        self.module_to_filename[mod_name] = filename

    # data gathered from analysis
    self.defines_edges = {}
    self.uses_edges = {}
    self.nodes = {}  # Node name: list of Node objects (in possibly different namespaces)
    self.scopes = {}  # fully qualified name of namespace: Scope object

    self.class_base_ast_nodes = {}  # pass 1: class Node: list of AST nodes
    self.class_base_nodes = {}  # pass 2: class Node: list of Node objects (local bases, no recursion)
    self.mro = {}  # pass 2: class Node: list of Node objects in Python's MRO order

    # current context for analysis
    self.module_name = None
    self.filename = None
    self.name_stack = []  # for building namespace name, node naming
    self.scope_stack = []  # the Scope objects currently in scope
    self.class_stack = []  # Nodes for class definitions currently in scope
    self.context_stack = []  # for detecting which FunctionDefs are methods
    self.last_value = None

    # Analyze.
    self.process()
67
+
68
+ def process(self):
69
+ """Analyze the set of files, twice so that any forward-references are picked up."""
70
+ for pas in range(2):
71
+ for filename in self.filenames:
72
+ self.logger.info("========== pass %d, file '%s' ==========" % (pas + 1, filename))
73
+ self.process_one(filename)
74
+ if pas == 0:
75
+ self.resolve_base_classes() # must be done only after all files seen
76
+ self.postprocess()
77
+
78
+ def process_one(self, filename):
79
+ """Analyze the specified Python source file."""
80
+ if filename not in self.filenames:
81
+ raise ValueError(
82
+ "Filename '%s' has not been preprocessed (was not given to __init__, which got %s)"
83
+ % (filename, self.filenames)
84
+ )
85
+
86
+ self.filename = filename
87
+ self.module_name = get_module_name(filename, self.files, root=self.root)
88
+ self.analyze_scopes(self.files[filename], filename) # add to the currently known scopes
89
+ self.visit(ast.parse(self.files[filename], filename))
90
+ self.module_name = None
91
+ self.filename = None
92
+
93
def resolve_base_classes(self):
    """Turn the base-class AST nodes recorded in pass 1 into graph Nodes.

    Meant to run between pass 1 and pass 2 (the scope stack must be empty).
    Only ``ast.Name`` and ``ast.Attribute`` bases are looked up; any other
    kind of base expression is ignored. Afterwards ``self.mro`` is
    recomputed from the resolved bases.
    """
    self.logger.debug("Resolving base classes")
    assert len(self.scope_stack) == 0  # only allowed between passes

    for class_node, base_ast_nodes in self.class_base_ast_nodes.items():
        resolved = []
        self.class_base_nodes[class_node] = resolved
        for base_ast in base_ast_nodes:
            # perform the lookup in the scope enclosing the class definition
            enclosing_scope = self.scopes[class_node.namespace]
            self.scope_stack.append(enclosing_scope)

            if isinstance(base_ast, ast.Name):
                base = self.get_value(base_ast.id)
            elif isinstance(base_ast, ast.Attribute):
                # don't care about obj, just grab attr
                _, base = self.get_attribute(base_ast)
            else:
                # give up
                base = None

            self.scope_stack.pop()

            if not isinstance(base, Node):
                continue
            if base.namespace is not None:
                resolved.append(base)

    self.logger.debug("All base classes (non-recursive, local level only): %s" % self.class_base_nodes)

    self.logger.debug("Resolving method resolution order (MRO) for all analyzed classes")
    self.mro = resolve_method_resolution_order(self.class_base_nodes, self.logger)
    self.logger.debug("Method resolution order (MRO) for all analyzed classes: %s" % self.mro)
124
+
125
+ def postprocess(self):
126
+ """Finalize the analysis."""
127
+
128
+ # Compared to the original Pyan, the ordering of expand_unknowns() and
129
+ # contract_nonexistents() has been switched.
130
+ #
131
+ # It seems the original idea was to first convert any unresolved, but
132
+ # specific, references to the form *.name, and then expand those to see
133
+ # if they match anything else. However, this approach has the potential
134
+ # to produce a lot of spurious uses edges (for unrelated functions with
135
+ # a name that happens to match).
136
+ #
137
+ # Now that the analyzer is (very slightly) smarter about resolving
138
+ # attributes and imports, we do it the other way around: we only expand
139
+ # those references that could not be resolved to any known name, and
140
+ # then remove any references pointing outside the analyzed file set.
141
+
142
+ self.expand_unknowns()
143
+ self.resolve_imports()
144
+ self.contract_nonexistents()
145
+ self.cull_inherited()
146
+ self.collapse_inner()
147
+
148
+ ###########################################################################
149
+ # visitor methods
150
+
151
+ # In visit_*(), the "node" argument refers to an AST node.
152
+
153
+ # Python docs:
154
+ # https://docs.python.org/3/library/ast.html#abstract-grammar
155
+
156
def resolve_imports(self):
    """Resolve imported items to their definitions and remap the graph.

    Builds a mapping from import nodes (and from undefined attribute nodes
    reached through an import) to the nodes they stand for, then rewrites
    ``self.nodes``, ``self.uses_edges`` and ``self.defines_edges`` through
    that mapping. Edge entries whose target set is empty are dropped.
    """
    # Work list: every node that was created for an imported item.
    pending = {n for group in self.nodes.values() for n in group if n.flavor == Flavor.IMPORTEDITEM}

    # import node -> node it should be replaced by
    import_mapping = {}
    while pending:
        from_node = pending.pop()
        if from_node in import_mapping:
            continue

        to_uses = self.uses_edges.get(from_node, set([from_node]))
        assert len(to_uses) == 1
        # resolve alias; note that pop() empties the set stored in uses_edges
        to_node = to_uses.pop()

        # resolve namespace and get module
        if to_node.namespace == "":
            module_node = to_node
        else:
            assert from_node.name == to_node.name
            module_node = self.get_node("", to_node.namespace)

        module_uses = self.uses_edges.get(module_node)
        if module_uses is None:
            continue

        # if the module uses an item of the same name, map the import to it
        for candidate in module_uses:
            if candidate.name != from_node.name:
                continue
            import_mapping[from_node] = candidate
            # follow chains of imports, but avoid self-recursion
            if candidate.flavor == Flavor.IMPORTEDITEM and from_node is not candidate:
                pending.add(candidate)
            break

    # Undefined attribute nodes whose namespace is "<import ns>.<import name>"
    # are mapped to the same-named node defined by the import's target.
    attribute_import_mapping = {}
    for group in self.nodes.values():
        for node in group:
            if node.defined or node.flavor != Flavor.ATTRIBUTE:
                continue
            for from_node, to_node in import_mapping.items():
                if f"{from_node.namespace}.{from_node.name}" != node.namespace:
                    continue
                if from_node.flavor != Flavor.IMPORTEDITEM:
                    continue
                # use define edges as potential candidates
                if to_node not in self.defines_edges:
                    continue
                for candidate in self.defines_edges[to_node]:
                    if candidate.name == node.name:
                        attribute_import_mapping[node] = candidate
                        break
    import_mapping.update(attribute_import_mapping)

    def _mapped(n):
        # nodes without a mapping stay as they are
        return import_mapping.get(n, n)

    def _remap_edges(edges):
        return {
            _mapped(src): {_mapped(dst) for dst in dsts}
            for src, dsts in edges.items()
            if len(dsts) > 0
        }

    # remap nodes based on import mapping
    self.nodes = {name: [_mapped(n) for n in group] for name, group in self.nodes.items()}
    self.uses_edges = _remap_edges(self.uses_edges)
    self.defines_edges = _remap_edges(self.defines_edges)
222
+
223
+ def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000):
224
+ """
225
+ filter callgraph nodes that related to `node` or are in `namespace`
226
+
227
+ Args:
228
+ node: pyan node for which related nodes should be found, if none, filter only for namespace
229
+ namespace: namespace to search in (name of top level module),
230
+ if None, determines namespace from `node`
231
+ max_iter: maximum number of iterations and nodes to iterate
232
+
233
+ Returns:
234
+ self
235
+ """
236
+ # filter the nodes to avoid cluttering the callgraph with irrelevant information
237
+ filtered_nodes = self.get_related_nodes(node, namespace=namespace, max_iter=max_iter)
238
+
239
+ self.nodes = {name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items()}
240
+ self.uses_edges = {
241
+ node: {n for n in nodes if n in filtered_nodes}
242
+ for node, nodes in self.uses_edges.items()
243
+ if node in filtered_nodes
244
+ }
245
+ self.defines_edges = {
246
+ node: {n for n in nodes if n in filtered_nodes}
247
+ for node, nodes in self.defines_edges.items()
248
+ if node in filtered_nodes
249
+ }
250
+ return self
251
+
252
+ def get_related_nodes(
253
+ self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000
254
+ ) -> set:
255
+ """
256
+ get nodes that related to `node` or are in `namespace`
257
+
258
+ Args:
259
+ node: pyan node for which related nodes should be found, if none, filter only for namespace
260
+ namespace: namespace to search in (name of top level module),
261
+ if None, determines namespace from `node`
262
+ max_iter: maximum number of iterations and nodes to iterate
263
+
264
+ Returns:
265
+ set: set of nodes related to `node` including `node` itself
266
+ """
267
+ # check if searching through all nodes is necessary
268
+ if node is None:
269
+ queue = []
270
+ if namespace is None:
271
+ new_nodes = {n for items in self.nodes.values() for n in items}
272
+ else:
273
+ new_nodes = {
274
+ n
275
+ for items in self.nodes.values()
276
+ for n in items
277
+ if n.namespace is not None and namespace in n.namespace
278
+ }
279
+
280
+ else:
281
+ new_nodes = set()
282
+ if namespace is None:
283
+ namespace = node.namespace.strip(".").split(".", 1)[0]
284
+ queue = [node]
285
+
286
+ # use queue system to search through nodes
287
+ # essentially add a node to the queue and then search all connected nodes which are in turn added to the queue
288
+ # until the queue itself is empty or the maximum limit of max_iter searches have been hit
289
+ i = max_iter
290
+ while len(queue) > 0:
291
+ item = queue.pop()
292
+ if item not in new_nodes:
293
+ new_nodes.add(item)
294
+ i -= 1
295
+ if i < 0:
296
+ break
297
+ queue.extend(
298
+ [
299
+ n
300
+ for n in self.uses_edges.get(item, [])
301
+ if n in self.uses_edges and n not in new_nodes and namespace in n.namespace
302
+ ]
303
+ )
304
+ queue.extend(
305
+ [
306
+ n
307
+ for n in self.defines_edges.get(item, [])
308
+ if n in self.defines_edges and n not in new_nodes and namespace in n.namespace
309
+ ]
310
+ )
311
+
312
+ return new_nodes
313
+
314
def visit_Module(self, node):
    """Visit a module: register its Node, then visit its children in the module's scope."""
    self.logger.debug("Module %s, %s" % (self.module_name, self.filename))

    module_ns = self.module_name

    # Modules live in the top-level namespace, ''.
    module_node = self.get_node("", module_ns, node, flavor=Flavor.MODULE)
    self.associate_node(module_node, node, filename=self.filename)

    # Enter the module context ...
    self.name_stack.append(module_ns)
    self.scope_stack.append(self.scopes[module_ns])
    self.context_stack.append("Module %s" % (module_ns))

    # ... visit the **children** of node ...
    self.generic_visit(node)

    # ... and leave it again, in reverse order.
    for stack in (self.context_stack, self.scope_stack, self.name_stack):
        stack.pop()
    self.last_value = None

    edge_added = self.add_defines_edge(module_node, None)
    if edge_added:
        self.logger.info("Def Module %s" % node)
333
+
334
def visit_ClassDef(self, node):
    """Visit a class definition.

    Creates the class Node with a defines edge from the enclosing
    namespace, binds the class name in the current scope, records the base
    class AST nodes for later resolution, and visits bases and body inside
    the class scope.
    """
    self.logger.debug("ClassDef %s, %s:%s" % (node.name, self.filename, node.lineno))

    parent = self.get_node_of_current_namespace()
    class_node = self.get_node(parent.get_name(), node.name, node, flavor=Flavor.CLASS)
    if self.add_defines_edge(parent, class_node):
        self.logger.info("Def from %s to Class %s" % (parent, class_node))

    # The graph Node may already exist from an earlier FromImport, in which
    # case its AST node points at the import site. Re-associate it with
    # this AST node (the definition site) so that annotated output gets the
    # correct source line number.
    self.associate_node(class_node, node, self.filename)

    # Bind the class name to the graph Node in the current scope.
    self.set_value(node.name, class_node)

    # Enter the class scope.
    self.class_stack.append(class_node)
    self.name_stack.append(node.name)
    inner_ns = self.get_node_of_current_namespace().get_name()
    self.scope_stack.append(self.scopes[inner_ns])
    self.context_stack.append("ClassDef %s" % (node.name))

    recorded_bases = []
    self.class_base_ast_nodes[class_node] = recorded_bases
    for base in node.bases:
        # gather info for resolution of inherited attributes in pass 2 (see get_attribute())
        recorded_bases.append(base)
        # mark uses from a derived class to its bases (via names appearing in a load context).
        self.visit(base)

    for stmt in node.body:
        self.visit(stmt)

    # Leave the class scope.
    self.context_stack.pop()
    self.scope_stack.pop()
    self.name_stack.pop()
    self.class_stack.pop()
377
+
378
def visit_FunctionDef(self, node):
    """Visit a function or method definition.

    Creates the function Node with a defines edge from the enclosing
    namespace, binds the function name in the current scope, and analyzes
    arguments and body inside the function scope. If no scope is known for
    the function's namespace, the body is not analyzed.
    """
    self.logger.debug("FunctionDef %s, %s:%s" % (node.name, self.filename, node.lineno))

    # Done first, before entering the function scope:
    #
    #  - Decorators are analyzed; they belong to the surrounding scope.
    #
    #  - The flavor is determined: function, (instance) method, static
    #    method or class method.
    #
    #  - The name representing "self" is grabbed for instance and class
    #    methods. (For a class method it represents cls, but Pyan only
    #    cares about types, not instances.)
    self_name, flavor = self.analyze_functiondef(node)

    # Now the Node can be created.
    parent = self.get_node_of_current_namespace()
    func_node = self.get_node(parent.get_name(), node.name, node, flavor=flavor)
    if self.add_defines_edge(parent, func_node):
        self.logger.info("Def from %s to Function %s" % (parent, func_node))

    # Same remarks as in visit_ClassDef(): point the Node at the definition
    # site and bind the name in the current scope.
    self.associate_node(func_node, node, self.filename)
    self.set_value(node.name, func_node)

    # Enter the function scope.
    self.name_stack.append(node.name)
    inner_ns = self.get_node_of_current_namespace().get_name()
    if inner_ns not in self.scopes:
        # no scope known for this function: undo the push and stop here
        self.name_stack.pop()
        return
    func_scope = self.scopes[inner_ns]
    self.scope_stack.append(func_scope)
    self.context_stack.append("FunctionDef %s" % (node.name))

    # Capture which names correspond to function args.
    self.generate_args_nodes(node.args, inner_ns)

    # self_name is an ordinary name in the method namespace, except that
    # Python sets its value implicitly when the method is called. Bind it
    # to its initial value, i.e. the current class (class, because Pyan
    # cares only about object types, not instances). From here on it
    # behaves like any other name.
    if self_name is not None:
        class_node = self.get_current_class()
        func_scope.defs[self_name] = class_node
        self.logger.info('Method def: setting self name "%s" to %s' % (self_name, class_node))

    # record bindings of args to the given default values, if present
    self.analyze_arguments(node.args)

    # Analyze the function body.
    for stmt in node.body:
        self.visit(stmt)

    # Exit the function scope.
    self.context_stack.pop()
    self.scope_stack.pop()
    self.name_stack.pop()
449
+
450
+ def visit_AsyncFunctionDef(self, node):
451
+ self.visit_FunctionDef(node) # TODO: alias for now; tag async functions in output in a future version?
452
+
453
def visit_Lambda(self, node):
    """Visit a lambda: analyze its arguments and body inside an inner "lambda" scope."""
    # TODO: avoid lumping together all lambdas in the same namespace.
    self.logger.debug("Lambda, %s:%s" % (self.filename, node.lineno))
    with ExecuteInInnerScope(self, "lambda"):
        lambda_ns = self.get_node_of_current_namespace().get_name()
        lambda_args = node.args
        self.generate_args_nodes(lambda_args, lambda_ns)
        self.analyze_arguments(lambda_args)
        # the body of a lambda is a single expression
        self.visit(node.body)
461
+
462
+ def generate_args_nodes(self, ast_args, inner_ns):
463
+ """Capture which names correspond to function args.
464
+
465
+ In the function scope, set them to a nonsense Node,
466
+ to prevent leakage of identifiers of matching name
467
+ from the enclosing scope (due to the local value being None
468
+ until we set it to this nonsense Node).
469
+
470
+ ast_args: node.args from a FunctionDef or Lambda
471
+ inner_ns: namespace of the function or lambda, for scope lookup
472
+ """
473
+ sc = self.scopes[inner_ns]
474
+ # As the name of the nonsense node, we can use any string that
475
+ # is not a valid Python identifier.
476
+ #
477
+ # It has no sensible flavor, so we leave its flavor unspecified.
478
+ nonsense_node = self.get_node(inner_ns, "^^^argument^^^", None)
479
+ # args, vararg (*args), kwonlyargs, kwarg (**kwargs)
480
+ for a in ast_args.args: # positional
481
+ sc.defs[a.arg] = nonsense_node
482
+ if ast_args.vararg is not None: # *args if present
483
+ sc.defs[ast_args.vararg] = nonsense_node
484
+ for a in ast_args.kwonlyargs: # any after *args or *
485
+ sc.defs[a.arg] = nonsense_node
486
+ if ast_args.kwarg is not None: # **kwargs if present
487
+ sc.defs[ast_args.kwarg] = nonsense_node
488
+
489
def analyze_arguments(self, ast_args):
    """Analyze an arguments node of the AST.

    Record bindings of args to the given default values, if present.

    Used for analyzing FunctionDefs and Lambdas.

    ast_args: node.args from a FunctionDef or Lambda
    """
    # https://greentreesnakes.readthedocs.io/en/latest/nodes.html?highlight=functiondef#arguments
    if ast_args.defaults:
        # ``defaults`` belongs to the *last* n positional parameters, counted
        # over positional-only and regular positional parameters together
        # (posonlyargs only exists on Python 3.8+, hence the getattr).
        positional = [*getattr(ast_args, "posonlyargs", []), *ast_args.args]
        n = len(ast_args.defaults)
        for tgt, val in zip(positional[-n:], ast_args.defaults):
            self.analyze_binding(sanitize_exprs(tgt), sanitize_exprs(val))

    if ast_args.kw_defaults:
        # ``kw_defaults`` is paired with ``kwonlyargs``; an entry of None is
        # skipped, as there is no default value to bind.
        for tgt, val in zip(ast_args.kwonlyargs, ast_args.kw_defaults):
            if val is not None:
                self.analyze_binding(sanitize_exprs(tgt), sanitize_exprs(val))
509
+
510
def visit_Import(self, node):
    """Visit ``import a, b as c``: analyze each imported module separately."""
    imported = [format_alias(x) for x in node.names]
    self.logger.debug("Import %s, %s:%s" % (imported, self.filename, node.lineno))

    # TODO: add support for relative imports (path may be like "....something.something")
    # https://www.python.org/dev/peps/pep-0328/#id10

    # the names are modules
    for import_item in node.names:
        self.analyze_module_import(import_item, node)
518
+
519
def visit_ImportFrom(self, node):
    """Visit ``from X import a, b as c``.

    The target module name is determined first (relative imports are made
    absolute using ``self.module_name``). Then, for every imported name, a
    Node is created or fetched, bound in the current scope under its alias,
    and linked with a uses edge from the current namespace.
    """
    imported = [format_alias(x) for x in node.names]
    filename = self.filename
    lineno = node.lineno

    self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, imported, filename, lineno))

    # Pyan needs to know the package structure, and how the program being
    # analyzed is actually going to be invoked (!), to be able to resolve
    # relative imports correctly.
    #
    # As a solution, imports are registered here and resolved later, when
    # all files have been parsed.
    from_node = self.get_node_of_current_namespace()

    if node.module is None:
        # relative import without a module, such as "from . import foo"
        self.logger.debug(
            "ImportFrom (original) from %s import %s, %s:%s" % ("." * node.level, imported, filename, lineno)
        )
        tgt_name = self.module_name.rsplit(".", node.level)[0]
        self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, imported, filename, lineno))
    elif node.level != 0:
        # relative import with a module, such as "from ..module import foo"
        self.logger.debug(
            "ImportFrom (original): from %s import %s, %s:%s" % (node.module, imported, filename, lineno)
        )
        package = self.module_name.rsplit(".", node.level)[0]
        tgt_name = package + "." + node.module
        self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, imported, filename, lineno))
    else:
        # normal "from module.submodule import foo"
        tgt_name = node.module

    # link each import separately
    for alias in node.names:
        qualified = tgt_name + "." + alias.name
        if qualified in self.module_to_filename:
            # the imported name is itself one of the analyzed modules
            to_node = self.get_node("", qualified, node, flavor=Flavor.MODULE)
        else:
            to_node = self.get_node(tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM)

        # the name under which the import is visible: the alias, if there is one
        alias_name = alias.name if alias.asname is None else alias.asname
        self.set_value(alias_name, to_node)  # set node to be discoverable in module
        self.logger.info("From setting name %s to %s" % (alias_name, to_node))

        self.logger.debug("Use from %s to ImportFrom %s" % (from_node, to_node))
        if self.add_uses_edge(from_node, to_node):
            self.logger.info("New edge added for Use from %s to ImportFrom %s" % (from_node, to_node))
575
+
576
def analyze_module_import(self, import_item, ast_node):
    """Register the import of a module.

    Adds a uses edge from the current namespace to the module's Node and
    binds the module Node in the current scope under its alias (or under
    the Node's own name when there is no alias).

    import_item: an item of ast_node.names
    ast_node: for recording source location information
    """
    # where it is being imported to, i.e. the **user**
    user_node = self.get_node_of_current_namespace()
    # the thing **being used**: the module named by the import item
    mod_node = self.get_node("", import_item.name, ast_node, flavor=Flavor.MODULE)

    # the name under which the module is visible: the asname, if any
    alias_name = mod_node.name if import_item.asname is None else import_item.asname

    # mark the use site
    self.add_uses_edge(user_node, mod_node)
    self.logger.info("New edge added for Use import %s in %s" % (mod_node, user_node))

    # set node to be discoverable in module
    self.set_value(alias_name, mod_node)
    self.logger.info("From setting name %s to %s" % (alias_name, mod_node))
601
+
602
+ # Edmund Horner's original post has info on what this fixed in Python 2.
603
+ # https://ejrh.wordpress.com/2012/01/31/call-graphs-in-python-part-2/
604
+ #
605
+ # Essentially, this should make '.'.join(...) see str.join.
606
+ # Pyan3 currently handles that in resolve_attribute() and get_attribute().
607
+ #
608
+ # Python 3.4 does not have ast.Constant, but 3.6 does.
609
+ # TODO: actually test this with Python 3.6 or later.
610
+ #
611
def visit_Constant(self, node):
    """Visit a constant: set ``self.last_value`` to a Node named after the constant's type."""
    self.logger.debug("Constant %s, %s:%s" % (node.value, self.filename, node.lineno))
    type_name = type(node.value).__name__
    current_ns = self.get_node_of_current_namespace().get_name()
    # the Node is named after the type (e.g. "str") and lives in the current namespace
    self.last_value = self.get_node(current_ns, type_name, node, flavor=Flavor.ATTRIBUTE)
617
+
618
+ # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load))
619
def visit_Attribute(self, node):
    """Visit an attribute access ``obj.attr``.

    In a store context, the attribute is set to ``self.last_value``. In a
    load context, a uses edge is added from the current namespace to the
    attribute's Node (created on demand when only the object is known),
    and ``self.last_value`` is updated to that Node. If neither the
    attribute nor the object resolves, only ``node.value`` is visited.

    Lookups that fail with UnresolvedSuperCallError are silently skipped.
    """
    objname = get_ast_node_name(node.value)
    self.logger.debug(
        "Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno)
    )

    # TODO: self.last_value is a hack. Handle names in store context (LHS)
    # in analyze_binding(), so that visit_Attribute() only needs to handle
    # the load context (i.e. detect uses of the name).
    #
    if isinstance(node.ctx, ast.Store):
        new_value = self.last_value
        try:
            if self.set_attribute(node, new_value):
                self.logger.info("setattr %s on %s to %s" % (node.attr, objname, new_value))
        except UnresolvedSuperCallError:
            # Trying to set something belonging to an unresolved super()
            # of something; just ignore this attempt to setattr.
            return

    elif isinstance(node.ctx, ast.Load):
        try:
            obj_node, attr_node = self.get_attribute(node)
        except UnresolvedSuperCallError:
            # Avoid adding a wildcard if the lookup failed due to an
            # unresolved super() in the attribute chain.
            return

        # Both object and attr known.
        if isinstance(attr_node, Node):
            self.logger.info("getattr %s on %s returns %s" % (node.attr, objname, attr_node))

            # add uses edge
            from_node = self.get_node_of_current_namespace()
            self.logger.debug("Use from %s to %s" % (from_node, attr_node))
            if self.add_uses_edge(from_node, attr_node):
                self.logger.info("New edge added for Use from %s to %s" % (from_node, attr_node))

            # remove resolved wildcard from current site to <Node *.attr>
            if attr_node.namespace is not None:
                self.remove_wild(from_node, attr_node, node.attr)

            self.last_value = attr_node

        # Object known, but attr unknown. Create node and add a uses edge.
        #
        # TODO: this is mainly useful for imports. Should probably disallow
        # creating new attribute nodes for other undefined attrs of known objs.
        #
        # E.g.
        #
        # import math  # create <Node math>
        # math.sin     # create <Node math.sin> (instead of <Node *.sin> even though math.py is not analyzed)
        #
        # This sometimes creates silly nodes such as (when analyzing Pyan itself)
        # <Node pyan.analyzer.CallGraphVisitor.defines_edges.name.namespace>
        # but these are harmless, as they are considered undefined and
        # will not be visualized.
        #
        elif isinstance(obj_node, Node) and obj_node.namespace is not None:
            tgt_name = node.attr
            from_node = self.get_node_of_current_namespace()
            ns = obj_node.get_name()  # fully qualified namespace **of attr**
            to_node = self.get_node(ns, tgt_name, node, flavor=Flavor.ATTRIBUTE)
            self.logger.debug(
                f"Use from {from_node} to {to_node} (target obj {obj_node} known but target attr "
                f"{node.attr} not resolved; maybe fwd ref or unanalyzed import)"
            )
            if self.add_uses_edge(from_node, to_node):
                # Every fragment needs its own f prefix: adjacent literals
                # are concatenated, but a fragment without the prefix is
                # taken verbatim, braces included.
                self.logger.info(
                    f"New edge added for Use from {from_node} to {to_node} (target obj {obj_node} known but "
                    f"target attr {node.attr} not resolved; maybe fwd ref or unanalyzed import)"
                )

            # remove resolved wildcard from current site to <Node *.attr>
            self.remove_wild(from_node, obj_node, node.attr)

            self.last_value = to_node

        # pass on
        else:
            self.visit(node.value)
701
+
702
+ # name access (node.ctx determines whether set (ast.Store) or get (ast.Load))
703
def visit_Name(self, node):
    """Handle a bare name in a store or load context.

    Store context: bind the value currently held in ``self.last_value``
    to the name. Load context: record a uses edge from the current
    namespace to whatever the name points to (unless that is the current
    class, i.e. "self"), and leave the target in ``self.last_value``.
    Any other context is ignored.
    """
    self.logger.debug("Name %s in context %s, %s:%s" % (node.id, type(node.ctx), self.filename, node.lineno))

    context = node.ctx

    # TODO: self.last_value is a hack. Handle names in store context (LHS)
    #       in analyze_binding(), so that visit_Name() only needs to handle
    #       the load context (i.e. detect uses of the name).
    if isinstance(context, ast.Store):
        # By the time we get here, the binding form has set self.last_value.
        self.set_value(node.id, self.last_value)
        return

    if not isinstance(context, ast.Load):
        return

    # A name in a load context is a use of the object the name points to.
    name = node.id
    target = self.get_value(name)  # resolves "self" if needed
    enclosing_class = self.get_current_class()
    refers_to_self = enclosing_class is not None and target is enclosing_class

    if not refers_to_self:
        # TODO: if the name is a local variable (i.e. in the innermost scope), and
        #       has no known value, then don't try to create a Node for it.
        if not isinstance(target, Node):
            # namespace=None means we don't know the namespace yet
            target = self.get_node(None, name, node, flavor=Flavor.UNKNOWN)

        source = self.get_node_of_current_namespace()
        self.logger.debug("Use from %s to Name %s" % (source, target))
        if self.add_uses_edge(source, target):
            self.logger.info("New edge added for Use from %s to Name %s" % (source, target))

    self.last_value = target
731
+
732
def visit_Assign(self, node):
    """Analyze a plain assignment statement.

    Chained assignments ("a = b = c") yield several target groups; each
    group is bound against the same sanitized right-hand side via
    analyze_binding(). Tuple unpacking is handled there as well.
    """
    target_groups = node.targets
    if len(target_groups) > 1:
        self.logger.debug("Assign (chained with %d outputs)" % (len(target_groups)))

    # TODO: support lists, dicts, sets (so that we can recognize calls to their methods)
    # TODO: begin with supporting empty lists, dicts, sets
    # TODO: need to be more careful in sanitizing; currently destroys a bare list

    rhs = sanitize_exprs(node.value)  # shared by every target group
    for group in target_groups:
        lhs = sanitize_exprs(group)
        lhs_names = [get_ast_node_name(item) for item in lhs]
        rhs_names = [get_ast_node_name(item) for item in rhs]
        self.logger.debug("Assign %s %s, %s:%s" % (lhs_names, rhs_names, self.filename, node.lineno))
        self.analyze_binding(lhs, rhs)
756
+
757
def visit_AnnAssign(self, node):  # PEP 526, Python 3.6+
    """Analyze an annotated assignment (``x: T = value`` or ``x: T``).

    With a value, the target is bound through analyze_binding(). Without
    one (a bare type declaration), the target is just visited with
    ``self.last_value`` cleared.
    """
    target = sanitize_exprs(node.target)
    self.last_value = None

    if node.value is None:  # just a type declaration
        self.logger.debug(
            "AnnAssign %s <no value>, %s:%s" % (get_ast_node_name(target[0]), self.filename, node.lineno)
        )
        self.last_value = None
        self.visit(target[0])
        return

    value = sanitize_exprs(node.value)
    # issue #62: value may be an empty list, so it doesn't always have any
    # elements even after `sanitize_exprs`; hence it is logged as a whole.
    self.logger.debug(
        "AnnAssign %s %s, %s:%s"
        % (get_ast_node_name(target[0]), get_ast_node_name(value), self.filename, node.lineno)
    )
    self.analyze_binding(target, value)
    # TODO: use the type annotation from node.annotation?
    # http://greentreesnakes.readthedocs.io/en/latest/nodes.html#AnnAssign
777
+
778
def visit_AugAssign(self, node):
    """Analyze an augmented assignment (``x += value`` and friends).

    The target and value are sanitized and handed to analyze_binding(),
    exactly like a plain assignment.
    """
    lhs = sanitize_exprs(node.target)
    rhs = sanitize_exprs(node.value)

    lhs_names = [get_ast_node_name(item) for item in lhs]
    rhs_names = [get_ast_node_name(item) for item in rhs]
    self.logger.debug(
        "AugAssign %s %s %s, %s:%s" % (lhs_names, type(node.op), rhs_names, self.filename, node.lineno)
    )

    # TODO: maybe no need to handle tuple unpacking in AugAssign? (but simpler to use the same implementation)
    self.analyze_binding(lhs, rhs)
795
+
796
+ # for() is also a binding form.
797
+ #
798
+ # (Without analyzing the bindings, we would get an unknown node for any
799
+ # use of the loop counter(s) in the loop body. This would have confusing
800
+ # consequences in the expand_unknowns() step, if the same name is
801
+ # in use elsewhere.)
802
+ #
803
def visit_For(self, node):
    """Analyze a for-loop: bind the loop target(s), then visit body and else."""
    self.logger.debug("For-loop, %s:%s" % (self.filename, node.lineno))

    loop_targets = sanitize_exprs(node.target)
    loop_iterables = sanitize_exprs(node.iter)
    self.analyze_binding(loop_targets, loop_iterables)

    # The loop body first, then the optional else clause.
    for statements in (node.body, node.orelse):
        for stmt in statements:
            self.visit(stmt)
814
+
815
def visit_AsyncFor(self, node):
    """Analyze an ``async for`` loop by delegating to visit_For()."""
    # TODO: alias for now; tag async for in output in a future version?
    self.visit_For(node)
817
+
818
def visit_ListComp(self, node):
    """Analyze a list comprehension under the scope label "listcomp"."""
    location = (self.filename, node.lineno)
    self.logger.debug("ListComp, %s:%s" % location)
    self.analyze_comprehension(node, "listcomp")
821
+
822
def visit_SetComp(self, node):
    """Analyze a set comprehension under the scope label "setcomp"."""
    location = (self.filename, node.lineno)
    self.logger.debug("SetComp, %s:%s" % location)
    self.analyze_comprehension(node, "setcomp")
825
+
826
def visit_DictComp(self, node):
    """Analyze a dict comprehension under the scope label "dictcomp".

    A dict comprehension has two result expressions, ``key`` and ``value``,
    instead of the single ``elt`` of the other comprehension kinds.
    """
    location = (self.filename, node.lineno)
    self.logger.debug("DictComp, %s:%s" % location)
    self.analyze_comprehension(node, "dictcomp", field1="key", field2="value")
829
+
830
def visit_GeneratorExp(self, node):
    """Analyze a generator expression under the scope label "genexpr"."""
    location = (self.filename, node.lineno)
    self.logger.debug("GeneratorExp, %s:%s" % location)
    self.analyze_comprehension(node, "genexpr")
833
+
834
def analyze_comprehension(self, node, label, field1="elt", field2=None):
    """Analyze a comprehension-like AST node.

    Parameters:
        node:   AST node with a ``generators`` attribute, plus the result
                expression(s) named by ``field1``/``field2``.
        label:  name of the inner scope to enter (the callers in this file
                pass "listcomp", "setcomp", "dictcomp" or "genexpr").
        field1: attribute of ``node`` holding the result expression
                ("elt" by default; "key" for dict comprehensions).
        field2: optional second result attribute ("value" for dict
                comprehensions), or None.

    The iterable of the outermost generator is always visited in the
    current scope. Everything else is analyzed inside
    ``ExecuteInInnerScope(self, label)``, but only when ``label`` is not
    in the exclusion set below.
    """
    # The outermost iterator is evaluated in the current scope;
    # everything else in the new inner scope.
    #
    # See function symtable_handle_comprehension() in
    #     https://github.com/python/cpython/blob/master/Python/symtable.c
    # For how it works, see
    #     https://stackoverflow.com/questions/48753060/what-are-these-extra-symbols-in-a-comprehensions-symtable
    # For related discussion, see
    #     https://bugs.python.org/issue10544
    gens = node.generators  # sequence of ast.comprehension
    outermost = gens[0]
    moregens = gens[1:] if len(gens) > 1 else []

    outermost_iters = sanitize_exprs(outermost.iter)
    outermost_targets = sanitize_exprs(outermost.target)
    for expr in outermost_iters:
        self.visit(expr)  # set self.last_value (to something and hope for the best)

    # NOTE(review): the nesting below is reconstructed; the visible source has
    # lost its indentation. It assumes the whole inner-scope analysis sits
    # under this guard -- TODO confirm against the real file.
    # NOTE(review): every caller visible in this file passes a label that IS
    # in this set, so under that reading the targets, filters, nested
    # generators and result expressions of comprehensions are never visited.
    # Presumably a workaround for comprehension scopes missing from the
    # symtable on newer Pythons (PEP 709) -- verify this is intended.
    if label not in {"lambda", "listcomp", "setcomp", "dictcomp", "genexpr"}:
        with ExecuteInInnerScope(self, label):
            for expr in outermost_targets:
                self.visit(expr)  # use self.last_value
            self.last_value = None
            for expr in outermost.ifs:
                self.visit(expr)

            # TODO: there's also an is_async field we might want to use in a future version of Pyan.
            for gen in moregens:
                targets = sanitize_exprs(gen.target)
                values = sanitize_exprs(gen.iter)
                self.analyze_binding(targets, values)
                for expr in gen.ifs:
                    self.visit(expr)

            self.visit(getattr(node, field1))  # e.g. node.elt
            if field2:
                self.visit(getattr(node, field2))
871
+
872
def visit_Call(self, node):
    """Analyze a call expression.

    Arguments are visited first to detect uses. If resolve_builtins() can
    predict the call's result, a uses edge to that result is recorded.
    Otherwise the callee expression is visited, and if it turns out to be
    a known class, a uses edge to its ``__init__`` is added as well.
    """
    self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), self.filename, node.lineno))

    # visit args to detect uses
    for positional in node.args:
        self.visit(positional)
    for keyword in node.keywords:
        self.visit(keyword.value)

    # see if we can predict the result
    try:
        resolved = self.resolve_builtins(node)
    except UnresolvedSuperCallError:
        resolved = None

    if isinstance(resolved, Node):  # resolved result
        self.last_value = resolved

        caller = self.get_node_of_current_namespace()
        self.logger.debug("Use from %s to %s (via resolved call to built-ins)" % (caller, resolved))
        if self.add_uses_edge(caller, resolved):
            self.logger.info(
                "New edge added for Use from %s to %s (via resolved call to built-ins)" % (caller, resolved)
            )
        return

    # Generic function call. Visit the function name part last, so that
    # inside a binding form, it will be left standing as self.last_value.
    self.visit(node.func)

    # A call of the form MyClass() implicitly uses MyClass.__init__(); there
    # is no text "__init__" at the call site, so add that edge manually.
    #
    # The keys of self.class_base_ast_nodes conveniently happen to be the
    # Nodes of known classes; the AST nodes stored there are irrelevant here.
    if self.last_value not in self.class_base_ast_nodes:
        return

    caller = self.get_node_of_current_namespace()
    klass = self.last_value
    init_node = self.get_node(klass.get_name(), "__init__", None, flavor=Flavor.METHOD)
    self.logger.debug("Use from %s to %s (call creates an instance)" % (caller, init_node))
    if self.add_uses_edge(caller, init_node):
        self.logger.info("New edge added for Use from %s to %s (call creates an instance)" % (caller, init_node))
922
+
923
def visit_With(self, node):
    """Analyze a ``with`` statement.

    For each with-item the context expression is visited, uses edges to
    the ``__enter__``/``__exit__`` methods of the resulting Node (if any)
    are recorded, and the optional ``as`` target is bound. Finally the
    body statements are visited.
    """
    self.logger.debug("With (context manager), %s:%s" % (self.filename, node.lineno))

    def record_enter_exit(managed):
        # Add uses edges to the __enter__ and __exit__ methods of the given Node.
        if not isinstance(managed, Node):
            return
        caller = self.get_node_of_current_namespace()
        self.logger.debug("Use from %s to With %s" % (caller, managed))
        for dunder in ("__enter__", "__exit__"):
            method_node = self.get_node(managed.get_name(), dunder, None, flavor=Flavor.METHOD)
            if self.add_uses_edge(caller, method_node):
                self.logger.info("New edge added for Use from %s to %s" % (caller, method_node))

    for item in node.items:
        manager_expr = item.context_expr
        bound_to = item.optional_vars

        # XXX: we currently visit the expression twice (again in analyze_binding()) if there is an "as" target
        self.last_value = None
        self.visit(manager_expr)
        record_enter_exit(self.last_value)
        self.last_value = None

        if bound_to is None:
            continue

        # TODO: For now, we support only the following (most common) case:
        #  - only one binding target, which is an ast.Name
        #    (not ast.Tuple or something else)
        #  - the variable will point to the object that was with'd
        #    (i.e. we assume the object's __enter__() method
        #     to finish with "return self")
        if isinstance(bound_to, ast.Name):
            self.analyze_binding(sanitize_exprs(bound_to), sanitize_exprs(manager_expr))
        else:
            self.visit(bound_to)  # just capture any uses on the With line itself

    for stmt in node.body:
        self.visit(stmt)
965
+
966
+ ###########################################################################
967
+ # Analysis helpers
968
+
969
def analyze_functiondef(self, ast_node):
    """Analyze a function definition.

    Visit decorators, and if this is a method definition, capture the name
    of the first positional argument to denote "self", like Python does.
    Positional-only parameters (``def m(self, /, x)``, PEP 570) are taken
    into account, since they precede the regular positional parameters.

    Return (self_name, flavor), where self_name is the name representing
    self, or None if not applicable; and flavor is a Flavor, specifically
    one of FUNCTION, METHOD, STATICMETHOD or CLASSMETHOD.

    Raises TypeError if ast_node is not a (possibly async) function definition.
    """
    if not isinstance(ast_node, (ast.AsyncFunctionDef, ast.FunctionDef)):
        raise TypeError("Expected ast.FunctionDef; got %s" % (type(ast_node)))

    # Visit decorators
    self.last_value = None
    deco_names = []
    for deco in ast_node.decorator_list:
        self.visit(deco)  # capture function name of decorator (self.last_value hack)
        deco_node = self.last_value
        if isinstance(deco_node, Node):
            deco_names.append(deco_node.name)
        self.last_value = None

    # Analyze flavor
    in_class_ns = self.context_stack[-1].startswith("ClassDef")
    if not in_class_ns:
        flavor = Flavor.FUNCTION
    elif "staticmethod" in deco_names:
        flavor = Flavor.STATICMETHOD
    elif "classmethod" in deco_names:
        flavor = Flavor.CLASSMETHOD
    else:  # instance method
        flavor = Flavor.METHOD

    # Get the name representing "self", if applicable.
    #
    #   - ignore static methods
    #   - ignore functions defined inside methods (this new FunctionDef
    #     must be directly in a class namespace)
    #
    if flavor in (Flavor.METHOD, Flavor.CLASSMETHOD):
        # We can treat instance methods and class methods the same,
        # since Pyan is only interested in object types, not instances.
        all_args = ast_node.args  # posonlyargs, args, vararg (*args), kwonlyargs, kwarg (**kwargs)
        # Positional-only parameters come first in the signature; the
        # attribute is absent on Python < 3.8, hence the getattr() default.
        posargs = list(getattr(all_args, "posonlyargs", [])) + list(all_args.args)
        if posargs:
            return posargs[0].arg, flavor

    return None, flavor
1020
+
1021
def analyze_binding(self, targets, values):
    """Generic handler for binding forms. Inputs must be sanitize_exprs()d.

    Each value is visited to produce a ``self.last_value``, which is then
    exposed while the matching target is visited (in a store context).
    ``self.last_value`` is always None when this returns.
    """
    # Clean up any leftover self.last_value before we begin, e.g. from any
    # Name in load context (including function names in a Call) that did
    # not assign anything.
    self.last_value = None

    # TODO: properly support tuple unpacking
    #
    #  - the problem is:
    #      a,*b,c = [1,2,3,4,5]  --> Name,Starred,Name = List
    #    so a simple analysis of the AST won't get us far here.
    #
    #  To fix this:
    #
    #  - find the index of Starred on the LHS
    #  - unpack the RHS into a tuple/list (if possible), just one level
    #    deep; the items may be tuples/lists and that's just fine
    #    - if not possible to unpack directly (e.g. enumerate(foo) is a **call**),
    #      don't try to be too smart; do some generic fallback handling (or give up)
    #  - if RHS unpack successful:
    #    - map the non-starred items directly (one-to-one)
    #    - map the remaining sublist of the RHS to the Starred term
    #      - requires support for tuples/lists of AST nodes as values of Nodes
    #        - but generally, we need that anyway: consider self.a = (f, g, h)
    #          --> any use of self.a should detect the possible use of f, g, and h;
    #              currently this is simply ignored.
    #
    # TODO: support Additional Unpacking Generalizations (Python 3.6+):
    #       https://www.python.org/dev/peps/pep-0448/

    if len(targets) != len(values):
        # FIXME: for now, do the wrong thing in the non-trivial case
        # (old code, no tuple unpacking support): leave self.last_value set
        # to **something** from the RHS and hope for the best.
        for rhs in values:
            self.visit(rhs)
        for lhs in targets:  # LHS, name in a store context
            self.visit(lhs)
        self.last_value = None
        return

    # The most common trivial case "a1,a2,... = b1,b2,..." is handled correctly.
    captured = []
    for rhs in values:
        self.visit(rhs)  # RHS -> sets self.last_value
        captured.append(self.last_value)
        self.last_value = None
    for lhs, bound in zip(targets, captured):
        self.last_value = bound
        self.visit(lhs)  # LHS, name in a store context
    self.last_value = None
1072
+
1073
def resolve_builtins(self, ast_node):
    """Resolve calls to built-in functions whose result is easy to predict.

    Currently supported:

      - str(obj), repr(obj) --> obj.__str__, obj.__repr__

      - super() (any arguments ignored), which works only in pass 2,
        because the MRO is determined between passes.

    Raises TypeError if ast_node is not an ast.Call.

    May raise UnresolvedSuperCallError, if the call is to super(), but the
    result cannot be (currently) determined (usually because either
    pass 1, or some relevant source file is not in the analyzed set).

    Returns the Node the call resolves to, or None if not determined.
    """
    if not isinstance(ast_node, ast.Call):
        raise TypeError("Expected ast.Call; got %s" % (type(ast_node)))

    callee = ast_node.func  # expr
    if not isinstance(callee, ast.Name):
        return None
    funcname = callee.id

    if funcname == "super":
        class_node = self.get_current_class()
        self.logger.debug("Resolving super() of %s" % (class_node))

        if class_node not in self.mro:
            msg = "super called for %s, but MRO not determined for it (maybe still in pass 1?)" % (class_node)
            self.logger.info(msg)
            raise UnresolvedSuperCallError(msg)

        # Our super() class is the next one in the MRO.
        #
        # Only the **static type** of the class itself is considered. The
        # later elements of the MRO - important for resolving chained
        # super() calls in a dynamic context, where the dynamic type of the
        # calling object differs from the static type of the class where
        # the super() call site is - are never used for resolving super().
        #
        # This is a limitation of pure lexical scope based static
        # code analysis.
        linearization = self.mro[class_node]
        if len(linearization) <= 1:
            msg = "super called for %s, but no known bases" % (class_node)
            self.logger.info(msg)
            raise UnresolvedSuperCallError(msg)

        result = linearization[1]
        self.logger.debug("super of %s is %s" % (class_node, result))
        return result

    # str() and repr() are only resolved in their one-argument form.
    if funcname in ("str", "repr") and len(ast_node.args) == 1:
        operand = ast_node.args[0]
        if isinstance(operand, (ast.Name, ast.Attribute)):
            self.logger.debug("Resolving %s() of %s" % (funcname, get_ast_node_name(operand)))
            attrname = "__%s__" % (funcname)
            # build a temporary ast.Attribute AST node so that we can use get_attribute()
            probe = ast.Attribute(value=operand, attr=attrname, ctx=operand.ctx)
            obj_node, attr_node = self.get_attribute(probe)
            self.logger.debug(
                "Resolve %s() of %s: returning attr node %s" % (funcname, get_ast_node_name(operand), attr_node)
            )
            return attr_node

    # add implementations for other built-in funcnames here if needed
    return None
1141
+
1142
def resolve_attribute(self, ast_node):
    """Resolve an ast.Attribute.

    Nested attributes (a.b.c) are automatically handled by recursion.

    Return (obj, attrname), where obj is a Node (or None on lookup failure),
    and attrname is the attribute name.

    Attributes of string and numeric literals (e.g. ``", ".join``) resolve
    to a top-level pseudo-class Node named "Str" or "Num". The literal is
    recognized as ``ast.Constant`` (what the parser produces on
    Python 3.8+), not through the deprecated ``ast.Str``/``ast.Num``
    aliases, so the lookup keeps working once those aliases are gone.

    Raises TypeError if ast_node is not an ast.Attribute.

    May pass through UnresolvedSuperCallError, if the attribute resolution
    failed specifically due to an unresolved super() call.
    """
    if not isinstance(ast_node, ast.Attribute):
        raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node)))

    self.logger.debug(
        "Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), ast_node.attr, type(ast_node.ctx))
    )

    # Resolve nested attributes
    #
    # In pseudocode, e.g. "a.b.c" is represented in the AST as:
    #     ast.Attribute(attr=c, value=ast.Attribute(attr=b, value=a))
    #
    if isinstance(ast_node.value, ast.Attribute):
        obj_node, attr_name = self.resolve_attribute(ast_node.value)

        if isinstance(obj_node, Node) and obj_node.namespace is not None:
            ns = obj_node.get_name()  # fully qualified namespace **of attr**
            if ns in self.scopes:  # imported modules not in the set of analyzed files are not seen by Pyan
                sc = self.scopes[ns]
                if attr_name in sc.defs:
                    self.logger.debug("Resolved to attr %s of %s" % (ast_node.attr, sc.defs[attr_name]))
                    return sc.defs[attr_name], ast_node.attr

        # It may happen that ast_node.value has no corresponding graph Node,
        # if this is a forward-reference, or a reference to a file
        # not in the analyzed set.
        #
        # In this case, return None for the object to let visit_Attribute()
        # add a wildcard reference to *.attr.
        #
        self.logger.debug("Unresolved, returning attr %s of unknown" % (ast_node.attr))
        return None, ast_node.attr

    owner = ast_node.value

    # detect str.join() and similar (attributes of constant literals)
    literal_kind = None  # "Str", "Num", or None when owner is not such a literal
    if isinstance(owner, ast.Constant):
        literal = owner.value
        if isinstance(literal, str):
            literal_kind = "Str"
        elif isinstance(literal, (int, float, complex)) and not isinstance(literal, bool):
            # bool is a subclass of int, but True/False were never "Num" literals
            literal_kind = "Num"
    elif type(owner).__name__ in ("Num", "Str"):
        # Legacy node classes emitted by parsers older than Python 3.8;
        # matched by name so the deprecated attributes are never touched.
        literal_kind = type(owner).__name__

    if literal_kind is not None:  # TODO: other types?
        # Create a namespace-like Node with no associated AST node.
        # Constants are builtins, so they should live in the
        # top-level namespace (same level as module names).
        #
        # Since get_node() creates only one node per unique
        # (namespace,name) pair, the AST node would anyway be
        # frozen to the first constant of any matching type that
        # the analyzer encountered in the analyzed source code,
        # which is not useful.
        #
        # The CLASS flavor is the best match, as these constants
        # are object types.
        #
        obj_node = self.get_node("", literal_kind, None, flavor=Flavor.CLASS)

    # attribute of a function call. Detect cases like super().dostuff()
    elif isinstance(owner, ast.Call):
        # Note that resolve_builtins() will signal an unresolved
        # super() by an exception, which we just pass through here.
        obj_node = self.resolve_builtins(owner)

        # can't resolve result of general function call
        if not isinstance(obj_node, Node):
            self.logger.debug("Unresolved function call as obj, returning attr %s of unknown" % (ast_node.attr))
            return None, ast_node.attr
    else:
        # Get the Node object corresponding to node.value in the current ns.
        #
        # (Using the current ns here is correct; this case only gets
        #  triggered when there are no more levels of recursion,
        #  and the leftmost name always resides in the current ns.)
        obj_node = self.get_value(get_ast_node_name(owner))  # resolves "self" if needed

    self.logger.debug("Resolved to attr %s of %s" % (ast_node.attr, obj_node))
    return obj_node, ast_node.attr
1226
+
1227
+ ###########################################################################
1228
+ # Scope analysis
1229
+
1230
def analyze_scopes(self, code, filename):
    """Gather lexical scope information for one source file.

    The symbol table of ``code`` is walked recursively and a Scope is
    recorded for each table under its fully qualified ("dotted") name.
    The result is merged into ``self.scopes`` without overwriting any
    definition that is already present there.
    """
    # Technically, the module scope is anonymous, but we treat it as if
    # it was in a namespace named after the module, to support analysis
    # of several files as a set (keeping their module-level definitions
    # in different scopes, as we should).
    collected = {}

    def walk(parent_ns, table):
        scope = Scope(table)
        if len(scope.name):
            qualified = "%s.%s" % (parent_ns, scope.name)
        else:
            qualified = parent_ns
        collected[qualified] = scope
        for child in table.get_children():
            walk(qualified, child)

    walk(self.module_name, symtable.symtable(code, filename, compile_type="exec"))

    # add to existing scopes (while not overwriting any existing definitions with None)
    for ns, fresh in collected.items():
        if ns not in self.scopes:  # add new scope info
            self.scopes[ns] = fresh
            continue
        # update existing scope info
        known = self.scopes[ns]
        for name in fresh.defs:
            if name not in known.defs:
                known.defs[name] = fresh.defs[name]

    self.logger.debug("Scopes now: %s" % (self.scopes))
1263
+
1264
def get_current_class(self):
    """Return the node on top of ``self.class_stack``, or None if the stack is empty."""
    stack = self.class_stack
    if len(stack):
        return stack[-1]
    return None
1267
+
1268
def get_node_of_current_namespace(self):
    """Return the unique node representing the current namespace,
    as described by self.name_stack.

    For a Node n representing a namespace:
      - n.namespace = fully qualified name of the parent namespace
                      (empty string if at top level)
      - n.name      = name of this namespace
      - no associated AST node.
    """
    stack = self.name_stack
    assert len(stack)  # name_stack should never be empty (always at least module name)

    parent_ns = ".".join(stack[0:-1])
    own_name = stack[-1]
    return self.get_node(parent_ns, own_name, None, flavor=Flavor.NAMESPACE)
1283
+
1284
+ ###########################################################################
1285
+ # Value getter and setter
1286
+
1287
def get_value(self, name):
    """Get the value of name in the current scope.

    The scope stack is searched from the innermost scope outwards for a
    scope that defines name **with a non-None value**. Return that value
    if it is a Node; otherwise return None.
    """
    holder = None
    for candidate in reversed(self.scope_stack):
        if name in candidate.defs and candidate.defs[name] is not None:
            holder = candidate
            break

    if holder is None:
        self.logger.debug("Get %s in %s: no Node value (or name not in scope)" % (name, self.scope_stack[-1]))
        return None

    value = holder.defs[name]
    if isinstance(value, Node):
        self.logger.info("Get %s in %s, found in %s, value %s" % (name, self.scope_stack[-1], holder, value))
        return value

    # TODO: should always be a Node or None
    self.logger.debug(
        "Get %s in %s, found in %s: value %s is not a Node" % (name, self.scope_stack[-1], holder, value)
    )
    return None
1310
+
1311
def set_value(self, name, value):
    """Set the value of name in the current scope. Value must be a Node.

    The innermost scope that defines name receives the value (this should
    be the current scope unless name is a global). Non-Node values and
    names not found in any scope are only logged, not stored.
    """
    holder = None
    for candidate in reversed(self.scope_stack):
        if name in candidate.defs:
            holder = candidate
            break

    if holder is None:
        self.logger.debug("Set: name %s not in scope" % (name))
        return

    if not isinstance(value, Node):
        # TODO: should always be a Node or None
        self.logger.debug("Set %s in %s: value %s is not a Node" % (name, holder, value))
        return

    holder.defs[name] = value
    self.logger.info("Set %s in %s to %s" % (name, holder, value))
1330
+
1331
+ ###########################################################################
1332
+ # Attribute getter and setter
1333
+
1334
def get_attribute(self, ast_node):
    """Get value of an ast.Attribute.

    Supports inherited attributes. If the obj's own namespace has no match
    for attr, the ancestors of obj are also tried, following the MRO based
    on the static type of the object, until one of them matches or until
    all ancestors are exhausted.

    Return pair of Node objects (obj, attr), where each item can be None
    on lookup failure. (Object not known, or no Node value assigned
    to its attr.)

    Raises TypeError if ast_node is not an ast.Attribute, and ValueError
    if it is not in a load context.

    May pass through UnresolvedSuperCallError.
    """
    if not isinstance(ast_node, ast.Attribute):
        raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node)))
    if not isinstance(ast_node.ctx, ast.Load):
        raise ValueError("Expected a load context, got %s" % (type(ast_node.ctx)))

    obj_node, attr_name = self.resolve_attribute(ast_node)

    if not (isinstance(obj_node, Node) and obj_node.namespace is not None):
        return obj_node, None  # here obj_node is either None or unknown (namespace None)

    ns = obj_node.get_name()  # fully qualified namespace **of attr**

    # detect str.join() and similar (attributes of constant literals)
    #
    # Any attribute is considered valid for these special types,
    # but only in a load context. (set_attribute() does not have this
    # special handling, by design.)
    #
    if ns in ("Num", "Str"):  # TODO: other types?
        return obj_node, self.get_node(ns, attr_name, None, flavor=Flavor.ATTRIBUTE)

    def lookup(namespace):
        # Look up attr_name in the given namespace; return its value or None.
        if namespace in self.scopes:
            scope = self.scopes[namespace]
            if attr_name in scope.defs:
                return scope.defs[attr_name]
        return None

    # first try directly in object's ns (this works already in pass 1)
    found = lookup(ns)
    if found is not None:
        return obj_node, found

    # next try ns of each ancestor (this works only in pass 2,
    # after self.mro has been populated)
    if obj_node in self.mro:
        for base_node in tail(self.mro[obj_node]):  # the first element is always obj itself
            found = lookup(base_node.get_name())
            if found is not None:
                # as obj, return the base class in which attr was found
                return base_node, found
        return None, None  # not found

    return obj_node, None
1394
+
1395
def set_attribute(self, ast_node, new_value):
    """Assign the Node provided as new_value into the attribute described
    by the AST node ast_node.

    Return True if the assignment was done, False otherwise (new_value is
    not a Node, the object could not be resolved, or its namespace has no
    known scope).

    Raises TypeError if ast_node is not an ast.Attribute, and ValueError
    if it is not in a store context.

    May pass through UnresolvedSuperCallError.
    """
    if not isinstance(ast_node, ast.Attribute):
        raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node)))
    if not isinstance(ast_node.ctx, ast.Store):
        raise ValueError("Expected a store context, got %s" % (type(ast_node.ctx)))

    if not isinstance(new_value, Node):
        return False

    obj_node, attr_name = self.resolve_attribute(ast_node)

    if not isinstance(obj_node, Node) or obj_node.namespace is None:
        return False

    ns = obj_node.get_name()  # fully qualified namespace **of attr**
    if ns not in self.scopes:
        return False

    self.scopes[ns].defs[attr_name] = new_value
    return True
1420
+
1421
+ ###########################################################################
1422
+ # Graph creation
1423
+
1424
def get_node(self, namespace, name, ast_node=None, flavor=Flavor.UNSPECIFIED):
    """Return the unique node matching the namespace and name.
    Create a new node if one doesn't already exist.

    To associate the node with a syntax object in the analyzed source code,
    an AST node can be passed in. This only takes effect if a new Node
    is created.

    To associate an AST node to an existing graph node,
    see associate_node().

    Flavor describes the kind of object the node represents.
    See the node.Flavor enum for currently supported values.

    For existing nodes, flavor overwrites, if the given flavor is
    (strictly) more specific than the node's existing one.
    See node.Flavor.specificity().

    !!!
    In CallGraphVisitor, always use get_node() to create nodes, because it
    also sets some important auxiliary information. Do not call the Node
    constructor directly.
    !!!
    """
    # Reuse an existing node with this short name and namespace, if any.
    if name in self.nodes:
        for existing in self.nodes[name]:
            if existing.namespace != namespace:
                continue
            if Flavor.specificity(flavor) > Flavor.specificity(existing.flavor):
                existing.flavor = flavor
            return existing

    # Try to figure out which source file this Node belongs to
    # (for annotated output).
    #
    # Other parts of the analyzer may change the filename later,
    # if a more authoritative source (e.g. a definition site) is found,
    # so the filenames should be trusted only after the analysis is
    # complete.
    #
    # TODO: this is tentative. Add in filename only when sure?
    # (E.g. in visit_ClassDef(), visit_FunctionDef())
    #
    if namespace in self.module_to_filename:
        # If the namespace is one of the modules being analyzed,
        # then the Node belongs to the corresponding file.
        filename = self.module_to_filename[namespace]
    else:
        # Assume the Node belongs to the current file.
        filename = self.filename

    created = Node(namespace, name, ast_node, filename, flavor)

    # Add to the list of nodes that have this short name.
    if name not in self.nodes:
        self.nodes[name] = [created]
    else:
        self.nodes[name].append(created)

    return created
1483
+
1484
def get_parent_node(self, graph_node):
    """Return the Node that represents graph_node's namespace.

    The namespace string is split at its last dot into the parent's own
    namespace and name; a namespace without dots is looked up at the top
    level (namespace ""). The lookup goes through get_node().
    (Used in postprocessing.)
    """
    namespace = graph_node.namespace
    if "." not in namespace:
        return self.get_node("", namespace, None)
    parent_ns, parent_name = namespace.rsplit(".", 1)
    return self.get_node(parent_ns, parent_name, None)
1491
+
1492
def associate_node(self, graph_node, ast_node, filename=None):
    """Re-point a graph node at a different AST node (and optionally file).

    This is what makes annotated output (source filename and line number)
    possible for objects that are referenced before they are defined.

    A function may first be seen in a FromImport, before its definition
    has been analyzed. Its namespace is already known at that point, so a
    graph Node is created right away to serve as a correctly namespaced
    "uses" target (avoiding a wildcard and the over-reaching
    expand_unknowns() where they are not needed). The line number,
    however, lives in the AST node of the definition, so it can only be
    attached once the definition is encountered; that is the job of this
    method.

    ast_node always replaces the node's current AST node; filename is
    updated only when one is given.
    """
    graph_node.ast_node = ast_node
    if filename is None:
        return
    graph_node.filename = filename
1514
+
1515
def add_defines_edge(self, from_node, to_node):
    """Add a defines edge from from_node to to_node.

    from_node is registered (and marked as defined) the first time it is
    seen. to_node may be None, in which case only that registration takes
    place. A newly added to_node is marked as defined as well.

    Returns True if the graph changed (from_node was newly registered or
    the edge was newly added), False otherwise.
    """
    newly_registered = from_node not in self.defines_edges
    if newly_registered:
        self.defines_edges[from_node] = set()
        from_node.defined = True

    targets = self.defines_edges[from_node]
    if to_node is None or to_node in targets:
        return newly_registered

    targets.add(to_node)
    to_node.defined = True
    return True
1528
+
1529
def add_uses_edge(self, from_node, to_node):
    """Add a uses edge from from_node to to_node.

    Returns False if the edge already existed, True if it was added.
    """
    targets = self.uses_edges.setdefault(from_node, set())
    if to_node in targets:
        return False
    targets.add(to_node)

    # For pass 2: once a use resolves to a node with a known namespace,
    # drop the uses edge to the wildcard of the same name.
    #
    # This prevents the spurious reference to MyClass.f in:
    #
    #     class MyClass:
    #         def __init__(self):
    #             pass
    #         def f():
    #             pass
    #
    #     def main():
    #         f()
    #
    #     def f():
    #         pass
    #
    # (The reference to *.f comes from pass 1; expand_unknowns() in
    # postprocessing would then fan it out to every f.)
    #
    # TODO: this can still get confused. The wildcard is removed if the
    # name of *any* resolved uses edge matches, whereas the wildcard
    # may represent several uses, to different objects.
    if to_node.namespace is not None:
        self.remove_wild(from_node, to_node, to_node.name)

    return True
1566
+
1567
def remove_uses_edge(self, from_node, to_node):
    """Remove the uses edge from from_node to to_node, if there is one.

    Missing source nodes and missing edges are ignored silently.
    (Used in postprocessing.)
    """
    if from_node not in self.uses_edges:
        return
    self.uses_edges[from_node].discard(to_node)
1574
+
1575
def remove_wild(self, from_node, to_node, name):
    """Remove the uses edge from from_node to the wildcard *.name.

    Both to_node and name are needed: for a bound name (e.g. an attribute
    lookup) the name field of the *target value* does not necessarily
    match the formal name in the wildcard.

    Used for cleaning up forward references once they are resolved, which
    prevents spurious edges due to expand_unknowns().
    """
    # Relative imports may create nodes with name=None.
    if name is None:
        return

    # No uses edges to remove.
    if from_node not in self.uses_edges:
        return

    # Keep the wildcard if the target is actually an unresolved argument
    # (see visit_FunctionDef()).
    if "^^^argument^^^" in to_node.get_name():
        return

    # A node that seems to refer to itself can be read in two ways:
    #
    #  a) It actually refers to something else that was not fully
    #     resolved, so the wildcard should stay.
    #
    #     Example:
    #
    #         import sympy as sy
    #         def simplify(expr):
    #             sy.simplify(expr)
    #
    #     If the source file of sy.simplify is not among the analyzed
    #     files, this generates a reference to *.simplify, which is
    #     formally satisfied by this function itself.
    #
    #     (Since commit e3c32b782a89b9eb225ef36d8557ebf172ff4ba5 this
    #     example is bad: sy.simplify is recognized as an unknown attr of
    #     a known object, so no wildcard is generated.)
    #
    #  b) It really refers to itself (e.g. a recursive function), so the
    #     wildcard should go.
    #
    #     Bad example:
    #
    #         def f(count):
    #             if count > 0:
    #                 return 1 + f(count-1)
    #             return 0
    #
    #     (Bad because visit_FunctionDef() will pick up the f in the
    #     top-level namespace, so no reference to *.f should be generated
    #     in this particular case.)
    #
    # We choose a).
    #
    # TODO: do we need to change our opinion now that also recursive calls are visualized?
    if to_node == from_node:
        return

    wildcards = [
        target
        for target in self.uses_edges[from_node]
        if target.namespace is None and target.name == name
    ]
    assert len(wildcards) < 2  # the set can have only one wild of matching name
    if wildcards:
        wild_node = wildcards[0]
        self.logger.info("Use from %s to %s resolves %s; removing wildcard" % (from_node, to_node, wild_node))
        self.remove_uses_edge(from_node, wild_node)
1643
+
1644
+ ###########################################################################
1645
+ # Postprocessing
1646
+
1647
def contract_nonexistents(self):
    """Redirect every uses edge to an undefined X.name to the wildcard *.name.

    The replacement edges are collected first and applied after the scan,
    so self.uses_edges is not modified while it is being iterated.
    """
    additions = []
    removals = []
    for source, targets in self.uses_edges.items():
        for target in targets:
            # Wildcards and defined nodes stay as they are.
            if target.namespace is None or target.defined:
                continue
            wild = self.get_node(None, target.name, target.ast_node)
            wild.defined = False
            additions.append((source, wild))
            removals.append((source, target))
            self.logger.info("Contracting non-existent from %s to %s as %s" % (source, target, wild))

    for source, wild in additions:
        self.add_uses_edge(source, wild)

    for source, target in removals:
        self.remove_uses_edge(source, target)
1666
+
1667
def expand_unknowns(self):
    """Fan out every edge that points at a wildcard *.name to all known X.name.

    For both defines and uses edges, each edge to an unknown node gets
    companion edges to every node of the same short name that has a known
    namespace. Afterwards all unknown nodes are marked as not defined
    (so that they won't be visualized).
    """

    def expansions(edges):
        # Collect (source, concrete node) pairs up front, so the edge maps
        # are not modified while they are being iterated.
        pairs = []
        for source, targets in edges.items():
            for target in targets:
                if target.namespace is not None:
                    continue
                for candidate in self.nodes[target.name]:
                    if candidate.namespace is not None:
                        pairs.append((source, candidate))
        return pairs

    for from_node, to_node in expansions(self.defines_edges):
        self.add_defines_edge(from_node, to_node)
        self.logger.info("Expanding unknowns: new defines edge from %s to %s" % (from_node, to_node))

    for from_node, to_node in expansions(self.uses_edges):
        self.add_uses_edge(from_node, to_node)
        self.logger.info("Expanding unknowns: new uses edge from %s to %s" % (from_node, to_node))

    for same_name_nodes in self.nodes.values():
        for node in same_name_nodes:
            if node.namespace is None:
                node.defined = False
1700
+
1701
def cull_inherited(self):
    """Drop uses edges that are shadowed by an edge to a same-named node.

    For each uses edge from W to X.name: if W also has a uses edge to
    Y.name (same short name, different known namespace) and the parent
    node of X.name uses the parent node of Y.name, the edge from W to
    X.name is removed.
    """
    doomed = []
    for user, targets in self.uses_edges.items():
        for target in targets:
            shadowed = False
            for other in targets:
                if other.name != target.name:
                    continue
                if target.namespace is None or other.namespace is None:
                    continue
                if other.namespace == target.namespace:
                    continue
                target_parent = self.get_parent_node(target)
                other_parent = self.get_parent_node(other)
                # Alternative (TODO: add an option to choose this): remove
                # the second edge, W to Y.name, when
                # other_parent in self.uses_edges
                # and target_parent in self.uses_edges[other_parent].
                if target_parent in self.uses_edges and other_parent in self.uses_edges[target_parent]:
                    # remove the first edge W to X.name
                    shadowed = True

            if shadowed and user in self.uses_edges:
                doomed.append((user, target))
                self.logger.info("Removing inherited edge from %s to %s" % (user, target))

    for from_node, to_node in doomed:
        self.remove_uses_edge(from_node, to_node)
1731
+
1732
def collapse_inner(self):
    """Fold lambda and comprehension Nodes into their parent Nodes.

    This reduces visual noise: every outgoing uses edge of an inner-scope
    node is re-added on its parent, and the inner node itself is marked
    as undefined so that it won't be visualized.
    """
    # Lambdas and comprehensions do not define any names in the enclosing
    # scope, so only the uses edges need to be treated.
    inner_scope_names = ("lambda", "listcomp", "setcomp", "dictcomp", "genexpr")

    # BUG: resolving relative imports causes "RuntimeError: dictionary
    # changed size during iteration"; as a temporary solution, iterate over
    # a copy of the keys of self.nodes.
    for name in list(self.nodes):
        if name not in inner_scope_names:
            continue
        for inner in self.nodes[name]:
            parent = self.get_parent_node(inner)
            for used in self.uses_edges.get(inner, ()):  # outgoing uses edges
                self.logger.info("Collapsing inner from %s to %s, uses %s" % (inner, parent, used))
                self.add_uses_edge(parent, used)
            inner.defined = False
pyan_insperation/anutils.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import ast
4
+ import os.path
5
+
6
+ from .node import Flavor
7
+
8
+
9
def head(lst):
    """Return the first element of lst, or None if lst is empty."""
    return lst[0] if len(lst) else None
12
+
13
+
14
def tail(lst):
    """Return everything after the first element of lst.

    When lst has fewer than two elements the result is a new empty list.
    """
    return lst[1:] if len(lst) > 1 else []
19
+
20
+
21
def get_module_name(filename, files: dict, root: str = None):
    """Try to determine the full dotted module name of a source file.

    filename: path of the source file.
    files: mapping whose keys are the paths of the known files. A directory
        counts as a package if "<directory>/__init__.py" is one of the keys.
    root: optional directory at which the module path starts. If given,
        the name consists of every path component from root (inclusive)
        down to the module, and files is not consulted.

    When root is None, the function walks up from the module and starts
    the name at the topmost ancestor directory that is a package; all
    components below it are included.

    Raises ValueError if root is given but is not an ancestor of filename
    (spelled exactly as os.path.dirname() produces it).
    """
    if os.path.basename(filename) == "__init__.py":
        # init file means the module name is the directory name
        module_path = os.path.dirname(filename)
    elif filename.endswith(".py"):
        # Strip only the trailing extension; a ".py" that occurs elsewhere
        # in the path (e.g. in a directory name) must be left alone.
        module_path = filename[: -len(".py")]
    else:
        module_path = filename

    # Each entry is (path, is_root), ordered from outermost to innermost.
    directories = [(module_path, True)]
    if root is None:
        # Walk up until dirname() stops changing (top of the path).
        while directories[0][0] != os.path.dirname(directories[0][0]):
            potential_root = os.path.dirname(directories[0][0])
            is_root = f"{potential_root}/__init__.py" in files
            directories.insert(0, (potential_root, is_root))

        # Drop leading directories that are not packages. The module itself
        # is always flagged True, so this loop terminates.
        while not directories[0][1]:
            directories.pop(0)

    else:  # root is already known - just walk up until it is matched
        while directories[0][0] != root:
            current = directories[0][0]
            parent = os.path.dirname(current)
            if parent == current:
                # Reached the top without meeting root; looping on would
                # never terminate.
                raise ValueError("root %r is not an ancestor of %r" % (root, filename))
            directories.insert(0, (parent, True))

    return ".".join(os.path.basename(path) for path, _ in directories)
53
+
54
+
55
def format_alias(x):
    """Describe an ast.alias (as used in Import and ImportFrom nodes) as text.

    Returns "name as asname" when the alias has an asname, otherwise just
    the name. Raises TypeError if x is not an ast.alias.
    """
    if not isinstance(x, ast.alias):
        raise TypeError("Can only format an ast.alias; got %s" % type(x))

    if x.asname is None:
        return "%s" % (x.name)
    return "%s as %s" % (x.name, x.asname)
64
+
65
+
66
def get_ast_node_name(x):
    """Return the dotted name of an ast.Attribute or ast.Name.

    Anything else is passed through unchanged.
    """
    if isinstance(x, ast.Name):
        return x.id
    if isinstance(x, ast.Attribute):
        # x.value may itself be an ast.Attribute (think "x.y.z"), so recurse.
        owner = get_ast_node_name(x.value)
        return "%s.%s" % (owner, x.attr)
    return x
75
+
76
+
77
+ # Helper for handling binding forms.
78
def sanitize_exprs(exprs):
    """Normalize binding-form expressions into lists of expressions.

    An ast.Tuple or ast.List is replaced by its elements (its .elts);
    any other expression is wrapped in a one-element list. If exprs is
    itself a Python tuple or list, each item is normalized this way and
    the results are returned as a list; otherwise exprs is normalized as
    a single expression.
    """

    def unpack(expr):
        if isinstance(expr, (ast.Tuple, ast.List)):
            return expr.elts
        return [expr]

    if not isinstance(exprs, (tuple, list)):
        return unpack(exprs)
    return list(map(unpack, exprs))
91
+
92
+
93
def resolve_method_resolution_order(class_base_nodes, logger):
    """Compute the method resolution order (MRO) of each analyzed class.

    class_base_nodes: dict cls: [base1, base2, ..., baseN]
                      where cls and basej are all Node objects.
    logger: receives debug output, and an error message if the C3
            linearization fails.

    Returns a dict mapping each key of class_base_nodes to its MRO as a
    list. C3 linearization is tried first; if it turns out to be
    impossible, the result is a depth-first listing of ancestors instead.
    """
    # https://en.wikipedia.org/wiki/C3_linearization#Description

    from functools import reduce
    from operator import add

    class LinearizationImpossible(Exception):
        pass

    def first_of(lst):  # first element, or None if empty
        return lst[0] if len(lst) else None

    def rest_of(lst):  # everything after the first element
        return lst[1:] if len(lst) > 1 else []

    def C3_find_good_head(heads, tails):  # find an element of heads which is not in any of the tails
        flat_tails = reduce(add, tails, [])  # flatten the outer level
        for candidate in heads:
            if candidate not in flat_tails:
                return candidate
        # Every head occurs in some tail: only with cyclic dependencies.
        raise LinearizationImpossible(
            "MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails)
        )

    def without(elt, lists):  # copy of lists with every occurrence of elt removed
        return [[item for item in lst if item != elt] for lst in lists]

    def C3_merge(lists):
        merged = []
        while True:
            logger.debug("MRO: C3 merge: out: %s, lists: %s" % (merged, lists))
            heads = [first_of(lst) for lst in lists if first_of(lst) is not None]
            if not len(heads):
                return merged
            tails = [rest_of(lst) for lst in lists]
            logger.debug("MRO: C3 merge: heads: %s, tails: %s" % (heads, tails))
            chosen = C3_find_good_head(heads, tails)
            logger.debug("MRO: C3 merge: chose head %s" % (chosen))
            merged.append(chosen)
            lists = without(chosen, lists)

    mro = {}  # result
    try:
        memo = {}  # caching/memoization

        def C3_linearize(node):
            logger.debug("MRO: C3 linearizing %s" % (node))
            seen.add(node)
            if node in memo:
                return memo[node]

            bases = class_base_nodes[node] if node in class_base_nodes else ()
            if not len(bases):
                # unknown class or no ancestors
                memo[node] = [node]
            else:  # known and has ancestors
                # linearization of parents...
                lists = []
                for base in bases:
                    if base not in seen:
                        lists.append(C3_linearize(base))
                # ...and the parents themselves (in the order they appear in the ClassDef)
                logger.debug("MRO: parents of %s: %s" % (node, bases))
                lists.append(bases)
                logger.debug("MRO: C3 merging %s" % (lists))
                memo[node] = [node] + C3_merge(lists)
            logger.debug("MRO: C3 linearized %s, result %s" % (node, memo[node]))
            return memo[node]

        for cls in class_base_nodes:
            logger.debug("MRO: analyzing class %s" % (cls))
            seen = set()  # break cycles (separately for each class we start from)
            mro[cls] = C3_linearize(cls)
    except LinearizationImpossible as e:
        logger.error(e)

        # generic fallback: depth-first search of lists of ancestors
        #
        # (so that we can try to draw *something* if the code to be
        #  analyzed is so badly formed that the MRO algorithm fails)

        memo = {}  # caching/memoization

        def lookup_bases_recursive(node):
            seen.add(node)
            if node in memo:
                return memo[node]
            found = [node]  # first look up in obj itself...
            if node in class_base_nodes:  # known class?
                for base in class_base_nodes[node]:  # ...then in its bases
                    if base not in seen:
                        found.append(base)
                        found.extend(lookup_bases_recursive(base))
            memo[node] = found
            return found

        mro = {}
        for cls in class_base_nodes:
            logger.debug("MRO: generic fallback: analyzing class %s" % (cls))
            seen = set()  # break cycles (separately for each class we start from)
            mro[cls] = lookup_bases_recursive(cls)

    return mro
198
+
199
+
200
class UnresolvedSuperCallError(Exception):
    """Raised specifically to signal a super() that could not be resolved."""
204
+
205
+
206
class Scope:
    """Adaptor that makes scopes look somewhat like those from the Python 2
    compiler module, as far as Pyan's CallGraphVisitor is concerned.

    Attributes:
        name: the name of the scope; "" for the top level.
        type: the symbol table's type (useful for __repr__()).
        defs: dict identifier -> assigned value, every value starting as None.
    """

    def __init__(self, table):
        """table: SymTable instance from symtable.symtable()"""
        table_name = table.get_name()
        # Pyan defines the top level as anonymous.
        self.name = "" if table_name == "top" else table_name
        self.type = table.get_type()
        self.defs = dict.fromkeys(table.get_identifiers())  # name:assigned_value

    def __repr__(self):
        return "<Scope: %s %s>" % (self.type, self.name)
221
+
222
+
223
+ # A context manager, sort of a friend of CallGraphVisitor (depends on implementation details)
224
class ExecuteInInnerScope:
    """Context manager that runs a code block inside an extra inner scope.

    Used to analyze lambda, listcomp et al. The scope must still be present
    in analyzer.scopes.

    !!!
    On exit, a defines edge is added from the current namespace to the
    inner scope, marking both nodes as defined.
    !!!
    """

    def __init__(self, analyzer, scopename):
        """analyzer: CallGraphVisitor instance
        scopename: name of the inner scope"""
        self.analyzer = analyzer
        self.scopename = scopename

    def __enter__(self):
        # The inner scopes pollute the graph too much; they are collapsed
        # in postprocessing. They are still needed during analysis, in
        # order to follow the Python 3 scoping rules correctly.
        visitor = self.analyzer
        inner_name = self.scopename

        visitor.name_stack.append(inner_name)
        inner_ns = visitor.get_node_of_current_namespace().get_name()
        if inner_ns not in visitor.scopes:
            # Undo the push, so that the analyzer's state is left unchanged.
            visitor.name_stack.pop()
            raise ValueError("Unknown scope '%s'" % (inner_ns))

        visitor.scope_stack.append(visitor.scopes[inner_ns])
        visitor.context_stack.append(inner_name)

        return self

    def __exit__(self, errtype, errvalue, traceback):
        # TODO: do we need some error handling here?
        visitor = self.analyzer
        inner_name = self.scopename

        # Unwind in the reverse order of __enter__.
        visitor.context_stack.pop()
        visitor.scope_stack.pop()
        visitor.name_stack.pop()

        # Add a defines edge, which marks the inner scope as defined, so
        # that uses of other objects from inside the lambda/listcomp/etc.
        # body can be visualized.
        #
        # All inner scopes of the same scopename (lambda, listcomp, ...) in
        # the current ns are grouped into a single node, as they have no
        # name. The node is namespace-like and has no associated AST node,
        # because it does not represent any unique AST node.
        outer_node = visitor.get_node_of_current_namespace()
        outer_ns = outer_node.get_name()
        inner_node = visitor.get_node(outer_ns, inner_name, None, flavor=Flavor.NAMESPACE)
        if visitor.add_defines_edge(outer_node, inner_node):
            visitor.logger.info("Def from %s to %s %s" % (outer_node, inner_name, inner_node))
        visitor.last_value = inner_node  # Make this inner scope node assignable to track its uses.
pyan_insperation/node.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Abstract node representing data gathered from the analysis."""
3
+
4
+ from enum import Enum
5
+
6
+
7
def make_safe_label(label):
    """Avoid name clashes with GraphViz reserved words such as 'graph'.

    Every occurrence of a reserved word gets an "X" appended (the words are
    processed in order, so a word contained in another one is suffixed
    again); then dots become double underscores and asterisks are dropped.
    """
    reserved = ("digraph", "graph", "cluster", "subgraph", "node")
    for word in reserved:
        label = label.replace(word, word + "X")
    label = label.replace(".", "__")
    return label.replace("*", "")
14
+
15
+
16
class Flavor(Enum):
    """The kind of object that a node represents."""

    UNSPECIFIED = "---"  # as it says on the tin
    UNKNOWN = "???"  # not determined by analysis (wildcard)

    NAMESPACE = "namespace"  # node representing a namespace
    ATTRIBUTE = "attribute"  # attr of something, but not known if class or func.

    IMPORTEDITEM = "import"  # imported item of unanalyzed type

    MODULE = "module"
    CLASS = "class"
    FUNCTION = "function"
    METHOD = "method"  # instance method
    STATICMETHOD = "staticmethod"
    CLASSMETHOD = "classmethod"
    NAME = "name"  # Python name (e.g. "x" in "x = 42")

    @staticmethod
    def specificity(flavor):
        """Return a sort key that is higher for more specific flavors.

        Flavors are partially ordered by how specific their information
        is. The score allows selective overwriting, while the override
        rules stay here, where that information belongs:

            0: UNSPECIFIED, UNKNOWN
            1: NAMESPACE, ATTRIBUTE
            2: IMPORTEDITEM
            3: everything else
        """
        if flavor in (Flavor.UNSPECIFIED, Flavor.UNKNOWN):
            return 0
        if flavor in (Flavor.NAMESPACE, Flavor.ATTRIBUTE):
            return 1
        if flavor == Flavor.IMPORTEDITEM:
            return 2
        return 3

    def __repr__(self):
        return self.value
54
+
55
+
56
class Node:
    """An object in the call graph.

    Nodes have names, and reside in namespaces.

    The namespace is a dot-delimited string of names. It can be blank, '',
    denoting the top level.

    The fully qualified name of a node is its namespace, a dot, and its name;
    except at the top level, where the leading dot is omitted.

    If the namespace has the special value None, it is rendered as *, and the
    node is considered as an unknown node. A uses edge to an unknown node is
    created when the analysis cannot determine which actual node is being used.

    A graph node can be associated with an AST node from the analysis.
    This identifies the syntax object the node represents, and as a bonus,
    provides the line number at which the syntax object appears in the
    analyzed code. The filename, however, must be given manually.

    Nodes can also represent namespaces. These namespace nodes do not have an
    associated AST node. For a namespace node, the "namespace" argument is the
    **parent** namespace, and the "name" argument is the (last component of
    the) name of the namespace itself. For example,

        Node("mymodule", "main", None)

    represents the namespace "mymodule.main".

    Flavor describes the kind of object the node represents.
    See the Flavor enum for currently supported values.
    """

    def __init__(self, namespace, name, ast_node, filename, flavor):
        self.namespace = namespace
        self.name = name
        self.ast_node = ast_node
        self.filename = filename
        self.flavor = flavor
        # Unknown nodes (namespace None) are assumed to be defined.
        self.defined = namespace is None

    def get_short_name(self):
        """Return the name of this Node without its namespace.

        Names of unknown nodes include the *. prefix."""
        return "*." + self.name if self.namespace is None else self.name

    def get_annotated_name(self):
        """Return the short name, plus filename and line number of the
        definition site, if available.

        Names of unknown nodes include the *. prefix."""
        if self.namespace is None:
            return "*." + self.name
        if self.get_level() >= 1 and self.ast_node is not None:
            return "%s\\n(%s:%d)" % (self.name, self.filename, self.ast_node.lineno)
        return self.name

    def get_long_annotated_name(self):
        """Return the short name, plus flavor and namespace, and filename and
        line number of the definition site, if available.

        Names of unknown nodes include the *. prefix."""
        if self.namespace is None:
            return "*." + self.name
        if self.get_level() < 1:
            return self.name
        if self.ast_node is None:
            return "%s\\n\\n(%s in %s)" % (self.name, repr(self.flavor), self.namespace)
        return "%s\\n\\n(%s:%d,\\n%s in %s)" % (
            self.name,
            self.filename,
            self.ast_node.lineno,
            repr(self.flavor),
            self.namespace,
        )

    def get_name(self):
        """Return the full name of this node."""
        if self.namespace is None:
            return "*." + self.name
        if self.namespace == "":
            return self.name
        return self.namespace + "." + self.name

    def get_level(self):
        """Return the level of this node (in terms of nested namespaces).

        The level is defined as the number of '.' in the namespace, plus one.
        Top level is level 0.
        """
        return 0 if self.namespace == "" else 1 + self.namespace.count(".")

    def get_toplevel_namespace(self):
        """Return the name of the top-level namespace of this node, or "" if none."""
        if self.namespace is None:  # group all unknowns in one namespace, "*"
            return "*"
        # Everything before the first dot; the whole string if there is none.
        return self.namespace.partition(".")[0]

    def get_label(self):
        """Return a label for this node, suitable for use in graph formats.

        Unique nodes should have unique labels; and labels should not contain
        problematic characters like dots or asterisks."""
        full_name = self.get_name()
        return make_safe_label(full_name)

    def get_namespace_label(self):
        """Return a label for the namespace of this node, suitable for use
        in graph formats.

        Unique nodes should have unique labels; and labels should not contain
        problematic characters like dots or asterisks."""
        return make_safe_label(self.namespace)

    def __repr__(self):
        return "<Node %s:%s>" % (repr(self.flavor), self.get_name())