File size: 10,699 Bytes
8d40657
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283


import ast
import os.path

from .node import Flavor


def head(lst):
    if len(lst):
        return lst[0]


def tail(lst):
    if len(lst) > 1:
        return lst[1:]
    else:
        return []


def get_module_name(filename, files:dict, root: str = None):
    """Try to determine the full module name of a source file, by figuring out
    if its directory looks like a package (i.e. has an __init__.py file or
    there is a .py file in it )."""

    if os.path.basename(filename) == "__init__.py":
        # init file means module name is directory name
        module_path = os.path.dirname(filename)
    else:
        # otherwise it is the filename without extension
        module_path = filename.replace(".py", "")

    # find the module root - walk up the tree and check if it contains .py files - if yes. it is the new root
    directories = [(module_path, True)]
    if root is None:
        while directories[0][0] != os.path.dirname(directories[0][0]):
            potential_root = os.path.dirname(directories[0][0])
            #is_root = any([f == "__init__.py" for f in os.listdir(potential_root)]) # old code
            is_root = True if f"{potential_root}/__init__.py" in files.keys() else False
            directories.insert(0, (potential_root, is_root))

        # keep directories where itself of parent is root
        while not directories[0][1]:
            directories.pop(0)

    else:  # root is already known - just walk up until it is matched
        while directories[0][0] != root:
            potential_root = os.path.dirname(directories[0][0])
            directories.insert(0, (potential_root, True))

    mod_name = ".".join([os.path.basename(f[0]) for f in directories])
    return mod_name


def format_alias(x):
    """Return human-readable description of an ast.alias (used in Import and ImportFrom nodes)."""
    if not isinstance(x, ast.alias):
        raise TypeError("Can only format an ast.alias; got %s" % type(x))

    if x.asname is not None:
        return "%s as %s" % (x.name, x.asname)
    else:
        return "%s" % (x.name)


def get_ast_node_name(x):
    """Return human-readable name of ast.Attribute or ast.Name. Pass through anything else."""
    if isinstance(x, ast.Attribute):
        # x.value might also be an ast.Attribute (think "x.y.z")
        return "%s.%s" % (get_ast_node_name(x.value), x.attr)
    elif isinstance(x, ast.Name):
        return x.id
    else:
        return x


# Helper for handling binding forms.
def sanitize_exprs(exprs):
    """Convert ast.Tuples in exprs to Python tuples; wrap result in a Python tuple."""

    def process(expr):
        if isinstance(expr, (ast.Tuple, ast.List)):
            return expr.elts  # .elts is a Python tuple
        else:
            return [expr]

    if isinstance(exprs, (tuple, list)):
        return [process(expr) for expr in exprs]
    else:
        return process(exprs)


def resolve_method_resolution_order(class_base_nodes, logger):
    """Compute the method resolution order (MRO) for each of the analyzed classes.

    class_base_nodes: dict cls: [base1, base2, ..., baseN]
                      where dict and basej are all Node objects.
    """

    # https://en.wikipedia.org/wiki/C3_linearization#Description

    class LinearizationImpossible(Exception):
        pass

    from functools import reduce
    from operator import add

    def C3_find_good_head(heads, tails):  # find an element of heads which is not in any of the tails
        flat_tails = reduce(add, tails, [])  # flatten the outer level
        for hd in heads:
            if hd not in flat_tails:
                break
        else:  # no break only if there are cyclic dependencies.
            raise LinearizationImpossible(
                "MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails)
            )
        return hd

    def remove_all(elt, lst):  # remove all occurrences of elt from lst, return a copy
        return [x for x in lst if x != elt]

    def remove_all_in(elt, lists):  # remove elt from all lists, return a copy
        return [remove_all(elt, lst) for lst in lists]

    def C3_merge(lists):
        out = []
        while True:
            logger.debug("MRO: C3 merge: out: %s, lists: %s" % (out, lists))
            heads = [head(lst) for lst in lists if head(lst) is not None]
            if not len(heads):
                break
            tails = [tail(lst) for lst in lists]
            logger.debug("MRO: C3 merge: heads: %s, tails: %s" % (heads, tails))
            hd = C3_find_good_head(heads, tails)
            logger.debug("MRO: C3 merge: chose head %s" % (hd))
            out.append(hd)
            lists = remove_all_in(hd, lists)
        return out

    mro = {}  # result
    try:
        memo = {}  # caching/memoization

        def C3_linearize(node):
            logger.debug("MRO: C3 linearizing %s" % (node))
            seen.add(node)
            if node not in memo:
                #  unknown class                     or no ancestors
                if node not in class_base_nodes or not len(class_base_nodes[node]):
                    memo[node] = [node]
                else:  # known and has ancestors
                    lists = []
                    # linearization of parents...
                    for baseclass_node in class_base_nodes[node]:
                        if baseclass_node not in seen:
                            lists.append(C3_linearize(baseclass_node))
                    # ...and the parents themselves (in the order they appear in the ClassDef)
                    logger.debug("MRO: parents of %s: %s" % (node, class_base_nodes[node]))
                    lists.append(class_base_nodes[node])
                    logger.debug("MRO: C3 merging %s" % (lists))
                    memo[node] = [node] + C3_merge(lists)
            logger.debug("MRO: C3 linearized %s, result %s" % (node, memo[node]))
            return memo[node]

        for node in class_base_nodes:
            logger.debug("MRO: analyzing class %s" % (node))
            seen = set()  # break cycles (separately for each class we start from)
            mro[node] = C3_linearize(node)
    except LinearizationImpossible as e:
        logger.error(e)

        # generic fallback: depth-first search of lists of ancestors
        #
        # (so that we can try to draw *something* if the code to be
        #  analyzed is so badly formed that the MRO algorithm fails)

        memo = {}  # caching/memoization

        def lookup_bases_recursive(node):
            seen.add(node)
            if node not in memo:
                out = [node]  # first look up in obj itself...
                if node in class_base_nodes:  # known class?
                    for baseclass_node in class_base_nodes[node]:  # ...then in its bases
                        if baseclass_node not in seen:
                            out.append(baseclass_node)
                            out.extend(lookup_bases_recursive(baseclass_node))
                memo[node] = out
            return memo[node]

        mro = {}
        for node in class_base_nodes:
            logger.debug("MRO: generic fallback: analyzing class %s" % (node))
            seen = set()  # break cycles (separately for each class we start from)
            mro[node] = lookup_bases_recursive(node)

    return mro


class UnresolvedSuperCallError(Exception):
    """For specifically signaling an unresolved super()."""

    pass


class Scope:
    """Adaptor that makes scopes look somewhat like those from the Python 2
    compiler module, as far as Pyan's CallGraphVisitor is concerned."""

    def __init__(self, table):
        """table: SymTable instance from symtable.symtable()"""
        name = table.get_name()
        if name == "top":
            name = ""  # Pyan defines the top level as anonymous
        self.name = name
        self.type = table.get_type()  # useful for __repr__()
        self.defs = {iden: None for iden in table.get_identifiers()}  # name:assigned_value

    def __repr__(self):
        return "<Scope: %s %s>" % (self.type, self.name)


# A context manager, sort of a friend of CallGraphVisitor (depends on implementation details)
class ExecuteInInnerScope:
    """Execute a code block with the scope stack augmented with an inner scope.

    Used to analyze lambda, listcomp et al. The scope must still be present in
    analyzer.scopes.

    !!!
    Will add a defines edge from the current namespace to the inner scope,
    marking both nodes as defined.
    !!!
    """

    def __init__(self, analyzer, scopename):
        """analyzer: CallGraphVisitor instance
        scopename: name of the inner scope"""
        self.analyzer = analyzer
        self.scopename = scopename

    def __enter__(self):
        # The inner scopes pollute the graph too much; we will need to collapse
        # them in postprocessing. However, we must use them during analysis to
        # follow the Python 3 scoping rules correctly.

        analyzer = self.analyzer
        scopename = self.scopename

        analyzer.name_stack.append(scopename)
        inner_ns = analyzer.get_node_of_current_namespace().get_name()
        if inner_ns not in analyzer.scopes:
            analyzer.name_stack.pop()
            raise ValueError("Unknown scope '%s'" % (inner_ns))
        
        analyzer.scope_stack.append(analyzer.scopes[inner_ns])
        analyzer.context_stack.append(scopename)

        return self

    def __exit__(self, errtype, errvalue, traceback):
        # TODO: do we need some error handling here?
        analyzer = self.analyzer
        scopename = self.scopename

        analyzer.context_stack.pop()
        analyzer.scope_stack.pop()
        analyzer.name_stack.pop()

        # Add a defines edge, which will mark the inner scope as defined,
        # allowing any uses to other objects from inside the lambda/listcomp/etc.
        # body to be visualized.
        #
        # All inner scopes of the same scopename (lambda, listcomp, ...) in the
        # current ns will be grouped into a single node, as they have no name.
        # We create a namespace-like node that has no associated AST node,
        # as it does not represent any unique AST node.
        from_node = analyzer.get_node_of_current_namespace()
        ns = from_node.get_name()
        to_node = analyzer.get_node(ns, scopename, None, flavor=Flavor.NAMESPACE)
        if analyzer.add_defines_edge(from_node, to_node):
            analyzer.logger.info("Def from %s to %s %s" % (from_node, scopename, to_node))
        analyzer.last_value = to_node  # Make this inner scope node assignable to track its uses.