Spaces:

Tom-Dev-space
/

code_graph

Sleeping

App Files Files Community

tlarsson committed on May 8, 2025

Commit

b82e7a7

verified ·

1 Parent(s): 763a717

Upload 3 files

Browse files

Files changed (3) hide show

app.py +117 -278
introduction.py +44 -0
utility.py +83 -0

app.py CHANGED Viewed

@@ -1,288 +1,87 @@
-import ast
 import base64
 import dash
 from dash import html, dcc, dash_table
 import dash_cytoscape as cyto
 from dash.dependencies import Input, Output, State
-import json
-import urllib.parse
 app = dash.Dash(__name__, suppress_callback_exceptions=True)
 server = app.server
-uploaded_files = {}
-def parse_functions_from_files(file_dict):
-    functions = {}
-    defined_funcs = set()
-    def infer_type_from_value(value_node):
-        if isinstance(value_node, ast.Call) and isinstance(value_node.func, ast.Attribute):
-            if value_node.func.attr == "read_csv":
-                return "pd.DataFrame"
-            elif value_node.func.attr == "DataFrame":
-                return "pd.DataFrame"
-            elif value_node.func.attr == "array":
-                return "np.ndarray"
-        elif isinstance(value_node, ast.List):
-            return "list"
-        elif isinstance(value_node, ast.Dict):
-            return "dict"
-        elif isinstance(value_node, ast.Set):
-            return "set"
-        elif isinstance(value_node, ast.Constant):
-            if isinstance(value_node.value, str):
-                return "str"
-            elif isinstance(value_node.value, bool):
-                return "bool"
-            elif isinstance(value_node.value, int):
-                return "int"
-            elif isinstance(value_node.value, float):
-                return "float"
-        return "?"
-    for fname, code in file_dict.items():
-        tree = ast.parse(code)
-        for node in ast.walk(tree):
-            if isinstance(node, ast.FunctionDef):
-                defined_funcs.add(node.name)
-    for fname, code in file_dict.items():
-        tree = ast.parse(code)
-        for node in ast.walk(tree):
-            if isinstance(node, ast.FunctionDef):
-                func_name = node.name
-                args = []
-                arg_types = {}
-                local_assignments = {}
-                returns = []
-                calls = []
-                reads_state = set()
-                writes_state = set()
-                for arg in node.args.args:
-                    name = arg.arg
-                    if arg.annotation:
-                        arg_type = ast.unparse(arg.annotation)
-                    else:
-                        arg_type = "?"
-                    arg_types[name] = arg_type
-                    args.append(f"{name}: {arg_type}")
-                for sub in ast.walk(node):
-                    if isinstance(sub, ast.Call) and isinstance(sub.func, ast.Name):
-                        if sub.func.id in defined_funcs:
-                            calls.append(sub.func.id)
-                    elif isinstance(sub, ast.Assign):
-                        for target in sub.targets:
-                            if isinstance(target, ast.Name):
-                                local_assignments[target.id] = infer_type_from_value(sub.value)
-                    elif isinstance(sub, ast.Return):
-                        if sub.value is None:
-                            continue
-                        elif isinstance(sub.value, ast.Tuple):
-                            for elt in sub.value.elts:
-                                label = ast.unparse(elt)
-                                ret_type = local_assignments.get(label, infer_type_from_value(elt))
-                                returns.append(f"{label}: {ret_type}")
-                        else:
-                            label = ast.unparse(sub.value)
-                            ret_type = local_assignments.get(label, infer_type_from_value(sub.value))
-                            returns.append(f"{label}: {ret_type}")
-                    elif isinstance(sub, ast.Subscript):
-                        if (isinstance(sub.value, ast.Attribute)
-                            and isinstance(sub.value.value, ast.Name)
-                            and sub.value.value.id == "st"
-                            and sub.value.attr == "session_state"):
-                            key = None
-                            try:
-                                if isinstance(sub.slice, ast.Constant):
-                                    key = sub.slice.value
-                                elif isinstance(sub.slice, ast.Index) and isinstance(sub.slice.value, ast.Constant):
-                                    key = sub.slice.value.value
-                            except Exception:
-                                pass
-                            if key:
-                                if isinstance(sub.ctx, ast.Store):
-                                    writes_state.add(str(key))
-                                else:
-                                    reads_state.add(str(key))
-                functions[func_name] = {
-                    "args": args,
-                    "returns": returns,
-                    "calls": calls,
-                    "filename": fname,
-                    "reads_state": sorted(reads_state),
-                    "writes_state": sorted(writes_state)
-                }
-    return functions
-def get_reachable_functions(start, graph):
-    visited = set()
-    stack = [start]
-    while stack:
-        node = stack.pop()
-        if node not in visited:
-            visited.add(node)
-            stack.extend(graph.get(node, []))
-    return visited
 app.layout = html.Div([
     html.H2("Function Dependency Visualizer (AST-Based)"),
     dcc.Tabs(id="tab-selector", value="intro", children=[
         dcc.Tab(label="📘 Introduction", value="intro"),
-        dcc.Tab(label="📊 Graph Explorer", value="graph")
     ]),
-    html.Div(id="tab-content")
 ])
 @app.callback(
-    Output("tab-content", "children"),
-    Input("tab-selector", "value")
-)
-def render_tab(tab):
-    if tab == "intro":
-        return dcc.Markdown("""
-### 📘 Introduction
-This tool analyzes uploaded Python files and visualizes how functions call one another.
----
-#### 🔍 Features
-- Upload multiple `.py` files
-- Select a top-level function to explore all functions it calls
-- Function table shows:
-  - Arguments and return values with inferred types (e.g. `df: pd.DataFrame`)
-  - Streamlit `session_state` variables read/written (if applicable)
-- Graph shows:
-  - Call order on edges (`1`, `2`, `3`)
-  - Thicker lines if a function is called multiple times
----
-#### 📂 How to Use
-1. Switch to the **Graph Explorer** tab
-2. Upload one or more `.py` files
-3. Pick the entry point function
-4. Explore the graph and table dynamically
----
-### 👋 About the Creator
-This tool was built by **Tomas Larsson**, a data scientist and financial modeler with a passion for making complex topics easy to explore and understand.
-Tomas is also the creator of [**my.moneytoolbox.com**](https://mymoneytoolbox.com), a blog focused on:
-- Tax-efficient investing
-- Retirement modeling
-- Personal finance analytics
-- Tools for DIY investors and early retirees
-Whether you're a fellow data enthusiast or someone planning their financial future, Tomas's blog is a resource-rich destination with transparent tools, clear explanations, and practical guidance.
-        """, style={"padding": "20px", "maxWidth": "900px"})
-    return html.Div([
-        html.Div([
-            dcc.Upload(id="upload", children=html.Button("Upload Python Files"), multiple=True),
-            html.Div(id="main-function-ui", style={"marginRight": "10px", "width": "300px"}),
-            html.A("Download Graph JSON", id="download-link", download="graph.json", href="", target="_blank")
-        ], style={"display": "flex", "flexDirection": "row", "alignItems": "center", "gap": "10px"}),
-        html.Div(id="file-name", style={"marginTop": "10px"}),
-        cyto.Cytoscape(
-            id="cytoscape-graph",
-            layout={"name": "breadthfirst", "directed": True, "padding": 30, "spacingFactor": 1.5},
-            style={"width": "100%", "height": "600px"},
-            elements=[],
-            stylesheet=[
-                {"selector": "node", "style": {
-                    "label": "data(label)",
-                    "text-wrap": "wrap",
-                    "text-max-width": 120,
-                    "text-valign": "center",
-                    "background-color": "#aed6f1"
-                }},
-                {"selector": "edge", "style": {
-                    "curve-style": "bezier",
-                    "target-arrow-shape": "triangle",
-                    "arrow-scale": 2,
-                    "line-color": "#888",
-                    "target-arrow-color": "#888",
-                    "label": "data(label)",
-                    "font-size": "14px",
-                    "text-margin-y": -10
-                }}
-            ],
-            userZoomingEnabled=True,
-            userPanningEnabled=True,
-            minZoom=0.2,
-            maxZoom=2,
-            wheelSensitivity=0.1
-        ),
-        html.Hr(),
-        html.H4("Function Input/Output Table"),
-        dash_table.DataTable(
-            id="function-table",
-            columns=[],
-            style_table={"overflowX": "auto"},
-            style_cell={
-                "textAlign": "left",
-                "whiteSpace": "normal",
-                "wordBreak": "break-word",
-                "maxWidth": 300
-            }
-        )
-    ])
-@app.callback(
-    Output("main-function-ui", "children"),
-    Output("file-name", "children"),
     Input("upload", "contents"),
     State("upload", "filename")
 )
-def store_multi_upload(list_of_contents, list_of_names):
-    if not list_of_contents or not list_of_names:
-        return "", ""
-    uploaded_files.clear()
-    for content, name in zip(list_of_contents, list_of_names):
         _, content_string = content.split(",")
         uploaded_files[name] = base64.b64decode(content_string).decode("utf-8")
     parsed = parse_functions_from_files(uploaded_files)
-    main_candidates = [fn for fn, f in parsed.items() if len(f["calls"]) > 0]
-    options = [{"label": fn, "value": fn} for fn in main_candidates]
-    dropdown = dcc.Dropdown(id="main-function", options=options, value=options[0]["value"] if options else None)
-    return dropdown, f"Uploaded files: {', '.join(list_of_names)}"
 @app.callback(
-    Output("cytoscape-graph", "elements"),
-    Output("function-table", "data"),
-    Output("function-table", "columns"),
-    Output("download-link", "href"),
-    Input("main-function", "value")
 )
-def update_multi_graph(main_func):
-    if not uploaded_files or not main_func:
-        return [], [], [], ""
-    parsed = parse_functions_from_files(uploaded_files)
-    graph = {k: v["calls"] for k, v in parsed.items()}
-    reachable = get_reachable_functions(main_func, graph)
     nodes = [{"data": {"id": name, "label": name}} for name in reachable]
     edges = []
     for src in reachable:
         call_sequence = parsed[src]["calls"]
         call_index = 1
         for tgt in call_sequence:
-            if tgt not in reachable:
-                continue
-            if not any(e["data"]["source"] == src and e["data"]["target"] == tgt for e in edges):
                 edge = {
                     "data": {
                         "source": src,
@@ -291,39 +90,79 @@ def update_multi_graph(main_func):
                     },
                     "style": {
                         "line-width": 4 if call_sequence.count(tgt) > 1 else 2
-                    },
-                    "classes": "crossfile" if parsed[src]["filename"] != parsed[tgt]["filename"] else ""
                 }
                 edges.append(edge)
                 call_index += 1
-    raw_table_data = [{
-        "Function": fn,
-        "Arguments": ", ".join(parsed[fn]["args"]),
-        "Returns": ", ".join(parsed[fn]["returns"]),
-        "Reads State": ", ".join(parsed[fn]["reads_state"]),
-        "Writes State": ", ".join(parsed[fn]["writes_state"]),
-        "File": parsed[fn]["filename"]
-    } for fn in reachable]
-    show_reads = any(row["Reads State"] for row in raw_table_data)
-    show_writes = any(row["Writes State"] for row in raw_table_data)
-    columns = [
-        {"name": "Function", "id": "Function"},
-        {"name": "Arguments", "id": "Arguments"},
-        {"name": "Returns", "id": "Returns"},
-    ]
-    if show_reads:
-        columns.append({"name": "Reads State", "id": "Reads State"})
-    if show_writes:
-        columns.append({"name": "Writes State", "id": "Writes State"})
-    columns.append({"name": "File", "id": "File"})
-    json_data = json.dumps({"nodes": nodes, "edges": edges}, indent=2)
-    href_data = "data:application/json;charset=utf-8," + urllib.parse.quote(json_data)
-    return nodes + edges, raw_table_data, columns, href_data
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)

 import base64
 import dash
 from dash import html, dcc, dash_table
 import dash_cytoscape as cyto
 from dash.dependencies import Input, Output, State
+import pandas as pd
+from utility import parse_functions_from_files, get_reachable_functions, get_backtrace_functions
+from introduction import get_intro_markdown
 app = dash.Dash(__name__, suppress_callback_exceptions=True)
 server = app.server
 app.layout = html.Div([
     html.H2("Function Dependency Visualizer (AST-Based)"),
+    dcc.Store(id="parsed-functions-store"),
+    dcc.Upload(id="upload", children=html.Button("Upload Python Files"), multiple=True),
+    dcc.Dropdown(id="main-function", style={"width": "300px"}),
+    dcc.Checklist(id="backtrace-toggle", options=[{"label": "Backtrace Mode", "value": "backtrace"}], value=[]),
+    dcc.Checklist(id="function-detail-toggle", options=[{"label": "Show Function-Level Detail in summary tab", "value": "show"}], value=[]),
     dcc.Tabs(id="tab-selector", value="intro", children=[
         dcc.Tab(label="📘 Introduction", value="intro"),
+        dcc.Tab(label="📊 Graph Explorer", value="graph"),
+        dcc.Tab(label="📁 File Summary", value="summary")
     ]),
+    html.Div(id="intro-tab"),
+    html.Div(id="graph-tab"),
+    html.Div(id="summary-tab")
 ])
 @app.callback(
+    Output("main-function", "options"),
+    Output("main-function", "value"),
+    Output("parsed-functions-store", "data"),
     Input("upload", "contents"),
     State("upload", "filename")
 )
def handle_upload(contents, filenames):
    """Decode the uploaded files, parse them, and fill the function picker.

    Args:
        contents: List of upload payloads, each a string of the form
            ``<header>,<base64 data>``; falsy when nothing has been uploaded.
        filenames: List of file names parallel to ``contents``.

    Returns:
        A 3-tuple ``(options, value, parsed)``: the dropdown options (one per
        function that calls at least one other known function), the default
        selection (first option or ``None``), and the parsed function
        metadata to keep in the store.
    """
    if not contents or not filenames:
        return [], None, {}

    uploaded_files = {}
    for content, name in zip(contents, filenames):
        # Split on the first comma only: everything after it is payload, so a
        # header that happens to contain extra commas no longer breaks the
        # two-way unpacking.
        _header, payload = content.split(",", 1)
        uploaded_files[name] = base64.b64decode(payload).decode("utf-8")

    parsed = parse_functions_from_files(uploaded_files)
    options = [
        {"label": fn, "value": fn}
        for fn, meta in parsed.items()
        if meta["calls"]
    ]
    default_value = options[0]["value"] if options else None
    return options, default_value, parsed
 @app.callback(
+    Output("intro-tab", "style"),
+    Output("graph-tab", "style"),
+    Output("summary-tab", "style"),
+    Input("tab-selector", "value")
 )
def toggle_tabs(tab):
    """Return display styles for the intro, graph and summary containers.

    Exactly the container whose tab value equals ``tab`` is shown
    (``display: block``); the others are hidden (``display: none``). An
    unrecognised value hides all three.
    """
    panel_order = ("intro", "graph", "summary")
    styles = []
    for panel in panel_order:
        visibility = "block" if tab == panel else "none"
        styles.append({"display": visibility})
    return tuple(styles)
+@app.callback(Output("intro-tab", "children"), Input("tab-selector", "value"))
def show_intro(_):
    """Return the introduction content; the triggering tab value is ignored."""
    intro_component = get_intro_markdown()
    return intro_component
+@app.callback(
+    Output("graph-tab", "children"),
+    Input("main-function", "value"),
+    Input("backtrace-toggle", "value"),
+    State("parsed-functions-store", "data")
+)
+def update_graph(main_func, backtrace_mode, parsed):
+    if not parsed or not main_func:
+        return html.Div("Upload files and select a main function.")
+    graph = {k: v["calls"] for k, v in parsed.items()}
+    reachable = get_backtrace_functions(main_func, graph) if "backtrace" in backtrace_mode else get_reachable_functions(main_func, graph)
     nodes = [{"data": {"id": name, "label": name}} for name in reachable]
+    #edges = [{"data": {"source": src, "target": tgt}, "classes": "edge"} for src in reachable for tgt in parsed[src]["calls"] if tgt in reachable]
     edges = []
     for src in reachable:
         call_sequence = parsed[src]["calls"]
         call_index = 1
         for tgt in call_sequence:
+            if tgt in reachable:
                 edge = {
                     "data": {
                         "source": src,
                     },
                     "style": {
                         "line-width": 4 if call_sequence.count(tgt) > 1 else 2
+                    }
                 }
                 edges.append(edge)
                 call_index += 1
+    return cyto.Cytoscape(
+        id="cytoscape-graph",
+        layout={"name": "breadthfirst"},
+        style={"width": "100%", "height": "600px"},
+        elements=nodes + edges,
+        stylesheet=[
+            {"selector": "node", "style": {"label": "data(label)", "text-wrap": "wrap"}},
+            #{"selector": "edge", "style": {"curve-style": "bezier", "target-arrow-shape": "triangle", "target-arrow-color": "#888", "line-color": "#888"}}
+            {"selector": "edge", "style": {
+                "label": "data(label)",
+                "curve-style": "bezier",
+                "target-arrow-shape": "triangle",
+                "target-arrow-color": "#888",
+                "line-color": "#888",
+                "font-size": "14px",
+                "text-margin-y": -10
+            }}
+        ],
+        userZoomingEnabled=True,
+        userPanningEnabled=True,
+        minZoom=0.2,
+        maxZoom=2,
+        wheelSensitivity=0.1
+    )
+@app.callback(
+    Output("summary-tab", "children"),
+    Input("parsed-functions-store", "data"),
+    Input("function-detail-toggle", "value")
+)
def update_summary(parsed, toggle):
    """Build the summary table shown on the File Summary tab.

    Args:
        parsed: Mapping of function name -> metadata dict with the keys
            ``"args"``, ``"returns"``, ``"calls"``, ``"filename"``,
            ``"reads_state"`` and ``"writes_state"``.
        toggle: Selected values of the detail checklist; ``"show"`` switches
            from the per-file summary to one row per function.

    Returns:
        A ``dash_table.DataTable`` with the summary rows, or a placeholder
        ``html.Div`` when nothing has been parsed yet.
    """
    if not parsed:
        return html.Div("No summary available.")

    # Reverse index: callee -> list of callers, one entry per call site.
    # setdefault keeps this safe if the stored data names a callee that has
    # no entry of its own.
    reverse_calls = {name: [] for name in parsed}
    for caller, meta in parsed.items():
        for callee in meta["calls"]:
            reverse_calls.setdefault(callee, []).append(caller)

    if toggle and "show" in toggle:
        rows = [
            {
                "Function": fn,
                "File": meta["filename"],
                "Arguments": ", ".join(meta["args"]),
                "Returns": ", ".join(meta["returns"]),
                "Reads State": ", ".join(meta["reads_state"]),
                "Writes State": ", ".join(meta["writes_state"]),
                # Number of call sites that target this function.
                "Called By": len(reverse_calls[fn]),
            }
            for fn, meta in parsed.items()
        ]
    else:
        file_summary = {}
        for func, meta in parsed.items():
            stats = file_summary.setdefault(
                meta["filename"],
                {"Total": 0, "Calls Others": 0, "Called By Others": 0, "Unused": 0},
            )
            # A function calling itself is neither "calling others" nor
            # "called by others"; excluding self-edges lets a function that
            # is only self-recursive be reported as unused.
            calls_others = any(callee != func for callee in meta["calls"])
            called_by_others = any(
                caller != func for caller in reverse_calls[func]
            )
            stats["Total"] += 1
            if calls_others:
                stats["Calls Others"] += 1
            if called_by_others:
                stats["Called By Others"] += 1
            if not calls_others and not called_by_others:
                stats["Unused"] += 1
        rows = [{"File": fname, **stats} for fname, stats in file_summary.items()]

    # `parsed` is non-empty here, so `rows` has at least one entry and every
    # row shares the same keys in the same order.
    return dash_table.DataTable(
        data=rows,
        columns=[{"name": col, "id": col} for col in rows[0]],
        style_table={"overflowX": "auto"},
        style_cell={"whiteSpace": "normal", "textAlign": "left", "padding": "5px", "maxWidth": 300}
    )
 if __name__ == "__main__":
+    app.run(debug=False, port=7860)

introduction.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from dash import dcc
def get_intro_markdown():
    """Return the Introduction tab content as a ``dcc.Markdown`` component.

    The text is static. Blank lines surround every ``---`` rule and follow
    every list: without them Markdown treats a ``---`` directly under a
    paragraph line as a heading underline, and a paragraph directly under a
    list as a continuation of the last bullet.
    """
    return dcc.Markdown("""
### 📘 Introduction
This tool analyzes uploaded Python files and visualizes how functions call one another.

---

#### 🔍 Features
- Upload multiple `.py` files
- Select a top-level function to explore all functions it calls
- Function table shows:
  - Arguments and return values with inferred types (e.g. `df: pd.DataFrame`)
  - Streamlit `session_state` variables read/written (if applicable)
- Graph shows:
  - Call order on edges (`1`, `2`, `3`)
  - Thicker lines if a function is called multiple times

---

#### 📂 How to Use
1. Switch to the **Graph Explorer** tab
2. Upload one or more `.py` files
3. Pick the entry point function
4. Explore the graph and table dynamically

---

### 👋 About the Creator
This tool was built by **Tomas Larsson**, a data scientist and financial modeler with a passion for making complex topics easy to explore and understand.

Tomas is also the creator of [**my.moneytoolbox.com**](https://mymoneytoolbox.com), a blog focused on:
- Tax-efficient investing
- Retirement modeling
- Personal finance analytics
- Tools for DIY investors and early retirees

Whether you're a fellow data enthusiast or someone planning their financial future, Tomas's blog is a resource-rich destination with transparent tools, clear explanations, and practical guidance.
""")

utility.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import ast
def parse_functions_from_files(file_dict):
    """Extract per-function metadata from a set of Python sources.

    Args:
        file_dict: Mapping of file name -> Python source text.

    Returns:
        Dict keyed by function name. Each value is a dict with:
          - "args": list of ``"name: type"`` strings (``?`` when unannotated)
          - "returns": list of ``"expr: type"`` strings, one per returned
            expression (tuple returns contribute one entry per element)
          - "calls": names of called functions that are defined somewhere in
            ``file_dict``, one entry per call site
          - "filename": the file the function was found in
          - "reads_state" / "writes_state": sorted constant keys used in
            ``st.session_state[...]`` loads / stores
        When two functions share a name, the one visited last wins.

    Raises:
        SyntaxError: If any source cannot be parsed.
    """

    def infer_type_from_value(value_node):
        # Best-effort type label for a literal or a well-known constructor.
        if isinstance(value_node, ast.Call) and isinstance(value_node.func, ast.Attribute):
            attr = value_node.func.attr
            if attr in ("read_csv", "DataFrame"):
                return "pd.DataFrame"
            if attr == "array":
                return "np.ndarray"
        elif isinstance(value_node, ast.List):
            return "list"
        elif isinstance(value_node, ast.Dict):
            return "dict"
        elif isinstance(value_node, ast.Set):
            return "set"
        elif isinstance(value_node, ast.Constant):
            constant = value_node.value
            if isinstance(constant, str):
                return "str"
            # bool must be tested before int: bool is a subclass of int.
            if isinstance(constant, bool):
                return "bool"
            if isinstance(constant, int):
                return "int"
            if isinstance(constant, float):
                return "float"
        return "?"

    def session_state_key(subscript):
        # Return the constant key of ``st.session_state[<key>]`` as a string,
        # or None when the subscript is anything else.
        owner = subscript.value
        if not (
            isinstance(owner, ast.Attribute)
            and owner.attr == "session_state"
            and isinstance(owner.value, ast.Name)
            and owner.value.id == "st"
        ):
            return None
        index = subscript.slice
        if isinstance(index, ast.Constant) and index.value is not None:
            return str(index.value)
        return None

    # Parse every file once and reuse the trees for both passes.
    trees = {fname: ast.parse(code) for fname, code in file_dict.items()}

    # Pass 1: every function name defined anywhere, so calls can be limited
    # to functions that are part of the upload.
    defined_funcs = set()
    for tree in trees.values():
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                defined_funcs.add(node.name)

    # Pass 2: collect metadata for each function definition.
    functions = {}
    for fname, tree in trees.items():
        for node in ast.walk(tree):
            if not isinstance(node, ast.FunctionDef):
                continue

            args = []
            for arg in node.args.args:
                arg_type = ast.unparse(arg.annotation) if arg.annotation else "?"
                args.append(f"{arg.arg}: {arg_type}")

            body_nodes = list(ast.walk(node))

            # Collect assignments first. ast.walk is breadth-first, so an
            # assignment nested deeper than the `return` that uses it would
            # otherwise be seen too late to inform the return type.
            local_assignments = {}
            for sub in body_nodes:
                if isinstance(sub, ast.Assign):
                    inferred = infer_type_from_value(sub.value)
                    for target in sub.targets:
                        if isinstance(target, ast.Name):
                            local_assignments[target.id] = inferred

            returns, calls = [], []
            reads_state, writes_state = set(), set()
            for sub in body_nodes:
                if isinstance(sub, ast.Call) and isinstance(sub.func, ast.Name):
                    if sub.func.id in defined_funcs:
                        calls.append(sub.func.id)
                elif isinstance(sub, ast.Return):
                    if sub.value is None:
                        continue
                    if isinstance(sub.value, ast.Tuple):
                        returned = sub.value.elts
                    else:
                        returned = [sub.value]
                    for expr in returned:
                        label = ast.unparse(expr)
                        ret_type = local_assignments.get(label, infer_type_from_value(expr))
                        returns.append(f"{label}: {ret_type}")
                elif isinstance(sub, ast.Subscript):
                    key = session_state_key(sub)
                    if key is None:
                        continue
                    if isinstance(sub.ctx, ast.Store):
                        writes_state.add(key)
                    else:
                        reads_state.add(key)

            functions[node.name] = {
                "args": args,
                "returns": returns,
                "calls": calls,
                "filename": fname,
                "reads_state": sorted(reads_state),
                "writes_state": sorted(writes_state)
            }
    return functions
def get_reachable_functions(start, graph):
    """Return the set of functions reachable from ``start`` via calls.

    Args:
        start: Name of the function to start from; always part of the result.
        graph: Mapping of function name -> list of called function names.
            Names without an entry are treated as calling nothing.

    Returns:
        Set containing ``start`` and everything it calls, directly or
        transitively.
    """
    seen = set()
    pending = [start]
    while pending:
        current = pending.pop()
        if current in seen:
            # Already expanded; also what keeps call cycles from looping.
            continue
        seen.add(current)
        pending.extend(graph.get(current, []))
    return seen
def get_backtrace_functions(target, graph):
    """Return the set of functions that lead to ``target`` via calls.

    Args:
        target: Name of the function to trace back from; always part of the
            result.
        graph: Mapping of function name -> list of called function names.

    Returns:
        Set containing ``target`` and every function that calls it, directly
        or transitively.
    """
    # Invert the call graph: callee -> list of its callers.
    callers_of = {}
    for caller in graph:
        for callee in graph[caller]:
            if callee not in callers_of:
                callers_of[callee] = []
            callers_of[callee].append(caller)

    seen = set()
    frontier = [target]
    while frontier:
        current = frontier.pop()
        if current in seen:
            continue
        seen.add(current)
        frontier.extend(callers_of.get(current, []))
    return seen