sisense-datamodel / streamlit_graph_app.py
vyankatesh2003's picture
yes
56875d9 verified
import streamlit as st
import json
from collections import defaultdict, deque
import tempfile
import os
# try to import pyvis, handle gracefully if not available
try:
from pyvis.network import Network
PYVIS_AVAILABLE = True
except ImportError:
PYVIS_AVAILABLE = False
st.error("⚠️ pyvis package is not available. Please install it: `pip install pyvis`")
st.stop()
# page config
st.set_page_config(
page_title="Sisense Table Relationship Graph",
page_icon="πŸ”—",
layout="wide",
initial_sidebar_state="expanded"
)
@st.cache_data
def load_model():
"""load sisense model file"""
with open('assethub-v3.smodel', 'r') as f:
return json.load(f)
@st.cache_data
def build_mappings(model):
"""build table and column mappings"""
table_map = {}
column_map = {}
dataset_map = {}
for dataset in model.get('datasets', []):
dataset_oid = dataset.get('oid')
dataset_name = dataset.get('name', 'unknown')
dataset_map[dataset_oid] = dataset_name
schema = dataset.get('schema', {})
tables = schema.get('tables', [])
for table in tables:
table_oid = table.get('oid')
table_name = table.get('name', table.get('id', 'unknown'))
table_map[table_oid] = table_name
for column in table.get('columns', []):
column_oid = column.get('oid')
column_name = column.get('name', column.get('id', 'unknown'))
column_map[(dataset_oid, table_oid, column_oid)] = column_name
return table_map, column_map, dataset_map
@st.cache_data
def build_graph(model, table_map, column_map):
"""build graph from relations"""
graph = defaultdict(set)
join_details = []
for relation in model.get('relations', []):
columns = relation.get('columns', [])
if len(columns) != 2:
continue
col1 = columns[0]
col2 = columns[1]
table1_oid = col1.get('table')
table2_oid = col2.get('table')
dataset1_oid = col1.get('dataset')
dataset2_oid = col2.get('dataset')
column1_oid = col1.get('column')
column2_oid = col2.get('column')
col1_name = column_map.get((dataset1_oid, table1_oid, column1_oid), 'unknown')
col2_name = column_map.get((dataset2_oid, table2_oid, column2_oid), 'unknown')
graph[table1_oid].add(table2_oid)
graph[table2_oid].add(table1_oid)
join_details.append({
'table1': table1_oid,
'table2': table2_oid,
'table1_name': table_map.get(table1_oid, 'unknown'),
'table2_name': table_map.get(table2_oid, 'unknown'),
'column1': col1_name,
'column2': col2_name
})
return graph, join_details
@st.cache_data
def build_tree(root, graph, table_map):
"""build bfs tree structure"""
tree = {}
visited = set()
queue = deque([(root, 0)])
visited.add(root)
while queue:
current, level = queue.popleft()
if current not in tree:
tree[current] = {
'name': table_map.get(current, 'unknown'),
'level': level,
'children': []
}
for neighbor in graph.get(current, []):
if neighbor not in visited:
visited.add(neighbor)
queue.append((neighbor, level + 1))
tree[current]['children'].append(neighbor)
if neighbor not in tree:
tree[neighbor] = {
'name': table_map.get(neighbor, 'unknown'),
'level': level + 1,
'children': []
}
return tree
def find_root_table(graph, table_map):
"""find most connected table as root"""
if not graph:
return list(table_map.keys())[0] if table_map else None
max_connections = 0
root_table = None
for table_oid, connections in graph.items():
if len(connections) > max_connections:
max_connections = len(connections)
root_table = table_oid
return root_table
def get_bfs_levels(root, graph):
"""get nodes grouped by bfs level"""
levels = {}
visited = set()
queue = deque([(root, 0)])
visited.add(root)
while queue:
node, level = queue.popleft()
if level not in levels:
levels[level] = []
levels[level].append(node)
for neighbor in graph.get(node, []):
if neighbor not in visited:
visited.add(neighbor)
queue.append((neighbor, level + 1))
return levels
def prioritize_back_edges(back_edges_list, graph, top_n=None):
"""prioritize back edges by importance"""
edge_scores = []
for u, v in back_edges_list:
connections_u = len(graph.get(u, set()))
connections_v = len(graph.get(v, set()))
score = connections_u + connections_v
edge_scores.append((score, (u, v)))
edge_scores.sort(reverse=True)
if top_n:
return [edge for _, edge in edge_scores[:top_n]]
return [edge for _, edge in edge_scores]
def create_pyvis_network(graph_data, table_map, tree_structure, join_lookup,
show_tree_edges=True, show_back_edges=True,
back_edges_list=None, max_back_edges=None,
show_levels=None, selected_node=None, tree_mode=False):
"""create interactive pyvis network"""
if not PYVIS_AVAILABLE:
raise ImportError("pyvis is not available")
net = Network(
height='800px',
width='100%',
bgcolor='#ffffff',
font_color='#000000',
directed=True,
layout='hierarchical' if (show_tree_edges or tree_mode) else None,
cdn_resources='inline' # use inline resources for better compatibility
)
# set physics options with edge repulsion and straight edges
# forceAtlas2Based provides natural edge repulsion and node spacing
if tree_mode or show_tree_edges:
# tree mode with edge repulsion
net.set_options("""
{
"edges": {
"smooth": {
"enabled": false,
"type": "straight"
}
},
"physics": {
"enabled": true,
"forceAtlas2Based": {
"gravitationalConstant": -80,
"centralGravity": 0.01,
"springLength": 300,
"springConstant": 0.12,
"damping": 0.5,
"avoidOverlap": 1
},
"solver": "forceAtlas2Based",
"stabilization": {
"enabled": true,
"iterations": 250,
"fit": true
}
}
}
""")
else:
# force-directed with edge repulsion
net.set_options("""
{
"edges": {
"smooth": {
"enabled": false,
"type": "straight"
}
},
"physics": {
"enabled": true,
"forceAtlas2Based": {
"gravitationalConstant": -80,
"centralGravity": 0.01,
"springLength": 200,
"springConstant": 0.12,
"damping": 0.5,
"avoidOverlap": 1
},
"solver": "forceAtlas2Based",
"stabilization": {
"enabled": true,
"iterations": 250,
"fit": true
}
}
}
""")
# level colors
level_colors = {
0: '#3399ff', # blue
1: '#4dcc4d', # green
2: '#ff9933', # orange
3: '#cc66cc', # pink
4: '#9999ff', # purple
}
# add nodes
all_nodes = set()
if show_tree_edges:
for node in tree_structure.keys():
all_nodes.add(node)
if show_back_edges and back_edges_list:
for u, v in back_edges_list:
all_nodes.add(u)
all_nodes.add(v)
# filter by levels if specified
if show_levels is not None:
filtered_nodes = set()
for node in all_nodes:
level = tree_structure.get(node, {}).get('level', 99)
if level in show_levels:
filtered_nodes.add(node)
all_nodes = filtered_nodes
for node_oid in all_nodes:
level = tree_structure.get(node_oid, {}).get('level', 0)
color = level_colors.get(level, '#cccccc')
name = table_map.get(node_oid, 'unknown')
# highlight selected node
border_color = '#ff0000' if node_oid == selected_node else '#000000'
border_width = 5 if node_oid == selected_node else 2
net.add_node(
node_oid,
label=name,
color=color,
level=level,
title=f"{name}<br>Level: {level}<br>ID: {node_oid}",
borderWidth=border_width,
borderColor=border_color,
font={'size': 14, 'face': 'Arial'}
)
# add tree edges
if show_tree_edges:
tree_edges = set()
def collect_tree_edges(node_oid, tree, parent_oid=None):
if parent_oid:
tree_edges.add((parent_oid, node_oid))
for child_oid in tree[node_oid]['children']:
collect_tree_edges(child_oid, tree, node_oid)
root = find_root_table(graph_data, table_map)
if root in tree_structure:
collect_tree_edges(root, tree_structure)
for u, v in tree_edges:
if u in all_nodes and v in all_nodes:
net.add_edge(
u, v,
label="",
color='#0066cc',
width=3,
arrows='to',
title="",
smooth=False
)
# add back edges
if show_back_edges and back_edges_list:
filtered_back_edges = back_edges_list
if max_back_edges:
filtered_back_edges = prioritize_back_edges(back_edges_list, graph_data, top_n=max_back_edges)
for u, v in filtered_back_edges:
if u in all_nodes and v in all_nodes:
net.add_edge(
u, v,
label="",
color='#ff0000',
width=2,
dashes=True,
arrows='',
title="",
smooth=False
)
return net
def apply_edge_modifications(graph, join_details, removed_edges, added_edges):
"""apply edge modifications to graph and join_details"""
modified_graph = defaultdict(set)
modified_join_details = []
# copy original graph
for node, neighbors in graph.items():
modified_graph[node] = neighbors.copy()
# remove edges
for u, v in removed_edges:
if u in modified_graph:
modified_graph[u].discard(v)
if v in modified_graph:
modified_graph[v].discard(u)
# add edges
for edge_info in added_edges:
u = edge_info['table1']
v = edge_info['table2']
modified_graph[u].add(v)
modified_graph[v].add(u)
modified_join_details.append(edge_info)
# add original joins that weren't removed
for join in join_details:
u, v = join['table1'], join['table2']
if (u, v) not in removed_edges and (v, u) not in removed_edges:
modified_join_details.append(join)
return modified_graph, modified_join_details
# main app
st.title("πŸ”— Sisense Table Relationship Graph")
st.markdown("Interactive visualization of table relationships with dynamic edge management")
# initialize session state for edge management
if 'removed_edges' not in st.session_state:
st.session_state.removed_edges = set()
if 'added_edges' not in st.session_state:
st.session_state.added_edges = []
# load data
try:
model = load_model()
table_map, column_map, dataset_map = build_mappings(model)
original_graph, original_join_details = build_graph(model, table_map, column_map)
# apply modifications
graph, join_details = apply_edge_modifications(
original_graph,
original_join_details,
st.session_state.removed_edges,
st.session_state.added_edges
)
# build tree
root_table = find_root_table(graph, table_map)
tree_structure = build_tree(root_table, graph, table_map)
bfs_levels = get_bfs_levels(root_table, graph)
# build join lookup
join_lookup = {}
for join in join_details:
key1 = (join['table1'], join['table2'])
key2 = (join['table2'], join['table1'])
join_lookup[key1] = join
join_lookup[key2] = join
# identify back edges
tree_edges_set = set()
def collect_tree_edges_set(node_oid, tree, parent_oid=None):
if parent_oid:
tree_edges_set.add((parent_oid, node_oid))
tree_edges_set.add((node_oid, parent_oid))
for child_oid in tree[node_oid]['children']:
collect_tree_edges_set(child_oid, tree, node_oid)
collect_tree_edges_set(root_table, tree_structure)
back_edges = []
for join in join_details:
u, v = join['table1'], join['table2']
if (u, v) not in tree_edges_set and (v, u) not in tree_edges_set:
back_edges.append((u, v))
# sidebar controls
st.sidebar.header("βš™οΈ Controls")
# tree mode toggle
st.sidebar.subheader("View Mode")
tree_mode = st.sidebar.checkbox("Tree Mode (Clean View)", value=True)
# edge type controls
st.sidebar.subheader("Edge Types")
if tree_mode:
show_tree_edges = True
show_back_edges = False
st.sidebar.info("🌳 Tree mode: showing only tree edges")
else:
show_tree_edges = st.sidebar.checkbox("Show Tree Edges", value=True)
show_back_edges = st.sidebar.checkbox("Show Back Edges", value=False)
# back edge filtering
if show_back_edges:
st.sidebar.subheader("Back Edge Filtering")
max_back_edges_option = st.sidebar.selectbox(
"Show Back Edges",
["All", "Top 10", "Top 20", "Top 50", "Top 100"],
index=1
)
max_back_edges_map = {
"All": None,
"Top 10": 10,
"Top 20": 20,
"Top 50": 50,
"Top 100": 100
}
max_back_edges = max_back_edges_map[max_back_edges_option]
else:
max_back_edges = None
# level filtering
st.sidebar.subheader("Level Filtering")
all_levels = sorted(bfs_levels.keys())
selected_levels = st.sidebar.multiselect(
"Show Levels",
all_levels,
default=all_levels,
format_func=lambda x: f"Level {x} ({len(bfs_levels[x])} tables)"
)
# edge management
st.sidebar.subheader("πŸ”§ Edge Management")
# remove edge section
with st.sidebar.expander("Remove Edges", expanded=False):
all_edges = []
for join in join_details:
u, v = join['table1'], join['table2']
u_name = table_map.get(u, 'unknown')
v_name = table_map.get(v, 'unknown')
edge_key = (u, v)
if edge_key not in st.session_state.removed_edges and (v, u) not in st.session_state.removed_edges:
all_edges.append({
'key': edge_key,
'label': f"{u_name} ↔ {v_name}",
'join': join
})
if all_edges:
edge_options = [f"{edge['label']} ({edge['join']['column1']} ↔ {edge['join']['column2']})"
for edge in all_edges]
selected_edge_idx = st.selectbox("Select edge to remove", range(len(edge_options)),
format_func=lambda x: edge_options[x])
if st.button("Remove Edge", key="remove_edge"):
selected_edge = all_edges[selected_edge_idx]
edge_key = selected_edge['key']
st.session_state.removed_edges.add(edge_key)
# remove from added_edges if it was manually added
st.session_state.added_edges = [
e for e in st.session_state.added_edges
if not ((e['table1'] == edge_key[0] and e['table2'] == edge_key[1]) or
(e['table1'] == edge_key[1] and e['table2'] == edge_key[0]))
]
st.rerun()
else:
st.info("No edges available to remove")
# add edge section
with st.sidebar.expander("Add Edge", expanded=False):
all_table_names = sorted([table_map.get(oid, 'unknown') for oid in table_map.keys()])
source_table = st.selectbox("Source Table", all_table_names, key="add_source")
target_table = st.selectbox("Target Table", all_table_names, key="add_target")
# find oids
source_oid = None
target_oid = None
for oid, name in table_map.items():
if name == source_table:
source_oid = oid
if name == target_table:
target_oid = oid
if source_oid and target_oid and source_oid != target_oid:
# check if edge already exists in current graph (after modifications)
edge_exists_in_graph = target_oid in graph.get(source_oid, set())
# check if already in added_edges
already_added = any(
(e['table1'] == source_oid and e['table2'] == target_oid) or
(e['table1'] == target_oid and e['table2'] == source_oid)
for e in st.session_state.added_edges
)
if edge_exists_in_graph and not already_added:
st.warning("Edge already exists in the graph")
elif already_added:
st.info("Edge is already queued to be added")
else:
col1_name = st.text_input("Source Column", key="add_col1", placeholder="e.g., id")
col2_name = st.text_input("Target Column", key="add_col2", placeholder="e.g., foreign_id")
if st.button("Add Edge", key="add_edge"):
new_edge = {
'table1': source_oid,
'table2': target_oid,
'table1_name': source_table,
'table2_name': target_table,
'column1': col1_name or 'unknown',
'column2': col2_name or 'unknown'
}
st.session_state.added_edges.append(new_edge)
st.rerun()
elif source_oid == target_oid:
st.warning("Source and target cannot be the same")
# show modified edges summary
if st.session_state.removed_edges or st.session_state.added_edges:
with st.sidebar.expander("Modified Edges Summary", expanded=False):
if st.session_state.removed_edges:
st.write("**Removed:**")
for u, v in list(st.session_state.removed_edges)[:10]: # show first 10
u_name = table_map.get(u, 'unknown')
v_name = table_map.get(v, 'unknown')
st.write(f"- {u_name} ↔ {v_name}")
if len(st.session_state.removed_edges) > 10:
st.write(f"... and {len(st.session_state.removed_edges) - 10} more")
if st.session_state.added_edges:
st.write("**Added:**")
for edge in st.session_state.added_edges[:10]: # show first 10
st.write(f"- {edge['table1_name']} ↔ {edge['table2_name']}")
if len(st.session_state.added_edges) > 10:
st.write(f"... and {len(st.session_state.added_edges) - 10} more")
# reset edges
if st.sidebar.button("Reset All Edge Changes", type="secondary"):
st.session_state.removed_edges = set()
st.session_state.added_edges = []
st.rerun()
# node search
st.sidebar.subheader("Node Search")
all_table_names = sorted([table_map.get(oid, 'unknown') for oid in table_map.keys()])
search_term = st.sidebar.text_input("Search table", "")
selected_node = None
if search_term:
matching_tables = [name for name in all_table_names if search_term.lower() in name.lower()]
if matching_tables:
selected_table = st.sidebar.selectbox("Select table", matching_tables)
# find oid for selected table
for oid, name in table_map.items():
if name == selected_table:
selected_node = oid
break
# stats
st.sidebar.subheader("πŸ“Š Statistics")
st.sidebar.metric("Total Tables", len(table_map))
st.sidebar.metric("Tree Edges", len(tree_edges_set) // 2)
st.sidebar.metric("Back Edges", len(back_edges))
st.sidebar.metric("BFS Levels", len(bfs_levels))
st.sidebar.metric("Removed Edges", len(st.session_state.removed_edges))
st.sidebar.metric("Added Edges", len(st.session_state.added_edges))
# create network
net = create_pyvis_network(
graph,
table_map,
tree_structure,
join_lookup,
show_tree_edges=show_tree_edges,
show_back_edges=show_back_edges,
back_edges_list=back_edges,
max_back_edges=max_back_edges,
show_levels=selected_levels if selected_levels else None,
selected_node=selected_node,
tree_mode=tree_mode
)
# save and display
try:
with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as tmp_file:
net.save_graph(tmp_file.name)
html_file = tmp_file.name
with open(html_file, 'r', encoding='utf-8') as f:
html_content = f.read()
# use iframe with sandbox for better compatibility
st.components.v1.html(html_content, height=850, scrolling=True)
# cleanup
try:
os.unlink(html_file)
except OSError:
pass # file might already be deleted
except Exception as e:
st.error(f"❌ Error generating graph visualization: {str(e)}")
st.exception(e)
st.info("πŸ’‘ If running on Snowflake, ensure pyvis is installed in your environment.")
# info section
with st.expander("ℹ️ About this visualization"):
st.markdown("""
**Graph Structure:**
- **Tree Edges** (blue, solid): Parent-child relationships from BFS tree
- **Back Edges** (red, dashed): Additional edges that create cycles
**View Modes:**
- **Tree Mode**: Clean tree-only view (default) - shows only hierarchical tree structure
- **Full Graph Mode**: Toggle to show both tree and back edges
**Edge Management:**
- **Remove Edges**: Select and remove any edge from the graph
- **Add Edges**: Manually add new edges between tables
- Changes persist during the session
- Use "Reset All Edge Changes" to restore original graph
**Controls:**
- Toggle edge types to show/hide tree edges or back edges
- Filter back edges by importance (top N most connected)
- Filter by BFS level to focus on specific hierarchy levels
- Search and highlight specific tables
**Interactivity:**
- Click and drag nodes to rearrange
- Hover over nodes/edges for details
- Zoom with mouse wheel
- Pan by dragging background
""")
except FileNotFoundError:
st.error("❌ File 'assethub-v3.smodel' not found. Please ensure the file is in the same directory.")
except Exception as e:
st.error(f"❌ Error loading data: {str(e)}")
st.exception(e)