| | import tempfile |
| | import streamlit as st |
| | import pandas as pd |
| | import networkx as nx |
| | import matplotlib.pyplot as plt |
| | import seaborn as sns |
| | import io |
| | import base64 |
| | import json |
| | from matplotlib import pylab |
| | from PIL import Image |
| |
|
| | |
| |
|
| | |
| |
|
| |
|
def get_table_download_link(df, file_name, file_description):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The frame is serialised without its index, base64-encoded and embedded
    in a ``data:`` URI, so the link carries the whole file inside the markup.

    Args:
        df: DataFrame to export.
        file_name: Value placed in the anchor's ``download`` attribute.
        file_description: Text displayed for the link.

    Returns:
        The ``<a>`` element as a string. Neither ``file_name`` nor
        ``file_description`` is HTML-escaped.
    """
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()
    return (
        f'<a href="data:file/csv;base64,{payload}" '
        f'download="{file_name}">{file_description}</a>'
    )
| |
|
| |
|
| |
|
# Page heading rendered at the top of the Streamlit app.
st.title("Social Network Analysis")
| |
|
| | |
| | |
def constructEdgeListFromDict(messagesDF):
    """Turn a table of messages into a weighted reply edge list.

    Only the ``user`` and ``replytoauthor`` columns are used. Rows whose
    ``replytoauthor`` is missing are discarded, and the remaining rows are
    counted per (user, replytoauthor) pair.

    Args:
        messagesDF: DataFrame containing at least the columns ``user`` and
            ``replytoauthor``.

    Returns:
        DataFrame with columns ``user``, ``replytoauthor`` and ``weight``,
        where ``weight`` is the number of rows for that pair.
    """
    pair_columns = ['user', 'replytoauthor']
    replies = messagesDF[pair_columns].dropna(subset=['replytoauthor'])
    weighted_edges = replies.groupby(pair_columns).size().reset_index(name='weight')
    return weighted_edges
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
def save_graph(graph, file_name):
    """Render ``graph`` with a spring layout and return it as a PIL image.

    The drawing is done on a dedicated 20x20 inch, 80 dpi figure and written
    to an in-memory PNG, so no file is left on disk and no other open
    matplotlib figure is touched.

    Args:
        graph: networkx graph to draw.
        file_name: Unused; kept so existing callers keep working.

    Returns:
        A fully loaded ``PIL.Image.Image`` holding the PNG rendering. The
        layout is not seeded, so node positions differ between calls.
    """
    fig = plt.figure(figsize=(20, 20), dpi=80)
    try:
        ax = fig.add_subplot(111)
        ax.axis('off')

        pos = nx.spring_layout(graph)
        nx.draw_networkx_nodes(graph, pos, ax=ax)
        nx.draw_networkx_edges(graph, pos, ax=ax)
        nx.draw_networkx_labels(graph, pos, ax=ax)

        # No manual xlim/ylim: spring_layout centres positions on the origin,
        # so clipping the axes at 0 would hide every node with a negative
        # coordinate. Autoscaling also copes with an empty graph.
        buffer = io.BytesIO()
        fig.savefig(buffer, format="png", bbox_inches="tight")
    finally:
        # Close this exact figure even if drawing fails, so figures do not
        # pile up across calls.
        plt.close(fig)

    buffer.seek(0)
    img = Image.open(buffer)
    # Image.open is lazy; force the pixel data to be decoded now.
    img.load()
    return img
| |
|
| |
|
def getNodesCommunity(communities):
    """Map every vertex to the index of the community that contains it.

    Args:
        communities: Iterable of vertex collections; the position of each
            collection in the iterable is used as its community id.

    Returns:
        Dict of ``vertex -> community index``. If a vertex appears in more
        than one collection, the last one wins.
    """
    return {
        vertex: index
        for index, members in enumerate(communities)
        for vertex in members
    }
| |
|
def getGraphCommunities(G, seed=42):
    """Assign each node of ``G`` to a community via greedy modularity.

    Edge direction is dropped with ``to_undirected`` before networkx's
    ``greedy_modularity_communities`` is run with the ``'weight'`` edge
    attribute. Communities are numbered from largest to smallest.

    Args:
        G: networkx graph whose edges carry a ``weight`` attribute.
        seed: Accepted but not used by this implementation.

    Returns:
        Dict of ``node -> community index``.
    """
    undirected = G.to_undirected()
    found = list(nx.community.greedy_modularity_communities(undirected, 'weight'))
    # Largest community first, so index 0 is the biggest group.
    found.sort(key=len, reverse=True)
    return getNodesCommunity(found)
| |
|
def getNodesPageRank(G):
    """Return the PageRank score of every node in ``G``.

    Uses networkx's ``pagerank`` with a damping factor of 0.85 and the
    ``weight`` edge attribute.

    Args:
        G: networkx graph.

    Returns:
        Dict of ``node -> PageRank value``.
    """
    return nx.pagerank(G, weight='weight', alpha=0.85)
| | |
| | |
| |
|
def getNodesCentralityBetwenness(G):
    """Return the betweenness centrality of every node in ``G``.

    The graph is converted to an undirected one first, then networkx's
    ``betweenness_centrality`` is called with the ``weight`` edge attribute.

    Args:
        G: networkx graph.

    Returns:
        Dict of ``node -> betweenness centrality``.
    """
    return nx.betweenness_centrality(G.to_undirected(), weight='weight')
| | |
| | |
| |
|
def getNodesIndegreeOutdegree(G):
    """Return the weighted in-degree to out-degree ratio of every node.

    The ratio is ``in_degree / (out_degree + 1)``; the ``+ 1`` keeps the
    division defined for nodes with no outgoing edges.

    Args:
        G: Directed networkx graph whose edges carry a ``weight`` attribute.

    Returns:
        Dict of ``node -> ratio``.
    """
    out_degree = G.out_degree(weight='weight')
    return {
        node: in_degree / (out_degree[node] + 1)
        for node, in_degree in G.in_degree(weight='weight')
    }
| |
|
| | |
def socialNetworkAnalysisMetrics(G):
    """Compute community and centrality metrics for every node of ``G``.

    Runs community detection, PageRank, betweenness and the
    in-degree/out-degree ratio, printing progress to stdout, then merges the
    per-node values into one record per node.

    Args:
        G: Directed networkx graph whose edges carry a ``weight`` attribute.

    Returns:
        List of dicts, one per node in ``G.nodes`` order, with the keys
        ``user``, ``pagerank``, ``community``, ``betweenness`` and
        ``indegree_outdegree``.
    """
    print("[SNA] Starting Social Network Analysis")

    print("[SNA] Community detection: Modularity Maximization...")
    communities = getGraphCommunities(G)
    print('[SNA] Community detection: Modularity Maximization -- Done')

    print("[SNA] Centrality Measure: Page Rank...")
    pagerank = getNodesPageRank(G)
    print("[SNA] Centrality Measure: Page Rank -- Done")

    print("[SNA] Centrality Measure: Betweenness...")
    betweenness = getNodesCentralityBetwenness(G)
    print("[SNA] Centrality Measure: Betweenness -- Done")

    print("[SNA] Centrality Measure: Indegree/Outdegree...")
    inOut = getNodesIndegreeOutdegree(G)
    print("[SNA] Centrality Measure: Indegree/Outdegree -- Done")

    print("[SNA] Merging results...")
    # The metric dicts are keyed by the node objects themselves, so look up
    # by ``v`` rather than ``str(v)``; a stringified key raises KeyError as
    # soon as node ids are not strings (e.g. numeric user ids).
    allResults = [
        {
            'user': v,
            'pagerank': pagerank[v],
            'community': communities[v],
            'betweenness': betweenness[v],
            'indegree_outdegree': inOut[v],
        }
        for v in G.nodes
    ]
    print("[SNA] Merging results -- Done")
    return allResults
| | |
def _top_users_bar_plot(df, metric):
    """Draw the ten users with the largest ``metric`` on a fresh figure.

    Args:
        df: DataFrame with a ``user`` column and a numeric ``metric`` column.
        metric: Name of the column to rank and plot.

    Returns:
        The matplotlib figure holding the bar chart.
    """
    # sns.set applies the seaborn theme and makes 16x9 the default size of
    # figures created from here on.
    sns.set(rc={'figure.figsize': (16, 9)})
    top_users = (
        df.groupby('user', as_index=False)[metric]
        .sum()
        .sort_values(by=metric, ascending=False)
        .head(10)
    )
    # A dedicated figure/axes pair stops seaborn from drawing onto whatever
    # axes happens to be current, which would stack charts on each other.
    fig, ax = plt.subplots()
    sns.barplot(data=top_users, x='user', y=metric, hue='user', dodge=False, ax=ax)
    ax.set(xticklabels=[])
    return fig


def bar_plot3(df):
    """Bar chart of the ten users with the highest betweenness."""
    return _top_users_bar_plot(df, 'betweenness')


def pagerank_bar_plot(df):
    """Bar chart of the ten users with the highest PageRank."""
    return _top_users_bar_plot(df, 'pagerank')


def indegree_outdegree_bar_plot(df):
    """Bar chart of the ten users with the highest in/out-degree ratio."""
    return _top_users_bar_plot(df, 'indegree_outdegree')


# Historical duplicate of pagerank_bar_plot; kept as an alias for
# compatibility.
pagerank_bar_plot2 = pagerank_bar_plot


def _show_figure(fig):
    """Render ``fig`` in the app, then close it so figures do not accumulate."""
    st.pyplot(fig)
    plt.close(fig)


uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, sep=';', encoding='utf8')

    # Column names are matched in lower case from here on.
    df.columns = df.columns.str.lower()

    st.write(df)

    missing_columns = [c for c in ('user', 'replytoauthor') if c not in df.columns]
    if missing_columns:
        st.error(
            "The CSV file is missing required column(s): "
            + ", ".join(missing_columns)
        )
        st.stop()

    edgelistDF = constructEdgeListFromDict(df)
    if edgelistDF.empty:
        st.warning("No replies found in the uploaded file, so there is no network to analyse.")
        st.stop()

    G = nx.from_pandas_edgelist(
        edgelistDF,
        source='user',
        target='replytoauthor',
        edge_attr='weight',
        create_using=nx.DiGraph,
    )

    st.write("Graph size: ", G.size())

    with st.spinner('Loading image'):
        img = save_graph(G, "my_graph.png")

    if img is not None:
        st.write("Here is the graph visualization:")
        st.image(img)

    with st.spinner('Getting SNA metrics'):
        result = socialNetworkAnalysisMetrics(G)

    # Build the frame straight from the records: no JSON round-trip, so node
    # ids keep their type and numpy scalars need no serialisation.
    df_result = pd.DataFrame(result)
    st.write(df_result)

    fig, ax = plt.subplots()
    df_result.plot.scatter(
        x='pagerank',
        y='betweenness',
        c='community',
        colormap='CMRmap',
        ax=ax,
    )
    ax.set_title('Pagerank vs Betweenness')
    ax.set_xlabel('Pagerank')
    ax.set_ylabel('Betweenness')
    _show_figure(fig)

    st.write("Top Betweenness: ")
    _show_figure(bar_plot3(df_result))

    st.title("PageRank Bar Plot")
    _show_figure(pagerank_bar_plot(df_result))

    st.title("In-degree Out-degree Bar Plot")
    _show_figure(indegree_outdegree_bar_plot(df_result))
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| | |
| |
|
| |
|