"""Streamlit app: Social Network Analysis of a message/reply graph.

Expects a semicolon-separated CSV with (at least) columns ``user`` and
``replytoauthor``.  Builds a directed, weighted reply graph and reports
community membership, PageRank, betweenness and in/out-degree metrics,
plus a few summary plots.
"""

import base64
import io
import json
import tempfile

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns
import streamlit as st
from matplotlib import pylab  # type: ignore
from PIL import Image


def get_table_download_link(df, file_name, file_description):
    """Return an HTML anchor that downloads *df* as a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export.
    file_name : str
        Name the browser saves the file under.
    file_description : str
        Visible link text.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    # BUG FIX: the original returned only ``file_description`` — the base64
    # payload was computed but never embedded, so the link downloaded nothing.
    href = (
        f'<a href="data:file/csv;base64,{b64}" '
        f'download="{file_name}">{file_description}</a>'
    )
    return href


st.title("Social Network Analysis")


# -------------------------------------------
# Graph-construction and SNA helper functions
# -------------------------------------------

def constructEdgeListFromDict(messagesDF):
    """Build a weighted edge list (user -> replytoauthor) from messages.

    Rows with an empty ``replytoauthor`` are dropped; the edge weight is
    the number of replies from ``user`` to ``replytoauthor``.
    """
    # Subset to only the columns that define an edge.
    messagesDF = messagesDF[['user', 'replytoauthor']]
    # Remove non-existing edges (reply-to is empty).
    edgesDF = messagesDF.dropna(subset=['replytoauthor'])
    # Weight each (user, replytoauthor) pair by its number of occurrences.
    edgesDFWeight = (
        edgesDF.groupby(['user', 'replytoauthor'])
        .size()
        .reset_index(name='weight')
    )
    return edgesDFWeight


def save_graph(graph, file_name):
    """Render *graph* with a spring layout and return it as a PIL Image.

    ``file_name`` is kept for backward compatibility but is no longer
    written to: the figure is rendered into an in-memory buffer.  (The
    original used ``NamedTemporaryFile(delete=False)``, which leaked one
    undeleted temp file per call, and relied on PIL's lazy file read.)
    """
    plt.figure(num=None, figsize=(20, 20), dpi=80)
    plt.axis('off')
    fig = plt.figure(1)
    pos = nx.spring_layout(graph)  # type: ignore
    nx.draw_networkx_nodes(graph, pos)  # type: ignore
    nx.draw_networkx_edges(graph, pos)  # type: ignore
    nx.draw_networkx_labels(graph, pos)  # type: ignore
    cut = 1.00
    xmax = cut * max(xx for xx, yy in pos.values())
    ymax = cut * max(yy for xx, yy in pos.values())
    plt.xlim(0, xmax)
    plt.ylim(0, ymax)
    # Render into memory and force PIL to read the pixels now, before the
    # figure and buffer are released.
    buf = io.BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    buf.seek(0)
    img = Image.open(buf)
    img.load()
    pylab.close()
    del fig
    return img


def getNodesCommunity(communities):
    """Flatten a list of communities into a ``{node: community_index}`` dict."""
    vertexComms = {}
    for com, com_vertices in enumerate(communities):
        for v in list(com_vertices):
            vertexComms[v] = com
    return vertexComms


def getGraphCommunities(G, seed=42):
    """Detect communities via greedy modularity maximization.

    Returns a ``{node: community_index}`` dict, communities numbered from
    largest (0) to smallest.  ``seed`` is accepted for interface
    compatibility; greedy modularity maximization is deterministic.
    """
    # NOTE(review): label propagation was tried previously but detected far
    # too many communities; modularity maximization ignores edge direction,
    # hence the undirected copy below.
    G_un = G.to_undirected()  # the algorithm requires an undirected graph
    community_generator = nx.community.greedy_modularity_communities(G_un, 'weight')
    communities = sorted(community_generator, key=len, reverse=True)
    nodeComms = getNodesCommunity(communities)
    del G_un
    return nodeComms


def getNodesPageRank(G):
    """Return ``{node: pagerank}`` using damping 0.85 and edge weights."""
    pr = nx.pagerank(G, alpha=0.85, weight='weight')
    return pr


def getNodesCentralityBetwenness(G):
    """Return ``{node: betweenness}`` on the undirected weighted graph.

    (Name spelling preserved for existing callers.)
    """
    G_un = G.to_undirected()  # betweenness computed on the undirected graph
    betweenness = nx.betweenness_centrality(G_un, weight='weight')
    return betweenness


def getNodesIndegreeOutdegree(G):
    """Return ``{node: in_degree / (out_degree + 1)}``, weighted.

    The ``+ 1`` in the denominator avoids division by zero for nodes
    that never reply to anyone.
    """
    inDeg = G.in_degree(weight='weight')
    outDeg = G.out_degree(weight='weight')
    vs = list(G.nodes)
    result = {v: inDeg[v] / (outDeg[v] + 1) for v in vs}
    return result


def socialNetworkAnalysisMetrics(G):
    """Compute all SNA metrics for *G* and merge them per node.

    Returns a list of dicts with keys ``user``, ``pagerank``,
    ``community``, ``betweenness`` and ``indegree_outdegree``.
    """
    print("[SNA] Starting Social Network Analysis")

    print("[SNA] Community detection: Modularity Maximization...")
    communities = getGraphCommunities(G)
    print('[SNA] Community detection: Modularity Maximization -- Done')

    print("[SNA] Centrality Measure: Page Rank...")
    pagerank = getNodesPageRank(G)
    print("[SNA] Centrality Measure: Page Rank -- Done")

    print("[SNA] Centrality Measure: Betweenness...")
    betweenness = getNodesCentralityBetwenness(G)
    print("[SNA] Centrality Measure: Betweenness -- Done")

    print("[SNA] Centrality Measure: Indegree/Outdegree...")
    inOut = getNodesIndegreeOutdegree(G)
    print("[SNA] Centrality Measure: Indegree/Outdegree -- Done")

    # Merge results.
    # BUG FIX: the metric dicts are keyed by the node objects themselves,
    # so indexing with ``str(v)`` raised KeyError for non-string nodes;
    # identical behavior for the usual string node labels.
    print("[SNA] Merging results...")
    allResults = []
    for v in list(G.nodes):
        allResults.append({
            'user': v,
            'pagerank': pagerank[v],
            'community': communities[v],
            'betweenness': betweenness[v],
            'indegree_outdegree': inOut[v],
        })
    print("[SNA] Merging results -- Done")

    del pagerank
    del communities
    del betweenness
    del inOut
    return allResults


# -------------------------------------------
# Streamlit UI
# -------------------------------------------

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, sep=';', encoding='utf8')
    # Normalize column names so 'user'/'replytoauthor' lookups work.
    df.columns = df.columns.str.lower()
    st.write(df)

    # Build the directed, weighted reply graph.
    edgelistDF = constructEdgeListFromDict(df)
    G = nx.from_pandas_edgelist(edgelistDF, source='user', target='replytoauthor', edge_attr='weight', create_using=nx.DiGraph)  # type: ignore
    st.write("Graph size: ", G.size())

    with st.spinner('Loading image'):
        img = save_graph(G, "my_graph.png")

    if img is not None:
        st.write("Here is the graph visualization:")
        st.image(img)

    with st.spinner('Getting SNA metrics'):
        result = socialNetworkAnalysisMetrics(G)

    # ``result`` is already a list of records; building the DataFrame
    # directly replaces the deprecated ``pd.read_json(<literal str>)``
    # round-trip through json.dumps.
    df_result = pd.DataFrame(result)
    st.write(df_result)

    # ------------------------------
    # Scatter: PageRank vs Betweenness, colored by community.
    fig, ax = plt.subplots()
    ax2 = df_result.plot.scatter(x='pagerank', y='betweenness', c='community', colormap='CMRmap', ax=ax)
    ax.set_title('Pagerank vs Betweenness')
    ax.set_xlabel('Pagerank')
    ax.set_ylabel('Betweenness')
    st.pyplot(fig)

    st.write("Top Betweenness: ")

    def bar_plot3(df):
        """Bar plot of the ten users with the highest total betweenness."""
        sns.set(rc={'figure.figsize': (16, 9)})
        g = df.groupby('user', as_index=False)['betweenness'].sum().sort_values(by='betweenness', ascending=False).head(10)
        bar_plot_fig = sns.barplot(data=g, x='user', y='betweenness', hue='user', dodge=False)
        bar_plot_fig.set(xticklabels=[])
        return bar_plot_fig.get_figure()

    st.pyplot(bar_plot3(df_result))

    def pagerank_bar_plot(df):  # type: ignore
        """Bar plot of the ten users with the highest total PageRank."""
        sns.set(rc={'figure.figsize': (16, 9)})
        g = df.groupby('user', as_index=False)['pagerank'].sum().sort_values(by='pagerank', ascending=False).head(10)
        pagerank_bar_plot_fig = sns.barplot(data=g, x='user', y='pagerank', hue='user', dodge=False)
        pagerank_bar_plot_fig.set(xticklabels=[])
        return pagerank_bar_plot_fig.get_figure()

    st.title("PageRank Bar Plot")
    st.pyplot(pagerank_bar_plot(df_result))

    def indegree_outdegree_bar_plot(df):
        """Bar plot of the ten users with the highest in/out-degree ratio."""
        sns.set(rc={'figure.figsize': (16, 9)})
        g = df.groupby('user', as_index=False)['indegree_outdegree'].sum().sort_values(by='indegree_outdegree', ascending=False).head(10)
        indegree_outdegree_bar_plot_fig = sns.barplot(data=g, x='user', y='indegree_outdegree', hue='user', dodge=False)
        indegree_outdegree_bar_plot_fig.set(xticklabels=[])
        return indegree_outdegree_bar_plot_fig.get_figure()

    st.title("In-degree Out-degree Bar Plot")
    st.pyplot(indegree_outdegree_bar_plot(df_result))

    # NOTE(review): this duplicates pagerank_bar_plot and renders the same
    # figure a second time — kept to preserve the original page layout.
    def pagerank_bar_plot2(df):
        """Duplicate of :func:`pagerank_bar_plot` (original behavior kept)."""
        sns.set(rc={'figure.figsize': (16, 9)})
        g = df.groupby('user', as_index=False)['pagerank'].sum().sort_values(by='pagerank', ascending=False).head(10)
        pagerank_bar_plot_fig = sns.barplot(data=g, x='user', y='pagerank', hue='user', dodge=False)
        pagerank_bar_plot_fig.set(xticklabels=[])
        return pagerank_bar_plot_fig.get_figure()

    st.pyplot(pagerank_bar_plot2(df_result))