# Streamlit app: faculty collaboration network for FY23 & FY24 awards.
#
# Reads three CSV files (award.csv, award_display_title.csv,
# faculty_college.csv), builds an undirected graph that links each PI to the
# Co-PIs listed on the same award, colors nodes by college, sizes them by the
# summed authorized amount, and renders the result with pyvis inside the page.
from collections import defaultdict
from html import escape

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import streamlit as st
from matplotlib.colors import to_hex
from pyvis.network import Network

# Palette cycled through (in sorted college order) when assigning node colors.
COLORBLIND_PALETTE = [
    '#E6194B',  # Emergency Red (stop-sign red)
    '#3CB44B',  # Traffic Cone Green
    '#4363D8',  # Deep Ocean Blue
    '#FFE119',  # Taxi Yellow
    '#911EB4',  # Royal Purple
    '#F58231',  # Construction Orange
    '#42D4F4',  # Poolside Cyan
    '#FABEBE',  # Bubblegum Pink (lightest pink kept)
    '#00A4CC',  # Airplane Blue (sky-cyan hybrid)
    '#A6FF47',  # Alien Green (neon yellow-green)
    '#FF4500',  # Lava Orange (red-orange differentiation)
    '#5E0DAC',  # Amethyst Purple (blue-purple hybrid)
    '#00FFAF',  # Glowstick Green (blue-green)
    '#FF9933',  # Highway Orange (golden orange)
    '#4B0082',  # Midnight Indigo (deep blue-purple)
    '#8B0000',  # Barn Red (dark red differentiation)
    '#00CED1',  # Tropical Teal (bright blue-green)
    '#FFD300',  # School Bus Yellow (pure golden yellow)
]

# (smallest, largest) rendered node size; funding totals are scaled linearly
# between these two values.
NODE_SIZE_RANGE = (20, 40)

# Label and color used when a faculty member has no usable college entry.
UNKNOWN_COLLEGE = 'Unknown'
FALLBACK_COLOR = '#888888'

# Set page configuration
st.set_page_config(layout="wide")

# Title
st.title("Faculty Collaboration Network Analysis -FY23 & FY24")


# Load data
@st.cache_data
def load_data():
    """Read the three input CSV files from the working directory.

    Returns:
        A tuple ``(award_df, award_df_remove_amount, faculty_college_df)``
        with the contents of ``award.csv``, ``award_display_title.csv`` and
        ``faculty_college.csv`` respectively.
    """
    award_df = pd.read_csv('award.csv')
    award_df_remove_amount = pd.read_csv('award_display_title.csv')
    faculty_college_df = pd.read_csv('faculty_college.csv')
    return award_df, award_df_remove_amount, faculty_college_df


award_df, award_df_remove_amount, faculty_college_df = load_data()
award_df.columns = award_df.columns.str.strip()  # Clean column names
faculty_college_df = faculty_college_df.apply(
    lambda x: x.str.strip() if x.dtype == "object" else x
)
faculty_college_map = dict(
    zip(faculty_college_df['Faculty Name'], faculty_college_df['College of'])
)


def convert_amount(amount_str):
    """Convert a currency cell such as ``"$1,234.50"`` to a rounded float.

    Args:
        amount_str: The raw cell value. Strings may contain ``$`` and
            thousands separators; numeric values are accepted as-is.

    Returns:
        The amount as a float rounded to two decimals. Missing values
        (``None``/NaN) and blank strings yield ``0.0`` so that a single empty
        cell does not abort the whole page.

    Raises:
        ValueError: If a non-blank string cannot be parsed as a number.
    """
    if isinstance(amount_str, str):
        cleaned = amount_str.replace("$", "").replace(",", "").strip()
        if not cleaned:
            return 0.0
        return round(float(cleaned), 2)
    # Non-string cells: pandas yields NaN for empty CSV fields and plain
    # numbers when the column has no currency formatting.
    if amount_str is None or pd.isna(amount_str):
        return 0.0
    return round(float(amount_str), 2)


award_df['Authorized Amount'] = award_df['Authorized Amount'].apply(convert_amount)


def _resolve_college(college_map, faculty):
    """Return the college recorded for *faculty*, or 'Unknown' if missing."""
    college = college_map.get(faculty, UNKNOWN_COLLEGE)
    if college is None or pd.isna(college):
        return UNKNOWN_COLLEGE
    return college


# Create graph and process data
@st.cache_resource
def create_network(df, college_map):
    """Build the PI / Co-PI collaboration graph with display attributes.

    Args:
        df: Award table with the columns ``'PI Name'``, ``'Co PI Name'``
            (names separated by ``|``) and ``'Authorized Amount'`` (numeric).
        college_map: Mapping of faculty name to college name.

    Returns:
        A tuple ``(G, college_colors)`` where ``G`` is an ``nx.Graph`` whose
        nodes carry ``color``, ``size`` and ``title`` attributes, and
        ``college_colors`` maps each college present in the graph to its hex
        color.
    """
    G = nx.Graph()
    faculty_amounts = defaultdict(float)

    for _, row in df.iterrows():
        # Process PI information; rows without a PI contribute nothing.
        pi = str(row['PI Name']).strip()
        if not pi or pi == 'nan':
            continue
        amount = row['Authorized Amount']

        # Process Co-PI information
        co_pi_names = row['Co PI Name']
        if pd.isna(co_pi_names):
            co_pis = []
        else:
            co_pis = [
                name.strip()
                for name in str(co_pi_names).split('|')
                if name.strip() not in ('', 'nan')
            ]

        # Add PI node and attributes
        G.add_node(pi)
        faculty_amounts[pi] += amount

        # Add Co-PI nodes and edges; the full award amount is credited to
        # every participant.
        for co_pi in co_pis:
            if co_pi != pi:  # Prevent self-loops
                G.add_node(co_pi)
                faculty_amounts[co_pi] += amount
                G.add_edge(pi, co_pi)

    # Get college for each node
    college_assignment = {
        node: _resolve_college(college_map, node) for node in G.nodes()
    }

    # Create color mapping, cycling through the palette if there are more
    # colleges than colors.
    unique_colleges = sorted(set(college_assignment.values()))
    college_colors = {
        college: COLORBLIND_PALETTE[i % len(COLORBLIND_PALETTE)]
        for i, college in enumerate(unique_colleges)
    }

    # Calculate node sizes based on total funding. The bounds are defined
    # unconditionally so an empty award table no longer hits an unbound name.
    size_floor, size_ceiling = NODE_SIZE_RANGE
    if faculty_amounts:
        min_amount = min(faculty_amounts.values())
        max_amount = max(faculty_amounts.values())
    else:
        min_amount = max_amount = 0.0

    # Add attributes to nodes. Sizes are looked up by node name rather than by
    # position, so they cannot drift out of step with the graph's node order.
    for node in G.nodes():
        college = college_assignment[node]
        if max_amount == min_amount:
            size = size_floor
        else:
            share = (faculty_amounts[node] - min_amount) / (max_amount - min_amount)
            size = size_floor + (size_ceiling - size_floor) * share
        G.nodes[node]['color'] = college_colors.get(college, FALLBACK_COLOR)
        G.nodes[node]['size'] = size
        G.nodes[node]['title'] = f"{node} | College: {college}"

    return G, college_colors


# Create network
G, college_colors = create_network(award_df, faculty_college_map)

# Create pyvis network
# NOTE(review): notebook=True is kept from the original; confirm it is
# actually wanted when rendering inside Streamlit rather than Jupyter.
nt = Network(
    height='800px',
    width='100%',
    bgcolor='#ffffff',
    font_color='#333333',
    notebook=True
)
nt.from_nx(G)
nt.toggle_hide_edges_on_drag(True)

# Save and show network
nt.save_graph('network.html')
with open('network.html', 'r', encoding='utf-8') as f:
    network_html = f.read()

# Add some explanation
st.markdown("""
**Network Interaction Guide:**
- Drag nodes to rearrange the network
- Scroll to zoom in/out to see the details: Faculty Name | College
- Click and drag background to pan
- Hover over nodes to see details
- Use the control panel (click the gear icon) to adjust physics settings
- Double-click on an award title to view the full text.
""")

# Show college color legend
st.subheader("College Legend")
cols = st.columns(4)
for i, (college, color) in enumerate(college_colors.items()):
    # The original unpacked `color` but never rendered it; show a swatch in
    # the node color and escape the name because raw HTML is enabled.
    cols[i % 4].markdown(
        f"<span style='color:{color}'>&#9632;</span> {escape(str(college))}",
        unsafe_allow_html=True,
    )

# Display network
st.subheader("Collaboration Network")
st.components.v1.html(network_html, height=800, scrolling=True)

# Show raw data
st.subheader("Award Data")
st.dataframe(award_df_remove_amount, use_container_width=True)
st.subheader("Faculty Data")
st.dataframe(faculty_college_df, use_container_width=True)