Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import networkx as nx | |
| from pyvis.network import Network | |
| from matplotlib.colors import to_hex | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from collections import defaultdict | |
# Page chrome: use the wide layout and show the dashboard heading.
PAGE_TITLE = "Faculty Collaboration Network Analysis -FY23 & FY24"

st.set_page_config(layout="wide")
st.title(PAGE_TITLE)
| # Load data | |
def load_data(data_dir="."):
    """Read the three CSV files the dashboard is built from.

    Args:
        data_dir: Directory containing ``award.csv``,
            ``award_display_title.csv`` and ``faculty_college.csv``.
            Defaults to the current working directory, which matches the
            previous hard-coded relative paths.

    Returns:
        A tuple ``(award_df, award_df_remove_amount, faculty_college_df)``
        of DataFrames, one per file, in that order.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    base = Path(data_dir)
    award_df = pd.read_csv(base / 'award.csv')
    award_df_remove_amount = pd.read_csv(base / 'award_display_title.csv')
    faculty_college_df = pd.read_csv(base / 'faculty_college.csv')
    return award_df, award_df_remove_amount, faculty_college_df
award_df, award_df_remove_amount, faculty_college_df = load_data()

# Strip whitespace from the header names of both tables so the literal
# column lookups used in this script ('PI Name', 'Faculty Name', ...) match.
award_df.columns = award_df.columns.str.strip()
faculty_college_df.columns = faculty_college_df.columns.str.strip()

# Trim every text cell. This is done per cell rather than with
# ``Series.str.strip()`` because the ``.str`` accessor replaces non-string
# values in a mixed column with NaN.
faculty_college_df = faculty_college_df.apply(
    lambda column: column.map(
        lambda cell: cell.strip() if isinstance(cell, str) else cell
    )
)

# Lookup table: faculty name -> college.
faculty_college_map = dict(
    zip(faculty_college_df['Faculty Name'], faculty_college_df['College of'])
)
def convert_amount(amount_str):
    """Convert a currency value such as ``"$1,234.50"`` to a float.

    Args:
        amount_str: The raw cell value. Usually a string containing an
            optional ``$`` sign and thousands separators, but numeric
            values and missing values (``None`` / NaN) are accepted too.

    Returns:
        The amount rounded to two decimals. Missing or blank values yield
        ``0.0`` so they do not disturb later sums.

    Raises:
        ValueError: If the text left after removing ``$`` and ``,`` is not
            a number.
    """
    if isinstance(amount_str, str):
        cleaned = amount_str.replace("$", "").replace(",", "").strip()
        if not cleaned:
            return 0.0
    else:
        # Already numeric (or missing); nothing to strip.
        cleaned = amount_str

    if pd.isna(cleaned):
        return 0.0

    value = float(cleaned)
    # The literal text "nan" parses to a float NaN; treat it as missing too.
    return 0.0 if pd.isna(value) else round(value, 2)
# Turn every 'Authorized Amount' cell into a rounded float.
award_df['Authorized Amount'] = award_df['Authorized Amount'].map(convert_amount)
| # Create graph and process data | |
_UNKNOWN_COLLEGE = 'Unknown'
_FALLBACK_NODE_COLOR = '#888888'
_NODE_SIZE_RANGE = (20, 40)  # (smallest, largest) node size
_COLLEGE_PALETTE = (
    '#E6194B',  # red
    '#3CB44B',  # green
    '#4363D8',  # blue
    '#FFE119',  # yellow
    '#911EB4',  # purple
    '#F58231',  # orange
    '#42D4F4',  # cyan
    '#FABEBE',  # light pink
    '#00A4CC',  # sky blue
    '#A6FF47',  # yellow-green
    '#FF4500',  # red-orange
    '#5E0DAC',  # blue-purple
    '#00FFAF',  # blue-green
    '#FF9933',  # golden orange
    '#4B0082',  # indigo
    '#8B0000',  # dark red
    '#00CED1',  # teal
    '#FFD300',  # golden yellow
)


def _parse_co_pis(raw_names, pi):
    """Split a '|'-separated Co-PI cell into clean names, excluding the PI."""
    if pd.isna(raw_names):
        return []
    names = (name.strip() for name in str(raw_names).split('|'))
    # Dropping the PI's own name prevents self-loops in the graph.
    return [name for name in names if name not in ('', 'nan') and name != pi]


def _college_for(node, college_map):
    """Return the node's college as text, or 'Unknown' when missing/blank."""
    college = college_map.get(node)
    if college is None or pd.isna(college):
        return _UNKNOWN_COLLEGE
    college = str(college).strip()
    return college or _UNKNOWN_COLLEGE


def create_network(df, college_map):
    """Build the faculty collaboration graph from the award table.

    Every row contributes its PI as a node, each Co-PI as a node, and one
    edge between the PI and each Co-PI. The row's authorized amount is
    credited to the PI and to every Co-PI on that row.

    Args:
        df: DataFrame with the columns ``'PI Name'``, ``'Co PI Name'``
            (names separated by ``|``) and ``'Authorized Amount'``
            (numeric).
        college_map: Mapping of faculty name -> college. Names that are
            absent, or mapped to a missing/blank value, are shown as
            ``'Unknown'``.

    Returns:
        A tuple ``(G, college_colors)`` where ``G`` is an ``nx.Graph``
        whose nodes carry ``color``, ``size`` and ``title`` attributes, and
        ``college_colors`` maps each college present in the graph to its
        hex colour.
    """
    G = nx.Graph()
    faculty_amounts = defaultdict(float)

    for _, row in df.iterrows():
        pi = str(row['PI Name']).strip()
        if not pi or pi == 'nan':
            continue

        amount = row['Authorized Amount']
        if pd.isna(amount):
            # A missing amount must not turn the running totals into NaN.
            amount = 0.0

        G.add_node(pi)
        faculty_amounts[pi] += amount

        for co_pi in _parse_co_pis(row['Co PI Name'], pi):
            G.add_node(co_pi)
            faculty_amounts[co_pi] += amount
            G.add_edge(pi, co_pi)

    college_assignment = {
        node: _college_for(node, college_map) for node in G.nodes()
    }

    # Colours are handed out in sorted college order and wrap around when
    # there are more colleges than palette entries.
    college_colors = {
        college: _COLLEGE_PALETTE[i % len(_COLLEGE_PALETTE)]
        for i, college in enumerate(sorted(set(college_assignment.values())))
    }

    # Scale total funding linearly onto the node-size range. The defaults
    # keep an empty table from failing here.
    smallest, largest = _NODE_SIZE_RANGE
    min_amount = min(faculty_amounts.values(), default=0.0)
    max_amount = max(faculty_amounts.values(), default=0.0)
    spread = max_amount - min_amount

    for node in G.nodes():
        college = college_assignment[node]
        # Sizes are looked up by node name rather than by list position.
        if spread:
            fraction = (faculty_amounts.get(node, min_amount) - min_amount) / spread
        else:
            fraction = 0.0
        G.nodes[node]['color'] = college_colors.get(college, _FALLBACK_NODE_COLOR)
        G.nodes[node]['size'] = float(smallest + (largest - smallest) * fraction)
        G.nodes[node]['title'] = f"{node} | College: {college}"

    return G, college_colors
# Build the collaboration graph and the college -> colour lookup.
G, college_colors = create_network(award_df, faculty_college_map)

# Display options for the interactive pyvis canvas.
network_options = {
    'height': '800px',
    'width': '100%',
    'bgcolor': '#ffffff',
    'font_color': '#333333',
    'notebook': True,
}
nt = Network(**network_options)
nt.from_nx(G)
nt.toggle_hide_edges_on_drag(True)

# Save the rendered page to disk, then read it back so it can be embedded.
output_path = 'network.html'
nt.save_graph(output_path)
with open(output_path, 'r', encoding='utf-8') as handle:
    html = handle.read()
# Usage notes shown above the network.
st.markdown("""
**Network Interaction Guide:**
- Drag nodes to rearrange the network
- Scroll to zoom in/out to see the details: Faculty Name | College
- Click and drag background to pan
- Hover over nodes to see details
- Use the control panel (click the gear icon) to adjust physics settings
- Double-click on an award title to view the full text.
""")

# Colour legend: one swatch per college, laid out across four columns.
st.subheader("College Legend")
legend_columns = st.columns(4)
for position, college in enumerate(college_colors):
    swatch = college_colors[college]
    target = legend_columns[position % 4]
    target.markdown(
        f"<span style='color:{swatch}'>■</span> {college}",
        unsafe_allow_html=True,
    )

# Embed the saved pyvis page.
st.subheader("Collaboration Network")
st.components.v1.html(html, height=800, scrolling=True)

# Source tables, shown for reference.
st.subheader("Award Data")
st.dataframe(award_df_remove_amount, use_container_width=True)

st.subheader("Faculty Data")
st.dataframe(faculty_college_df, use_container_width=True)