# TDAI-DS's picture
# Update app.py
# 3ccb722 verified
import streamlit as st
import pandas as pd
import networkx as nx
from pyvis.network import Network
from matplotlib.colors import to_hex
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
# Page-level settings: configure the layout first, then render the heading.
PAGE_LAYOUT = "wide"
PAGE_TITLE = "Faculty Collaboration Network Analysis -FY23 & FY24"
st.set_page_config(layout=PAGE_LAYOUT)
st.title(PAGE_TITLE)
# Load data
@st.cache_data
def load_data():
    """Read the three CSV inputs used by the app.

    Returns:
        A 3-tuple ``(award_df, award_df_remove_amount, faculty_college_df)``
        holding the contents of ``award.csv``, ``award_display_title.csv``
        and ``faculty_college.csv`` (paths are relative to the working
        directory), in that order.
    """
    csv_files = ('award.csv', 'award_display_title.csv', 'faculty_college.csv')
    frames = [pd.read_csv(csv_file) for csv_file in csv_files]
    awards, awards_for_display, faculty_colleges = frames
    return awards, awards_for_display, faculty_colleges
award_df, award_df_remove_amount, faculty_college_df = load_data()

# Column headers of the award table may carry stray whitespace.
award_df.columns = award_df.columns.str.strip()


def _strip_text_column(column):
    """Strip surrounding whitespace from object-dtype columns; pass others through."""
    if column.dtype == "object":
        return column.str.strip()
    return column


faculty_college_df = faculty_college_df.apply(_strip_text_column)

# Lookup table: faculty name -> college.
faculty_college_map = {
    faculty_name: college
    for faculty_name, college in zip(
        faculty_college_df['Faculty Name'], faculty_college_df['College of']
    )
}
def convert_amount(amount_str):
    """Convert a currency value such as ``"$1,234.50"`` to a float rounded to cents.

    Args:
        amount_str: The raw amount. Normally a string with an optional ``$``
            sign and ``,`` thousands separators. Numeric values (which
            ``pd.read_csv`` produces when a column carries no formatting)
            and missing values (``None`` / NaN) are accepted as well.

    Returns:
        The amount as a ``float`` rounded to two decimal places. Missing or
        blank input yields ``0.0`` so that it contributes nothing to sums.

    Raises:
        ValueError: If the text is non-empty and is still not a number after
            ``$`` and ``,`` have been removed.
    """
    if isinstance(amount_str, str):
        cleaned = amount_str.replace("$", "").replace(",", "").strip()
        # A blank cell read as text would make float("") raise.
        return round(float(cleaned), 2) if cleaned else 0.0

    # Missing cells arrive as NaN/None and have no .replace(); treat as zero.
    if pd.isna(amount_str):
        return 0.0

    # Already numeric (int, float, numpy scalar): only normalise the rounding.
    return round(float(amount_str), 2)
# Parse the currency text once so later steps can add the amounts as numbers.
authorized_amounts = award_df['Authorized Amount'].apply(convert_amount)
award_df['Authorized Amount'] = authorized_amounts
# Create graph and process data
# Colours assigned to colleges, in sorted-college order; reused cyclically
# when there are more colleges than entries.
_COLLEGE_PALETTE = (
    '#E6194B',  # Emergency Red (stop-sign red)
    '#3CB44B',  # Traffic Cone Green
    '#4363D8',  # Deep Ocean Blue
    '#FFE119',  # Taxi Yellow
    '#911EB4',  # Royal Purple
    '#F58231',  # Construction Orange
    '#42D4F4',  # Poolside Cyan
    '#FABEBE',  # Bubblegum Pink (lightest pink kept)
    '#00A4CC',  # Airplane Blue (sky-cyan hybrid)
    '#A6FF47',  # Alien Green (neon yellow-green)
    '#FF4500',  # Lava Orange (red-orange differentiation)
    '#5E0DAC',  # Amethyst Purple (blue-purple hybrid)
    '#00FFAF',  # Glowstick Green (blue-green)
    '#FF9933',  # Highway Orange (golden orange)
    '#4B0082',  # Midnight Indigo (deep blue-purple)
    '#8B0000',  # Barn Red (dark red differentiation)
    '#00CED1',  # Tropical Teal (bright blue-green)
    '#FFD300',  # School Bus Yellow (pure golden yellow)
)

# Smallest and largest node size used when scaling by total funding.
_NODE_SIZE_RANGE = (20, 40)

# Colour used if a node's college has no palette entry.
_FALLBACK_COLOR = '#888888'


def _split_co_pis(raw_names):
    """Return the '|'-separated names in *raw_names*, stripped, without blanks or 'nan'."""
    if pd.isna(raw_names):
        return []
    stripped = (name.strip() for name in str(raw_names).split('|'))
    return [name for name in stripped if name not in ('', 'nan')]


def _resolve_college(college_map, faculty_name):
    """Look up a faculty member's college, using 'Unknown' for absent or NaN entries."""
    college = college_map.get(faculty_name, 'Unknown')
    # A NaN college (empty CSV cell) would break sorting of college names.
    return 'Unknown' if pd.isna(college) else college


def _scale_node_sizes(amounts_by_name, size_range=_NODE_SIZE_RANGE):
    """Map each name's total amount linearly onto *size_range*; return {name: size}."""
    if not amounts_by_name:
        return {}
    smallest_size, largest_size = size_range
    min_amount = min(amounts_by_name.values())
    max_amount = max(amounts_by_name.values())
    if max_amount == min_amount:
        # No spread to scale over: everyone gets the minimum size.
        return dict.fromkeys(amounts_by_name, smallest_size)
    amount_span = max_amount - min_amount
    size_span = largest_size - smallest_size
    return {
        name: smallest_size + size_span * (total - min_amount) / amount_span
        for name, total in amounts_by_name.items()
    }


@st.cache_resource
def create_network(df, college_map):
    """Build the PI / Co-PI collaboration graph from the award table.

    Every award row adds its PI as a node and links the PI to each Co-PI.
    The row's full authorized amount is credited to the PI and to every
    Co-PI on that row; the per-person totals drive the node sizes.

    Args:
        df: Award table with the columns ``'PI Name'``, ``'Co PI Name'``
            ('|'-separated names, may be missing) and
            ``'Authorized Amount'`` (numeric).
        college_map: Mapping of faculty name to college. Names that are
            absent (or mapped to NaN) are shown as ``'Unknown'``.

    Returns:
        A tuple ``(G, college_colors)``: ``G`` is an ``nx.Graph`` whose
        nodes carry ``'color'``, ``'size'`` and ``'title'`` attributes, and
        ``college_colors`` maps each college present in the graph to its
        hex colour.
    """
    G = nx.Graph()
    faculty_amounts = defaultdict(float)

    award_rows = zip(df['PI Name'], df['Co PI Name'], df['Authorized Amount'])
    for raw_pi, raw_co_pis, amount in award_rows:
        pi = str(raw_pi).strip()
        if not pi or pi == 'nan':
            continue
        # A NaN amount would poison the running totals and the min/max below.
        if pd.isna(amount):
            amount = 0.0

        G.add_node(pi)
        faculty_amounts[pi] += amount

        # dict.fromkeys de-duplicates while keeping order, so a Co-PI listed
        # twice on one award is credited only once.
        for co_pi in dict.fromkeys(_split_co_pis(raw_co_pis)):
            if co_pi == pi:  # Prevent self-loops
                continue
            G.add_node(co_pi)
            faculty_amounts[co_pi] += amount
            G.add_edge(pi, co_pi)

    college_assignment = {
        node: _resolve_college(college_map, node) for node in G.nodes()
    }

    # Sorted so that colour assignment is stable between runs.
    unique_colleges = sorted(set(college_assignment.values()))
    college_colors = {
        college: _COLLEGE_PALETTE[index % len(_COLLEGE_PALETTE)]
        for index, college in enumerate(unique_colleges)
    }

    # Sizes are keyed by node name rather than matched by position.
    node_sizes = _scale_node_sizes(faculty_amounts)

    for node in G.nodes():
        college = college_assignment[node]
        G.nodes[node]['color'] = college_colors.get(college, _FALLBACK_COLOR)
        G.nodes[node]['size'] = node_sizes.get(node, _NODE_SIZE_RANGE[0])
        G.nodes[node]['title'] = f"{node} | College: {college}"

    return G, college_colors
# Build the collaboration graph and the college -> colour lookup.
G, college_colors = create_network(award_df, faculty_college_map)

# Wrap the graph in a pyvis network for interactive rendering.
network_options = {
    'height': '800px',
    'width': '100%',
    'bgcolor': '#ffffff',
    'font_color': '#333333',
    'notebook': True,
}
nt = Network(**network_options)
nt.from_nx(G)
nt.toggle_hide_edges_on_drag(True)
# nt.show_buttons(filter_=['physics', 'nodes'])

# Save the generated page, then read it back so it can be embedded below.
NETWORK_HTML_PATH = 'network.html'
nt.save_graph(NETWORK_HTML_PATH)
with open(NETWORK_HTML_PATH, 'r', encoding='utf-8') as html_file:
    html = html_file.read()

# Usage notes shown above the legend.
st.markdown("""
**Network Interaction Guide:**
- Drag nodes to rearrange the network
- Scroll to zoom in/out to see the details: Faculty Name | College
- Click and drag background to pan
- Hover over nodes to see details
- Use the control panel (click the gear icon) to adjust physics settings
- Double-click on an award title to view the full text.
""")

# College colour legend, laid out round-robin across a fixed number of columns.
st.subheader("College Legend")
LEGEND_COLUMN_COUNT = 4
cols = st.columns(LEGEND_COLUMN_COUNT)
for position, college in enumerate(college_colors):
    color = college_colors[college]
    legend_cell = cols[position % LEGEND_COLUMN_COUNT]
    legend_cell.markdown(f"<span style='color:{color}'>■</span> {college}", unsafe_allow_html=True)

# Embed the interactive network.
st.subheader("Collaboration Network")
st.components.v1.html(html, height=800, scrolling=True)

# Raw tables behind the visualisation.
data_sections = (
    ("Award Data", award_df_remove_amount),
    ("Faculty Data", faculty_college_df),
)
for heading, table in data_sections:
    st.subheader(heading)
    st.dataframe(table, use_container_width=True)