Spaces:

TDAI-DS
/

Collaboration-Network

Sleeping

App Files Files Community

Collaboration-Network / app.py

TDAI-DS

Update app.py

3ccb722 verified 9 months ago

raw

history blame contribute delete

7.6 kB

	import streamlit as st
	import pandas as pd
	import networkx as nx
	from pyvis.network import Network
	from matplotlib.colors import to_hex
	import matplotlib.pyplot as plt
	import numpy as np
	from collections import defaultdict

	# Set page configuration
	st.set_page_config(layout="wide")

	# Title
	st.title("Faculty Collaboration Network Analysis -FY23 & FY24")

	# Load data
	@st.cache_data
	def load_data():
	    """Read the three CSV inputs that sit next to the app.

	    Returns:
	        A 3-tuple of DataFrames, in this order: the award table
	        ('award.csv'), the award table meant for display
	        ('award_display_title.csv'), and the faculty/college table
	        ('faculty_college.csv').
	    """
	    csv_paths = ('award.csv', 'award_display_title.csv', 'faculty_college.csv')
	    # Files are read in the listed order; the tuple order is what callers unpack.
	    return tuple(pd.read_csv(csv_path) for csv_path in csv_paths)


	award_df, award_df_remove_amount, faculty_college_df = load_data()

	# Strip surrounding whitespace from the award table's column names.
	award_df.columns = award_df.columns.str.strip()

	# Strip surrounding whitespace from every text (object-dtype) column of the
	# faculty table; other columns pass through unchanged.
	faculty_college_df = faculty_college_df.apply(
	    lambda column: column.str.strip() if column.dtype == "object" else column
	)

	# Faculty name -> college lookup; a later duplicate name overrides an earlier one.
	faculty_college_map = {
	    faculty_name: college
	    for faculty_name, college in zip(
	        faculty_college_df['Faculty Name'], faculty_college_df['College of']
	    )
	}

	def convert_amount(amount_str):
	    """Convert a currency value such as "$1,234.50" to a float rounded to cents.

	    Args:
	        amount_str: The raw cell value. Usually a string that may contain a
	            dollar sign, thousands separators and surrounding whitespace, but
	            already-numeric values and missing values (None, NaN, empty
	            string) are accepted as well.

	    Returns:
	        The amount as a float rounded to two decimals. Missing values yield
	        0.0 so that they contribute nothing when amounts are summed.

	    Raises:
	        ValueError: If the text is not a number once "$" and "," are removed.
	    """
	    if amount_str is None:
	        return 0.0

	    if isinstance(amount_str, str):
	        amount_str = amount_str.replace("$", "").replace(",", "").strip()
	        if not amount_str:
	            return 0.0

	    value = float(amount_str)
	    # NaN is the only float that differs from itself; pandas uses it for
	    # blank cells, and the text "nan" parses to it too.
	    if value != value:
	        return 0.0
	    return round(value, 2)

	# Replace each raw 'Authorized Amount' cell with the value returned by
	# convert_amount, one element at a time.
	award_df['Authorized Amount'] = award_df['Authorized Amount'].map(convert_amount)

	# Create graph and process data
	@st.cache_resource
	def create_network(df, college_map):
	    """Build the faculty collaboration graph from the award table.

	    Every award row contributes its PI as a node plus one edge from the PI to
	    each distinct co-PI. The row's authorized amount is credited to the PI and
	    to each co-PI; a person's accumulated total determines their node size.

	    Args:
	        df: Award table with 'PI Name', 'Co PI Name' and numeric
	            'Authorized Amount' columns. Co-PI names are pipe-separated.
	        college_map: Mapping of faculty name to college name. Names that are
	            absent, or mapped to a missing/blank value, are shown as 'Unknown'.

	    Returns:
	        A tuple ``(G, college_colors)``: the ``networkx.Graph`` whose nodes
	        carry 'color', 'size' and 'title' attributes, and a dict mapping each
	        college present in the graph to its hex color.
	    """
	    G = nx.Graph()
	    faculty_amounts = defaultdict(float)

	    # Smallest and largest node size; funding totals are scaled linearly
	    # between the two.
	    min_size, max_size = 20, 40

	    colorblind_palette = [
	        '#E6194B',  # Emergency Red (stop-sign red)
	        '#3CB44B',  # Traffic Cone Green
	        '#4363D8',  # Deep Ocean Blue
	        '#FFE119',  # Taxi Yellow
	        '#911EB4',  # Royal Purple
	        '#F58231',  # Construction Orange
	        '#42D4F4',  # Poolside Cyan
	        '#FABEBE',  # Bubblegum Pink (lightest pink kept)
	        '#00A4CC',  # Airplane Blue (sky-cyan hybrid)
	        '#A6FF47',  # Alien Green (neon yellow-green)
	        '#FF4500',  # Lava Orange (red-orange differentiation)
	        '#5E0DAC',  # Amethyst Purple (blue-purple hybrid)
	        '#00FFAF',  # Glowstick Green (blue-green)
	        '#FF9933',  # Highway Orange (golden orange)
	        '#4B0082',  # Midnight Indigo (deep blue-purple)
	        '#8B0000',  # Barn Red (dark red differentiation)
	        '#00CED1',  # Tropical Teal (bright blue-green)
	        '#FFD300',  # School Bus Yellow (pure golden yellow)
	    ]

	    def _college_of(name):
	        """Return the college recorded for *name*, or 'Unknown' if missing/blank."""
	        college = college_map.get(name)
	        if college is None or pd.isna(college):
	            return 'Unknown'
	        # Normalizing to str keeps the sort below from mixing types.
	        return str(college).strip() or 'Unknown'

	    for _, row in df.iterrows():
	        pi = str(row['PI Name']).strip()
	        if not pi or pi == 'nan':
	            continue

	        amount = row['Authorized Amount']
	        if pd.isna(amount):
	            # A missing amount must not turn the running totals into NaN.
	            amount = 0.0

	        raw_co_pis = row['Co PI Name']
	        co_pis = []
	        if not pd.isna(raw_co_pis):
	            # Accept both a plain "|" separator and the escaped "\|" form.
	            normalized = str(raw_co_pis).replace('\\|', '|')
	            names = (name.strip() for name in normalized.split('|'))
	            # dict.fromkeys drops repeated names while keeping their order, so
	            # one award is never credited twice to the same co-PI. The PI is
	            # excluded to prevent self-loops.
	            co_pis = list(dict.fromkeys(
	                name for name in names
	                if name and name != 'nan' and name != pi
	            ))

	        G.add_node(pi)
	        faculty_amounts[pi] += amount

	        for co_pi in co_pis:
	            G.add_node(co_pi)
	            faculty_amounts[co_pi] += amount
	            G.add_edge(pi, co_pi)

	    college_assignment = {node: _college_of(node) for node in G.nodes()}

	    # Colors are assigned in sorted college order and cycle through the
	    # palette if there are more colleges than colors.
	    college_colors = {
	        college: colorblind_palette[i % len(colorblind_palette)]
	        for i, college in enumerate(sorted(set(college_assignment.values())))
	    }

	    min_amount = min(faculty_amounts.values(), default=0.0)
	    max_amount = max(faculty_amounts.values(), default=0.0)
	    amount_span = max_amount - min_amount

	    for node in G.nodes():
	        college = college_assignment[node]
	        if amount_span:
	            # Sizes are looked up by name so they cannot drift out of step
	            # with the graph's node order.
	            funded = faculty_amounts.get(node, min_amount)
	            size = min_size + (max_size - min_size) * (funded - min_amount) / amount_span
	        else:
	            # Everyone has the same total (or the graph is empty).
	            size = min_size

	        G.nodes[node]['color'] = college_colors.get(college, '#888888')
	        G.nodes[node]['size'] = size
	        G.nodes[node]['title'] = f"{node} | College: {college}"

	    return G, college_colors

	# Build the collaboration graph and its college -> color legend mapping.
	G, college_colors = create_network(award_df, faculty_college_map)

	# Wrap the networkx graph in a pyvis network for interactive rendering.
	nt = Network(height='800px', width='100%', bgcolor='#ffffff', font_color='#333333', notebook=True)
	nt.from_nx(G)
	nt.toggle_hide_edges_on_drag(True)

	# pyvis writes the visualization to an HTML file; read it back so the markup
	# can be embedded in the page further down.
	nt.save_graph('network.html')
	with open('network.html', 'r', encoding='utf-8') as f:
	    html = f.read()

	# Usage notes shown above the legend.
	st.markdown("""
	Network Interaction Guide:
	- Drag nodes to rearrange the network
	- Scroll to zoom in/out to see the details: Faculty Name \| College
	- Click and drag background to pan
	- Hover over nodes to see details
	- Use the control panel (click the gear icon) to adjust physics settings
	- Double-click on an award title to view the full text.
	""")

	# Legend: one colored square per college, laid out across four columns.
	st.subheader("College Legend")
	cols = st.columns(4)
	for i, (college, color) in enumerate(college_colors.items()):
	    legend_entry = f"<span style='color:{color}'>■</span> {college}"
	    cols[i % 4].markdown(legend_entry, unsafe_allow_html=True)

	# Embed the saved pyvis HTML.
	st.subheader("Collaboration Network")
	st.components.v1.html(html, height=800, scrolling=True)

	# Underlying tables, shown below the network for reference.
	st.subheader("Award Data")
	st.dataframe(award_df_remove_amount, use_container_width=True)

	st.subheader("Faculty Data")
	st.dataframe(faculty_college_df, use_container_width=True)