Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import networkx as nx
|
| 4 |
+
from pyvis.network import Network
|
| 5 |
+
from matplotlib.colors import to_hex
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import numpy as np
|
| 8 |
+
from collections import defaultdict
|
| 9 |
+
|
| 10 |
+
# Page-level settings: render the app in Streamlit's wide layout.
PAGE_LAYOUT = "wide"
PAGE_TITLE = "Faculty Collaboration Network Analysis -FY23 & FY24"

st.set_page_config(layout=PAGE_LAYOUT)

# Main heading shown at the top of the page.
st.title(PAGE_TITLE)
|
| 15 |
+
|
| 16 |
+
# Load data
|
| 17 |
+
@st.cache_data
def load_data():
    """Read the award records from 'award.csv' into a DataFrame.

    The function is wrapped in Streamlit's data cache via the decorator.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for 'award.csv'.
    """
    awards = pd.read_csv('award.csv')
    return awards
|
| 20 |
+
|
| 21 |
+
# Load the award table, then trim stray whitespace from the header names so
# later column lookups match exactly.
df = load_data()
df = df.rename(columns=str.strip)
|
| 23 |
+
|
| 24 |
+
def convert_amount(amount_str):
    """Convert a currency cell such as "$1,234.50" to a float rounded to cents.

    Args:
        amount_str: The raw cell value. Normally a string with an optional
            "$" sign and "," thousands separators, but numeric cells and
            missing cells (None, NaN, blank text) are accepted as well,
            because a CSV column can contain empty or already-numeric values.

    Returns:
        The amount as a float rounded to two decimal places. Missing or
        blank cells yield 0.0 so they do not affect funding totals.

    Raises:
        ValueError: If the text is not a valid number once "$" and "," are
            removed.
    """
    if isinstance(amount_str, str):
        cleaned = amount_str.replace("$", "").replace(",", "").strip()
        if not cleaned:
            # Blank text is treated the same as a missing cell.
            return 0.0
        return round(float(cleaned), 2)

    # Non-string cells: NaN/None come from empty CSV fields; plain numbers
    # appear when the parser already inferred a numeric value.
    if pd.isna(amount_str):
        return 0.0
    return round(float(amount_str), 2)
|
| 28 |
+
|
| 29 |
+
# Turn every 'Authorized Amount' cell into a numeric value, one cell at a time.
df['Authorized Amount'] = df['Authorized Amount'].map(convert_amount)
|
| 30 |
+
|
| 31 |
+
# Create graph and process data
|
| 32 |
+
@st.cache_resource
def create_network(df):
    """Build the faculty collaboration graph from the award table.

    Every row adds its PI as a node and an edge from the PI to each co-PI
    named in the pipe-separated 'Co PI Name' cell. The row's
    'Authorized Amount' is credited in full to the PI and to each co-PI, and
    the row's 'College' is recorded for each of them.

    Args:
        df: DataFrame with 'PI Name', 'Co PI Name', 'College' and a numeric
            'Authorized Amount' column.

    Returns:
        A ``(G, college_colors)`` tuple. ``G`` is an ``nx.Graph`` whose nodes
        carry 'color', 'size' and 'title' attributes; ``college_colors`` maps
        each college name to a hex colour string. Both are empty when the
        table has no usable rows.
    """
    G = nx.Graph()
    faculty_colleges = defaultdict(list)
    faculty_amounts = defaultdict(float)

    for _, row in df.iterrows():
        # Rows without a PI cannot be placed in the graph.
        pi = str(row['PI Name']).strip()
        if not pi or pi == 'nan':
            continue

        college = str(row['College']).strip()
        amount = row['Authorized Amount']

        # Co-PIs arrive as one '|'-separated cell; drop blanks and 'nan'.
        co_pi_names = row['Co PI Name']
        if pd.isna(co_pi_names):
            co_pis = []
        else:
            co_pis = [
                name.strip()
                for name in str(co_pi_names).split('|')
                if name.strip() not in ('', 'nan')
            ]

        G.add_node(pi)
        faculty_colleges[pi].append(college)
        faculty_amounts[pi] += amount

        for co_pi in co_pis:
            if co_pi == pi:
                # A PI listed as their own co-PI would create a self-loop.
                continue
            G.add_node(co_pi)
            faculty_colleges[co_pi].append(college)
            faculty_amounts[co_pi] += amount
            G.add_edge(pi, co_pi)

    # Dominant college per person: the one seen most often across their
    # awards (first seen wins ties); 'Unknown' if none was recorded.
    college_map = {}
    for faculty, colleges in faculty_colleges.items():
        college_counts = defaultdict(int)
        for c in colleges:
            if c and c != 'nan':
                college_counts[c] += 1
        if college_counts:
            college_map[faculty] = max(college_counts, key=college_counts.get)
        else:
            college_map[faculty] = 'Unknown'

    # One colour per college. pyplot.get_cmap is used because
    # matplotlib.cm.get_cmap is no longer available in recent Matplotlib
    # releases; the lookup is skipped when there is nothing to colour.
    unique_colleges = sorted(set(college_map.values()))
    if unique_colleges:
        colormap = plt.get_cmap('tab20', len(unique_colleges))
        college_colors = {
            college: to_hex(colormap(i))
            for i, college in enumerate(unique_colleges)
        }
    else:
        college_colors = {}

    # Node sizes scale linearly with total funding between these bounds.
    # The bounds are defined up front so the empty-table path can use them.
    min_size, max_size = 20, 40
    if faculty_amounts:
        min_amount = min(faculty_amounts.values())
        amount_span = max(faculty_amounts.values()) - min_amount
    else:
        min_amount = 0
        amount_span = 0

    # Attributes are looked up by node name rather than by position, so the
    # result does not depend on two containers sharing an iteration order.
    for node in G.nodes():
        node_college = college_map.get(node, 'Unknown')
        total = faculty_amounts.get(node, 0)
        if amount_span:
            size = min_size + (max_size - min_size) * (total - min_amount) / amount_span
        else:
            # Everyone has the same funding (or there is no data at all).
            size = min_size

        G.nodes[node]['color'] = college_colors.get(node_college, '#888888')
        G.nodes[node]['size'] = float(size)
        G.nodes[node]['title'] = (f"{node} | College: {node_college}"
                                  f" | Total Funding: ${total:,.2f}")

    return G, college_colors
|
| 107 |
+
|
| 108 |
+
# Build the collaboration graph and the college -> colour lookup.
G, college_colors = create_network(df)

# Wrap the graph in a pyvis network for interactive display.
nt = Network(height='800px', width='100%', bgcolor='#ffffff',
             font_color='#333333', notebook=True)
nt.from_nx(G)
nt.toggle_hide_edges_on_drag(True)
nt.show_buttons(filter_=['physics', 'nodes'])

# Write the network page to disk, then read it back as text for embedding.
nt.save_graph('network.html')
with open('network.html', 'r', encoding='utf-8') as network_file:
    html = network_file.read()

# Short usage notes for the interactive view.
st.markdown("""
**Network Interaction Guide:**
- Drag nodes to rearrange the network
- Scroll to zoom in/out to see the details: Faculty Name | College | Total Funding
- Click and drag background to pan
- Hover over nodes to see details
- Use the control panel (click the gear icon) to adjust physics settings
""")

# Colour legend: one entry per college, laid out across four columns.
st.subheader("College Legend")
legend_columns = st.columns(4)
for position, college in enumerate(college_colors):
    swatch = f"<span style='color:{college_colors[college]}'>■</span> {college}"
    legend_columns[position % 4].markdown(swatch, unsafe_allow_html=True)

# Embed the generated network page.
st.subheader("Collaboration Network")
st.components.v1.html(html, height=800, scrolling=True)


# Underlying award records.
st.subheader("Award Data")
st.dataframe(df, use_container_width=True)
|