# Spaces:
#
# TDAI-DS
# /
#
# Collaboration-Network
#
# Sleeping
#
# File size: 7,595 Bytes

import streamlit as st
import pandas as pd
import networkx as nx
from pyvis.network import Network
from matplotlib.colors import to_hex
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

# Set page configuration
st.set_page_config(layout="wide")

# Title
st.title("Faculty Collaboration Network Analysis -FY23 & FY24")

# Load data
@st.cache_data
def load_data():
    """Read the three CSV inputs and return them as DataFrames.

    Returns:
        A 3-tuple ``(award_df, award_df_remove_amount, faculty_college_df)``
        read from ``award.csv``, ``award_display_title.csv`` and
        ``faculty_college.csv`` respectively, in that order.
    """
    csv_paths = ('award.csv', 'award_display_title.csv', 'faculty_college.csv')
    # Files are read in the listed order, matching the order of the result.
    return tuple(pd.read_csv(csv_path) for csv_path in csv_paths)
    

# Bring the three loaded tables into module scope.
award_df, award_df_remove_amount, faculty_college_df = load_data()

# Trim surrounding whitespace from the award table's column names.
stripped_award_columns = award_df.columns.str.strip()
award_df.columns = stripped_award_columns


def _strip_text_column(column):
    """Strip whitespace from an object-dtype column; return others unchanged."""
    if column.dtype == "object":
        return column.str.strip()
    return column


faculty_college_df = faculty_college_df.apply(_strip_text_column)

# Lookup from faculty name to college, built from the cleaned faculty table.
faculty_names = faculty_college_df['Faculty Name']
faculty_college_names = faculty_college_df['College of']
faculty_college_map = dict(zip(faculty_names, faculty_college_names))

def convert_amount(amount_str):
    """Convert a currency value such as ``"$1,234.50"`` to a float.

    Args:
        amount_str: The raw amount. Usually a string that may contain ``$``
            and thousands separators, but numeric values and missing values
            (``None``/NaN) are accepted as well.

    Returns:
        The amount rounded to two decimals. Missing values and blank strings
        yield ``0.0`` so that they contribute nothing to funding totals.

    Raises:
        ValueError: If a non-blank string cannot be parsed as a number.
    """
    if not isinstance(amount_str, str):
        # Blank CSV cells reach us as NaN (a float), not as a string, and
        # numeric columns may already be parsed; neither has ``.replace``.
        if pd.isna(amount_str):
            return 0.0
        return round(float(amount_str), 2)

    cleaned = amount_str.replace("$", "").replace(",", "").strip()
    if not cleaned:
        return 0.0
    return round(float(cleaned), 2)

# Replace each raw 'Authorized Amount' value with the result of convert_amount.
award_df['Authorized Amount'] = award_df['Authorized Amount'].apply(convert_amount)

# Create graph and process data
# Colors handed out to colleges in sorted-name order; the sequence is reused
# cyclically when there are more colleges than entries.
_COLLEGE_PALETTE = (
    '#E6194B',  # Emergency Red (stop-sign red)
    '#3CB44B',  # Traffic Cone Green
    '#4363D8',  # Deep Ocean Blue
    '#FFE119',  # Taxi Yellow
    '#911EB4',  # Royal Purple
    '#F58231',  # Construction Orange
    '#42D4F4',  # Poolside Cyan
    '#FABEBE',  # Bubblegum Pink (lightest pink kept)
    '#00A4CC',  # Airplane Blue (sky-cyan hybrid)
    '#A6FF47',  # Alien Green (neon yellow-green)
    '#FF4500',  # Lava Orange (red-orange differentiation)
    '#5E0DAC',  # Amethyst Purple (blue-purple hybrid)
    '#00FFAF',  # Glowstick Green (blue-green)
    '#FF9933',  # Highway Orange (golden orange)
    '#4B0082',  # Midnight Indigo (deep blue-purple)
    '#8B0000',  # Barn Red (dark red differentiation)
    '#00CED1',  # Tropical Teal (bright blue-green)
    '#FFD300',  # School Bus Yellow (pure golden yellow)
)

# (smallest, largest) node size; funding totals are scaled linearly between.
_NODE_SIZE_RANGE = (20, 40)

# Label used for faculty with no usable entry in the college map.
_UNKNOWN_COLLEGE = 'Unknown'


def _split_co_pis(raw_names):
    """Return the cleaned names from a '|'-separated Co-PI cell (empty if missing)."""
    if pd.isna(raw_names):
        return []
    stripped = (name.strip() for name in str(raw_names).split('|'))
    return [name for name in stripped if name not in ('', 'nan')]


def _lookup_college(name, college_map):
    """Return the college for ``name``, or 'Unknown' when absent or missing."""
    college = college_map.get(name, _UNKNOWN_COLLEGE)
    # None/NaN cannot be sorted together with strings, so fold them into the
    # same bucket as faculty that are not in the map at all.
    if pd.isna(college):
        return _UNKNOWN_COLLEGE
    return college


def _scale_node_sizes(totals, size_range=_NODE_SIZE_RANGE):
    """Map each name's funding total linearly into ``size_range``.

    Returns a dict keyed by name. When every total is identical (or there is
    only one), all nodes get the smallest size.
    """
    if not totals:
        return {}
    smallest, largest = size_range
    low = min(totals.values())
    high = max(totals.values())
    if high == low:
        return {name: smallest for name in totals}
    return {
        name: smallest + (largest - smallest) * (total - low) / (high - low)
        for name, total in totals.items()
    }


@st.cache_resource
def create_network(df, college_map):
    """Build the faculty collaboration graph from the award table.

    Each award row contributes its PI as a node and one edge from the PI to
    every listed Co-PI. The row's 'Authorized Amount' is added to the running
    total of the PI and of each Co-PI; those totals drive node size.

    Args:
        df: Award table with 'PI Name', 'Co PI Name' ('|'-separated) and
            numeric 'Authorized Amount' columns.
        college_map: Mapping of faculty name to college name. Faculty that
            are absent, or mapped to None/NaN, are labelled 'Unknown'.

    Returns:
        A tuple ``(G, college_colors)`` where ``G`` is an ``nx.Graph`` whose
        nodes carry 'color', 'size' and 'title' attributes, and
        ``college_colors`` maps each college present in the graph to its hex
        color. An empty table yields an empty graph and an empty mapping.
    """
    G = nx.Graph()
    faculty_amounts = defaultdict(float)

    for _, row in df.iterrows():
        pi = str(row['PI Name']).strip()
        # A missing PI stringifies to 'nan'; such rows carry no usable node.
        if not pi or pi == 'nan':
            continue

        amount = row['Authorized Amount']

        G.add_node(pi)
        faculty_amounts[pi] += amount

        for co_pi in _split_co_pis(row['Co PI Name']):
            if co_pi == pi:
                # Skip self-loops when the PI is also listed as a Co-PI.
                continue
            G.add_node(co_pi)
            faculty_amounts[co_pi] += amount
            G.add_edge(pi, co_pi)

    college_assignment = {
        node: _lookup_college(node, college_map) for node in G.nodes()
    }

    # Sorting makes the college -> color assignment deterministic.
    college_colors = {
        college: _COLLEGE_PALETTE[index % len(_COLLEGE_PALETTE)]
        for index, college in enumerate(sorted(set(college_assignment.values())))
    }

    # Sizes are keyed by node name rather than by position, so they do not
    # depend on the graph and the totals sharing the same insertion order.
    node_sizes = _scale_node_sizes(faculty_amounts)

    for node in G.nodes():
        college = college_assignment[node]
        G.nodes[node]['color'] = college_colors[college]
        G.nodes[node]['size'] = node_sizes[node]
        G.nodes[node]['title'] = f"{node} | College: {college}"

    return G, college_colors

# Build the collaboration graph and the college -> color mapping.
G, college_colors = create_network(award_df, faculty_college_map)

# Wrap the graph in a pyvis Network for interactive rendering.
# NOTE(review): notebook=True is passed although this script runs under
# Streamlit — confirm whether notebook mode is actually required here.
nt = Network(
    height='800px',
    width='100%',
    bgcolor='#ffffff',
    font_color='#333333',
    notebook=True
)
nt.from_nx(G)
nt.toggle_hide_edges_on_drag(True)
# The pyvis physics/nodes control panel is currently disabled:
# nt.show_buttons(filter_=['physics', 'nodes'])

# Write the network to an HTML file, then read it back as a string so it can
# be embedded in the page further down.
nt.save_graph('network.html')
with open('network.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Render a short usage guide above the network.
# NOTE(review): the guide mentions a control panel / gear icon, but the
# nt.show_buttons(...) call in this file is commented out — confirm whether
# that bullet should stay.
st.markdown("""
**Network Interaction Guide:**
- Drag nodes to rearrange the network
- Scroll to zoom in/out to see the details: Faculty Name | College 
- Click and drag background to pan
- Hover over nodes to see details
- Use the control panel (click the gear icon) to adjust physics settings
- Double-click on an award title to view the full text.
""")

# Render the college legend as colored squares spread over four columns.
st.subheader("College Legend")
legend_columns = st.columns(4)
for position, college_name in enumerate(college_colors):
    swatch_color = college_colors[college_name]
    # Entries are dealt out to the columns in round-robin order.
    target_column = legend_columns[position % 4]
    target_column.markdown(
        f"<span style='color:{swatch_color}'>■</span> {college_name}",
        unsafe_allow_html=True,
    )

# Embed the generated network HTML in the page.
st.subheader("Collaboration Network")
st.components.v1.html(html, height=800, scrolling=True)


# Show the award table loaded from 'award_display_title.csv'.
st.subheader("Award Data")
st.dataframe(award_df_remove_amount, use_container_width=True)

# Show the whitespace-stripped faculty/college table.
st.subheader("Faculty Data")
st.dataframe(faculty_college_df, use_container_width=True)