Spaces:

TDAI-DS
/

Collaboration-Network

Sleeping

App Files Files Community

TDAI-DS committed on Feb 26, 2025

Commit

8333b37

verified ·

1 Parent(s): a39f65e

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -27

app.py CHANGED Viewed

@@ -16,32 +16,45 @@ st.title("Faculty Collaboration Network Analysis -FY23 & FY24")
 # Load data
 @st.cache_data
 def load_data():
-    return pd.read_csv('award.csv')
-df = load_data()
-df.columns = df.columns.str.strip()  # Clean column names
 def convert_amount(amount_str):
     amount_str = amount_str.replace("$", "").replace(",", "")
     amount_str = round(float(amount_str), 2)
     return amount_str
-df['Authorized Amount'] = df['Authorized Amount'].apply(convert_amount)
 # Create graph and process data
 @st.cache_resource
-def create_network(df):
     G = nx.Graph()
-    faculty_colleges = defaultdict(list)
     faculty_amounts = defaultdict(float)
     for _, row in df.iterrows():
         # Process PI information
         pi = str(row['PI Name']).strip()
         if not pi or pi == 'nan':
             continue
-        college = str(row['College']).strip()
         amount = row['Authorized Amount']
         # Process Co-PI information
@@ -53,33 +66,46 @@ def create_network(df):
         # Add PI node and attributes
         G.add_node(pi)
-        faculty_colleges[pi].append(college)
         faculty_amounts[pi] += amount
         # Add Co-PI nodes and edges
         for co_pi in co_pis:
             if co_pi and co_pi != pi:  # Prevent self-loops
                 G.add_node(co_pi)
-                faculty_colleges[co_pi].append(college)
                 faculty_amounts[co_pi] += amount
                 G.add_edge(pi, co_pi)
-    # Determine dominant college for each faculty member
-    college_map = {}
-    for faculty, colleges in faculty_colleges.items():
-        college_counts = defaultdict(int)
-        for c in colleges:
-            if c and c != 'nan':
-                college_counts[c] += 1
-        if college_counts:
-            college_map[faculty] = max(college_counts, key=college_counts.get)
-        else:
-            college_map[faculty] = 'Unknown'
     # Create color mapping
-    unique_colleges = sorted(list(set(college_map.values())))
-    colormap = plt.cm.get_cmap('tab20', len(unique_colleges))
-    college_colors = {college: to_hex(colormap(i)) for i, college in enumerate(unique_colleges)}
     # Calculate node sizes based on total funding
     amounts = list(faculty_amounts.values())
@@ -98,6 +124,7 @@ def create_network(df):
     # Add attributes to nodes
     for i, node in enumerate(G.nodes()):
         G.nodes[node]['color'] = college_colors.get(college_map.get(node, 'Unknown'), '#888888')
         G.nodes[node]['size'] = node_sizes[i]
         G.nodes[node]['title'] = (f"{node} | College: {college_map.get(node, 'Unknown')}"
@@ -106,7 +133,7 @@ def create_network(df):
     return G, college_colors
 # Create network
-G, college_colors = create_network(df)
 # Create pyvis network
 nt = Network(
@@ -127,7 +154,12 @@ with open('network.html', 'r', encoding='utf-8') as f:
 # Add some explanation
 st.markdown("""
-**Network Interaction Guide: Scroll to zoom | Click-drag background to pan| Hover nodes for details (Name/College/Funding)**
 """)
 # Show college color legend
@@ -142,5 +174,6 @@ st.components.v1.html(html, height=800, scrolling=True)
 # Show raw data
-st.subheader("Award Data - (data source: OSU E-Activity)")
-st.dataframe(df, use_container_width=True)

 # Load data
 @st.cache_data
 def load_data():
+    award_df = pd.read_csv('award.csv')
+    faculty_college_df = pd.read_csv('faculty_college_20250226.csv')
+    return award_df, faculty_college_df
+award_df, faculty_college_df = load_data()
+award_df.columns = award_df.columns.str.strip()  # Clean column names
+faculty_college_df = faculty_college_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
+faculty_college_map = dict(zip(faculty_college_df['Faculty Name'], faculty_college_df['College of']))
 def convert_amount(amount_str):
     amount_str = amount_str.replace("$", "").replace(",", "")
     amount_str = round(float(amount_str), 2)
     return amount_str
+award_df['Authorized Amount'] = award_df['Authorized Amount'].apply(convert_amount)
 # Create graph and process data
 @st.cache_resource
+def create_network(df, college_map):
     G = nx.Graph()
+    # faculty_colleges = defaultdict(list)
     faculty_amounts = defaultdict(float)
+    colorblind_palette =[
+    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4',
+    '#FFEEAD', '#D4A5A5', '#779ECB', '#FFB347',
+    '#B1DDF3', '#A8E6CF', '#DCEDC1', '#FFD3B6',
+    '#FFAAA5', '#C8C6A7', '#92967D'
+    ]
     for _, row in df.iterrows():
         # Process PI information
         pi = str(row['PI Name']).strip()
         if not pi or pi == 'nan':
             continue
+        # college = str(row['College']).strip()
         amount = row['Authorized Amount']
         # Process Co-PI information
         # Add PI node and attributes
         G.add_node(pi)
+        # faculty_colleges[pi].append(college)
         faculty_amounts[pi] += amount
         # Add Co-PI nodes and edges
         for co_pi in co_pis:
             if co_pi and co_pi != pi:  # Prevent self-loops
                 G.add_node(co_pi)
+                # faculty_colleges[co_pi].append(college)
                 faculty_amounts[co_pi] += amount
                 G.add_edge(pi, co_pi)
+    # # Determine dominant college for each faculty member
+    # college_map = {}
+    # for faculty, colleges in faculty_colleges.items():
+    #     college_counts = defaultdict(int)
+    #     for c in colleges:
+    #         if c and c != 'nan':
+    #             college_counts[c] += 1
+    #     if college_counts:
+    #         college_map[faculty] = max(college_counts, key=college_counts.get)
+    #     else:
+    #         college_map[faculty] = 'Unknown'
+    # Get college for each node
+    college_assignment = {node: college_map.get(node, 'Unknown')
+                        for node in G.nodes()}
     # Create color mapping
+    unique_colleges = sorted(list(set(college_assignment.values())))
+    # Create color mapping with cycling if needed
+    college_colors = {}
+    for i, college in enumerate(unique_colleges):
+        college_colors[college] = colorblind_palette[i % len(colorblind_palette)]
+    # Add explicit color for Unknown
+    college_colors['Unknown'] = '#888888'
+    # unique_colleges = sorted(list(set(college_map.values())))
+    # colormap = plt.cm.get_cmap('tab20', len(unique_colleges))
+    # college_colors = {college: to_hex(colormap(i)) for i, college in enumerate(unique_colleges)}
     # Calculate node sizes based on total funding
     amounts = list(faculty_amounts.values())
     # Add attributes to nodes
     for i, node in enumerate(G.nodes()):
+        college = college_assignment.get(node, 'Unknown')
         G.nodes[node]['color'] = college_colors.get(college_map.get(node, 'Unknown'), '#888888')
         G.nodes[node]['size'] = node_sizes[i]
         G.nodes[node]['title'] = (f"{node} | College: {college_map.get(node, 'Unknown')}"
     return G, college_colors
 # Create network
+G, college_colors = create_network(award_df, faculty_college_map)
 # Create pyvis network
 nt = Network(
 # Add some explanation
 st.markdown("""
+**Network Interaction Guide:**
+- Drag nodes to rearrange the network
+- Scroll to zoom in/out to see the details: Faculty Name | College | Total Funding
+- Click and drag background to pan
+- Hover over nodes to see details
+- Use the control panel (click the gear icon) to adjust physics settings
 """)
 # Show college color legend
 # Show raw data
+st.subheader("Award Data")
+st.dataframe(award_df, use_container_width=True)