TDAI-DS committed on
Commit
8333b37
·
verified ·
1 Parent(s): a39f65e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -27
app.py CHANGED
@@ -16,32 +16,45 @@ st.title("Faculty Collaboration Network Analysis -FY23 & FY24")
16
  # Load data
17
  @st.cache_data
18
  def load_data():
19
- return pd.read_csv('award.csv')
 
 
 
 
 
 
 
20
 
21
- df = load_data()
22
- df.columns = df.columns.str.strip() # Clean column names
23
 
24
  def convert_amount(amount_str):
25
  amount_str = amount_str.replace("$", "").replace(",", "")
26
  amount_str = round(float(amount_str), 2)
27
  return amount_str
28
 
29
- df['Authorized Amount'] = df['Authorized Amount'].apply(convert_amount)
30
 
31
  # Create graph and process data
32
  @st.cache_resource
33
- def create_network(df):
34
  G = nx.Graph()
35
- faculty_colleges = defaultdict(list)
36
  faculty_amounts = defaultdict(float)
37
 
 
 
 
 
 
 
 
38
  for _, row in df.iterrows():
39
  # Process PI information
40
  pi = str(row['PI Name']).strip()
41
  if not pi or pi == 'nan':
42
  continue
43
 
44
- college = str(row['College']).strip()
45
  amount = row['Authorized Amount']
46
 
47
  # Process Co-PI information
@@ -53,33 +66,46 @@ def create_network(df):
53
 
54
  # Add PI node and attributes
55
  G.add_node(pi)
56
- faculty_colleges[pi].append(college)
57
  faculty_amounts[pi] += amount
58
 
59
  # Add Co-PI nodes and edges
60
  for co_pi in co_pis:
61
  if co_pi and co_pi != pi: # Prevent self-loops
62
  G.add_node(co_pi)
63
- faculty_colleges[co_pi].append(college)
64
  faculty_amounts[co_pi] += amount
65
  G.add_edge(pi, co_pi)
66
 
67
- # Determine dominant college for each faculty member
68
- college_map = {}
69
- for faculty, colleges in faculty_colleges.items():
70
- college_counts = defaultdict(int)
71
- for c in colleges:
72
- if c and c != 'nan':
73
- college_counts[c] += 1
74
- if college_counts:
75
- college_map[faculty] = max(college_counts, key=college_counts.get)
76
- else:
77
- college_map[faculty] = 'Unknown'
 
 
 
 
78
 
79
  # Create color mapping
80
- unique_colleges = sorted(list(set(college_map.values())))
81
- colormap = plt.cm.get_cmap('tab20', len(unique_colleges))
82
- college_colors = {college: to_hex(colormap(i)) for i, college in enumerate(unique_colleges)}
 
 
 
 
 
 
 
 
 
83
 
84
  # Calculate node sizes based on total funding
85
  amounts = list(faculty_amounts.values())
@@ -98,6 +124,7 @@ def create_network(df):
98
 
99
  # Add attributes to nodes
100
  for i, node in enumerate(G.nodes()):
 
101
  G.nodes[node]['color'] = college_colors.get(college_map.get(node, 'Unknown'), '#888888')
102
  G.nodes[node]['size'] = node_sizes[i]
103
  G.nodes[node]['title'] = (f"{node} | College: {college_map.get(node, 'Unknown')}"
@@ -106,7 +133,7 @@ def create_network(df):
106
  return G, college_colors
107
 
108
  # Create network
109
- G, college_colors = create_network(df)
110
 
111
  # Create pyvis network
112
  nt = Network(
@@ -127,7 +154,12 @@ with open('network.html', 'r', encoding='utf-8') as f:
127
 
128
  # Add some explanation
129
  st.markdown("""
130
- **Network Interaction Guide: Scroll to zoom | Click-drag background to pan| Hover nodes for details (Name/College/Funding)**
 
 
 
 
 
131
  """)
132
 
133
  # Show college color legend
@@ -142,5 +174,6 @@ st.components.v1.html(html, height=800, scrolling=True)
142
 
143
 
144
  # Show raw data
145
- st.subheader("Award Data - (data source: OSU E-Activity)")
146
- st.dataframe(df, use_container_width=True)
 
 
16
  # Load data
17
  @st.cache_data
18
  def load_data():
19
+ award_df = pd.read_csv('award.csv')
20
+ faculty_college_df = pd.read_csv('faculty_college_20250226.csv')
21
+ return award_df, faculty_college_df
22
+
23
+
24
+ award_df, faculty_college_df = load_data()
25
+ award_df.columns = award_df.columns.str.strip() # Clean column names
26
+ faculty_college_df = faculty_college_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
27
 
28
+ faculty_college_map = dict(zip(faculty_college_df['Faculty Name'], faculty_college_df['College of']))
 
29
 
30
  def convert_amount(amount_str):
31
  amount_str = amount_str.replace("$", "").replace(",", "")
32
  amount_str = round(float(amount_str), 2)
33
  return amount_str
34
 
35
+ award_df['Authorized Amount'] = award_df['Authorized Amount'].apply(convert_amount)
36
 
37
  # Create graph and process data
38
  @st.cache_resource
39
+ def create_network(df, college_map):
40
  G = nx.Graph()
41
+ # faculty_colleges = defaultdict(list)
42
  faculty_amounts = defaultdict(float)
43
 
44
+ colorblind_palette =[
45
+ '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4',
46
+ '#FFEEAD', '#D4A5A5', '#779ECB', '#FFB347',
47
+ '#B1DDF3', '#A8E6CF', '#DCEDC1', '#FFD3B6',
48
+ '#FFAAA5', '#C8C6A7', '#92967D'
49
+ ]
50
+
51
  for _, row in df.iterrows():
52
  # Process PI information
53
  pi = str(row['PI Name']).strip()
54
  if not pi or pi == 'nan':
55
  continue
56
 
57
+ # college = str(row['College']).strip()
58
  amount = row['Authorized Amount']
59
 
60
  # Process Co-PI information
 
66
 
67
  # Add PI node and attributes
68
  G.add_node(pi)
69
+ # faculty_colleges[pi].append(college)
70
  faculty_amounts[pi] += amount
71
 
72
  # Add Co-PI nodes and edges
73
  for co_pi in co_pis:
74
  if co_pi and co_pi != pi: # Prevent self-loops
75
  G.add_node(co_pi)
76
+ # faculty_colleges[co_pi].append(college)
77
  faculty_amounts[co_pi] += amount
78
  G.add_edge(pi, co_pi)
79
 
80
+ # # Determine dominant college for each faculty member
81
+ # college_map = {}
82
+ # for faculty, colleges in faculty_colleges.items():
83
+ # college_counts = defaultdict(int)
84
+ # for c in colleges:
85
+ # if c and c != 'nan':
86
+ # college_counts[c] += 1
87
+ # if college_counts:
88
+ # college_map[faculty] = max(college_counts, key=college_counts.get)
89
+ # else:
90
+ # college_map[faculty] = 'Unknown'
91
+
92
+ # Get college for each node
93
+ college_assignment = {node: college_map.get(node, 'Unknown')
94
+ for node in G.nodes()}
95
 
96
  # Create color mapping
97
+ unique_colleges = sorted(list(set(college_assignment.values())))
98
+
99
+ # Create color mapping with cycling if needed
100
+ college_colors = {}
101
+ for i, college in enumerate(unique_colleges):
102
+ college_colors[college] = colorblind_palette[i % len(colorblind_palette)]
103
+
104
+ # Add explicit color for Unknown
105
+ college_colors['Unknown'] = '#888888'
106
+ # unique_colleges = sorted(list(set(college_map.values())))
107
+ # colormap = plt.cm.get_cmap('tab20', len(unique_colleges))
108
+ # college_colors = {college: to_hex(colormap(i)) for i, college in enumerate(unique_colleges)}
109
 
110
  # Calculate node sizes based on total funding
111
  amounts = list(faculty_amounts.values())
 
124
 
125
  # Add attributes to nodes
126
  for i, node in enumerate(G.nodes()):
127
+ college = college_assignment.get(node, 'Unknown')
128
  G.nodes[node]['color'] = college_colors.get(college_map.get(node, 'Unknown'), '#888888')
129
  G.nodes[node]['size'] = node_sizes[i]
130
  G.nodes[node]['title'] = (f"{node} | College: {college_map.get(node, 'Unknown')}"
 
133
  return G, college_colors
134
 
135
  # Create network
136
+ G, college_colors = create_network(award_df, faculty_college_map)
137
 
138
  # Create pyvis network
139
  nt = Network(
 
154
 
155
  # Add some explanation
156
  st.markdown("""
157
+ **Network Interaction Guide:**
158
+ - Drag nodes to rearrange the network
159
+ - Scroll to zoom in/out to see the details: Faculty Name | College | Total Funding
160
+ - Click and drag background to pan
161
+ - Hover over nodes to see details
162
+ - Use the control panel (click the gear icon) to adjust physics settings
163
  """)
164
 
165
  # Show college color legend
 
174
 
175
 
176
  # Show raw data
177
+ st.subheader("Award Data")
178
+ st.dataframe(award_df, use_container_width=True)
179
+