Commit ·
0c3f178
1
Parent(s): dcf9d0a
HW6 Viz 1 Update
Browse files
- app.py +47 -7
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -31,12 +31,52 @@ def load_data():
|
|
| 31 |
'degree': list(degrees.values()),
|
| 32 |
'clustering_coefficient': [clustering[n] for n in degrees.keys()]
|
| 33 |
})
|
| 34 |
-
return df
|
| 35 |
|
| 36 |
-
df = load_data()
|
| 37 |
|
| 38 |
-
# --- Visualization 1:
|
| 39 |
-
st.subheader("Visualization 1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
hist = alt.Chart(df).mark_bar().encode(
|
| 42 |
alt.X('degree:Q', bin=alt.Bin(maxbins=50), title='Degree (Number of Friends)'),
|
|
@@ -68,8 +108,8 @@ It reveals that most users have a low-to-medium number of friends, while a few n
|
|
| 68 |
I'd add interactive sliders to explore bin size and consider fitting a power-law model.
|
| 69 |
""")
|
| 70 |
|
| 71 |
-
# --- Visualization
|
| 72 |
-
st.subheader("Visualization
|
| 73 |
|
| 74 |
scatter = alt.Chart(df).mark_circle(size=60, opacity=0.6).encode(
|
| 75 |
x=alt.X('degree:Q', title='Degree (Number of Friends)'),
|
|
@@ -83,7 +123,7 @@ scatter = alt.Chart(df).mark_circle(size=60, opacity=0.6).encode(
|
|
| 83 |
|
| 84 |
st.altair_chart(scatter, use_container_width=True)
|
| 85 |
|
| 86 |
-
# --- Write-Up for Visualization
|
| 87 |
st.markdown("""
|
| 88 |
**What is being visualized:**
|
| 89 |
This scatter plot shows how the clustering coefficient (a measure of how connected a node's neighbors are) varies with degree.
|
|
|
|
| 31 |
'degree': list(degrees.values()),
|
| 32 |
'clustering_coefficient': [clustering[n] for n in degrees.keys()]
|
| 33 |
})
|
| 34 |
+
return df, G
|
| 35 |
|
| 36 |
+
df, G = load_data()
|
| 37 |
|
| 38 |
+
# --- Visualization 1: Interactive Network Graph ---
|
| 39 |
+
st.subheader("Visualization 1: Interactive Network Graph")
|
| 40 |
+
|
| 41 |
+
from pyvis.network import Network
|
| 42 |
+
import streamlit.components.v1 as components
|
| 43 |
+
import tempfile
|
| 44 |
+
import os
|
| 45 |
+
|
| 46 |
+
# Limit size for performance
|
| 47 |
+
G_sub = G.subgraph(list(df.sort_values(by='degree', ascending=False).head(100)['node']))
|
| 48 |
+
|
| 49 |
+
# Create Pyvis Network
|
| 50 |
+
net = Network(height="500px", width="100%", notebook=False, bgcolor="#222222", font_color="white")
|
| 51 |
+
net.from_nx(G_sub)
|
| 52 |
+
|
| 53 |
+
# Save to temp file and embed
|
| 54 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as tmp_file:
|
| 55 |
+
net.save_graph(tmp_file.name)
|
| 56 |
+
HtmlFile = open(tmp_file.name, 'r', encoding='utf-8')
|
| 57 |
+
components.html(HtmlFile.read(), height=550)
|
| 58 |
+
HtmlFile.close()
|
| 59 |
+
os.unlink(tmp_file.name)
|
| 60 |
+
|
| 61 |
+
# --- Write-Up for Visualization 1 ---
|
| 62 |
+
st.markdown("""
|
| 63 |
+
**What is being visualized:**
|
| 64 |
+
This interactive diagram shows a subgraph of the 100 most connected nodes in the Facebook network.
|
| 65 |
+
|
| 66 |
+
**Why it’s interesting:**
|
| 67 |
+
It reveals tightly knit clusters, central hubs, and bridges visually — which can’t be seen in numeric plots.
|
| 68 |
+
|
| 69 |
+
**Design choices:**
|
| 70 |
+
- `pyvis` enables force-directed layout with drag-zoom
|
| 71 |
+
- High-degree nodes selected for visibility and clarity
|
| 72 |
+
- Dark theme for contrast and style
|
| 73 |
+
|
| 74 |
+
**What I'd improve with more time:**
|
| 75 |
+
I’d explore community detection and color nodes by cluster, or allow node attribute filtering live in the app.
|
| 76 |
+
""")
|
| 77 |
+
|
| 78 |
+
# --- Visualization 2: Degree Histogram ---
|
| 79 |
+
st.subheader("Visualization 2: Degree Distribution Histogram")
|
| 80 |
|
| 81 |
hist = alt.Chart(df).mark_bar().encode(
|
| 82 |
alt.X('degree:Q', bin=alt.Bin(maxbins=50), title='Degree (Number of Friends)'),
|
|
|
|
| 108 |
I'd add interactive sliders to explore bin size and consider fitting a power-law model.
|
| 109 |
""")
|
| 110 |
|
| 111 |
+
# --- Visualization 3: Scatter Plot Degree vs. Clustering ---
|
| 112 |
+
st.subheader("Visualization 3: Degree vs Clustering Coefficient")
|
| 113 |
|
| 114 |
scatter = alt.Chart(df).mark_circle(size=60, opacity=0.6).encode(
|
| 115 |
x=alt.X('degree:Q', title='Degree (Number of Friends)'),
|
|
|
|
| 123 |
|
| 124 |
st.altair_chart(scatter, use_container_width=True)
|
| 125 |
|
| 126 |
+
# --- Write-Up for Visualization 3 ---
|
| 127 |
st.markdown("""
|
| 128 |
**What is being visualized:**
|
| 129 |
This scatter plot shows how the clustering coefficient (a measure of how connected a node's neighbors are) varies with degree.
|
requirements.txt
CHANGED
|
@@ -2,3 +2,4 @@ streamlit
|
|
| 2 |
altair
|
| 3 |
pandas
|
| 4 |
networkx
|
|
|
|
|
|
| 2 |
altair
|
| 3 |
pandas
|
| 4 |
networkx
|
| 5 |
+
pyvis
|