Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,23 +3,21 @@ import pandas as pd
|
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import networkx as nx
|
| 6 |
-
import tempfile
|
| 7 |
import os
|
| 8 |
|
| 9 |
def process_file(file):
|
| 10 |
-
|
| 11 |
-
file_ext = os.path.splitext(file)[1].lower()
|
| 12 |
if file_ext == '.csv':
|
| 13 |
-
df = pd.read_csv(file)
|
| 14 |
elif file_ext in ['.xls', '.xlsx']:
|
| 15 |
-
df = pd.read_excel(file)
|
| 16 |
else:
|
| 17 |
-
return "Unsupported file format", None, None, None, None
|
| 18 |
|
| 19 |
required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
|
| 20 |
missing = [col for col in required_cols if col not in df.columns]
|
| 21 |
if missing:
|
| 22 |
-
return f"Missing columns: {', '.join(missing)}", None, None, None, None
|
| 23 |
|
| 24 |
df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
|
| 25 |
df = df.dropna(subset=['date'])
|
|
@@ -35,12 +33,17 @@ def process_file(file):
|
|
| 35 |
fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
|
| 36 |
fig2.update_layout(xaxis_tickangle=-45)
|
| 37 |
|
| 38 |
-
# Timeline of Comments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
fig3 = px.scatter(
|
| 40 |
df,
|
| 41 |
x='date',
|
| 42 |
-
y=
|
| 43 |
-
hover_data=
|
| 44 |
title="Comments Over Time (Likes)",
|
| 45 |
labels={'like_count': 'Like Count', 'date': 'Date'}
|
| 46 |
)
|
|
@@ -50,10 +53,10 @@ def process_file(file):
|
|
| 50 |
# Save to CSV for keyword search
|
| 51 |
df.to_csv("latest_data.csv", index=False)
|
| 52 |
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
|
| 56 |
-
return "Success", fig1, fig2, fig3,
|
| 57 |
|
| 58 |
def build_network_html_plotly(df):
|
| 59 |
G = nx.DiGraph()
|
|
@@ -109,20 +112,19 @@ def build_network_html_plotly(df):
|
|
| 109 |
title='Comment Thread Network',
|
| 110 |
showlegend=False,
|
| 111 |
hovermode='closest',
|
| 112 |
-
margin=dict(b=20,l=5,r=5,t=40),
|
| 113 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 114 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
|
| 115 |
)
|
| 116 |
|
| 117 |
-
# Save to
|
| 118 |
-
|
| 119 |
-
html_path = os.path.join(tmp_dir, "network.html")
|
| 120 |
fig.write_html(html_path)
|
| 121 |
|
| 122 |
with open(html_path, "r", encoding="utf-8") as f:
|
| 123 |
html_content = f.read()
|
| 124 |
|
| 125 |
-
return html_content
|
| 126 |
|
| 127 |
def search_keyword(keyword):
|
| 128 |
if not os.path.exists("latest_data.csv"):
|
|
@@ -153,14 +155,19 @@ with gr.Blocks() as demo:
|
|
| 153 |
plot2 = gr.Plot(label="👤 Top 20 Authors")
|
| 154 |
timeline = gr.Plot(label="π Comment Timeline")
|
| 155 |
network_html = gr.HTML(label="🧵 Thread Network")
|
|
|
|
| 156 |
|
| 157 |
with gr.Row():
|
| 158 |
keyword_input = gr.Textbox(label="π Search Keyword in Comments")
|
| 159 |
search_button = gr.Button("Search")
|
| 160 |
-
|
| 161 |
search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="π Search Results")
|
| 162 |
|
| 163 |
-
file_input.change(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
|
|
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import networkx as nx
|
|
|
|
| 6 |
import os
|
| 7 |
|
| 8 |
def process_file(file):
|
| 9 |
+
file_ext = os.path.splitext(file.name)[1].lower()
|
|
|
|
| 10 |
if file_ext == '.csv':
|
| 11 |
+
df = pd.read_csv(file.name)
|
| 12 |
elif file_ext in ['.xls', '.xlsx']:
|
| 13 |
+
df = pd.read_excel(file.name)
|
| 14 |
else:
|
| 15 |
+
return "Unsupported file format", None, None, None, None, None
|
| 16 |
|
| 17 |
required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
|
| 18 |
missing = [col for col in required_cols if col not in df.columns]
|
| 19 |
if missing:
|
| 20 |
+
return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
|
| 21 |
|
| 22 |
df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
|
| 23 |
df = df.dropna(subset=['date'])
|
|
|
|
| 33 |
fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
|
| 34 |
fig2.update_layout(xaxis_tickangle=-45)
|
| 35 |
|
| 36 |
+
# Timeline of Comments (like_count if exists else 0)
|
| 37 |
+
y_data = df['like_count'] if 'like_count' in df.columns else [0]*len(df)
|
| 38 |
+
hover_cols = ['author', 'text']
|
| 39 |
+
if 'like_count' in df.columns:
|
| 40 |
+
hover_cols.append('like_count')
|
| 41 |
+
|
| 42 |
fig3 = px.scatter(
|
| 43 |
df,
|
| 44 |
x='date',
|
| 45 |
+
y=y_data,
|
| 46 |
+
hover_data=hover_cols,
|
| 47 |
title="Comments Over Time (Likes)",
|
| 48 |
labels={'like_count': 'Like Count', 'date': 'Date'}
|
| 49 |
)
|
|
|
|
| 53 |
# Save to CSV for keyword search
|
| 54 |
df.to_csv("latest_data.csv", index=False)
|
| 55 |
|
| 56 |
+
# Build network HTML and save permanently
|
| 57 |
+
network_html_content, network_path = build_network_html_plotly(df)
|
| 58 |
|
| 59 |
+
return "Success", fig1, fig2, fig3, network_html_content, network_path
|
| 60 |
|
| 61 |
def build_network_html_plotly(df):
|
| 62 |
G = nx.DiGraph()
|
|
|
|
| 112 |
title='Comment Thread Network',
|
| 113 |
showlegend=False,
|
| 114 |
hovermode='closest',
|
| 115 |
+
margin=dict(b=20, l=5, r=5, t=40),
|
| 116 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 117 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
|
| 118 |
)
|
| 119 |
|
| 120 |
+
# Save to permanent HTML file
|
| 121 |
+
html_path = "network.html"
|
|
|
|
| 122 |
fig.write_html(html_path)
|
| 123 |
|
| 124 |
with open(html_path, "r", encoding="utf-8") as f:
|
| 125 |
html_content = f.read()
|
| 126 |
|
| 127 |
+
return html_content, html_path
|
| 128 |
|
| 129 |
def search_keyword(keyword):
|
| 130 |
if not os.path.exists("latest_data.csv"):
|
|
|
|
| 155 |
plot2 = gr.Plot(label="👤 Top 20 Authors")
|
| 156 |
timeline = gr.Plot(label="π Comment Timeline")
|
| 157 |
network_html = gr.HTML(label="🧵 Thread Network")
|
| 158 |
+
download_network = gr.File(label="⬇️ Download Network HTML", interactive=False)
|
| 159 |
|
| 160 |
with gr.Row():
|
| 161 |
keyword_input = gr.Textbox(label="π Search Keyword in Comments")
|
| 162 |
search_button = gr.Button("Search")
|
| 163 |
+
|
| 164 |
search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="π Search Results")
|
| 165 |
|
| 166 |
+
file_input.change(
|
| 167 |
+
fn=process_file,
|
| 168 |
+
inputs=file_input,
|
| 169 |
+
outputs=[status, plot1, plot2, timeline, network_html, download_network]
|
| 170 |
+
)
|
| 171 |
search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
|
| 172 |
|
| 173 |
if __name__ == "__main__":
|