clementBE committed on
Commit
113fac7
·
verified ·
1 Parent(s): 8bd3a54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -3,23 +3,21 @@ import pandas as pd
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import networkx as nx
6
- import tempfile
7
  import os
8
 
9
  def process_file(file):
10
- # file is a path string when using gr.File
11
- file_ext = os.path.splitext(file)[1].lower()
12
  if file_ext == '.csv':
13
- df = pd.read_csv(file)
14
  elif file_ext in ['.xls', '.xlsx']:
15
- df = pd.read_excel(file)
16
  else:
17
- return "Unsupported file format", None, None, None, None
18
 
19
  required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
20
  missing = [col for col in required_cols if col not in df.columns]
21
  if missing:
22
- return f"Missing columns: {', '.join(missing)}", None, None, None, None
23
 
24
  df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
25
  df = df.dropna(subset=['date'])
@@ -35,12 +33,17 @@ def process_file(file):
35
  fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
36
  fig2.update_layout(xaxis_tickangle=-45)
37
 
38
- # Timeline of Comments with Like Count on Y-axis
 
 
 
 
 
39
  fig3 = px.scatter(
40
  df,
41
  x='date',
42
- y='like_count' if 'like_count' in df.columns else [0]*len(df),
43
- hover_data=['author', 'text', 'like_count'] if 'like_count' in df.columns else ['author', 'text'],
44
  title="Comments Over Time (Likes)",
45
  labels={'like_count': 'Like Count', 'date': 'Date'}
46
  )
@@ -50,10 +53,10 @@ def process_file(file):
50
  # Save to CSV for keyword search
51
  df.to_csv("latest_data.csv", index=False)
52
 
53
- # You may not want to remove the file immediately since Gradio manages files:
54
- # os.remove(file)
55
 
56
- return "Success", fig1, fig2, fig3, build_network_html_plotly(df)
57
 
58
  def build_network_html_plotly(df):
59
  G = nx.DiGraph()
@@ -109,20 +112,19 @@ def build_network_html_plotly(df):
109
  title='Comment Thread Network',
110
  showlegend=False,
111
  hovermode='closest',
112
- margin=dict(b=20,l=5,r=5,t=40),
113
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
114
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
115
  )
116
 
117
- # Save to temp HTML file
118
- tmp_dir = tempfile.mkdtemp()
119
- html_path = os.path.join(tmp_dir, "network.html")
120
  fig.write_html(html_path)
121
 
122
  with open(html_path, "r", encoding="utf-8") as f:
123
  html_content = f.read()
124
 
125
- return html_content
126
 
127
  def search_keyword(keyword):
128
  if not os.path.exists("latest_data.csv"):
@@ -153,14 +155,19 @@ with gr.Blocks() as demo:
153
  plot2 = gr.Plot(label="πŸ‘€ Top 20 Authors")
154
  timeline = gr.Plot(label="πŸ•’ Comment Timeline")
155
  network_html = gr.HTML(label="🧡 Thread Network")
 
156
 
157
  with gr.Row():
158
  keyword_input = gr.Textbox(label="πŸ” Search Keyword in Comments")
159
  search_button = gr.Button("Search")
160
-
161
  search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="πŸ” Search Results")
162
 
163
- file_input.change(fn=process_file, inputs=file_input, outputs=[status, plot1, plot2, timeline, network_html])
 
 
 
 
164
  search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
165
 
166
  if __name__ == "__main__":
 
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import networkx as nx
 
6
  import os
7
 
8
  def process_file(file):
9
+ file_ext = os.path.splitext(file.name)[1].lower()
 
10
  if file_ext == '.csv':
11
+ df = pd.read_csv(file.name)
12
  elif file_ext in ['.xls', '.xlsx']:
13
+ df = pd.read_excel(file.name)
14
  else:
15
+ return "Unsupported file format", None, None, None, None, None
16
 
17
  required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
18
  missing = [col for col in required_cols if col not in df.columns]
19
  if missing:
20
+ return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
21
 
22
  df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
23
  df = df.dropna(subset=['date'])
 
33
  fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
34
  fig2.update_layout(xaxis_tickangle=-45)
35
 
36
+ # Timeline of Comments (like_count if exists else 0)
37
+ y_data = df['like_count'] if 'like_count' in df.columns else [0]*len(df)
38
+ hover_cols = ['author', 'text']
39
+ if 'like_count' in df.columns:
40
+ hover_cols.append('like_count')
41
+
42
  fig3 = px.scatter(
43
  df,
44
  x='date',
45
+ y=y_data,
46
+ hover_data=hover_cols,
47
  title="Comments Over Time (Likes)",
48
  labels={'like_count': 'Like Count', 'date': 'Date'}
49
  )
 
53
  # Save to CSV for keyword search
54
  df.to_csv("latest_data.csv", index=False)
55
 
56
+ # Build network HTML and save permanently
57
+ network_html_content, network_path = build_network_html_plotly(df)
58
 
59
+ return "Success", fig1, fig2, fig3, network_html_content, network_path
60
 
61
  def build_network_html_plotly(df):
62
  G = nx.DiGraph()
 
112
  title='Comment Thread Network',
113
  showlegend=False,
114
  hovermode='closest',
115
+ margin=dict(b=20, l=5, r=5, t=40),
116
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
117
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
118
  )
119
 
120
+ # Save to permanent HTML file
121
+ html_path = "network.html"
 
122
  fig.write_html(html_path)
123
 
124
  with open(html_path, "r", encoding="utf-8") as f:
125
  html_content = f.read()
126
 
127
+ return html_content, html_path
128
 
129
  def search_keyword(keyword):
130
  if not os.path.exists("latest_data.csv"):
 
155
  plot2 = gr.Plot(label="πŸ‘€ Top 20 Authors")
156
  timeline = gr.Plot(label="πŸ•’ Comment Timeline")
157
  network_html = gr.HTML(label="🧡 Thread Network")
158
+ download_network = gr.File(label="⬇️ Download Network HTML", interactive=False)
159
 
160
  with gr.Row():
161
  keyword_input = gr.Textbox(label="πŸ” Search Keyword in Comments")
162
  search_button = gr.Button("Search")
163
+
164
  search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="πŸ” Search Results")
165
 
166
+ file_input.change(
167
+ fn=process_file,
168
+ inputs=file_input,
169
+ outputs=[status, plot1, plot2, timeline, network_html, download_network]
170
+ )
171
  search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
172
 
173
  if __name__ == "__main__":