clementBE committed on
Commit
58b3135
·
verified ·
1 Parent(s): 5936ccc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -63
app.py CHANGED
@@ -1,66 +1,40 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import matplotlib.pyplot as plt
4
  import plotly.express as px
5
- import networkx as nx
6
- import io
7
-
8
# --- Load the comment dataset
# read_csv is asked to parse "date" up front; the explicit to_datetime pass
# then repeats the conversion on the loaded column.
df = pd.read_csv("comments.csv", parse_dates=["date"]).assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)
11
-
12
- # --- 1. Plot messages by day or month
13
def plot_messages_over_time(group_by="D"):
    """Draw a line chart of how many messages fall into each date bucket.

    Reads the module-level ``df`` and groups it on its "date" column.

    Args:
        group_by: Frequency alias passed to ``pd.Grouper`` as ``freq``.

    Returns:
        The Plotly line figure titled "Messages Over Time".
    """
    bucketer = pd.Grouper(key="date", freq=group_by)
    bucket_sizes = df.groupby(bucketer).size()
    counts_frame = bucket_sizes.reset_index(name="count")
    return px.line(counts_frame, x="date", y="count", title="Messages Over Time")
17
-
18
- # --- 2. Plot messages by author
19
def plot_messages_by_author(top_n=20):
    """Draw a bar chart of the authors with the most messages.

    Reads the module-level ``df`` and counts rows per "author" value.

    Args:
        top_n: How many of the most frequent authors to keep.

    Returns:
        The Plotly bar figure titled "Top {top_n} Authors".
    """
    author_counts = df["author"].value_counts()
    leaders = author_counts.head(top_n).reset_index()
    # Name the two columns explicitly rather than relying on the defaults
    # produced by reset_index().
    leaders.columns = ["author", "message_count"]
    chart_title = f"Top {top_n} Authors"
    return px.bar(leaders, x="author", y="message_count", title=chart_title)
24
-
25
- # --- 3. Network of threads
26
def generate_thread_network(min_replies=1):
    """Build the parent -> comment reply graph and return its drawing.

    Reads the module-level ``df``; rows missing either "parent_id" or
    "comment_id" are ignored.

    Args:
        min_replies: Nodes whose degree is below this value are dropped
            before drawing.

    Returns:
        Whatever ``plot_network`` returns for the filtered graph.
    """
    links = df[["parent_id", "comment_id"]].dropna()

    graph = nx.DiGraph()
    # One directed edge per row, pointing from the parent to the comment.
    graph.add_edges_from(zip(links["parent_id"], links["comment_id"]))

    # NOTE(review): degree() on a DiGraph counts incoming and outgoing edges
    # together, so this is not strictly a "number of replies" filter.
    sparse_nodes = [node for node, degree in graph.degree() if degree < min_replies]
    graph.remove_nodes_from(sparse_nodes)

    return plot_network(graph)
39
-
40
def plot_network(G):
    """Draw graph ``G`` with a spring layout on a new Matplotlib figure.

    Args:
        G: The networkx graph to draw.

    Returns:
        The Matplotlib figure holding the drawing. The figure is detached
        from pyplot's global registry before it is returned.
    """
    # Fixed seed so the same graph always gets the same layout.
    pos = nx.spring_layout(G, seed=42)
    fig, ax = plt.subplots(figsize=(8, 6))
    nx.draw(G, pos, with_labels=False, node_size=50, edge_color='gray', alpha=0.7, ax=ax)
    # pyplot keeps every figure created through plt.subplots alive until it is
    # closed; without this, each call (one per slider change) leaks a figure.
    # The Figure object itself remains usable by the caller after closing.
    plt.close(fig)
    return fig
45
-
46
- # --- Gradio interface
47
# --- Gradio interface: one tab per view, each with a control and a plot.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 YouTube Comment Explorer")
    gr.Markdown("Explore messages by time, author, and discussion threads")

    with gr.Tab("📆 Messages Over Time"):
        freq_choice = gr.Radio(["D", "M"], label="Group by (D=day, M=month)", value="M")
        time_plot = gr.Plot()
        freq_choice.change(fn=plot_messages_over_time, inputs=freq_choice, outputs=time_plot)

    with gr.Tab("👤 Messages by Author"):
        author_slider = gr.Slider(5, 50, value=20, step=1, label="Top N Authors")
        author_plot = gr.Plot()
        author_slider.change(fn=plot_messages_by_author, inputs=author_slider, outputs=author_plot)

    with gr.Tab("🔗 Thread Network"):
        thread_slider = gr.Slider(1, 10, value=1, step=1, label="Min Replies to Include Node")
        network_plot = gr.Plot()
        thread_slider.change(fn=generate_thread_network, inputs=thread_slider, outputs=network_plot)

    # The .change handlers above only run after the user touches a control,
    # which would leave every plot empty on first page load. Render each plot
    # once with the control's default value when the page opens.
    demo.load(fn=plot_messages_over_time, inputs=freq_choice, outputs=time_plot)
    demo.load(fn=plot_messages_by_author, inputs=author_slider, outputs=author_plot)
    demo.load(fn=generate_thread_network, inputs=thread_slider, outputs=network_plot)

demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  import plotly.express as px
4
+ import os
5
+
6
def process_file(file):
    """Build the per-day and per-author charts for an uploaded CSV.

    Args:
        file: The upload handed over by the ``gr.File`` input. Either a
            filesystem path, or an object exposing that path as ``.name``.
            ``None`` (nothing uploaded / upload cleared) is accepted.

    Returns:
        A ``(fig1, fig2)`` tuple: a line chart of messages per day and a bar
        chart of the 20 most active authors. ``(None, None)`` when ``file``
        is ``None``.

    Raises:
        ValueError: If the CSV lacks the 'author' or 'timestamp' column.
    """
    if file is None:
        # Nothing to analyse; hand back empty values for both plots.
        return None, None

    # Accept both a plain path and a file wrapper carrying the path in .name.
    path = file if isinstance(file, (str, os.PathLike)) else file.name

    try:
        df = pd.read_csv(path)

        missing = [col for col in ("author", "timestamp") if col not in df.columns]
        if missing:
            raise ValueError(
                f"CSV is missing required column(s): {', '.join(missing)}"
            )

        # Convert the Unix timestamp (seconds) to datetime.
        df['date'] = pd.to_datetime(df['timestamp'], unit='s')

        # --- Plot 1: Messages over time ---
        df['date_only'] = df['date'].dt.date
        messages_per_day = df.groupby("date_only").size().reset_index(name="count")
        fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")

        # --- Plot 2: Messages by Author ---
        top_authors = df['author'].value_counts().nlargest(20).reset_index()
        top_authors.columns = ['author', 'count']
        fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
        fig2.update_layout(xaxis_tickangle=-45)

        return fig1, fig2
    finally:
        # Delete the uploaded file whether or not processing succeeded.
        # Best-effort: a file that is already gone or cannot be removed must
        # not mask the real result or the real error.
        try:
            os.remove(path)
        except OSError:
            pass
26
+
27
+ # Gradio interface
28
# Input: a single CSV upload. Outputs: the two charts returned by process_file,
# in the same order.
upload_input = gr.File(label="Upload your CSV file", file_types=[".csv"])
chart_outputs = [
    gr.Plot(label="Messages by Day"),
    gr.Plot(label="Messages by Author"),
]

interface = gr.Interface(
    fn=process_file,
    inputs=upload_input,
    outputs=chart_outputs,
    title="Message Analyzer",
    description="Upload a CSV file with at least 'author' and 'timestamp' (Unix) columns.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()