clementBE committed on
Commit
5936ccc
·
verified ·
0 Parent(s):

initial commit

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +66 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: YT Comments Explorer
3
+ emoji: 🐢
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.36.2
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import plotly.express as px
5
+ import networkx as nx
6
+ import io
7
+
8
+ # --- Load your dataset
9
# Read the exported comments once at import time; every plotting function
# below works on this module-level frame. ``parse_dates`` asks pandas to parse
# the "date" column while the CSV is being loaded.
df = pd.read_csv(
    "comments.csv",
    parse_dates=["date"],
)
# Second, explicit conversion of the same column so that "date" is datetime.
df["date"] = pd.to_datetime(df["date"])
11
+
12
+ # --- 1. Plot messages by day or month
13
def plot_messages_over_time(group_by="D"):
    """Return a Plotly line chart of the number of comments per time bucket.

    Args:
        group_by: Frequency string handed to ``pd.Grouper`` as ``freq``. The
            UI offers "D" and "M".

    Returns:
        The figure built by ``px.line`` with "date" on the x axis and the
        per-bucket row count on the y axis.
    """
    bucket = pd.Grouper(key="date", freq=group_by)
    # ``size()`` counts rows per bucket; ``reset_index`` turns the result back
    # into a two-column frame ("date", "count") for Plotly.
    per_bucket = df.groupby(bucket).size()
    counts = per_bucket.reset_index(name="count")
    return px.line(counts, x="date", y="count", title="Messages Over Time")
17
+
18
+ # --- 2. Plot messages by author
19
def plot_messages_by_author(top_n=20):
    """Return a Plotly bar chart of the ``top_n`` most frequent authors.

    Args:
        top_n: Number of authors to keep, taken from the head of
            ``df["author"].value_counts()``.

    Returns:
        The figure built by ``px.bar`` with one bar per author and the
        author's comment count as bar height.
    """
    per_author = df["author"].value_counts().head(top_n)
    # Name the index and the value column explicitly so the resulting frame
    # always has the columns "author" and "message_count".
    ranking = per_author.rename_axis("author").reset_index(name="message_count")
    return px.bar(
        ranking,
        x="author",
        y="message_count",
        title=f"Top {top_n} Authors",
    )
24
+
25
+ # --- 3. Network of threads
26
def generate_thread_network(min_replies=1):
    """Build the reply graph from ``df`` and return a drawing of it.

    Every row of the module-level ``df`` that has both a ``parent_id`` and a
    ``comment_id`` contributes one directed edge ``parent_id -> comment_id``.
    Nodes whose total degree (incoming plus outgoing edges) is below
    ``min_replies`` are removed before drawing.

    Args:
        min_replies: Minimum total degree a node needs to be kept.

    Returns:
        The matplotlib figure produced by ``plot_network``.
    """
    edges = df[["parent_id", "comment_id"]].dropna()

    graph = nx.DiGraph()
    # Add all edges in one call instead of a per-row ``iterrows()`` loop.
    graph.add_edges_from(zip(edges["parent_id"], edges["comment_id"]))

    # Collect the low-degree nodes first, then remove them. Degrees are read
    # from the complete graph, so removing one node does not change whether
    # another is kept. The graph is local, so no copy is needed.
    low_degree = [node for node, degree in graph.degree() if degree < min_replies]
    graph.remove_nodes_from(low_degree)

    return plot_network(graph)
39
+
40
def plot_network(G):
    """Draw graph ``G`` with a spring layout and return the matplotlib figure.

    Args:
        G: The networkx graph to draw.

    Returns:
        The matplotlib figure containing the drawing.
    """
    # Fixed seed so the same graph is laid out the same way on every call.
    pos = nx.spring_layout(G, seed=42)
    fig, ax = plt.subplots(figsize=(8, 6))
    nx.draw(G, pos, with_labels=False, node_size=50, edge_color="gray", alpha=0.7, ax=ax)
    # ``plt.subplots`` registers the figure with pyplot, which keeps it alive
    # until it is closed. This function runs on every slider change, so
    # deregister the figure here; the returned object can still be rendered.
    plt.close(fig)
    return fig
45
+
46
+ # --- Gradio interface
47
# Three-tab UI: each tab pairs one input control with one plot. The plot is
# redrawn when its control changes, and once on page load so that a tab is not
# blank until the user first touches its control.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 YouTube Comment Explorer")
    gr.Markdown("Explore messages by time, author, and discussion threads")

    with gr.Tab("📆 Messages Over Time"):
        freq_choice = gr.Radio(["D", "M"], label="Group by (D=day, M=month)", value="M")
        time_plot = gr.Plot()
        freq_choice.change(fn=plot_messages_over_time, inputs=freq_choice, outputs=time_plot)

    with gr.Tab("👤 Messages by Author"):
        author_slider = gr.Slider(5, 50, value=20, step=1, label="Top N Authors")
        author_plot = gr.Plot()
        author_slider.change(fn=plot_messages_by_author, inputs=author_slider, outputs=author_plot)

    with gr.Tab("🔗 Thread Network"):
        thread_slider = gr.Slider(1, 10, value=1, step=1, label="Min Replies to Include Node")
        network_plot = gr.Plot()
        thread_slider.change(fn=generate_thread_network, inputs=thread_slider, outputs=network_plot)

    # Initial render using each control's default value.
    demo.load(fn=plot_messages_over_time, inputs=freq_choice, outputs=time_plot)
    demo.load(fn=plot_messages_by_author, inputs=author_slider, outputs=author_plot)
    demo.load(fn=generate_thread_network, inputs=thread_slider, outputs=network_plot)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ matplotlib
4
+ plotly
5
+ networkx