Spaces:

clementBE
/

YT_Comments_explorer

Sleeping

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

58b3135

verified ·

1 Parent(s): 5936ccc

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -63

app.py CHANGED Viewed

@@ -1,66 +1,40 @@
 import gradio as gr
 import pandas as pd
-import matplotlib.pyplot as plt
 import plotly.express as px
-import networkx as nx
-import io
-# --- Load your dataset
-# Read once at import time into a module-level frame that the plotting
-# functions in this file read from.
-df = pd.read_csv("comments.csv", parse_dates=["date"])  # Ensure 'date' is datetime
-# Second conversion pass over the same column (presumably a safeguard in case
-# parse_dates left the values as strings).
-df["date"] = pd.to_datetime(df["date"])
-# --- 1. Plot messages by day or month
-def plot_messages_over_time(group_by="D"):
-    """Return a Plotly line chart of message counts per time bucket.
-
-    Args:
-        group_by: Frequency string handed to ``pd.Grouper`` as ``freq``
-            (the UI in this file offers "D" and "M").
-
-    Returns:
-        The figure produced by ``px.line``.
-    """
-    # size() counts the rows of the module-level ``df`` that fall into each
-    # bucket of the "date" column; reset_index turns that into a two-column
-    # frame ("date", "count") for plotting.
-    df_grouped = df.groupby(pd.Grouper(key="date", freq=group_by)).size().reset_index(name="count")
-    fig = px.line(df_grouped, x="date", y="count", title="Messages Over Time")
-    return fig
-# --- 2. Plot messages by author
-def plot_messages_by_author(top_n=20):
-    """Return a Plotly bar chart of the authors with the most messages.
-
-    Args:
-        top_n: How many of the most frequent authors to keep.
-
-    Returns:
-        The figure produced by ``px.bar``.
-    """
-    # value_counts() orders authors by frequency, so head(top_n) keeps the
-    # most active ones.
-    top_authors = df["author"].value_counts().head(top_n).reset_index()
-    # Name the two columns explicitly rather than relying on the labels
-    # reset_index() produces.
-    top_authors.columns = ["author", "message_count"]
-    fig = px.bar(top_authors, x="author", y="message_count", title=f"Top {top_n} Authors")
-    return fig
-# --- 3. Network of threads
-def generate_thread_network(min_replies=1):
-    """Build the reply graph from ``df`` and return it drawn as a figure.
-
-    Args:
-        min_replies: Degree threshold; nodes whose degree is below it are
-            left out of the drawing.
-
-    Returns:
-        The figure returned by ``plot_network`` for the filtered graph.
-    """
-    # Keep only rows where both ids are present; each row becomes one
-    # directed edge parent_id -> comment_id.
-    edges = df[["parent_id", "comment_id"]].dropna()
-    G = nx.DiGraph()
-    for _, row in edges.iterrows():
-        G.add_edge(row["parent_id"], row["comment_id"])
-    # Drop low-degree nodes: degrees are measured on the full graph G and the
-    # matching nodes are removed from a copy, so G itself is left intact.
-    G_filtered = G.copy()
-    isolated = [node for node in G.nodes if G.degree(node) < min_replies]
-    G_filtered.remove_nodes_from(isolated)
-    fig = plot_network(G_filtered)
-    return fig
-def plot_network(G):
-    """Draw graph ``G`` with a spring layout and return the Matplotlib figure.
-
-    Args:
-        G: The networkx graph to draw.
-
-    Returns:
-        The Matplotlib figure holding the drawing.
-    """
-    # Fixed seed passed to the layout so positions do not vary between calls.
-    pos = nx.spring_layout(G, seed=42)
-    fig, ax = plt.subplots(figsize=(8, 6))
-    nx.draw(G, pos, with_labels=False, node_size=50, edge_color='gray', alpha=0.7, ax=ax)
-    return fig
-# --- Gradio interface
-# Three tabs, each pairing one control with one plot. Only `change` events are
-# wired here, so a plot is drawn when its control's value changes.
-with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 YouTube Comment Explorer")
-    gr.Markdown("Explore messages by time, author, and discussion threads")
-    # Tab 1: message volume, bucketed by day or by month.
-    with gr.Tab("📆 Messages Over Time"):
-        freq_choice = gr.Radio(["D", "M"], label="Group by (D=day, M=month)", value="M")
-        time_plot = gr.Plot()
-        freq_choice.change(fn=plot_messages_over_time, inputs=freq_choice, outputs=time_plot)
-    # Tab 2: most active authors; the slider sets how many to show.
-    with gr.Tab("👤 Messages by Author"):
-        author_slider = gr.Slider(5, 50, value=20, step=1, label="Top N Authors")
-        author_plot = gr.Plot()
-        author_slider.change(fn=plot_messages_by_author, inputs=author_slider, outputs=author_plot)
-    # Tab 3: reply graph; the slider sets the degree threshold for nodes.
-    with gr.Tab("🔗 Thread Network"):
-        thread_slider = gr.Slider(1, 10, value=1, step=1, label="Min Replies to Include Node")
-        network_plot = gr.Plot()
-        thread_slider.change(fn=generate_thread_network, inputs=thread_slider, outputs=network_plot)
-# Starts the app unconditionally as soon as the module is executed.
-demo.launch()

 import gradio as gr
 import pandas as pd
 import plotly.express as px
+import os
+def process_file(file):
+    """Build the two summary charts for an uploaded comments CSV.
+
+    Args:
+        file: The upload handed over by ``gr.File`` -- either an object whose
+            ``name`` attribute is the path of the temporary file, or that
+            path itself as a string.
+
+    Returns:
+        A ``(fig1, fig2)`` tuple of Plotly figures: a line chart of message
+        counts per calendar day and a bar chart of the 20 most active
+        authors.
+
+    Raises:
+        gr.Error: If nothing was uploaded, the uploaded file has already been
+            removed, or a required column ('author', 'timestamp') is missing.
+    """
+    if file is None:
+        raise gr.Error("Please upload a CSV file first.")
+    # Accept both a file-like wrapper (``.name``) and a bare path string.
+    path = getattr(file, "name", file)
+    try:
+        df = pd.read_csv(path)
+    except FileNotFoundError as err:
+        # The upload is deleted after every run (see ``finally``), so a second
+        # submit without a fresh upload ends up here.
+        raise gr.Error(
+            "The uploaded file is no longer available; please upload it again."
+        ) from err
+    finally:
+        # (Optional) delete the uploaded file. Done here so it is removed even
+        # when parsing fails; cleanup is best-effort and must never mask the
+        # real outcome of the read.
+        try:
+            os.remove(path)
+        except OSError:
+            pass
+    # Fail with a readable message instead of a bare KeyError further down.
+    missing = [col for col in ("author", "timestamp") if col not in df.columns]
+    if missing:
+        raise gr.Error(f"CSV is missing required column(s): {', '.join(missing)}")
+    # Convert Unix timestamps (seconds) to datetime.
+    df['date'] = pd.to_datetime(df['timestamp'], unit='s')
+    # --- Plot 1: Messages over time ---
+    df['date_only'] = df['date'].dt.date
+    messages_per_day = df.groupby("date_only").size().reset_index(name="count")
+    fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")
+    # --- Plot 2: Messages by Author ---
+    top_authors = df['author'].value_counts().nlargest(20).reset_index()
+    # Name the columns explicitly rather than relying on reset_index() labels.
+    top_authors.columns = ['author', 'count']
+    fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
+    fig2.update_layout(xaxis_tickangle=-45)
+    return fig1, fig2
+# Gradio interface: one CSV upload in, two Plotly charts out.
+csv_upload = gr.File(label="Upload your CSV file", file_types=[".csv"])
+chart_outputs = [
+    gr.Plot(label="Messages by Day"),
+    gr.Plot(label="Messages by Author"),
+]
+interface = gr.Interface(
+    fn=process_file,
+    inputs=csv_upload,
+    outputs=chart_outputs,
+    title="Message Analyzer",
+    description="Upload a CSV file with at least 'author' and 'timestamp' (Unix) columns.",
+)
+# Start the server only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    interface.launch()