Spaces:

clementBE
/

YT_Comments_explorer

Sleeping

App Files Files Community

clementBE committed on Sep 19, 2025

Commit

ea55efd

verified ·

1 Parent(s): 113fac7

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -22

app.py CHANGED Viewed

@@ -14,12 +14,12 @@ def process_file(file):
     else:
         return "Unsupported file format", None, None, None, None, None
-    required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
     missing = [col for col in required_cols if col not in df.columns]
     if missing:
         return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
-    df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
     df = df.dropna(subset=['date'])
     df['date_only'] = df['date'].dt.date
@@ -28,16 +28,14 @@ def process_file(file):
     fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")
     # Top Authors
-    top_authors = df['author'].value_counts().nlargest(20).reset_index()
     top_authors.columns = ['author', 'count']
     fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
     fig2.update_layout(xaxis_tickangle=-45)
     # Timeline of Comments (like_count if exists else 0)
-    y_data = df['like_count'] if 'like_count' in df.columns else [0]*len(df)
-    hover_cols = ['author', 'text']
-    if 'like_count' in df.columns:
-        hover_cols.append('like_count')
     fig3 = px.scatter(
         df,
@@ -61,9 +59,9 @@ def process_file(file):
 def build_network_html_plotly(df):
     G = nx.DiGraph()
     for _, row in df.iterrows():
-        author = str(row['author'])
-        comment_id = str(row['id'])
-        parent_id = row['parent']
         G.add_node(comment_id, label=author)
         G.add_edge(author, comment_id)
@@ -128,27 +126,23 @@ def build_network_html_plotly(df):
 def search_keyword(keyword):
     if not os.path.exists("latest_data.csv"):
-        return pd.DataFrame(columns=['date', 'author', 'like_count', 'text'])
     df = pd.read_csv("latest_data.csv")
     if 'text' not in df.columns:
-        return pd.DataFrame(columns=['date', 'author', 'like_count', 'text'])
     mask = df['text'].astype(str).str.contains(keyword, case=False, na=False)
-    result = df.loc[mask, ['timestamp', 'author', 'text']]
-    if 'like_count' in df.columns:
-        result['like_count'] = df.loc[mask, 'like_count']
-    else:
-        result['like_count'] = None
-    result['date'] = pd.to_datetime(result['timestamp'], unit='s', errors='coerce')
     result = result.dropna(subset=['date'])
-    return result[['date', 'author', 'like_count', 'text']].head(100)
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Comment Thread Analyzer with Timeline + Search")
-    file_input = gr.File(label="📁 Upload CSV or XLSX", file_types=[".csv", ".xls", ".xlsx"])
     status = gr.Textbox(label="✅ Status")
     plot1 = gr.Plot(label="📈 Messages per Day")
@@ -161,7 +155,7 @@ with gr.Blocks() as demo:
         keyword_input = gr.Textbox(label="🔍 Search Keyword in Comments")
         search_button = gr.Button("Search")
-    search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="🔍 Search Results")
     file_input.change(
         fn=process_file,

     else:
         return "Unsupported file format", None, None, None, None, None
+    required_cols = ['comment_id', 'text', 'like_count', 'author_name', 'author_channel_id', 'published_at', 'parent_id']
     missing = [col for col in required_cols if col not in df.columns]
     if missing:
         return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
+    df['date'] = pd.to_datetime(df['published_at'], errors='coerce')
     df = df.dropna(subset=['date'])
     df['date_only'] = df['date'].dt.date
     fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")
     # Top Authors
+    top_authors = df['author_name'].value_counts().nlargest(20).reset_index()
     top_authors.columns = ['author', 'count']
     fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
     fig2.update_layout(xaxis_tickangle=-45)
     # Timeline of Comments (like_count if exists else 0)
+    y_data = df['like_count'].fillna(0)
+    hover_cols = ['author_name', 'text', 'like_count']
     fig3 = px.scatter(
         df,
 def build_network_html_plotly(df):
     G = nx.DiGraph()
     for _, row in df.iterrows():
+        author = str(row['author_name'])
+        comment_id = str(row['comment_id'])
+        parent_id = row['parent_id']
         G.add_node(comment_id, label=author)
         G.add_edge(author, comment_id)
 def search_keyword(keyword):
     if not os.path.exists("latest_data.csv"):
+        return pd.DataFrame(columns=['date', 'author_name', 'like_count', 'text'])
     df = pd.read_csv("latest_data.csv")
     if 'text' not in df.columns:
+        return pd.DataFrame(columns=['date', 'author_name', 'like_count', 'text'])
     mask = df['text'].astype(str).str.contains(keyword, case=False, na=False)
+    result = df.loc[mask, ['published_at', 'author_name', 'like_count', 'text']]
+    result['date'] = pd.to_datetime(result['published_at'], errors='coerce')
     result = result.dropna(subset=['date'])
+    return result[['date', 'author_name', 'like_count', 'text']].head(100)
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 YouTube Comment Thread Analyzer with Timeline + Search")
+    file_input = gr.File(label="📁 Upload CSV or XLSX (YouTube API v3 format)")
     status = gr.Textbox(label="✅ Status")
     plot1 = gr.Plot(label="📈 Messages per Day")
         keyword_input = gr.Textbox(label="🔍 Search Keyword in Comments")
         search_button = gr.Button("Search")
+    search_results = gr.Dataframe(headers=["date", "author_name", "like_count", "text"], label="🔍 Search Results")
     file_input.change(
         fn=process_file,