Spaces:

clementBE
/

YT_Comments_explorer

Sleeping

App Files Files Community

clementBE committed on Jul 11, 2025

Commit

113fac7

verified ·

1 Parent(s): 8bd3a54

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -20

app.py CHANGED Viewed

@@ -3,23 +3,21 @@ import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 import networkx as nx
-import tempfile
 import os
 def process_file(file):
-    # file is a path string when using gr.File
-    file_ext = os.path.splitext(file)[1].lower()
     if file_ext == '.csv':
-        df = pd.read_csv(file)
     elif file_ext in ['.xls', '.xlsx']:
-        df = pd.read_excel(file)
     else:
-        return "Unsupported file format", None, None, None, None
     required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
     missing = [col for col in required_cols if col not in df.columns]
     if missing:
-        return f"Missing columns: {', '.join(missing)}", None, None, None, None
     df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
     df = df.dropna(subset=['date'])
@@ -35,12 +33,17 @@ def process_file(file):
     fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
     fig2.update_layout(xaxis_tickangle=-45)
-    # Timeline of Comments with Like Count on Y-axis
     fig3 = px.scatter(
         df,
         x='date',
-        y='like_count' if 'like_count' in df.columns else [0]*len(df),
-        hover_data=['author', 'text', 'like_count'] if 'like_count' in df.columns else ['author', 'text'],
         title="Comments Over Time (Likes)",
         labels={'like_count': 'Like Count', 'date': 'Date'}
     )
@@ -50,10 +53,10 @@ def process_file(file):
     # Save to CSV for keyword search
     df.to_csv("latest_data.csv", index=False)
-    # You may not want to remove the file immediately since Gradio manages files:
-    # os.remove(file)
-    return "Success", fig1, fig2, fig3, build_network_html_plotly(df)
 def build_network_html_plotly(df):
     G = nx.DiGraph()
@@ -109,20 +112,19 @@ def build_network_html_plotly(df):
                         title='Comment Thread Network',
                         showlegend=False,
                         hovermode='closest',
-                        margin=dict(b=20,l=5,r=5,t=40),
                         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                     )
-    # Save to temp HTML file
-    tmp_dir = tempfile.mkdtemp()
-    html_path = os.path.join(tmp_dir, "network.html")
     fig.write_html(html_path)
     with open(html_path, "r", encoding="utf-8") as f:
         html_content = f.read()
-    return html_content
 def search_keyword(keyword):
     if not os.path.exists("latest_data.csv"):
@@ -153,14 +155,19 @@ with gr.Blocks() as demo:
     plot2 = gr.Plot(label="👤 Top 20 Authors")
     timeline = gr.Plot(label="🕒 Comment Timeline")
     network_html = gr.HTML(label="🧵 Thread Network")
     with gr.Row():
         keyword_input = gr.Textbox(label="🔍 Search Keyword in Comments")
         search_button = gr.Button("Search")
     search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="🔍 Search Results")
-    file_input.change(fn=process_file, inputs=file_input, outputs=[status, plot1, plot2, timeline, network_html])
     search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
 if __name__ == "__main__":

 import plotly.express as px
 import plotly.graph_objects as go
 import networkx as nx
 import os
 def process_file(file):
+    file_ext = os.path.splitext(file.name)[1].lower()
     if file_ext == '.csv':
+        df = pd.read_csv(file.name)
     elif file_ext in ['.xls', '.xlsx']:
+        df = pd.read_excel(file.name)
     else:
+        return "Unsupported file format", None, None, None, None, None
     required_cols = ['timestamp', 'author', 'text', 'id', 'parent']
     missing = [col for col in required_cols if col not in df.columns]
     if missing:
+        return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
     df['date'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
     df = df.dropna(subset=['date'])
     fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
     fig2.update_layout(xaxis_tickangle=-45)
+    # Timeline of Comments (like_count if exists else 0)
+    y_data = df['like_count'] if 'like_count' in df.columns else [0]*len(df)
+    hover_cols = ['author', 'text']
+    if 'like_count' in df.columns:
+        hover_cols.append('like_count')
     fig3 = px.scatter(
         df,
         x='date',
+        y=y_data,
+        hover_data=hover_cols,
         title="Comments Over Time (Likes)",
         labels={'like_count': 'Like Count', 'date': 'Date'}
     )
     # Save to CSV for keyword search
     df.to_csv("latest_data.csv", index=False)
+    # Build network HTML and save permanently
+    network_html_content, network_path = build_network_html_plotly(df)
+    return "Success", fig1, fig2, fig3, network_html_content, network_path
 def build_network_html_plotly(df):
     G = nx.DiGraph()
                         title='Comment Thread Network',
                         showlegend=False,
                         hovermode='closest',
+                        margin=dict(b=20, l=5, r=5, t=40),
                         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                     )
+    # Save to permanent HTML file
+    html_path = "network.html"
     fig.write_html(html_path)
     with open(html_path, "r", encoding="utf-8") as f:
         html_content = f.read()
+    return html_content, html_path
 def search_keyword(keyword):
     if not os.path.exists("latest_data.csv"):
     plot2 = gr.Plot(label="👤 Top 20 Authors")
     timeline = gr.Plot(label="🕒 Comment Timeline")
     network_html = gr.HTML(label="🧵 Thread Network")
+    download_network = gr.File(label="⬇️ Download Network HTML", interactive=False)
     with gr.Row():
         keyword_input = gr.Textbox(label="🔍 Search Keyword in Comments")
         search_button = gr.Button("Search")
     search_results = gr.Dataframe(headers=["date", "author", "like_count", "text"], label="🔍 Search Results")
+    file_input.change(
+        fn=process_file,
+        inputs=file_input,
+        outputs=[status, plot1, plot2, timeline, network_html, download_network]
+    )
     search_button.click(fn=search_keyword, inputs=keyword_input, outputs=search_results)
 if __name__ == "__main__":