Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,12 +14,12 @@ def process_file(file):
|
|
| 14 |
else:
|
| 15 |
return "Unsupported file format", None, None, None, None, None
|
| 16 |
|
| 17 |
-
required_cols = ['
|
| 18 |
missing = [col for col in required_cols if col not in df.columns]
|
| 19 |
if missing:
|
| 20 |
return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
|
| 21 |
|
| 22 |
-
df['date'] = pd.to_datetime(df['
|
| 23 |
df = df.dropna(subset=['date'])
|
| 24 |
df['date_only'] = df['date'].dt.date
|
| 25 |
|
|
@@ -28,16 +28,14 @@ def process_file(file):
|
|
| 28 |
fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")
|
| 29 |
|
| 30 |
# Top Authors
|
| 31 |
-
top_authors = df['
|
| 32 |
top_authors.columns = ['author', 'count']
|
| 33 |
fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
|
| 34 |
fig2.update_layout(xaxis_tickangle=-45)
|
| 35 |
|
| 36 |
# Timeline of Comments (like_count if exists else 0)
|
| 37 |
-
y_data = df['like_count']
|
| 38 |
-
hover_cols = ['
|
| 39 |
-
if 'like_count' in df.columns:
|
| 40 |
-
hover_cols.append('like_count')
|
| 41 |
|
| 42 |
fig3 = px.scatter(
|
| 43 |
df,
|
|
@@ -61,9 +59,9 @@ def process_file(file):
|
|
| 61 |
def build_network_html_plotly(df):
|
| 62 |
G = nx.DiGraph()
|
| 63 |
for _, row in df.iterrows():
|
| 64 |
-
author = str(row['
|
| 65 |
-
comment_id = str(row['
|
| 66 |
-
parent_id = row['
|
| 67 |
|
| 68 |
G.add_node(comment_id, label=author)
|
| 69 |
G.add_edge(author, comment_id)
|
|
@@ -128,27 +126,23 @@ def build_network_html_plotly(df):
|
|
| 128 |
|
| 129 |
def search_keyword(keyword):
|
| 130 |
if not os.path.exists("latest_data.csv"):
|
| 131 |
-
return pd.DataFrame(columns=['date', '
|
| 132 |
|
| 133 |
df = pd.read_csv("latest_data.csv")
|
| 134 |
|
| 135 |
if 'text' not in df.columns:
|
| 136 |
-
return pd.DataFrame(columns=['date', '
|
| 137 |
|
| 138 |
mask = df['text'].astype(str).str.contains(keyword, case=False, na=False)
|
| 139 |
-
result = df.loc[mask, ['
|
| 140 |
-
|
| 141 |
-
result['like_count'] = df.loc[mask, 'like_count']
|
| 142 |
-
else:
|
| 143 |
-
result['like_count'] = None
|
| 144 |
-
result['date'] = pd.to_datetime(result['timestamp'], unit='s', errors='coerce')
|
| 145 |
result = result.dropna(subset=['date'])
|
| 146 |
-
return result[['date', '
|
| 147 |
|
| 148 |
# Gradio interface
|
| 149 |
with gr.Blocks() as demo:
|
| 150 |
-
gr.Markdown("## 🧠 Comment Thread Analyzer with Timeline + Search")
|
| 151 |
-
file_input = gr.File(label="π Upload CSV or XLSX
|
| 152 |
|
| 153 |
status = gr.Textbox(label="✅ Status")
|
| 154 |
plot1 = gr.Plot(label="π Messages per Day")
|
|
@@ -161,7 +155,7 @@ with gr.Blocks() as demo:
|
|
| 161 |
keyword_input = gr.Textbox(label="π Search Keyword in Comments")
|
| 162 |
search_button = gr.Button("Search")
|
| 163 |
|
| 164 |
-
search_results = gr.Dataframe(headers=["date", "
|
| 165 |
|
| 166 |
file_input.change(
|
| 167 |
fn=process_file,
|
|
|
|
| 14 |
else:
|
| 15 |
return "Unsupported file format", None, None, None, None, None
|
| 16 |
|
| 17 |
+
required_cols = ['comment_id', 'text', 'like_count', 'author_name', 'author_channel_id', 'published_at', 'parent_id']
|
| 18 |
missing = [col for col in required_cols if col not in df.columns]
|
| 19 |
if missing:
|
| 20 |
return f"Missing columns: {', '.join(missing)}", None, None, None, None, None
|
| 21 |
|
| 22 |
+
df['date'] = pd.to_datetime(df['published_at'], errors='coerce')
|
| 23 |
df = df.dropna(subset=['date'])
|
| 24 |
df['date_only'] = df['date'].dt.date
|
| 25 |
|
|
|
|
| 28 |
fig1 = px.line(messages_per_day, x="date_only", y="count", title="Messages per Day")
|
| 29 |
|
| 30 |
# Top Authors
|
| 31 |
+
top_authors = df['author_name'].value_counts().nlargest(20).reset_index()
|
| 32 |
top_authors.columns = ['author', 'count']
|
| 33 |
fig2 = px.bar(top_authors, x='author', y='count', title="Top 20 Authors", text='count')
|
| 34 |
fig2.update_layout(xaxis_tickangle=-45)
|
| 35 |
|
| 36 |
# Timeline of Comments (like_count if exists else 0)
|
| 37 |
+
y_data = df['like_count'].fillna(0)
|
| 38 |
+
hover_cols = ['author_name', 'text', 'like_count']
|
|
|
|
|
|
|
| 39 |
|
| 40 |
fig3 = px.scatter(
|
| 41 |
df,
|
|
|
|
| 59 |
def build_network_html_plotly(df):
|
| 60 |
G = nx.DiGraph()
|
| 61 |
for _, row in df.iterrows():
|
| 62 |
+
author = str(row['author_name'])
|
| 63 |
+
comment_id = str(row['comment_id'])
|
| 64 |
+
parent_id = row['parent_id']
|
| 65 |
|
| 66 |
G.add_node(comment_id, label=author)
|
| 67 |
G.add_edge(author, comment_id)
|
|
|
|
| 126 |
|
| 127 |
def search_keyword(keyword):
|
| 128 |
if not os.path.exists("latest_data.csv"):
|
| 129 |
+
return pd.DataFrame(columns=['date', 'author_name', 'like_count', 'text'])
|
| 130 |
|
| 131 |
df = pd.read_csv("latest_data.csv")
|
| 132 |
|
| 133 |
if 'text' not in df.columns:
|
| 134 |
+
return pd.DataFrame(columns=['date', 'author_name', 'like_count', 'text'])
|
| 135 |
|
| 136 |
mask = df['text'].astype(str).str.contains(keyword, case=False, na=False)
|
| 137 |
+
result = df.loc[mask, ['published_at', 'author_name', 'like_count', 'text']]
|
| 138 |
+
result['date'] = pd.to_datetime(result['published_at'], errors='coerce')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
result = result.dropna(subset=['date'])
|
| 140 |
+
return result[['date', 'author_name', 'like_count', 'text']].head(100)
|
| 141 |
|
| 142 |
# Gradio interface
|
| 143 |
with gr.Blocks() as demo:
|
| 144 |
+
gr.Markdown("## 🧠 YouTube Comment Thread Analyzer with Timeline + Search")
|
| 145 |
+
file_input = gr.File(label="π Upload CSV or XLSX (YouTube API v3 format)")
|
| 146 |
|
| 147 |
status = gr.Textbox(label="✅ Status")
|
| 148 |
plot1 = gr.Plot(label="π Messages per Day")
|
|
|
|
| 155 |
keyword_input = gr.Textbox(label="π Search Keyword in Comments")
|
| 156 |
search_button = gr.Button("Search")
|
| 157 |
|
| 158 |
+
search_results = gr.Dataframe(headers=["date", "author_name", "like_count", "text"], label="π Search Results")
|
| 159 |
|
| 160 |
file_input.change(
|
| 161 |
fn=process_file,
|