gopalaKrishna1236 committed on
Commit
364a23f
·
verified ·
1 Parent(s): d0c979c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +474 -0
  2. requirements.txt +13 -0
app.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import io
4
+ import re
5
+ import sys
6
+ import uuid
7
+ import math
8
+ import traceback
9
+ from datetime import datetime
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ # Headless matplotlib
15
+ import matplotlib
16
+ matplotlib.use("Agg")
17
+ import matplotlib.pyplot as plt
18
+
19
+ import gradio as gr
20
+
21
+ # ------------------ NLP / Modeling ------------------
22
+ import nltk
23
+ from nltk.corpus import stopwords
24
+ from nltk.sentiment import SentimentIntensityAnalyzer
25
+
26
+ # Transformers sentiment (optional: advanced)
27
+ from transformers import pipeline
28
+
29
+ # Time-series & stats
30
+ import ruptures as rpt
31
+
32
+ # PDF reporting
33
+ from reportlab.lib.pagesizes import A4
34
+ from reportlab.pdfgen import canvas
35
+ from reportlab.lib.units import cm
36
+ from reportlab.lib.utils import ImageReader
37
+
38
+ # ------------------ NLTK bootstrap ------------------
39
def _ensure_nltk():
    """Download the NLTK resources this app uses if they are not installed.

    Each resource is probed with ``nltk.data.find``; a ``LookupError`` from
    the probe triggers a quiet ``nltk.download`` of the matching package.
    """
    # (path probed by nltk.data.find, package name given to nltk.download)
    required = (
        ("tokenizers/punkt", "punkt"),
        ("corpora/stopwords", "stopwords"),
        ("sentiment/vader_lexicon.zip", "vader_lexicon"),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package, quiet=True)


_ensure_nltk()

# Fall back to an empty stop-word set if the corpus cannot be loaded, so the
# module still imports; tokenize() then simply filters nothing as a stop word.
try:
    EN_STOPWORDS = set(stopwords.words("english"))
except Exception:
    EN_STOPWORDS = set()
58
+
59
def init_vader():
    """Return a ``SentimentIntensityAnalyzer``, downloading its lexicon on failure.

    If the first construction raises, the ``vader_lexicon`` package is
    downloaded and construction is attempted once more; an error from the
    second attempt propagates to the caller.
    """
    try:
        analyzer = SentimentIntensityAnalyzer()
    except Exception:
        nltk.download("vader_lexicon", quiet=True)
        analyzer = SentimentIntensityAnalyzer()
    return analyzer


# Module-wide analyzer shared by vader_score().
VADER = init_vader()
67
+
68
+ # ------------------ Transformers init (lazy) ------------------
69
# Holds the transformers pipeline once it has been built (see below).
_cached_pipe = None


def get_roberta_pipeline():
    """Return the sentiment-analysis pipeline, building it on first use.

    The ``-latest`` checkpoint is tried first; if building it raises, the
    older checkpoint is used instead (an error from that second attempt
    propagates). The result is stored in the module-level ``_cached_pipe``
    so later calls return the same object.
    """
    global _cached_pipe
    if _cached_pipe is not None:
        return _cached_pipe

    def _build(model_name):
        return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name, truncation=True)

    try:
        _cached_pipe = _build("cardiffnlp/twitter-roberta-base-sentiment-latest")
    except Exception:
        _cached_pipe = _build("cardiffnlp/twitter-roberta-base-sentiment")
    return _cached_pipe
80
+
81
+ # ------------------ Helpers ------------------
82
# A token is a run of ASCII letters and apostrophes.
TOKEN_PATTERN = re.compile(r"[A-Za-z']+")
# http/https links, stripped before tokenizing.
URL_RE = re.compile(r"https?://\S+")


def tokenize(text: str):
    """Split *text* into lower-cased word tokens.

    Non-string input is converted with ``str`` (missing values become the
    empty string). URLs are removed first; tokens of length 1 and tokens in
    ``EN_STOPWORDS`` are discarded.
    """
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)
    without_urls = URL_RE.sub("", text)
    lowered = (match.lower() for match in TOKEN_PATTERN.findall(without_urls))
    return [tok for tok in lowered if len(tok) > 1 and tok not in EN_STOPWORDS]
92
+
93
def read_csv_safe(path):
    """Read a header-less CSV, retrying with progressively looser encodings.

    Attempts pandas' default encoding, then ``utf-8``, ``utf-8-sig`` and
    ``latin-1``. The first successful read is returned; if every attempt
    fails, the exception from the last attempt is raised.
    """
    failure = None
    for encoding in (None, "utf-8", "utf-8-sig", "latin-1"):
        options = {"header": None}
        if encoding is not None:
            options["encoding"] = encoding
        try:
            return pd.read_csv(path, **options)
        except Exception as exc:
            failure = exc
    raise failure
103
+
104
def coerce_sentiment140(df):
    """Give a frame with at least six columns the Sentiment140 column names.

    Only the first six columns are kept and they are renamed to
    target/ids/date/flag/user/text. Frames with fewer than six columns are
    returned unchanged.
    """
    if df.shape[1] < 6:
        return df
    trimmed = df.iloc[:, :6]
    trimmed.columns = ["target", "ids", "date", "flag", "user", "text"]
    return trimmed
109
+
110
def vader_score(text):
    """Return the VADER ``compound`` score for *text* (non-strings score as "")."""
    safe_text = text if isinstance(text, str) else ""
    return VADER.polarity_scores(safe_text)["compound"]
113
+
114
def classify_label(score, pos_thr=0.05, neg_thr=-0.05):
    """Map a numeric sentiment score to "Positive", "Negative" or "Neutral".

    Scores at or above *pos_thr* are positive, scores at or below *neg_thr*
    are negative, and everything else (including NaN) is neutral.
    """
    if score >= pos_thr:
        return "Positive"
    if score <= neg_thr:
        return "Negative"
    return "Neutral"
121
+
122
def aggregate_ts(df, date_col, score_col, freq="D", ma_window=7, ci=True):
    """Resample per-row scores into a time series with a moving average.

    Args:
        df: Frame containing *date_col* and *score_col*.
        date_col: Column holding dates (parsed with ``pd.to_datetime``; rows
            whose date cannot be parsed are dropped).
        score_col: Column holding numeric scores.
        freq: Pandas resample frequency string.
        ma_window: Moving-average window in resampled periods. ``None``, 0
            or 1 means "no smoothing": ``ma`` is then the raw resampled mean.
        ci: When true, add ``ci_low``/``ci_high`` columns around ``ma``.

    Returns:
        A frame indexed by period with *score_col* (mean per period), ``ma``
        and, if *ci* is true, ``ci_low`` and ``ci_high``. The band is NaN
        wherever fewer than two periods are available in the window, which
        includes every row when no smoothing is requested.
    """
    s = df[[date_col, score_col]].dropna().copy()
    s[date_col] = pd.to_datetime(s[date_col], errors="coerce")
    s = s.dropna(subset=[date_col]).set_index(date_col).sort_index()
    agg = s.resample(freq).mean()

    # Normalise once so the MA and CI branches agree on the effective window.
    window = int(ma_window) if ma_window and ma_window > 1 else 1

    if window > 1:
        agg["ma"] = agg[score_col].rolling(window, min_periods=1).mean()
    else:
        agg["ma"] = agg[score_col]

    if ci:
        if window > 1:
            std = agg[score_col].rolling(window, min_periods=2).std(ddof=1)
            n = s.resample(freq).count()[score_col].rolling(window, min_periods=1).sum()
            se = std / np.sqrt(np.maximum(n, 1))
        else:
            # A one-period window has no spread; rolling(1, min_periods=2)
            # would raise, so report an undefined band instead.
            se = pd.Series(np.nan, index=agg.index)
        agg["ci_low"] = agg["ma"] - 1.96 * se
        agg["ci_high"] = agg["ma"] + 1.96 * se
    return agg
139
+
140
def rolling_z_anomalies(series, window=14, z=2.5):
    """Flag points whose rolling z-score magnitude is at least *z*.

    Args:
        series: Numeric pandas Series (only its values are used).
        window: Rolling window length in observations.
        z: Absolute z-score threshold.

    Returns:
        A boolean NumPy array the same length as *series*. It is all False
        when the series is shorter than ``max(5, window)`` or when the
        window is too small (< 2) for a sample standard deviation.
    """
    x = pd.Series(np.asarray(series.values, dtype=float))
    window = int(window)
    if window < 2 or len(x) < max(5, window):
        # Explicit bool dtype: np.array([]) would otherwise be float64.
        return np.zeros(len(x), dtype=bool)

    # min_periods may not exceed the window, so cap the usual 5 at `window`.
    roll = x.rolling(window, min_periods=min(5, window))
    roll_std = roll.std(ddof=1).replace(0, np.nan)  # avoid division by zero
    zscores = (x - roll.mean()) / roll_std
    return (zscores.abs() >= z).fillna(False).values
148
+
149
def changepoints(series, penalty=6):
    """Detect change-points in *series* with ruptures' PELT (rbf cost).

    Args:
        series: Pandas Series; NaN entries are ignored.
        penalty: Value passed as ``pen`` to ``predict``.

    Returns:
        A list of index labels, one per detected breakpoint, each taken at
        the position just before the breakpoint. Empty when fewer than 10
        non-NaN points are available or when prediction fails.
    """
    # Work on the NaN-free series throughout: the positions returned by
    # ruptures refer to this array, so they must be mapped back through the
    # same (dropped) index rather than the original one.
    clean = series.dropna()
    x = clean.values.astype(float)
    if len(x) < 10:
        return []
    algo = rpt.Pelt(model="rbf").fit(x)
    try:
        result = algo.predict(pen=penalty)
    except Exception:
        return []
    last_pos = len(clean) - 1
    # The final entry of `result` is the series length, not a change-point.
    return [clean.index[min(last_pos, i - 1)] for i in result[:-1]]
160
+
161
def _save_fig(fig, name):
    """Save *fig* as a uniquely named PNG under ``charts/`` and return its path.

    The figure is always closed afterwards, including when saving fails, so
    figures do not accumulate in pyplot's registry.
    """
    os.makedirs("charts", exist_ok=True)
    path = os.path.join("charts", f"{name}_{uuid.uuid4().hex}.png")
    try:
        fig.savefig(path, format="png", dpi=150, bbox_inches="tight")
    finally:
        plt.close(fig)
    return path
167
+
168
def plot_trend(agg, title="Sentiment Trend", show_ci=True, anomalies=None, cps=None):
    """Plot the aggregated sentiment series and return the saved PNG path.

    Args:
        agg: Frame with an ``ma`` column; its first column is drawn as the
            raw mean. ``ci_low``/``ci_high`` are used when present.
        title: Axes title.
        show_ci: Draw the confidence band if both CI columns exist.
        anomalies: Optional boolean array aligned with ``agg`` rows; True
            rows are marked with an "x".
        cps: Optional iterable of x positions drawn as dashed vertical lines.

    Returns:
        Path of the image written by ``_save_fig``.
    """
    fig, ax = plt.subplots()
    ax.plot(agg.index, agg["ma"], label="Moving Avg")
    ax.plot(agg.index, agg.iloc[:, 0], alpha=0.3, label="Mean")
    if show_ci and "ci_low" in agg and "ci_high" in agg:
        ax.fill_between(agg.index, agg["ci_low"], agg["ci_high"], alpha=0.2, label="95% CI")
    if anomalies is not None:
        mask = np.asarray(anomalies, dtype=bool)
        if mask.any():
            ax.scatter(agg.index[mask], agg["ma"][mask], marker="x", s=40, label="Anomaly")
    for i, cp in enumerate(cps or []):
        # Label only the first line; a leading underscore hides the rest
        # from the legend so it shows a single "Change-point" entry.
        ax.axvline(cp, linestyle="--", alpha=0.6, label="Change-point" if i == 0 else "_nolegend_")
    ax.set_title(title)
    ax.set_ylabel("Sentiment (−1 to 1)")
    ax.set_xlabel("Date")
    ax.legend(loc="best")
    fig.autofmt_xdate()
    return _save_fig(fig, "trend")
186
+
187
def plot_pie(series, title="Sentiment Distribution"):
    """Draw a pie chart of the value counts of *series* and return the PNG path."""
    label_counts = series.value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        label_counts.values,
        labels=label_counts.index,
        autopct="%1.1f%%",
        startangle=90,
    )
    ax.set_title(title)
    return _save_fig(fig, "pie")
193
+
194
def top_terms(df_text, top_k=20):
    """Count the most frequent tokens, hashtags and mentions in *df_text*.

    Non-string entries are skipped. Hashtags (``#word``) and mentions
    (``@word``) are lower-cased; tokens come from ``tokenize``.

    Returns:
        Three lists of ``(term, count)`` pairs — tokens, hashtags, mentions —
        each limited to the *top_k* most common.
    """
    from collections import Counter

    token_counts = Counter()
    hashtag_counts = Counter()
    mention_counts = Counter()
    for entry in df_text:
        if not isinstance(entry, str):
            continue
        hashtag_counts.update(tag.lower() for tag in re.findall(r"#\w+", entry))
        mention_counts.update(handle.lower() for handle in re.findall(r"@\w+", entry))
        token_counts.update(tokenize(entry))
    return (
        token_counts.most_common(top_k),
        hashtag_counts.most_common(top_k),
        mention_counts.most_common(top_k),
    )
209
+
210
def ngram_top(df_text, n=2, top_k=15):
    """Return the *top_k* most common space-joined *n*-grams of tokenized texts."""
    from collections import Counter

    counts = Counter()
    for entry in df_text:
        words = tokenize(entry)
        counts.update(
            " ".join(words[start:start + n])
            for start in range(len(words) - n + 1)
        )
    return counts.most_common(top_k)
218
+
219
+ # ------------------ Filters ------------------
220
def apply_keyword_filter(df, tcol, mode, kw_text):
    """Filter rows of *df* whose *tcol* text matches the given keywords.

    Args:
        df: Input frame.
        tcol: Name of the text column.
        mode: "Any keyword (OR)", "All keywords (AND)"; any other value is
            treated as regex mode.
        kw_text: Keywords separated by commas and/or newlines, or, in regex
            mode, a single regular expression.

    Returns:
        ``(filtered_copy, keywords)``. ``keywords`` is ``None`` when no
        filter was applied, the list of parsed keywords for OR/AND mode, and
        a one-element list holding the pattern in regex mode. Matching is
        case-insensitive. An invalid regex matches nothing.
    """
    if not isinstance(kw_text, str) or not kw_text.strip():
        return df.copy(), None

    # Split on commas and real newlines. (In a raw string "\\n" would mean
    # "backslash or the letter n" and would chop keywords such as "refund".)
    kws = [k.strip() for k in re.split(r"[,\n]+", kw_text) if k.strip()]
    if not kws:
        return df.copy(), None

    raw = df[tcol]
    # Blank out missing values *before* they are stringified, otherwise they
    # become the searchable texts "nan" / "None".
    s = raw.astype(str).where(raw.notna(), "")

    if mode == "Any keyword (OR)":
        pattern = "|".join(re.escape(k) for k in kws)
        mask = s.str.contains(pattern, case=False, na=False)
    elif mode == "All keywords (AND)":
        mask = pd.Series(True, index=s.index)
        for k in kws:
            mask &= s.str.contains(re.escape(k), case=False, na=False)
    else:  # Regex
        pattern = kw_text.strip()
        kws = [pattern]  # a regex may legitimately contain commas
        try:
            mask = s.str.contains(pattern, case=False, na=False, regex=True)
        except re.error:
            mask = pd.Series(False, index=s.index)
    return df[mask].copy(), kws
239
+
240
def apply_date_range(df, dcol, start, end):
    """Keep rows of *df* whose *dcol* date lies within ``[start, end]``.

    Args:
        df: Input frame.
        dcol: Date column name; if falsy, *df* is returned untouched.
        start: Inclusive lower bound (anything ``pd.to_datetime`` accepts),
            or a falsy/blank value for "no lower bound".
        end: Inclusive upper bound, same conventions. A bound with no time
            of day (e.g. "2009-04-20") covers that entire day.

    Returns:
        *df* itself when no bound applies, otherwise a filtered copy. Rows
        whose date cannot be parsed never satisfy a bound.

    Raises:
        ValueError: If a supplied bound cannot be parsed as a date.
    """
    if not dcol:
        return df

    # Treat whitespace-only strings like an empty box.
    if isinstance(start, str):
        start = start.strip()
    if isinstance(end, str):
        end = end.strip()
    if not start and not end:
        return df

    dates = pd.to_datetime(df[dcol], errors="coerce")  # parsed once for both bounds
    keep = np.ones(len(df), dtype=bool)

    if start:
        start_dt = pd.to_datetime(start, errors="coerce")
        if pd.isna(start_dt):
            raise ValueError(f"Could not parse start date: {start!r}")
        keep &= (dates >= start_dt).to_numpy()

    if end:
        end_dt = pd.to_datetime(end, errors="coerce")
        if pd.isna(end_dt):
            raise ValueError(f"Could not parse end date: {end!r}")
        if end_dt == end_dt.normalize():
            # Date-only bound: include everything up to the end of that day
            # rather than stopping at 00:00:00.
            keep &= (dates < end_dt + pd.Timedelta(days=1)).to_numpy()
        else:
            keep &= (dates <= end_dt).to_numpy()

    return df[keep].copy()
250
+
251
+ # ------------------ PDF Report ------------------
252
def _draw_wrapped_text(c, text, x, y, max_width_cm=17, leading=14):
    """Draw *text* on canvas *c* as a wrapped 10pt Helvetica paragraph.

    Args:
        c: ReportLab canvas to draw on.
        text: Paragraph text; line breaks become ``<br/>`` markup.
        x, y: Frame origin, in centimetres.
        max_width_cm: Frame width in centimetres.
        leading: Line spacing in points.
    """
    from reportlab.lib import colors
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.units import cm
    from reportlab.platypus import Frame, Paragraph

    style = ParagraphStyle(name="Body", fontName="Helvetica", fontSize=10, leading=leading, textColor=colors.black)
    frame = Frame(x * cm, y * cm, max_width_cm * cm, 100 * cm, showBoundary=0)
    # Real newlines must become <br/>. The literal two-character sequence
    # backslash + "n" is still accepted for callers that relied on it.
    markup = text.replace("\\n", "\n").replace("\n", "<br/>")
    frame.addFromList([Paragraph(markup, style)], c)
263
+
264
def build_pdf_report(out_path, title, meta, trend_img, pie_img, terms, ngrams):
    """Write the multi-page PDF report and return *out_path*.

    Pages, in order: a cover with *title* and the *meta* lines; the trend
    chart and the pie chart (each only if its file exists); the term lists;
    and, when *ngrams* is non-empty, the bigram list on a page of its own.

    Args:
        out_path: Destination PDF path.
        title: Cover title.
        meta: Iterable of text lines printed under the title.
        trend_img: Path of the trend chart image, or a falsy value.
        pie_img: Path of the pie chart image, or a falsy value.
        terms: Mapping of section title -> list of ``(term, count)`` pairs.
        ngrams: List of ``(ngram, count)`` pairs.

    At most 25 entries are printed per list.
    """
    c = canvas.Canvas(out_path, pagesize=A4)
    W, H = A4
    left = 2 * cm
    indent = 2.8 * cm
    top = H - 2 * cm
    bottom = 3 * cm

    def _heading(text, size=12):
        # showPage() resets the canvas font, so every page sets its own.
        c.setFont("Helvetica-Bold", size)
        c.drawString(left, top, text)
        c.setFont("Helvetica", 10)

    def _rows(pairs, y):
        # Draw "- term: count" lines, starting a new page when out of room.
        for w, cnt in pairs[:25]:
            if y < bottom:
                c.showPage()
                c.setFont("Helvetica", 10)
                y = top
            c.drawString(indent, y, f"- {w}: {cnt}")
            y -= 0.45 * cm
        return y

    # Cover
    _heading(title, size=16)
    y = H - 3 * cm
    for line in meta:
        c.drawString(left, y, line)
        y -= 0.6 * cm
    c.showPage()

    # Trend
    if trend_img and os.path.exists(trend_img):
        _heading("Sentiment Trend")
        img = ImageReader(trend_img)
        c.drawImage(img, left, 4 * cm, width=W - 4 * cm, height=H - 7 * cm, preserveAspectRatio=True, anchor='c')
        c.showPage()

    # Pie
    if pie_img and os.path.exists(pie_img):
        _heading("Sentiment Distribution")
        img = ImageReader(pie_img)
        c.drawImage(img, left, 6 * cm, width=W - 4 * cm, height=H - 9 * cm, preserveAspectRatio=True, anchor='c')
        c.showPage()

    # Terms
    _heading("Top Terms / Hashtags / Mentions")
    y = H - 3 * cm
    for sec_title, pairs in terms.items():
        if y < bottom:
            c.showPage()
            y = top
        c.setFont("Helvetica-Bold", 11)
        c.drawString(left, y, sec_title)
        y -= 0.5 * cm
        c.setFont("Helvetica", 10)
        y = _rows(pairs, y)
        y -= 0.3 * cm

    # Bigrams get their own page; drawing the heading at the top of the
    # terms page would print it over the terms heading.
    if ngrams:
        c.showPage()
        _heading("Top Bigrams")
        _rows(ngrams, H - 3 * cm)

    c.save()
    return out_path
325
+
326
+ # ------------------ Gradio UI ------------------
327
with gr.Blocks(title="Advanced Sentiment Trend Analyzer") as demo:
    gr.Markdown("# 📈 Advanced Customer Sentiment Trend Analyzer\nIndustry-grade tool for tracking sentiment over time using Sentiment140 or similar datasets.")

    with gr.Row():
        # Left column: inputs and analysis settings.
        with gr.Column():
            file = gr.File(label="Upload Sentiment140 CSV (or similar). 6 columns expected.", file_count="single", file_types=[".csv"])
            engine = gr.Radio(choices=["VADER (fast)", "RoBERTa (accurate)"], value="VADER (fast)", label="Sentiment Engine")
            text_col = gr.Dropdown(label="Text column", choices=[], value=None)
            date_col = gr.Dropdown(label="Date column", choices=[], value=None, allow_custom_value=True)

            gr.Markdown("### Filters")
            kw_text = gr.Textbox(label="Keyword filter (comma-separated OR regex)", placeholder="e.g., refund, delayed OR ^outage|downtime", lines=2)
            kw_mode = gr.Radio(choices=["Any keyword (OR)", "All keywords (AND)", "Regex"], value="Any keyword (OR)", label="Keyword mode")
            start_date = gr.Textbox(label="Start date (YYYY-MM-DD)", placeholder="e.g., 2009-04-06")
            end_date = gr.Textbox(label="End date (YYYY-MM-DD)", placeholder="e.g., 2009-04-20")

            gr.Markdown("### Time Series")
            agg_freq = gr.Radio(choices=["D", "W", "M"], value="D", label="Aggregate by (D/W/M)")
            ma_window = gr.Slider(3, 60, value=7, step=1, label="Moving average window (days)")
            show_ci = gr.Checkbox(value=True, label="Show 95% confidence band")
            z_window = gr.Slider(7, 90, value=21, step=1, label="Anomaly rolling window")
            z_thresh = gr.Slider(1.5, 4.0, value=2.5, step=0.1, label="Anomaly z-score threshold")
            cp_penalty = gr.Slider(2, 20, value=6, step=1, label="Change-point penalty (higher=fewer)")

            gr.Markdown("### Insights")
            top_k = gr.Slider(5, 50, value=20, step=1, label="Top tokens/hashtags/mentions")
            gen_ngrams = gr.Checkbox(value=True, label="Show Top Bigrams")

            run = gr.Button("Run Analysis 🚀", variant="primary")
        # Right column: results.
        with gr.Column():
            trend_img = gr.Image(label="Trend Chart", type="filepath")
            pie_img = gr.Image(label="Sentiment Distribution", type="filepath")
            terms_md = gr.Markdown(label="Top Terms / Hashtags / Mentions")
            ngrams_md = gr.Markdown(label="Top Bigrams")
            debug_md = gr.Markdown(label="Debug Info")
            export = gr.File(label="Download Enriched CSV")
            pdf_out = gr.File(label="Download PDF Report")

    def on_upload(f):
        """Populate the column dropdowns from the uploaded CSV.

        Returns two ``gr.update`` objects (text column, date column). The
        text guess is "text" if present, else the last column; the date
        guess is "date" if present, else nothing.
        """
        if f is None:
            return gr.update(choices=[], value=None), gr.update(choices=[], value=None)
        # Accept both an object exposing .name and a plain path string.
        df = read_csv_safe(getattr(f, "name", f))
        df = coerce_sentiment140(df)
        cols = df.columns.tolist()
        text_guess = "text" if "text" in cols else (cols[-1] if cols else None)
        date_guess = "date" if "date" in cols else None
        return gr.update(choices=cols, value=text_guess), gr.update(choices=cols, value=date_guess)

    file.change(on_upload, inputs=[file], outputs=[text_col, date_col])

    def run_pipeline(f, eng, tcol, dcol, kwtext, kwmode, sd, ed, freq, maw, showci, zwin, zthr, cpp, topk, want_ngrams):
        """Run the full analysis for one click of "Run Analysis".

        Loads and filters the CSV, scores every row with the chosen engine,
        builds the trend/pie charts, term lists, enriched CSV and PDF.

        Returns:
            ``(trend_path, pie_path, terms_markdown, ngrams_markdown,
            debug_markdown, csv_path, pdf_path)`` in the order of the
            ``outputs`` wired to the button.

        Raises:
            gr.Error: For user-correctable problems (shown verbatim) and for
                unexpected failures (prefixed with ``RuntimeError:``; the
                traceback is printed to stderr).
        """

        def _signed_score(label, prob):
            # Map a pipeline label to a signed score in [-1, 1]. The fallback
            # checkpoint emits LABEL_0 (negative) / LABEL_1 (neutral) /
            # LABEL_2 (positive) instead of readable names.
            lbl = label.upper()
            if "NEG" in lbl or lbl == "LABEL_0":
                return -prob
            if "POS" in lbl or lbl == "LABEL_2":
                return prob
            return 0.0

        if f is None:
            raise gr.Error("Please upload a CSV.")
        try:
            df = read_csv_safe(getattr(f, "name", f))
            df = coerce_sentiment140(df)
            cols = df.columns.tolist()
            if tcol not in cols:
                raise gr.Error(f"Text column '{tcol}' not in {cols}")
            if dcol and dcol not in cols:
                raise gr.Error(f"Date column '{dcol}' not in {cols}")
            # Validate before scoring so a missing date column does not cost
            # a full (possibly slow) sentiment pass first.
            if not dcol:
                raise gr.Error("Please choose a date column for trend analysis.")

            # Parse date column early for filters
            df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
            # Keyword filter
            df, used_kws = apply_keyword_filter(df, tcol, kwmode, kwtext)
            # Date range filter
            df = apply_date_range(df, dcol, sd, ed)
            if df.empty:
                raise gr.Error("No rows after applying filters. Relax filters or clear them.")
            df = df.copy()  # own the data before adding score columns

            # Scoring
            if eng.startswith("VADER"):
                df["_score"] = df[tcol].astype(str).apply(vader_score)
            else:
                pipe = get_roberta_pipeline()
                texts = df[tcol].astype(str).tolist()
                scores = []
                batch = 64
                for i in range(0, len(texts), batch):
                    for r in pipe(texts[i:i + batch], truncation=True):
                        scores.append(_signed_score(r["label"], float(r["score"])))
                df["_score"] = scores
            df["_label"] = df["_score"].apply(classify_label)

            agg = aggregate_ts(df, dcol, "_score", freq=freq, ma_window=int(maw), ci=showci)
            anoms = rolling_z_anomalies(agg["ma"], window=int(zwin), z=float(zthr))
            cps = changepoints(agg["ma"], penalty=int(cpp))
            trend_path = plot_trend(agg, title=f"Sentiment Trend ({eng}, {freq}-agg, MA={maw})", show_ci=showci, anomalies=anoms, cps=cps)
            pie_path = plot_pie(df["_label"], title="Overall Sentiment Distribution")

            # Terms
            tok_top, hash_top, ment_top = top_terms(df[tcol], top_k=int(topk))
            terms_lines = ["### Top Tokens", ""] + [f"- {w}: {c}" for w, c in tok_top]
            terms_lines += ["", "### Top Hashtags", ""] + [f"- {w}: {c}" for w, c in hash_top]
            terms_lines += ["", "### Top Mentions", ""] + [f"- {w}: {c}" for w, c in ment_top]
            terms_text = "\n".join(terms_lines)

            # N-grams
            if want_ngrams:
                ng_list = ngram_top(df[tcol], n=2, top_k=15)
                ngrams_text = "### Top Bigrams\n\n" + "\n".join([f"- {w}: {c}" for w, c in ng_list])
            else:
                ng_list = []
                ngrams_text = "### Top Bigrams\n\n(Disabled)"

            # Each run writes into its own directory so concurrent sessions
            # cannot overwrite one another's downloads.
            out_dir = os.path.join("outputs", uuid.uuid4().hex)
            os.makedirs(out_dir, exist_ok=True)

            # Export CSV
            export_path = os.path.join(out_dir, "enriched_sentiment.csv")
            df.to_csv(export_path, index=False)

            # Build PDF
            meta = [
                f"Engine: {eng}",
                f"Rows (after filters): {len(df)}",
                f"Date agg: {freq}, MA window: {maw}, CI: {bool(showci)}",
                f"Anomaly window: {zwin}, z-threshold: {zthr}, CP penalty: {cpp}",
                f"Filters: keywords={kwtext or 'None'} mode={kwmode}; date_range={sd or 'N/A'} to {ed or 'N/A'}",
            ]
            terms_dict = {"Top Tokens": tok_top, "Top Hashtags": hash_top, "Top Mentions": ment_top}
            pdf_path = os.path.join(out_dir, "sentiment_report.pdf")
            build_pdf_report(pdf_path, "Customer Sentiment Trend Report", meta, trend_path, pie_path, terms_dict, ng_list)

            dbg = "#### Data shape\n" + str(df.shape) + "\n\n#### Columns\n" + str(df.dtypes) + "\n"
            return trend_path, pie_path, terms_text, ngrams_text, dbg, export_path, pdf_path
        except gr.Error:
            # Already a user-facing message; do not re-wrap it below.
            raise
        except Exception as e:
            tb = traceback.format_exc()
            print(tb, file=sys.stderr)
            raise gr.Error(f"RuntimeError: {type(e).__name__}: {e}") from e

    run.click(
        run_pipeline,
        inputs=[file, engine, text_col, date_col, kw_text, kw_mode, start_date, end_date, agg_freq, ma_window, show_ci, z_window, z_thresh, cp_penalty, top_k, gen_ngrams],
        outputs=[trend_img, pie_img, terms_md, ngrams_md, debug_md, export, pdf_out]
    )
471
+
472
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT (default 7860).
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", "7860")),
    )
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.44.1
2
+ pandas==2.2.2
3
+ numpy==1.26.4
4
+ matplotlib==3.8.4
5
+ nltk==3.8.1
6
+ statsmodels==0.14.2
7
+ ruptures==1.1.9
8
+ transformers==4.44.2
9
+ torch>=2.1.0
10
+ accelerate==0.33.0
11
+ scikit-learn==1.4.2
12
+
13
+ reportlab==3.6.13