WebashalarForML committed on
Commit
cbf6fc5
·
verified ·
1 Parent(s): a2cae89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +818 -818
app.py CHANGED
@@ -1,819 +1,819 @@
1
- """
2
- PlayPulse Intelligence — Flask App (v2)
3
- ─────────────────────────────────────────
4
- Key improvements over v1
5
- • Chat has conversation memory (per session, server-side deque)
6
- • Intent router is enum-strict + falls back properly
7
- • 6 inline chat tools (no agent needed for simple queries)
8
- • Agent is one of those tools — called only for deep analysis
9
- • /chat returns structured payload: reply + optional table / chart_data / agent_data
10
- • "tabular format" requests produce real table JSON the frontend can render
11
- """
12
-
13
- import urllib.parse
14
- import math
15
- import re
16
- import json
17
- import requests
18
- from collections import deque, defaultdict
19
- from datetime import datetime
20
- from flask import Flask, request, render_template, jsonify, session
21
- from google_play_scraper import reviews, Sort, search, app as app_info
22
- import pandas as pd
23
- from utils.agents import run_agent, build_llm
24
- import os
25
-
26
# Flask application object shared by every route in this module.
app = Flask(__name__)
# NOTE(review): the fallback secret is hard-coded; set FLASK_SECRET in any
# deployment that relies on signed session cookies.
app.secret_key = os.getenv("FLASK_SECRET", "playpulse-secret-2026")

# ── Per-session conversation memory (server-side) ─────────────────────────
# Maps session_id → deque of {"role": "user"|"assistant", "content": str}.
# Each deque keeps at most the 20 most recent messages (user and assistant
# messages are counted separately).
_CONV_MEMORY: dict[str, deque] = defaultdict(lambda: deque(maxlen=20))

# Number of most recent messages forwarded to the LLM as context.
MAX_HISTORY_FOR_LLM = 6
34
-
35
-
36
- # ═══════════════════════════════════════════════════════════════════════════
37
- # SCRAPER HELPERS (unchanged from v1)
38
- # ═══════════════════════════════════════════════════════════════════════════
39
-
40
def extract_app_id(url_or_name: str) -> str:
    """Return the Google Play package id named by *url_or_name*, or "".

    Accepted inputs:
      * a Play Store URL carrying an ``id=`` query parameter, or
      * a bare dotted package id such as ``com.example.game``.

    Anything else (free-text app names, Play URLs without ``id``, other
    URLs) yields "" so the caller can fall back to a store search.
    """
    candidate = url_or_name.strip()

    if "play.google.com" in candidate:
        query = urllib.parse.urlparse(candidate).query
        ids = urllib.parse.parse_qs(query).get("id")
        # A Play URL without ``id`` (search/category page) names no app.
        # Previously it fell through and the URL itself was returned.
        return ids[0].strip() if ids else ""

    # Package ids never contain "/" or ":", so those mark a non-Play URL.
    looks_like_url = "/" in candidate or ":" in candidate
    if "." in candidate and " " not in candidate and not looks_like_url:
        return candidate
    return ""
50
-
51
-
52
def scrape_store_ids(query: str, n_hits: int = 5):
    """Scrape the Play Store search page for package ids matching *query*.

    Returns up to *n_hits* unique ids in page order. Best-effort: any
    failure (network error, non-200 response, bad input) yields ``[]``.
    """
    try:
        search_url = (
            f"https://play.google.com/store/search?q={urllib.parse.quote(query)}&c=apps"
        )
        response = requests.get(
            search_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
        if response.status_code != 200:
            return []
        found = re.findall(r'details\?id=([a-zA-Z0-9._]+)', response.text)
        # dict.fromkeys de-duplicates while preserving first-seen order.
        ordered = dict.fromkeys(pid for pid in found if "None" not in pid)
        return list(ordered)[:n_hits]
    except Exception:
        return []
67
-
68
-
69
def serialize_review(r: dict) -> dict:
    """Flatten one scraped review into a JSON-safe dict.

    Missing text fields become "", missing counters become 0, and the two
    timestamp fields are rendered via ``isoformat()`` (or "" when absent).
    """

    def _iso(key: str) -> str:
        stamp = r.get(key)
        return stamp.isoformat() if stamp else ""

    flat = {name: r.get(name, "") for name in ("reviewId", "userName", "userImage", "content")}
    flat["score"] = r.get("score", 0)
    flat["thumbsUpCount"] = r.get("thumbsUpCount", 0)
    flat["reviewCreatedVersion"] = r.get("reviewCreatedVersion", "")
    flat["at"] = _iso("at")
    # replyContent may be present but None; normalise to "".
    flat["replyContent"] = r.get("replyContent", "") or ""
    flat["repliedAt"] = _iso("repliedAt")
    return flat
82
-
83
-
84
def fetch_app_reviews(app_id, review_count, sort_order, star_ratings_input):
    """Fetch app metadata plus reviews, optionally split across star buckets.

    Args:
        app_id: Play Store package id.
        review_count: requested number of reviews (parsed by ``_review_limit``).
        sort_order: "MOST_RELEVANT", "NEWEST" or "RATING"; anything else
            falls back to most relevant.
        star_ratings_input: "all"/empty for no filter, otherwise an iterable
            of star values (1-5) or a single int.

    Returns:
        ``(info, reviews)`` — the raw app-info dict and a list of serialized
        reviews, each tagged with ``appTitle`` and ``appId``.
    """
    info = app_info(app_id, lang='en', country='us')
    sort_map = {
        'MOST_RELEVANT': Sort.MOST_RELEVANT,
        'NEWEST': Sort.NEWEST,
        'RATING': Sort.RATING,
    }
    selected_sort = sort_map.get(sort_order, Sort.MOST_RELEVANT)

    star_filters = [None]  # None = no star filter
    if star_ratings_input and star_ratings_input != 'all':
        if isinstance(star_ratings_input, int):
            star_ratings_input = [star_ratings_input]
        wanted = sorted(
            {int(s) for s in star_ratings_input if str(s).isdigit() and 1 <= int(s) <= 5},
            reverse=True,
        )
        # If every supplied value was invalid, keep the unfiltered bucket.
        # An empty list here used to cause a ZeroDivisionError below.
        if wanted:
            star_filters = wanted

    per_bucket = math.ceil(_review_limit(review_count) / len(star_filters))
    all_reviews: list[dict] = []
    seen_ids: set[str] = set()

    for star in star_filters:
        result, _ = reviews(
            app_id, lang='en', country='us',
            sort=selected_sort, count=per_bucket,
            filter_score_with=star,
        )
        for raw in result:
            rid = raw.get('reviewId', '')
            if rid in seen_ids:
                continue  # the same review can surface in several buckets
            seen_ids.add(rid)
            item = serialize_review(raw)
            item['appTitle'] = info['title']
            item['appId'] = app_id
            all_reviews.append(item)

    return info, all_reviews
121
-
122
-
123
def _review_limit(count, default=150):
    """Parse a requested review count, falling back to *default*.

    Only conversion failures are swallowed: ``None``, non-numeric strings,
    NaN and infinity all yield *default*.
    """
    try:
        return int(count)
    except (TypeError, ValueError, OverflowError):
        return default
128
-
129
-
130
- # ═══════════════════════════════════════════════════════════════════════════
131
- # INLINE CHAT TOOLS (fast, no heavy agent needed for simple queries)
132
- # ═══════════════════════════════════════════════════════════════════════════
133
-
134
def _tool_rating_breakdown(df: pd.DataFrame) -> dict:
    """Star rating distribution across all reviews.

    Scores are coerced to numbers (as the other chat tools do) so string
    scores are counted and unparsable ones are ignored instead of crashing.
    Percentages are relative to the total row count of *df*.

    Returns a dict with a ``table`` (title/columns/rows) and a ``summary``.
    """
    if "score" in df.columns:
        scores = pd.to_numeric(df["score"], errors="coerce").dropna()
    else:
        scores = pd.Series(dtype=float)

    total = max(1, len(df))  # avoid division by zero on an empty frame
    dist = scores.astype(int).value_counts().sort_index()
    rows = [
        {
            "Stars": f"{'★' * int(star)} ({int(star)})",
            "Count": int(count),
            "Percentage": f"{round(int(count) / total * 100, 1)}%",
        }
        for star, count in dist.items()
    ]
    # With no valid scores the mean would be NaN; report 0.0 instead.
    average = round(float(scores.mean()), 2) if len(scores) else 0.0
    return {
        "table": {
            "title": "Rating Distribution",
            "columns": ["Stars", "Count", "Percentage"],
            "rows": rows,
        },
        "summary": f"{len(df)} reviews: avg {average}/5",
    }
154
-
155
-
156
def _tool_app_comparison(df: pd.DataFrame) -> dict:
    """Per-app average rating plus negative/positive share, worst app first.

    Groups by ``appTitle`` (or ``appId`` when no title column exists).
    Negative means score <= 2, positive means score >= 4.

    Returns ``{"error": ...}`` when the frame has no app column, otherwise
    a dict with a ``table`` and a ``summary``.
    """
    app_col = next((c for c in ("appTitle", "appId") if c in df.columns), None)
    if app_col is None:
        return {"error": "No app column in data"}

    scores = pd.to_numeric(df["score"], errors="coerce")
    ranked = []
    for app_name, group in scores.groupby(df[app_col]):
        average = float(group.mean())
        ranked.append((average, {
            "App": str(app_name),
            "Reviews": len(group),
            "Avg Rating": f"{round(average, 2)} ★",
            "% Negative": f"{round(float((group <= 2).mean() * 100), 1)}%",
            "% Positive": f"{round(float((group >= 4).mean() * 100), 1)}%",
        }))

    # Sort on the numeric average rather than the formatted "3.5 ★" string.
    # Apps without a usable average (NaN) go last.
    ranked.sort(key=lambda item: (pd.isna(item[0]), 0.0 if pd.isna(item[0]) else item[0]))
    rows = [row for _, row in ranked]
    return {
        "table": {
            "title": "App Comparison",
            "columns": ["App", "Reviews", "Avg Rating", "% Negative", "% Positive"],
            "rows": rows,
        },
        "summary": f"Compared {len(rows)} apps",
    }
181
-
182
-
183
def _tool_top_reviews(df: pd.DataFrame, min_stars: int = 1,
                      max_stars: int = 2, n: int = 5,
                      app_filter: str = "") -> dict:
    """Filtered review list as a table.

    Keeps reviews whose score lies in ``[min_stars, max_stars]`` and, when
    *app_filter* is given and an app column exists, whose app name contains
    it (case-insensitive). Returns the first *n* matches.

    The app filter is skipped when the frame has no app column (previously
    a ``KeyError``), and missing helpful-vote values are shown as 0
    (previously ``int(NaN)`` raised).
    """
    scores = pd.to_numeric(df["score"], errors="coerce")
    mask = (scores >= min_stars) & (scores <= max_stars)

    app_col = next((c for c in ("appTitle", "appId") if c in df.columns), None)
    filter_applied = bool(app_filter) and app_col is not None
    if filter_applied:
        mask &= df[app_col].astype(str).str.lower().str.contains(
            app_filter.lower(), regex=False, na=False)

    text_col = "content" if "content" in df.columns else df.columns[0]
    has_votes = "thumbsUpCount" in df.columns

    # Pre-coerce the numeric display columns so row building cannot hit NaN.
    work = df.assign(__stars=scores.fillna(0).astype(int))
    if has_votes:
        votes = pd.to_numeric(df["thumbsUpCount"], errors="coerce").fillna(0).astype(int)
        work = work.assign(__votes=votes)
    subset = work[mask].head(max(0, n))

    rows = []
    for rec in subset.to_dict("records"):
        row = {
            "User": str(rec.get("userName", ""))[:20],
            "Stars": "★" * int(rec["__stars"]),
            "Review": str(rec.get(text_col, ""))[:120],
        }
        if app_col:
            row["App"] = str(rec.get(app_col, ""))
        if has_votes:
            row["Helpful"] = int(rec["__votes"])
        rows.append(row)

    label = f"{min_stars}–{max_stars} star"
    return {
        "table": {
            "title": f"Top {label} Reviews" + (f" — {app_filter}" if filter_applied else ""),
            "columns": list(rows[0].keys()) if rows else [],
            "rows": rows,
        },
        "summary": f"Showing {len(rows)} of {int(mask.sum())} matching reviews",
    }
221
-
222
-
223
def _tool_top_helpful(df: pd.DataFrame, n: int = 5) -> dict:
    """Most helpful reviews, ranked by ``thumbsUpCount`` descending.

    Returns ``{"error": ...}`` when the frame has no ``thumbsUpCount``
    column. Missing or non-numeric votes count as 0 and missing scores
    render as no stars; both used to raise on ``int(NaN)``.
    """
    if "thumbsUpCount" not in df.columns:
        return {"error": "No helpful votes column"}

    votes = pd.to_numeric(df["thumbsUpCount"], errors="coerce").fillna(0)
    if "score" in df.columns:
        stars = pd.to_numeric(df["score"], errors="coerce").fillna(0).astype(int)
    else:
        stars = 0  # broadcast: no score column means no stars shown
    ranked = df.assign(__h=votes, __stars=stars).nlargest(max(0, n), "__h")

    text_col = "content" if "content" in df.columns else df.columns[0]
    app_col = "appTitle" if "appTitle" in df.columns else None

    rows = []
    for rec in ranked.to_dict("records"):
        row = {
            "Stars": "★" * int(rec["__stars"]),
            "Helpful": int(rec["__h"]),
            "Review": str(rec.get(text_col, ""))[:120],
        }
        if app_col:
            row["App"] = str(rec.get(app_col, ""))
        rows.append(row)

    return {
        "table": {
            "title": "Most Helpful Reviews",
            "columns": list(rows[0].keys()) if rows else [],
            "rows": rows,
        },
        "summary": f"Top {len(rows)} most helpful reviews",
    }
251
-
252
-
253
def _tool_keyword_search(df: pd.DataFrame, keyword: str, n: int = 8) -> dict:
    """Case-insensitive literal search of the review text for *keyword*.

    Returns up to *n* matching reviews as a table plus a match-count
    summary. A blank keyword returns an empty table, since it would
    otherwise match every review. Missing review text is treated as empty
    rather than as the literal strings "None"/"nan".
    """
    keyword = (keyword or "").strip()
    if not keyword:
        return {
            "table": {"title": 'Reviews mentioning ""', "columns": [], "rows": []},
            "summary": "No search keyword provided",
        }

    text_col = "content" if "content" in df.columns else df.columns[0]
    text = df[text_col].astype(str).where(df[text_col].notna(), "")
    # regex=False performs the same literal match as escaping the keyword.
    mask = text.str.lower().str.contains(keyword.lower(), regex=False, na=False)

    if "score" in df.columns:
        stars = pd.to_numeric(df["score"], errors="coerce").fillna(0).astype(int)
    else:
        stars = 0
    subset = df.assign(__text=text, __stars=stars)[mask].head(max(0, n))
    app_col = "appTitle" if "appTitle" in df.columns else None

    rows = []
    for rec in subset.to_dict("records"):
        row = {
            "Stars": "★" * int(rec["__stars"]),
            "Review": rec["__text"][:150],
        }
        if app_col:
            row["App"] = str(rec.get(app_col, ""))
        rows.append(row)

    return {
        "table": {
            "title": f'Reviews mentioning "{keyword}"',
            "columns": list(rows[0].keys()) if rows else [],
            "rows": rows,
        },
        "summary": f"Found {int(mask.sum())} reviews mentioning '{keyword}'",
    }
278
-
279
-
280
- # ═══════════════════════════════════════════════════════════════════════════
281
- # INTENT CLASSIFIER (enum-strict, multi-class)
282
- # ═══════════════════════════════════════════════════════════════════════════
283
-
284
INTENT_SYSTEM = """You are an intent classifier for a game-review chat assistant.
Classify the user message into EXACTLY ONE of these intents:

TABLE — user wants data in tabular / structured / list format
COMPARISON — comparing apps / games against each other
KEYWORD — wants to search for a specific word/phrase in reviews
HELPFUL — wants the most helpful / upvoted reviews
ANALYSIS — deep insight, summary, cluster analysis, sentiment, recommendations
FILTER — filtering the visible table (show only X stars, only app Y)
GREETING — hi, hello, thanks, small talk
GENERAL — questions about features, how to use the tool, unrelated

Return ONLY one word from the list above. No explanation."""


def classify_intent(message: str, llm) -> str:
    """Classify *message* into one of the intent labels in ``INTENT_SYSTEM``.

    The first valid label found in the model's answer wins, so decorated
    answers such as ``TABLE.``, ``**TABLE**`` or ``Intent: TABLE`` are
    accepted. Any failure or unrecognised answer falls back to "ANALYSIS".
    """
    from langchain_core.messages import HumanMessage, SystemMessage

    try:
        resp = llm.invoke([
            SystemMessage(content=INTENT_SYSTEM),
            HumanMessage(content=f'Message: "{message}"'),
        ])
        answer = str(getattr(resp, "content", resp)).upper()
    except Exception:
        # Best-effort router: an LLM failure must not break the chat flow.
        return "ANALYSIS"

    found = re.search(
        r"\b(TABLE|COMPARISON|KEYWORD|HELPFUL|ANALYSIS|FILTER|GREETING|GENERAL)\b",
        answer,
    )
    return found.group(1) if found else "ANALYSIS"
311
-
312
-
313
- # ═══════════════════════════════════════════════════════════════════════════
314
- # PARAMETER EXTRACTOR (LLM extracts structured params from natural language)
315
- # ═══════════════════════════════════════════════════════════════════════════
316
-
317
def extract_params(message: str, intent: str, llm, apps: list[str]) -> dict:
    """Extract structured parameters from a message given its intent.

    Asks the LLM for a JSON object and returns it merged over the defaults,
    so every expected key is always present. Returns the defaults alone
    when the call fails or the answer is not a JSON object.

    Keys: ``min_stars``, ``max_stars``, ``n``, ``app_filter``, ``keyword``,
    ``metric``. Values come from the LLM and may still be ``None``.
    """
    defaults = {"min_stars": None, "max_stars": None, "n": 5,
                "app_filter": "", "keyword": "", "metric": ""}
    app_list_str = ", ".join(apps[:10]) if apps else "none"

    system = f"""Extract parameters from the user message for intent={intent}.
Known app names in dataset: [{app_list_str}]

Return ONLY valid JSON (no markdown):
{{
"min_stars": 1-5 or null,
"max_stars": 1-5 or null,
"n": integer count or 5,
"app_filter": "exact app name or title from known list, or empty string",
"keyword": "search term or empty string",
"metric": "avg_rating|pct_negative|pct_positive|count or empty"
}}"""

    from langchain_core.messages import HumanMessage, SystemMessage

    try:
        resp = llm.invoke([
            SystemMessage(content=system),
            HumanMessage(content=message),
        ])
        raw = str(getattr(resp, "content", resp))
        # Take the outermost {...} so code fences or prose around the JSON
        # do not break parsing.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end < start:
            return defaults
        parsed = json.loads(raw[start:end + 1])
    except Exception:
        # Best-effort: any LLM or parsing failure yields the defaults.
        return defaults

    if not isinstance(parsed, dict):
        return defaults
    return {**defaults, **parsed}
346
-
347
-
348
- # ═══════════════════════════════════════════════════════════════════════════
349
- # RESPONSE FORMATTER (converts tool output + agent report → rich reply)
350
- # ══════════════════════════════════════════════════════════════���════════════
351
-
352
def _format_agent_report(report: dict) -> str:
    """Convert an agent report dict into a markdown-like text reply.

    Sections, in order: direct answer, up to 4 top issues, up to 3
    strengths, up to 3 recommendations. If none are present, the report's
    ``executive_summary`` (or "Analysis complete.") is returned.

    The report is LLM-generated, so null fields render as empty text and
    list entries that are not dicts are skipped instead of raising.
    """

    def _text(item: dict, key: str) -> str:
        value = item.get(key)
        return "" if value is None else str(value)

    def _entries(key: str, limit: int) -> list:
        items = report.get(key) or []
        return [entry for entry in items if isinstance(entry, dict)][:limit]

    parts = []

    if report.get("direct_answer"):
        parts.append(str(report["direct_answer"]))

    problems = _entries("top_problems", 4)
    if problems:
        parts.append("\n**Top Issues:**")
        for i, p in enumerate(problems, 1):
            evidence = _text(p, "evidence")
            parts.append(
                f"{i}. **{_text(p, 'issue')}** [{_text(p, 'severity').upper()}] — "
                f"{_text(p, 'description')}" + (f' _"{evidence}"_' if evidence else "")
            )

    strengths = _entries("key_strengths", 3)
    if strengths:
        parts.append("\n**What Users Love:**")
        for s in strengths:
            parts.append(f"• **{_text(s, 'strength')}** — {_text(s, 'description')}")

    recs = _entries("recommendations", 3)
    if recs:
        parts.append("\n**Recommendations:**")
        for i, r in enumerate(recs, 1):
            parts.append(
                f"{i}. [{_text(r, 'priority').upper()}] {_text(r, 'action')} — {_text(r, 'rationale')}"
            )

    if parts:
        return "\n".join(parts)
    return str(report.get("executive_summary") or "Analysis complete.")
382
-
383
-
384
- def _build_agent_table(report: dict, app_breakdown: list) -> dict | None:
385
- """If agent ran app_comparison tool, surface it as a table."""
386
- if not app_breakdown:
387
- return None
388
- rows = [
389
- {
390
- "App": a.get("app",""),
391
- "Reviews": a.get("count",""),
392
- "Avg Rating": f"{a.get('avg_rating','?')} ★",
393
- "% Negative": f"{a.get('pct_negative','?')}%",
394
- "% Positive": f"{a.get('pct_positive','?')}%",
395
- }
396
- for a in app_breakdown
397
- ]
398
- return {
399
- "title": "App Breakdown",
400
- "columns": ["App","Reviews","Avg Rating","% Negative","% Positive"],
401
- "rows": rows,
402
- }
403
-
404
-
405
- # ═══════════════════════════════════════════════════════════════════════════
406
- # /chat ENDPOINT — the core of PlayPulse Intelligence
407
- # ═══════════════════════════════════════════════════════════════════════════
408
-
409
@app.route('/chat', methods=['POST'])
def chat():
    """Route one chat message to an inline tool or the deep-analysis agent.

    Request JSON: ``message`` (required), ``reviews`` (list of review
    dicts currently loaded in the UI), ``session_id`` (optional; falls back
    to the client address).

    Response JSON always has ``reply`` and ``type``; depending on intent it
    may also carry ``table``, ``filters`` or ``agent_data``. Errors return
    ``{"error": ...}`` with status 400 or 500.

    NOTE(review): when ``session_id`` is omitted, clients sharing an IP
    address share one conversation memory.
    """

    def _as_int(value, default, upper=None):
        """Coerce an LLM-extracted value to a positive int.

        ``None``, unparsable values and values below 1 yield *default*;
        *upper* optionally caps the result.
        """
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        if number < 1:
            return default
        return number if upper is None else min(number, upper)

    try:
        # silent=True: a missing or invalid JSON body becomes the
        # "No message" 400 below instead of an exception.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        user_message = str(data.get('message') or '').strip()
        current_reviews = data.get('reviews') or []
        session_id = data.get('session_id') or request.remote_addr or "default"

        if not user_message:
            return jsonify({"error": "No message provided"}), 400

        llm = build_llm()
        if not llm:
            return jsonify({"reply": "AI service unavailable — no API key configured.", "type": "error"})

        from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

        # ── Conversation memory ────────────────────────────────────────────
        memory = _CONV_MEMORY[session_id]
        memory.append({"role": "user", "content": user_message})

        def _respond(reply, kind, **extra):
            """Record the assistant reply and build the JSON response."""
            memory.append({"role": "assistant", "content": reply})
            return jsonify({"reply": reply, **extra, "type": kind})

        # ── Build context from reviews ─────────────────────────────────────
        df = pd.DataFrame(current_reviews) if current_reviews else pd.DataFrame()
        has_data = not df.empty

        # App names present in the data, used for parameter extraction.
        apps: list[str] = []
        if has_data:
            for col in ("appTitle", "appId"):
                if col in df.columns:
                    apps = df[col].dropna().astype(str).unique().tolist()
                    break

        def _params_for(intent_name):
            """Extract parameters; a non-dict result is treated as empty."""
            extracted = extract_params(user_message, intent_name, llm, apps)
            return extracted if isinstance(extracted, dict) else {}

        # ── Classify intent ────────────────────────────────────────────────
        intent = classify_intent(user_message, llm)
        print(f"[ChatRouter] Intent: {intent} | has_data: {has_data} | apps: {apps[:3]}")

        # ── GREETING / GENERAL: plain conversational reply ─────────────────
        if intent in ("GREETING", "GENERAL"):
            history_msgs = [
                (HumanMessage if turn["role"] == "user" else AIMessage)(content=turn["content"])
                for turn in list(memory)[-MAX_HISTORY_FOR_LLM:]
            ]
            sys_msg = SystemMessage(content=(
                "You are PlayPulse Intelligence, a friendly AI assistant for analyzing "
                "Google Play Store reviews. Be helpful, concise, and conversational. "
                "If the user greets you, greet back briefly. "
                "If they ask what you can do, explain you can analyze reviews, compare apps, "
                "find issues, show ratings, and answer questions about the scraped data."
            ))
            resp = llm.invoke([sys_msg] + history_msgs)
            reply = getattr(resp, "content", str(resp)).strip()
            return _respond(reply, "general")

        # ── Every remaining intent needs review data ───────────────────────
        if not has_data:
            return _respond(
                "No reviews loaded yet. Please scrape an app first using the search bar, "
                "then I can analyze the data for you! 🎮",
                "general",
            )

        # ── FILTER intent ─────────────────────────────────────────────────
        if intent == "FILTER":
            params = _params_for(intent)
            low = _as_int(params.get("min_stars"), None, upper=5)
            high = _as_int(params.get("max_stars"), None, upper=5)
            app_name = str(params.get("app_filter") or "")

            filter_payload: dict = {}
            if low is not None:
                # A null max_stars means "exactly min_stars"; this used to
                # crash on int(None).
                top = high if high is not None else low
                filter_payload["stars"] = list(range(low, top + 1))
            if app_name:
                filter_payload["app"] = app_name
            if params.get("keyword"):
                filter_payload["query"] = params["keyword"]

            # Also show a summary table of the matching reviews.
            result = _tool_top_reviews(
                df,
                min_stars=low or 1,
                max_stars=high or 5,
                n=_as_int(params.get("n"), 8),
                app_filter=app_name,
            )
            return _respond(
                result.get("summary", "Filters applied."), "filter",
                filters=filter_payload, table=result.get("table"),
            )

        # ── COMPARISON intent ─────────────────────────────────────────────
        if intent == "COMPARISON":
            result = _tool_app_comparison(df)
            if "error" in result:
                return _respond(result["error"], "general")

            # Ask the LLM to narrate the table.
            narration_prompt = (
                f"Here is a comparison table of apps by rating:\n"
                f"{json.dumps(result['table']['rows'], indent=2)}\n\n"
                f"User asked: '{user_message}'\n"
                f"Write a 2-3 sentence natural language summary highlighting "
                f"the worst and best performing apps."
            )
            narr_resp = llm.invoke([HumanMessage(content=narration_prompt)])
            narration = getattr(narr_resp, "content", str(narr_resp)).strip()
            return _respond(narration, "comparison", table=result["table"])

        # ── TABLE intent ──────────────────────────────────────────────────
        if intent == "TABLE":
            # Look at the previous assistant message so that
            # "get me this in tabular format" refers to the right thing.
            prev_context = ""
            for turn in reversed(list(memory)[:-1]):  # skip current user msg
                if turn["role"] == "assistant":
                    prev_context = turn["content"]
                    break

            # Previous answer about comparison / ratings → comparison table.
            comp_keywords = ["rating", "low rating", "negative", "ranked", "comparison", "games"]
            wants_comparison = (
                any(k in prev_context.lower() for k in comp_keywords)
                or "tabular" in user_message.lower()
            )
            if wants_comparison:
                result = _tool_app_comparison(df)
                if "table" in result:
                    return _respond(
                        f"Here's the comparison table. {result['summary']}",
                        "table", table=result["table"],
                    )

            # Otherwise extract params and show a filtered reviews table.
            params = _params_for("TABLE")
            result = _tool_top_reviews(
                df,
                min_stars=_as_int(params.get("min_stars"), 1, upper=5),
                max_stars=_as_int(params.get("max_stars"), 5, upper=5),
                n=_as_int(params.get("n"), 10),
                app_filter=str(params.get("app_filter") or ""),
            )
            return _respond(result.get("summary", ""), "table", table=result.get("table"))

        # ── KEYWORD intent ────────────────────────────────────────────────
        if intent == "KEYWORD":
            params = _params_for(intent)
            kw = str(params.get("keyword") or "")
            if not kw:
                # Fall back to asking the LLM for just the keyword.
                kw_resp = llm.invoke([HumanMessage(content=(
                    f'Extract the search keyword or phrase from: "{user_message}". '
                    f'Return ONLY the keyword, nothing else.'
                ))])
                kw = getattr(kw_resp, "content", str(kw_resp)).strip().strip('"')

            result = _tool_keyword_search(df, kw, n=10)
            return _respond(result.get("summary", ""), "keyword", table=result.get("table"))

        # ── HELPFUL intent ────────────────────────────────────────────────
        if intent == "HELPFUL":
            params = _params_for(intent)
            result = _tool_top_helpful(df, n=_as_int(params.get("n"), 5))
            reply = result["error"] if "error" in result else result.get("summary", "")
            return _respond(reply, "helpful", table=result.get("table"))

        # ── ANALYSIS intent (deep — calls the agent) ──────────────────────
        # Also the fallback for any intent not handled above.
        history_context = "\n".join(
            f"{'User' if t['role'] == 'user' else 'Assistant'}: {t['content']}"
            for t in list(memory)[-MAX_HISTORY_FOR_LLM:]
        )
        enriched_query = (
            f"Conversation so far:\n{history_context}\n\n"
            f"User's current question: {user_message}"
        ) if len(memory) > 2 else user_message

        agent_state = run_agent(enriched_query, df=df) or {}
        report = agent_state.get("report") or {}
        breakdown = agent_state.get("app_breakdown") or []

        reply = _format_agent_report(report) or ""
        if not reply.strip():
            reply = report.get("executive_summary") or "I've completed the analysis."

        return _respond(
            reply, "analysis",
            table=_build_agent_table(report, breakdown),
            agent_data={
                "top_problems": report.get("top_problems", []),
                "key_strengths": report.get("key_strengths", []),
                "recommendations": report.get("recommendations", []),
                "clusters": agent_state.get("clusters", []),
                "sentiment": agent_state.get("sentiment", {}),
                "stats": agent_state.get("stats", {}),
            },
        )

    except Exception as e:
        import traceback
        print(f"[Chat ERROR] {e}\n{traceback.format_exc()}")
        return jsonify({"error": str(e)}), 500
656
-
657
-
658
- # ═══════���═══════════════════════════════════════════════════════════════════
659
- # SCRAPE ROUTES (unchanged from v1)
660
- # ═══════════════════════════════════════════════════════════════════════════
661
-
662
@app.route('/scrape', methods=['POST'])
def scrape():
    """Scrape reviews for a single app.

    Request JSON: ``identifier`` (Play URL, package id or app name),
    ``review_count_type`` ("all" or "fixed"), ``review_count``,
    ``sort_order``, ``star_ratings``.

    The identifier is resolved in order: direct package id or URL, library
    search, then store-page scrape. Returns 400 for a missing identifier,
    404 when no app matches, 500 on scraper errors.
    """
    try:
        # Tolerate a missing or invalid JSON body, as /chat does.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        identifier = str(data.get('identifier') or '').strip()
        if not identifier:
            return jsonify({"error": "No app identifier provided"}), 400

        count_type = data.get('review_count_type', 'fixed')
        count = 100000 if count_type == 'all' else data.get('review_count', 150)

        app_id = extract_app_id(identifier)
        if not app_id:
            hits = search(identifier, lang="en", country="us", n_hits=1)
            if hits and hits[0].get('appId'):
                app_id = hits[0]['appId']
            else:
                pids = scrape_store_ids(identifier, n_hits=1)
                if not pids:
                    return jsonify({"error": f"App '{identifier}' not found"}), 404
                app_id = pids[0]

        info, all_reviews = fetch_app_reviews(
            app_id, count, data.get('sort_order'), data.get('star_ratings'))

        return jsonify({
            "app_info": {
                "title": info['title'],
                "icon": info['icon'],
                "score": info['score'],
                "reviews": info['reviews'],
                "appId": app_id,
            },
            "reviews": all_reviews,
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
697
-
698
-
699
@app.route('/find-apps', methods=['POST'])
def find_apps():
    """Find apps matching a free-text query.

    Request JSON: ``query`` (required), ``app_count`` (default 10).
    Candidate ids come from the store-page scrape, falling back to the
    library search. Apps whose details cannot be fetched are skipped.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        query = str(data.get('query') or '').strip()
        if not query:
            return jsonify({"error": "No query provided"}), 400

        # A non-numeric or non-positive app_count falls back to 10.
        try:
            app_count = int(data.get('app_count', 10))
        except (TypeError, ValueError):
            app_count = 10
        if app_count < 1:
            app_count = 10

        app_ids = scrape_store_ids(query, n_hits=app_count)
        if not app_ids:
            hits = search(query, lang="en", country="us", n_hits=app_count)
            app_ids = [h['appId'] for h in hits if h.get('appId')]

        results = []
        for aid in app_ids:
            try:
                info = app_info(aid, lang='en', country='us')
                results.append({
                    "appId": aid,
                    "title": info['title'],
                    "icon": info['icon'],
                    "score": info['score'],
                    "developer": info.get('developer', 'Unknown'),
                    "installs": info.get('installs', '0+'),
                })
            except Exception:
                # Best-effort: one unfetchable app must not fail the list.
                continue

        return jsonify({"results": results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
728
-
729
-
730
@app.route('/scrape-batch', methods=['POST'])
def scrape_batch():
    """Scrape reviews for several apps in one request.

    Request JSON: ``app_ids`` (list, required), ``review_count_type``,
    ``reviews_per_app`` (default 100), ``sort_order``, ``star_ratings``.

    Response: ``apps`` (metadata per scraped app), ``reviews`` (all reviews
    combined) and ``failed`` (ids that could not be scraped, so the client
    can tell a partial result from a complete one).
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        app_ids = data.get('app_ids') or []
        if isinstance(app_ids, str):
            app_ids = [app_ids]  # a bare string would be iterated per character
        if not app_ids:
            return jsonify({"error": "No app IDs provided"}), 400

        count_type = data.get('review_count_type', 'fixed')
        reviews_per_app = 100000 if count_type == 'all' else int(data.get('reviews_per_app', 100))

        batch_results: list[dict] = []
        all_combined: list[dict] = []
        failed: list = []

        for app_id in app_ids:
            try:
                info, app_reviews = fetch_app_reviews(
                    app_id, reviews_per_app, data.get('sort_order'), data.get('star_ratings'))
            except Exception:
                # Best-effort per app, but report which ids failed.
                failed.append(app_id)
                continue
            batch_results.append({
                "title": info['title'],
                "icon": info['icon'],
                "score": info['score'],
                "appId": app_id,
            })
            all_combined.extend(app_reviews)

        return jsonify({"apps": batch_results, "reviews": all_combined, "failed": failed})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
761
-
762
-
763
@app.route("/search-suggestions", methods=["POST"])
def search_suggestions():
    """Return up to five app suggestions for a partial search query.

    Queries shorter than two characters return an empty list. Hits whose
    ``appId`` is empty, the string "None", or has no dot are dropped.
    """
    try:
        payload = request.json or {}
        query = payload.get("query", "").strip()
        if len(query) < 2:
            return jsonify({"results": []})

        suggestions = []
        for hit in search(query, lang="en", country="us", n_hits=6):
            aid = hit.get("appId", "")
            usable = bool(aid) and aid != "None" and "." in aid
            if usable:
                suggestions.append({
                    "appId": aid,
                    "storeUrl": f"https://play.google.com/store/apps/details?id={aid}",
                    "title": hit.get("title", ""),
                    "icon": hit.get("icon", ""),
                    "score": round(hit.get("score") or 0, 1),
                    "developer": hit.get("developer", ""),
                    "installs": hit.get("installs", ""),
                })
        return jsonify({"results": suggestions[:5]})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
788
-
789
-
790
- # ═══════════════════════════════════════════════════════════════════════════
791
- # CLEAR CHAT MEMORY (optional endpoint for "New Chat" button)
792
- # ═══════════════════════════════════════════════════════════════════════════
793
-
794
@app.route('/chat/clear', methods=['POST'])
def clear_chat():
    """Drop the stored conversation for a session ("New Chat" button).

    The session is identified as in ``/chat``: ``session_id`` from the JSON
    body, else the client address, else "default".
    """
    # silent=True: a body-less POST must not be rejected.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    session_id = payload.get('session_id') or request.remote_addr or "default"
    # pop() removes the entry outright. Indexing the defaultdict would
    # create a new empty deque for every unknown session id.
    _CONV_MEMORY.pop(session_id, None)
    return jsonify({"ok": True})
799
-
800
-
801
- # ═══════════════════════════════════════════════════════════════════════════
802
- # PAGE ROUTES
803
- # ═══════════════════════════════════════════════════════════════════════════
804
-
805
@app.route('/scraper')
def scraper():
    """Render the single-app scraper page."""
    return render_template('index.html')


@app.route('/batch')
def batch():
    """Render the batch scraper page."""
    return render_template('batch.html')


@app.route('/')
def landing():
    """Render the landing page."""
    return render_template('landing.html')
816
-
817
-
818
if __name__ == "__main__":
    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution. The server binds to every interface, so
    # debug is now opt-in via FLASK_DEBUG=1 instead of always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", debug=debug_enabled, port=7860)
 
1
+ """
2
+ PlayPulse Intelligence — Flask App (v3)
3
+ ─────────────────────────────────────────
4
+ Key improvements over v1
5
+ • Chat has conversation memory (per session, server-side deque)
6
+ • Intent router is enum-strict + falls back properly
7
+ • 6 inline chat tools (no agent needed for simple queries)
8
+ • Agent is one of those tools — called only for deep analysis
9
+ • /chat returns structured payload: reply + optional table / chart_data / agent_data
10
+ • "tabular format" requests produce real table JSON the frontend can render
11
+ """
12
+
13
+ import urllib.parse
14
+ import math
15
+ import re
16
+ import json
17
+ import requests
18
+ from collections import deque, defaultdict
19
+ from datetime import datetime
20
+ from flask import Flask, request, render_template, jsonify, session
21
+ from google_play_scraper import reviews, Sort, search, app as app_info
22
+ import pandas as pd
23
+ from utils.agents import run_agent, build_llm
24
+ import os
25
+
26
+ app = Flask(__name__)
27
+ app.secret_key = os.getenv("FLASK_SECRET", "playpulse-secret-2026")
28
+
29
+ # ── Per-session conversation memory (server-side, max 20 turns) ───────────
30
+ # key: session_id → deque of {"role": "user"|"assistant", "content": str}
31
+ _CONV_MEMORY: dict[str, deque] = defaultdict(lambda: deque(maxlen=20))
32
+
33
+ MAX_HISTORY_FOR_LLM = 6 # last N turns sent to LLM for context
34
+
35
+
36
+ # ═══════════════════════════════════════════════════════════════════════════
37
+ # SCRAPER HELPERS (unchanged from v1)
38
+ # ═══════════════════════════════════════════════════════════════════════════
39
+
40
def extract_app_id(url_or_name: str) -> str:
    """Return the Play Store package id found in *url_or_name*, or "".

    Accepts either a Play Store URL carrying an ``id`` query parameter or a
    bare dotted package name such as ``com.example.game``. Anything else
    (free-text app names, store URLs without ``id``, unrelated URLs) yields
    an empty string so the caller can fall back to a search.
    """
    candidate = (url_or_name or "").strip()

    if "play.google.com" in candidate:
        query = urllib.parse.urlparse(candidate).query
        ids = urllib.parse.parse_qs(query).get("id")
        # A store URL without ?id= (e.g. a search page) has no package id;
        # never fall through and return the URL itself.
        return ids[0] if ids else ""

    # Dotted identifier made of word characters only — rejects URLs/paths.
    if re.fullmatch(r"[A-Za-z0-9_]+(?:\.[A-Za-z0-9_]+)+", candidate):
        return candidate
    return ""
50
+
51
+
52
def scrape_store_ids(query: str, n_hits: int = 5):
    """Scrape the Play Store search page for package ids matching *query*.

    Returns up to *n_hits* distinct ids in page order. This is a best-effort
    fallback: any failure (network error, non-200 response) yields ``[]``.
    """
    try:
        search_url = f"https://play.google.com/store/search?q={urllib.parse.quote(query)}&c=apps"
        response = requests.get(
            search_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
        if response.status_code != 200:
            return []
        found = re.findall(r'details\?id=([a-zA-Z0-9._]+)', response.text)
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        distinct = list(dict.fromkeys(pid for pid in found if "None" not in pid))
        return distinct[:n_hits]
    except Exception:
        return []
67
+
68
+
69
def serialize_review(r: dict) -> dict:
    """Convert one raw scraper review dict into a JSON-safe dict.

    Missing keys *and* explicit ``None`` values are replaced by neutral
    defaults ("" for text, 0 for counters) so that downstream table tools can
    format every field without special-casing nulls. Timestamps are emitted
    as ISO-8601 strings, or "" when absent.
    """
    def _iso(value) -> str:
        if not value:
            return ""
        # Already-serialised timestamps are passed through as text.
        return value.isoformat() if hasattr(value, "isoformat") else str(value)

    return {
        "reviewId": r.get("reviewId") or "",
        "userName": r.get("userName") or "",
        "userImage": r.get("userImage") or "",
        "content": r.get("content") or "",
        "score": r.get("score") or 0,
        "thumbsUpCount": r.get("thumbsUpCount") or 0,
        "reviewCreatedVersion": r.get("reviewCreatedVersion") or "",
        "at": _iso(r.get("at")),
        "replyContent": r.get("replyContent") or "",
        "repliedAt": _iso(r.get("repliedAt")),
    }
82
+
83
+
84
def fetch_app_reviews(app_id, review_count, sort_order, star_ratings_input):
    """Fetch app metadata plus reviews for *app_id*.

    Args:
        app_id: Play Store package id.
        review_count: Requested number of reviews (normalised by
            ``_review_limit``).
        sort_order: One of 'MOST_RELEVANT', 'NEWEST', 'RATING'; anything else
            falls back to most relevant.
        star_ratings_input: 'all' / empty for no star filter, otherwise an
            iterable of star values (1-5) to fetch separately.

    Returns:
        ``(info, reviews)`` where *reviews* is a list of serialised review
        dicts, each tagged with ``appTitle`` and ``appId``.
    """
    info = app_info(app_id, lang='en', country='us')
    sort_map = {
        'MOST_RELEVANT': Sort.MOST_RELEVANT,
        'NEWEST': Sort.NEWEST,
        'RATING': Sort.RATING,
    }
    selected_sort = sort_map.get(sort_order, Sort.MOST_RELEVANT)

    star_filters = [None]  # None → no star filter
    if star_ratings_input and star_ratings_input != 'all':
        if isinstance(star_ratings_input, (list, tuple, set, str)):
            candidates = star_ratings_input
        else:
            candidates = [star_ratings_input]
        wanted = sorted(
            {int(s) for s in candidates if str(s).isdigit() and 1 <= int(s) <= 5},
            reverse=True,
        )
        # If nothing valid was supplied keep the unfiltered default instead
        # of dividing by zero below.
        if wanted:
            star_filters = wanted

    # Split the requested total evenly across the star buckets.
    per_bucket = math.ceil(_review_limit(review_count) / len(star_filters))
    all_reviews: list[dict] = []
    seen_ids: set[str] = set()

    for star in star_filters:
        result, _ = reviews(
            app_id, lang='en', country='us',
            sort=selected_sort, count=per_bucket,
            filter_score_with=star,
        )
        for r in result:
            rid = r.get('reviewId', '')
            # De-duplicate by id, but never collapse reviews that lack one.
            if rid:
                if rid in seen_ids:
                    continue
                seen_ids.add(rid)
            s = serialize_review(r)
            s['appTitle'] = info['title']
            s['appId'] = app_id
            all_reviews.append(s)

    return info, all_reviews
121
+
122
+
123
+ def _review_limit(count):
124
+ try:
125
+ return int(count)
126
+ except Exception:
127
+ return 150
128
+
129
+
130
+ # ═══════════════════════════════════════════════════════════════════════════
131
+ # INLINE CHAT TOOLS (fast, no heavy agent needed for simple queries)
132
+ # ═══════════════════════════════════════════════════════════════════════════
133
+
134
+ def _tool_rating_breakdown(df: pd.DataFrame) -> dict:
135
+ """Star rating distribution across all reviews."""
136
+ dist = df["score"].value_counts().sort_index()
137
+ total = max(1, len(df))
138
+ rows = [
139
+ {
140
+ "Stars": f"{'★' * int(s)} ({int(s)})",
141
+ "Count": int(c),
142
+ "Percentage": f"{round(c/total*100,1)}%",
143
+ }
144
+ for s, c in dist.items()
145
+ ]
146
+ return {
147
+ "table": {
148
+ "title": "Rating Distribution",
149
+ "columns": ["Stars", "Count", "Percentage"],
150
+ "rows": rows,
151
+ },
152
+ "summary": f"{len(df)} reviews: avg {round(df['score'].mean(),2)}/5",
153
+ }
154
+
155
+
156
+ def _tool_app_comparison(df: pd.DataFrame) -> dict:
157
+ """Per-app avg rating + negative % table."""
158
+ if "appId" not in df.columns and "appTitle" not in df.columns:
159
+ return {"error": "No app column in data"}
160
+
161
+ app_col = "appTitle" if "appTitle" in df.columns else "appId"
162
+ rows = []
163
+ for app_name, grp in df.groupby(app_col):
164
+ sc = pd.to_numeric(grp["score"], errors="coerce")
165
+ rows.append({
166
+ "App": str(app_name),
167
+ "Reviews": len(grp),
168
+ "Avg Rating": f"{round(float(sc.mean()),2)} ★",
169
+ "% Negative": f"{round(float((sc <= 2).mean()*100),1)}%",
170
+ "% Positive": f"{round(float((sc >= 4).mean()*100),1)}%",
171
+ })
172
+ rows.sort(key=lambda x: x["Avg Rating"])
173
+ return {
174
+ "table": {
175
+ "title": "App Comparison",
176
+ "columns": ["App", "Reviews", "Avg Rating", "% Negative", "% Positive"],
177
+ "rows": rows,
178
+ },
179
+ "summary": f"Compared {len(rows)} apps",
180
+ }
181
+
182
+
183
+ def _tool_top_reviews(df: pd.DataFrame, min_stars: int = 1,
184
+ max_stars: int = 2, n: int = 5,
185
+ app_filter: str = "") -> dict:
186
+ """Filtered review list as table."""
187
+ sc = pd.to_numeric(df["score"], errors="coerce")
188
+ mask = (sc >= min_stars) & (sc <= max_stars)
189
+ if app_filter:
190
+ app_col = "appTitle" if "appTitle" in df.columns else "appId"
191
+ mask &= df[app_col].astype(str).str.lower().str.contains(
192
+ re.escape(app_filter.lower()), na=False)
193
+
194
+ subset = df[mask].head(n)
195
+ tc = "content" if "content" in df.columns else df.columns[0]
196
+ app_col = "appTitle" if "appTitle" in df.columns else ("appId" if "appId" in df.columns else None)
197
+
198
+ rows = []
199
+ for _, r in subset.iterrows():
200
+ row = {
201
+ "User": str(r.get("userName", ""))[:20],
202
+ "Stars": "★" * int(r.get("score", 0)),
203
+ "Review": str(r.get(tc, ""))[:120],
204
+ }
205
+ if app_col:
206
+ row["App"] = str(r.get(app_col, ""))
207
+ if "thumbsUpCount" in df.columns:
208
+ row["Helpful"] = int(r.get("thumbsUpCount", 0))
209
+ rows.append(row)
210
+
211
+ label = f"{min_stars}–{max_stars} star"
212
+ cols = list(rows[0].keys()) if rows else []
213
+ return {
214
+ "table": {
215
+ "title": f"Top {label} Reviews" + (f" — {app_filter}" if app_filter else ""),
216
+ "columns": cols,
217
+ "rows": rows,
218
+ },
219
+ "summary": f"Showing {len(rows)} of {int(mask.sum())} matching reviews",
220
+ }
221
+
222
+
223
+ def _tool_top_helpful(df: pd.DataFrame, n: int = 5) -> dict:
224
+ """Most helpful reviews."""
225
+ if "thumbsUpCount" not in df.columns:
226
+ return {"error": "No helpful votes column"}
227
+ df2 = df.copy()
228
+ df2["__h"] = pd.to_numeric(df2["thumbsUpCount"], errors="coerce").fillna(0)
229
+ subset = df2.nlargest(n, "__h")
230
+ tc = "content" if "content" in df.columns else df.columns[0]
231
+ app_col = "appTitle" if "appTitle" in df.columns else None
232
+
233
+ rows = []
234
+ for _, r in subset.iterrows():
235
+ row = {
236
+ "Stars": "★" * int(r.get("score", 0)),
237
+ "Helpful": int(r.get("thumbsUpCount", 0)),
238
+ "Review": str(r.get(tc, ""))[:120],
239
+ }
240
+ if app_col:
241
+ row["App"] = str(r.get(app_col, ""))
242
+ rows.append(row)
243
+ return {
244
+ "table": {
245
+ "title": "Most Helpful Reviews",
246
+ "columns": list(rows[0].keys()) if rows else [],
247
+ "rows": rows,
248
+ },
249
+ "summary": f"Top {len(rows)} most helpful reviews",
250
+ }
251
+
252
+
253
+ def _tool_keyword_search(df: pd.DataFrame, keyword: str, n: int = 8) -> dict:
254
+ """Search review text for keyword."""
255
+ tc = "content" if "content" in df.columns else df.columns[0]
256
+ mask = df[tc].astype(str).str.lower().str.contains(
257
+ re.escape(keyword.lower()), na=False)
258
+ subset = df[mask].head(n)
259
+ app_col = "appTitle" if "appTitle" in df.columns else None
260
+
261
+ rows = []
262
+ for _, r in subset.iterrows():
263
+ row = {
264
+ "Stars": "★" * int(r.get("score", 0)),
265
+ "Review": str(r.get(tc, ""))[:150],
266
+ }
267
+ if app_col:
268
+ row["App"] = str(r.get(app_col, ""))
269
+ rows.append(row)
270
+ return {
271
+ "table": {
272
+ "title": f'Reviews mentioning "{keyword}"',
273
+ "columns": list(rows[0].keys()) if rows else [],
274
+ "rows": rows,
275
+ },
276
+ "summary": f"Found {int(mask.sum())} reviews mentioning '{keyword}'",
277
+ }
278
+
279
+
280
+ # ═══════════════════════════════════════════════════════════════════════════
281
+ # INTENT CLASSIFIER (enum-strict, multi-class)
282
+ # ═══════════════════════════════════════════════════════════════════════════
283
+
284
+ INTENT_SYSTEM = """You are an intent classifier for a game-review chat assistant.
285
+ Classify the user message into EXACTLY ONE of these intents:
286
+
287
+ TABLE — user wants data in tabular / structured / list format
288
+ COMPARISON — comparing apps / games against each other
289
+ KEYWORD — wants to search for a specific word/phrase in reviews
290
+ HELPFUL — wants the most helpful / upvoted reviews
291
+ ANALYSIS — deep insight, summary, cluster analysis, sentiment, recommendations
292
+ FILTER — filtering the visible table (show only X stars, only app Y)
293
+ GREETING — hi, hello, thanks, small talk
294
+ GENERAL — questions about features, how to use the tool, unrelated
295
+
296
+ Return ONLY one word from the list above. No explanation."""
297
+
298
+
299
def classify_intent(message: str, llm) -> str:
    """Classify *message* into one of the chat intents using the LLM.

    The reply is scanned for the first valid intent word, so answers such as
    "TABLE." or "Intent: TABLE" are still recognised. Returns "ANALYSIS" when
    the LLM call fails or no valid intent is found.
    """
    from langchain_core.messages import HumanMessage, SystemMessage
    valid = {"TABLE","COMPARISON","KEYWORD","HELPFUL","ANALYSIS","FILTER","GREETING","GENERAL"}
    try:
        resp = llm.invoke([
            SystemMessage(content=INTENT_SYSTEM),
            HumanMessage(content=f'Message: "{message}"'),
        ])
        raw = str(getattr(resp, "content", resp)).upper()
    except Exception:
        return "ANALYSIS"

    # Tolerate punctuation / extra words around the label.
    for token in re.findall(r"[A-Z]+", raw):
        if token in valid:
            return token
    return "ANALYSIS"
311
+
312
+
313
+ # ═══════════════════════════════════════════════════════════════════════════
314
+ # PARAMETER EXTRACTOR (LLM extracts structured params from natural language)
315
+ # ═══════════════════════════════════════════════════════════════════════════
316
+
317
def extract_params(message: str, intent: str, llm, apps: list[str]) -> dict:
    """Extract structured parameters from a message given its intent.

    Asks the LLM for a JSON object and merges it over a full set of defaults,
    so the result is always a dict containing every expected key. ``null``
    values from the LLM keep the default. On any failure (LLM error, invalid
    or non-object JSON) the defaults are returned.
    """
    defaults = {"min_stars": None, "max_stars": None, "n": 5,
                "app_filter": "", "keyword": "", "metric": ""}
    app_list_str = ", ".join(apps[:10]) if apps else "none"

    system = f"""Extract parameters from the user message for intent={intent}.
Known app names in dataset: [{app_list_str}]

Return ONLY valid JSON (no markdown):
{{
  "min_stars": 1-5 or null,
  "max_stars": 1-5 or null,
  "n": integer count or 5,
  "app_filter": "exact app name or title from known list, or empty string",
  "keyword": "search term or empty string",
  "metric": "avg_rating|pct_negative|pct_positive|count or empty"
}}"""

    from langchain_core.messages import HumanMessage, SystemMessage
    try:
        resp = llm.invoke([
            SystemMessage(content=system),
            HumanMessage(content=message),
        ])
        raw = str(getattr(resp, "content", resp)).strip()
        # Pull out the JSON object even when it is wrapped in code fences
        # or surrounded by explanatory text.
        found = re.search(r"\{.*\}", raw, re.DOTALL)
        parsed = json.loads(found.group(0) if found else raw)
    except Exception:
        return dict(defaults)

    if not isinstance(parsed, dict):
        return dict(defaults)
    return {**defaults, **{k: v for k, v in parsed.items() if v is not None}}
346
+
347
+
348
+ # ═══════════════════════════════════════════════════════════════════════════
349
+ # RESPONSE FORMATTER (converts tool output + agent report → rich reply)
350
+ # ══════════════════════════════════════════════════════════════════════════
351
+
352
+ def _format_agent_report(report: dict) -> str:
353
+ """Convert agent report dict into a well-structured markdown-like text reply."""
354
+ parts = []
355
+
356
+ if report.get("direct_answer"):
357
+ parts.append(report["direct_answer"])
358
+
359
+ problems = report.get("top_problems", [])
360
+ if problems:
361
+ parts.append("\n**Top Issues:**")
362
+ for i, p in enumerate(problems[:4], 1):
363
+ sev = p.get("severity","").upper()
364
+ issue = p.get("issue","")
365
+ desc = p.get("description","")
366
+ ev = p.get("evidence","")
367
+ parts.append(f"{i}. **{issue}** [{sev}] — {desc}" + (f' _"{ev}"_' if ev else ""))
368
+
369
+ strengths = report.get("key_strengths", [])
370
+ if strengths:
371
+ parts.append("\n**What Users Love:**")
372
+ for s in strengths[:3]:
373
+ parts.append(f"• **{s.get('strength','')}** — {s.get('description','')}")
374
+
375
+ recs = report.get("recommendations", [])
376
+ if recs:
377
+ parts.append("\n**Recommendations:**")
378
+ for i, r in enumerate(recs[:3], 1):
379
+ parts.append(f"{i}. [{r.get('priority','').upper()}] {r.get('action','')} — {r.get('rationale','')}")
380
+
381
+ return "\n".join(parts) if parts else report.get("executive_summary", "Analysis complete.")
382
+
383
+
384
+ def _build_agent_table(report: dict, app_breakdown: list) -> dict | None:
385
+ """If agent ran app_comparison tool, surface it as a table."""
386
+ if not app_breakdown:
387
+ return None
388
+ rows = [
389
+ {
390
+ "App": a.get("app",""),
391
+ "Reviews": a.get("count",""),
392
+ "Avg Rating": f"{a.get('avg_rating','?')} ★",
393
+ "% Negative": f"{a.get('pct_negative','?')}%",
394
+ "% Positive": f"{a.get('pct_positive','?')}%",
395
+ }
396
+ for a in app_breakdown
397
+ ]
398
+ return {
399
+ "title": "App Breakdown",
400
+ "columns": ["App","Reviews","Avg Rating","% Negative","% Positive"],
401
+ "rows": rows,
402
+ }
403
+
404
+
405
+ # ═══════════════════════════════════════════════════════════════════════════
406
+ # /chat ENDPOINT — the core of PlayPulse Intelligence
407
+ # ═══════════════════════════════════════════════════════════════════════════
408
+
409
@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message about the currently loaded reviews.

    Request JSON: ``message`` (str), ``reviews`` (list of review dicts) and an
    optional ``session_id``. The message is classified into an intent and
    routed to an inline table tool or, for ANALYSIS / anything unmatched, to
    the full agent. Every reply is appended to the per-session memory.

    Response JSON always has ``reply`` and ``type``; depending on the intent
    it may also carry ``table``, ``filters`` or ``agent_data``. Errors are
    returned as ``{"error": ...}`` with status 400 or 500.
    """
    def _to_int(value, default):
        """Coerce an LLM-supplied value to int, or return *default*."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _star(value) -> int:
        """Return a star value in 1..5, or 0 when missing/invalid."""
        star = _to_int(value, 0)
        return star if 1 <= star <= 5 else 0

    try:
        # silent=True: a missing / non-JSON body becomes a clean 400 below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        user_message = str(data.get('message') or '').strip()
        current_reviews = data.get('reviews') or []
        session_id = data.get('session_id') or request.remote_addr or "default"

        if not user_message:
            return jsonify({"error": "No message provided"}), 400

        llm = build_llm()
        if not llm:
            return jsonify({"reply": "AI service unavailable — no API key configured.", "type": "error"})

        from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

        # ── Conversation memory ────────────────────────────────────────────
        memory = _CONV_MEMORY[session_id]
        memory.append({"role": "user", "content": user_message})

        def _respond(reply, kind, **extra):
            """Record the assistant reply and build the JSON response."""
            memory.append({"role": "assistant", "content": reply})
            return jsonify({"reply": reply, "type": kind, **extra})

        # ── Build context from reviews ─────────────────────────────────────
        df = pd.DataFrame(current_reviews) if current_reviews else pd.DataFrame()
        has_data = not df.empty

        # Detected app names for parameter extraction
        apps: list[str] = []
        if has_data:
            for col in ["appTitle", "appId"]:
                if col in df.columns:
                    apps = df[col].dropna().astype(str).unique().tolist()
                    break

        def _params_for(intent_name):
            """Run the parameter extractor, always yielding a dict."""
            extracted = extract_params(user_message, intent_name, llm, apps)
            return extracted if isinstance(extracted, dict) else {}

        # ── Classify intent ────────────────────────────────────────────────
        intent = classify_intent(user_message, llm)
        print(f"[ChatRouter] Intent: {intent} | has_data: {has_data} | apps: {apps[:3]}")

        # ── Handle GREETING / GENERAL ──────────────────────────────────────
        if intent in ("GREETING", "GENERAL"):
            history_msgs = [
                HumanMessage(content=turn["content"]) if turn["role"] == "user"
                else AIMessage(content=turn["content"])
                for turn in list(memory)[-MAX_HISTORY_FOR_LLM:]
            ]
            sys_msg = SystemMessage(content=(
                "You are PlayPulse Intelligence, a friendly AI assistant for analyzing "
                "Google Play Store reviews. Be helpful, concise, and conversational. "
                "If the user greets you, greet back briefly. "
                "If they ask what you can do, explain you can analyze reviews, compare apps, "
                "find issues, show ratings, and answer questions about the scraped data."
            ))
            resp = llm.invoke([sys_msg] + history_msgs)
            reply = getattr(resp, "content", str(resp)).strip()
            return _respond(reply, "general")

        # ── No data loaded — ask user to scrape first ─────────────────────
        if not has_data:
            reply = ("No reviews loaded yet. Please scrape an app first using the search bar, "
                     "then I can analyze the data for you! 🎮")
            return _respond(reply, "general")

        # ── FILTER intent ─────────────────────────────────────────────────
        if intent == "FILTER":
            params = _params_for(intent)
            lo = _star(params.get("min_stars"))
            hi = _star(params.get("max_stars"))
            # A single bound is common ("only 2 star", "up to 3 stars"):
            # a lone minimum means exactly that star, a lone maximum 1..max.
            if lo and not hi:
                hi = lo
            elif hi and not lo:
                lo = 1
            if lo > hi:
                lo, hi = hi, lo

            filter_payload: dict = {}
            if lo:
                filter_payload["stars"] = list(range(lo, hi + 1))
            if params.get("app_filter"):
                filter_payload["app"] = params["app_filter"]
            if params.get("keyword"):
                filter_payload["query"] = params["keyword"]

            # Summary table uses the same star range as the filter payload.
            result = _tool_top_reviews(
                df,
                min_stars=lo or 1,
                max_stars=hi or 5,
                n=_to_int(params.get("n") or 8, 8),
                app_filter=params.get("app_filter") or "",
            )
            return _respond(
                result.get("summary", "Filters applied."), "filter",
                filters=filter_payload, table=result.get("table"),
            )

        # ── COMPARISON intent ─────────────────────────────────────────────
        if intent == "COMPARISON":
            result = _tool_app_comparison(df)
            if "error" in result:
                return _respond(result["error"], "general")

            # Also ask LLM to narrate
            narration_prompt = (
                f"Here is a comparison table of apps by rating:\n"
                f"{json.dumps(result['table']['rows'], indent=2)}\n\n"
                f"User asked: '{user_message}'\n"
                f"Write a 2-3 sentence natural language summary highlighting "
                f"the worst and best performing apps."
            )
            narr_resp = llm.invoke([HumanMessage(content=narration_prompt)])
            narration = getattr(narr_resp, "content", str(narr_resp)).strip()
            return _respond(narration, "comparison", table=result["table"])

        # ── TABLE intent ──────────────────────────────────────────────────
        if intent == "TABLE":
            # Look at the PREVIOUS assistant message so that
            # "get me this in tabular format" works correctly.
            prev_context = ""
            for turn in reversed(list(memory)[:-1]):  # skip current user msg
                if turn["role"] == "assistant":
                    prev_context = turn["content"]
                    break

            # If previous answer was about app comparison / ratings → comparison table
            comp_keywords = ["rating","low rating","negative","ranked","comparison","games"]
            if any(k in prev_context.lower() for k in comp_keywords) or "tabular" in user_message.lower():
                result = _tool_app_comparison(df)
                if "table" in result:
                    reply = f"Here's the comparison table. {result['summary']}"
                    return _respond(reply, "table", table=result["table"])

            # Otherwise extract params and show filtered reviews table
            params = _params_for("TABLE")
            result = _tool_top_reviews(
                df,
                min_stars=_star(params.get("min_stars")) or 1,
                max_stars=_star(params.get("max_stars")) or 5,
                n=_to_int(params.get("n") or 10, 10),
                app_filter=params.get("app_filter") or "",
            )
            return _respond(result.get("summary", ""), "table", table=result.get("table"))

        # ── KEYWORD intent ────────────────────────────────────────────────
        if intent == "KEYWORD":
            params = _params_for(intent)
            kw = str(params.get("keyword") or "").strip()
            if not kw:
                # Ask LLM to extract keyword from message
                kw_resp = llm.invoke([HumanMessage(content=(
                    f'Extract the search keyword or phrase from: "{user_message}". '
                    f'Return ONLY the keyword, nothing else.'
                ))])
                kw = str(getattr(kw_resp, "content", kw_resp)).strip().strip('"')
            if not kw:
                # An empty keyword would match every review — ask instead.
                return _respond(
                    "I couldn't tell which word or phrase to search for. "
                    "Which keyword should I look up in the reviews?",
                    "general",
                )

            result = _tool_keyword_search(df, kw, n=10)
            return _respond(result.get("summary", ""), "keyword", table=result.get("table"))

        # ── HELPFUL intent ────────────────────────────────────────────────
        if intent == "HELPFUL":
            params = _params_for(intent)
            result = _tool_top_helpful(df, n=_to_int(params.get("n") or 5, 5))
            reply = result["error"] if "error" in result else result.get("summary", "")
            return _respond(reply, "helpful", table=result.get("table"))

        # ── ANALYSIS intent (deep — calls LangGraph agent) ────────────────
        # Also used as fallback for everything not caught above.
        history_context = "\n".join(
            f"{'User' if t['role']=='user' else 'Assistant'}: {t['content']}"
            for t in list(memory)[-MAX_HISTORY_FOR_LLM:]
        )
        enriched_query = (
            f"Conversation so far:\n{history_context}\n\n"
            f"User's current question: {user_message}"
        ) if len(memory) > 2 else user_message

        # Run the full LangGraph agent
        agent_state = run_agent(enriched_query, df=df if has_data else None) or {}
        report = agent_state.get("report") or {}
        breakdown = agent_state.get("app_breakdown") or []

        # Format the reply text
        reply = _format_agent_report(report)
        if not reply.strip():
            reply = report.get("executive_summary") or "I've completed the analysis."

        return _respond(
            reply, "analysis",
            table=_build_agent_table(report, breakdown),
            agent_data={
                "top_problems": report.get("top_problems", []),
                "key_strengths": report.get("key_strengths", []),
                "recommendations": report.get("recommendations", []),
                "clusters": agent_state.get("clusters", []),
                "sentiment": agent_state.get("sentiment", {}),
                "stats": agent_state.get("stats", {}),
            },
        )

    except Exception as e:
        import traceback
        print(f"[Chat ERROR] {e}\n{traceback.format_exc()}")
        return jsonify({"error": str(e)}), 500
656
+
657
+
658
+ # ══════════════════════════════════════════════════════════════════════════
659
+ # SCRAPE ROUTES (unchanged from v1)
660
+ # ═══════════════════════════════════════════════════════════════════════════
661
+
662
@app.route('/scrape', methods=['POST'])
def scrape():
    """Scrape reviews for a single app.

    Request JSON: ``identifier`` (store URL, package id or app name),
    ``review_count_type`` ('fixed' or 'all'), ``review_count``,
    ``sort_order`` and ``star_ratings``. Returns ``app_info`` plus
    ``reviews``; 400 when no identifier is given, 404 when the app cannot
    be resolved, 500 on scraper errors.
    """
    try:
        # silent=True: a missing / non-JSON body is handled as an empty request.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        identifier = str(data.get('identifier') or '').strip()
        if not identifier:
            return jsonify({"error": "No app identifier provided"}), 400

        count_type = data.get('review_count_type', 'fixed')
        count = 100000 if count_type == 'all' else data.get('review_count', 150)

        app_id = extract_app_id(identifier)
        if not app_id:
            # Not a URL / package id → resolve the name via search, then
            # fall back to scraping the store search page.
            results = search(identifier, lang="en", country="us", n_hits=1)
            if results and results[0].get('appId'):
                app_id = results[0]['appId']
            else:
                pids = scrape_store_ids(identifier, n_hits=1)
                if pids:
                    app_id = pids[0]
                else:
                    return jsonify({"error": f"App '{identifier}' not found"}), 404

        info, all_reviews = fetch_app_reviews(
            app_id, count, data.get('sort_order'), data.get('star_ratings'))

        return jsonify({
            "app_info": {
                "title": info['title'],
                "icon": info['icon'],
                "score": info['score'],
                "reviews": info['reviews'],
                "appId": app_id,
            },
            "reviews": all_reviews,
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
697
+
698
+
699
@app.route('/find-apps', methods=['POST'])
def find_apps():
    """Find apps matching a search query and return their basic metadata.

    Request JSON: ``query`` (str) and optional ``app_count`` (default 10).
    Apps whose metadata cannot be fetched are left out of ``results``.
    Returns 400 when the query is empty.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        query = str(data.get('query') or '').strip()
        if not query:
            return jsonify({"error": "No query provided"}), 400

        try:
            app_count = max(1, int(data.get('app_count', 10)))
        except (TypeError, ValueError):
            app_count = 10  # unusable value → documented default

        app_ids = scrape_store_ids(query, n_hits=app_count)
        if not app_ids:
            hits = search(query, lang="en", country="us", n_hits=app_count)
            app_ids = [h['appId'] for h in hits if h.get('appId')]

        results = []
        for aid in app_ids:
            try:
                info = app_info(aid, lang='en', country='us')
                results.append({
                    "appId": aid,
                    "title": info['title'],
                    "icon": info['icon'],
                    "score": info['score'],
                    "developer": info.get('developer','Unknown'),
                    "installs": info.get('installs','0+'),
                })
            except Exception:
                # Best effort: one unavailable app must not fail the search.
                continue

        return jsonify({"results": results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
728
+
729
+
730
@app.route('/scrape-batch', methods=['POST'])
def scrape_batch():
    """Scrape reviews for several apps in one request.

    Request JSON: ``app_ids`` (list), ``review_count_type``,
    ``reviews_per_app``, ``sort_order`` and ``star_ratings``. Returns
    ``apps`` (metadata per scraped app), ``reviews`` (all reviews combined)
    and ``failed`` (ids that could not be scraped, with the error text).
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        app_ids = data.get('app_ids') or []
        if isinstance(app_ids, str):
            app_ids = [app_ids]  # a single id must not be iterated per character
        count_type = data.get('review_count_type', 'fixed')
        try:
            reviews_per_app = 100000 if count_type == 'all' else int(data.get('reviews_per_app', 100))
        except (TypeError, ValueError):
            reviews_per_app = 100

        if not app_ids:
            return jsonify({"error": "No app IDs provided"}), 400

        batch_results: list[dict] = []
        all_combined: list[dict] = []
        failed: list[dict] = []

        for app_id in app_ids:
            try:
                info, app_reviews = fetch_app_reviews(
                    app_id, reviews_per_app, data.get('sort_order'), data.get('star_ratings'))
                batch_results.append({
                    "title": info['title'],
                    "icon": info['icon'],
                    "score": info['score'],
                    "appId": app_id,
                })
                all_combined.extend(app_reviews)
            except Exception as exc:
                # Keep going, but report the failure instead of hiding it.
                failed.append({"appId": app_id, "error": str(exc)})
                continue

        return jsonify({"apps": batch_results, "reviews": all_combined, "failed": failed})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
761
+
762
+
763
@app.route("/search-suggestions", methods=["POST"])
def search_suggestions():
    """Return up to five app suggestions for a partial search query.

    Queries shorter than two characters yield an empty result list. Hits
    without a usable dotted ``appId`` are dropped.
    """
    try:
        payload = request.json or {}
        term = payload.get("query","").strip()
        if len(term) < 2:
            return jsonify({"results": []})

        suggestions = []
        for hit in search(term, lang="en", country="us", n_hits=6):
            package = hit.get("appId","")
            if package and package != "None" and "." in package:
                suggestions.append({
                    "appId": package,
                    "storeUrl": f"https://play.google.com/store/apps/details?id={package}",
                    "title": hit.get("title",""),
                    "icon": hit.get("icon",""),
                    "score": round(hit.get("score") or 0, 1),
                    "developer": hit.get("developer",""),
                    "installs": hit.get("installs",""),
                })
        return jsonify({"results": suggestions[:5]})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
788
+
789
+
790
+ # ═══════════════════════════════════════════════════════════════════════════
791
+ # CLEAR CHAT MEMORY (optional endpoint for "New Chat" button)
792
+ # ═══════════════════════════════════════════════════════════════════════════
793
+
794
@app.route('/chat/clear', methods=['POST'])
def clear_chat():
    """Forget the conversation memory of one chat session.

    The session is taken from the optional JSON ``session_id``, falling back
    to the client address (same rule as ``/chat``). A POST without a JSON
    body is accepted.
    """
    # silent=True: a body-less "New Chat" POST must not be rejected.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    session_id = payload.get('session_id') or request.remote_addr or "default"
    # pop() frees the entry; indexing the defaultdict would create one for
    # unknown sessions and keep it alive forever.
    _CONV_MEMORY.pop(session_id, None)
    return jsonify({"ok": True})
799
+
800
+
801
+ # ═══════════════════════════════════════════════════════════════════════════
802
+ # PAGE ROUTES
803
+ # ═══════════════════════════════════════════════════════════════════════════
804
+
805
@app.route('/scraper')
def scraper():
    """Render the ``index.html`` template for the /scraper route."""
    template_name = 'index.html'
    return render_template(template_name)
808
+
809
@app.route('/batch')
def batch():
    """Render the ``batch.html`` template for the /batch route."""
    template_name = 'batch.html'
    return render_template(template_name)
812
+
813
@app.route('/')
def landing():
    """Render the ``landing.html`` template for the site root."""
    template_name = 'landing.html'
    return render_template(template_name)
816
+
817
+
818
if __name__ == "__main__":
    # The interactive debugger allows arbitrary code execution, so it must
    # not be on by default for a server bound to all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", debug=debug_enabled, port=7860)