sonuprasad23 committed on
Commit
a00882d
·
1 Parent(s): 604f6ff

Project Uploaded

Browse files
Files changed (7) hide show
  1. Dockerfile +39 -0
  2. api_server.py +388 -0
  3. final5.py +462 -0
  4. groups.txt +2 -0
  5. index.html +586 -0
  6. recipients.json +6 -0
  7. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a base image with Python
FROM python:3.10-slim

# Set the working directory
WORKDIR /app

# Set environment variables to prevent interactive prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=utf-8
ENV FLASK_APP=api_server.py
ENV FLASK_RUN_HOST=0.0.0.0
ENV FLASK_RUN_PORT=7860

# Install system dependencies for Selenium and Chrome
# (gnupg provides `gpg`, used below to convert Google's signing key)
RUN apt-get update && apt-get install -y \
    wget \
    gnupg \
    unzip \
    && rm -rf /var/lib/apt/lists/*

# Install Google Chrome.
# The signing key is stored in a dedicated keyring and referenced with
# `signed-by` instead of being added through the deprecated `apt-key`, so the
# key is trusted for this repository only. The repository is fetched over
# https, and the list file is written with `>` so a re-run cannot duplicate
# the entry.
RUN wget -q -O - https://dl.google.com/linux/linux_signing_key.pub \
        | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google-chrome.list \
    && apt-get update && apt-get install -y google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application files
COPY . .

# Expose the port the app runs on
EXPOSE 7860

# Command to run the Flask application
CMD ["flask", "run"]
api_server.py ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re, json, time, base64, pickle, subprocess, threading, traceback, html
2
+ from datetime import datetime
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Dict, Any, Optional
5
+ from flask import Flask, request, jsonify, send_from_directory
6
+ from flask_cors import CORS
7
+ from google_auth_oauthlib.flow import InstalledAppFlow
8
+ from google.auth.transport.requests import Request
9
+ from googleapiclient.discovery import build
10
+ from googleapiclient.errors import HttpError
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
+ # Decode secrets at startup
16
+ if 'CREDENTIALS_B64' in os.environ:
17
+ with open('credentials.json', 'w') as f:
18
+ f.write(base64.b64decode(os.environ['CREDENTIALS_B64']).decode('utf-8'))
19
+
20
+ if 'FB_COOKIES_B64' in os.environ:
21
+ with open('facebook_cookies.pkl', 'wb') as f:
22
+ f.write(base64.b64decode(os.environ['FB_COOKIES_B64']))
23
+
24
+ GROUPS_TXT = os.environ.get("GROUPS_TXT", "groups.txt")
25
+ SCRAPE_OUTDIR = os.environ.get("SCRAPE_OUTDIR", "scraped")
26
+ ANALYSIS_OUTDIR = os.environ.get("ANALYSIS_OUTDIR", "analysis")
27
+ FINAL5_PATH = os.environ.get("FINAL5_PATH", "final5.py")
28
+ PYTHON_BIN = os.environ.get("PYTHON_BIN", "python")
29
+ SENDER_EMAIL = os.environ.get("SENDER_EMAIL", "smahato@hillsidemedicalgroup.com")
30
+
31
+ GEMINI_KEYS = []
32
+ for i in range(1, 6):
33
+ key = os.environ.get(f"GEMINI_API_KEY_{i}")
34
+ if key:
35
+ GEMINI_KEYS.append(key)
36
+
37
+ GMAIL_SCOPES = [
38
+ "https://www.googleapis.com/auth/gmail.send",
39
+ "https://www.googleapis.com/auth/gmail.metadata",
40
+ ]
41
+ os.makedirs(SCRAPE_OUTDIR, exist_ok=True)
42
+ os.makedirs(ANALYSIS_OUTDIR, exist_ok=True)
43
+
44
+ @dataclass
45
+ class GroupRun:
46
+ link: str
47
+ stage: str = "pending"
48
+ scraped_json: str = ""
49
+ analysis_json: str = ""
50
+ scraped_posts: int = 0
51
+ detected_posts: int = 0
52
+ emails_sent_by_final5: int = 0
53
+ error: str = ""
54
+
55
+ @dataclass
56
+ class PipelineState:
57
+ running: bool = False
58
+ message: str = "idle"
59
+ progress: int = 0
60
+ current: int = 0
61
+ total: int = 0
62
+ groups: List[GroupRun] = field(default_factory=list)
63
+ recipients: List[str] = field(default_factory=list)
64
+ summary_path: str = ""
65
+
66
# Static file serving is disabled on purpose. Mapping the working directory to
# the URL root (static_folder='.', static_url_path='') makes every file in it
# downloadable, including credentials.json, facebook_cookies.pkl and
# token.pickle, which this module writes into that same directory.
# index.html is still delivered by the explicit "/" route.
app = Flask(__name__, static_folder=None)
CORS(app)
68
+
69
class LogBuffer:
    """Thread-safe, bounded, in-memory store of log entries.

    Every entry is a dict with the keys ``id``, ``ts``, ``level``, ``source``
    and ``msg``. Ids increase by one per appended entry and are never reused,
    not even after :meth:`clear`, so a client can page through the log by
    remembering the last id it received.
    """

    def __init__(self, max_items: int = 10000):
        """Create an empty buffer that keeps at most ``max_items`` entries."""
        self._buf: List[Dict[str, Any]] = []
        self._lock = threading.Lock()
        self._next_id = 1
        self._max = max_items

    def append(self, msg: str, level: str = "info", source: str = "server"):
        """Add one entry, dropping the oldest entries once the buffer is full."""
        ts = datetime.now().strftime("%H:%M:%S")
        with self._lock:
            # The id is taken while holding the lock; reading it beforehand
            # lets two concurrent appends end up with the same id.
            self._buf.append(
                {"id": self._next_id, "ts": ts, "level": level, "source": source, "msg": msg}
            )
            self._next_id += 1
            overflow = len(self._buf) - self._max
            if overflow > 0:
                # Explicit count instead of ``buf[-max:]``: that slice keeps
                # everything when max is 0.
                del self._buf[:overflow]

    def clear(self):
        """Remove all entries; the id counter keeps counting."""
        with self._lock:
            self._buf.clear()

    def get_after(self, after_id: int, limit: int = 500):
        """Return ``(entries, last_id)`` for paging through the log.

        With ``after_id <= 0`` the newest ``limit`` entries are returned;
        otherwise the first ``limit`` entries whose id is greater than
        ``after_id``. ``last_id`` is the value to pass as ``after_id`` on the
        next call: the id of the last entry returned, so nothing is skipped
        when ``limit`` cuts the page short.
        """
        with self._lock:
            limit = max(0, limit)
            if limit == 0:
                data: List[Dict[str, Any]] = []
            elif after_id <= 0:
                data = self._buf[-limit:]
            else:
                data = [x for x in self._buf if x["id"] > after_id][:limit]

            if data:
                last_id = data[-1]["id"]
            elif limit > 0 and self._buf:
                # Nothing newer than after_id: report the newest stored id so
                # a client holding a stale, too-large id resynchronises.
                last_id = self._buf[-1]["id"]
            else:
                last_id = after_id
            return data, last_id
89
+
90
+ logs = LogBuffer()
91
+ def log(msg: str, level: str = "info", source: str = "server"):
92
+ logs.append(msg, level, source)
93
+ print(f"[{level.upper()}][{source}] {msg}", flush=True)
94
+
95
+ live_lock = threading.Lock()
96
+ live_state: Dict[str, Any] = {
97
+ "group": None,
98
+ "counts": {"total_posts": 0, "kw_hits": 0, "ai_done": 0, "confirmed": 0, "emails": 0},
99
+ "posts": []
100
+ }
101
+
102
+ def reset_live_state(group_link: str):
103
+ with live_lock:
104
+ live_state["group"] = group_link
105
+ live_state["counts"] = {"total_posts": 0, "kw_hits": 0, "ai_done": 0, "confirmed": 0, "emails": 0}
106
+ live_state["posts"] = []
107
+
108
+ def ensure_post_obj(pid: int) -> Dict[str, Any]:
109
+ with live_lock:
110
+ for p in live_state["posts"]:
111
+ if p.get("id") == pid:
112
+ return p
113
+ p = {"id": pid, "text": "", "group_link": live_state.get("group")}
114
+ live_state["posts"].append(p)
115
+ return p
116
+
117
+ def load_scraped_into_live(path: str):
118
+ try:
119
+ with open(path, "r", encoding="utf-8") as f:
120
+ posts = json.load(f)
121
+ except Exception as e:
122
+ log(f"live load error: {e}", "error", "LIVE")
123
+ return
124
+ with live_lock:
125
+ live_state["posts"] = posts
126
+ live_state["counts"]["total_posts"] = len(posts)
127
+
128
+ def handle_event_line(line: str):
129
+ if not line.startswith("::"): return
130
+ try:
131
+ if "::SCRAPE_SAVED::" in line:
132
+ path = line.split("::SCRAPE_SAVED::", 1)[1].strip()
133
+ if path: load_scraped_into_live(path)
134
+ elif "::KW_HIT::" in line:
135
+ d = json.loads(line.split("::KW_HIT::", 1)[1].strip())
136
+ p = ensure_post_obj(int(d["id"]))
137
+ p["found_keywords"] = d.get("found_keywords", [])
138
+ with live_lock: live_state["counts"]["kw_hits"] += 1
139
+ elif "::AI_RESULT::" in line:
140
+ d = json.loads(line.split("::AI_RESULT::", 1)[1].strip())
141
+ p = ensure_post_obj(int(d["id"]))
142
+ ai = d.get("ai", {})
143
+ p["ai"] = ai
144
+ with live_lock:
145
+ live_state["counts"]["ai_done"] += 1
146
+ if ai.get("is_medical_seeking"): live_state["counts"]["confirmed"] += 1
147
+ elif "::EMAIL_SENT::" in line:
148
+ d = json.loads(line.split("::EMAIL_SENT::", 1)[1].strip())
149
+ p = ensure_post_obj(int(d["id"]))
150
+ sent = int(d.get("sent", 0))
151
+ p["email_sent"] = sent > 0
152
+ if sent > 0:
153
+ with live_lock: live_state["counts"]["emails"] += sent
154
+ except Exception as e:
155
+ log(f"live parse error: {e}", "error", "LIVE")
156
+
157
+ def read_groups(path: str) -> List[str]:
158
+ if not os.path.exists(path): return []
159
+ with open(path, "r", encoding="utf-8") as f:
160
+ return [ln.strip() for ln in f.read().splitlines() if ln.strip()]
161
+
162
+ def slugify(url: str) -> str:
163
+ s = re.sub(r"[^a-zA-Z0-9]+", "-", url)
164
+ return s.strip("-").lower()
165
+
166
def build_gmail_service():
    """Build an authorized Gmail API client, or return None when unavailable.

    Credentials are read from ``token.pickle`` when present, refreshed when
    expired, and otherwise obtained through the installed-app OAuth flow using
    ``credentials.json``. The resulting credentials are written back to
    ``token.pickle``.

    This function is called while the module is being imported, so every
    failure is logged and turned into ``None`` instead of raising: a broken
    token must disable e-mail, not prevent the server from starting.

    Returns:
        The Gmail service object, or None if Gmail cannot be used.
    """
    creds = None
    if os.path.exists("token.pickle"):
        try:
            # NOTE(security): pickle.load executes code embedded in the file.
            # token.pickle must only ever be written by this application.
            with open("token.pickle", "rb") as token:
                creds = pickle.load(token)
        except Exception as e:
            log(f"token.pickle unreadable; ignoring it: {e}", "warn", "gmail")
            creds = None

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except Exception as e:
                log(f"Gmail token refresh failed: {e}", "warn", "gmail")
        if not refreshed:
            if not os.path.exists("credentials.json"):
                log("credentials.json missing; Gmail unavailable", "warn", "gmail")
                return None
            try:
                flow = InstalledAppFlow.from_client_secrets_file("credentials.json", GMAIL_SCOPES)
                # Needs a browser on this machine; expected to fail on a
                # headless host, in which case Gmail is simply disabled.
                creds = flow.run_local_server(port=0)
            except Exception as e:
                log(f"Gmail OAuth flow failed: {e}", "error", "gmail")
                return None
        try:
            with open("token.pickle", "wb") as token:
                pickle.dump(creds, token)
        except Exception as e:
            # The credentials are still usable for this process.
            log(f"Could not save token.pickle: {e}", "warn", "gmail")

    try:
        return build("gmail", "v1", credentials=creds)
    except Exception as e:
        log(f"Gmail service build failed: {e}", "error", "gmail")
        return None
185
+
186
+ gmail_service = build_gmail_service()
187
+
188
+ def send_html_email(to_emails: List[str], subject: str, html_content: str) -> int:
189
+ if not gmail_service:
190
+ log("Gmail not configured; skipping email", "warn", "gmail")
191
+ return 0
192
+ from email.message import EmailMessage
193
+ sent = 0
194
+ for to in to_emails:
195
+ try:
196
+ msg = EmailMessage()
197
+ msg["to"] = to
198
+ msg["from"] = SENDER_EMAIL
199
+ msg["subject"] = subject
200
+ msg.set_content(html_content, subtype="html")
201
+ raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
202
+ gmail_service.users().messages().send(userId="me", body={"raw": raw}).execute()
203
+ sent += 1
204
+ except HttpError as e:
205
+ log(f"Gmail HTTP error to {to}: {e}", "error", "gmail")
206
+ except Exception as e:
207
+ log(f"Gmail send error to {to}: {e}", "error", "gmail")
208
+ return sent
209
+
210
def build_confirmed_posts_email(groups_run: "List[GroupRun]", all_confirmed_posts: List[Dict[str, Any]]) -> str:
    """Render the consolidated HTML summary e-mail for one pipeline run.

    Args:
        groups_run: One entry per processed group; ``link``, ``scraped_posts``,
            ``detected_posts`` and ``stage`` are read from each.
        all_confirmed_posts: Post dicts; ``id``, ``text``, ``group_link`` and
            the nested ``ai_analysis`` dict (``urgency_level``, ``confidence``,
            ``medical_summary``) are read from each. Missing values render as
            "N/A" (or "#" for the link).

    Returns:
        A complete HTML document as a string.

    Every interpolated value that originates from scraped content, the AI
    response or the groups file is HTML-escaped (quotes included, because
    links are placed inside ``href="..."``), so such content cannot inject
    markup into the message.
    """
    def esc(value: Any, default: str = "N/A") -> str:
        # None is treated as "missing"; anything else is stringified first so
        # non-string values (numbers, None from the AI JSON) cannot make
        # html.escape raise.
        return html.escape(default if value is None else str(value), quote=True)

    total_groups = len(groups_run)
    total_scraped = sum(g.scraped_posts for g in groups_run)
    total_confirmed = len(all_confirmed_posts)

    table_rows = "".join(
        f"""
        <tr>
            <td style="padding: 8px; border-bottom: 1px solid #eee;"><a href="{esc(g.link)}" target="_blank">{esc(g.link)}</a></td>
            <td style="padding: 8px; border-bottom: 1px solid #eee; text-align: center;">{esc(g.scraped_posts)}</td>
            <td style="padding: 8px; border-bottom: 1px solid #eee; text-align: center;">{esc(g.detected_posts)}</td>
            <td style="padding: 8px; border-bottom: 1px solid #eee;">{"OK" if g.stage == "done" else "ERROR"}</td>
        </tr>"""
        for g in groups_run
    )
    summary_table_html = (
        '<h3>Group Summary</h3>'
        '<table style="width: 100%; border-collapse: collapse; margin-top: 8px; border: 1px solid #ddd;">'
        '<thead><tr style="background: #0f172a; color: #fff;">'
        '<th style="text-align: left; padding: 8px;">Group Link</th>'
        '<th style="text-align: center; padding: 8px;">Posts Scraped</th>'
        '<th style="text-align: center; padding: 8px;">Confirmed Posts</th>'
        '<th style="text-align: left; padding: 8px;">Status</th>'
        f'</tr></thead><tbody>{table_rows}</tbody></table>'
    )

    if all_confirmed_posts:
        post_blocks = []
        for p in all_confirmed_posts:
            ai = p.get("ai_analysis")
            if not isinstance(ai, dict):
                # Absent or malformed analysis: render the post with "N/A"s.
                ai = {}
            post_blocks.append(f"""
        <div style="margin-bottom: 25px; padding: 12px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;">
            <h4 style="margin-top: 0; margin-bottom: 8px;">Post ID: {esc(p.get("id"))} | Urgency: {esc(ai.get("urgency_level"))} | Confidence: {esc(ai.get("confidence"))}</h4>
            <p style="margin: 5px 0;"><strong>Summary:</strong> {esc(ai.get("medical_summary"))}</p>
            <p style="margin: 5px 0;"><strong>Text:</strong></p>
            <pre style="white-space: pre-wrap; background-color: #f0f0f0; padding: 8px; border: 1px solid #eee; border-radius: 3px; font-family: monospace; font-size: 0.9em;">{esc(p.get("text"))}</pre>
            <p style="margin: 5px 0;"><a href="{esc(p.get("group_link"), "#")}" target="_blank">View Group</a></p>
        </div>""")
        posts_html = "".join(post_blocks)
    else:
        posts_html = "<p>No confirmed medical posts were found during this run.</p>"

    completed_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return "".join([
        '<!DOCTYPE html><html><head><title>Hillside Medical Group - Confirmed Medical Posts Summary</title></head>',
        '<body style="font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5;">',
        '<div style="max-width: 900px; margin: 20px auto; padding: 20px; background-color: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px;">',
        '<div style="background: #1e3c72; color: #fff; padding: 16px 20px; border-radius: 6px 6px 0 0;">',
        '<h2 style="margin: 0;">Hillside Medical Group - Confirmed Medical Posts</h2>',
        f'<div style="font-size: 0.9em;">Run completed: {completed_at}</div></div>',
        '<div style="padding: 16px;">',
        f'<p><strong>Overall Summary:</strong> Processed {total_groups} groups, scraped {total_scraped} posts, found {total_confirmed} confirmed medical posts.</p>',
        '<hr style="margin: 20px 0; border: 0; border-top: 1px solid #eee;">',
        summary_table_html,
        '<hr style="margin: 20px 0; border: 0; border-top: 1px solid #eee;">',
        '<h3>Confirmed Posts Details</h3>',
        posts_html,
        '</div>',
        '<div style="margin-top: 20px; padding: 10px; font-size: 0.8em; color: #666; border-top: 1px solid #eee;">',
        '<p>This email contains posts identified as potentially seeking personal medical help. Please review and take appropriate action.</p>',
        '<p><em>Note: The link provided is to the group. Direct post links are not currently extracted.</em></p>',
        '</div></div></body></html>',
    ])
231
+
232
+ state = PipelineState()
233
+
234
def stream_process_lines(args: List[str], env: Optional[Dict[str, str]] = None, tag: str = "FINAL5") -> int:
    """Run a child process and forward its output, line by line, to the log.

    stdout lines are logged at "info", stderr lines at "warn". Lines starting
    with "::" are additionally passed to ``handle_event_line``.

    Args:
        args: Command and arguments (no shell is involved).
        env: Environment for the child; defaults to a copy of ``os.environ``.
        tag: Log source label for everything this call logs.

    Returns:
        The child's exit code.
    """
    # The command line is written to the log buffer, which clients can read
    # back, so the value following any "--...key..." option is masked.
    shown: List[str] = []
    mask_next = False
    for arg in args:
        shown.append("***" if mask_next else arg)
        mask_next = arg.startswith("--") and "key" in arg.lower()
    log(f"Exec: {' '.join(shown)}", "info", tag)

    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # Decode explicitly and leniently: with the locale default and strict
        # errors a single undecodable byte raises inside a pump thread, after
        # which that pipe is no longer drained.
        encoding="utf-8",
        errors="replace",
        bufsize=1,
        env=env or os.environ.copy(),
    )

    def pump(pipe, name):
        for raw in pipe:
            line = (raw or "").rstrip("\n")
            if not line:
                continue
            if line.startswith("::"):
                try:
                    handle_event_line(line)
                except Exception as e:
                    log(f"event parse error: {e}", "error", tag)
            log(line, "info" if name == "stdout" else "warn", tag)

    t1 = threading.Thread(target=pump, args=(proc.stdout, "stdout"), daemon=True)
    t2 = threading.Thread(target=pump, args=(proc.stderr, "stderr"), daemon=True)
    t1.start()
    t2.start()
    rc = proc.wait()
    # Give the pumps time to drain what the child wrote just before exiting.
    # The wait stays bounded because a process that inherited the pipes could
    # keep them open after the child itself is gone.
    t1.join(timeout=2.0)
    t2.join(timeout=2.0)
    log(f"Exit code: {rc}", "info", tag)
    return rc
252
+
253
+ def call_final5_for_group(group_url: str, out_json: str, analysis_json: str, recipients: List[str]) -> Dict[str, Any]:
254
+ args = [PYTHON_BIN, FINAL5_PATH, "--group", group_url, "--out", out_json, "--analysis-out", analysis_json, "--recipients", ",".join(recipients), "--sender", SENDER_EMAIL, "--headless"]
255
+ if GEMINI_KEYS: args.extend(["--gemini-keys", ",".join(GEMINI_KEYS)])
256
+ env = os.environ.copy()
257
+ env["PYTHONUNBUFFERED"] = "1"
258
+ env["PYTHONIOENCODING"] = "utf-8"
259
+ rc = stream_process_lines(args, env=env, tag="FINAL5")
260
+ return {"ok": rc == 0, "code": rc}
261
+
262
+ def run_pipeline(recipients: List[str]):
263
+ try:
264
+ logs.clear()
265
+ log("Pipeline starting", "info", "ORCHESTRATOR")
266
+ state.running, state.message, state.progress, state.recipients = True, "initializing", 0, recipients
267
+ state.groups.clear()
268
+ links = read_groups(GROUPS_TXT)
269
+ state.total = len(links)
270
+ if not links:
271
+ log("No groups found in groups.txt", "warn", "ORCHESTRATOR")
272
+ state.message, state.running = "No groups", False
273
+ return
274
+ all_confirmed_posts = []
275
+ for i, link in enumerate(links, start=1):
276
+ reset_live_state(link)
277
+ g = GroupRun(link=link, stage="running")
278
+ state.groups.append(g)
279
+ state.current, state.message, state.progress = i, f"Processing {link}", int(((i - 1) / max(1, state.total)) * 100)
280
+ log(f"[{i}/{state.total}] Processing group: {link}", "info", "ORCHESTRATOR")
281
+ slug = slugify(link)
282
+ out_json, analysis_json = os.path.join(SCRAPE_OUTDIR, f"{slug}.json"), os.path.join(ANALYSIS_OUTDIR, f"analysis_{slug}.json")
283
+ g.scraped_json, g.analysis_json = out_json, analysis_json
284
+ result = call_final5_for_group(link, out_json, analysis_json, recipients)
285
+ if not result.get("ok"):
286
+ g.stage, g.error = "error", f"final5 exit code {result.get('code')}"
287
+ log(f"final5 failed for {link}: code {result.get('code')}", "error", "ORCHESTRATOR")
288
+ else:
289
+ try:
290
+ if os.path.exists(out_json):
291
+ with open(out_json, "r", encoding="utf-8") as f: g.scraped_posts = len(json.load(f))
292
+ if os.path.exists(analysis_json):
293
+ with open(analysis_json, "r", encoding="utf-8") as f: a = json.load(f)
294
+ g.detected_posts = a.get("confirmed_medical", 0)
295
+ g.emails_sent_by_final5 = a.get("emails_sent", 0)
296
+ confirmed_posts = a.get("posts", [])
297
+ for post in confirmed_posts:
298
+ if "group_link" not in post: post["group_link"] = link
299
+ all_confirmed_posts.extend(confirmed_posts)
300
+ g.stage = "done"
301
+ log(f"Group done: scraped={g.scraped_posts}, confirmed={g.detected_posts}", "info", "ORCHESTRATOR")
302
+ except Exception as e:
303
+ g.stage, g.error = "error", f"parse_error: {e}"
304
+ log(f"Parsing outputs failed for {link}: {e}", "error", "ORCHESTRATOR")
305
+ state.progress = int((i / max(1, state.total)) * 100)
306
+ try:
307
+ html_content = build_confirmed_posts_email(state.groups, all_confirmed_posts)
308
+ subject = f"🩺 Hillside - Confirmed Medical Posts Found ({len(all_confirmed_posts)} total)"
309
+ sent_count = send_html_email(recipients, subject, html_content)
310
+ log(f"Consolidated email sent to {len(recipients)} recipient(s), {sent_count} successful", "info", "GMAIL")
311
+ except Exception as e:
312
+ log(f"Error building or sending consolidated email: {e}", "error", "ORCHESTRATOR")
313
+ summary = {"run_date": datetime.now().isoformat(), "groups": [g.__dict__ for g in state.groups]}
314
+ summary_path = os.path.join(ANALYSIS_OUTDIR, "analysis_summary.json")
315
+ with open(summary_path, "w", encoding="utf-8") as f: json.dump(summary, f, ensure_ascii=False, indent=2)
316
+ state.summary_path, state.message, state.progress, state.running = summary_path, "All groups processed", 100, False
317
+ log("Pipeline finished", "info", "ORCHESTRATOR")
318
+ except Exception as e:
319
+ state.message, state.running = f"pipeline_error: {e}", False
320
+ log(f"Pipeline error: {e}\n{traceback.format_exc()}", "error", "ORCHESTRATOR")
321
+
322
+ @app.route("/")
323
+ def index():
324
+ return send_from_directory('.', 'index.html')
325
+
326
+ @app.get("/api/system/status")
327
+ def system_status():
328
+ return jsonify({
329
+ "gmail": gmail_service is not None, "groups_file_exists": os.path.exists(GROUPS_TXT),
330
+ "groups_count": len(read_groups(GROUPS_TXT)), "scrape_outdir": SCRAPE_OUTDIR,
331
+ "analysis_outdir": ANALYSIS_OUTDIR, "sender_email": SENDER_EMAIL,
332
+ "final5_exists": os.path.exists(FINAL5_PATH), "gemini_keys_count": len(GEMINI_KEYS)
333
+ })
334
+
335
+ @app.get("/api/groups")
336
+ def api_groups():
337
+ return jsonify({"groups": read_groups(GROUPS_TXT)})
338
+
339
# Makes "check whether a run is active, then mark one as active" a single
# step, so two simultaneous requests cannot both start a pipeline.
_pipeline_start_lock = threading.Lock()


@app.post("/api/process/start")
def api_process_start():
    """Start the pipeline in a background thread.

    The optional JSON body may contain ``recipients`` as a list of addresses
    or a comma-separated string; when absent or empty, ``SENDER_EMAIL`` is
    used. A missing or non-JSON body is treated as an empty one.

    Returns:
        409 when a run is already active, otherwise a success payload with
        the recipients that will be used.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    recips = data.get("recipients") or [SENDER_EMAIL]
    if isinstance(recips, str):
        recips = recips.split(",")
    if isinstance(recips, (list, tuple)):
        recips = [str(e).strip() for e in recips if e is not None and str(e).strip()]
    else:
        recips = []
    if not recips:
        recips = [SENDER_EMAIL]

    with _pipeline_start_lock:
        if state.running:
            return jsonify({"success": False, "message": "Already running"}), 409
        # Claimed here rather than only inside the worker thread, which sets
        # it some time after this request has returned.
        state.running = True

    try:
        threading.Thread(target=run_pipeline, args=(recips,), daemon=True).start()
    except Exception:
        state.running = False
        raise
    log(f"Start requested by client; recipients={recips}", "info", "API")
    return jsonify({"success": True, "message": "Pipeline started", "recipients": recips})
348
+
349
+ @app.get("/api/process/status")
350
+ def api_process_status():
351
+ return jsonify({"running": state.running, "message": state.message, "progress": state.progress,
352
+ "current": state.current, "total": state.total, "groups": [g.__dict__ for g in state.groups]})
353
+
354
@app.get("/api/process/logs")
def api_process_logs():
    """Return log entries newer than ``after`` (query arg), at most ``limit``.

    Non-numeric ``after``/``limit`` values fall back to their defaults (0 and
    500) instead of producing a server error, and ``limit`` is at least 1.
    The ``last`` field of the response comes from ``logs.get_after``.
    """
    after = request.args.get("after", default=0, type=int)
    limit = request.args.get("limit", default=500, type=int)
    data, last_id = logs.get_after(after, limit=max(1, limit))
    return jsonify({"entries": data, "last": last_id})
358
+
359
+ @app.post("/api/process/clear-logs")
360
+ def api_clear_logs():
361
+ logs.clear()
362
+ log("Logs cleared by client", "info", "API")
363
+ return jsonify({"success": True})
364
+
365
+ @app.get("/api/live/state")
366
+ def api_live_state():
367
+ with live_lock: return jsonify({"success": True, "data": live_state})
368
+
369
+ @app.get("/api/results/summary")
370
+ def api_results_summary():
371
+ p = state.summary_path or os.path.join(ANALYSIS_OUTDIR, "analysis_summary.json")
372
+ if not os.path.exists(p): return jsonify({"success": False, "message": "No summary yet"}), 404
373
+ with open(p, "r", encoding="utf-8") as f: return jsonify({"success": True, "data": json.load(f)})
374
+
375
+ @app.get("/api/recipients")
376
+ def api_get_recipients():
377
+ recipients_path = "recipients.json"
378
+ if not os.path.exists(recipients_path): return jsonify({"success": False, "message": "recipients.json not found"}), 404
379
+ try:
380
+ with open(recipients_path, "r", encoding="utf-8") as f: data = json.load(f)
381
+ if not isinstance(data, list): return jsonify({"success": False, "message": "Invalid format"}), 500
382
+ return jsonify({"success": True, "data": data})
383
+ except Exception as e:
384
+ return jsonify({"success": False, "message": f"Error reading file: {str(e)}"}), 500
385
+
386
+ if __name__ == "__main__":
387
+ port = int(os.environ.get("PORT", 7860))
388
+ app.run(host="0.0.0.0", port=port)
final5.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re, sys, time, json, base64, pickle, argparse, traceback
2
+ from typing import List, Dict, Any, Tuple
3
+ from datetime import datetime
4
+
5
+ try:
6
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
7
+ sys.stderr.reconfigure(encoding="utf-8", errors="replace")
8
+ except Exception:
9
+ pass
10
+
11
+ from selenium import webdriver
12
+ from selenium.webdriver.common.by import By
13
+ from selenium.webdriver.support.ui import WebDriverWait
14
+ from selenium.webdriver.support import expected_conditions as EC
15
+ from selenium.common.exceptions import (
16
+ StaleElementReferenceException, NoSuchElementException, TimeoutException
17
+ )
18
+ from google_auth_oauthlib.flow import InstalledAppFlow
19
+ from google.auth.transport.requests import Request
20
+ from googleapiclient.discovery import build
21
+ from googleapiclient.errors import HttpError
22
+ import google.generativeai as genai
23
+ from google.api_core.exceptions import ResourceExhausted
24
+
25
+ def get_args():
26
+ p = argparse.ArgumentParser(description="Scrape one FB group, analyze, and email alerts.")
27
+ p.add_argument("--group", required=True)
28
+ p.add_argument("--out", required=True)
29
+ p.add_argument("--analysis-out", required=True)
30
+ p.add_argument("--recipients", default="")
31
+ p.add_argument("--sender", default=os.environ.get("SENDER_EMAIL", ""))
32
+ p.add_argument("--cookies-file", default=os.environ.get("FB_COOKIES_FILE","facebook_cookies.pkl"))
33
+ p.add_argument("--max-scrolls", type=int, default=int(os.environ.get("MAX_SCROLLS","5")))
34
+ p.add_argument("--scroll-pause", type=float, default=float(os.environ.get("SCROLL_PAUSE","3")))
35
+ p.add_argument("--gemini-keys", default="")
36
+ p.add_argument("--headless", action="store_true", help="Prefer headless browser")
37
+ return p.parse_args()
38
+
39
+ GMAIL_SCOPES = [
40
+ "https://www.googleapis.com/auth/gmail.send",
41
+ "https://www.googleapis.com/auth/gmail.metadata",
42
+ ]
43
+
44
def build_gmail_service():
    """Build an authorized Gmail API client, or return None when unavailable.

    Credentials come from ``token.pickle`` when it holds valid ones, from a
    token refresh when they are expired and refreshable, and otherwise from
    the installed-app OAuth flow using ``credentials.json``. Usable
    credentials are written back to ``token.pickle``. The service is verified
    with a ``getProfile`` call before it is returned.

    Returns:
        The Gmail service object, or None if Gmail cannot be used. Failures
        are printed with a ``[GMAIL]`` prefix and never raised.
    """
    creds = None
    if os.path.exists("token.pickle"):
        try:
            # NOTE(security): pickle.load executes code embedded in the file.
            # token.pickle must only ever be written by this application.
            with open("token.pickle", "rb") as token:
                creds = pickle.load(token)
        except Exception:
            creds = None

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except Exception:
                refreshed = False
        if not refreshed:
            # Covers "no stored credentials", "refresh failed" and "stored
            # credentials are invalid but cannot be refreshed"; the last case
            # must not fall through with the unusable credentials.
            if not os.path.exists("credentials.json"):
                print("[GMAIL] credentials.json missing; Gmail unavailable")
                return None
            try:
                flow = InstalledAppFlow.from_client_secrets_file("credentials.json", GMAIL_SCOPES)
                creds = flow.run_local_server(port=0)
            except Exception as e:
                print(f"[GMAIL] OAuth flow failed: {e}")
                return None
        try:
            with open("token.pickle", "wb") as token:
                pickle.dump(creds, token)
        except Exception as e:
            # The credentials are still usable for this process.
            print(f"[GMAIL] could not save token.pickle: {e}")

    try:
        svc = build("gmail", "v1", credentials=creds)
        _ = svc.users().getProfile(userId="me").execute()
        return svc
    except Exception as e:
        print(f"[GMAIL] service build failed: {e}")
        return None
73
+
74
+ def send_html_email(service, sender: str, to_list: List[str], subject: str, html: str) -> int:
75
+ if not service:
76
+ print("[GMAIL] service not available; skipping email")
77
+ return 0
78
+ from email.message import EmailMessage
79
+ sent = 0
80
+ for to in to_list:
81
+ try:
82
+ msg = EmailMessage()
83
+ msg["to"] = to
84
+ msg["from"] = sender
85
+ msg["subject"] = subject
86
+ msg.set_content(html, subtype="html")
87
+ raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
88
+ service.users().messages().send(userId="me", body={"raw": raw}).execute()
89
+ sent += 1
90
+ except HttpError as e:
91
+ print(f"[GMAIL] http error to {to}: {e}")
92
+ except Exception as e:
93
+ print(f"[GMAIL] send error to {to}: {e}")
94
+ return sent
95
+
96
GEMINI_MODEL = "gemini-1.5-flash"


class GeminiManager:
    """Holds a Gemini model and cycles through several API keys.

    Keys are used round-robin: rotating past the last key wraps around to the
    first one. A rate-limited key is therefore only skipped for now, not
    discarded for the rest of the run, and the manager stays available for a
    later retry as long as at least one key can be configured.
    """

    def __init__(self, api_keys: List[str]):
        """Configure the first usable key from ``api_keys`` (may be empty)."""
        self.api_keys = api_keys
        self.current_key_index = 0
        self.model = None
        self._setup_model()

    def _setup_model(self):
        """Configure a model, starting at ``current_key_index`` and wrapping.

        Each key is tried at most once. On success ``current_key_index``
        points at the key in use; if every key fails, ``model`` is None.
        """
        if not self.api_keys:
            print("[GEMINI] No API keys provided")
            self.model = None
            return
        total = len(self.api_keys)
        start = self.current_key_index % total
        for offset in range(total):
            idx = (start + offset) % total
            try:
                genai.configure(api_key=self.api_keys[idx])
                self.model = genai.GenerativeModel(GEMINI_MODEL)
                self.current_key_index = idx
                print(f"[GEMINI] Using API key {idx + 1}")
                return
            except Exception as e:
                print(f"[GEMINI] Failed to setup with key {idx + 1}: {e}")
        print("[GEMINI] All API keys failed")
        self.model = None

    def rotate_key(self):
        """Switch to the next key, wrapping to the first after the last."""
        if self.api_keys:
            self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
        self._setup_model()

    def is_available(self):
        """Return True when a model is configured."""
        return self.model is not None

    def generate_content(self, prompt: str):
        """Generate a response, moving to the next key on a rate-limit error.

        Every key gets at most one attempt per call. If all of them are rate
        limited, the last ``ResourceExhausted`` is re-raised so the caller can
        back off and call again.

        Raises:
            Exception: "No available Gemini model" when no model is configured.
            ResourceExhausted: when every key is rate limited.
        """
        if not self.is_available():
            raise Exception("No available Gemini model")
        last_error = None
        for _ in range(max(1, len(self.api_keys))):
            try:
                return self.model.generate_content(prompt)
            except ResourceExhausted as e:
                last_error = e
                self.rotate_key()
                if not self.is_available():
                    break
        raise last_error
141
+
142
+ def parse_retry_seconds_from_error(err: Exception) -> int:
143
+ s = str(err)
144
+ m1 = re.search(r"retry[_ ]delay\s*\{\s*seconds:\s*(\d+)", s, re.IGNORECASE)
145
+ if m1: return int(m1.group(1))
146
+ m2 = re.search(r'"retryDelay"\s*:\s*"(\d+)s"', s)
147
+ if m2: return int(m2.group(1))
148
+ return 45
149
+
150
+ def ai_medical_intent(gemini_manager: GeminiManager, post_text: str, found_keywords: List[str]) -> Dict[str,Any]:
151
+ fallback = {
152
+ "is_medical_seeking": False,
153
+ "confidence": "low",
154
+ "medical_summary": "Not a medical request (AI unavailable/throttled)",
155
+ "suggested_services": [],
156
+ "urgency_level": "low",
157
+ "analysis": "Keyword-based fallback",
158
+ "reasoning": "short explanation",
159
+ "matched_keywords": found_keywords
160
+ }
161
+ if not gemini_manager or not gemini_manager.is_available():
162
+ return fallback
163
+ keywords_str = ", ".join(found_keywords) if found_keywords else "none"
164
+ prompt = f"""
165
+ Analyze this social post and decide if the author is genuinely seeking medical help, doctor/hospital recommendations, or healthcare services for PERSONAL HEALTH NEEDS (not business, donations, or casual mentions).
166
+ KEYWORDS FOUND IN POST: {keywords_str}
167
+ CRITICAL RULES:
168
+ 1. ONLY flag posts where someone is seeking medical care for themselves or a loved one
169
+ 2. IGNORE posts about:
170
+ - Business services (e.g., "Looking for a doctor for my clinic")
171
+ - Donations or fundraising (e.g., "Raising money for surgery")
172
+ - Selling medical products
173
+ - Job postings for medical professionals
174
+ - General health information sharing
175
+ - Research or academic inquiries
176
+ 3. ONLY flag if it's a PERSONAL HEALTH NEED
177
+ Post: "{post_text}"
178
+ Return ONLY JSON:
179
+ {{
180
+ "is_medical_seeking": true/false,
181
+ "confidence": "high/medium/low",
182
+ "medical_summary": "short summary",
183
+ "suggested_services": ["service1","service2"],
184
+ "urgency_level": "high/medium/low",
185
+ "analysis": "why it's seeking help",
186
+ "reasoning": "short explanation",
187
+ "matched_keywords": ["keyword1", "keyword2"]
188
+ }}
189
+ """
190
+ for attempt in range(1, 5):
191
+ try:
192
+ resp = gemini_manager.generate_content(prompt)
193
+ txt = (resp.text or "").strip()
194
+ s, e = txt.find("{"), txt.rfind("}") + 1
195
+ if s >= 0 and e > s:
196
+ result = json.loads(txt[s:e])
197
+ result["is_medical_seeking"] = bool(result.get("is_medical_seeking", False))
198
+ if "matched_keywords" not in result:
199
+ result["matched_keywords"] = found_keywords
200
+ return result
201
+ return fallback
202
+ except ResourceExhausted as e:
203
+ wait_s = min(parse_retry_seconds_from_error(e) + 2, 120)
204
+ print(f"[GEMINI] 429 rate limit; backoff {wait_s}s (attempt {attempt}/4)")
205
+ time.sleep(wait_s)
206
+ if gemini_manager.is_available():
207
+ continue
208
+ else:
209
+ return fallback
210
+ except Exception as e:
211
+ print(f"[GEMINI] error: {e}")
212
+ gemini_manager.rotate_key()
213
+ if not gemini_manager.is_available():
214
+ return fallback
215
+ return fallback
216
+
217
MEDICAL_KEYWORDS = [
    "doctor", "physician", "primary care", "healthcare", "medical", "clinic", "hospital",
    "urgent care", "emergency", "er", "specialist", "pediatrician", "dentist",
    "gynecologist", "obgyn", "women's health", "health center", "family doctor",
    "maternity", "prenatal", "postnatal", "labor", "delivery",
    "need doctor", "looking for doctor", "find doctor", "recommend doctor",
    "medical help", "health help", "appointment", "checkup", "treatment",
    "prescription", "medicine", "surgery", "best hospital", "best clinic",
    "where to go", "doctor recommendation",
    "pregnancy", "birth control", "contraception", "fertility",
    "hillside", "medical group", "wellness center",
]

# Very short keywords such as "er" occur inside ordinary words ("other",
# "her", "burger"), so they only count when they stand alone as a word.
# Longer keywords keep plain substring matching so that inflected forms
# ("doctors", "hospitals") are still found.
_WHOLE_WORD_MAX_LEN = 3
_WHOLE_WORD_PATTERNS = {
    kw: re.compile(rf"(?<!\w){re.escape(kw)}(?!\w)")
    for kw in MEDICAL_KEYWORDS
    if len(kw) <= _WHOLE_WORD_MAX_LEN
}


def contains_keywords(text: str) -> Tuple[bool, List[str]]:
    """Find which medical keywords occur in ``text`` (case-insensitive).

    Args:
        text: Post text; ``None`` or empty yields no hits.

    Returns:
        ``(found, hits)`` where ``hits`` lists the matched keywords in
        ``MEDICAL_KEYWORDS`` order and ``found`` is True when it is non-empty.
    """
    tl = (text or "").lower()
    hits = []
    for kw in MEDICAL_KEYWORDS:
        pattern = _WHOLE_WORD_PATTERNS.get(kw)
        if pattern is not None:
            matched = pattern.search(tl) is not None
        else:
            matched = kw in tl
        if matched:
            hits.append(kw)
    return (len(hits) > 0, hits)
234
+
235
def new_driver(headless: bool):
    """Create a Chrome WebDriver with the scraper's fixed option set.

    Args:
        headless: When true, the headless-only flags below are added as well.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    chrome_opts = webdriver.ChromeOptions()

    for flag in (
        "--disable-notifications",
        "--disable-web-security",
        "--disable-features=IsolateOrigins,site-per-process",
        "--disable-blink-features=AutomationControlled",
    ):
        chrome_opts.add_argument(flag)

    chrome_opts.add_experimental_option("useAutomationExtension", False)
    chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])

    remaining_flags = [
        "--window-size=1920,1080",
        "--lang=en-US,en",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    ]
    if headless:
        remaining_flags.extend([
            "--headless=new",
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-extensions",
            "--disable-plugins",
            "--disable-images",
        ])
    for flag in remaining_flags:
        chrome_opts.add_argument(flag)

    browser = webdriver.Chrome(options=chrome_opts)

    # Best effort: make navigator.webdriver read as undefined on every new
    # document. A failure here is deliberately ignored.
    hide_webdriver_js = "Object.defineProperty(navigator, 'webdriver', { get: () => undefined });"
    try:
        browser.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": hide_webdriver_js},
        )
    except Exception:
        pass
    return browser
262
+
263
def load_cookies(driver, cookies_file: str):
    """Open facebook.com, inject cookies from a pickle file, then reload.

    Args:
        driver: WebDriver used for navigation and cookie injection.
        cookies_file: Path to a pickled iterable of cookie dicts.

    Raises:
        RuntimeError: If the cookie file is missing, or anything else fails
            while reading cookies or refreshing the page.
    """
    allowed_same_site = ("Strict", "Lax", "None")

    print("[FB] Loading Facebook homepage...")
    driver.get("https://www.facebook.com")
    time.sleep(3)
    try:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # cookie files from a trusted source.
        with open(cookies_file, "rb") as fh:
            stored_cookies = pickle.load(fh)

        for entry in stored_cookies:
            # Replace a sameSite value outside the allowed set with "Lax".
            if "sameSite" in entry and entry["sameSite"] not in allowed_same_site:
                entry["sameSite"] = "Lax"
            try:
                driver.add_cookie(entry)
            except Exception:
                # One rejected cookie must not abort the whole load.
                pass

        print("[FB] Cookies loaded. Refreshing page...")
        driver.refresh()
        time.sleep(5)
    except FileNotFoundError:
        raise RuntimeError(f"[FB] Cookies file not found: {cookies_file}")
    except Exception as e:
        raise RuntimeError(f"[FB] Cookie load error: {e}")
284
+
285
def wait_group_feed(driver, wait):
    """Block until the group feed (or at least one article) is in the DOM.

    First waits for ``<body>`` via *wait*, then polls once per second for up
    to 30 seconds for a feed container, falling back to any article element.

    Raises:
        TimeoutException: If neither element appears within the 30 s window.
    """
    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

    # Tried in order on every polling round; the first match ends the wait.
    probes = (
        "//div[@data-pagelet='GroupFeed' or @role='feed']",
        "//div[@role='article']",
    )
    timeout = 30
    start = time.time()
    while (time.time() - start) < timeout:
        for xpath in probes:
            try:
                driver.find_element(By.XPATH, xpath)
            except NoSuchElementException:
                continue
            return
        time.sleep(1)
    raise TimeoutException("Timed out waiting for group feed")
302
+
303
def find_message_nodes(driver):
    """Return the elements that hold post message text.

    Two dedicated message-container selectors are tried in order and the
    first non-empty result wins. Otherwise the result of a broad fallback
    (any non-empty ``dir='auto'`` div inside an article) is returned, which
    may be an empty list.
    """
    preferred_xpaths = (
        "//div[@data-ad-preview='message']",
        "//div[@data-ad-comet-preview='message']",
    )
    for xpath in preferred_xpaths:
        matches = driver.find_elements(By.XPATH, xpath)
        if matches:
            return matches
    return driver.find_elements(
        By.XPATH,
        "//div[@role='article']//div[@dir='auto' and string-length(normalize-space())>0]",
    )
309
+
310
def _node_text(node):
    """Return *node*'s stripped text, using its enclosing article if too short.

    A StaleElementReferenceException raised while reading the node's own text
    propagates to the caller; any failure in the article fallback is ignored
    and the short text is returned.
    """
    text = (node.text or "").strip()
    if len(text) < 20:
        try:
            article = node.find_element(By.XPATH, "ancestor::div[@role='article']")
            text = (article.text or "").strip()
        except Exception:
            pass
    return text


def scrape_group(driver, wait, group_url: str, max_scrolls: int, pause: float):
    """Scroll a group's feed and collect unique post texts.

    Args:
        driver: WebDriver used for navigation and DOM queries.
        wait: Wait helper, used for the feed and for document readiness.
        group_url: URL of the group to open.
        max_scrolls: Number of scroll-to-bottom rounds to perform.
        pause: Seconds to sleep after each scroll.

    Returns:
        A list of ``{"id", "text", "group_link"}`` dicts, with ``id`` counting
        up from 1 in discovery order.
    """
    print(f"[SCRAPE] Navigating to group: {group_url}")
    driver.get(group_url)
    wait_group_feed(driver, wait)

    # Text containing any of these is treated as page chrome, not a post.
    ui_noise = ("LikeCommentShare", "Write a comment", "View more comments")

    posts, seen, rects = [], set(), set()
    total = 0
    for s in range(max_scrolls):
        print(f"[SCRAPE] --- Scroll {s+1}/{max_scrolls} ---")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            wait.until(lambda d: d.execute_script("return document.readyState") == "complete")
        except Exception:
            pass
        time.sleep(pause)

        try:
            nodes = find_message_nodes(driver)
            print(f"[SCRAPE] Nodes found: {len(nodes)}")
        except Exception as e:
            print(f"[SCRAPE] find error: {e}")
            continue

        added = 0
        for node in nodes:
            try:
                # Read the rect once: every `.rect` access is a separate
                # WebDriver command, and the original issued four per node.
                box = node.rect
                rect = (box.get('x'), box.get('y'), box.get('width'), box.get('height'))
                if rect in rects:
                    continue
                rects.add(rect)
            except Exception:
                # Geometry is only a dedup shortcut; fall through to text.
                pass

            try:
                txt = _node_text(node)
            except StaleElementReferenceException:
                continue

            if not txt or len(txt) < 20:
                continue
            if txt in seen:
                continue

            wc = len(re.findall(r"\b\w+\b", txt))
            if wc > 7 and not any(j in txt for j in ui_noise):
                seen.add(txt)
                total += 1
                posts.append({"id": total, "text": txt, "group_link": group_url})
                added += 1
        print(f"[SCRAPE] New posts this scroll: {added}")

    print(f"[SCRAPE] Total unique posts: {total}")
    return posts
359
+
360
def try_scrape_with_fallback(group_url: str, cookies_file: str, max_scrolls: int, pause: float):
    """Scrape one group in a fresh headless browser, never raising on failure.

    Returns:
        ``(posts, driver)`` on success; the caller must quit the still-open
        driver. On any error during cookie loading or scraping the driver is
        quit here and ``([], None)`` is returned.
    """
    browser = new_driver(headless=True)
    waiter = WebDriverWait(browser, 15)
    try:
        load_cookies(browser, cookies_file)
        scraped = scrape_group(browser, waiter, group_url, max_scrolls, pause)
    except Exception as err:
        # Shut the browser down before reporting; a failing quit is ignored.
        try:
            browser.quit()
        except Exception:
            pass
        print(f"[SCRAPE] Error in headless mode: {err}")
        return [], None
    return scraped, browser
374
+
375
def _collect_gemini_keys(cli_keys):
    """Return Gemini API keys from the CLI value, else from the environment.

    A truthy *cli_keys* is split on commas with blanks dropped, and the
    environment is not consulted. Otherwise the set values among
    ``GEMINI_API_KEY_1`` .. ``GEMINI_API_KEY_5`` are returned in order.
    """
    if cli_keys:
        return [k.strip() for k in cli_keys.split(",") if k.strip()]
    candidates = (os.environ.get(f"GEMINI_API_KEY_{i}") for i in range(1, 6))
    return [key for key in candidates if key]


def main():
    """Scrape one group, keyword-filter its posts, AI-classify hits, save results.

    Steps: parse arguments; create the output directories; scrape
    ``args.group``; write all posts to ``args.out``; keep posts containing a
    medical keyword; run ``ai_medical_intent`` on each hit, sleeping between
    calls; write a summary report to ``args.analysis_out``.

    The ``::NAME::`` lines printed to stdout look like machine-readable
    markers for a calling process (presumably the API server; verify against
    the caller), so their text is kept byte-for-byte.
    """
    args = get_args()
    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
    os.makedirs(os.path.dirname(args.analysis_out) or ".", exist_ok=True)

    gemini_keys = _collect_gemini_keys(args.gemini_keys)
    gemini_manager = GeminiManager(gemini_keys) if gemini_keys else None

    # NOTE(review): the returned service is not used in this function. The
    # call is kept because its side effects are not visible from here;
    # confirm whether it is still needed.
    build_gmail_service()

    posts, driver = try_scrape_with_fallback(args.group, args.cookies_file, args.max_scrolls, args.scroll_pause)
    if driver:
        try:
            driver.quit()
        except Exception:
            pass

    try:
        with open(args.out, "w", encoding="utf-8") as f:
            json.dump(posts, f, ensure_ascii=False, indent=2)
        print(f"[SCRAPE] Saved scraped posts to {args.out}")
        print(f"::SCRAPE_SAVED::{args.out}")
    except Exception as e:
        print(f"[SCRAPE] Error saving posts: {e}")

    # Stage 1: keyword pre-filter.
    keyword_hits, confirmed = [], []
    for p in posts:
        has, hits = contains_keywords(p.get("text", ""))
        if has:
            p["found_keywords"] = hits
            keyword_hits.append(p)
            print(f"::KW_HIT::{json.dumps({'id': p['id'], 'found_keywords': hits}, ensure_ascii=False)}")

    # Stage 2: AI classification of each keyword hit. The pause spaces out
    # the AI calls and is skipped after the final one.
    per_call_sleep = 7
    for idx, p in enumerate(keyword_hits, start=1):
        found_kws = p.get("found_keywords", [])
        ai = ai_medical_intent(gemini_manager, p.get("text", ""), found_kws)
        p["ai_analysis"] = ai
        print(f"::AI_RESULT::{json.dumps({'id': p['id'], 'ai': ai}, ensure_ascii=False)}")
        if ai.get("is_medical_seeking"):
            confirmed.append(p)
        if idx < len(keyword_hits):
            time.sleep(per_call_sleep)

    report = {
        "analysis_date": datetime.now().isoformat(),
        "group_link": args.group,
        "total_posts": len(posts),
        "keyword_hits": len(keyword_hits),
        "confirmed_medical": len(confirmed),
        "emails_sent": 0,  # no email is sent from this function
        "posts": confirmed,
    }

    try:
        with open(args.analysis_out, "w", encoding="utf-8") as f:
            json.dump(report, f, ensure_ascii=False, indent=2)
        print(f"[ANALYSIS] Saved analysis to {args.analysis_out}")
        print(f"::ANALYSIS_SAVED::{args.analysis_out}")
    except Exception as e:
        print(f"[ANALYSIS] Error saving analysis: {e}")
445
+
446
if __name__ == "__main__":
    try:
        # Materialize secrets supplied as base64 environment variables into
        # the files the rest of the script reads.
        if 'CREDENTIALS_B64' in os.environ:
            # Explicit encoding: the payload is decoded as UTF-8, so it must
            # be written as UTF-8 whatever the platform's locale default is.
            with open('credentials.json', 'w', encoding='utf-8') as f:
                f.write(base64.b64decode(os.environ['CREDENTIALS_B64']).decode('utf-8'))

        if 'FB_COOKIES_B64' in os.environ:
            # The cookie file is a binary pickle, written byte-for-byte.
            with open('facebook_cookies.pkl', 'wb') as f:
                f.write(base64.b64decode(os.environ['FB_COOKIES_B64']))

        main()
    except Exception as e:
        # Print the failure to stdout, then re-raise so the process still
        # exits with an error.
        print("Unhandled error:")
        print(e)
        print(traceback.format_exc())
        raise
groups.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ https://www.facebook.com/groups/Lifepaths
2
+ https://www.facebook.com/groups/1324510239249728
index.html ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8"/>
5
+ <meta name="viewport" content="width=device-width,initial-scale=1"/>
6
+ <title>Hillside Medical Group - Social Media Monitor</title>
7
+ <style>
8
+ :root{
9
+ --bg:#f6f7fb; --ink:#0f172a; --muted:#475569;
10
+ --card:#ffffff; --border:#e5e7eb; --shadow:0 6px 24px rgba(15,23,42,.06);
11
+ --grad1:#2c5aa0; --grad2:#1e3c72; /* Blue theme */
12
+ --ok:#22c55e; --warn:#f59e0b; --err:#ef4444;
13
+ --term-bg:#0b1221; --term-ink:#22c55e; --term-hdr:#07101d; --term-border:#1f2937; --term-meta:#86efac;
14
+ --badge:#e2e8f0; --badge-ink:#0f172a;
15
+ --kw:#f59e0b; --kw-ink:#1f2937;
16
+ --pill:#dcfce7; --pill-ink:#166534;
17
+ }
18
+ *{box-sizing:border-box}
19
+ body{margin:0;font-family:Inter,system-ui,Arial,sans-serif;background:var(--bg);color:var(--ink)}
20
+ .wrap{max-width:1400px;margin:24px auto;padding:0 16px}
21
+ .header{background:linear-gradient(135deg,var(--grad1),var(--grad2));color:#fff;border-radius:14px;padding:18px;box-shadow:0 8px 28px rgba(15,23,42,.18); text-align: center;} /* Centered header text */
22
+ h1{margin:0 0 6px 0;font-size:22px;font-weight:600}
23
+ .grid{display:grid;gap:16px;grid-template-columns:1fr 420px}
24
+ .card{background:var(--card);border:1px solid var(--border);border-radius:14px;box-shadow:var(--shadow);padding:18px}
25
+ .btn{background:var(--ok);color:#fff;border:none;border-radius:10px;padding:10px 14px;font-weight:600;cursor:pointer}
26
+ .btn.red{background:var(--err)} .btn.gray{background:#64748b}
27
+ .btn[disabled]{opacity:.5;cursor:not-allowed}
28
+ input, select{width:100%;padding:10px 12px;border:1px solid var(--border);border-radius:10px} /* Added select style */
29
+ label{font-weight:600; display: block; margin-bottom: 6px;} /* Improved label spacing */
30
+ .muted{color:var(--muted);font-size:13px}
31
+ .bar{height:12px;background:var(--border);border-radius:999px;overflow:hidden}
32
+ .fill{height:100%;background:linear-gradient(90deg,var(--grad1),#667eea);width:0%}
33
+ .mono{font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-size:12px;color:#334155;word-break:break-all}
34
+
35
+ /* Terminal */
36
+ .terminal{background:var(--term-bg);border-radius:14px;border:1px solid var(--term-border);height:520px;display:flex;flex-direction:column}
37
+ .term-header{color:#a7f3d0;background:var(--term-hdr);border-radius:14px 14px 0 0;padding:10px 12px;font-weight:700;font-size:13px;border-bottom:1px solid var(--term-border)}
38
+ .term-body{flex:1;overflow:auto;padding:10px 12px}
39
+ .term-line{font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-size:12px;color:var(--term-ink);white-space:pre-wrap;word-break:break-word;margin:0}
40
+ .term-meta{color:var(--term-meta)}
41
+ .term-warn{color:#facc15}
42
+ .term-err{color:#f87171}
43
+ .term-footer{padding:8px 12px;border-top:1px solid var(--term-border);display:flex;gap:8px}
44
+
45
+ /* Analysis */
46
+ .tabs{display:flex;gap:10px;margin-bottom:10px}
47
+ .tab{background:#e2e8f0;color:#0f172a;border:none;border-radius:10px;padding:8px 12px;font-weight:600;cursor:pointer}
48
+ .tab.active{background:#1e293b;color:#fff}
49
+ .flex{display:flex;gap:10px;flex-wrap:wrap;align-items:center}
50
+ .pill{display:inline-block;background:var(--pill);color:var(--pill-ink);padding:2px 10px;border-radius:999px;font-size:12px;font-weight:700}
51
+ .stat{display:grid;grid-template-columns:repeat(4,minmax(120px,1fr));gap:10px;margin-top:10px}
52
+ .stat .box{background:#f8fafc;border:1px solid var(--border);border-radius:12px;padding:10px;text-align:center}
53
+ .stat .num{font-weight:800;font-size:22px}
54
+ .filters{display:flex;gap:8px;flex-wrap:wrap;align-items:center;margin-top:10px}
55
+ .filter{background:#e5e7eb;border:none;border-radius:999px;padding:6px 10px;font-weight:600;cursor:pointer}
56
+ .filter.active{background:#1e293b;color:#fff}
57
+ .search{flex:1;min-width:240px}
58
+ .posts{margin-top:12px;display:grid;gap:10px}
59
+ .post{background:#ffffff;border:1px solid var(--border);border-radius:12px;padding:12px}
60
+ .post-hdr{display:flex;justify-content:space-between;gap:8px;flex-wrap:wrap}
61
+ .badge{display:inline-block;background:var(--badge);color:var(--badge-ink);padding:2px 8px;border-radius:999px;font-size:12px;font-weight:700}
62
+ .kw{display:inline-block;background:#fff3cd;color:var(--kw-ink);border:1px solid #fde68a;padding:2px 8px;border-radius:999px;font-size:12px;margin:2px 4px 0 0}
63
+ .ai{margin-top:8px;background:#f1f5f9;border:1px solid var(--border);border-radius:10px;padding:10px}
64
+ .ai-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:6px}
65
+ .ai-pill{display:inline-block;border-radius:999px;padding:2px 10px;font-size:12px;font-weight:800}
66
+ .ai-ok{background:#dcfce7;color:#166534}
67
+ .ai-mid{background:#fef9c3;color:#92400e}
68
+ .ai-low{background:#fee2e2;color:#991b1b}
69
+ .email{font-size:12px;font-weight:700}
70
+ .email.ok{color:#166534} .email.no{color:#991b1b}
71
+ .reason{margin-top:6px;background:#fff;border:1px dashed var(--border);border-radius:10px;padding:8px;display:none}
72
+ .reason.show{display:block}
73
+
74
+ /* Recipient Dropdown */
75
+ .recipient-container { margin-top: 10px; position: relative; } /* Container for dropdown and custom input */
76
+ .recipient-select { margin-bottom: 10px; } /* Space below select if custom input appears */
77
+ #custom-recipient { margin-top: 6px; display: none; } /* Initially hidden custom input */
78
+ </style>
79
+ </head>
80
+ <body>
81
+ <div class="wrap">
82
+ <div class="header">
83
+ <h1>🩺 Hillside Medical Group - Social Media Monitor</h1>
84
+ <div class="muted">Automated monitoring and analysis of medical help requests in Facebook groups.</div>
85
+ </div>
86
+
87
+ <div class="grid" style="margin-top:16px">
88
+ <div class="left">
89
+ <div class="card">
90
+ <h3>System Status</h3>
91
+ <div id="sys"></div>
92
+ <button class="btn" style="margin-top:8px" onclick="refreshSystem()">Refresh Status</button>
93
+ </div>
94
+
95
+ <div class="card" style="margin-top:16px">
96
+ <h3>Start Monitoring Process</h3>
97
+ <label for="recipient-select">Report Recipients</label>
98
+ <div class="recipient-container">
99
+ <select id="recipient-select" class="recipient-select" onchange="handleRecipientChange()">
100
+ <option value="">-- Loading Recipients --</option>
101
+ </select>
102
+ <input type="email" id="custom-recipient" placeholder="Enter custom email address..." />
103
+ </div>
104
+ <div class="muted" style="margin:6px 0 10px">
105
+ Select a recipient from the list or choose 'Custom' to enter an email address.
106
+ The report summary will be sent to the selected address(es) after processing.
107
+ </div>
108
+ <div class="flex">
109
+ <button id="start" class="btn" onclick="startProcess()">Start Monitoring</button>
110
+ <button class="btn gray" onclick="refreshLive()">Refresh Live View</button>
111
+ </div>
112
+ <div style="margin-top:12px">
113
+ <div style="display:flex;justify-content:space-between"><b>Overall Progress</b><span id="pct" class="muted">0%</span></div>
114
+ <div class="bar"><div id="fill" class="fill"></div></div>
115
+ <div id="msg" class="muted" style="margin-top:6px">idle</div>
116
+ </div>
117
+ </div>
118
+
119
+ <div class="card" style="margin-top:16px">
120
+ <div class="tabs">
121
+ <button id="tab-groups" class="tab active" onclick="switchSection('groups')">Groups</button>
122
+ <button id="tab-analysis" class="tab" onclick="switchSection('analysis')">Analysis (Live)</button>
123
+ <button id="tab-summary" class="tab" onclick="switchSection('summary')">Summary</button>
124
+ </div>
125
+
126
+ <!-- Groups -->
127
+ <div id="section-groups">
128
+ <h3>Configured Groups (from groups.txt)</h3>
129
+ <div id="groups"></div>
130
+ </div>
131
+
132
+ <!-- Analysis Live -->
133
+ <div id="section-analysis" style="display:none">
134
+ <div class="flex">
135
+ <div class="pill" id="live-group">Group: –</div>
136
+ </div>
137
+
138
+ <div class="stat">
139
+ <div class="box"><div class="num" id="cnt-total">0</div><div class="muted">Total Posts</div></div>
140
+ <div class="box"><div class="num" id="cnt-kw">0</div><div class="muted">Keyword Hits</div></div>
141
+ <div class="box"><div class="num" id="cnt-ai">0</div><div class="muted">AI Analyzed</div></div>
142
+ <div class="box"><div class="num" id="cnt-confirmed">0</div><div class="muted">Confirmed</div></div>
143
+ </div>
144
+
145
+ <div class="filters">
146
+ <button class="filter active" id="flt-all" onclick="setFilter('all')">All</button>
147
+ <button class="filter" id="flt-kw" onclick="setFilter('kw')">Keyword</button>
148
+ <button class="filter" id="flt-confirmed" onclick="setFilter('confirmed')">Confirmed</button>
149
+ <input id="search" class="search" placeholder="Search post text..."/>
150
+ </div>
151
+
152
+ <div class="posts" id="posts"></div>
153
+ </div>
154
+
155
+ <!-- Summary -->
156
+ <div id="section-summary" style="display:none">
157
+ <h3>Last Run Summary</h3>
158
+ <div id="summary" class="muted">No summary available yet.</div>
159
+ </div>
160
+ </div>
161
+ </div>
162
+
163
+ <div class="right">
164
+ <div class="terminal">
165
+ <div class="term-header">Process Logs (Live)</div>
166
+ <div id="term" class="term-body"></div>
167
+ <div class="term-footer">
168
+ <button class="btn" onclick="scrollBottom()">Scroll to Bottom</button>
169
+ <button class="btn red" onclick="clearLogs()">Clear Logs</button>
170
+ </div>
171
+ </div>
172
+ </div>
173
+ </div>
174
+ </div>
175
+
176
+ <script>
177
const API = "/api"; // Base path prefixed to every backend request made by this page
178
+
179
/**
 * Fetch the backend status from `${API}/system/status` and render it as a
 * table inside #sys. On any failure a short error message is shown instead.
 *
 * String fields from the response are HTML-escaped before interpolation.
 * They are coerced with String() first so escapeHtml always receives a string.
 */
async function refreshSystem(){
  const target = document.getElementById('sys');
  try {
    const r = await fetch(`${API}/system/status`);
    const j = await r.json();
    const chip = (b)=>`<span style="padding:2px 8px;border-radius:999px;font-size:12px;font-weight:700;background:${b?'#dcfce7':'#fee2e2'};color:${b?'#166534':'#991b1b'}">${b?'OK':'Missing'}</span>`;
    const txt = (v)=>escapeHtml(String(v));
    target.innerHTML = `
      <table>
        <tr><td>Gmail Service</td><td>${chip(j.gmail)}</td></tr>
        <tr><td>Groups File (groups.txt)</td><td>${chip(j.groups_file_exists)} • Count: ${txt(j.groups_count)}</td></tr>
        <tr><td>Processing Script (final5.py)</td><td>${chip(j.final5_exists)}</td></tr>
        <tr><td>Default Sender</td><td class="mono">${txt(j.sender_email)}</td></tr>
        <tr><td>Data Folders</td><td class="mono">${txt(j.scrape_outdir)} • ${txt(j.analysis_outdir)}</td></tr>
      </table>`;
  } catch (error) {
    console.error("Error refreshing system status:", error);
    target.innerHTML = `<div class="muted">Error loading system status.</div>`;
  }
}
197
+
198
/**
 * Fetch the configured group links from `${API}/groups` and render one card
 * per group inside #groups. Each card has a status line with id `g-<index>`,
 * which pollStatus() later updates.
 *
 * Group links are HTML-escaped before insertion so characters such as `&`
 * or `<` in a line of groups.txt cannot alter the markup.
 */
async function loadGroups(){
  const container = document.getElementById('groups');
  try {
    const r = await fetch(`${API}/groups`);
    const j = await r.json();
    const list = j.groups || [];
    if(!list.length){
      container.innerHTML = `<div class="muted">Please add Facebook group links to 'groups.txt' (one per line).</div>`;
      return;
    }
    container.innerHTML = list.map((g,i)=>`
      <div style="border:1px solid var(--border);border-radius:12px;padding:10px;margin:6px 0;background:#fff">
        <div style="display:flex;gap:8px;align-items:center;flex-wrap:wrap"><b>#${i+1}</b><span class="mono">${escapeHtml(String(g))}</span></div>
        <div id="g-${i}" class="muted">Status: Pending</div>
      </div>
    `).join("");
  } catch (error) {
    console.error("Error loading groups:", error);
    container.innerHTML = `<div class="muted">Error loading groups list.</div>`;
  }
}
218
+
219
+ // ---------------- Recipient Management ----------------
220
// Recipients most recently returned by `${API}/recipients`.
let recipientList = [];

/**
 * Load the recipient list from the backend and fill the dropdown.
 * While loading, and on failure, the dropdown shows one placeholder option
 * describing the state.
 */
async function loadRecipients() {
  const dropdown = document.getElementById('recipient-select');
  dropdown.innerHTML = '<option value="">-- Loading Recipients --</option>';
  try {
    const res = await fetch(`${API}/recipients`);
    const payload = await res.json();

    const usable = payload.success && Array.isArray(payload.data);
    if (!usable) {
      console.warn("API did not return a successful recipient list:", payload);
      dropdown.innerHTML = '<option value="">-- Error Loading --</option>';
      return;
    }

    recipientList = payload.data;
    populateRecipientDropdown();
  } catch (err) {
    console.error("Error fetching recipients:", err);
    dropdown.innerHTML = '<option value="">-- Network Error --</option>';
  }
}
241
+
242
/**
 * Rebuild the recipient dropdown from `recipientList`.
 *
 * Every known recipient becomes an option, followed by a "custom" entry.
 * The default recipient is pre-selected. If it is not in the list, a
 * dedicated default option is inserted at the top and selected.
 */
function populateRecipientDropdown() {
  const dropdown = document.getElementById('recipient-select');
  dropdown.innerHTML = ''; // drop the loading placeholder

  const defaultRecipient = "smahato@hillsidemedicalgroup.com";

  // Small factory so each kind of option is built the same way.
  const makeOption = (value, label, preselect) => {
    const opt = document.createElement('option');
    opt.value = value;
    opt.textContent = label;
    if (preselect) {
      opt.selected = true;
    }
    return opt;
  };

  let sawDefault = false;
  for (const person of recipientList) {
    const isDefault = person.email === defaultRecipient;
    if (isDefault) {
      sawDefault = true;
    }
    dropdown.appendChild(
      makeOption(person.email, `${person.name} (${person.email})`, isDefault)
    );
  }

  // Entry that reveals the free-text email field.
  dropdown.appendChild(makeOption("custom", "-- Custom Email --", false));

  if (!sawDefault) {
    const fallback = makeOption(
      defaultRecipient,
      `Subash Mahato (Default) (${defaultRecipient})`,
      true
    );
    dropdown.insertBefore(fallback, dropdown.firstChild);
  }
}
275
+
276
/**
 * Show and focus the custom-email field when "custom" is chosen in the
 * dropdown. For any other choice, hide the field and clear it.
 */
function handleRecipientChange() {
  const customField = document.getElementById('custom-recipient');
  const wantsCustom = document.getElementById('recipient-select').value === 'custom';

  if (!wantsCustom) {
    customField.style.display = 'none';
    customField.value = ''; // discard anything typed earlier
    return;
  }

  customField.style.display = 'block';
  customField.focus();
}
287
+
288
/**
 * Resolve the recipient chosen in the UI.
 *
 * @returns {string[]|null} A one-element array with the selected or custom
 *   email address, or null after alerting the user when nothing valid is
 *   selected.
 *
 * Fix: choosing "custom" with an empty input used to fall through to the
 * generic branch and return ["custom"] as if it were an email address. The
 * custom branch now always validates what was typed.
 */
function getSelectedRecipients() {
  const selectElement = document.getElementById('recipient-select');
  const customInput = document.getElementById('custom-recipient');
  const choice = selectElement.value;

  if (choice === 'custom') {
    // Basic shape check only: something@something.something, no whitespace.
    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    const typed = customInput.value.trim();
    if (!emailRegex.test(typed)) {
      alert("Please enter a valid custom email address.");
      return null; // Signal error
    }
    return [typed];
  }

  if (!choice) {
    // Only placeholder options have an empty value.
    alert("Please select a recipient.");
    return null;
  }

  return [choice];
}
311
+
312
+
313
+ // ---------------- Process control ----------------
314
/**
 * Start a monitoring run for the selected recipients.
 *
 * The Start button is disabled immediately. It is re-enabled here only when
 * the run could not be started; after a successful start, pollStatus()
 * re-enables it once the backend reports the run has finished.
 */
async function startProcess(){
  const startBtn = document.getElementById('start');
  const unlock = () => { startBtn.disabled = false; };

  startBtn.disabled = true;

  const recipients = getSelectedRecipients();
  if (!recipients) {
    // getSelectedRecipients() has already told the user what is wrong.
    unlock();
    return;
  }

  try {
    // Clear the previous run's logs before starting.
    await fetch(`${API}/process/clear-logs`, {method:'POST'});

    const res = await fetch(`${API}/process/start`, {
      method:'POST',
      headers:{'Content-Type':'application/json'},
      body: JSON.stringify({recipients: recipients})
    });
    const reply = await res.json();

    if(reply.success){
      pollStatus(); // begin polling for status updates
      return;
    }
    alert(reply.message||'Failed to start the process.');
    unlock();
  } catch (err) {
    console.error("Error starting process:", err);
    alert("An error occurred while trying to start the process.");
    unlock();
  }
}
344
+
345
/**
 * Poll `${API}/process/status` and reflect it in the progress UI.
 *
 * Updates the status message, percentage, progress bar and each group's
 * status line. It re-schedules itself every 1.2 s while the backend reports
 * `running`, and every 3 s after a failed request. When the run has
 * finished, the Start button is re-enabled and the summary is loaded.
 *
 * Per-group status is written with textContent instead of innerHTML. The
 * line is plain text, and fields such as `g.error` can contain arbitrary
 * characters that must not be parsed as markup.
 */
async function pollStatus(){
  try {
    const r = await fetch(`${API}/process/status`);
    const j = await r.json();
    document.getElementById('msg').innerText = j.message || 'Idle';
    document.getElementById('pct').innerText = `${j.progress||0}%`;
    document.getElementById('fill').style.width = `${j.progress||0}%`;
    (j.groups||[]).forEach((g,idx)=>{
      const el = document.getElementById(`g-${idx}`);
      if(!el) return;
      el.textContent = `Status: ${g.stage} • Scraped: ${g.scraped_posts} • Confirmed: ${g.detected_posts}${g.error?(' • Error: '+g.error):''}`;
    });
    if(j.running){
      setTimeout(pollStatus, 1200); // Poll every 1.2 seconds while running
    } else {
      document.getElementById('start').disabled = false;
      loadSummary(); // Load the final summary when done
    }
  } catch (error) {
    console.error("Error polling status:", error);
    setTimeout(pollStatus, 3000); // Retry after a longer delay on error
  }
}
369
+
370
+ // ---------------- Logs (terminal) ----------------
371
// Id of the last log entry already shown; sent as `after` by pollLogs().
let lastLogId = 0;

/**
 * Append log entries to the terminal pane.
 *
 * @param {Array<{ts:*, source:*, level:string, msg:string}>} entries
 *
 * Fixes:
 *  - Lines are built from DOM nodes with textContent, so no part of an
 *    entry (timestamp, source or message) is ever parsed as HTML.
 *  - The pane follows new output only when the user was already near the
 *    bottom. The old condition `nearBottom || entries.length` always
 *    scrolled, which overrode a user scrolling back through the log.
 */
function appendLogs(entries){
  const term = document.getElementById('term');
  // Measured before appending: was the view within 40px of the end?
  const nearBottom = term.scrollTop + term.clientHeight >= term.scrollHeight - 40;
  entries.forEach(e=>{
    const line = document.createElement('div');
    line.className = 'term-line';

    const meta = document.createElement('span');
    meta.className = 'term-meta';
    meta.textContent = `[${e.ts}] [${e.source}]`;

    const body = document.createElement('span');
    body.className = e.level === 'error' ? 'term-err' : e.level === 'warn' ? 'term-warn' : '';
    body.textContent = e.msg || '';

    line.appendChild(meta);
    line.appendChild(document.createTextNode(' '));
    line.appendChild(body);
    term.appendChild(line);
  });
  if(nearBottom){ term.scrollTop = term.scrollHeight; }
}
384
/**
 * Fetch log entries newer than `lastLogId`, append them to the terminal,
 * and schedule the next poll 0.9 s later. Polling continues even when a
 * request fails.
 */
async function pollLogs(){
  try{
    const res = await fetch(`${API}/process/logs?after=${lastLogId}&limit=500`);
    const batch = await res.json();
    const hasEntries = batch.entries && batch.entries.length;
    if(hasEntries){
      appendLogs(batch.entries);
      lastLogId = batch.last || lastLogId;
    }
  }catch(err){
    // A failed poll is only logged; the finally block keeps the loop going.
    console.error("Error polling logs:", err);
  }finally{
    setTimeout(pollLogs, 900);
  }
}
398
/** Scroll the terminal pane to its last line. */
function scrollBottom(){
  const pane = document.getElementById('term');
  const fullHeight = pane.scrollHeight;
  pane.scrollTop = fullHeight;
}
402
/**
 * Ask the backend to clear its logs, then empty the terminal pane and reset
 * the log cursor. If the request fails, the pane and cursor are left
 * untouched and the error is only logged to the console.
 */
async function clearLogs(){
  const clearUrl = `${API}/process/clear-logs`;
  try {
    await fetch(clearUrl, {method:'POST'});
    const pane = document.getElementById('term');
    pane.innerHTML = '';
    lastLogId = 0; // next poll starts from the beginning
  } catch (err) {
    console.error("Error clearing logs:", err);
  }
}
412
/**
 * Escape a value for safe interpolation into HTML text or attribute values.
 *
 * @param {*} s Value to escape. Falsy values yield ''; other non-strings
 *   are converted with String().
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 *
 * Fix: '<' and '>' were mapped to themselves, so markup in scraped post
 * text passed through unescaped into innerHTML.
 */
function escapeHtml(s){
  const entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#039;'};
  return String(s || '').replace(/[&<>"']/g, c => entities[c]);
}
415
+
416
+ // ---------------- Live Analysis ----------------
417
// Current filter for the live post list: 'all' | 'kw' | 'confirmed'.
let liveFilter = 'all';
// Lower-cased search text typed into #search.
let liveSearch = '';

/**
 * Select a live-view filter, highlight its button, and redraw the list.
 * @param {string} f One of 'all', 'kw', 'confirmed'.
 */
function setFilter(f){
  liveFilter = f;
  for (const name of ['all', 'kw', 'confirmed']) {
    const button = document.getElementById(`flt-${name}`);
    button.classList.toggle('active', f === name);
  }
  refreshLive();
}

// Re-filter the live view on every keystroke in the search box.
document.getElementById('search').addEventListener('input', (evt) => {
  const typed = evt.target.value;
  liveSearch = typed.toLowerCase();
  refreshLive();
});
431
+
432
/**
 * Fetch the live analysis state from `${API}/live/state` and hand its
 * `data` object to renderLive(). A reply without `success`, or a failed
 * request, is only logged and leaves the current view unchanged.
 */
async function refreshLive(){
  try {
    const res = await fetch(`${API}/live/state`);
    const reply = await res.json();
    if(reply.success){
      renderLive(reply.data || {});
      return;
    }
    console.warn("Live state API returned not success:", reply);
  } catch (err) {
    console.error("Error refreshing live state:", err);
  }
}
447
+
448
/**
 * Render the live-analysis panel from a live-state payload.
 *
 * Updates the current-group header and the four counters, applies the active
 * `liveFilter` / `liveSearch` to `data.posts`, and rebuilds the `#posts` list.
 *
 * Every payload value interpolated into markup is either passed through
 * escapeHtml() (text content) or reduced to `[A-Za-z0-9_-]` (ids used inside
 * attributes and the inline onclick handler), so post or AI-generated content
 * cannot inject HTML or script.
 *
 * Reasoning panels the user has opened are re-opened after the rebuild, so a
 * re-render does not collapse them.
 *
 * @param {object} data - Live state; reads `group`, `counts` and `posts`.
 */
function renderLive(data){
  // Coerce any payload value to a string so string methods never throw on numbers/null.
  const text = v => (v === null || v === undefined) ? '' : String(v);
  // Restrict an id to characters that are inert in an HTML attribute and a quoted JS string.
  const domId = v => text(v).replace(/[^\w-]/g, '');

  // header + counts
  document.getElementById('live-group').innerText = `Current Group: ${data.group || '–'}`;
  const c = data.counts || {};
  document.getElementById('cnt-total').innerText = c.total_posts || 0;
  document.getElementById('cnt-kw').innerText = c.kw_hits || 0;
  document.getElementById('cnt-ai').innerText = c.ai_done || 0;
  document.getElementById('cnt-confirmed').innerText = c.confirmed || 0;

  const posts = Array.isArray(data.posts) ? data.posts : [];

  // filter/search
  const filtered = posts.filter(p=>{
    const hasKW = Array.isArray(p.found_keywords) && p.found_keywords.length>0;
    const isConfirmed = p.ai && p.ai.is_medical_seeking;
    if(liveFilter==='kw' && !hasKW) return false;
    if(liveFilter==='confirmed' && !isConfirmed) return false;
    if(liveSearch && !text(p.text).toLowerCase().includes(liveSearch)) return false;
    return true;
  });

  // Remember which reasoning panels are currently expanded; replacing innerHTML
  // below would otherwise reset them on every refresh.
  const container = document.getElementById('posts');
  const openReasons = new Set(
    Array.from(container.querySelectorAll('.reason.show'), el => el.id)
  );

  const html = filtered.map(p=>{
    const hasKW = Array.isArray(p.found_keywords) && p.found_keywords.length>0;
    const ai = p.ai || null;
    const confirm = !!(ai && ai.is_medical_seeking);
    const conf = ai ? text(ai.confidence).toLowerCase() : '';
    const urg = ai ? text(ai.urgency_level).toLowerCase() : '';
    // High confidence is "good" (ok); high urgency is flagged with the "low" (warning) style.
    const confClass = conf==='high' ? 'ai-ok' : conf==='medium' ? 'ai-mid' : 'ai-low';
    const urgClass = urg==='high' ? 'ai-low' : urg==='medium' ? 'ai-mid' : 'ai-ok';
    const emailTxt = p.email_sent ? '<span class="email ok">Email Sent</span>' : '<span class="email no">No Email</span>';
    const pid = domId(p.id);
    const reasonId = `reason-${pid}`;
    const reasonClass = openReasons.has(reasonId) ? 'reason show' : 'reason';
    return `
      <div class="post">
        <div class="post-hdr">
          <div class="flex">
            <span class="badge">Post #${escapeHtml(text(p.id || '-'))}</span>
            ${hasKW ? '<span class="badge">Keyword Hit</span>' : ''}
            ${confirm ? '<span class="badge" style="background-color: #bfdbfe; color: #1e40af;">CONFIRMED</span>' : ''}
          </div>
          <div class="muted mono">Group: ${escapeHtml(p.group_link || 'N/A')}</div>
        </div>
        <div style="margin-top:6px; white-space: pre-wrap;">${escapeHtml(p.text || '')}</div>

        ${hasKW ? `
          <div style="margin-top:6px">${(p.found_keywords||[]).map(k=>`<span class="kw">${escapeHtml(k)}</span>`).join('')}</div>
        ` : ''}

        ${ai ? `
          <div class="ai">
            <div class="ai-row">
              <span class="ai-pill ${confClass}">Confidence: ${escapeHtml(text(ai.confidence).toUpperCase())}</span>
              <span class="ai-pill ${urgClass}">Urgency: ${escapeHtml(text(ai.urgency_level).toUpperCase())}</span>
              ${emailTxt}
            </div>
            <div><b>Summary:</b> ${escapeHtml(ai.medical_summary || '')}</div>
            <div style="margin-top:4px"><b>Analysis:</b> ${escapeHtml(ai.analysis || '')}</div>
            ${Array.isArray(ai.suggested_services) && ai.suggested_services.length ? `
              <div style="margin-top:4px"><b>Suggested Services:</b> ${(ai.suggested_services||[]).map(s=>`<span class="badge" style="margin-right:6px">${escapeHtml(s)}</span>`).join('')}</div>
            `:''}
            <div style="margin-top:6px">
              <button class="btn gray" onclick="toggleReason('${pid}')">Show Reasoning</button>
              <div id="${reasonId}" class="${reasonClass}">${escapeHtml(ai.reasoning || 'No reasoning provided.')}</div>
            </div>
          </div>
        ` : '<div class="muted">Awaiting AI analysis...</div>'}
      </div>
    `;
  }).join('');
  container.innerHTML = html || `<div class="muted">No posts match the current filters.</div>`;
}
516
+
517
/**
 * Show or hide the AI reasoning panel of one post.
 *
 * @param {string|number} id - Post id; the panel element is `reason-<id>`.
 *   Does nothing when no such element exists.
 */
function toggleReason(id) {
  const panel = document.getElementById(`reason-${id}`);
  if (panel) {
    panel.classList.toggle('show');
  }
}
522
+
523
+ // auto poll live state
524
/**
 * Self-rescheduling poll loop for the live view: refresh once, then schedule
 * the next run 1.2 seconds after the refresh has finished.
 */
async function pollLive() {
  const nextRunDelayMs = 1200;
  await refreshLive();
  setTimeout(pollLive, nextRunDelayMs);
}
528
+
529
+ // ---------------- Summary ----------------
530
/**
 * Load the results summary from `${API}/results/summary` into `#summary`.
 *
 * - HTTP 200 with `success` and `data`: shows the data as pretty-printed JSON.
 * - HTTP 200 without them: shows "Summary data unavailable."
 * - Any other status: shows a placeholder and retries once after 2 seconds.
 * - Network/parse failure: shows "Error loading summary."
 *
 * The JSON is written with `textContent`, never `innerHTML`, so markup inside
 * summary values is displayed literally instead of being parsed as HTML.
 */
async function loadSummary(){
  // Replace the summary panel's content with a <pre> holding the formatted JSON.
  const showJson = (el, data) => {
    const pre = document.createElement('pre');
    pre.className = 'mono';
    pre.textContent = JSON.stringify(data, null, 2);
    el.innerHTML = '';
    el.appendChild(pre);
  };

  try {
    const r = await fetch(`${API}/results/summary`);
    const el = document.getElementById('summary');
    if(r.status!==200){
      el.innerHTML = `<div class="muted">Processing complete. Summary will appear here shortly.</div>`;
      // Retry once after a short delay if not found immediately
      setTimeout(async () => {
        // The timer callback runs outside the outer try, so it needs its own
        // handler to avoid an unhandled promise rejection.
        try {
          const retry = await fetch(`${API}/results/summary`);
          if(retry.status !== 200) return;
          const j = await retry.json();
          // Same validity check as the first attempt; otherwise keep the placeholder.
          if (j.success && j.data) showJson(el, j.data);
        } catch (error) {
          console.error("Error retrying summary load:", error);
        }
      }, 2000);
      return;
    }
    const j = await r.json();
    if (j.success && j.data) {
      // Raw JSON view for now; a friendlier summary layout could replace this.
      showJson(el, j.data);
    } else {
      el.innerHTML = `<div class="muted">Summary data unavailable.</div>`;
    }
  } catch (error) {
    console.error("Error loading summary:", error);
    document.getElementById('summary').innerHTML = `<div class="muted">Error loading summary.</div>`;
  }
}
560
+
561
+ // ---------------- Tabs ----------------
562
/**
 * Activate one of the three top-level tabs ('groups', 'analysis', 'summary').
 *
 * Marks the matching tab button as active, shows its section and hides the
 * other two, then refreshes the data of the section that was opened.
 *
 * @param {string} name - Section to show.
 */
function switchSection(name) {
  const sectionNames = ['groups', 'analysis', 'summary'];

  // Tab buttons first, then section visibility.
  for (const key of sectionNames) {
    document.getElementById(`tab-${key}`).classList.toggle('active', key === name);
  }
  for (const key of sectionNames) {
    const section = document.getElementById(`section-${key}`);
    section.style.display = key === name ? 'block' : 'none';
  }

  // Load fresh data for sections that have it.
  switch (name) {
    case 'analysis':
      refreshLive();
      break;
    case 'summary':
      loadSummary();
      break;
  }
}
573
+
574
+ // ---------------- Boot ----------------
575
// Page start-up: run the one-off loaders first, then kick off the polling
// loops, in this exact order.
[
  refreshSystem,
  loadGroups,
  loadRecipients, // recipients list
  loadSummary,
  pollStatus,     // status polling
  pollLogs,       // log polling
  pollLive,       // live analysis polling
].forEach(start => start());
583
+
584
+ </script>
585
+ </body>
586
+ </html>
recipients.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [
2
+ {"name": "Dr. Patel", "email": ""},
3
+ {"name": "Bruno", "email": "admin@hillsidemedicalgroup.com"},
4
+ {"name": "Afren", "email": "reports@hillsidemedicalgroup.com"},
5
+ {"name": "Sonu", "email": "smahato@hillsidemedicalgroup.com"}
6
+ ]
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ selenium
2
+ google-api-python-client
3
+ google-auth-oauthlib
4
+ google-auth-httplib2
5
+ google-generativeai
6
+ Flask
7
+ Flask-Cors
8
+ python-dotenv
9
+ requests
10
+ webdriver-manager