Spaces:
Sleeping
Sleeping
Commit ·
a00882d
1
Parent(s): 604f6ff
Project Uploaded
Browse files- Dockerfile +39 -0
- api_server.py +388 -0
- final5.py +462 -0
- groups.txt +2 -0
- index.html +586 -0
- recipients.json +6 -0
- requirements.txt +10 -0
Dockerfile
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the Flask orchestrator (api_server.py) and the Selenium/Chrome
# scraper it launches as a subprocess.
FROM python:3.10-slim

# All application files live under /app.
WORKDIR /app

# Non-interactive apt, unbuffered UTF-8 Python I/O, and the settings read by
# the `flask run` command at the bottom of this file.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=utf-8
ENV FLASK_APP=api_server.py
ENV FLASK_RUN_HOST=0.0.0.0
ENV FLASK_RUN_PORT=7860

# Install system dependencies for Selenium and Chrome
RUN apt-get update && apt-get install -y \
    wget \
    gnupg \
    unzip \
    && rm -rf /var/lib/apt/lists/*

# Install Google Chrome.
# The signing key is stored in a dedicated keyring referenced with `signed-by`
# instead of being added with the deprecated `apt-key`, which newer Debian
# bases no longer ship. The sources file is overwritten (`>`) rather than
# appended to, so re-running this layer cannot duplicate the entry.
RUN wget -q -O - https://dl.google.com/linux/linux_signing_key.pub \
        | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google-chrome.list \
    && apt-get update && apt-get install -y google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file first so the dependency layer is cached
# independently of application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application files
COPY . .

# Expose the port the app runs on (matches FLASK_RUN_PORT above)
EXPOSE 7860

# Command to run the Flask application
CMD ["flask", "run"]
|
api_server.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, re, json, time, base64, pickle, subprocess, threading, traceback, html
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
+
from flask import Flask, request, jsonify, send_from_directory
|
| 6 |
+
from flask_cors import CORS
|
| 7 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 8 |
+
from google.auth.transport.requests import Request
|
| 9 |
+
from googleapiclient.discovery import build
|
| 10 |
+
from googleapiclient.errors import HttpError
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
load_dotenv()


def _write_secret_file(env_var: str, path: str) -> None:
    """Materialize a base64-encoded secret from the environment as a file.

    Does nothing when ``env_var`` is not set. The decoded bytes are written
    verbatim (no text-mode re-encoding or newline translation). When the file
    is newly created it gets owner-only permissions; an existing file keeps
    whatever mode it already had.

    Raises:
        binascii.Error: if the variable does not contain valid base64.
        OSError: if the file cannot be written.
    """
    encoded = os.environ.get(env_var)
    if encoded is None:
        return
    payload = base64.b64decode(encoded)
    # O_BINARY only exists on Windows; it is a no-op (0) everywhere else.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(path, flags, 0o600)
    with os.fdopen(fd, "wb") as f:
        f.write(payload)


# Decode secrets at startup
_write_secret_file("CREDENTIALS_B64", "credentials.json")
_write_secret_file("FB_COOKIES_B64", "facebook_cookies.pkl")
|
| 23 |
+
|
| 24 |
+
# Runtime configuration. Every value can be overridden through the
# environment variable of the same name.
_setting = os.environ.get

GROUPS_TXT = _setting("GROUPS_TXT", "groups.txt")
SCRAPE_OUTDIR = _setting("SCRAPE_OUTDIR", "scraped")
ANALYSIS_OUTDIR = _setting("ANALYSIS_OUTDIR", "analysis")
FINAL5_PATH = _setting("FINAL5_PATH", "final5.py")
PYTHON_BIN = _setting("PYTHON_BIN", "python")
SENDER_EMAIL = _setting("SENDER_EMAIL", "smahato@hillsidemedicalgroup.com")

# Whichever of GEMINI_API_KEY_1 .. GEMINI_API_KEY_5 are set and non-empty,
# in numeric order.
GEMINI_KEYS = [
    api_key
    for api_key in (_setting(f"GEMINI_API_KEY_{slot}") for slot in range(1, 6))
    if api_key
]

GMAIL_SCOPES = [
    "https://www.googleapis.com/auth/gmail.send",
    "https://www.googleapis.com/auth/gmail.metadata",
]

# Both output directories must exist before anything writes into them.
for _outdir in (SCRAPE_OUTDIR, ANALYSIS_OUTDIR):
    os.makedirs(_outdir, exist_ok=True)
|
| 43 |
+
|
| 44 |
+
@dataclass
class GroupRun:
    """Outcome record for one group URL processed by the pipeline."""

    # URL of the group, as read from the groups file.
    link: str
    # Lifecycle marker; this file assigns "pending", "running", "done" and "error".
    stage: str = "pending"
    # Paths of the scraper output and analysis output for this group.
    scraped_json: str = ""
    analysis_json: str = ""
    # Number of entries found in the scraped JSON file.
    scraped_posts: int = 0
    # "confirmed_medical" value read from the analysis JSON file.
    detected_posts: int = 0
    # "emails_sent" value read from the analysis JSON file.
    emails_sent_by_final5: int = 0
    # Human-readable failure description; empty when nothing went wrong.
    error: str = ""


@dataclass
class PipelineState:
    """Mutable progress snapshot of the pipeline, reported by the status API."""

    # True while a pipeline run is in progress.
    running: bool = False
    # Short status text for the UI.
    message: str = "idle"
    # Completion percentage, 0-100.
    progress: int = 0
    # 1-based index of the group currently being processed.
    current: int = 0
    # Number of groups in the current run.
    total: int = 0
    # One GroupRun per group started so far (fresh list per instance).
    groups: List[GroupRun] = field(default_factory=list)
    # Recipients of the current run (fresh list per instance).
    recipients: List[str] = field(default_factory=list)
    # Location of the last written run summary, if any.
    summary_path: str = ""
|
| 65 |
+
|
| 66 |
+
# The built-in static route is disabled on purpose. With static_folder='.'
# and static_url_path='' every file in the application directory was
# downloadable over HTTP, including credentials.json, token.pickle,
# facebook_cookies.pkl and .env. The dashboard page is served explicitly by
# the "/" route via send_from_directory, so it keeps working.
app = Flask(__name__, static_folder=None)
# NOTE(review): CORS is enabled for every origin, so any website can call the
# unauthenticated endpoints of this API from a visitor's browser; restrict the
# allowed origins if the dashboard is always served from this same app.
CORS(app)
|
| 68 |
+
|
| 69 |
+
class LogBuffer:
    """Thread-safe, bounded, in-memory store of log entries.

    Each entry is a dict with the keys ``id``, ``ts``, ``level``, ``source``
    and ``msg``. Ids start at 1 and only ever increase, including across
    ``clear()``, so clients can poll with "give me everything after id N".
    """

    def __init__(self, max_items: int = 10000):
        """Create an empty buffer that keeps at most ``max_items`` entries."""
        self._buf: List[Dict[str, Any]] = []
        self._lock = threading.Lock()
        self._next_id = 1
        self._max = max_items

    def append(self, msg: str, level: str = "info", source: str = "server"):
        """Add one entry, discarding the oldest entries beyond the capacity."""
        ts = datetime.now().strftime("%H:%M:%S")
        with self._lock:
            # The id is allocated under the lock so that concurrent writers
            # can never be handed the same id.
            self._buf.append(
                {"id": self._next_id, "ts": ts, "level": level, "source": source, "msg": msg}
            )
            self._next_id += 1
            if len(self._buf) > self._max:
                self._buf = self._buf[-self._max:]

    def clear(self):
        """Drop all stored entries; the id counter keeps counting."""
        with self._lock:
            self._buf.clear()

    def get_after(self, after_id: int, limit: int = 500):
        """Return ``(entries, last_id)`` for a polling client.

        With ``after_id <= 0`` the newest ``limit`` entries are returned;
        otherwise the oldest ``limit`` entries whose id is greater than
        ``after_id``. A non-positive ``limit`` yields no entries.

        ``last_id`` is the id the client should pass as ``after_id`` next
        time: the id of the last entry returned, so a page truncated by
        ``limit`` does not make the client skip the remainder. When nothing
        is returned it is the newest id in the buffer, or ``after_id`` if the
        buffer is empty.
        """
        with self._lock:
            if limit <= 0:
                data: List[Dict[str, Any]] = []
            elif after_id <= 0:
                data = self._buf[-limit:]
            else:
                data = [x for x in self._buf if x["id"] > after_id][:limit]
            if data:
                last_id = data[-1]["id"]
            elif self._buf:
                last_id = self._buf[-1]["id"]
            else:
                last_id = after_id
        return data, last_id
|
| 89 |
+
|
| 90 |
+
# Process-wide buffer behind the log() helper and the log API endpoints.
logs = LogBuffer()


def log(msg: str, level: str = "info", source: str = "server"):
    """Store ``msg`` in the shared log buffer and echo it to stdout.

    The console line has the form ``[LEVEL][source] message`` and is flushed
    immediately.
    """
    logs.append(msg, level, source)
    console_line = f"[{level.upper()}][{source}] {msg}"
    print(console_line, flush=True)
|
| 94 |
+
|
| 95 |
+
# Guards every read and write of live_state.
live_lock = threading.Lock()


def _zero_counts() -> Dict[str, int]:
    """Return a fresh set of per-group counters, all at zero."""
    return {"total_posts": 0, "kw_hits": 0, "ai_done": 0, "confirmed": 0, "emails": 0}


# Live view of the group currently being processed: its link, running
# counters and the posts seen so far.
live_state: Dict[str, Any] = {"group": None, "counts": _zero_counts(), "posts": []}


def reset_live_state(group_link: str):
    """Point the live view at ``group_link`` with zeroed counters and no posts."""
    with live_lock:
        live_state.update(group=group_link, counts=_zero_counts(), posts=[])
|
| 107 |
+
|
| 108 |
+
def ensure_post_obj(pid: int) -> Dict[str, Any]:
    """Return the live post dict whose ``id`` equals ``pid``, creating it if absent.

    A newly created post has empty text and carries the link of the group the
    live view currently points at; it is appended to the live post list.
    """
    with live_lock:
        known_posts = live_state["posts"]
        found = next((entry for entry in known_posts if entry.get("id") == pid), None)
        if found is None:
            found = {"id": pid, "text": "", "group_link": live_state.get("group")}
            known_posts.append(found)
        return found
|
| 116 |
+
|
| 117 |
+
def load_scraped_into_live(path: str):
    """Replace the live post list with the posts stored in the JSON file ``path``.

    The file must contain a JSON list. On any read or parse problem, or when
    the payload is not a list, an error is logged and the live state is left
    untouched. Otherwise a non-list payload would be installed as the post
    list and break later per-post lookups.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            posts = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        log(f"live load error: {e}", "error", "LIVE")
        return
    if not isinstance(posts, list):
        log(
            f"live load error: expected a JSON list in {path}, got {type(posts).__name__}",
            "error",
            "LIVE",
        )
        return
    with live_lock:
        live_state["posts"] = posts
        live_state["counts"]["total_posts"] = len(posts)
|
| 127 |
+
|
| 128 |
+
def handle_event_line(line: str):
    """Apply one ``::EVENT::payload`` line from the scraper to the live state.

    Recognized events:
      * ``::SCRAPE_SAVED::<path>``: load the scraped posts file into the live view.
      * ``::KW_HIT::<json>``: store ``found_keywords`` on post ``id``; bump ``kw_hits``.
      * ``::AI_RESULT::<json>``: store ``ai`` on post ``id``; bump ``ai_done`` and,
        when ``ai["is_medical_seeking"]`` is truthy, ``confirmed``.
      * ``::EMAIL_SENT::<json>``: mark post ``id`` as emailed and add ``sent`` to
        the ``emails`` counter.

    Lines that do not start with ``::`` are ignored. Any error while handling
    an event is logged and swallowed so that a bad line cannot stop the reader.
    """
    if not line.startswith("::"):
        return
    try:
        if "::SCRAPE_SAVED::" in line:
            path = line.split("::SCRAPE_SAVED::", 1)[1].strip()
            if path:
                load_scraped_into_live(path)
        elif "::KW_HIT::" in line:
            d = json.loads(line.split("::KW_HIT::", 1)[1].strip())
            # ensure_post_obj takes live_lock itself, so it must be called
            # before the lock is acquired here (the lock is not re-entrant).
            p = ensure_post_obj(int(d["id"]))
            with live_lock:
                # Post dicts are serialized under live_lock by the live-state
                # endpoint, so they are only mutated while holding it.
                p["found_keywords"] = d.get("found_keywords", [])
                live_state["counts"]["kw_hits"] += 1
        elif "::AI_RESULT::" in line:
            d = json.loads(line.split("::AI_RESULT::", 1)[1].strip())
            p = ensure_post_obj(int(d["id"]))
            ai = d.get("ai", {})
            with live_lock:
                p["ai"] = ai
                live_state["counts"]["ai_done"] += 1
                if ai.get("is_medical_seeking"):
                    live_state["counts"]["confirmed"] += 1
        elif "::EMAIL_SENT::" in line:
            d = json.loads(line.split("::EMAIL_SENT::", 1)[1].strip())
            p = ensure_post_obj(int(d["id"]))
            sent = int(d.get("sent", 0))
            with live_lock:
                p["email_sent"] = sent > 0
                if sent > 0:
                    live_state["counts"]["emails"] += sent
    except Exception as e:
        log(f"live parse error: {e}", "error", "LIVE")
|
| 156 |
+
|
| 157 |
+
def read_groups(path: str) -> List[str]:
    """Return the non-blank lines of ``path`` with surrounding whitespace removed.

    A missing file yields an empty list.
    """
    if not os.path.exists(path):
        return []
    with open(path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    groups: List[str] = []
    for raw_line in contents.splitlines():
        cleaned = raw_line.strip()
        if cleaned:
            groups.append(cleaned)
    return groups
|
| 161 |
+
|
| 162 |
+
def slugify(url: str) -> str:
    """Turn ``url`` into a lowercase, dash-separated token.

    Every run of characters other than ASCII letters and digits becomes a
    single dash; leading and trailing dashes are removed.
    """
    dashed = re.sub(r"[^a-zA-Z0-9]+", "-", url)
    trimmed = dashed.strip("-")
    return trimmed.lower()
|
| 165 |
+
|
| 166 |
+
def build_gmail_service():
    """Build an authorized Gmail API client, or return None when unavailable.

    Credentials come from ``token.pickle`` when present. Invalid credentials
    are refreshed when they are expired and carry a refresh token; otherwise,
    or when the refresh fails, the interactive OAuth flow is run from
    ``credentials.json``. New or refreshed credentials are saved back to
    ``token.pickle``.

    This function is called at import time, so every failure (unreadable
    token file, failed refresh, failed OAuth flow, failed service build) is
    logged and results in ``None`` instead of an exception that would stop
    the server from starting.
    """
    creds = None
    if os.path.exists("token.pickle"):
        try:
            # NOTE(review): pickle.load executes code embedded in the file;
            # token.pickle must only ever come from a trusted source.
            with open("token.pickle", "rb") as token:
                creds = pickle.load(token)
        except Exception as e:
            log(f"token.pickle unreadable; ignoring it: {e}", "warn", "gmail")
            creds = None
    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except Exception as e:
                log(f"Gmail token refresh failed: {e}", "warn", "gmail")
        if not refreshed:
            if not os.path.exists("credentials.json"):
                log("credentials.json missing; Gmail unavailable", "warn", "gmail")
                return None
            try:
                flow = InstalledAppFlow.from_client_secrets_file("credentials.json", GMAIL_SCOPES)
                creds = flow.run_local_server(port=0)
            except Exception as e:
                log(f"Gmail OAuth flow failed: {e}", "error", "gmail")
                return None
        try:
            with open("token.pickle", "wb") as token:
                pickle.dump(creds, token)
        except (OSError, pickle.PicklingError) as e:
            # The credentials are still usable for this process.
            log(f"Could not save token.pickle: {e}", "warn", "gmail")
    try:
        return build("gmail", "v1", credentials=creds)
    except Exception as e:
        log(f"Gmail service build failed: {e}", "error", "gmail")
        return None


# Shared Gmail client; None when Gmail could not be configured.
gmail_service = build_gmail_service()
|
| 187 |
+
|
| 188 |
+
def send_html_email(to_emails: List[str], subject: str, html_content: str) -> int:
    """Send one HTML message per address and return how many sends succeeded.

    Uses the module-level Gmail client with ``SENDER_EMAIL`` as the From
    header. Returns 0 without sending anything when Gmail is not configured.
    A failure for one recipient is logged and does not stop the remaining sends.
    """
    if not gmail_service:
        log("Gmail not configured; skipping email", "warn", "gmail")
        return 0

    from email.message import EmailMessage

    delivered = 0
    for recipient in to_emails:
        try:
            message = EmailMessage()
            for header, value in (("to", recipient), ("from", SENDER_EMAIL), ("subject", subject)):
                message[header] = value
            message.set_content(html_content, subtype="html")
            # The Gmail API expects the whole RFC 822 message, URL-safe base64 encoded.
            payload = {"raw": base64.urlsafe_b64encode(message.as_bytes()).decode("utf-8")}
            gmail_service.users().messages().send(userId="me", body=payload).execute()
        except HttpError as exc:
            log(f"Gmail HTTP error to {recipient}: {exc}", "error", "gmail")
        except Exception as exc:
            log(f"Gmail send error to {recipient}: {exc}", "error", "gmail")
        else:
            delivered += 1
    return delivered
|
| 209 |
+
|
| 210 |
+
def build_confirmed_posts_email(groups_run: List["GroupRun"], all_confirmed_posts: List[Dict[str, Any]]) -> str:
    """Render the consolidated HTML report for one pipeline run.

    Args:
        groups_run: one record per processed group; the attributes ``link``,
            ``scraped_posts``, ``detected_posts`` and ``stage`` are read.
        all_confirmed_posts: post dicts; the keys ``id``, ``text``,
            ``group_link`` and ``ai_analysis`` (with ``urgency_level``,
            ``confidence`` and ``medical_summary``) are read.

    Returns:
        A complete HTML document as a string.

    Every interpolated value comes from scraped pages or model output, so all
    of them are converted to text and HTML-escaped (quotes included, since
    some land inside ``href`` attributes). Missing or null values render as
    "N/A".
    """

    def esc(value: Any) -> str:
        # None gets the same placeholder that is used for absent keys.
        return "N/A" if value is None else html.escape(str(value), quote=True)

    def render_post(p: Dict[str, Any]) -> str:
        ai = p.get("ai_analysis")
        if not isinstance(ai, dict):
            ai = {}
        return f"""
<div style="margin-bottom: 25px; padding: 12px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;">
<h4 style="margin-top: 0; margin-bottom: 8px;">Post ID: {esc(p.get('id'))} | Urgency: {esc(ai.get('urgency_level'))} | Confidence: {esc(ai.get('confidence'))}</h4>
<p style="margin: 5px 0;"><strong>Summary:</strong> {esc(ai.get('medical_summary'))}</p>
<p style="margin: 5px 0;"><strong>Text:</strong></p>
<pre style="white-space: pre-wrap; background-color: #f0f0f0; padding: 8px; border: 1px solid #eee; border-radius: 3px; font-family: monospace; font-size: 0.9em;">{esc(p.get('text'))}</pre>
<p style="margin: 5px 0;"><a href="{esc(p.get('group_link') or '#')}" target="_blank">View Group</a></p>
</div>"""

    total_groups = len(groups_run)
    total_scraped = sum(g.scraped_posts for g in groups_run)
    total_confirmed = len(all_confirmed_posts)

    table_rows = "".join(
        f"""
<tr>
<td style="padding: 8px; border-bottom: 1px solid #eee;"><a href="{esc(g.link)}" target="_blank">{esc(g.link)}</a></td>
<td style="padding: 8px; border-bottom: 1px solid #eee; text-align: center;">{esc(g.scraped_posts)}</td>
<td style="padding: 8px; border-bottom: 1px solid #eee; text-align: center;">{esc(g.detected_posts)}</td>
<td style="padding: 8px; border-bottom: 1px solid #eee;">{'OK' if g.stage == 'done' else 'ERROR'}</td>
</tr>"""
        for g in groups_run
    )
    summary_table_html = (
        '<h3>Group Summary</h3>'
        '<table style="width: 100%; border-collapse: collapse; margin-top: 8px; border: 1px solid #ddd;">'
        '<thead><tr style="background: #0f172a; color: #fff;">'
        '<th style="text-align: left; padding: 8px;">Group Link</th>'
        '<th style="text-align: center; padding: 8px;">Posts Scraped</th>'
        '<th style="text-align: center; padding: 8px;">Confirmed Posts</th>'
        '<th style="text-align: left; padding: 8px;">Status</th>'
        '</tr></thead>'
        f'<tbody>{table_rows}</tbody></table>'
    )

    if all_confirmed_posts:
        posts_html = "".join(render_post(p) for p in all_confirmed_posts)
    else:
        posts_html = "<p>No confirmed medical posts were found during this run.</p>"

    run_completed = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return (
        '<!DOCTYPE html><html><head>'
        '<title>Hillside Medical Group - Confirmed Medical Posts Summary</title></head>'
        '<body style="font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5;">'
        '<div style="max-width: 900px; margin: 20px auto; padding: 20px; background-color: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px;">'
        '<div style="background: #1e3c72; color: #fff; padding: 16px 20px; border-radius: 6px 6px 0 0;">'
        '<h2 style="margin: 0;">Hillside Medical Group - Confirmed Medical Posts</h2>'
        f'<div style="font-size: 0.9em;">Run completed: {run_completed}</div></div>'
        '<div style="padding: 16px;">'
        f'<p><strong>Overall Summary:</strong> Processed {total_groups} groups, scraped {total_scraped} posts, found {total_confirmed} confirmed medical posts.</p>'
        '<hr style="margin: 20px 0; border: 0; border-top: 1px solid #eee;">'
        f'{summary_table_html}'
        '<hr style="margin: 20px 0; border: 0; border-top: 1px solid #eee;">'
        '<h3>Confirmed Posts Details</h3>'
        f'{posts_html}</div>'
        '<div style="margin-top: 20px; padding: 10px; font-size: 0.8em; color: #666; border-top: 1px solid #eee;">'
        '<p>This email contains posts identified as potentially seeking personal medical help. Please review and take appropriate action.</p>'
        '<p><em>Note: The link provided is to the group. Direct post links are not currently extracted.</em></p>'
        '</div></div></body></html>'
    )
|
| 231 |
+
|
| 232 |
+
# Shared progress record for the (single) pipeline run.
state = PipelineState()

# Command-line flags whose following argument is a secret that must never be
# written to the log buffer (which is readable through the log API).
_SECRET_FLAGS = frozenset({"--gemini-keys"})


def _redact_args(args: List[str]) -> List[str]:
    """Return a copy of ``args`` with the value after each secret flag masked."""
    safe: List[str] = []
    hide_next = False
    for arg in args:
        safe.append("***" if hide_next else arg)
        hide_next = arg in _SECRET_FLAGS
    return safe


def stream_process_lines(args: List[str], env: Optional[Dict[str, str]] = None, tag: str = "FINAL5") -> int:
    """Run ``args`` as a child process, relaying its output into the log.

    Each non-empty stdout line is logged at "info" and each stderr line at
    "warn" under ``tag``. Lines starting with ``::`` are additionally passed
    to ``handle_event_line``. The logged command line has secret argument
    values masked.

    Args:
        args: command and arguments (executed without a shell).
        env: environment for the child; defaults to a copy of this process's.
        tag: log source label.

    Returns:
        The child's exit code.
    """
    log(f"Exec: {' '.join(_redact_args(args))}", "info", tag)
    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # Decode as UTF-8 regardless of the locale, and never let a stray
        # byte kill a reader thread.
        encoding="utf-8",
        errors="replace",
        bufsize=1,  # line-buffered
        env=env or os.environ.copy(),
    )

    def pump(pipe, name):
        # Closing the pipe once it reaches EOF releases its file descriptor.
        with pipe:
            for raw in pipe:
                line = (raw or "").rstrip("\n")
                if not line:
                    continue
                if line.startswith("::"):
                    try:
                        handle_event_line(line)
                    except Exception as e:
                        log(f"event parse error: {e}", "error", tag)
                log(line, "info" if name == "stdout" else "warn", tag)

    t1 = threading.Thread(target=pump, args=(proc.stdout, "stdout"), daemon=True)
    t2 = threading.Thread(target=pump, args=(proc.stderr, "stderr"), daemon=True)
    t1.start()
    t2.start()
    rc = proc.wait()
    # Bounded joins: the readers are daemon threads and keep draining on
    # their own if the pipes are still open after the child has exited.
    t1.join(timeout=0.2)
    t2.join(timeout=0.2)
    log(f"Exit code: {rc}", "info", tag)
    return rc
|
| 252 |
+
|
| 253 |
+
def call_final5_for_group(group_url: str, out_json: str, analysis_json: str, recipients: List[str]) -> Dict[str, Any]:
    """Run the final5 script for one group and report how it exited.

    The child is started headless with unbuffered, UTF-8 Python I/O. The
    configured Gemini keys are passed along when there are any.

    Returns:
        ``{"ok": <exit code was 0>, "code": <exit code>}``.
    """
    command = [
        PYTHON_BIN, FINAL5_PATH,
        "--group", group_url,
        "--out", out_json,
        "--analysis-out", analysis_json,
        "--recipients", ",".join(recipients),
        "--sender", SENDER_EMAIL,
        "--headless",
    ]
    if GEMINI_KEYS:
        command += ["--gemini-keys", ",".join(GEMINI_KEYS)]
    child_env = {**os.environ, "PYTHONUNBUFFERED": "1", "PYTHONIOENCODING": "utf-8"}
    exit_code = stream_process_lines(command, env=child_env, tag="FINAL5")
    return {"ok": exit_code == 0, "code": exit_code}
|
| 261 |
+
|
| 262 |
+
def _absorb_group_outputs(g: "GroupRun", link: str, confirmed_sink: List[Dict[str, Any]]) -> None:
    """Fill ``g`` from the JSON files written for it and collect its confirmed posts.

    Reads ``g.scraped_json`` and ``g.analysis_json`` when they exist, tags
    every confirmed post that lacks a ``group_link`` with ``link``, appends
    the posts to ``confirmed_sink`` and marks the group as done. Exceptions
    propagate to the caller.
    """
    if os.path.exists(g.scraped_json):
        with open(g.scraped_json, "r", encoding="utf-8") as fh:
            g.scraped_posts = len(json.load(fh))
    if os.path.exists(g.analysis_json):
        with open(g.analysis_json, "r", encoding="utf-8") as fh:
            analysis = json.load(fh)
        g.detected_posts = analysis.get("confirmed_medical", 0)
        g.emails_sent_by_final5 = analysis.get("emails_sent", 0)
        group_posts = analysis.get("posts", [])
        for post in group_posts:
            if "group_link" not in post:
                post["group_link"] = link
        confirmed_sink.extend(group_posts)
    g.stage = "done"
    log(f"Group done: scraped={g.scraped_posts}, confirmed={g.detected_posts}", "info", "ORCHESTRATOR")


def run_pipeline(recipients: List[str]):
    """Process every group from the groups file, then email and save a summary.

    For each group the final5 script is run, its output files are read into a
    GroupRun record, and progress is published through ``state``. Afterwards
    one consolidated email is sent to ``recipients`` and a JSON summary is
    written to the analysis directory. ``state.running`` is cleared on every
    exit path; unexpected errors are logged and reported in ``state.message``.
    """
    try:
        logs.clear()
        log("Pipeline starting", "info", "ORCHESTRATOR")
        state.running = True
        state.message = "initializing"
        state.progress = 0
        state.recipients = recipients
        state.groups.clear()

        links = read_groups(GROUPS_TXT)
        state.total = len(links)
        if not links:
            log("No groups found in groups.txt", "warn", "ORCHESTRATOR")
            state.message = "No groups"
            state.running = False
            return

        # Guard against division by zero in the percentage computations.
        divisor = max(1, state.total)
        all_confirmed_posts = []
        for position, link in enumerate(links, start=1):
            reset_live_state(link)
            g = GroupRun(link=link, stage="running")
            state.groups.append(g)
            state.current = position
            state.message = f"Processing {link}"
            state.progress = int(((position - 1) / divisor) * 100)
            log(f"[{position}/{state.total}] Processing group: {link}", "info", "ORCHESTRATOR")

            slug = slugify(link)
            g.scraped_json = os.path.join(SCRAPE_OUTDIR, f"{slug}.json")
            g.analysis_json = os.path.join(ANALYSIS_OUTDIR, f"analysis_{slug}.json")

            result = call_final5_for_group(link, g.scraped_json, g.analysis_json, recipients)
            if result.get("ok"):
                try:
                    _absorb_group_outputs(g, link, all_confirmed_posts)
                except Exception as e:
                    g.stage, g.error = "error", f"parse_error: {e}"
                    log(f"Parsing outputs failed for {link}: {e}", "error", "ORCHESTRATOR")
            else:
                g.stage, g.error = "error", f"final5 exit code {result.get('code')}"
                log(f"final5 failed for {link}: code {result.get('code')}", "error", "ORCHESTRATOR")
            state.progress = int((position / divisor) * 100)

        # A failure to build or send the email must not prevent the summary
        # from being written.
        try:
            html_content = build_confirmed_posts_email(state.groups, all_confirmed_posts)
            subject = f"🩺 Hillside - Confirmed Medical Posts Found ({len(all_confirmed_posts)} total)"
            sent_count = send_html_email(recipients, subject, html_content)
            log(f"Consolidated email sent to {len(recipients)} recipient(s), {sent_count} successful", "info", "GMAIL")
        except Exception as e:
            log(f"Error building or sending consolidated email: {e}", "error", "ORCHESTRATOR")

        summary = {"run_date": datetime.now().isoformat(), "groups": [g.__dict__ for g in state.groups]}
        summary_path = os.path.join(ANALYSIS_OUTDIR, "analysis_summary.json")
        with open(summary_path, "w", encoding="utf-8") as fh:
            json.dump(summary, fh, ensure_ascii=False, indent=2)

        state.summary_path = summary_path
        state.message = "All groups processed"
        state.progress = 100
        state.running = False
        log("Pipeline finished", "info", "ORCHESTRATOR")
    except Exception as e:
        state.message, state.running = f"pipeline_error: {e}", False
        log(f"Pipeline error: {e}\n{traceback.format_exc()}", "error", "ORCHESTRATOR")
|
| 321 |
+
|
| 322 |
+
@app.route("/")
def index():
    """Serve the dashboard page (index.html in the application directory)."""
    page_name = 'index.html'
    return send_from_directory('.', page_name)
|
| 325 |
+
|
| 326 |
+
@app.get("/api/system/status")
def system_status():
    """Report which parts of the system are configured and available."""
    snapshot = {
        "gmail": gmail_service is not None,
        "groups_file_exists": os.path.exists(GROUPS_TXT),
        "groups_count": len(read_groups(GROUPS_TXT)),
        "scrape_outdir": SCRAPE_OUTDIR,
        "analysis_outdir": ANALYSIS_OUTDIR,
        "sender_email": SENDER_EMAIL,
        "final5_exists": os.path.exists(FINAL5_PATH),
        "gemini_keys_count": len(GEMINI_KEYS),
    }
    return jsonify(snapshot)
|
| 334 |
+
|
| 335 |
+
@app.get("/api/groups")
def api_groups():
    """Return the group links currently listed in the groups file."""
    group_links = read_groups(GROUPS_TXT)
    return jsonify({"groups": group_links})
|
| 338 |
+
|
| 339 |
+
# Serializes the "is a run active? if not, claim it" decision so that two
# simultaneous start requests cannot both launch a pipeline.
_pipeline_start_lock = threading.Lock()


@app.post("/api/process/start")
def api_process_start():
    """Start the pipeline in a background thread.

    The optional JSON body may carry ``recipients`` as a list of addresses or
    a comma-separated string. When it is absent or contains no usable
    address, the configured sender address is used. A missing or non-JSON
    body is treated as empty.

    Responses:
        200: pipeline started (the recipients used are echoed back).
        400: ``recipients`` is neither a list nor a string.
        409: a run is already in progress.
    """
    with _pipeline_start_lock:
        if state.running:
            return jsonify({"success": False, "message": "Already running"}), 409

        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        recips = data.get("recipients") or [SENDER_EMAIL]
        if isinstance(recips, str):
            recips = [e.strip() for e in recips.split(",") if e.strip()]
        elif isinstance(recips, list):
            recips = [e.strip() for e in recips if isinstance(e, str) and e.strip()]
        else:
            return jsonify({"success": False, "message": "recipients must be a list or a comma-separated string"}), 400
        if not recips:
            recips = [SENDER_EMAIL]

        # Claim the run before the worker starts; the worker sets the flag
        # only once it is running, which leaves a window for a second start.
        state.running = True

    try:
        threading.Thread(target=run_pipeline, args=(recips,), daemon=True).start()
    except Exception:
        # The worker never started, so nothing else will release the claim.
        state.running = False
        raise
    log(f"Start requested by client; recipients={recips}", "info", "API")
    return jsonify({"success": True, "message": "Pipeline started", "recipients": recips})
|
| 348 |
+
|
| 349 |
+
@app.get("/api/process/status")
def api_process_status():
    """Return the pipeline's progress fields and the per-group records."""
    status = {
        "running": state.running,
        "message": state.message,
        "progress": state.progress,
        "current": state.current,
        "total": state.total,
        "groups": [vars(run) for run in state.groups],
    }
    return jsonify(status)
|
| 353 |
+
|
| 354 |
+
@app.get("/api/process/logs")
def api_process_logs():
    """Return log entries for a polling client.

    Query parameters:
        after: id of the last entry the client already has (default 0).
        limit: maximum number of entries to return (default 500, minimum 1).

    Responses:
        200: ``{"entries": [...], "last": <id>}``.
        400: ``after`` or ``limit`` is not an integer.
    """
    try:
        after_id = int(request.args.get("after", "0"))
        limit = int(request.args.get("limit", "500"))
    except ValueError:
        return jsonify({"success": False, "message": "'after' and 'limit' must be integers"}), 400
    # A zero or negative limit would turn into an unintended slice of the buffer.
    limit = max(1, limit)
    data, last_id = logs.get_after(after_id, limit=limit)
    return jsonify({"entries": data, "last": last_id})
|
| 358 |
+
|
| 359 |
+
@app.post("/api/process/clear-logs")
def api_clear_logs():
    """Empty the log buffer and record that a client asked for it."""
    logs.clear()
    log("Logs cleared by client", "info", "API")
    outcome = {"success": True}
    return jsonify(outcome)
|
| 363 |
+
return jsonify({"success": True})
|
| 364 |
+
|
| 365 |
+
@app.get("/api/live/state")
def api_live_state():
    """Return the live view of the group currently being processed."""
    # Serialization happens while the lock is held.
    with live_lock:
        response = jsonify({"success": True, "data": live_state})
    return response
|
| 368 |
+
|
| 369 |
+
@app.get("/api/results/summary")
def api_results_summary():
    """Return the saved run summary, or 404 when none has been written yet.

    The path recorded by the last run is used when there is one; otherwise
    the default summary location inside the analysis directory.
    """
    summary_file = state.summary_path
    if not summary_file:
        summary_file = os.path.join(ANALYSIS_OUTDIR, "analysis_summary.json")
    if not os.path.exists(summary_file):
        return jsonify({"success": False, "message": "No summary yet"}), 404
    with open(summary_file, "r", encoding="utf-8") as fh:
        payload = json.load(fh)
        return jsonify({"success": True, "data": payload})
|
| 374 |
+
|
| 375 |
+
@app.get("/api/recipients")
def api_get_recipients():
    """Return the list stored in recipients.json.

    Responses:
        200: the file holds a JSON list.
        404: the file does not exist.
        500: the file cannot be read or parsed, or does not hold a list.
    """
    recipients_path = "recipients.json"
    if not os.path.exists(recipients_path):
        return jsonify({"success": False, "message": "recipients.json not found"}), 404
    try:
        with open(recipients_path, "r", encoding="utf-8") as fh:
            loaded = json.load(fh)
        if isinstance(loaded, list):
            return jsonify({"success": True, "data": loaded})
        return jsonify({"success": False, "message": "Invalid format"}), 500
    except Exception as exc:
        return jsonify({"success": False, "message": f"Error reading file: {str(exc)}"}), 500
|
| 385 |
+
|
| 386 |
+
# Direct execution: listen on every interface, on $PORT when set (default 7860).
if __name__ == "__main__":
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port)
|
final5.py
ADDED
|
@@ -0,0 +1,462 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, re, sys, time, json, base64, pickle, argparse, traceback
|
| 2 |
+
from typing import List, Dict, Any, Tuple
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
# Best effort: make stdout and stderr emit UTF-8 and replace characters that
# cannot be encoded. Streams that do not support reconfigure() are left as
# they are; the first failure ends the attempt.
try:
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    pass
|
| 10 |
+
|
| 11 |
+
from selenium import webdriver
|
| 12 |
+
from selenium.webdriver.common.by import By
|
| 13 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 14 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 15 |
+
from selenium.common.exceptions import (
|
| 16 |
+
StaleElementReferenceException, NoSuchElementException, TimeoutException
|
| 17 |
+
)
|
| 18 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 19 |
+
from google.auth.transport.requests import Request
|
| 20 |
+
from googleapiclient.discovery import build
|
| 21 |
+
from googleapiclient.errors import HttpError
|
| 22 |
+
import google.generativeai as genai
|
| 23 |
+
from google.api_core.exceptions import ResourceExhausted
|
| 24 |
+
|
| 25 |
+
def get_args():
    """Parse this script's command-line options from ``sys.argv``.

    ``--group``, ``--out`` and ``--analysis-out`` are required. The defaults
    for ``--sender``, ``--cookies-file``, ``--max-scrolls`` and
    ``--scroll-pause`` come from the environment variables SENDER_EMAIL,
    FB_COOKIES_FILE, MAX_SCROLLS and SCROLL_PAUSE.
    """
    env = os.environ.get
    parser = argparse.ArgumentParser(description="Scrape one FB group, analyze, and email alerts.")
    for required_flag in ("--group", "--out", "--analysis-out"):
        parser.add_argument(required_flag, required=True)
    parser.add_argument("--recipients", default="")
    parser.add_argument("--sender", default=env("SENDER_EMAIL", ""))
    parser.add_argument("--cookies-file", default=env("FB_COOKIES_FILE", "facebook_cookies.pkl"))
    parser.add_argument("--max-scrolls", type=int, default=int(env("MAX_SCROLLS", "5")))
    parser.add_argument("--scroll-pause", type=float, default=float(env("SCROLL_PAUSE", "3")))
    parser.add_argument("--gemini-keys", default="")
    parser.add_argument("--headless", action="store_true", help="Prefer headless browser")
    return parser.parse_args()
|
| 38 |
+
|
| 39 |
+
# OAuth scopes requested for the Gmail client.
GMAIL_SCOPES = [
    "https://www.googleapis.com/auth/gmail.send",
    "https://www.googleapis.com/auth/gmail.metadata",
]


def build_gmail_service():
    """Build and verify an authorized Gmail API client, or return None.

    Credentials come from ``token.pickle`` when present. Invalid credentials
    are refreshed when they are expired and carry a refresh token; when no
    usable credentials remain, the interactive OAuth flow is run from
    ``credentials.json``. The resulting credentials are saved back to
    ``token.pickle``, and the client is verified with a profile lookup.

    Every failure is reported on stdout with a ``[GMAIL]`` prefix and results
    in ``None``; this function does not raise for an unreadable token file, a
    failed refresh, a failed OAuth flow or a failed token write.
    """
    creds = None
    if os.path.exists("token.pickle"):
        try:
            # NOTE(review): pickle.load executes code embedded in the file;
            # token.pickle must only ever come from a trusted source.
            with open("token.pickle", "rb") as token:
                creds = pickle.load(token)
        except Exception as e:
            print(f"[GMAIL] token.pickle unreadable; ignoring it: {e}")
            creds = None
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
            except Exception as e:
                print(f"[GMAIL] token refresh failed: {e}")
                creds = None
        if not creds:
            if not os.path.exists("credentials.json"):
                print("[GMAIL] credentials.json missing; Gmail unavailable")
                return None
            try:
                flow = InstalledAppFlow.from_client_secrets_file("credentials.json", GMAIL_SCOPES)
                creds = flow.run_local_server(port=0)
            except Exception as e:
                print(f"[GMAIL] OAuth flow failed: {e}")
                return None
        try:
            with open("token.pickle", "wb") as token:
                pickle.dump(creds, token)
        except (OSError, pickle.PicklingError) as e:
            # The credentials are still usable for this process.
            print(f"[GMAIL] could not save token.pickle: {e}")
    try:
        svc = build("gmail", "v1", credentials=creds)
        # Profile lookup confirms the credentials actually work.
        _ = svc.users().getProfile(userId="me").execute()
        return svc
    except Exception as e:
        print(f"[GMAIL] service build failed: {e}")
        return None
|
| 73 |
+
|
| 74 |
+
def send_html_email(service, sender: str, to_list: List[str], subject: str, html: str) -> int:
    """Send one HTML email per recipient through the Gmail API.

    Args:
        service: Gmail API service object; a falsy value disables sending.
        sender: Value placed in the ``from`` header.
        to_list: Recipient addresses. Each gets its own message; blank or
            whitespace-only entries are skipped.
        subject: Subject line.
        html: HTML body.

    Returns:
        Number of messages the API accepted. Per-recipient failures are
        logged and do not stop the remaining sends.
    """
    if not service:
        print("[GMAIL] service not available; skipping email")
        return 0
    from email.message import EmailMessage

    sent = 0
    for to in to_list:
        to = (to or "").strip()
        if not to:
            # Empty entries (e.g. from splitting "a@x.com,") would only
            # produce a guaranteed API error.
            continue
        try:
            msg = EmailMessage()
            msg["to"] = to
            msg["from"] = sender
            msg["subject"] = subject
            msg.set_content(html, subtype="html")
            # The Gmail API expects the RFC 822 message as URL-safe base64.
            raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
            service.users().messages().send(userId="me", body={"raw": raw}).execute()
            sent += 1
        except HttpError as e:
            print(f"[GMAIL] http error to {to}: {e}")
        except Exception as e:
            print(f"[GMAIL] send error to {to}: {e}")
    return sent
|
| 95 |
+
|
| 96 |
+
GEMINI_MODEL = "gemini-1.5-flash"


class GeminiManager:
    """Wrap a Gemini model and move through a list of API keys on failure.

    Keys are tried in order. Once a key is rotated away from it is never
    revisited, so after the last key fails ``is_available()`` stays False.
    """

    def __init__(self, api_keys: List[str]):
        """Store the keys and configure the first one that sets up cleanly."""
        # Copy so later changes to the caller's list cannot shift the index.
        self.api_keys = list(api_keys or [])
        self.current_key_index = 0
        self.model = None
        self._setup_model()

    def _setup_model(self):
        """Configure the model from ``current_key_index`` onward.

        Leaves ``self.model`` as None when there are no keys or every
        remaining key fails to configure.
        """
        if not self.api_keys:
            print("[GEMINI] No API keys provided")
            self.model = None
            return
        while self.current_key_index < len(self.api_keys):
            try:
                api_key = self.api_keys[self.current_key_index]
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel(GEMINI_MODEL)
                print(f"[GEMINI] Using API key {self.current_key_index + 1}")
                return
            except Exception as e:
                print(f"[GEMINI] Failed to setup with key {self.current_key_index + 1}: {e}")
                self.current_key_index += 1
        print("[GEMINI] All API keys failed")
        self.model = None

    def rotate_key(self):
        """Abandon the current key and configure the next usable one, if any."""
        self.current_key_index += 1
        self._setup_model()

    def is_available(self):
        """Return True while a model is configured."""
        return self.model is not None

    def generate_content(self, prompt: str):
        """Generate content for ``prompt``, retrying once on quota exhaustion.

        Raises:
            RuntimeError: when no model is configured.
            ResourceExhausted: when the quota error persists after rotating,
                or no key is left to rotate to.
        """
        if not self.is_available():
            raise RuntimeError("No available Gemini model")
        try:
            return self.model.generate_content(prompt)
        except ResourceExhausted:
            self.rotate_key()
            if self.is_available():
                return self.model.generate_content(prompt)
            # Bare raise keeps the original traceback for the caller's backoff.
            raise
|
| 141 |
+
|
| 142 |
+
def parse_retry_seconds_from_error(err: Exception) -> int:
    """Extract the server-suggested retry delay, in whole seconds, from an error.

    Two textual forms are recognised in ``str(err)``:
    ``retry_delay { seconds: N }`` and ``"retryDelay": "Ns"``. The second form
    may carry a fractional part (``"12.5s"``), which is rounded up.

    Returns:
        The parsed delay, or 45 when neither form is present.
    """
    s = str(err)
    m1 = re.search(r"retry[_ ]delay\s*\{\s*seconds:\s*(\d+)", s, re.IGNORECASE)
    if m1:
        return int(m1.group(1))
    m2 = re.search(r'"retryDelay"\s*:\s*"(\d+)(?:\.(\d+))?s"', s)
    if m2:
        seconds = int(m2.group(1))
        # Round a non-zero fraction up so the caller never retries too early.
        if m2.group(2) and int(m2.group(2)):
            seconds += 1
        return seconds
    return 45
|
| 149 |
+
|
| 150 |
+
def ai_medical_intent(gemini_manager: "GeminiManager", post_text: str, found_keywords: List[str]) -> Dict[str,Any]:
    """Ask Gemini whether a post is a personal request for medical help.

    Args:
        gemini_manager: Object exposing ``is_available()``,
            ``generate_content(prompt)`` and ``rotate_key()``; may be None.
        post_text: Text of the post to classify.
        found_keywords: Keywords already matched in the post; echoed in the
            prompt and used as the default ``matched_keywords``.

    Returns:
        The JSON object parsed from the model reply, with
        ``is_medical_seeking`` coerced to a real bool, or a negative fallback
        dict when the model is unavailable, throttled, or returns nothing
        usable after four attempts.
    """
    fallback = {
        "is_medical_seeking": False,
        "confidence": "low",
        "medical_summary": "Not a medical request (AI unavailable/throttled)",
        "suggested_services": [],
        "urgency_level": "low",
        "analysis": "Keyword-based fallback",
        "reasoning": "short explanation",
        "matched_keywords": found_keywords
    }
    if not gemini_manager or not gemini_manager.is_available():
        return fallback
    keywords_str = ", ".join(found_keywords) if found_keywords else "none"
    prompt = f"""
Analyze this social post and decide if the author is genuinely seeking medical help, doctor/hospital recommendations, or healthcare services for PERSONAL HEALTH NEEDS (not business, donations, or casual mentions).
KEYWORDS FOUND IN POST: {keywords_str}
CRITICAL RULES:
1. ONLY flag posts where someone is seeking medical care for themselves or a loved one
2. IGNORE posts about:
- Business services (e.g., "Looking for a doctor for my clinic")
- Donations or fundraising (e.g., "Raising money for surgery")
- Selling medical products
- Job postings for medical professionals
- General health information sharing
- Research or academic inquiries
3. ONLY flag if it's a PERSONAL HEALTH NEED
Post: "{post_text}"
Return ONLY JSON:
{{
"is_medical_seeking": true/false,
"confidence": "high/medium/low",
"medical_summary": "short summary",
"suggested_services": ["service1","service2"],
"urgency_level": "high/medium/low",
"analysis": "why it's seeking help",
"reasoning": "short explanation",
"matched_keywords": ["keyword1", "keyword2"]
}}
"""
    max_attempts = 4
    for attempt in range(1, max_attempts + 1):
        try:
            resp = gemini_manager.generate_content(prompt)
            txt = (resp.text or "").strip()
            # The reply may wrap the JSON in prose or code fences; keep only
            # the outermost {...} span.
            start, end = txt.find("{"), txt.rfind("}") + 1
            if start >= 0 and end > start:
                result = json.loads(txt[start:end])
                flag = result.get("is_medical_seeking", False)
                if isinstance(flag, str):
                    # bool("false") is True, so string answers need parsing.
                    flag = flag.strip().lower() in ("true", "yes", "1")
                result["is_medical_seeking"] = bool(flag)
                if "matched_keywords" not in result:
                    result["matched_keywords"] = found_keywords
                return result
            return fallback
        except ValueError as err:
            # Malformed JSON (json.JSONDecodeError is a ValueError) or an
            # unreadable response body (presumably what resp.text raises for
            # blocked replies -- TODO confirm). The API key is not at fault,
            # so retry without rotating it away.
            print(f"[GEMINI] unusable response: {err}")
            continue
        except ResourceExhausted as err:
            # Sleeping is pointless if no retry can follow.
            if attempt == max_attempts or not gemini_manager.is_available():
                return fallback
            wait_s = min(parse_retry_seconds_from_error(err) + 2, 120)
            print(f"[GEMINI] 429 rate limit; backoff {wait_s}s (attempt {attempt}/4)")
            time.sleep(wait_s)
        except Exception as err:
            print(f"[GEMINI] error: {err}")
            gemini_manager.rotate_key()
            if not gemini_manager.is_available():
                return fallback
    return fallback
|
| 216 |
+
|
| 217 |
+
MEDICAL_KEYWORDS = [
    "doctor","physician","primary care","healthcare","medical","clinic","hospital",
    "urgent care","emergency","er","specialist","pediatrician","dentist",
    "gynecologist","obgyn","women's health","health center","family doctor",
    "maternity","prenatal","postnatal","labor","delivery",
    "need doctor","looking for doctor","find doctor","recommend doctor",
    "medical help","health help","appointment","checkup","treatment",
    "prescription","medicine","surgery","best hospital","best clinic",
    "where to go","doctor recommendation",
    "pregnancy","birth control","contraception","fertility",
    "hillside","medical group","wellness center"
]

# Keywords this short must match as whole words; as plain substrings they
# hit almost any text ("er" is inside "other", "water", "her", ...).
_SHORT_KEYWORD_MAX_LEN = 3

# Compiled once at import. Every keyword must start at a word boundary; longer
# keywords may still be followed by a suffix so "doctors" matches "doctor".
_KEYWORD_PATTERNS = [
    (
        kw,
        re.compile(
            r"\b" + re.escape(kw) + (r"\b" if len(kw) <= _SHORT_KEYWORD_MAX_LEN else "")
        ),
    )
    for kw in MEDICAL_KEYWORDS
]


def contains_keywords(text: str) -> Tuple[bool, List[str]]:
    """Report which medical keywords occur in ``text`` (case-insensitive).

    Args:
        text: Post text; None is treated as empty.

    Returns:
        ``(found, hits)`` where ``hits`` lists the matched keywords in
        ``MEDICAL_KEYWORDS`` order and ``found`` is True when it is non-empty.
    """
    tl = (text or "").lower()
    hits = [kw for kw, pattern in _KEYWORD_PATTERNS if pattern.search(tl)]
    return (len(hits) > 0, hits)
|
| 234 |
+
|
| 235 |
+
def new_driver(headless: bool):
    """Create a Chrome WebDriver with the scraper's browser options.

    Args:
        headless: When true, the headless/container flags are added too.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    opts = webdriver.ChromeOptions()
    for flag in (
        "--disable-notifications",
        "--disable-web-security",
        "--disable-features=IsolateOrigins,site-per-process",
        "--disable-blink-features=AutomationControlled",
    ):
        opts.add_argument(flag)
    opts.add_experimental_option("useAutomationExtension", False)
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    for flag in (
        "--window-size=1920,1080",
        "--lang=en-US,en",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    ):
        opts.add_argument(flag)
    if headless:
        # NOTE(review): indentation was lost in this copy of the file; all
        # seven flags below are assumed to be headless-only -- confirm.
        for flag in (
            "--headless=new",
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-extensions",
            "--disable-plugins",
            "--disable-images",
        ):
            opts.add_argument(flag)
    browser = webdriver.Chrome(options=opts)
    try:
        # Best effort: make navigator.webdriver read as undefined on every new
        # document; a failure here is deliberately ignored.
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": "Object.defineProperty(navigator, 'webdriver', { get: () => undefined });"
        })
    except Exception:
        pass
    return browser
|
| 262 |
+
|
| 263 |
+
def load_cookies(driver, cookies_file: str):
    """Open facebook.com, install pickled cookies into the browser, and reload.

    Args:
        driver: WebDriver used to open the page and receive the cookies.
        cookies_file: Path to a pickle file holding an iterable of cookie dicts.

    Raises:
        RuntimeError: if the file is missing, or anything else fails while
            loading the cookies or refreshing the page.
    """
    print("[FB] Loading Facebook homepage...")
    # Cookies can only be added once the browser is on the matching domain.
    driver.get("https://www.facebook.com")
    time.sleep(3)
    try:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # cookie files produced by a trusted source.
        with open(cookies_file, "rb") as f:
            cookies = pickle.load(f)
        rejected = 0
        for cookie in cookies:
            # Normalise sameSite values other than Strict/Lax/None to "Lax".
            if "sameSite" in cookie and cookie["sameSite"] not in ["Strict","Lax","None"]:
                cookie["sameSite"] = "Lax"
            try:
                driver.add_cookie(cookie)
            except Exception:
                # One bad cookie should not abort the login; count it instead
                # of dropping it silently.
                rejected += 1
        if rejected:
            print(f"[FB] {rejected} cookie(s) rejected by the browser")
        print("[FB] Cookies loaded. Refreshing page...")
        driver.refresh()
        time.sleep(5)
    except FileNotFoundError:
        raise RuntimeError(f"[FB] Cookies file not found: {cookies_file}") from None
    except Exception as e:
        # Chain the cause so the original traceback survives.
        raise RuntimeError(f"[FB] Cookie load error: {e}") from e
|
| 284 |
+
|
| 285 |
+
def wait_group_feed(driver, wait, timeout: float = 30):
    """Block until the group feed (or at least one article) is in the DOM.

    Args:
        driver: WebDriver showing the group page.
        wait: WebDriverWait used for the initial ``<body>`` presence check.
        timeout: Seconds to keep polling for the feed; defaults to the
            previously hard-coded 30.

    Raises:
        TimeoutException: if neither the feed container nor any article
            element appears within ``timeout`` seconds.
    """
    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    feed_xpaths = (
        "//div[@data-pagelet='GroupFeed' or @role='feed']",
        "//div[@role='article']",
    )
    # A monotonic clock is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        for xpath in feed_xpaths:
            try:
                driver.find_element(By.XPATH, xpath)
                return
            except NoSuchElementException:
                continue
        time.sleep(1)
    raise TimeoutException("Timed out waiting for group feed")
|
| 302 |
+
|
| 303 |
+
def find_message_nodes(driver):
    """Return candidate post-text elements using the first selector that matches.

    The two message-preview attribute selectors are tried in order; if both
    come back empty, every non-empty ``dir='auto'`` div inside an article is
    returned instead (possibly an empty list).
    """
    preferred = (
        "//div[@data-ad-preview='message']",
        "//div[@data-ad-comet-preview='message']",
    )
    for xpath in preferred:
        found = driver.find_elements(By.XPATH, xpath)
        if found:
            return found
    return driver.find_elements(By.XPATH, "//div[@role='article']//div[@dir='auto' and string-length(normalize-space())>0]")
|
| 309 |
+
|
| 310 |
+
def _node_text(node):
    """Return the stripped text of ``node``, falling back to its article.

    When the node's own text is shorter than 20 characters, the text of the
    nearest ``role='article'`` ancestor is used if one can be found.
    """
    txt = (node.text or "").strip()
    if len(txt) < 20:
        try:
            art = node.find_element(By.XPATH, "ancestor::div[@role='article']")
            txt = (art.text or "").strip()
        except Exception:
            pass
    return txt


def scrape_group(driver, wait, group_url: str, max_scrolls: int, pause: float):
    """Scroll through a group feed and collect unique post texts.

    Args:
        driver: WebDriver to drive (cookies are expected to be loaded already).
        wait: WebDriverWait used for the feed and readyState checks.
        group_url: URL of the group page.
        max_scrolls: Number of scroll-and-collect rounds.
        pause: Seconds to sleep after each scroll.

    Returns:
        List of dicts ``{"id", "text", "group_link"}`` with 1-based ids in
        discovery order.
    """
    print(f"[SCRAPE] Navigating to group: {group_url}")
    driver.get(group_url)
    wait_group_feed(driver, wait)
    posts, seen, rects = [], set(), set()
    total = 0
    # UI chrome that marks a text blob as a whole card, not a post body.
    junk_markers = ("LikeCommentShare", "Write a comment", "View more comments")
    for s in range(max_scrolls):
        print(f"[SCRAPE] --- Scroll {s+1}/{max_scrolls} ---")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            wait.until(lambda d: d.execute_script("return document.readyState") == "complete")
        except Exception:
            pass
        time.sleep(pause)
        try:
            divs = find_message_nodes(driver)
            print(f"[SCRAPE] Nodes found: {len(divs)}")
        except Exception as e:
            print(f"[SCRAPE] find error: {e}")
            continue
        added = 0
        for d in divs:
            try:
                # Read rect once; each access to the property is presumably a
                # separate WebDriver round trip -- TODO confirm.
                r = d.rect
                rect = (r.get('x'), r.get('y'), r.get('width'), r.get('height'))
                if rect in rects:
                    continue
                rects.add(rect)
            except Exception:
                # Geometry is only a cheap duplicate filter; if it cannot be
                # read, fall through to the text-based dedupe below.
                pass
            try:
                txt = _node_text(d)
            except StaleElementReferenceException:
                continue
            if not txt or len(txt) < 20:
                continue
            if txt in seen:
                continue
            wc = len(re.findall(r"\b\w+\b", txt))
            if wc > 7 and not any(j in txt for j in junk_markers):
                seen.add(txt)
                total += 1
                posts.append({"id": total, "text": txt, "group_link": group_url})
                added += 1
        print(f"[SCRAPE] New posts this scroll: {added}")
    # NOTE(review): placement after the loop is inferred (indentation was lost
    # in this copy); confirm it was not printed once per scroll.
    print(f"[SCRAPE] Total unique posts: {total}")
    return posts
|
| 359 |
+
|
| 360 |
+
def try_scrape_with_fallback(group_url: str, cookies_file: str, max_scrolls: int, pause: float, headless: bool = True):
    """Start a browser, log in via cookies, and scrape one group.

    Despite the name, only one attempt is made; there is no second browser
    mode to fall back to.

    Args:
        group_url: URL of the group to scrape.
        cookies_file: Pickled cookie file passed to ``load_cookies``.
        max_scrolls: Number of scroll rounds for ``scrape_group``.
        pause: Seconds to wait after each scroll.
        headless: Browser mode passed to ``new_driver``; defaults to True,
            which was previously hard-coded.

    Returns:
        ``(posts, driver)`` on success; the caller must quit the driver.
        ``([], None)`` on any failure, after the driver has been quit here.
    """
    driver = new_driver(headless=headless)
    wait = WebDriverWait(driver, 15)
    try:
        load_cookies(driver, cookies_file)
        posts = scrape_group(driver, wait, group_url, max_scrolls, pause)
        return posts, driver
    except Exception as e:
        try:
            driver.quit()
        except Exception:
            pass
        mode = "headless" if headless else "headed"
        print(f"[SCRAPE] Error in {mode} mode: {e}")
        return [], None
|
| 374 |
+
|
| 375 |
+
def main():
    """Scrape one group, keyword-filter and AI-classify its posts, write JSON.

    Writes the raw posts to ``--out`` and the analysis report to
    ``--analysis-out``. Progress is reported on stdout, including the
    ``::SCRAPE_SAVED::``, ``::KW_HIT::``, ``::AI_RESULT::`` and
    ``::ANALYSIS_SAVED::`` marker lines.
    """
    args = get_args()
    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
    os.makedirs(os.path.dirname(args.analysis_out) or ".", exist_ok=True)

    # Keys come from --gemini-keys (comma separated) or, failing that, from
    # GEMINI_API_KEY_1 .. GEMINI_API_KEY_5.
    gemini_keys = []
    if args.gemini_keys:
        gemini_keys = [k.strip() for k in args.gemini_keys.split(",") if k.strip()]
    else:
        for i in range(1, 6):
            key = os.environ.get(f"GEMINI_API_KEY_{i}")
            if key:
                gemini_keys.append(key)
    gemini_manager = GeminiManager(gemini_keys) if gemini_keys else None

    # NOTE(review): the service is built but never used below, and
    # "emails_sent" is always 0. The call is kept for its side effects (token
    # refresh, log lines); confirm whether sending belongs here or in the caller.
    gmail = build_gmail_service()

    posts, driver = try_scrape_with_fallback(args.group, args.cookies_file, args.max_scrolls, args.scroll_pause)
    if driver:
        try:
            driver.quit()
        except Exception:
            pass

    try:
        with open(args.out, "w", encoding="utf-8") as f:
            json.dump(posts, f, ensure_ascii=False, indent=2)
        print(f"[SCRAPE] Saved scraped posts to {args.out}")
        print(f"::SCRAPE_SAVED::{args.out}")
    except Exception as e:
        print(f"[SCRAPE] Error saving posts: {e}")

    keyword_hits, confirmed = [], []
    for p in posts:
        has, hits = contains_keywords(p.get("text",""))
        if has:
            p["found_keywords"] = hits
            keyword_hits.append(p)
            print(f"::KW_HIT::{json.dumps({'id': p['id'], 'found_keywords': hits}, ensure_ascii=False)}")

    per_call_sleep = 7
    for idx, p in enumerate(keyword_hits, start=1):
        found_kws = p.get("found_keywords", [])
        ai = ai_medical_intent(gemini_manager, p.get("text",""), found_kws)
        p["ai_analysis"] = ai
        print(f"::AI_RESULT::{json.dumps({'id': p['id'], 'ai': ai}, ensure_ascii=False)}")
        if ai.get("is_medical_seeking"):
            confirmed.append(p)
        # Pace API calls only while a model can actually be called; without
        # one every post gets the instant fallback and sleeping is wasted time.
        gemini_usable = bool(gemini_manager) and gemini_manager.is_available()
        if idx < len(keyword_hits) and gemini_usable:
            time.sleep(per_call_sleep)

    report = {
        "analysis_date": datetime.now().isoformat(),
        "group_link": args.group,
        "total_posts": len(posts),
        "keyword_hits": len(keyword_hits),
        "confirmed_medical": len(confirmed),
        "emails_sent": 0,
        "posts": confirmed
    }

    try:
        with open(args.analysis_out, "w", encoding="utf-8") as f:
            json.dump(report, f, ensure_ascii=False, indent=2)
        print(f"[ANALYSIS] Saved analysis to {args.analysis_out}")
        print(f"::ANALYSIS_SAVED::{args.analysis_out}")
    except Exception as e:
        print(f"[ANALYSIS] Error saving analysis: {e}")
|
| 445 |
+
|
| 446 |
+
if __name__ == "__main__":
    try:
        # Decode secrets from environment variables and write them to the
        # files the rest of the script reads.
        if 'CREDENTIALS_B64' in os.environ:
            # Explicit encoding: the payload is decoded as UTF-8, so it must
            # not be re-encoded with a platform default.
            with open('credentials.json', 'w', encoding='utf-8') as f:
                f.write(base64.b64decode(os.environ['CREDENTIALS_B64']).decode('utf-8'))

        if 'FB_COOKIES_B64' in os.environ:
            # Write where the --cookies-file default points, so setting
            # FB_COOKIES_FILE no longer separates the writer from the reader.
            cookies_path = os.environ.get('FB_COOKIES_FILE', 'facebook_cookies.pkl')
            os.makedirs(os.path.dirname(cookies_path) or '.', exist_ok=True)
            with open(cookies_path, 'wb') as f:
                f.write(base64.b64decode(os.environ['FB_COOKIES_B64']))

        main()
    except Exception as e:
        # Print the failure to stdout, then re-raise so the process still
        # exits with an error.
        print("Unhandled error:")
        print(e)
        print(traceback.format_exc())
        raise
|
groups.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
https://www.facebook.com/groups/Lifepaths
|
| 2 |
+
https://www.facebook.com/groups/1324510239249728
|
index.html
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width,initial-scale=1"/>
|
| 6 |
+
<title>Hillside Medical Group - Social Media Monitor</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root{
|
| 9 |
+
--bg:#f6f7fb; --ink:#0f172a; --muted:#475569;
|
| 10 |
+
--card:#ffffff; --border:#e5e7eb; --shadow:0 6px 24px rgba(15,23,42,.06);
|
| 11 |
+
--grad1:#2c5aa0; --grad2:#1e3c72; /* Blue theme */
|
| 12 |
+
--ok:#22c55e; --warn:#f59e0b; --err:#ef4444;
|
| 13 |
+
--term-bg:#0b1221; --term-ink:#22c55e; --term-hdr:#07101d; --term-border:#1f2937; --term-meta:#86efac;
|
| 14 |
+
--badge:#e2e8f0; --badge-ink:#0f172a;
|
| 15 |
+
--kw:#f59e0b; --kw-ink:#1f2937;
|
| 16 |
+
--pill:#dcfce7; --pill-ink:#166534;
|
| 17 |
+
}
|
| 18 |
+
*{box-sizing:border-box}
|
| 19 |
+
body{margin:0;font-family:Inter,system-ui,Arial,sans-serif;background:var(--bg);color:var(--ink)}
|
| 20 |
+
.wrap{max-width:1400px;margin:24px auto;padding:0 16px}
|
| 21 |
+
.header{background:linear-gradient(135deg,var(--grad1),var(--grad2));color:#fff;border-radius:14px;padding:18px;box-shadow:0 8px 28px rgba(15,23,42,.18); text-align: center;} /* Centered header text */
|
| 22 |
+
h1{margin:0 0 6px 0;font-size:22px;font-weight:600}
|
| 23 |
+
.grid{display:grid;gap:16px;grid-template-columns:1fr 420px}
|
| 24 |
+
.card{background:var(--card);border:1px solid var(--border);border-radius:14px;box-shadow:var(--shadow);padding:18px}
|
| 25 |
+
.btn{background:var(--ok);color:#fff;border:none;border-radius:10px;padding:10px 14px;font-weight:600;cursor:pointer}
|
| 26 |
+
.btn.red{background:var(--err)} .btn.gray{background:#64748b}
|
| 27 |
+
.btn[disabled]{opacity:.5;cursor:not-allowed}
|
| 28 |
+
input, select{width:100%;padding:10px 12px;border:1px solid var(--border);border-radius:10px} /* Added select style */
|
| 29 |
+
label{font-weight:600; display: block; margin-bottom: 6px;} /* Improved label spacing */
|
| 30 |
+
.muted{color:var(--muted);font-size:13px}
|
| 31 |
+
.bar{height:12px;background:var(--border);border-radius:999px;overflow:hidden}
|
| 32 |
+
.fill{height:100%;background:linear-gradient(90deg,var(--grad1),#667eea);width:0%}
|
| 33 |
+
.mono{font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-size:12px;color:#334155;word-break:break-all}
|
| 34 |
+
|
| 35 |
+
/* Terminal */
|
| 36 |
+
.terminal{background:var(--term-bg);border-radius:14px;border:1px solid var(--term-border);height:520px;display:flex;flex-direction:column}
|
| 37 |
+
.term-header{color:#a7f3d0;background:var(--term-hdr);border-radius:14px 14px 0 0;padding:10px 12px;font-weight:700;font-size:13px;border-bottom:1px solid var(--term-border)}
|
| 38 |
+
.term-body{flex:1;overflow:auto;padding:10px 12px}
|
| 39 |
+
.term-line{font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-size:12px;color:var(--term-ink);white-space:pre-wrap;word-break:break-word;margin:0}
|
| 40 |
+
.term-meta{color:var(--term-meta)}
|
| 41 |
+
.term-warn{color:#facc15}
|
| 42 |
+
.term-err{color:#f87171}
|
| 43 |
+
.term-footer{padding:8px 12px;border-top:1px solid var(--term-border);display:flex;gap:8px}
|
| 44 |
+
|
| 45 |
+
/* Analysis */
|
| 46 |
+
.tabs{display:flex;gap:10px;margin-bottom:10px}
|
| 47 |
+
.tab{background:#e2e8f0;color:#0f172a;border:none;border-radius:10px;padding:8px 12px;font-weight:600;cursor:pointer}
|
| 48 |
+
.tab.active{background:#1e293b;color:#fff}
|
| 49 |
+
.flex{display:flex;gap:10px;flex-wrap:wrap;align-items:center}
|
| 50 |
+
.pill{display:inline-block;background:var(--pill);color:var(--pill-ink);padding:2px 10px;border-radius:999px;font-size:12px;font-weight:700}
|
| 51 |
+
.stat{display:grid;grid-template-columns:repeat(4,minmax(120px,1fr));gap:10px;margin-top:10px}
|
| 52 |
+
.stat .box{background:#f8fafc;border:1px solid var(--border);border-radius:12px;padding:10px;text-align:center}
|
| 53 |
+
.stat .num{font-weight:800;font-size:22px}
|
| 54 |
+
.filters{display:flex;gap:8px;flex-wrap:wrap;align-items:center;margin-top:10px}
|
| 55 |
+
.filter{background:#e5e7eb;border:none;border-radius:999px;padding:6px 10px;font-weight:600;cursor:pointer}
|
| 56 |
+
.filter.active{background:#1e293b;color:#fff}
|
| 57 |
+
.search{flex:1;min-width:240px}
|
| 58 |
+
.posts{margin-top:12px;display:grid;gap:10px}
|
| 59 |
+
.post{background:#ffffff;border:1px solid var(--border);border-radius:12px;padding:12px}
|
| 60 |
+
.post-hdr{display:flex;justify-content:space-between;gap:8px;flex-wrap:wrap}
|
| 61 |
+
.badge{display:inline-block;background:var(--badge);color:var(--badge-ink);padding:2px 8px;border-radius:999px;font-size:12px;font-weight:700}
|
| 62 |
+
.kw{display:inline-block;background:#fff3cd;color:var(--kw-ink);border:1px solid #fde68a;padding:2px 8px;border-radius:999px;font-size:12px;margin:2px 4px 0 0}
|
| 63 |
+
.ai{margin-top:8px;background:#f1f5f9;border:1px solid var(--border);border-radius:10px;padding:10px}
|
| 64 |
+
.ai-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:6px}
|
| 65 |
+
.ai-pill{display:inline-block;border-radius:999px;padding:2px 10px;font-size:12px;font-weight:800}
|
| 66 |
+
.ai-ok{background:#dcfce7;color:#166534}
|
| 67 |
+
.ai-mid{background:#fef9c3;color:#92400e}
|
| 68 |
+
.ai-low{background:#fee2e2;color:#991b1b}
|
| 69 |
+
.email{font-size:12px;font-weight:700}
|
| 70 |
+
.email.ok{color:#166534} .email.no{color:#991b1b}
|
| 71 |
+
.reason{margin-top:6px;background:#fff;border:1px dashed var(--border);border-radius:10px;padding:8px;display:none}
|
| 72 |
+
.reason.show{display:block}
|
| 73 |
+
|
| 74 |
+
/* Recipient Dropdown */
|
| 75 |
+
.recipient-container { margin-top: 10px; position: relative; } /* Container for dropdown and custom input */
|
| 76 |
+
.recipient-select { margin-bottom: 10px; } /* Space below select if custom input appears */
|
| 77 |
+
#custom-recipient { margin-top: 6px; display: none; } /* Initially hidden custom input */
|
| 78 |
+
</style>
|
| 79 |
+
</head>
|
| 80 |
+
<body>
|
| 81 |
+
<div class="wrap">
|
| 82 |
+
<div class="header">
|
| 83 |
+
<h1>🩺 Hillside Medical Group - Social Media Monitor</h1>
|
| 84 |
+
<div class="muted">Automated monitoring and analysis of medical help requests in Facebook groups.</div>
|
| 85 |
+
</div>
|
| 86 |
+
|
| 87 |
+
<div class="grid" style="margin-top:16px">
|
| 88 |
+
<div class="left">
|
| 89 |
+
<div class="card">
|
| 90 |
+
<h3>System Status</h3>
|
| 91 |
+
<div id="sys"></div>
|
| 92 |
+
<button class="btn" style="margin-top:8px" onclick="refreshSystem()">Refresh Status</button>
|
| 93 |
+
</div>
|
| 94 |
+
|
| 95 |
+
<div class="card" style="margin-top:16px">
|
| 96 |
+
<h3>Start Monitoring Process</h3>
|
| 97 |
+
<label for="recipient-select">Report Recipients</label>
|
| 98 |
+
<div class="recipient-container">
|
| 99 |
+
<select id="recipient-select" class="recipient-select" onchange="handleRecipientChange()">
|
| 100 |
+
<option value="">-- Loading Recipients --</option>
|
| 101 |
+
</select>
|
| 102 |
+
<input type="email" id="custom-recipient" placeholder="Enter custom email address..." />
|
| 103 |
+
</div>
|
| 104 |
+
<div class="muted" style="margin:6px 0 10px">
|
| 105 |
+
Select a recipient from the list or choose 'Custom' to enter an email address.
|
| 106 |
+
The report summary will be sent to the selected address(es) after processing.
|
| 107 |
+
</div>
|
| 108 |
+
<div class="flex">
|
| 109 |
+
<button id="start" class="btn" onclick="startProcess()">Start Monitoring</button>
|
| 110 |
+
<button class="btn gray" onclick="refreshLive()">Refresh Live View</button>
|
| 111 |
+
</div>
|
| 112 |
+
<div style="margin-top:12px">
|
| 113 |
+
<div style="display:flex;justify-content:space-between"><b>Overall Progress</b><span id="pct" class="muted">0%</span></div>
|
| 114 |
+
<div class="bar"><div id="fill" class="fill"></div></div>
|
| 115 |
+
<div id="msg" class="muted" style="margin-top:6px">idle</div>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
|
| 119 |
+
<div class="card" style="margin-top:16px">
|
| 120 |
+
<div class="tabs">
|
| 121 |
+
<button id="tab-groups" class="tab active" onclick="switchSection('groups')">Groups</button>
|
| 122 |
+
<button id="tab-analysis" class="tab" onclick="switchSection('analysis')">Analysis (Live)</button>
|
| 123 |
+
<button id="tab-summary" class="tab" onclick="switchSection('summary')">Summary</button>
|
| 124 |
+
</div>
|
| 125 |
+
|
| 126 |
+
<!-- Groups -->
|
| 127 |
+
<div id="section-groups">
|
| 128 |
+
<h3>Configured Groups (from groups.txt)</h3>
|
| 129 |
+
<div id="groups"></div>
|
| 130 |
+
</div>
|
| 131 |
+
|
| 132 |
+
<!-- Analysis Live -->
|
| 133 |
+
<div id="section-analysis" style="display:none">
|
| 134 |
+
<div class="flex">
|
| 135 |
+
<div class="pill" id="live-group">Group: –</div>
|
| 136 |
+
</div>
|
| 137 |
+
|
| 138 |
+
<div class="stat">
|
| 139 |
+
<div class="box"><div class="num" id="cnt-total">0</div><div class="muted">Total Posts</div></div>
|
| 140 |
+
<div class="box"><div class="num" id="cnt-kw">0</div><div class="muted">Keyword Hits</div></div>
|
| 141 |
+
<div class="box"><div class="num" id="cnt-ai">0</div><div class="muted">AI Analyzed</div></div>
|
| 142 |
+
<div class="box"><div class="num" id="cnt-confirmed">0</div><div class="muted">Confirmed</div></div>
|
| 143 |
+
</div>
|
| 144 |
+
|
| 145 |
+
<div class="filters">
|
| 146 |
+
<button class="filter active" id="flt-all" onclick="setFilter('all')">All</button>
|
| 147 |
+
<button class="filter" id="flt-kw" onclick="setFilter('kw')">Keyword</button>
|
| 148 |
+
<button class="filter" id="flt-confirmed" onclick="setFilter('confirmed')">Confirmed</button>
|
| 149 |
+
<input id="search" class="search" placeholder="Search post text..."/>
|
| 150 |
+
</div>
|
| 151 |
+
|
| 152 |
+
<div class="posts" id="posts"></div>
|
| 153 |
+
</div>
|
| 154 |
+
|
| 155 |
+
<!-- Summary -->
|
| 156 |
+
<div id="section-summary" style="display:none">
|
| 157 |
+
<h3>Last Run Summary</h3>
|
| 158 |
+
<div id="summary" class="muted">No summary available yet.</div>
|
| 159 |
+
</div>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
|
| 163 |
+
<div class="right">
|
| 164 |
+
<div class="terminal">
|
| 165 |
+
<div class="term-header">Process Logs (Live)</div>
|
| 166 |
+
<div id="term" class="term-body"></div>
|
| 167 |
+
<div class="term-footer">
|
| 168 |
+
<button class="btn" onclick="scrollBottom()">Scroll to Bottom</button>
|
| 169 |
+
<button class="btn red" onclick="clearLogs()">Clear Logs</button>
|
| 170 |
+
</div>
|
| 171 |
+
</div>
|
| 172 |
+
</div>
|
| 173 |
+
</div>
|
| 174 |
+
</div>
|
| 175 |
+
|
| 176 |
+
<script>
|
| 177 |
+
const API = "/api"; // Base path prepended to every backend endpoint requested with fetch() in this script
|
| 178 |
+
|
| 179 |
+
// Fetch `${API}/system/status` and render it as a table inside #sys.
// Values taken from the response are HTML-escaped before being placed in the
// markup, and a non-2xx response is treated as a failure. On any failure the
// error is logged and #sys shows a short error message instead of the table.
async function refreshSystem(){
  // Coerce to a string first so escapeHtml() never receives a number/undefined.
  const esc = (v) => escapeHtml(String(v));
  // Green "OK" / red "Missing" pill for a truthy / falsy flag.
  const chip = (b)=>`<span style="padding:2px 8px;border-radius:999px;font-size:12px;font-weight:700;background:${b?'#dcfce7':'#fee2e2'};color:${b?'#166534':'#991b1b'}">${b?'OK':'Missing'}</span>`;
  try {
    const r = await fetch(`${API}/system/status`);
    if (!r.ok) {
      throw new Error(`System status request failed with HTTP ${r.status}`);
    }
    const j = await r.json();
    document.getElementById('sys').innerHTML = `
      <table>
        <tr><td>Gmail Service</td><td>${chip(j.gmail)}</td></tr>
        <tr><td>Groups File (groups.txt)</td><td>${chip(j.groups_file_exists)} • Count: ${esc(j.groups_count)}</td></tr>
        <tr><td>Processing Script (final5.py)</td><td>${chip(j.final5_exists)}</td></tr>
        <tr><td>Default Sender</td><td class="mono">${esc(j.sender_email)}</td></tr>
        <tr><td>Data Folders</td><td class="mono">${esc(j.scrape_outdir)} • ${esc(j.analysis_outdir)}</td></tr>
      </table>`;
  } catch (error) {
    console.error("Error refreshing system status:", error);
    document.getElementById('sys').innerHTML = `<div class="muted">Error loading system status.</div>`;
  }
}
|
| 197 |
+
|
| 198 |
+
// Fetch `${API}/groups` and render one card per group link inside #groups.
// Each card carries a status line with id `g-<index>` that pollStatus() later
// updates. Group links are HTML-escaped because they are inserted as markup.
// A non-2xx response or any exception shows an error message instead.
async function loadGroups(){
  try {
    const r = await fetch(`${API}/groups`);
    if (!r.ok) {
      // Without this check an error response would be reported as "no groups".
      throw new Error(`Groups request failed with HTTP ${r.status}`);
    }
    const j = await r.json();
    const list = j.groups || [];
    if(!list.length){
      document.getElementById('groups').innerHTML = `<div class="muted">Please add Facebook group links to 'groups.txt' (one per line).</div>`;
      return;
    }
    document.getElementById('groups').innerHTML = list.map((g,i)=>`
      <div style="border:1px solid var(--border);border-radius:12px;padding:10px;margin:6px 0;background:#fff">
        <div style="display:flex;gap:8px;align-items:center;flex-wrap:wrap"><b>#${i+1}</b><span class="mono">${escapeHtml(String(g))}</span></div>
        <div id="g-${i}" class="muted">Status: Pending</div>
      </div>
    `).join("");
  } catch (error) {
    console.error("Error loading groups:", error);
    document.getElementById('groups').innerHTML = `<div class="muted">Error loading groups list.</div>`;
  }
}
|
| 218 |
+
|
| 219 |
+
// ---------------- Recipient Management ----------------
|
| 220 |
+
let recipientList = []; // Recipients returned by `${API}/recipients`; assigned in loadRecipients(), read by populateRecipientDropdown()
|
| 221 |
+
|
| 222 |
+
// Load the recipient list from `${API}/recipients` into `recipientList` and
// rebuild the #recipient-select dropdown. While the request is in flight the
// dropdown shows a loading placeholder; an unsuccessful payload or an
// exception replaces it with an error placeholder.
async function loadRecipients() {
  const dropdown = document.getElementById('recipient-select');
  dropdown.innerHTML = '<option value="">-- Loading Recipients --</option>';
  try {
    const reply = await fetch(`${API}/recipients`);
    const payload = await reply.json();
    const usable = payload.success && Array.isArray(payload.data);

    if (!usable) {
      console.warn("API did not return a successful recipient list:", payload);
      dropdown.innerHTML = '<option value="">-- Error Loading --</option>';
      return;
    }

    recipientList = payload.data;
    populateRecipientDropdown();
  } catch (error) {
    console.error("Error fetching recipients:", error);
    dropdown.innerHTML = '<option value="">-- Network Error --</option>';
  }
}
|
| 241 |
+
|
| 242 |
+
// Rebuild #recipient-select from `recipientList`.
// - One <option> per recipient that has a non-empty email address; entries
//   without one are skipped, because an option with an empty value can never
//   be accepted by getSelectedRecipients().
// - A trailing "-- Custom Email --" option with value "custom".
// - The default recipient is pre-selected; if it is not in the list it is
//   inserted at the top and selected.
function populateRecipientDropdown() {
  const selectElement = document.getElementById('recipient-select');
  selectElement.innerHTML = ''; // Clear loading option

  const defaultRecipient = "smahato@hillsidemedicalgroup.com";
  let defaultOptionFound = false;

  recipientList.forEach(recipient => {
    const email = recipient && typeof recipient.email === 'string' ? recipient.email.trim() : '';
    if (!email) return; // Nothing to send to; do not offer this entry.

    const option = document.createElement('option');
    option.value = email;
    option.textContent = recipient.name ? `${recipient.name} (${email})` : email;
    if (email === defaultRecipient) {
      option.selected = true;
      defaultOptionFound = true;
    }
    selectElement.appendChild(option);
  });

  // Add Custom option
  const customOption = document.createElement('option');
  customOption.value = "custom";
  customOption.textContent = "-- Custom Email --";
  selectElement.appendChild(customOption);

  // If default wasn't in the list, add it and select it
  if (!defaultOptionFound) {
    const defaultOption = document.createElement('option');
    defaultOption.value = defaultRecipient;
    defaultOption.textContent = `Subash Mahato (Default) (${defaultRecipient})`;
    defaultOption.selected = true;
    selectElement.insertBefore(defaultOption, selectElement.firstChild); // Add to top
  }
}
|
| 275 |
+
|
| 276 |
+
// Show and focus the #custom-recipient input when the dropdown is set to
// "custom"; otherwise hide the input and clear whatever was typed into it.
function handleRecipientChange() {
  const dropdown = document.getElementById('recipient-select');
  const customField = document.getElementById('custom-recipient');
  const wantsCustom = dropdown.value === 'custom';

  customField.style.display = wantsCustom ? 'block' : 'none';
  if (wantsCustom) {
    customField.focus();
    return;
  }
  customField.value = ''; // Drop stale text once "custom" is deselected
}
|
| 287 |
+
|
| 288 |
+
// Return the chosen recipient(s) as an array of email strings, or null when
// the selection is unusable (an alert explains why).
// - "custom" selected: the trimmed custom input must look like an email
//   address. An empty input is rejected too, so the literal option value
//   "custom" is never returned as an address.
// - any other non-empty option value is returned as-is.
// - empty selection: rejected.
function getSelectedRecipients() {
  const selectElement = document.getElementById('recipient-select');
  const customInput = document.getElementById('custom-recipient');
  const choice = selectElement.value;

  if (choice === 'custom') {
    const customEmail = customInput.value.trim();
    // Basic shape check only: something@something.something, no whitespace.
    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    if (!emailRegex.test(customEmail)) {
      alert("Please enter a valid custom email address.");
      return null; // Signal error
    }
    return [customEmail];
  }

  if (choice) {
    return [choice];
  }

  // No recipient selected (shouldn't happen with default)
  alert("Please select a recipient.");
  return null;
}
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
// ---------------- Process control ----------------
|
| 314 |
+
// Start a processing run for the selected recipients.
// Disables #start, validates the recipient selection, clears the previous
// logs (server side and in the terminal pane), POSTs the recipients to
// `${API}/process/start`, and begins status polling. #start is re-enabled
// here only when the run could not be started; after a successful start
// pollStatus() re-enables it once the run is no longer reported as running.
async function startProcess(){
  const startBtn = document.getElementById('start');
  startBtn.disabled = true;

  const recipients = getSelectedRecipients();
  if (!recipients) {
    // Error message already shown in getSelectedRecipients
    startBtn.disabled = false;
    return;
  }

  try {
    await fetch(`${API}/process/clear-logs`, {method:'POST'}); // Clear previous logs
    // Mirror clearLogs(): the server-side log was just cleared, so reset the
    // terminal pane and the polling cursor as well; otherwise pollLogs() keeps
    // asking for entries after a stale id.
    document.getElementById('term').innerHTML = '';
    lastLogId = 0;

    const r = await fetch(`${API}/process/start`, {
      method:'POST',
      headers:{'Content-Type':'application/json'},
      body: JSON.stringify({recipients: recipients}) // Send selected recipients
    });
    const j = await r.json();
    if(!j.success){
      alert(j.message||'Failed to start the process.');
      startBtn.disabled = false;
      return;
    }
    pollStatus(); // Start polling for status updates
  } catch (error) {
    console.error("Error starting process:", error);
    alert("An error occurred while trying to start the process.");
    startBtn.disabled = false;
  }
}
|
| 344 |
+
|
| 345 |
+
// Fetch `${API}/process/status` and update the progress message, percentage,
// progress bar and the per-group status lines (#g-<index>).
// While the response reports `running`, the function re-schedules itself
// every 1.2 s; once it is not running, #start is re-enabled and the summary
// is loaded. On an exception it retries after 3 s.
async function pollStatus(){
  try {
    const r = await fetch(`${API}/process/status`);
    const j = await r.json();
    document.getElementById('msg').innerText = j.message || 'Idle';
    document.getElementById('pct').innerText = `${j.progress||0}%`;
    document.getElementById('fill').style.width = `${j.progress||0}%`;
    (j.groups||[]).forEach((g,idx)=>{
      const el = document.getElementById(`g-${idx}`);
      if(!el) return;
      // Plain text only: textContent keeps stage/error strings from being
      // interpreted as markup.
      el.textContent = `Status: ${g.stage} • Scraped: ${g.scraped_posts} • Confirmed: ${g.detected_posts}${g.error?(' • Error: '+g.error):''}`;
    });
    if(j.running){
      setTimeout(pollStatus, 1200); // Poll every 1.2 seconds while running
    } else {
      document.getElementById('start').disabled = false;
      loadSummary(); // Load the final summary when done
    }
  } catch (error) {
    console.error("Error polling status:", error);
    setTimeout(pollStatus, 3000); // Retry after a longer delay on error
  }
}
|
| 369 |
+
|
| 370 |
+
// ---------------- Logs (terminal) ----------------
|
| 371 |
+
let lastLogId = 0; // Cursor sent as `after=` by pollLogs(); updated from the response's `last` field and reset to 0 by clearLogs()
|
| 372 |
+
// Append log entries to the #term pane, one `.term-line` per entry.
// Each entry is expected to carry `ts`, `source`, `level` and `msg`; `level`
// values 'error' and 'warn' select the `term-err` / `term-warn` class.
// All entry text is inserted through textContent, so it is never parsed as
// HTML. The pane follows the newest line only when the user was already at
// (or within 40px of) the bottom, so scrolling up to read older output is
// not interrupted by new entries.
function appendLogs(entries){
  const term = document.getElementById('term');
  // Measured before appending: was the view at the bottom already?
  const nearBottom = term.scrollTop + term.clientHeight >= term.scrollHeight - 40;
  entries.forEach(e=>{
    const div = document.createElement('div');
    div.className = 'term-line';

    const meta = document.createElement('span');
    meta.className = 'term-meta';
    meta.textContent = `[${e.ts}] [${e.source}]`;

    const msg = document.createElement('span');
    msg.className = e.level === 'error' ? 'term-err' : e.level === 'warn' ? 'term-warn' : '';
    msg.textContent = e.msg || '';

    div.appendChild(meta);
    div.appendChild(document.createTextNode(' '));
    div.appendChild(msg);
    term.appendChild(div);
  });
  if(nearBottom){ term.scrollTop = term.scrollHeight; }
}
|
| 384 |
+
// Fetch log entries newer than `lastLogId` (at most 500 per request), hand
// them to appendLogs() and advance the cursor. The function re-schedules
// itself every 0.9 s whether or not the request succeeded.
async function pollLogs(){
  const LOG_POLL_MS = 900;
  try {
    const reply = await fetch(`${API}/process/logs?after=${lastLogId}&limit=500`);
    const payload = await reply.json();
    const batch = payload.entries;
    if (batch && batch.length) {
      appendLogs(batch);
      lastLogId = payload.last || lastLogId;
    }
  } catch (e) {
    // Logged only; polling continues below.
    console.error("Error polling logs:", e);
  }
  setTimeout(pollLogs, LOG_POLL_MS);
}
|
| 398 |
+
// Jump the #term pane to its last line.
function scrollBottom(){
  const pane = document.getElementById('term');
  const bottom = pane.scrollHeight;
  pane.scrollTop = bottom;
}
|
| 402 |
+
// Ask the server to clear its logs, then empty the #term pane and reset the
// polling cursor. Failures are logged to the console and otherwise ignored.
async function clearLogs(){
  const request = {method:'POST'};
  try {
    await fetch(`${API}/process/clear-logs`, request);
    const pane = document.getElementById('term');
    pane.innerHTML = '';
    lastLogId = 0;
  } catch (error) {
    console.error("Error clearing logs:", error);
  }
}
|
| 412 |
+
// Escape the five HTML-significant characters (& < > " ') so the result can
// be placed safely in element content or a quoted attribute value.
// null/undefined yield ''; any other non-string value is converted with
// String() first instead of failing on a missing .replace method.
function escapeHtml(s){
  const entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  const text = (s === null || s === undefined) ? '' : String(s);
  return text.replace(/[&<>"']/g, c => entities[c]);
}
|
| 415 |
+
|
| 416 |
+
// ---------------- Live Analysis ----------------
|
| 417 |
+
let liveFilter = 'all'; // Active post filter: 'all' | 'kw' | 'confirmed'; set by setFilter(), read by renderLive()
|
| 418 |
+
let liveSearch = ''; // Lower-cased text of the #search input; renderLive() keeps only posts whose text contains it
|
| 419 |
+
|
| 420 |
+
// Make `f` the active live filter, move the "active" class to the matching
// filter button (#flt-all / #flt-kw / #flt-confirmed) and re-render.
function setFilter(f){
  liveFilter = f;
  for (const name of ['all', 'kw', 'confirmed']) {
    const button = document.getElementById(`flt-${name}`);
    button.classList.toggle('active', f === name);
  }
  refreshLive();
}
|
| 427 |
+
// Re-render the live view on every keystroke in the search box, matching
// case-insensitively by keeping the query lower-cased.
document.getElementById('search').addEventListener('input', function onSearchInput(event){
  const query = event.target.value;
  liveSearch = query.toLowerCase();
  refreshLive();
});
|
| 431 |
+
|
| 432 |
+
// Fetch `${API}/live/state` and pass its `data` object (or {} when absent)
// to renderLive(). An unsuccessful payload is logged as a warning and an
// exception as an error; in both cases the current view is left untouched.
async function refreshLive(){
  try {
    const payload = await fetch(`${API}/live/state`).then(reply => reply.json());
    if (payload.success) {
      renderLive(payload.data || {});
      return;
    }
    console.warn("Live state API returned not success:", payload);
  } catch (error) {
    console.error("Error refreshing live state:", error);
  }
}
|
| 447 |
+
|
| 448 |
+
// Render the live-analysis panel from one `/live/state` data object.
// Reads `data.group`, `data.counts` (total_posts, kw_hits, ai_done,
// confirmed) and `data.posts`; applies the current `liveFilter` and
// `liveSearch`; then rewrites #posts.
//
// Differences from a naive template render:
// - Every value from the payload that is placed in markup is HTML-escaped,
//   including the AI confidence/urgency labels and the post id.
// - The post id is passed to toggleReason() as a quoted string, so
//   non-numeric ids do not produce broken inline JavaScript.
// - Reasoning panels the user has expanded stay expanded across re-renders
//   (the panel is re-rendered on every poll).
function renderLive(data){
  // header + counts (innerText, so no escaping is needed here)
  document.getElementById('live-group').innerText = `Current Group: ${data.group || '–'}`;
  const c = data.counts || {};
  document.getElementById('cnt-total').innerText = c.total_posts || 0;
  document.getElementById('cnt-kw').innerText = c.kw_hits || 0;
  document.getElementById('cnt-ai').innerText = c.ai_done || 0;
  document.getElementById('cnt-confirmed').innerText = c.confirmed || 0;

  const postsEl = document.getElementById('posts');
  // Remember which reasoning panels are open before the markup is replaced.
  const openReasons = new Set(
    Array.from(postsEl.querySelectorAll('.reason.show')).map(el => el.id)
  );

  const posts = Array.isArray(data.posts) ? data.posts : [];
  const hasKeywords = p => Array.isArray(p.found_keywords) && p.found_keywords.length > 0;
  const isConfirmed = p => !!(p.ai && p.ai.is_medical_seeking);

  // filter/search
  const filtered = posts.filter(p=>{
    if(liveFilter==='kw' && !hasKeywords(p)) return false;
    if(liveFilter==='confirmed' && !isConfirmed(p)) return false;
    if(liveSearch && !String(p.text||'').toLowerCase().includes(liveSearch)) return false;
    return true;
  });

  const html = filtered.map(p=>{
    const hasKW = hasKeywords(p);
    const ai = p.ai || null;
    const confirm = isConfirmed(p);
    // String() guards against non-string labels in the AI result.
    const confText = ai ? String(ai.confidence||'') : '';
    const urgText = ai ? String(ai.urgency_level||'') : '';
    const conf = confText.toLowerCase();
    const urg = urgText.toLowerCase();
    const confClass = conf==='high' ? 'ai-ok' : conf==='medium' ? 'ai-mid' : 'ai-low';
    // High urgency reuses the "low" (warning) colour, low urgency the "ok" colour.
    const urgClass = urg==='high' ? 'ai-low' : urg==='medium' ? 'ai-mid' : 'ai-ok';
    const emailTxt = p.email_sent ? '<span class="email ok">Email Sent</span>' : '<span class="email no">No Email</span>';

    const reasonId = `reason-${p.id}`;
    const reasonClass = openReasons.has(reasonId) ? 'reason show' : 'reason';
    // JSON.stringify yields a quoted JS string literal; escapeHtml makes it
    // safe inside the double-quoted onclick attribute.
    const toggleArg = escapeHtml(JSON.stringify(String(p.id)));

    return `
      <div class="post">
        <div class="post-hdr">
          <div class="flex">
            <span class="badge">Post #${escapeHtml(String(p.id || '-'))}</span>
            ${hasKW ? '<span class="badge">Keyword Hit</span>' : ''}
            ${confirm ? '<span class="badge" style="background-color: #bfdbfe; color: #1e40af;">CONFIRMED</span>' : ''}
          </div>
          <div class="muted mono">Group: ${escapeHtml(p.group_link || 'N/A')}</div>
        </div>
        <div style="margin-top:6px; white-space: pre-wrap;">${escapeHtml(p.text || '')}</div>

        ${hasKW ? `
          <div style="margin-top:6px">${(p.found_keywords||[]).map(k=>`<span class="kw">${escapeHtml(k)}</span>`).join('')}</div>
        ` : ''}

        ${ai ? `
          <div class="ai">
            <div class="ai-row">
              <span class="ai-pill ${confClass}">Confidence: ${escapeHtml(confText.toUpperCase())}</span>
              <span class="ai-pill ${urgClass}">Urgency: ${escapeHtml(urgText.toUpperCase())}</span>
              ${emailTxt}
            </div>
            <div><b>Summary:</b> ${escapeHtml(ai.medical_summary || '')}</div>
            <div style="margin-top:4px"><b>Analysis:</b> ${escapeHtml(ai.analysis || '')}</div>
            ${Array.isArray(ai.suggested_services) && ai.suggested_services.length ? `
              <div style="margin-top:4px"><b>Suggested Services:</b> ${(ai.suggested_services||[]).map(s=>`<span class="badge" style="margin-right:6px">${escapeHtml(s)}</span>`).join('')}</div>
            `:''}
            <div style="margin-top:6px">
              <button class="btn gray" onclick="toggleReason(${toggleArg})">Show Reasoning</button>
              <div id="${escapeHtml(reasonId)}" class="${reasonClass}">${escapeHtml(ai.reasoning || 'No reasoning provided.')}</div>
            </div>
          </div>
        ` : '<div class="muted">Awaiting AI analysis...</div>'}
      </div>
    `;
  }).join('');
  postsEl.innerHTML = html || `<div class="muted">No posts match the current filters.</div>`;
}
|
| 516 |
+
|
| 517 |
+
// Expand or collapse the reasoning panel (#reason-<id>) of one post by
// toggling its "show" class; does nothing when the panel is not in the DOM.
function toggleReason(id){
  const panel = document.getElementById(`reason-${id}`);
  if (panel) {
    panel.classList.toggle('show');
  }
}
|
| 522 |
+
|
| 523 |
+
// auto poll live state
|
| 524 |
+
// Refresh the live view, then schedule the next refresh 1.2 s after this one
// has finished.
async function pollLive(){
  const LIVE_POLL_MS = 1200;
  await refreshLive();
  setTimeout(pollLive, LIVE_POLL_MS);
}
|
| 528 |
+
|
| 529 |
+
// ---------------- Summary ----------------
|
| 530 |
+
// Fetch `${API}/results/summary` and show it inside #summary as
// pretty-printed JSON.
// - The JSON is inserted via textContent, so markup characters inside the
//   summary are displayed literally and never parsed as HTML.
// - A non-200 response shows a placeholder and retries once after 2 s; the
//   retry applies the same success check and has its own error handling so
//   a failed retry cannot become an unhandled rejection.
async function loadSummary(){
  // Render a parsed response body: JSON view on success, notice otherwise.
  const render = (el, j) => {
    if (j && j.success && j.data) {
      const pre = document.createElement('pre');
      pre.className = 'mono';
      pre.textContent = JSON.stringify(j.data, null, 2);
      el.innerHTML = '';
      el.appendChild(pre);
    } else {
      el.innerHTML = `<div class="muted">Summary data unavailable.</div>`;
    }
  };

  try {
    const r = await fetch(`${API}/results/summary`);
    const el = document.getElementById('summary');
    if(r.status!==200){
      el.innerHTML = `<div class="muted">Processing complete. Summary will appear here shortly.</div>`;
      // Retry once after a short delay if not found immediately
      setTimeout(async () => {
        try {
          const retry = await fetch(`${API}/results/summary`);
          if(retry.status === 200) {
            render(el, await retry.json());
          }
        } catch (error) {
          console.error("Error loading summary:", error);
        }
      }, 2000);
      return;
    }
    render(el, await r.json());
  } catch (error) {
    console.error("Error loading summary:", error);
    document.getElementById('summary').innerHTML = `<div class="muted">Error loading summary.</div>`;
  }
}
|
| 560 |
+
|
| 561 |
+
// ---------------- Tabs ----------------
|
| 562 |
+
// Activate the tab called `name` ('groups' | 'analysis' | 'summary'): mark
// its tab button active, show its section and hide the other two, then
// refresh the data behind the analysis or summary section when selected.
function switchSection(name){
  const sections = ['groups', 'analysis', 'summary'];

  // Tab buttons first, then section visibility.
  sections.forEach(key => {
    document.getElementById(`tab-${key}`).classList.toggle('active', name === key);
  });
  sections.forEach(key => {
    const visible = name === key;
    document.getElementById(`section-${key}`).style.display = visible ? 'block' : 'none';
  });

  if (name === 'analysis') {
    refreshLive();
  }
  if (name === 'summary') {
    loadSummary();
  }
}
|
| 573 |
+
|
| 574 |
+
// ---------------- Boot ----------------
|
| 575 |
+
// Load initial data and start polling
|
| 576 |
+
// Kick off the initial loads and the three polling loops, in this order:
// system status, groups, recipients, last summary, then status / log / live
// polling. None of the calls is awaited.
[
  refreshSystem,
  loadGroups,
  loadRecipients,
  loadSummary,
  pollStatus,
  pollLogs,
  pollLive,
].forEach(start => { start(); });
|
| 583 |
+
|
| 584 |
+
</script>
|
| 585 |
+
</body>
|
| 586 |
+
</html>
|
recipients.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{"name": "Dr. Patel", "email": ""},
|
| 3 |
+
{"name": "Bruno", "email": "admin@hillsidemedicalgroup.com"},
|
| 4 |
+
{"name": "Afren", "email": "reports@hillsidemedicalgroup.com"},
|
| 5 |
+
{"name": "Sonu", "email": "smahato@hillsidemedicalgroup.com"}
|
| 6 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
selenium
|
| 2 |
+
google-api-python-client
|
| 3 |
+
google-auth-oauthlib
|
| 4 |
+
google-auth-httplib2
|
| 5 |
+
google-generativeai
|
| 6 |
+
Flask
|
| 7 |
+
Flask-Cors
|
| 8 |
+
python-dotenv
|
| 9 |
+
requests
|
| 10 |
+
webdriver-manager
|