vikramvasudevan commited on
Commit
2fcea48
·
verified ·
1 Parent(s): 7911979

Upload folder using huggingface_hub

Browse files
home.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ from typing import Dict, Any, List, Tuple
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from plotly.graph_objects import Figure, Scatter
8
+
9
+ # ----- DB wiring -----
10
+ # Expects your SheamiDB class to be in modules/db.py
11
+ # from modules.db import SheamiDB
12
+ # For illustration, we lazy-import at runtime to avoid import errors if path differs.
13
+ from dotenv import load_dotenv
14
+
15
+ from ui import get_app_theme, get_app_title, get_css
16
+
17
+ load_dotenv(override=True)
18
+ DB_URI = os.getenv("MONGODB_URI")
19
+ DB_NAME = os.getenv("MONGODB_DB", "sheami")
20
+
21
+
22
_DB_HANDLE = None  # lazily created, process-wide SheamiDB instance


def get_db():
    """Return the shared ``SheamiDB`` handle, creating it on first use.

    The original implementation built a new ``SheamiDB`` (and therefore a new
    ``MongoClient``) on every call, i.e. once per UI event.  A single client
    is reused here instead; callers are unaffected because the return type
    and call signature are unchanged.
    """
    global _DB_HANDLE
    if _DB_HANDLE is None:
        # Imported lazily so this module still loads when ``modules.db`` is
        # not importable from the current path.
        from modules.db import SheamiDB

        _DB_HANDLE = SheamiDB(DB_URI, db_name=DB_NAME)
    return _DB_HANDLE
26
+
27
+
28
+ # ----- Data shaping helpers -----
29
+
30
+
31
+ def _fmt(dt: Any) -> str:
32
+ if isinstance(dt, (datetime,)):
33
+ return dt.strftime("%Y-%m-%d %H:%M")
34
+ if isinstance(dt, str):
35
+ return dt
36
+ return ""
37
+
38
+
39
def flatten_reports(reports: List[Dict[str, Any]]) -> pd.DataFrame:
    """Explode the parsed tests inside each report into a flat table.

    One output row is produced per test.  A report without tests still gets
    a single row with blank test fields, and an empty ``reports`` list yields
    one all-blank row so the returned frame always has the expected columns.
    """
    columns = [
        "report_id",
        "uploaded_at",
        "file_name",
        "test_name",
        "value",
        "unit",
        "reference_range",
    ]
    rows = []
    for report in reports:
        # Fields shared by every row that originates from this report.
        base = {
            "report_id": str(report.get("_id", "")),
            "uploaded_at": _fmt(report.get("uploaded_at")),
            "file_name": report.get("file_name", ""),
        }
        # ``or`` guards cover both a missing key and an explicit None.
        tests = (report.get("parsed_data") or {}).get("tests") or []
        if not tests:
            rows.append(
                {**base, "test_name": "", "value": "", "unit": "", "reference_range": ""}
            )
            continue
        for test in tests:
            rows.append(
                {
                    **base,
                    "test_name": test.get("name", ""),
                    "value": test.get("value", ""),
                    "unit": test.get("unit", ""),
                    "reference_range": test.get("reference_range", ""),
                }
            )
    if not rows:
        rows = [dict.fromkeys(columns, "")]
    return pd.DataFrame(rows, columns=columns)
88
+
89
+
90
def trends_index(trends: List[Dict[str, Any]]) -> List[str]:
    """Return the sorted, de-duplicated test names found in ``trends``.

    Documents with a missing or empty ``test_name`` are ignored.
    """
    unique_names = {t.get("test_name", "") for t in trends if t.get("test_name")}
    return sorted(unique_names)
93
+
94
+
95
def build_trend_figure(trend_doc: Dict[str, Any]) -> Figure:
    """Build a Plotly line chart for a single test's ``trend_data``.

    ``trend_doc`` is expected to carry ``test_name`` and a ``trend_data``
    list of ``{"date": ..., "value": ...}`` points.  When there are no
    points, an empty figure titled "No trend data" is returned.
    """
    points = trend_doc.get("trend_data") or []
    fig = Figure()
    if not points:
        fig.update_layout(
            title="No trend data", xaxis_title="Date", yaxis_title="Value"
        )
        return fig

    test_name = trend_doc.get("test_name")
    fig.add_trace(
        Scatter(
            x=[pd.to_datetime(p.get("date")) for p in points],
            y=[p.get("value") for p in points],
            mode="lines+markers",
            # Legend label falls back to a generic name when the doc has none.
            name="Trend" if test_name is None else test_name,
        )
    )
    fig.update_layout(
        margin=dict(l=30, r=20, t=40, b=30),
        xaxis_title="Date",
        yaxis_title="Value",
        title=f"Trend — {trend_doc.get('test_name','')} ({len(points)} points)",
    )
    return fig
122
+
123
+
124
+ # ----- App state + loaders -----
125
+
126
+
127
def load_user(email: str) -> Tuple[Dict[str, Any], List[Tuple[str, str]]]:
    """Look up a user by email and list their patients.

    Returns ``(user_dict, patient_choices)`` where ``patient_choices`` is a
    list of ``(label, value)`` pairs: the label is the patient name (falling
    back to the id when the name is missing or empty) and the value is the
    patient id as a string.  Returns ``({}, [])`` for a blank email or an
    unknown user.
    """
    # Surrounding whitespace typed into the textbox must not cause a miss.
    email = (email or "").strip()
    if not email:
        return {}, []
    db = get_db()
    user = db.get_user_by_email(email)
    if not user:
        return {}, []
    choices = []
    for patient in db.get_patients_by_user(str(user["_id"])):
        pid = str(patient["_id"])
        choices.append((patient.get("name") or pid, pid))
    return user, choices
139
+
140
+
141
def load_patient_bundle(
    patient_id: str,
) -> Tuple[pd.DataFrame, List[str], Dict[str, Any], List[Dict[str, Any]]]:
    """Load everything the UI shows for one patient.

    Returns a 4-tuple:
      - reports_df: flattened reports table (see ``flatten_reports``)
      - test_names: sorted test names for the trends dropdown
      - meta: dict with the patient's basic details
      - finals: list of final-report documents

    An empty ``patient_id`` yields ``(empty DataFrame, [], {}, [])``.
    """
    if not patient_id:
        return pd.DataFrame(), [], {}, []

    from bson import ObjectId  # provided by pymongo

    db = get_db()
    # The original carried a dead ``... if False else ...`` branch that poked
    # at name-mangled private attributes; only the live lookup is kept.  A
    # missing patient becomes ``{}`` so the meta fields below are blank
    # instead of raising AttributeError on ``None.get``.
    patient = db.patients.find_one({"_id": ObjectId(patient_id)}) or {}

    reports = db.get_reports_by_patient(patient_id)
    trends = db.get_trends_by_patient(patient_id)
    finals = db.get_final_reports_by_patient(patient_id)

    meta = {
        "Patient": patient.get("name", ""),
        "Gender": patient.get("gender", ""),
        "DOB": patient.get("dob", ""),
        "Created": _fmt(patient.get("created_at")),
    }
    return flatten_reports(reports), trends_index(trends), meta, finals
181
+
182
+
183
def load_trend_figure(patient_id: str, test_name: str) -> Figure:
    """Fetch the trend document for ``(patient_id, test_name)`` and plot it.

    Returns a blank figure when either argument is empty; when no matching
    trend document exists, ``build_trend_figure`` receives ``{}``.
    """
    if not patient_id or not test_name:
        return Figure()

    from bson import ObjectId  # provided by pymongo

    query = {"patient_id": ObjectId(patient_id), "test_name": test_name}
    doc = get_db().trends.find_one(query)
    return build_trend_figure(doc or {})
191
+
192
+
193
def format_final_reports(finals: List[Dict[str, Any]]) -> pd.DataFrame:
    """Turn final-report documents into a display table.

    Columns: ``final_report_id``, ``generated_at``, ``summary``,
    ``recommendations`` (items joined with ``"; "``).  An empty input yields
    a single all-blank row so the frame always has the expected columns.
    """
    columns = ["final_report_id", "generated_at", "summary", "recommendations"]

    def _join_recommendations(recs: Any) -> str:
        # Tolerate a missing/None field, a bare string, and non-string items;
        # the original ``"; ".join(...)`` raised TypeError on the first and
        # last, and split a bare string into characters.
        if not recs:
            return ""
        if isinstance(recs, str):
            return recs
        return "; ".join(str(item) for item in recs)

    rows = [
        {
            "final_report_id": str(fr.get("_id", "")),
            "generated_at": _fmt(fr.get("generated_at")),
            "summary": fr.get("summary", ""),
            "recommendations": _join_recommendations(fr.get("recommendations")),
        }
        for fr in finals
    ]
    if not rows:
        rows = [dict.fromkeys(columns, "")]
    return pd.DataFrame(rows, columns=columns)
215
+
216
+
217
def add_patient_ui(user_email, name, age, gender):
    """Create a patient under the user identified by ``user_email``.

    Returns a status string for display: a success message containing the
    new patient id, or "User not found".
    """
    db = get_db()
    user = db.get_user_by_email(user_email)
    if not user:
        return "User not found"
    # NOTE(review): ``SheamiDB.add_patient`` takes (user_id, name, dob, gender),
    # so the UI's ``age`` value is stored in the ``dob`` field here.  Confirm
    # which of the two the schema should hold before changing either side.
    pid = db.add_patient(user["_id"], name, age, gender)
    return f"✅ Patient {name} added (ID: {pid})"
224
+
225
+
226
def edit_patient_ui(patient_id, name, age, gender):
    """Update name/age/gender on the selected patient.

    Returns "✅ Updated" when the database reports success and
    "❌ Patient not found" otherwise, including when no patient is selected
    or the id is not a valid ObjectId.
    """
    if not patient_id:
        return "❌ Patient not found"

    from bson import ObjectId  # provided by pymongo
    from bson.errors import InvalidId

    try:
        # The UI holds the id as a string, but ``_id`` is stored as an
        # ObjectId; querying with the raw string never matches a document.
        oid = ObjectId(patient_id)
    except (InvalidId, TypeError):
        return "❌ Patient not found"

    updated = get_db().update_patient(
        oid, {"name": name, "age": age, "gender": gender}
    )
    return "✅ Updated" if updated else "❌ Patient not found"
232
+
233
+
234
def delete_patient_ui(patient_id):
    """Delete the selected patient.

    Returns "✅ Deleted" when a document was removed and
    "❌ Patient not found" otherwise, including when no patient is selected
    or the id is not a valid ObjectId.
    """
    if not patient_id:
        return "❌ Patient not found"

    from bson import ObjectId  # provided by pymongo
    from bson.errors import InvalidId

    try:
        # ``_id`` is stored as an ObjectId; the raw string from the UI would
        # never match a document.
        oid = ObjectId(patient_id)
    except (InvalidId, TypeError):
        return "❌ Patient not found"

    return "✅ Deleted" if get_db().delete_patient(oid) else "❌ Patient not found"
238
+
239
+
240
+ # ----- Gradio UI -----
241
+ # ----- Gradio UI (Sidebar Layout) -----
242
# NOTE(review): the original nesting was lost in extraction; the layout below
# is reconstructed from the component order and the sidebar/main comments.
with gr.Blocks(
    title=get_app_title(), theme=get_app_theme(), css=get_css(), fill_height=True
) as demo:
    with gr.Row():
        with gr.Column(scale=1):  # Sidebar
            gr.Markdown("### Sidebar")

            email_in = gr.Textbox(label="User Email", placeholder="doctor1@sheami.com")
            load_btn = gr.Button("🔍 Load Patients")

            patient_list = gr.Radio(label="Patients", choices=[], interactive=True)

            with gr.Accordion("➕ Add Patient", open=False):
                new_name = gr.Textbox(label="Name")
                new_age = gr.Number(label="Age")
                new_gender = gr.Dropdown(["M", "F"], label="Gender")
                add_btn = gr.Button("Add")
                add_out = gr.Textbox(label="Status")

            delete_btn = gr.Button("🗑️ Delete Selected")
            delete_out = gr.Textbox(label="Status")

        with gr.Column(scale=3):  # Main area
            with gr.Row():
                edit_name = gr.Textbox(label="Edit Name")
                edit_age = gr.Number(label="Edit Age")
                edit_gender = gr.Dropdown(["M", "F"], label="Edit Gender")
                edit_btn = gr.Button("✏️ Save Changes")
                edit_out = gr.Textbox(label="Status")

            meta_box = gr.JSON(label="Patient Details")

            with gr.Tabs():
                with gr.Tab("📄 Reports"):
                    reports_df = gr.DataFrame(
                        headers=[
                            "report_id",
                            "uploaded_at",
                            "file_name",
                            "test_name",
                            "value",
                            "unit",
                            "reference_range",
                        ],
                        row_count=(0, "dynamic"),
                        wrap=True,
                        interactive=False,
                    )
                with gr.Tab("📈 Trends"):
                    test_dd = gr.Dropdown(
                        choices=[], label="Select Test", interactive=True
                    )
                    trend_plot = gr.Plot(label="Trend Chart")
                with gr.Tab("✅ Final Reports"):
                    final_df = gr.DataFrame(
                        headers=[
                            "final_report_id",
                            "generated_at",
                            "summary",
                            "recommendations",
                        ],
                        row_count=(0, "dynamic"),
                        wrap=True,
                        interactive=False,
                    )

    # ---- Events ----
    def on_load(email):
        """Refresh the patient list for ``email`` and clear the details pane."""
        _, patient_choices = load_user(email)
        # value=None drops a selection left over from a previously loaded user.
        return gr.update(choices=patient_choices, value=None), {}

    load_btn.click(on_load, inputs=[email_in], outputs=[patient_list, meta_box])

    def on_patient_select(patient_id):
        """Populate details, reports, test choices and final reports."""
        reports_df_val, test_names, meta, finals = load_patient_bundle(patient_id)
        return (
            meta,
            reports_df_val,
            # value=None clears a test picked for the previous patient.
            gr.update(choices=test_names, value=None),
            format_final_reports(finals),
        )

    patient_list.change(
        on_patient_select,
        inputs=[patient_list],
        outputs=[meta_box, reports_df, test_dd, final_df],
    )

    def on_test_change(patient_id, test_name):
        """Redraw the trend chart for the selected patient and test."""
        return load_trend_figure(patient_id, test_name)

    test_dd.change(on_test_change, inputs=[patient_list, test_dd], outputs=trend_plot)

    add_btn.click(
        add_patient_ui,
        inputs=[email_in, new_name, new_age, new_gender],
        outputs=add_out,
    )

    delete_btn.click(
        delete_patient_ui, inputs=[patient_list], outputs=delete_out
    )

    edit_btn.click(
        edit_patient_ui,
        inputs=[patient_list, edit_name, edit_age, edit_gender],
        outputs=edit_out,
    )

if __name__ == "__main__":
    demo.launch()
modules/__init__.py ADDED
File without changes
modules/db.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pymongo import MongoClient
3
+ from datetime import datetime
4
+ from bson import ObjectId
5
+ from dotenv import load_dotenv
6
+
7
class SheamiDB:
    """Thin data-access wrapper around the Sheami MongoDB collections.

    Ids are accepted as strings (or ``ObjectId`` instances) and converted to
    ``ObjectId`` before querying; ids of newly inserted documents are
    returned as strings.
    """

    def __init__(self, uri: str, db_name: str = "sheami"):
        """Initialize connection to MongoDB Atlas (or local Mongo)."""
        self.client = MongoClient(uri)
        self.db = self.client[db_name]

        # Collections
        self.users = self.db["users"]
        self.patients = self.db["patients"]
        self.reports = self.db["reports"]
        self.trends = self.db["trends"]
        self.final_reports = self.db["final_reports"]

    # ---------------------------
    # USER FUNCTIONS
    # ---------------------------
    def add_user(self, email: str, name: str) -> str:
        """Insert a user and return its id as a string."""
        user = {
            "email": email,
            "name": name,
            "created_at": datetime.utcnow(),
        }
        result = self.users.insert_one(user)
        return str(result.inserted_id)

    def get_user(self, user_id: str) -> dict:
        """Return the user document with the given id, or ``None``."""
        return self.users.find_one({"_id": ObjectId(user_id)})

    # ---------------------------
    # PATIENT FUNCTIONS
    # ---------------------------
    def add_patient(self, user_id: str, name: str, dob: str, gender: str) -> str:
        """Insert a patient owned by ``user_id`` and return its id as a string."""
        patient = {
            "user_id": ObjectId(user_id),
            "name": name,
            "dob": dob,
            "gender": gender,
            "created_at": datetime.utcnow(),
        }
        result = self.patients.insert_one(patient)
        return str(result.inserted_id)

    def get_patients_by_user(self, user_id: str) -> list:
        """Return all patient documents owned by ``user_id``."""
        return list(self.patients.find({"user_id": ObjectId(user_id)}))

    # ---------------------------
    # REPORT FUNCTIONS
    # ---------------------------
    def add_report(self, patient_id: str, file_name: str, parsed_data: dict) -> str:
        """Insert a report for ``patient_id`` and return its id as a string."""
        report = {
            "patient_id": ObjectId(patient_id),
            "uploaded_at": datetime.utcnow(),
            "file_name": file_name,
            "parsed_data": parsed_data,
        }
        result = self.reports.insert_one(report)
        return str(result.inserted_id)

    def get_reports_by_patient(self, patient_id: str) -> list:
        """Return all report documents for ``patient_id``."""
        return list(self.reports.find({"patient_id": ObjectId(patient_id)}))

    # ---------------------------
    # TREND FUNCTIONS
    # ---------------------------
    def add_or_update_trend(self, patient_id: str, test_name: str, trend_data: list):
        """Insert new trend or update existing one."""
        self.trends.update_one(
            {"patient_id": ObjectId(patient_id), "test_name": test_name},
            {"$set": {"trend_data": trend_data, "last_updated": datetime.utcnow()}},
            upsert=True,
        )

    def get_trends_by_patient(self, patient_id: str) -> list:
        """Return all trend documents for ``patient_id``."""
        return list(self.trends.find({"patient_id": ObjectId(patient_id)}))

    # ---------------------------
    # FINAL REPORT FUNCTIONS
    # ---------------------------
    def add_final_report(
        self,
        patient_id: str,
        summary: str,
        recommendations: list,
        trend_snapshots: list,
    ) -> str:
        """Insert a final report for ``patient_id`` and return its id as a string."""
        final_report = {
            "patient_id": ObjectId(patient_id),
            "generated_at": datetime.utcnow(),
            "summary": summary,
            "recommendations": recommendations,
            "trend_snapshots": trend_snapshots,
        }
        result = self.final_reports.insert_one(final_report)
        return str(result.inserted_id)

    def get_final_reports_by_patient(self, patient_id: str) -> list:
        """Return all final-report documents for ``patient_id``."""
        return list(self.final_reports.find({"patient_id": ObjectId(patient_id)}))

    # ---------------------------
    # FETCH FULL USER DATA
    # ---------------------------
    def get_user_by_email(self, email: str) -> dict:
        """Fetch user by email."""
        return self.users.find_one({"email": email})

    def get_user_full_data(self, user_id: str) -> dict:
        """
        Fetch user + all patients, reports, trends, final reports
        for populating UI (tabbed layout).

        Returns ``{}`` when the user does not exist, otherwise
        ``{"user": ..., "patients": [{"patient", "reports", "trends",
        "final_reports"}, ...]}``.
        """
        user = self.get_user(user_id)
        if not user:
            return {}

        full_patients = []
        for patient in self.get_patients_by_user(user_id):
            pid = str(patient["_id"])
            full_patients.append(
                {
                    "patient": patient,
                    "reports": self.get_reports_by_patient(pid),
                    "trends": self.get_trends_by_patient(pid),
                    "final_reports": self.get_final_reports_by_patient(pid),
                }
            )

        return {"user": user, "patients": full_patients}

    def update_patient(self, patient_id, fields: dict):
        """Set ``fields`` on the patient; return True if the patient exists.

        ``patient_id`` may be a string or an ``ObjectId``.  The original
        queried ``_id`` with the raw value, so string ids never matched.
        ``matched_count`` is used instead of ``modified_count`` so that
        saving unchanged values is not reported as "not found".
        """
        result = self.patients.update_one(
            {"_id": ObjectId(patient_id)}, {"$set": fields}
        )
        return result.matched_count > 0

    def delete_patient(self, patient_id):
        """Delete the patient; return True if a document was removed.

        ``patient_id`` may be a string or an ``ObjectId``; it is converted
        because ``_id`` is stored as an ``ObjectId``.
        """
        result = self.patients.delete_one({"_id": ObjectId(patient_id)})
        return result.deleted_count > 0
pyproject.toml CHANGED
@@ -7,6 +7,7 @@ requires-python = ">=3.13"
7
  dependencies = [
8
  "authlib>=1.6.1",
9
  "dotenv>=0.9.9",
 
10
  "fastapi>=0.116.1",
11
  "gradio>=5.42.0",
12
  "gradio-modal>=0.0.4",
@@ -17,7 +18,8 @@ dependencies = [
17
  "markdown2>=2.5.4",
18
  "matplotlib>=3.10.5",
19
  "pandas>=2.3.1",
20
- "pymongo[srv]>=4.14.0",
 
21
  "pypdf>=6.0.0",
22
  "python-multipart>=0.0.20",
23
  "reportlab>=4.4.3",
 
7
  dependencies = [
8
  "authlib>=1.6.1",
9
  "dotenv>=0.9.9",
10
+ "faker>=37.5.3",
11
  "fastapi>=0.116.1",
12
  "gradio>=5.42.0",
13
  "gradio-modal>=0.0.4",
 
18
  "markdown2>=2.5.4",
19
  "matplotlib>=3.10.5",
20
  "pandas>=2.3.1",
21
+ "plotly>=6.3.0",
22
+ "pymongo>=4.14.0",
23
  "pypdf>=6.0.0",
24
  "python-multipart>=0.0.20",
25
  "reportlab>=4.4.3",
tests/__init__.py ADDED
File without changes
tests/generate_test_data.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tests/generate_test_data.py
3
+
4
+ Generates realistic test data for Sheami using your modules.db.SheamiDB API.
5
+
6
+ Behavior:
7
+ - Creates N users (default 100)
8
+ - Each user: 3-5 patients (enforced)
9
+ - Each patient: 2-6 reports
10
+ - Each report: 3-6 tests drawn from TEST_POOL
11
+ - For each patient we write trends (per test) using add_or_update_trend
12
+ - For each patient we write a final report using add_final_report
13
+
14
+ Usage:
15
+ pip install faker pymongo python-dotenv
16
+ MONGODB_URI="mongodb+srv://<user>:<pass>@cluster0.xxxxx.mongodb.net" \
17
+ MONGODB_DB="sheami" \
18
+ python -m tests.generate_test_data --num-users 100
19
+
20
+ The script CALLS THESE EXACT methods on your SheamiDB:
21
+ - add_user(email, name)
22
+ - add_patient(user_id, name, dob, gender)
23
+ - add_report(patient_id, file_name, parsed_data)
24
+ - add_or_update_trend(patient_id, test_name, trend_data)
25
+ - add_final_report(patient_id, summary, recommendations, trend_snapshots)
26
+ """
27
+ import argparse
28
+ import random
29
+ from collections import defaultdict
30
+ from datetime import datetime, timedelta
31
+ import os
32
+
33
+ from faker import Faker
34
+ from dotenv import load_dotenv
35
+
36
+ # Ensure env is loaded
37
+ load_dotenv()
38
+
39
+ # import your DB wrapper
40
+ from modules.db import SheamiDB
41
+
42
+ # ---------- Config & test pool ----------
43
+ faker = Faker()
44
# Lab tests the generator can draw from.
# Each entry maps test name -> (low, high, unit, reference_range): ``low`` and
# ``high`` bound the random value, and a float bound makes the generated
# value a float (see make_test_values).
TEST_POOL = {
    "Hemoglobin": (11.0, 17.5, "g/dL", "11.0-17.5"),
    "Glucose (Fasting)": (60, 130, "mg/dL", "70-99 fasting"),
    "Total Cholesterol": (120, 300, "mg/dL", "<200 desirable"),
    "Triglycerides": (40, 300, "mg/dL", "<150 normal"),
    "HDL": (30, 90, "mg/dL", ">40 desirable"),
    "LDL": (50, 200, "mg/dL", "<100 ideal"),
    "Creatinine": (0.5, 1.8, "mg/dL", "0.5-1.2"),
    "Urea (BUN)": (7, 30, "mg/dL", "7-20"),
    "Sodium": (130, 150, "mmol/L", "135-145"),
    "Potassium": (3.2, 5.2, "mmol/L", "3.5-5.0"),
    "ALT": (7, 55, "U/L", "<45"),
    "AST": (8, 48, "U/L", "<40"),
}
58
+
59
def random_date_between(start_year=2019):
    """Return a random datetime between Jan 1 of ``start_year`` and now.

    The result is midnight of a uniformly chosen day in that span.

    Raises:
        ValueError: if ``start_year`` lies in the future.
    """
    start = datetime(start_year, 1, 1)
    span_days = (datetime.now() - start).days
    if span_days < 0:
        # random.randint would raise an opaque "empty range" error here.
        raise ValueError(f"start_year {start_year} is in the future")
    return start + timedelta(days=random.randint(0, span_days))
64
+
65
def make_test_values(k, pool=None):
    """Return ``k`` random test dicts matching the parsed_data.tests schema.

    Args:
        k: number of distinct tests to draw.
        pool: mapping of test name -> (low, high, unit, reference_range).
            Defaults to the module-level ``TEST_POOL``.

    Returns:
        A list of dicts with keys ``name``, ``value``, ``unit`` and
        ``reference_range``.  The value is a float rounded to two decimals
        when either bound is a float, otherwise an int.

    Raises:
        ValueError: if ``k`` exceeds the pool size (from ``random.sample``).
    """
    if pool is None:
        pool = TEST_POOL
    tests = []
    for name, (low, high, unit, ref) in random.sample(list(pool.items()), k=k):
        raw = random.uniform(low, high)
        if isinstance(low, float) or isinstance(high, float):
            value = round(raw, 2)
        else:
            value = int(round(raw))
        tests.append(
            {"name": name, "value": value, "unit": unit, "reference_range": ref}
        )
    return tests
82
+
83
def compute_direction(points):
    """Classify the latest movement of a series of ``{"value": ...}`` points.

    Compares the last two points and returns "increasing", "decreasing" or
    "stable"; fewer than two points is always "stable".
    """
    if len(points) < 2:
        return "stable"
    previous, latest = points[-2]["value"], points[-1]["value"]
    if latest > previous:
        return "increasing"
    if latest < previous:
        return "decreasing"
    return "stable"
91
+
92
+ # ---------- Generator function ----------
93
def generate_test_data(db_uri: str, db_name: str, num_users: int = 100,
                       min_patients=3, max_patients=5,
                       min_reports=2, max_reports=6,
                       min_tests=3, max_tests=6,
                       seed: int | None = None):
    """Populate the database with synthetic users, patients and lab data.

    For each of ``num_users`` users this creates a random number of patients
    (``min_patients``..``max_patients``); each patient gets a random number
    of reports (``min_reports``..``max_reports``) with
    ``min_tests``..``max_tests`` tests each, one trend document per test
    name, and one final report.  Progress and a summary of created document
    counts are printed to stdout.

    Args:
        db_uri: MongoDB connection URI.
        db_name: target database name.
        seed: when given, seeds both ``random`` and ``Faker``.
    """
    if seed is not None:
        random.seed(seed)
        Faker.seed(seed)

    db = SheamiDB(db_uri, db_name=db_name)

    counters = {"users": 0, "patients": 0, "reports": 0, "trends": 0, "final_reports": 0}

    for u_idx in range(num_users):
        # create user
        user_name = faker.name()
        user_email = faker.unique.safe_email()
        user_id = db.add_user(email=user_email, name=user_name)
        counters["users"] += 1

        for _ in range(random.randint(min_patients, max_patients)):
            patient_name = faker.name()
            # DOB corresponding to an age between 18 and 85 (plus up to a year)
            age = random.randint(18, 85)
            dob_dt = datetime.now() - timedelta(days=365 * age + random.randint(0, 365))
            dob_str = dob_dt.strftime("%Y-%m-%d")
            gender = random.choice(["male", "female", "other"])

            patient_id = db.add_patient(user_id=user_id, name=patient_name, dob=dob_str, gender=gender)
            counters["patients"] += 1

            # test name -> list of {"date", "value"} points across reports
            trends_map = defaultdict(list)

            for _ in range(random.randint(min_reports, max_reports)):
                report_date = random_date_between().strftime("%Y-%m-%d")
                num_tests = random.randint(min_tests, max_tests)
                tests = make_test_values(num_tests)

                parsed_data = {
                    "tests": tests,
                    "report_date": report_date,
                }
                file_name = f"report_{report_date.replace('-', '')}_{random.randint(1000,9999)}.pdf"
                db.add_report(patient_id=patient_id, file_name=file_name, parsed_data=parsed_data)
                counters["reports"] += 1

                for t in tests:
                    trends_map[t["name"]].append({"date": report_date, "value": t["value"]})

            # Write each trend (upsert) and build the final-report snapshot
            # in one pass, so every series is sorted only once.
            trend_snapshots = []
            for test_name, points in trends_map.items():
                # ISO dates sort chronologically as plain strings.
                pts_sorted = sorted(points, key=lambda pt: pt["date"])
                db.add_or_update_trend(patient_id=patient_id, test_name=test_name, trend_data=pts_sorted)
                counters["trends"] += 1
                trend_snapshots.append({
                    "test_name": test_name,
                    "latest_value": pts_sorted[-1]["value"],
                    "direction": compute_direction(pts_sorted),
                })

            summary = f"Auto-generated summary for {patient_name} ({len(trend_snapshots)} tests)"
            # simple heuristic: if any trending up, recommend follow-up
            if any(ts["direction"] == "increasing" for ts in trend_snapshots):
                recommendations = ["Follow up for rising values"]
            else:
                recommendations = ["Continue routine monitoring"]
            db.add_final_report(patient_id=patient_id,
                                summary=summary,
                                recommendations=recommendations,
                                trend_snapshots=trend_snapshots)
            counters["final_reports"] += 1

        # occasional progress print
        if (u_idx + 1) % 10 == 0 or (u_idx + 1) == num_users:
            print(f"Created {u_idx+1}/{num_users} users so far...")

    # summary
    print("Generation complete. Summary:")
    for k, v in counters.items():
        print(f" {k}: {v}")
189
+
190
+ # ---------- CLI ----------
191
if __name__ == "__main__":
    # Command-line entry point; connection defaults come from the
    # environment (MONGODB_URI / MONGODB_DB), loaded from .env above.
    parser = argparse.ArgumentParser(description="Generate test data for Sheami (matches your db.py).")
    parser.add_argument("--num-users", type=int, default=100, help="Number of users to create")
    parser.add_argument("--db-uri", type=str, default=os.getenv("MONGODB_URI", "mongodb://localhost:27017"),
                        help="MongoDB connection URI")
    parser.add_argument("--db-name", type=str, default=os.getenv("MONGODB_DB", "sheami"),
                        help="Database name")
    parser.add_argument("--seed", type=int, default=None, help="Random seed (optional)")
    args = parser.parse_args()

    generate_test_data(db_uri=args.db_uri, db_name=args.db_name,
                       num_users=args.num_users, seed=args.seed)
tests/test_db.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------
2
+ # Example usage
3
+ # ---------------------------
4
+ import json
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ from modules.db import SheamiDB
9
+
10
if __name__ == "__main__":
    # Manual smoke script: prints the full data for doctor1@sheami.com when
    # that user exists, otherwise seeds the user with one patient, report,
    # trend and final report.
    load_dotenv(override=True)
    # The rest of the project reads MONGODB_URI / MONGODB_DB; the legacy
    # DB_URI name is still honoured as a fallback.
    db = SheamiDB(
        os.getenv("MONGODB_URI") or os.getenv("DB_URI"),
        db_name=os.getenv("MONGODB_DB", "sheami"),
    )

    # Suppose logged-in user email is known
    user = db.get_user_by_email("doctor1@sheami.com")
    if user:
        user_id = str(user["_id"])
        data = db.get_user_full_data(user_id)
        # `data` looks like:
        # {
        #   "user": {...},
        #   "patients": [
        #     {
        #       "patient": {...},
        #       "reports": [...],
        #       "trends": [...],
        #       "final_reports": [...]
        #     }, ...
        #   ]
        # }
        # default=str is deliberate: the documents contain ObjectId and
        # datetime values, which json cannot serialize on its own.
        print("data = ", json.dumps(data, indent=1, default=str))
    else:
        # Add user
        user_id = db.add_user("doctor1@sheami.com", "Dr. Smith")

        # Add patient
        patient_id = db.add_patient(user_id, "John Doe", "1980-05-20", "male")

        # Add report
        parsed_data = {
            "tests": [
                {"name": "Hemoglobin", "value": 13.5, "unit": "g/dL", "reference_range": "13.0-17.0"},
                {"name": "Cholesterol", "value": 210, "unit": "mg/dL", "reference_range": "<200"},
            ]
        }
        report_id = db.add_report(patient_id, "bloodwork_july.pdf", parsed_data)

        # Add trend
        db.add_or_update_trend(patient_id, "Hemoglobin", [
            {"date": "2025-05-01", "value": 13.2},
            {"date": "2025-07-01", "value": 13.5},
            {"date": "2025-08-19", "value": 13.8},
        ])

        # Add final report
        final_report_id = db.add_final_report(
            patient_id,
            "Hemoglobin stable, cholesterol slightly high.",
            ["Maintain healthy diet", "Check cholesterol in 3 months"],
            [
                {"test_name": "Hemoglobin", "latest_value": 13.8, "direction": "stable"},
                {"test_name": "Cholesterol", "latest_value": 210, "direction": "increasing"},
            ],
        )

        print("User ID:", user_id)
        print("Patient ID:", patient_id)
        print("Report ID:", report_id)
        print("Final Report ID:", final_report_id)
tests/test_pdf_generation.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import matplotlib.pyplot as plt
4
+ import base64
5
+ from weasyprint import HTML
6
+
7
+ from config import SheamiConfig
8
+ from pdf_helper import generate_pdf
9
+
10
def test_generate_pdf():
    """Render a sample report PDF from fake HTML and four generated plots."""
    # mkdtemp (not TemporaryDirectory) on purpose: the PDF path is printed at
    # the end so the file can be opened and inspected after the run.
    tmp_dir = tempfile.mkdtemp()

    # 1. Fake interpretation HTML
    interpretation_html = """
    <h1>Test Patient: John Doe</h1>
    <p>Age: 45, Sex: Male</p>
    <p>Clinical Summary:</p>
    <ul>
    <li>All vitals normal ✅</li>
    <li>Minor deviation in cholesterol ▲</li>
    <li>Vitamin D slightly low ▼</li>
    </ul>
    """

    # 2. Generate 4 fake plots as (title, png_path) pairs
    plot_files = []
    for i in range(4):
        plt.figure(figsize=(4, 3))
        plt.plot([1, 2, 3, 4], [i*2+1, i*2+2, i*2+1, i*2+3], marker='o')
        plt.title(f"Test Plot {i+1}")
        plt.xlabel("X")
        plt.ylabel("Y")
        plot_path = os.path.join(tmp_dir, f"plot_{i+1}.png")
        plt.savefig(plot_path)
        plt.close()
        plot_files.append((f"Test {i+1}", plot_path))

    # 3. Call the generate_pdf function
    pdf_path = os.path.join(tmp_dir, "test_report.pdf")
    generate_pdf(pdf_path=pdf_path, interpretation_html=interpretation_html, plot_files=plot_files)

    # NOTE(review): assumes generate_pdf writes its output to ``pdf_path`` —
    # confirm against pdf_helper.
    assert os.path.isfile(pdf_path)

    print(f"Test PDF generated at: {pdf_path}")


# Guarded so importing this module (e.g. pytest collection) does not run the
# test body a second time as an import side effect.
if __name__ == "__main__":
    test_generate_pdf()
uv.lock CHANGED
@@ -362,6 +362,18 @@ wheels = [
362
  { url = "https://files.pythonhosted.org/packages/b2/b7/545d2c10c1fc15e48653c91efde329a790f2eecfbbf2bd16003b5db2bab0/dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9", size = 1892, upload-time = "2025-02-19T22:15:01.647Z" },
363
  ]
364
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  [[package]]
366
  name = "fastapi"
367
  version = "0.116.1"
@@ -1004,6 +1016,15 @@ wheels = [
1004
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
1005
  ]
1006
 
 
 
 
 
 
 
 
 
 
1007
  [[package]]
1008
  name = "numpy"
1009
  version = "2.3.2"
@@ -1216,6 +1237,19 @@ wheels = [
1216
  { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" },
1217
  ]
1218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1219
  [[package]]
1220
  name = "pycparser"
1221
  version = "2.22"
@@ -1550,6 +1584,7 @@ source = { virtual = "." }
1550
  dependencies = [
1551
  { name = "authlib" },
1552
  { name = "dotenv" },
 
1553
  { name = "fastapi" },
1554
  { name = "gradio" },
1555
  { name = "gradio-modal" },
@@ -1560,6 +1595,7 @@ dependencies = [
1560
  { name = "markdown2" },
1561
  { name = "matplotlib" },
1562
  { name = "pandas" },
 
1563
  { name = "pymongo" },
1564
  { name = "pypdf" },
1565
  { name = "python-multipart" },
@@ -1572,6 +1608,7 @@ dependencies = [
1572
  requires-dist = [
1573
  { name = "authlib", specifier = ">=1.6.1" },
1574
  { name = "dotenv", specifier = ">=0.9.9" },
 
1575
  { name = "fastapi", specifier = ">=0.116.1" },
1576
  { name = "gradio", specifier = ">=5.42.0" },
1577
  { name = "gradio-modal", specifier = ">=0.0.4" },
@@ -1582,7 +1619,8 @@ requires-dist = [
1582
  { name = "markdown2", specifier = ">=2.5.4" },
1583
  { name = "matplotlib", specifier = ">=3.10.5" },
1584
  { name = "pandas", specifier = ">=2.3.1" },
1585
- { name = "pymongo", extras = ["srv"], specifier = ">=4.14.0" },
 
1586
  { name = "pypdf", specifier = ">=6.0.0" },
1587
  { name = "python-multipart", specifier = ">=0.0.20" },
1588
  { name = "reportlab", specifier = ">=4.4.3" },
 
362
  { url = "https://files.pythonhosted.org/packages/b2/b7/545d2c10c1fc15e48653c91efde329a790f2eecfbbf2bd16003b5db2bab0/dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9", size = 1892, upload-time = "2025-02-19T22:15:01.647Z" },
363
  ]
364
 
365
+ [[package]]
366
+ name = "faker"
367
+ version = "37.5.3"
368
+ source = { registry = "https://pypi.org/simple" }
369
+ dependencies = [
370
+ { name = "tzdata" },
371
+ ]
372
+ sdist = { url = "https://files.pythonhosted.org/packages/ce/5d/7797a74e8e31fa227f0303239802c5f09b6722bdb6638359e7b6c8f30004/faker-37.5.3.tar.gz", hash = "sha256:8315d8ff4d6f4f588bd42ffe63abd599886c785073e26a44707e10eeba5713dc", size = 1907147, upload-time = "2025-07-30T15:52:19.528Z" }
373
+ wheels = [
374
+ { url = "https://files.pythonhosted.org/packages/4b/bf/d06dd96e7afa72069dbdd26ed0853b5e8bd7941e2c0819a9b21d6e6fc052/faker-37.5.3-py3-none-any.whl", hash = "sha256:386fe9d5e6132a915984bf887fcebcc72d6366a25dd5952905b31b141a17016d", size = 1949261, upload-time = "2025-07-30T15:52:17.729Z" },
375
+ ]
376
+
377
  [[package]]
378
  name = "fastapi"
379
  version = "0.116.1"
 
1016
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
1017
  ]
1018
 
1019
+ [[package]]
1020
+ name = "narwhals"
1021
+ version = "2.1.2"
1022
+ source = { registry = "https://pypi.org/simple" }
1023
+ sdist = { url = "https://files.pythonhosted.org/packages/37/f0/b0550d9b84759f4d045fd43da2f811e8b23dc2001e38c3254456da7f3adb/narwhals-2.1.2.tar.gz", hash = "sha256:afb9597e76d5b38c2c4b7c37d27a2418b8cc8049a66b8a5aca9581c92ae8f8bf", size = 533772, upload-time = "2025-08-15T08:24:50.916Z" }
1024
+ wheels = [
1025
+ { url = "https://files.pythonhosted.org/packages/a8/01/824fff6789ce92a53242d24b6f5f3a982df2f610c51020f934bf878d2a99/narwhals-2.1.2-py3-none-any.whl", hash = "sha256:136b2f533a4eb3245c54254f137c5d14cef5c4668cff67dc6e911a602acd3547", size = 392064, upload-time = "2025-08-15T08:24:48.788Z" },
1026
+ ]
1027
+
1028
  [[package]]
1029
  name = "numpy"
1030
  version = "2.3.2"
 
1237
  { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" },
1238
  ]
1239
 
1240
+ [[package]]
1241
+ name = "plotly"
1242
+ version = "6.3.0"
1243
+ source = { registry = "https://pypi.org/simple" }
1244
+ dependencies = [
1245
+ { name = "narwhals" },
1246
+ { name = "packaging" },
1247
+ ]
1248
+ sdist = { url = "https://files.pythonhosted.org/packages/a0/64/850de5076f4436410e1ce4f6a69f4313ef6215dfea155f3f6559335cad29/plotly-6.3.0.tar.gz", hash = "sha256:8840a184d18ccae0f9189c2b9a2943923fd5cae7717b723f36eef78f444e5a73", size = 6923926, upload-time = "2025-08-12T20:22:14.127Z" }
1249
+ wheels = [
1250
+ { url = "https://files.pythonhosted.org/packages/95/a9/12e2dc726ba1ba775a2c6922d5d5b4488ad60bdab0888c337c194c8e6de8/plotly-6.3.0-py3-none-any.whl", hash = "sha256:7ad806edce9d3cdd882eaebaf97c0c9e252043ed1ed3d382c3e3520ec07806d4", size = 9791257, upload-time = "2025-08-12T20:22:09.205Z" },
1251
+ ]
1252
+
1253
  [[package]]
1254
  name = "pycparser"
1255
  version = "2.22"
 
1584
  dependencies = [
1585
  { name = "authlib" },
1586
  { name = "dotenv" },
1587
+ { name = "faker" },
1588
  { name = "fastapi" },
1589
  { name = "gradio" },
1590
  { name = "gradio-modal" },
 
1595
  { name = "markdown2" },
1596
  { name = "matplotlib" },
1597
  { name = "pandas" },
1598
+ { name = "plotly" },
1599
  { name = "pymongo" },
1600
  { name = "pypdf" },
1601
  { name = "python-multipart" },
 
1608
  requires-dist = [
1609
  { name = "authlib", specifier = ">=1.6.1" },
1610
  { name = "dotenv", specifier = ">=0.9.9" },
1611
+ { name = "faker", specifier = ">=37.5.3" },
1612
  { name = "fastapi", specifier = ">=0.116.1" },
1613
  { name = "gradio", specifier = ">=5.42.0" },
1614
  { name = "gradio-modal", specifier = ">=0.0.4" },
 
1619
  { name = "markdown2", specifier = ">=2.5.4" },
1620
  { name = "matplotlib", specifier = ">=3.10.5" },
1621
  { name = "pandas", specifier = ">=2.3.1" },
1622
+ { name = "plotly", specifier = ">=6.3.0" },
1623
+ { name = "pymongo", specifier = ">=4.14.0" },
1624
  { name = "pypdf", specifier = ">=6.0.0" },
1625
  { name = "python-multipart", specifier = ">=0.0.20" },
1626
  { name = "reportlab", specifier = ">=4.4.3" },