vikramvasudevan committed on
Commit
9d8f87b
·
verified ·
1 Parent(s): 43f6364

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. Pipfile +120 -0
  2. home.py +18 -6
  3. modules/db.py +22 -3
  4. tests/dedup_trends.py +41 -0
Pipfile ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+ aiofiles = "==24.1.0"
8
+ annotated-types = "==0.7.0"
9
+ anyio = "==4.10.0"
10
+ audioop-lts = "==0.2.2"
11
+ authlib = "==1.6.1"
12
+ brotli = "==1.1.0"
13
+ certifi = "==2025.8.3"
14
+ cffi = "==1.17.1"
15
+ charset-normalizer = "==3.4.3"
16
+ click = "==8.2.1"
17
+ colorama = "==0.4.6"
18
+ contourpy = "==1.3.3"
19
+ cryptography = "==45.0.6"
20
+ cssselect2 = "==0.8.0"
21
+ cycler = "==0.12.1"
22
+ distro = "==1.9.0"
23
+ dnspython = "==2.7.0"
24
+ dotenv = "==0.9.9"
25
+ faker = "==37.5.3"
26
+ fastapi = "==0.116.1"
27
+ ffmpy = "==0.6.1"
28
+ filelock = "==3.19.1"
29
+ fonttools = "==4.59.1"
30
+ fsspec = "==2025.7.0"
31
+ gradio = "==5.42.0"
32
+ gradio-client = "==1.11.1"
33
+ gradio-modal = "==0.0.4"
34
+ greenlet = "==3.2.4"
35
+ groovy = "==0.1.2"
36
+ h11 = "==0.16.0"
37
+ httpcore = "==1.0.9"
38
+ httpx = "==0.28.1"
39
+ huggingface-hub = "==0.34.4"
40
+ idna = "==3.10"
41
+ itsdangerous = "==2.2.0"
42
+ jinja2 = "==3.1.6"
43
+ jiter = "==0.10.0"
44
+ jsonpatch = "==1.33"
45
+ jsonpointer = "==3.0.0"
46
+ kiwisolver = "==1.4.9"
47
+ langchain = "==0.3.27"
48
+ langchain-core = "==0.3.74"
49
+ langchain-openai = "==0.3.30"
50
+ langchain-text-splitters = "==0.3.9"
51
+ langgraph = "==0.6.5"
52
+ langgraph-checkpoint = "==2.1.1"
53
+ langgraph-prebuilt = "==0.6.4"
54
+ langgraph-sdk = "==0.2.0"
55
+ langsmith = "==0.4.14"
56
+ markdown-it-py = "==4.0.0"
57
+ markdown2 = "==2.5.4"
58
+ markupsafe = "==3.0.2"
59
+ matplotlib = "==3.10.5"
60
+ mdurl = "==0.1.2"
61
+ motor = "==3.7.1"
62
+ narwhals = "==2.1.2"
63
+ numpy = "==2.3.2"
64
+ openai = "==1.99.9"
65
+ orjson = "==3.11.2"
66
+ ormsgpack = "==1.10.0"
67
+ pandas = "==2.3.1"
68
+ pillow = "==11.3.0"
69
+ plotly = "==6.3.0"
70
+ pycparser = "==2.22"
71
+ pydantic = "==2.11.7"
72
+ pydantic-core = "==2.33.2"
73
+ pydub = "==0.25.1"
74
+ pydyf = "==0.11.0"
75
+ pygments = "==2.19.2"
76
+ pymongo = "==4.14.0"
77
+ pyparsing = "==3.2.3"
78
+ pypdf = "==6.0.0"
79
+ pyphen = "==0.17.2"
80
+ python-dateutil = "==2.9.0.post0"
81
+ python-dotenv = "==1.1.1"
82
+ python-multipart = "==0.0.20"
83
+ pytz = "==2025.2"
84
+ pyyaml = "==6.0.2"
85
+ regex = "==2025.7.34"
86
+ reportlab = "==4.4.3"
87
+ requests = "==2.32.4"
88
+ requests-toolbelt = "==1.0.0"
89
+ rich = "==14.1.0"
90
+ ruff = "==0.12.9"
91
+ safehttpx = "==0.1.6"
92
+ semantic-version = "==2.10.0"
93
+ shellingham = "==1.5.4"
94
+ six = "==1.17.0"
95
+ sniffio = "==1.3.1"
96
+ sqlalchemy = "==2.0.43"
97
+ starlette = "==0.47.2"
98
+ tenacity = "==9.1.2"
99
+ tiktoken = "==0.11.0"
100
+ tinycss2 = "==1.4.0"
101
+ tinyhtml5 = "==2.0.0"
102
+ tomlkit = "==0.13.3"
103
+ tqdm = "==4.67.1"
104
+ typer = "==0.16.0"
105
+ typing-extensions = "==4.14.1"
106
+ typing-inspection = "==0.4.1"
107
+ tzdata = "==2025.2"
108
+ urllib3 = "==2.5.0"
109
+ uvicorn = "==0.35.0"
110
+ weasyprint = "==66.0"
111
+ webencodings = "==0.5.1"
112
+ websockets = "==15.0.1"
113
+ xxhash = "==3.5.0"
114
+ zopfli = "==0.2.3.post1"
115
+ zstandard = "==0.23.0"
116
+
117
+ [dev-packages]
118
+
119
+ [requires]
120
+ python_version = "3.13"
home.py CHANGED
@@ -249,8 +249,21 @@ def build_trend_figure(trend_doc: Dict[str, Any]) -> Figure:
249
  title="No trend data", xaxis_title="Date", yaxis_title="Value"
250
  )
251
  return fig
252
- dates = [pd.to_datetime(p.get("date"), errors="coerce") for p in points]
253
- values = [coerce_to_number(p.get("value")) for p in points]
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  fig = Figure()
255
  fig.add_trace(
256
  Scatter(
@@ -268,7 +281,6 @@ def build_trend_figure(trend_doc: Dict[str, Any]) -> Figure:
268
  )
269
  return fig
270
 
271
-
272
  # ----- App state + loaders -----
273
 
274
 
@@ -513,21 +525,21 @@ def build_home_page():
513
  delete_patient_btn = gr.Button(
514
  "❌ Delete Patient",
515
  size="lg",
516
- scale=1,
517
  variant="stop",
518
  interactive=False,
519
  )
520
  edit_patient_btn = gr.Button(
521
  "✏️ Edit Patient",
522
  size="lg",
523
- scale=1,
524
  variant="huggingface",
525
  interactive=False,
526
  )
527
  upload_reports_btn = gr.Button(
528
  "📊 Upload Reports",
529
  size="lg",
530
- scale=1,
531
  variant="huggingface",
532
  interactive=False,
533
  )
 
249
  title="No trend data", xaxis_title="Date", yaxis_title="Value"
250
  )
251
  return fig
252
+
253
+ # Parse dates and values and pair them
254
+ date_value_pairs = []
255
+ for p in points:
256
+ date = pd.to_datetime(p.get("date"), errors="coerce")
257
+ value = coerce_to_number(p.get("value"))
258
+ if pd.notna(date) and value is not None:
259
+ date_value_pairs.append((date, value))
260
+
261
+ # Sort pairs by date
262
+ date_value_pairs.sort(key=lambda x: x[0])
263
+
264
+ # Unpack sorted pairs
265
+ dates, values = zip(*date_value_pairs) if date_value_pairs else ([], [])
266
+
267
  fig = Figure()
268
  fig.add_trace(
269
  Scatter(
 
281
  )
282
  return fig
283
 
 
284
  # ----- App state + loaders -----
285
 
286
 
 
525
  delete_patient_btn = gr.Button(
526
  "❌ Delete Patient",
527
  size="lg",
528
+ scale=0,
529
  variant="stop",
530
  interactive=False,
531
  )
532
  edit_patient_btn = gr.Button(
533
  "✏️ Edit Patient",
534
  size="lg",
535
+ scale=0,
536
  variant="huggingface",
537
  interactive=False,
538
  )
539
  upload_reports_btn = gr.Button(
540
  "📊 Upload Reports",
541
  size="lg",
542
+ scale=0,
543
  variant="huggingface",
544
  interactive=False,
545
  )
modules/db.py CHANGED
@@ -220,17 +220,24 @@ class SheamiDB:
220
 
221
  async def delete_patient(self, patient_id: str):
222
  try:
 
 
 
223
  yield "⌛Deleting patient reports ... "
224
  result = await self.reports.delete_one({"patient_id": ObjectId(patient_id)})
225
  yield f"✅Deleted {result.deleted_count} patient reports ... "
226
  yield "⌛Deleting patient trends ... "
227
- result = await self.trends.delete_one({"patient_id": ObjectId(patient_id)})
228
  yield f"✅Deleted {result.deleted_count} patient trends ... "
229
  yield "⌛Deleting patient final reports ... "
230
- result = await self.final_reports.delete_one({"patient_id": ObjectId(patient_id)})
 
 
231
  yield f"✅Deleted {result.deleted_count} patient final reports ... "
232
  yield "⌛Deleting patient run stats ... "
233
- result = await self.run_stats.delete_one({"patient_id": ObjectId(patient_id)})
 
 
234
  yield f"✅Deleted {result.deleted_count} patient run stats ... "
235
  yield "⌛Deleting patient ... "
236
  result = await self.patients.delete_one({"_id": ObjectId(patient_id)})
@@ -537,3 +544,15 @@ class SheamiDB:
537
  )
538
  run_stats = await cursor.to_list(length=None)
539
  return run_stats
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  async def delete_patient(self, patient_id: str):
222
  try:
223
+ yield "⌛Deleting patient PDFs ... "
224
+ deleted_count = await self.delete_pdfs_by_patient_id(patient_id)
225
+ yield f"✅Deleted {deleted_count} patient PDFs ... "
226
  yield "⌛Deleting patient reports ... "
227
  result = await self.reports.delete_one({"patient_id": ObjectId(patient_id)})
228
  yield f"✅Deleted {result.deleted_count} patient reports ... "
229
  yield "⌛Deleting patient trends ... "
230
+ result = await self.trends.delete_one({"patient_id": ObjectId(patient_id)})
231
  yield f"✅Deleted {result.deleted_count} patient trends ... "
232
  yield "⌛Deleting patient final reports ... "
233
+ result = await self.final_reports.delete_one(
234
+ {"patient_id": ObjectId(patient_id)}
235
+ )
236
  yield f"✅Deleted {result.deleted_count} patient final reports ... "
237
  yield "⌛Deleting patient run stats ... "
238
+ result = await self.run_stats.delete_one(
239
+ {"patient_id": ObjectId(patient_id)}
240
+ )
241
  yield f"✅Deleted {result.deleted_count} patient run stats ... "
242
  yield "⌛Deleting patient ... "
243
  result = await self.patients.delete_one({"_id": ObjectId(patient_id)})
 
544
  )
545
  run_stats = await cursor.to_list(length=None)
546
  return run_stats
547
+
548
async def delete_pdfs_by_patient_id(self, patient_id: str) -> int:
    """Delete every stored file whose metadata references *patient_id*.

    Looks up matching documents in ``self.db.fs.files`` (fetching only
    their ``_id``) and removes each one through ``self.fs.delete``.

    Args:
        patient_id: String form of the patient's id; it is converted with
            ``ObjectId`` before being matched against
            ``metadata.patient_id``.

    Returns:
        The number of files for which a delete was issued.
    """
    query = {"metadata.patient_id": ObjectId(patient_id)}
    removed = 0
    # Only the _id is needed to issue the delete, so project everything else away.
    async for stored_file in self.db.fs.files.find(query, projection={"_id": 1}):
        await self.fs.delete(stored_file["_id"])
        removed += 1
    return removed
tests/dedup_trends.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.db import SheamiDB
2
+
3
+ db = None # global placeholder
4
+
5
+
6
async def deduplicate_trend_data():
    """Collapse duplicate dates in every trend document, keeping the largest value.

    Iterates over all documents in ``db.trends``. For each document the
    ``trend_data`` list is reduced to one entry per ``date`` (the entry with
    the highest numeric ``value``), and the document is rewritten only when
    that reduction changed the number of entries. A summary line with the
    number of rewritten documents is printed at the end.

    NOTE: entries that cannot take part in the comparison (missing ``date``
    or ``value`` key, or a value ``float()`` rejects) are left out of the
    rewritten list, so a document containing such entries is rewritten
    without them.
    """
    global db
    if db is None:
        # Created lazily on first use rather than at import time.
        db = SheamiDB()

    updated_count = 0
    async for doc in db.trends.find({}):
        # ``or []`` also covers a stored null, not just a missing key.
        original_entries = doc.get("trend_data") or []
        deduped_trend_data = _dedupe_trend_entries(original_entries)
        # Only write back when entries were actually removed.
        if len(deduped_trend_data) != len(original_entries):
            await db.trends.update_one(
                {"_id": doc["_id"]}, {"$set": {"trend_data": deduped_trend_data}}
            )
            updated_count += 1
    print(f"Updated {updated_count} documents.")


def _dedupe_trend_entries(entries):
    """Return one entry per date from *entries*, keeping the highest value.

    Entries keep the order in which their date first appeared. On a tie the
    earliest entry wins because the comparison is strict.
    """
    best = {}  # date -> (parsed numeric value, original entry)
    for entry in entries:
        try:
            date = entry["date"]
            value = float(entry["value"])
        except (KeyError, TypeError, ValueError) as exc:
            # A single malformed entry must not abort the whole run.
            print("Skipping trend entry without a usable date/value:", entry, repr(exc))
            continue
        if date not in best or value > best[date][0]:
            best[date] = (value, entry)
    return [entry for _, entry in best.values()]


# Usage:
if __name__ == "__main__":
    import asyncio

    asyncio.run(deduplicate_trend_data())