Spaces:

Fuzure
/

sheami

Sleeping

App Files Files Community

vikramvasudevan committed on Aug 21, 2025

Commit

52308e4

verified ·

1 Parent(s): 9d8f87b

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

graph.py +53 -30
modules/db.py +95 -22
modules/models.py +2 -1
tests/test_trends.py +16 -11
ui.py +1 -1

graph.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from datetime import datetime
 import threading
 import time
 import pandas as pd
 from langchain_core.prompts import ChatPromptTemplate
 import matplotlib.pyplot as plt
@@ -101,7 +102,7 @@ async def fn_init_node(state: SheamiState):
         state["messages"].append(f"{idx+1}. {report.report_file_name}")
     state["standardized_reports"] = []
     state["trends_json"] = {}
-    state["interpreted_report"] = ""
     state["current_index"] = -1
     state["units_processed"] = 0
     state["units_total"] = 0
@@ -113,7 +114,9 @@ async def fn_init_node(state: SheamiState):
     run_id = await get_db().start_run(
         user_email=state["user_email"],
         patient_id=state["patient_id"],
-        source_file_names=[report.report_file_name for report in state["uploaded_reports"]],
     )
     state["run_id"] = run_id
@@ -198,7 +201,7 @@ async def fn_standardize_current_report_node(state: SheamiState):
             SheamiConfig.get_output_dir(state["thread_id"]), f"report_{idx}.json"
         ),
         "w",
-        encoding="utf-8"
     ) as f:
         f.write(result.model_dump_json(indent=2))
@@ -393,22 +396,17 @@ async def fn_trends_aggregator_node(state: SheamiState):
                 )
     # Build trends JSON
-    state["trends_json"] = {
-        "parameter_trends": [
-            {
-                "test_name": k,
-                "values": v,
-                "reference_range": ref_ranges.get(k),
-            }
-            for k, v in sorted(trends.items(), key=lambda kv: kv[0].lower())
-        ]
-    }
     # Persist
     output_dir = SheamiConfig.get_output_dir(state["thread_id"])
     os.makedirs(output_dir, exist_ok=True)
     with open(os.path.join(output_dir, "trends.json"), "w", encoding="utf-8") as f:
-        json.dump(state["trends_json"], f, indent=2, ensure_ascii=False)
     logger.info("%s| Aggregating Trends : finished", state["thread_id"])
     state["messages"].append("Aggregating Trends : finished")
@@ -419,21 +417,42 @@ async def fn_interpreter_node(state: SheamiState):
     logger.info("%s| Interpreting Trends : started", state["thread_id"])
     state["messages"].append("Interpreting Trends : started")
     # 1. LLM narrative
     messages = [
         SystemMessage(
             content=(
                 "Interpret the following medical trends and produce a clean, structured **HTML** report without any markdown formatting like backquotes etc. "
                 "The report should have: "
-                "1. Patient summary (name, age, sex if available) "
-                "2. Trend summaries (tables with Test Name, Value, Unit, Reference Range, and Status) "
-                "3. Clinical insights. "
-                "Use ✅ for normal, ▲ for high, and ▼ for low. "
                 "Format tables in proper <table> with <tr>, <th>, <td>. "
                 "Do not include charts, they will be programmatically added."
             )
         ),
-        HumanMessage(content=json.dumps(state["trends_json"], indent=2)),
     ]
     response = await llm.ainvoke(messages)
     interpretation_html = response.content  # ✅ already HTML now
@@ -443,11 +462,9 @@ async def fn_interpreter_node(state: SheamiState):
     os.makedirs(plots_dir, exist_ok=True)
     plot_files = []
-    for param in sorted(
-        state["trends_json"].get("parameter_trends", []), key=lambda x: x["test_name"]
-    ):
         test_name = param["test_name"]
-        values = param["values"]
         x = [parse_any_date(v["date"]) for v in values]
         x = pd.to_datetime(x, errors="coerce")
@@ -463,7 +480,7 @@ async def fn_interpreter_node(state: SheamiState):
         plt.figure(figsize=(6, 4))
         plt.plot(x, y, marker="o", linestyle="-", label="Observed values")
-        ref = param.get("reference_range")
         if ref:
             ymin, ymax = ref.get("min"), ref.get("max")
             if ymin is not None and ymax is not None:
@@ -505,11 +522,17 @@ async def fn_interpreter_node(state: SheamiState):
     )
     # Save state
-    state["interpreted_report"] = pdf_path
-    schedule_cleanup(file_path=SheamiConfig.get_output_dir(state["thread_id"]))
     logger.info("%s| Interpreting Trends : finished", state["thread_id"])
     state["messages"].append("Interpreting Trends : finished")
     state["milestones"][-1].status = "completed"
     state["milestones"][-1].end_time = datetime.now()
     await get_db().add_or_update_milestone(
@@ -537,14 +560,12 @@ async def fn_interpreter_node(state: SheamiState):
         pdf_bytes = f.read()
     final_report_id = await get_db().add_final_report_v2(
         patient_id=state["patient_id"],
-        summary=interpretation_html,
         pdf_bytes=pdf_bytes,
         file_name=f"health_trends_report_{state["patient_id"]}.pdf",
     )
     logger.info("final_report_id = %s", final_report_id)
-    return state
 def schedule_cleanup(file_path, delay=300):  # 300 sec = 5 min
     def cleanup():
@@ -631,6 +652,7 @@ def create_graph(user_email: str, patient_id: str, thread_id: str):
     workflow.add_node("unit_normalizer_notifier", fn_unit_normalizer_node_notifier)
     workflow.add_node("trends_notifier", fn_trends_aggregator_node_notifier)
     workflow.add_node("interpreter_notifier", fn_interpreter_node_notifier)
     workflow.add_edge(START, "init")
     workflow.add_edge("init", "standardizer_notifier")
@@ -654,7 +676,8 @@ def create_graph(user_email: str, patient_id: str, thread_id: str):
     workflow.add_edge("trends_notifier", "trends")
     workflow.add_edge("trends", "interpreter_notifier")
     workflow.add_edge("interpreter_notifier", "interpreter")
-    workflow.add_edge("interpreter", END)
     logger.info("%s| Creating Graph : finished", thread_id)
     return workflow.compile(checkpointer=memory)

 from datetime import datetime
 import threading
 import time
+from bson import ObjectId
 import pandas as pd
 from langchain_core.prompts import ChatPromptTemplate
 import matplotlib.pyplot as plt
         state["messages"].append(f"{idx+1}. {report.report_file_name}")
     state["standardized_reports"] = []
     state["trends_json"] = {}
+    state["pdf_path"] = ""
     state["current_index"] = -1
     state["units_processed"] = 0
     state["units_total"] = 0
     run_id = await get_db().start_run(
         user_email=state["user_email"],
         patient_id=state["patient_id"],
+        source_file_names=[
+            report.report_file_name for report in state["uploaded_reports"]
+        ],
     )
     state["run_id"] = run_id
             SheamiConfig.get_output_dir(state["thread_id"]), f"report_{idx}.json"
         ),
         "w",
+        encoding="utf-8",
     ) as f:
         f.write(result.model_dump_json(indent=2))
                 )
     # Build trends JSON
+    state["trends_json"] = await get_db().get_trends_by_patient(
+        patient_id=state["patient_id"],
+        fields=["test_name", "trend_data"],
+        serializable=True,
+    )
     # Persist
     output_dir = SheamiConfig.get_output_dir(state["thread_id"])
     os.makedirs(output_dir, exist_ok=True)
     with open(os.path.join(output_dir, "trends.json"), "w", encoding="utf-8") as f:
+        json.dump(state["trends_json"], f, indent=1, ensure_ascii=False)
     logger.info("%s| Aggregating Trends : finished", state["thread_id"])
     state["messages"].append("Aggregating Trends : finished")
     logger.info("%s| Interpreting Trends : started", state["thread_id"])
     state["messages"].append("Interpreting Trends : started")
+    uploaded_reports = await get_db().get_reports_by_patient(
+        patient_id=state["patient_id"]
+    )
+    llm_input = json.dumps(
+        {
+            "patient_id": state["patient_id"],
+            "patient_info": await get_db().get_patient_by_id(
+                patient_id=state["patient_id"],
+                fields=["name", "dob", "gender"],
+                serializable=True,
+            ),
+            "uploaded_reports": [report["file_name"] for report in uploaded_reports],
+            "trends_json": state["trends_json"],
+        },
+        indent=1,
+    )
     # 1. LLM narrative
     messages = [
         SystemMessage(
             content=(
                 "Interpret the following medical trends and produce a clean, structured **HTML** report without any markdown formatting like backquotes etc. "
                 "The report should have: "
+                "1. A header with the report generation date."
+                "2. The names of the reports used to summarize this information."
+                "3. Patient summary (patient id, name, age, sex if available)"
+                "4. Test window (mention the from and to dates)"
+                "5. Trend summaries (tables with Test Name, Latest Value, Highest Value, Lowest Value, Unit, Reference Range, Trend Direction and Inference) "
+                "6. Clinical insights. "
+                "For inference column, use ✅ for normal, ▲ for high, and ▼ for low. "
+                "For trend direction, use appropriate unicode  icons like up arrow (improving trend) , down arrow (worsening trend) or checkmark if determined normal"
                 "Format tables in proper <table> with <tr>, <th>, <td>. "
                 "Do not include charts, they will be programmatically added."
             )
         ),
+        HumanMessage(content=llm_input),
     ]
     response = await llm.ainvoke(messages)
     interpretation_html = response.content  # ✅ already HTML now
     os.makedirs(plots_dir, exist_ok=True)
     plot_files = []
+    for param in sorted(state["trends_json"], key=lambda x: x["test_name"]):
         test_name = param["test_name"]
+        values = param["trend_data"]
         x = [parse_any_date(v["date"]) for v in values]
         x = pd.to_datetime(x, errors="coerce")
         plt.figure(figsize=(6, 4))
         plt.plot(x, y, marker="o", linestyle="-", label="Observed values")
+        ref = param.get("test_reference_range")
         if ref:
             ymin, ymax = ref.get("min"), ref.get("max")
             if ymin is not None and ymax is not None:
     )
     # Save state
+    state["pdf_path"] = pdf_path
+    state["interpretation_html"] = interpretation_html
     logger.info("%s| Interpreting Trends : finished", state["thread_id"])
     state["messages"].append("Interpreting Trends : finished")
+    return state
+async def fn_final_cleanup_node(state: SheamiState):
+    pdf_path = state["pdf_path"]
+    schedule_cleanup(file_path=SheamiConfig.get_output_dir(state["thread_id"]))
     state["milestones"][-1].status = "completed"
     state["milestones"][-1].end_time = datetime.now()
     await get_db().add_or_update_milestone(
         pdf_bytes = f.read()
     final_report_id = await get_db().add_final_report_v2(
         patient_id=state["patient_id"],
+        summary=state["interpretation_html"],
         pdf_bytes=pdf_bytes,
         file_name=f"health_trends_report_{state["patient_id"]}.pdf",
     )
     logger.info("final_report_id = %s", final_report_id)
 def schedule_cleanup(file_path, delay=300):  # 300 sec = 5 min
     def cleanup():
     workflow.add_node("unit_normalizer_notifier", fn_unit_normalizer_node_notifier)
     workflow.add_node("trends_notifier", fn_trends_aggregator_node_notifier)
     workflow.add_node("interpreter_notifier", fn_interpreter_node_notifier)
+    workflow.add_node("final_cleanup_node", fn_final_cleanup_node)
     workflow.add_edge(START, "init")
     workflow.add_edge("init", "standardizer_notifier")
     workflow.add_edge("trends_notifier", "trends")
     workflow.add_edge("trends", "interpreter_notifier")
     workflow.add_edge("interpreter_notifier", "interpreter")
+    workflow.add_edge("interpreter", "final_cleanup_node")
+    workflow.add_edge("final_cleanup_node", END)
     logger.info("%s| Creating Graph : finished", thread_id)
     return workflow.compile(checkpointer=memory)

modules/db.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 from typing import Any
 from datetime import datetime, timezone
@@ -70,8 +71,14 @@ class SheamiDB:
         result = await self.patients.insert_one(patient)
         return str(result.inserted_id)
-    async def get_patient_by_id(self, patient_id: str) -> Any | None:
         patient = await self.patients.find_one({"_id": ObjectId(patient_id)})
         return patient
     async def get_patients_by_user(self, user_id: str) -> list:
@@ -139,9 +146,19 @@ class SheamiDB:
             upsert=True,
         )
-    async def get_trends_by_patient(self, patient_id: str) -> list:
         cursor = self.trends.find({"patient_id": ObjectId(patient_id)})
         trends = await cursor.to_list(length=None)
         return trends
     # ---------------------------
@@ -380,19 +397,16 @@ class SheamiDB:
         updated = 0
-        async def add_trend_data_point(test):
             test_name = test.get("test_name")
             value = test.get("result_value")
             unit = test.get("test_unit")
-            test_date = (
-                test.get("test_date")
-                or report.get("uploaded_at")
-                or datetime.now(timezone.utc)
-            )
-            # Normalize date
             if isinstance(test_date, (int, float)):
-                # handle timestamp
                 test_date = datetime.fromtimestamp(test_date, tz=timezone.utc)
             elif isinstance(test_date, str):
                 try:
@@ -407,20 +421,61 @@ class SheamiDB:
                 "report_id": ObjectId(report_id),
             }
-            # Upsert trend doc
-            result = await self.trends.update_one(
-                {"patient_id": ObjectId(patient_id), "test_name": test_name},
                 {
-                    "$setOnInsert": {
                         "patient_id": ObjectId(patient_id),
                         "test_name": test_name,
-                        "created_at": datetime.now(timezone.utc),
                     },
-                    "$push": {"trend_data": point},
-                    "$set": {"last_updated": datetime.now(timezone.utc)},
-                },
-                upsert=True,
-            )
             return result
         for test in tests:
@@ -431,11 +486,11 @@ class SheamiDB:
                     continue
                 for sub_result in sub_results:
                     test_name = sub_result.get("test_name")
-                    db_output = await add_trend_data_point(sub_result)
                     updated += db_output.modified_count
                 continue
             else:
-                db_output = await add_trend_data_point(test)
                 updated += db_output.modified_count
         # print("updated/inserted", updated, "trends")
@@ -556,3 +611,21 @@ class SheamiDB:
             await self.fs.delete(file_id)
             deleted_count += 1
         return deleted_count

+from datetime import timezone
 import os
 from typing import Any
 from datetime import datetime, timezone
         result = await self.patients.insert_one(patient)
         return str(result.inserted_id)
+    async def get_patient_by_id(
+        self, patient_id: str, fields: list[str] = [], serializable: bool = False
+    ) -> Any | None:
         patient = await self.patients.find_one({"_id": ObjectId(patient_id)})
+        if fields:
+            patient = {key: patient[key] for key in fields if key in patient}
+        if serializable:
+            patient = self.convert_to_serializable_data(data=patient)
         return patient
     async def get_patients_by_user(self, user_id: str) -> list:
             upsert=True,
         )
+    async def get_trends_by_patient(
+        self, patient_id: str, fields: list[str] = None, serializable=False
+    ) -> list:
         cursor = self.trends.find({"patient_id": ObjectId(patient_id)})
         trends = await cursor.to_list(length=None)
+        if fields:
+            trends = [
+                {field: trend[field] for field in fields if field in trend}
+                for trend in trends
+            ]
+        if serializable:
+            trends = self.convert_to_serializable_data(data=trends)
         return trends
     # ---------------------------
         updated = 0
+        async def add_or_update_trend_data_point(test):
             test_name = test.get("test_name")
             value = test.get("result_value")
             unit = test.get("test_unit")
+            test_date = test.get("test_date") or datetime.now(timezone.utc)
+            test_reference_range = test.get("test_reference_range")
+            inferred_range = test.get("inferred_range")
+            # Normalize test_date (keep your existing normalization here)...
             if isinstance(test_date, (int, float)):
                 test_date = datetime.fromtimestamp(test_date, tz=timezone.utc)
             elif isinstance(test_date, str):
                 try:
                 "report_id": ObjectId(report_id),
             }
+            # Step 1: Check if trend_data with same date exists
+            existing_doc = await self.trends.find_one(
                 {
+                    "patient_id": ObjectId(patient_id),
+                    "test_name": test_name,
+                    "trend_data.date": test_date,
+                },
+                projection={"trend_data.$": 1},  # Project only matched array element
+            )
+            if existing_doc:
+                # Step 2: Update the existing trend_data array element with new data
+                result = await self.trends.update_one(
+                    {
                         "patient_id": ObjectId(patient_id),
                         "test_name": test_name,
+                        "trend_data.date": test_date,
                     },
+                    {
+                        "$set": {
+                            "trend_data.$.value": value,
+                            "trend_data.$.unit": unit,
+                            "trend_data.$.report_id": ObjectId(report_id),
+                            "last_updated": datetime.now(timezone.utc),
+                            "test_reference_range": test_reference_range,
+                            "inferred_range": inferred_range,
+                        },
+                        "$setOnInsert": {
+                            "patient_id": ObjectId(patient_id),
+                            "test_name": test_name,
+                            "created_at": datetime.now(timezone.utc),
+                        },
+                    },
+                )
+            else:
+                # Step 3: Insert new point as it does not exist yet
+                result = await self.trends.update_one(
+                    {"patient_id": ObjectId(patient_id), "test_name": test_name},
+                    {
+                        "$setOnInsert": {
+                            "patient_id": ObjectId(patient_id),
+                            "test_name": test_name,
+                            "created_at": datetime.now(timezone.utc),
+                        },
+                        "$push": {"trend_data": point},
+                        "$set": {
+                            "last_updated": datetime.now(timezone.utc),
+                            "test_reference_range": test_reference_range,
+                            "inferred_range": inferred_range,
+                            "test_reference_range": test_reference_range,
+                            "inferred_range": inferred_range,
+                        },
+                    },
+                    upsert=True,
+                )
             return result
         for test in tests:
                     continue
                 for sub_result in sub_results:
                     test_name = sub_result.get("test_name")
+                    db_output = await add_or_update_trend_data_point(sub_result)
                     updated += db_output.modified_count
                 continue
             else:
+                db_output = await add_or_update_trend_data_point(test)
                 updated += db_output.modified_count
         # print("updated/inserted", updated, "trends")
             await self.fs.delete(file_id)
             deleted_count += 1
         return deleted_count
+    def convert_to_serializable_data(self, data):
+        """
+        Recursively converts MongoDB-specific types to JSON serializable formats.
+        - ObjectId to string
+        - datetime to ISO 8601 string
+        Handles dict, list, and basic types.
+        """
+        if isinstance(data, dict):
+            return {k: self.convert_to_serializable_data(v) for k, v in data.items()}
+        elif isinstance(data, list):
+            return [self.convert_to_serializable_data(i) for i in data]
+        elif isinstance(data, ObjectId):
+            return str(data)
+        elif isinstance(data, datetime):
+            return data.isoformat()
+        else:
+            return data

modules/models.py CHANGED Viewed

@@ -95,7 +95,7 @@ class SheamiState(TypedDict):
     uploaded_reports: List[HealthReport]
     standardized_reports: List[StandardizedReport]
     trends_json: dict
-    interpreted_report: str
     current_index: int
     process_desc: str
     units_processed: int
@@ -103,3 +103,4 @@ class SheamiState(TypedDict):
     overall_units_processed: int
     overall_units_total: int
     milestones: list[SheamiMilestone]

     uploaded_reports: List[HealthReport]
     standardized_reports: List[StandardizedReport]
     trends_json: dict
+    pdf_path: str
     current_index: int
     process_desc: str
     units_processed: int
     overall_units_processed: int
     overall_units_total: int
     milestones: list[SheamiMilestone]
+    interpretation_html : str

tests/test_trends.py CHANGED Viewed

@@ -1,14 +1,19 @@
 from modules.db import SheamiDB
-db = SheamiDB()
-patient_id = "68a4265e04b98fb066f75f78"
-reports = db.get_reports_by_patient(patient_id=patient_id)
-total_updated = 0
-for report in reports:
-    print(report)
-    num_updated = db.aggregate_trends_from_report(
-        patient_id=patient_id, report_id=str(report["_id"])
-    )
-    total_updated += num_updated
-    print("total_updated = ", total_updated)

 from modules.db import SheamiDB
+async def test():
+    db = SheamiDB()
+    patient_id = "68a67a92fa6a3a741b0c5c74"
+    reports = await db.get_reports_by_patient(patient_id=patient_id)
+    total_updated = 0
+    for report in reports:
+        # print(report)
+        num_updated = await db.aggregate_trends_from_report(
+            patient_id=patient_id, report_id=str(report["_id"])
+        )
+        total_updated += num_updated
+        print("total_updated = ", total_updated)
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(test())

ui.py CHANGED Viewed

@@ -117,7 +117,7 @@ async def process_reports(user_email: str, patient_id: str, files: list):
     )
     yield construct_process_message(
         message=buffer,
-        final_output=gr.update(value=final_state["interpreted_report"], visible=True),
         milestones=final_state["milestones"],
         reports_output=msg_packet["standardized_reports"],
         trends_output=msg_packet["trends_json"],

     )
     yield construct_process_message(
         message=buffer,
+        final_output=gr.update(value=final_state["pdf_path"], visible=True),
         milestones=final_state["milestones"],
         reports_output=msg_packet["standardized_reports"],
         trends_output=msg_packet["trends_json"],