Spaces:

brestok
/

ocr-2

Sleeping

App Files Files Community

brestok committed on Feb 23, 2025

Commit

3f47633

1 Parent(s): adb81a7

fixed consult notes

Browse files

Files changed (15) hide show

ocr/api/consult/cunsult.py +63 -0
ocr/api/consult/db_requests.py +0 -0
ocr/api/consult/dto.py +0 -0
ocr/api/consult/schemas.py +0 -0
ocr/api/consult/views.py +6 -2
ocr/api/message/dto.py +0 -2
ocr/api/openai_requests.py +31 -6
ocr/api/prompts.py +140 -14
ocr/api/report/db_requests.py +2 -2
ocr/api/report/dto.py +1 -0
ocr/api/report/model.py +1 -0
ocr/api/report/views.py +11 -8
ocr/api/utils.py +1 -17
requirements.txt +22 -2
test.html +2 -0

ocr/api/consult/cunsult.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import asyncio
+import base64
+import io
+import markdown2
+from xhtml2pdf import pisa
+from ocr.api.openai_requests import generate_consult_note
+async def create_consult_notes(text: str, changes: str | None) -> str:
+    """Assemble the Markdown consult note for one report.
+
+    Every section is produced by its own `generate_consult_note` call; the
+    calls are awaited together with `asyncio.gather` and the results are
+    joined in a fixed order under level-1 Markdown headings.
+
+    Args:
+        text: Source medical text handed to every section prompt.
+        changes: Optional change summary; when present it is wrapped in a
+            fenced "**Changes**" block and passed along with the text.
+
+    Returns:
+        The consult note as a single Markdown string.
+    """
+    # (heading shown in the note, prompt key understood by generate_consult_note)
+    sections = (
+        ('Chief Complaint', 'chief'),
+        ('History of Present Illness (HPI)', 'hpi'),
+        ('Social History', 'social'),
+        ('Surgical History', 'surgical'),
+        ('Family History', 'family'),
+        ('Medications', 'medications'),
+        ('Assessment', 'assessment'),
+        ('Plan', 'plan'),
+    )
+    changes_block = f"\n\n**Changes**:\n```\n{changes}\n```" if changes is not None else ''
+    bodies = await asyncio.gather(
+        *(generate_consult_note(text, changes_block, key) for _, key in sections)
+    )
+    # gather preserves argument order, so bodies line up with `sections`.
+    return '\n'.join(
+        f"# {title}\n{body}" for (title, _), body in zip(sections, bodies)
+    )
+def text_to_pdf_base64(text: str) -> str:
+    """Render Markdown text to a PDF and return the PDF base64-encoded.
+
+    The text is converted to HTML with `markdown2`, rendered into an
+    in-memory buffer by xhtml2pdf and the resulting bytes are base64-encoded.
+
+    Args:
+        text: Markdown source of the document.
+
+    Returns:
+        The PDF bytes as a base64 string (UTF-8 decoded).
+
+    Raises:
+        RuntimeError: If xhtml2pdf reports errors while building the PDF.
+    """
+    html_text = markdown2.markdown(text)
+    pdf_buffer = io.BytesIO()
+    status = pisa.CreatePDF(html_text, dest=pdf_buffer)
+    # The conversion outcome is exposed on the returned status object; without
+    # this check a failed render would be returned as an empty/partial PDF.
+    if status.err:
+        raise RuntimeError(f"PDF generation failed with {status.err} error(s)")
+    return base64.b64encode(pdf_buffer.getvalue()).decode("utf-8")

ocr/api/consult/db_requests.py DELETED Viewed

File without changes

ocr/api/consult/dto.py DELETED Viewed

File without changes

ocr/api/consult/schemas.py DELETED Viewed

File without changes

ocr/api/consult/views.py CHANGED Viewed

@@ -1,8 +1,12 @@
 from ocr.api.consult import consult_router
-from ocr.api.utils import text_to_pdf_base64
 from ocr.core.wrappers import OcrResponseWrapper
 @consult_router.post('/{reportId}/generate')
 async def generate_consult_report(reportId: str) -> OcrResponseWrapper[str]:
-    return OcrResponseWrapper(data=text_to_pdf_base64('## Vika Kakashka'))

 from ocr.api.consult import consult_router
+from ocr.api.consult.cunsult import text_to_pdf_base64, create_consult_notes
+from ocr.api.report.db_requests import get_report_obj_by_id
 from ocr.core.wrappers import OcrResponseWrapper
 @consult_router.post('/{reportId}/generate')
 async def generate_consult_report(reportId: str) -> OcrResponseWrapper[str]:
+    """Generate the consult-note PDF for a stored report.
+
+    Loads the report by id, builds the Markdown consult note from its
+    `originalText` and `changes`, and returns the rendered PDF as a base64
+    string wrapped in `OcrResponseWrapper`.
+    """
+    stored_report = await get_report_obj_by_id(reportId)
+    notes_markdown = await create_consult_notes(
+        stored_report.originalText,
+        stored_report.changes,
+    )
+    return OcrResponseWrapper(data=text_to_pdf_base64(notes_markdown))

ocr/api/message/dto.py CHANGED Viewed

@@ -1,7 +1,5 @@
 from enum import Enum
-from pydantic import BaseModel
 class Author(Enum):
     User = "user"

 from enum import Enum
 class Author(Enum):
     User = "user"

ocr/api/openai_requests.py CHANGED Viewed

@@ -1,14 +1,14 @@
-from ocr.api.prompts import OCRPrompts
 from ocr.api.report.model import ReportModel
 from ocr.core.wrappers import openai_wrapper
 @openai_wrapper()
-async def generate_report(request_content: list[dict]):
     messages = [
         {
             "role": "system",
-            "content": OCRPrompts.generate_report
         },
         {
             "role": "user",
@@ -19,11 +19,11 @@ async def generate_report(request_content: list[dict]):
 @openai_wrapper()
-async def generate_changes(content: list[dict], previous_report: str):
     messages = [
         {
             "role": "system",
-            "content": OCRPrompts.generate_changes
             .replace("{previous_report}", previous_report)
         },
         {
@@ -39,10 +39,35 @@ async def generate_agent_response(messages: list[dict], report: ReportModel):
     messages = [
         {
             "role": "system",
-            "content": OCRPrompts.generate_agent_response
             .replace("{reports}", report.report)
             .replace("{changes}", report.changes or 'There is no changes.')
         },
         *messages
     ]
     return messages

+from ocr.api.prompts import ocr_prompts
 from ocr.api.report.model import ReportModel
 from ocr.core.wrappers import openai_wrapper
 @openai_wrapper()
+async def generate_report(request_content: str):
     messages = [
         {
             "role": "system",
+            "content": ocr_prompts.report.generate_report
         },
         {
             "role": "user",
 @openai_wrapper()
+async def generate_changes(content: str, previous_report: str):
     messages = [
         {
             "role": "system",
+            "content": ocr_prompts.report.generate_changes
             .replace("{previous_report}", previous_report)
         },
         {
     messages = [
         {
             "role": "system",
+            "content": ocr_prompts.message.generate_agent_response
             .replace("{reports}", report.report)
             .replace("{changes}", report.changes or 'There is no changes.')
         },
         *messages
     ]
     return messages
+@openai_wrapper(is_json=True, temperature=0.6, return_='result')
+async def generate_consult_note(text: str, changes: str, type_: str):
+    """Build the chat messages for one consult-note section.
+
+    Args:
+        text: Medical text placed in a fenced block of the user message.
+        changes: Pre-formatted changes block appended after the text
+            (may be an empty string).
+        type_: Section key; one of "chief", "hpi", "social", "surgical",
+            "family", "medications", "assessment" or "plan".
+
+    Returns:
+        A two-item message list (system prompt, user message). What happens
+        to it afterwards is up to the `openai_wrapper` decorator, which is
+        not visible here.
+
+    Raises:
+        KeyError: If `type_` is not one of the known section keys.
+    """
+    consult = ocr_prompts.consult
+    system_prompt = {
+        "chief": consult.generate_chief,
+        "hpi": consult.generate_hpi,
+        "social": consult.generate_social,
+        "surgical": consult.generate_surgical,
+        "family": consult.generate_family,
+        "medications": consult.generate_medications,
+        "assessment": consult.generate_assessment,
+        "plan": consult.generate_plan,
+    }[type_]
+    user_prompt = f"Medical information:\n```\n{text}\n```\n{changes}"
+    return [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]

ocr/api/prompts.py CHANGED Viewed

@@ -1,4 +1,32 @@
-class OCRPrompts:
     generate_report = """## Task
 You must analyze the text extracted from medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
@@ -44,7 +72,7 @@ The report should be structured as follows, with each section containing only re
 [/INST]"""
     generate_changes = """## Task
-You must perform a comparative analysis of the patient's new data from the attached user images against their previous data (`Previous Patient data`). Identify and explicitly highlight all differences, including but not limited to disease progression, remission, newly emerging conditions, and significant clinical changes. Your response must be formatted in **Markdown**.
 ## Data
@@ -63,25 +91,123 @@ You must perform a comparative analysis of the patient's new data from the attac
 - Do **not** include any speculative analysis—only factual differences explicitly observed in the data.
 [/INST]"""
-    generate_agent_response = """## Objective
-You are an AI medical assistant. Your task is to provide **precise and direct** answers to the doctor's questions based **only** on the provided `Report`, `Patient changes`, and your **verified medical knowledge**. Your responses must be **brief, factual, and strictly to the point**.
-## Data
-**Report**:
 ```
-{reports}
 ```
-**Patient changes**:
 ```
-{changes}
 ```
-## Mandatory Instructions
-- Do not elaborate or provide explanations unless explicitly requested.
-- **Do not include unnecessary details.** Only provide **essential** information relevant to the doctor's question.
-- **Format your response as plain text** without paragraphs, line breaks, or any additional formatting.
-- **Do not speculate.** If the requested information is unavailable in the provided data, respond with: `"Insufficient data to answer."`"""

+from functools import lru_cache
+class MessagesPrompt:
+    # System prompt for the doctor-facing assistant. `{reports}` and `{changes}`
+    # are literal placeholders (this is a plain string, not an f-string), so
+    # whoever uses the prompt has to substitute them before sending it.
+    generate_agent_response = """## Objective
+You are an AI medical assistant. Your task is to provide **precise and direct** answers to the doctor's questions based **only** on the provided `Report`, `Patient changes`, and your **verified medical knowledge**. Your responses must be **brief, factual, and strictly to the point**.
+## Data
+**Report**:
+```
+{reports}
+```
+**Patient changes**:
+```
+{changes}
+```
+## Mandatory Instructions
+- Do not elaborate or provide explanations unless explicitly requested.
+- **Do not include unnecessary details.** Only provide **essential** information relevant to the doctor's question.
+- **Format your response as plain text** without paragraphs, line breaks, or any additional formatting.
+- **Do not speculate.** If the requested information is unavailable in the provided data, respond with: `"Insufficient data to answer."`"""
+class ReportPrompts:
     generate_report = """## Task
 You must analyze the text extracted from medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
 [/INST]"""
     generate_changes = """## Task
+You must perform a comparative analysis of the patient's new data from the user query against their previous data (`Previous Patient data`). Identify and explicitly highlight all differences, including but not limited to disease progression, remission, newly emerging conditions, and significant clinical changes. Your response must be formatted in **Markdown**.
 ## Data
 - Do **not** include any speculative analysis—only factual differences explicitly observed in the data.
 [/INST]"""
+class ConsultPrompts:
+    # System prompts for the individual consult-note sections. Each prompt asks
+    # the model for a JSON object with a single "result" key; the examples use
+    # plain ASCII double quotes so that they are valid JSON.
+    generate_chief = """## Task
+You must analyze the provided patient data from the user and then determine the **Primary Complaint/Reason for Visit**. Return your response in JSON format.
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
 ```
+- **[result]**: The chief complaint or reason for the visit. It must be represented as a single sentence."""
+    generate_hpi = """## Task
+You must analyze the provided patient data from the user and then determine the **History of Present Illness (HPI).**
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
 ```
+- **[result]**: The History of Present Illness (HPI). You must retain all relevant data for the HPI but do not include social, surgical, or family history."""
+    generate_social = """## Task
+You must analyze the provided patient data from the user and extract information about the **Social History.**
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
 ```
+- **[result]**: The Social History. You must retain all relevant data for the social history. If no data is provided, return `"No data available"`."""
+    generate_surgical = """## Task
+You must analyze the provided patient data from the user and extract information about the **Surgical History.**
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
 ```
+- **[result]**: The Surgical History. You must retain all relevant data for the Surgical history. If no data is provided, save `No data available`."""
+    generate_family = """## Task
+You must analyze the provided patient data from the user and extract information about the **Family History.**
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
+```
+- **[result]**: The Family History. You must retain all relevant data for the Family history. If no data is provided, return `"No data available"`."""
+    generate_medications = """## Task
+You must analyze the provided patient data from the user and extract information about the **Medications**
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
+```
+- **[result]**: The list of medications. You must retain all relevant data about medications. If no data is provided, return `"No data available"`."""
+    generate_assessment = """## Task
+You must analyze the provided patient data from the user and extract information about the **Assessment** (e.g., cancer stage, performance status, etc.).
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
+```
+- **[result]**: A summary of clinical evaluations, diagnoses, and relevant medical assessments, including disease staging, functional status (e.g., ECOG/WHO performance status). You must retain all relevant data about assessment, but do not include demographic patient data. If no data is provided, return `"No data available"`."""
+    # The task line used to be a copy of the assessment prompt; it now names the
+    # Plan so that it agrees with the [result] description below it.
+    generate_plan = """## Task
+You must analyze the provided patient data from the user and formulate the **Plan** (treatment and management strategy).
+## JSON Response Format
+```json
+{
+  "result": "string"
+}
+```
+- **[result]**: A structured **treatment and management strategy** based on the latest **evidence-based cancer guidelines** (e.g., ASCO, NCCN). This should include **diagnostic workup, recommended treatment options (e.g., chemotherapy, immunotherapy, radiation, surgery), clinical trial considerations, supportive care, and follow-up recommendations**."""
+class OCRPrompts:
+    # Namespace grouping the prompt collections by feature area. The three
+    # instances are class attributes, created once when the class body runs.
+    message = MessagesPrompt()
+    report = ReportPrompts()
+    consult = ConsultPrompts()
+# The function takes no arguments, so lru_cache stores a single result and
+# every call returns the same OCRPrompts instance.
+@lru_cache
+def get_prompts() -> OCRPrompts:
+    return OCRPrompts()
+# Module-level instance created at import time.
+ocr_prompts = get_prompts()

ocr/api/report/db_requests.py CHANGED Viewed

@@ -21,8 +21,8 @@ async def get_report_obj_by_id(report_id: str) -> ReportModel:
     return ReportModel.from_mongo(report)
-async def save_report_obj(report: str, changes: str | None, filename: str) -> ReportModel:
-    report = ReportModel(report=report, changes=changes, filename=filename)
     await settings.DB_CLIENT.reports.insert_one(report.to_mongo())
     return report

     return ReportModel.from_mongo(report)
+async def save_report_obj(report: str, changes: str | None, original_text: str, filename: str) -> ReportModel:
+    """Insert a new report document and return the model that was stored.
+
+    Args:
+        report: Generated report text.
+        changes: Generated change summary, or None when there is none.
+        original_text: Text stored on the model as `originalText`.
+        filename: Name of the uploaded file.
+
+    Returns:
+        The `ReportModel` instance that was written to the `reports` collection.
+    """
+    # A separate local name keeps the `report` string parameter intact.
+    new_report = ReportModel(
+        report=report,
+        changes=changes,
+        originalText=original_text,
+        filename=filename,
+    )
+    await settings.DB_CLIENT.reports.insert_one(new_report.to_mongo())
+    return new_report

ocr/api/report/dto.py CHANGED Viewed

@@ -14,3 +14,4 @@ class Paging(BaseModel):
 class ReportModelShort(ReportModel):
     report: ClassVar[str]
     changes: ClassVar[str]

 class ReportModelShort(ReportModel):
+    # NOTE(review): the ClassVar re-annotations presumably remove these large
+    # text fields from the short model's schema — confirm against the model
+    # library's ClassVar handling.
     report: ClassVar[str]
     changes: ClassVar[str]
+    originalText: ClassVar[str]

ocr/api/report/model.py CHANGED Viewed

@@ -8,6 +8,7 @@ from ocr.core.database import MongoBaseModel
 class ReportModel(MongoBaseModel):
     report: str
     changes: str | None = None
     filename: str
     datetimeInserted: datetime = Field(default_factory=datetime.now)
     datetimeUpdated: datetime = Field(default_factory=datetime.now)

 class ReportModel(MongoBaseModel):
     report: str
     changes: str | None = None
+    # NOTE(review): required with no default — documents saved before this
+    # field existed may fail to load if `from_mongo` validates required
+    # fields; confirm, or give the field a default.
+    originalText: str
     filename: str
     datetimeInserted: datetime = Field(default_factory=datetime.now)
     datetimeUpdated: datetime = Field(default_factory=datetime.now)

ocr/api/report/views.py CHANGED Viewed

@@ -4,12 +4,15 @@ from fastapi import UploadFile, File
 from ocr.api.openai_requests import generate_report, generate_changes
 from ocr.api.report import report_router
-from ocr.api.report.db_requests import get_all_reports_obj, delete_all_reports, get_report_obj_by_id, save_report_obj, \
-    get_last_report_obj
 from ocr.api.report.dto import Paging
 from ocr.api.report.model import ReportModel
 from ocr.api.report.schemas import AllReportResponse
-from ocr.api.utils import divide_images, prepare_request_content, clean_response
 from ocr.core.wrappers import OcrResponseWrapper
@@ -43,15 +46,15 @@ async def create_report(
         last_report, contents = await asyncio.gather(get_last_report_obj(), file.read())
         report, changes = None, None
         images = divide_images(contents)
-        content = prepare_request_content(images)
         if last_report:
             report, changes = await asyncio.gather(
-                generate_report(content),
-                generate_changes(content, last_report.report)
             )
         else:
-            report = await generate_report(content)
-        report = await save_report_obj(clean_response(report), clean_response(changes), file.filename)
     finally:
         await file.close()
     return OcrResponseWrapper(data=report)

 from ocr.api.openai_requests import generate_report, generate_changes
 from ocr.api.report import report_router
+from ocr.api.report.db_requests import (get_all_reports_obj,
+                                        delete_all_reports,
+                                        get_report_obj_by_id,
+                                        save_report_obj,
+                                        get_last_report_obj)
 from ocr.api.report.dto import Paging
 from ocr.api.report.model import ReportModel
 from ocr.api.report.schemas import AllReportResponse
+from ocr.api.utils import divide_images, prepare_request_content, clean_response, extract_text_from_images
 from ocr.core.wrappers import OcrResponseWrapper
         last_report, contents = await asyncio.gather(get_last_report_obj(), file.read())
         report, changes = None, None
         images = divide_images(contents)
+        text_content = extract_text_from_images(images)
         if last_report:
             report, changes = await asyncio.gather(
+                generate_report(text_content),
+                generate_changes(text_content, last_report.report)
             )
         else:
+            report = await generate_report(text_content)
+        report = await save_report_obj(clean_response(report), clean_response(changes), text_content, file.filename)
     finally:
         await file.close()
     return OcrResponseWrapper(data=report)

ocr/api/utils.py CHANGED Viewed

@@ -2,10 +2,8 @@ import base64
 import io
 import re
-import markdown2
 import pytesseract
 from PIL import Image
-from fpdf import FPDF
 from pdf2image import convert_from_bytes
@@ -64,18 +62,4 @@ def prepare_request_content(images: list[bytes]):
             for image in images
         ]
     ]
-    return content
-def text_to_pdf_base64(text: str) -> str:
-    pdf = FPDF()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    pdf.add_page()
-    pdf.set_font("Arial", size=12)
-    html_text = markdown2.markdown(text)
-    plain_text = ''.join(html_text.split('<')[::2])
-    pdf.multi_cell(0, 10, plain_text)
-    pdf_str = pdf.output(dest="S")
-    pdf_bytes = pdf_str.encode("latin1")
-    pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
-    return pdf_base64

 import io
 import re
 import pytesseract
 from PIL import Image
 from pdf2image import convert_from_bytes
             for image in images
         ]
     ]
+    return content

requirements.txt CHANGED Viewed

@@ -1,24 +1,31 @@
 annotated-types==0.7.0
 anyio==4.8.0
 Brotli==1.1.0
 certifi==2025.1.31
 cffi==1.17.1
 click==8.1.8
 cssselect2==0.7.0
 distro==1.9.0
 dnspython==2.7.0
 fastapi==0.115.8
 fonttools==4.56.0
-fpdf==1.7.2
 h11==0.14.0
 httpcore==1.0.7
 httptools==0.6.4
 httpx==0.28.1
 idna==3.10
 jiter==0.8.2
 markdown2==2.5.3
 motor==3.7.0
-openai==1.59.9
 packaging==24.2
 pdf2image==1.17.0
 pdfkit==1.0.0
@@ -28,21 +35,34 @@ pydantic==2.10.6
 pydantic_core==2.27.2
 pydash==8.0.5
 pydyf==0.11.0
 pymongo==4.11
 pyphen==0.17.2
 pytesseract==0.3.13
 python-dotenv==1.0.1
 python-multipart==0.0.20
 PyYAML==6.0.2
 sniffio==1.3.1
 starlette==0.45.3
 tinycss2==1.4.0
 tinyhtml5==2.0.0
 tqdm==4.67.1
 typing_extensions==4.12.2
 uvicorn==0.34.0
 uvloop==0.21.0
 watchfiles==1.0.4
 webencodings==0.5.1
 websockets==14.2
 zopfli==0.2.3.post1

 annotated-types==0.7.0
 anyio==4.8.0
+arabic-reshaper==3.0.0
+asn1crypto==1.5.1
 Brotli==1.1.0
 certifi==2025.1.31
 cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.1
 click==8.1.8
+cryptography==44.0.1
 cssselect2==0.7.0
 distro==1.9.0
 dnspython==2.7.0
 fastapi==0.115.8
 fonttools==4.56.0
 h11==0.14.0
+html5lib==1.1
 httpcore==1.0.7
 httptools==0.6.4
 httpx==0.28.1
 idna==3.10
 jiter==0.8.2
+lxml==5.3.1
 markdown2==2.5.3
 motor==3.7.0
+openai==1.64.0
+oscrypto==1.3.0
 packaging==24.2
 pdf2image==1.17.0
 pdfkit==1.0.0
 pydantic_core==2.27.2
 pydash==8.0.5
 pydyf==0.11.0
+pyHanko==0.25.3
+pyhanko-certvalidator==0.26.5
 pymongo==4.11
+pypdf==5.3.0
 pyphen==0.17.2
 pytesseract==0.3.13
+python-bidi==0.6.6
 python-dotenv==1.0.1
 python-multipart==0.0.20
 PyYAML==6.0.2
+qrcode==8.0
+reportlab==4.3.1
+requests==2.32.3
+six==1.17.0
 sniffio==1.3.1
 starlette==0.45.3
+svglib==1.5.1
 tinycss2==1.4.0
 tinyhtml5==2.0.0
 tqdm==4.67.1
 typing_extensions==4.12.2
+tzlocal==5.3
+uritools==4.0.3
+urllib3==2.3.0
 uvicorn==0.34.0
 uvloop==0.21.0
 watchfiles==1.0.4
 webencodings==0.5.1
 websockets==14.2
+xhtml2pdf==0.2.16
 zopfli==0.2.3.post1

test.html ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ <h3>Chief Complaint</h3><h3>History of Present Illness (HPI)</h3>\n\n<h3>Social History</h3>\n\n<h3>Surgical
2	+ History</h3>\n\n<h3>Family History</h3>\n\n<h3>Medications</h3>\n\n<h3>Assessment</h3>\n\n<h2>Plan</h2>