fixed consult notes
Browse files- ocr/api/consult/cunsult.py +63 -0
- ocr/api/consult/db_requests.py +0 -0
- ocr/api/consult/dto.py +0 -0
- ocr/api/consult/schemas.py +0 -0
- ocr/api/consult/views.py +6 -2
- ocr/api/message/dto.py +0 -2
- ocr/api/openai_requests.py +31 -6
- ocr/api/prompts.py +140 -14
- ocr/api/report/db_requests.py +2 -2
- ocr/api/report/dto.py +1 -0
- ocr/api/report/model.py +1 -0
- ocr/api/report/views.py +11 -8
- ocr/api/utils.py +1 -17
- requirements.txt +22 -2
- test.html +2 -0
ocr/api/consult/cunsult.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import base64
|
| 3 |
+
import io
|
| 4 |
+
|
| 5 |
+
import markdown2
|
| 6 |
+
from xhtml2pdf import pisa
|
| 7 |
+
|
| 8 |
+
from ocr.api.openai_requests import generate_consult_note
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
async def create_consult_notes(text: str, changes: str | None) -> str:
    """Build a Markdown consult note from a report's source text.

    Every section of the note is produced by its own
    ``generate_consult_note`` call. The calls are awaited concurrently and
    the results are rendered under fixed headings in a fixed order.

    Args:
        text: Text handed to every section generator as the medical
            information to analyse.
        changes: Optional description of changes. When present it is
            appended to every request as a fenced ``**Changes**`` block.
            ``None`` and the empty string both mean "no changes".

    Returns:
        The Markdown document: one ``# <heading>`` per section, followed by
        the generated text, with blank lines between the parts.
    """
    # An empty string used to produce an empty "Changes" fence in every
    # request, so it is treated exactly like None.
    changes_block = f"\n\n**Changes**:\n```\n{changes}\n```" if changes else ''

    # (heading shown in the note, key understood by generate_consult_note),
    # listed in the order the sections appear in the document.
    sections = (
        ('Chief Complaint', 'chief'),
        ('History of Present Illness (HPI)', 'hpi'),
        ('Social History', 'social'),
        ('Surgical History', 'surgical'),
        ('Family History', 'family'),
        ('Medications', 'medications'),
        ('Assessment', 'assessment'),
        ('Plan', 'plan'),
    )

    # gather() returns results in the order the awaitables were passed in,
    # so they can be zipped back onto the headings.
    generated = await asyncio.gather(
        *(generate_consult_note(text, changes_block, key) for _, key in sections)
    )

    return "\n\n".join(
        f"# {heading}\n\n{body}"
        for (heading, _), body in zip(sections, generated)
    )
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def text_to_pdf_base64(text: str) -> str:
    """Render Markdown text to a PDF and return the PDF base64-encoded.

    Args:
        text: Markdown source. It is converted to HTML with ``markdown2``
            and the HTML is rendered to PDF with ``xhtml2pdf``.

    Returns:
        The PDF bytes encoded as a base64 string.

    Raises:
        RuntimeError: If xhtml2pdf reports errors while building the PDF.
    """
    html_text = markdown2.markdown(text)
    pdf_buffer = io.BytesIO()
    status = pisa.CreatePDF(html_text, dest=pdf_buffer)
    # CreatePDF signals failure through the returned status object instead of
    # raising; ignoring it would hand back a truncated or empty document.
    if status.err:
        raise RuntimeError(f"PDF generation failed with {status.err} error(s)")
    return base64.b64encode(pdf_buffer.getvalue()).decode("utf-8")
|
ocr/api/consult/db_requests.py
DELETED
|
File without changes
|
ocr/api/consult/dto.py
DELETED
|
File without changes
|
ocr/api/consult/schemas.py
DELETED
|
File without changes
|
ocr/api/consult/views.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
from ocr.api.consult import consult_router
|
| 2 |
-
from ocr.api.
|
|
|
|
| 3 |
from ocr.core.wrappers import OcrResponseWrapper
|
| 4 |
|
| 5 |
|
| 6 |
@consult_router.post('/{reportId}/generate')
|
| 7 |
async def generate_consult_report(reportId: str) -> OcrResponseWrapper[str]:
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from ocr.api.consult import consult_router
|
| 2 |
+
from ocr.api.consult.cunsult import text_to_pdf_base64, create_consult_notes
|
| 3 |
+
from ocr.api.report.db_requests import get_report_obj_by_id
|
| 4 |
from ocr.core.wrappers import OcrResponseWrapper
|
| 5 |
|
| 6 |
|
| 7 |
@consult_router.post('/{reportId}/generate')
async def generate_consult_report(reportId: str) -> OcrResponseWrapper[str]:
    """Generate consult notes for a stored report and return them as a PDF.

    The report is fetched by ``reportId``, consult notes are generated from
    its ``originalText`` and ``changes``, and the notes are converted to a
    base64-encoded PDF that is returned in the response wrapper.
    """
    stored_report = await get_report_obj_by_id(reportId)
    notes_markdown = await create_consult_notes(
        stored_report.originalText,
        stored_report.changes,
    )
    return OcrResponseWrapper(data=text_to_pdf_base64(notes_markdown))
|
ocr/api/message/dto.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
from enum import Enum
|
| 2 |
|
| 3 |
-
from pydantic import BaseModel
|
| 4 |
-
|
| 5 |
|
| 6 |
class Author(Enum):
|
| 7 |
User = "user"
|
|
|
|
| 1 |
from enum import Enum
|
| 2 |
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class Author(Enum):
|
| 5 |
User = "user"
|
ocr/api/openai_requests.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
-
from ocr.api.prompts import
|
| 2 |
from ocr.api.report.model import ReportModel
|
| 3 |
from ocr.core.wrappers import openai_wrapper
|
| 4 |
|
| 5 |
|
| 6 |
@openai_wrapper()
|
| 7 |
-
async def generate_report(request_content:
|
| 8 |
messages = [
|
| 9 |
{
|
| 10 |
"role": "system",
|
| 11 |
-
"content":
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"role": "user",
|
|
@@ -19,11 +19,11 @@ async def generate_report(request_content: list[dict]):
|
|
| 19 |
|
| 20 |
|
| 21 |
@openai_wrapper()
|
| 22 |
-
async def generate_changes(content:
|
| 23 |
messages = [
|
| 24 |
{
|
| 25 |
"role": "system",
|
| 26 |
-
"content":
|
| 27 |
.replace("{previous_report}", previous_report)
|
| 28 |
},
|
| 29 |
{
|
|
@@ -39,10 +39,35 @@ async def generate_agent_response(messages: list[dict], report: ReportModel):
|
|
| 39 |
messages = [
|
| 40 |
{
|
| 41 |
"role": "system",
|
| 42 |
-
"content":
|
| 43 |
.replace("{reports}", report.report)
|
| 44 |
.replace("{changes}", report.changes or 'There is no changes.')
|
| 45 |
},
|
| 46 |
*messages
|
| 47 |
]
|
| 48 |
return messages
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ocr.api.prompts import ocr_prompts
|
| 2 |
from ocr.api.report.model import ReportModel
|
| 3 |
from ocr.core.wrappers import openai_wrapper
|
| 4 |
|
| 5 |
|
| 6 |
@openai_wrapper()
|
| 7 |
+
async def generate_report(request_content: str):
|
| 8 |
messages = [
|
| 9 |
{
|
| 10 |
"role": "system",
|
| 11 |
+
"content": ocr_prompts.report.generate_report
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"role": "user",
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
@openai_wrapper()
|
| 22 |
+
async def generate_changes(content: str, previous_report: str):
|
| 23 |
messages = [
|
| 24 |
{
|
| 25 |
"role": "system",
|
| 26 |
+
"content": ocr_prompts.report.generate_changes
|
| 27 |
.replace("{previous_report}", previous_report)
|
| 28 |
},
|
| 29 |
{
|
|
|
|
| 39 |
messages = [
|
| 40 |
{
|
| 41 |
"role": "system",
|
| 42 |
+
"content": ocr_prompts.message.generate_agent_response
|
| 43 |
.replace("{reports}", report.report)
|
| 44 |
.replace("{changes}", report.changes or 'There is no changes.')
|
| 45 |
},
|
| 46 |
*messages
|
| 47 |
]
|
| 48 |
return messages
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@openai_wrapper(is_json=True, temperature=0.6, return_='result')
async def generate_consult_note(text: str, changes: str, type_: str):
    """Build the chat messages for one section of a consult note.

    Args:
        text: Medical information placed in a fenced block in the user
            message.
        changes: Text appended after the medical information block (may be
            an empty string).
        type_: Section key: ``chief``, ``hpi``, ``social``, ``surgical``,
            ``family``, ``medications``, ``assessment`` or ``plan``.

    Returns:
        A list with the system message (section prompt) and the user
        message. How the list is consumed is decided by the
        ``openai_wrapper`` decorator, which is not visible here.

    Raises:
        KeyError: If ``type_`` is not one of the known section keys.
    """
    consult = ocr_prompts.consult
    system_prompts = {
        "chief": consult.generate_chief,
        "hpi": consult.generate_hpi,
        "social": consult.generate_social,
        "surgical": consult.generate_surgical,
        "family": consult.generate_family,
        "medications": consult.generate_medications,
        "assessment": consult.generate_assessment,
        "plan": consult.generate_plan,
    }
    system_message = {"role": "system", "content": system_prompts[type_]}
    user_message = {
        "role": "user",
        "content": f"Medical information:\n```\n{text}\n```\n{changes}",
    }
    return [system_message, user_message]
|
ocr/api/prompts.py
CHANGED
|
@@ -1,4 +1,32 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
generate_report = """## Task
|
| 3 |
|
| 4 |
You must analyze the text extracted from medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
|
|
@@ -44,7 +72,7 @@ The report should be structured as follows, with each section containing only re
|
|
| 44 |
[/INST]"""
|
| 45 |
generate_changes = """## Task
|
| 46 |
|
| 47 |
-
You must perform a comparative analysis of the patient's new data from the
|
| 48 |
|
| 49 |
## Data
|
| 50 |
|
|
@@ -63,25 +91,123 @@ You must perform a comparative analysis of the patient's new data from the attac
|
|
| 63 |
- Do **not** include any speculative analysis—only factual differences explicitly observed in the data.
|
| 64 |
|
| 65 |
[/INST]"""
|
| 66 |
-
generate_agent_response = """## Objective
|
| 67 |
|
| 68 |
-
You are an AI medical assistant. Your task is to provide **precise and direct** answers to the doctor's questions based **only** on the provided `Report`, `Patient changes`, and your **verified medical knowledge**. Your responses must be **brief, factual, and strictly to the point**.
|
| 69 |
|
| 70 |
-
|
|
|
|
| 71 |
|
| 72 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
```
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
```
|
| 76 |
|
| 77 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
```
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
```
|
| 81 |
|
| 82 |
-
|
|
|
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class MessagesPrompt:
    """Prompt templates used when answering a doctor's chat messages."""

    # System prompt template. ``{reports}`` and ``{changes}`` are literal
    # placeholders that are meant to be substituted before the prompt is sent.
    generate_agent_response = """## Objective

You are an AI medical assistant. Your task is to provide **precise and direct** answers to the doctor's questions based **only** on the provided `Report`, `Patient changes`, and your **verified medical knowledge**. Your responses must be **brief, factual, and strictly to the point**.

## Data

**Report**:
```
{reports}
```

**Patient changes**:
```
{changes}
```

## Mandatory Instructions

- Do not elaborate or provide explanations unless explicitly requested.
- **Do not include unnecessary details.** Only provide **essential** information relevant to the doctor's question.
- **Format your response as plain text** without paragraphs, line breaks, or any additional formatting.
- **Do not speculate.** If the requested information is unavailable in the provided data, respond with: `"Insufficient data to answer."`"""
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class ReportPrompts:
|
| 30 |
generate_report = """## Task
|
| 31 |
|
| 32 |
You must analyze the text extracted from medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
|
|
|
|
| 72 |
[/INST]"""
|
| 73 |
generate_changes = """## Task
|
| 74 |
|
| 75 |
+
You must perform a comparative analysis of the patient's new data from the user query against their previous data (`Previous Patient data`). Identify and explicitly highlight all differences, including but not limited to disease progression, remission, newly emerging conditions, and significant clinical changes. Your response must be formatted in **Markdown**.
|
| 76 |
|
| 77 |
## Data
|
| 78 |
|
|
|
|
| 91 |
- Do **not** include any speculative analysis—only factual differences explicitly observed in the data.
|
| 92 |
|
| 93 |
[/INST]"""
|
|
|
|
| 94 |
|
|
|
|
| 95 |
|
| 96 |
+
class ConsultPrompts:
    """System prompts for the individual sections of a consult note.

    Every prompt asks the model for a JSON object with a single ``result``
    string. The JSON examples use plain ASCII double quotes so that the
    example shown to the model is itself valid JSON.
    """

    generate_chief = """## Task

You must analyze the provided patient data from the user and then determine the **Primary Complaint/Reason for Visit**. Return your response in JSON format.

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The chief complaint or reason for the visit. It must be represented as a single sentence."""
    generate_hpi = """## Task

You must analyze the provided patient data from the user and then determine the **History of Present Illness (HPI).**

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The History of Present Illness (HPI). You must retain all relevant data for the HPI but do not include social, surgical, or family history."""
    generate_social = """## Task

You must analyze the provided patient data from the user and extract information about the **Social History.**

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The Social History. You must retain all relevant data for the social history. If no data is provided, return `"No data available"`."""
    generate_surgical = """## Task

You must analyze the provided patient data from the user and extract information about the **Surgical History.**

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The Surgical History. You must retain all relevant data for the Surgical history. If no data is provided, return `"No data available"`."""
    generate_family = """## Task

You must analyze the provided patient data from the user and extract information about the **Family History.**

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The Family History. You must retain all relevant data for the Family history. If no data is provided, return `"No data available"`."""
    generate_medications = """## Task

You must analyze the provided patient data from the user and extract information about the **Medications**

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: The list of medications. You must retain all relevant data about medications. If no data is provided, return `"No data available"`."""
    generate_assessment = """## Task

You must analyze the provided patient data from the user and extract information about the **Assessment** (e.g., cancer stage, performance status, etc.).

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: A summary of clinical evaluations, diagnoses, and relevant medical assessments, including disease staging, functional status (e.g., ECOG/WHO performance status). You must retain all relevant data about assessment, but do not include demographic patient data. If no data is provided, return `"No data available"`."""
    # The task sentence previously repeated the Assessment wording, which
    # contradicted the [result] description below it.
    generate_plan = """## Task

You must analyze the provided patient data from the user and formulate the **Plan** (e.g., diagnostic workup, treatment options, supportive care, follow-up).

## JSON Response Format

```json
{
    "result": "string"
}
```

- **[result]**: A structured **treatment and management strategy** based on the latest **evidence-based cancer guidelines** (e.g., ASCO, NCCN). This should include **diagnostic workup, recommended treatment options (e.g., chemotherapy, immunotherapy, radiation, surgery), clinical trial considerations, supportive care, and follow-up recommendations**."""
|
| 201 |
+
|
| 202 |
+
class OCRPrompts:
    """Container exposing each prompt group under one attribute."""

    # One instance per prompt group, created once when the class body runs.
    consult = ConsultPrompts()
    report = ReportPrompts()
    message = MessagesPrompt()
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@lru_cache
def get_prompts() -> OCRPrompts:
    """Return the prompt container.

    The function takes no arguments, so ``lru_cache`` makes every call after
    the first return the same ``OCRPrompts`` instance.
    """
    prompts = OCRPrompts()
    return prompts
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
ocr_prompts = get_prompts()
|
ocr/api/report/db_requests.py
CHANGED
|
@@ -21,8 +21,8 @@ async def get_report_obj_by_id(report_id: str) -> ReportModel:
|
|
| 21 |
return ReportModel.from_mongo(report)
|
| 22 |
|
| 23 |
|
| 24 |
-
async def save_report_obj(report: str, changes: str | None, filename: str) -> ReportModel:
|
| 25 |
-
report = ReportModel(report=report, changes=changes, filename=filename)
|
| 26 |
await settings.DB_CLIENT.reports.insert_one(report.to_mongo())
|
| 27 |
return report
|
| 28 |
|
|
|
|
| 21 |
return ReportModel.from_mongo(report)
|
| 22 |
|
| 23 |
|
| 24 |
+
async def save_report_obj(report: str, changes: str | None, original_text: str, filename: str) -> ReportModel:
    """Create a report document, insert it into the reports collection and return it.

    Args:
        report: Generated report text.
        changes: Generated changes text, or ``None``.
        original_text: Stored on the model as ``originalText``.
        filename: Name of the uploaded file.

    Returns:
        The ``ReportModel`` instance that was inserted.
    """
    new_report = ReportModel(
        report=report,
        changes=changes,
        filename=filename,
        originalText=original_text,
    )
    await settings.DB_CLIENT.reports.insert_one(new_report.to_mongo())
    return new_report
|
| 28 |
|
ocr/api/report/dto.py
CHANGED
|
@@ -14,3 +14,4 @@ class Paging(BaseModel):
|
|
| 14 |
class ReportModelShort(ReportModel):
|
| 15 |
report: ClassVar[str]
|
| 16 |
changes: ClassVar[str]
|
|
|
|
|
|
| 14 |
class ReportModelShort(ReportModel):
    """``ReportModel`` variant that redeclares its large text attributes as ``ClassVar``.

    NOTE(review): presumably this drops ``report``, ``changes`` and
    ``originalText`` from the model's fields so short listings omit them;
    confirm against the model base class.
    """

    originalText: ClassVar[str]
    changes: ClassVar[str]
    report: ClassVar[str]
|
ocr/api/report/model.py
CHANGED
|
@@ -8,6 +8,7 @@ from ocr.core.database import MongoBaseModel
|
|
| 8 |
class ReportModel(MongoBaseModel):
|
| 9 |
report: str
|
| 10 |
changes: str | None = None
|
|
|
|
| 11 |
filename: str
|
| 12 |
datetimeInserted: datetime = Field(default_factory=datetime.now)
|
| 13 |
datetimeUpdated: datetime = Field(default_factory=datetime.now)
|
|
|
|
| 8 |
class ReportModel(MongoBaseModel):
|
| 9 |
report: str
|
| 10 |
changes: str | None = None
|
| 11 |
+
originalText: str
|
| 12 |
filename: str
|
| 13 |
datetimeInserted: datetime = Field(default_factory=datetime.now)
|
| 14 |
datetimeUpdated: datetime = Field(default_factory=datetime.now)
|
ocr/api/report/views.py
CHANGED
|
@@ -4,12 +4,15 @@ from fastapi import UploadFile, File
|
|
| 4 |
|
| 5 |
from ocr.api.openai_requests import generate_report, generate_changes
|
| 6 |
from ocr.api.report import report_router
|
| 7 |
-
from ocr.api.report.db_requests import get_all_reports_obj,
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
| 9 |
from ocr.api.report.dto import Paging
|
| 10 |
from ocr.api.report.model import ReportModel
|
| 11 |
from ocr.api.report.schemas import AllReportResponse
|
| 12 |
-
from ocr.api.utils import divide_images, prepare_request_content, clean_response
|
| 13 |
from ocr.core.wrappers import OcrResponseWrapper
|
| 14 |
|
| 15 |
|
|
@@ -43,15 +46,15 @@ async def create_report(
|
|
| 43 |
last_report, contents = await asyncio.gather(get_last_report_obj(), file.read())
|
| 44 |
report, changes = None, None
|
| 45 |
images = divide_images(contents)
|
| 46 |
-
|
| 47 |
if last_report:
|
| 48 |
report, changes = await asyncio.gather(
|
| 49 |
-
generate_report(
|
| 50 |
-
generate_changes(
|
| 51 |
)
|
| 52 |
else:
|
| 53 |
-
report = await generate_report(
|
| 54 |
-
report = await save_report_obj(clean_response(report), clean_response(changes), file.filename)
|
| 55 |
finally:
|
| 56 |
await file.close()
|
| 57 |
return OcrResponseWrapper(data=report)
|
|
|
|
| 4 |
|
| 5 |
from ocr.api.openai_requests import generate_report, generate_changes
|
| 6 |
from ocr.api.report import report_router
|
| 7 |
+
from ocr.api.report.db_requests import (get_all_reports_obj,
|
| 8 |
+
delete_all_reports,
|
| 9 |
+
get_report_obj_by_id,
|
| 10 |
+
save_report_obj,
|
| 11 |
+
get_last_report_obj)
|
| 12 |
from ocr.api.report.dto import Paging
|
| 13 |
from ocr.api.report.model import ReportModel
|
| 14 |
from ocr.api.report.schemas import AllReportResponse
|
| 15 |
+
from ocr.api.utils import divide_images, prepare_request_content, clean_response, extract_text_from_images
|
| 16 |
from ocr.core.wrappers import OcrResponseWrapper
|
| 17 |
|
| 18 |
|
|
|
|
| 46 |
last_report, contents = await asyncio.gather(get_last_report_obj(), file.read())
|
| 47 |
report, changes = None, None
|
| 48 |
images = divide_images(contents)
|
| 49 |
+
text_content = extract_text_from_images(images)
|
| 50 |
if last_report:
|
| 51 |
report, changes = await asyncio.gather(
|
| 52 |
+
generate_report(text_content),
|
| 53 |
+
generate_changes(text_content, last_report.report)
|
| 54 |
)
|
| 55 |
else:
|
| 56 |
+
report = await generate_report(text_content)
|
| 57 |
+
report = await save_report_obj(clean_response(report), clean_response(changes), text_content, file.filename)
|
| 58 |
finally:
|
| 59 |
await file.close()
|
| 60 |
return OcrResponseWrapper(data=report)
|
ocr/api/utils.py
CHANGED
|
@@ -2,10 +2,8 @@ import base64
|
|
| 2 |
import io
|
| 3 |
import re
|
| 4 |
|
| 5 |
-
import markdown2
|
| 6 |
import pytesseract
|
| 7 |
from PIL import Image
|
| 8 |
-
from fpdf import FPDF
|
| 9 |
from pdf2image import convert_from_bytes
|
| 10 |
|
| 11 |
|
|
@@ -64,18 +62,4 @@ def prepare_request_content(images: list[bytes]):
|
|
| 64 |
for image in images
|
| 65 |
]
|
| 66 |
]
|
| 67 |
-
return content
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
def text_to_pdf_base64(text: str) -> str:
|
| 71 |
-
pdf = FPDF()
|
| 72 |
-
pdf.set_auto_page_break(auto=True, margin=15)
|
| 73 |
-
pdf.add_page()
|
| 74 |
-
pdf.set_font("Arial", size=12)
|
| 75 |
-
html_text = markdown2.markdown(text)
|
| 76 |
-
plain_text = ''.join(html_text.split('<')[::2])
|
| 77 |
-
pdf.multi_cell(0, 10, plain_text)
|
| 78 |
-
pdf_str = pdf.output(dest="S")
|
| 79 |
-
pdf_bytes = pdf_str.encode("latin1")
|
| 80 |
-
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
|
| 81 |
-
return pdf_base64
|
|
|
|
| 2 |
import io
|
| 3 |
import re
|
| 4 |
|
|
|
|
| 5 |
import pytesseract
|
| 6 |
from PIL import Image
|
|
|
|
| 7 |
from pdf2image import convert_from_bytes
|
| 8 |
|
| 9 |
|
|
|
|
| 62 |
for image in images
|
| 63 |
]
|
| 64 |
]
|
| 65 |
+
return content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,24 +1,31 @@
|
|
| 1 |
annotated-types==0.7.0
|
| 2 |
anyio==4.8.0
|
|
|
|
|
|
|
| 3 |
Brotli==1.1.0
|
| 4 |
certifi==2025.1.31
|
| 5 |
cffi==1.17.1
|
|
|
|
|
|
|
| 6 |
click==8.1.8
|
|
|
|
| 7 |
cssselect2==0.7.0
|
| 8 |
distro==1.9.0
|
| 9 |
dnspython==2.7.0
|
| 10 |
fastapi==0.115.8
|
| 11 |
fonttools==4.56.0
|
| 12 |
-
fpdf==1.7.2
|
| 13 |
h11==0.14.0
|
|
|
|
| 14 |
httpcore==1.0.7
|
| 15 |
httptools==0.6.4
|
| 16 |
httpx==0.28.1
|
| 17 |
idna==3.10
|
| 18 |
jiter==0.8.2
|
|
|
|
| 19 |
markdown2==2.5.3
|
| 20 |
motor==3.7.0
|
| 21 |
-
openai==1.
|
|
|
|
| 22 |
packaging==24.2
|
| 23 |
pdf2image==1.17.0
|
| 24 |
pdfkit==1.0.0
|
|
@@ -28,21 +35,34 @@ pydantic==2.10.6
|
|
| 28 |
pydantic_core==2.27.2
|
| 29 |
pydash==8.0.5
|
| 30 |
pydyf==0.11.0
|
|
|
|
|
|
|
| 31 |
pymongo==4.11
|
|
|
|
| 32 |
pyphen==0.17.2
|
| 33 |
pytesseract==0.3.13
|
|
|
|
| 34 |
python-dotenv==1.0.1
|
| 35 |
python-multipart==0.0.20
|
| 36 |
PyYAML==6.0.2
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
sniffio==1.3.1
|
| 38 |
starlette==0.45.3
|
|
|
|
| 39 |
tinycss2==1.4.0
|
| 40 |
tinyhtml5==2.0.0
|
| 41 |
tqdm==4.67.1
|
| 42 |
typing_extensions==4.12.2
|
|
|
|
|
|
|
|
|
|
| 43 |
uvicorn==0.34.0
|
| 44 |
uvloop==0.21.0
|
| 45 |
watchfiles==1.0.4
|
| 46 |
webencodings==0.5.1
|
| 47 |
websockets==14.2
|
|
|
|
| 48 |
zopfli==0.2.3.post1
|
|
|
|
| 1 |
annotated-types==0.7.0
|
| 2 |
anyio==4.8.0
|
| 3 |
+
arabic-reshaper==3.0.0
|
| 4 |
+
asn1crypto==1.5.1
|
| 5 |
Brotli==1.1.0
|
| 6 |
certifi==2025.1.31
|
| 7 |
cffi==1.17.1
|
| 8 |
+
chardet==5.2.0
|
| 9 |
+
charset-normalizer==3.4.1
|
| 10 |
click==8.1.8
|
| 11 |
+
cryptography==44.0.1
|
| 12 |
cssselect2==0.7.0
|
| 13 |
distro==1.9.0
|
| 14 |
dnspython==2.7.0
|
| 15 |
fastapi==0.115.8
|
| 16 |
fonttools==4.56.0
|
|
|
|
| 17 |
h11==0.14.0
|
| 18 |
+
html5lib==1.1
|
| 19 |
httpcore==1.0.7
|
| 20 |
httptools==0.6.4
|
| 21 |
httpx==0.28.1
|
| 22 |
idna==3.10
|
| 23 |
jiter==0.8.2
|
| 24 |
+
lxml==5.3.1
|
| 25 |
markdown2==2.5.3
|
| 26 |
motor==3.7.0
|
| 27 |
+
openai==1.64.0
|
| 28 |
+
oscrypto==1.3.0
|
| 29 |
packaging==24.2
|
| 30 |
pdf2image==1.17.0
|
| 31 |
pdfkit==1.0.0
|
|
|
|
| 35 |
pydantic_core==2.27.2
|
| 36 |
pydash==8.0.5
|
| 37 |
pydyf==0.11.0
|
| 38 |
+
pyHanko==0.25.3
|
| 39 |
+
pyhanko-certvalidator==0.26.5
|
| 40 |
pymongo==4.11
|
| 41 |
+
pypdf==5.3.0
|
| 42 |
pyphen==0.17.2
|
| 43 |
pytesseract==0.3.13
|
| 44 |
+
python-bidi==0.6.6
|
| 45 |
python-dotenv==1.0.1
|
| 46 |
python-multipart==0.0.20
|
| 47 |
PyYAML==6.0.2
|
| 48 |
+
qrcode==8.0
|
| 49 |
+
reportlab==4.3.1
|
| 50 |
+
requests==2.32.3
|
| 51 |
+
six==1.17.0
|
| 52 |
sniffio==1.3.1
|
| 53 |
starlette==0.45.3
|
| 54 |
+
svglib==1.5.1
|
| 55 |
tinycss2==1.4.0
|
| 56 |
tinyhtml5==2.0.0
|
| 57 |
tqdm==4.67.1
|
| 58 |
typing_extensions==4.12.2
|
| 59 |
+
tzlocal==5.3
|
| 60 |
+
uritools==4.0.3
|
| 61 |
+
urllib3==2.3.0
|
| 62 |
uvicorn==0.34.0
|
| 63 |
uvloop==0.21.0
|
| 64 |
watchfiles==1.0.4
|
| 65 |
webencodings==0.5.1
|
| 66 |
websockets==14.2
|
| 67 |
+
xhtml2pdf==0.2.16
|
| 68 |
zopfli==0.2.3.post1
|
test.html
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<h3>Chief Complaint</h3><h3>History of Present Illness (HPI)</h3>\n\n<h3>Social History</h3>\n\n<h3>Surgical
|
| 2 |
+
History</h3>\n\n<h3>Family History</h3>\n\n<h3>Medications</h3>\n\n<h3>Assessment</h3>\n\n<h2>Plan</h2>
|