Ali2206 committed on
Commit
2925a8b
·
verified ·
1 Parent(s): 0873a07

Update api/routes/pdf.py

Browse files
Files changed (1) hide show
  1. api/routes/pdf.py +65 -341
api/routes/pdf.py CHANGED
@@ -5,17 +5,10 @@ from utils.helpers import calculate_age, escape_latex_special_chars, hyphenate_l
5
  from datetime import datetime
6
  from bson import ObjectId
7
  from bson.errors import InvalidId
8
- import os
9
- import subprocess
10
- from tempfile import TemporaryDirectory
11
- from string import Template
12
  import logging
13
  import asyncio
14
- import aiohttp
15
  from typing import List, Dict, Optional
16
- from pymongo import MongoClient
17
  from pymongo.errors import PyMongoError
18
- from pymongo.change_stream import CollectionChangeStream
19
 
20
  # Configure logging
21
  logging.basicConfig(
@@ -27,378 +20,109 @@ logger = logging.getLogger(__name__)
27
  router = APIRouter()
28
 
29
  # Configuration
30
- FILE_IO_API_URL = "https://file.io"
31
- FILE_IO_EXPIRATION = "1w" # 1 week expiration
32
- PDF_METADATA_COLLECTION = "pdf_metadata" # Collection to store file.io links
33
-
34
- async def upload_to_fileio(file_bytes: bytes, filename: str) -> Optional[Dict]:
35
- """Upload a file to file.io and return the response"""
36
- try:
37
- async with aiohttp.ClientSession() as session:
38
- form_data = aiohttp.FormData()
39
- form_data.add_field('file', file_bytes, filename=filename)
40
- form_data.add_field('expires', FILE_IO_EXPIRATION)
41
-
42
- async with session.post(FILE_IO_API_URL, data=form_data) as response:
43
- if response.status == 200:
44
- data = await response.json()
45
- if data.get('success'):
46
- return data
47
- logger.error(f"File.io upload failed: {data.get('message')}")
48
- else:
49
- logger.error(f"File.io upload failed with status {response.status}")
50
- return None
51
- except Exception as e:
52
- logger.error(f"Error uploading to file.io: {str(e)}")
53
- return None
54
-
55
- async def generate_pdf_bytes(patient: dict) -> Optional[bytes]:
56
- """Generate PDF bytes for a patient"""
57
- try:
58
- # Prepare table content with proper LaTeX formatting
59
- def prepare_table_content(items, columns, default_message):
60
- if not items:
61
- return f"\\multicolumn{{{columns}}}{{l}}{{{default_message}}} \\\\"
62
-
63
- content = []
64
- for item in items:
65
- row = []
66
- for field in item:
67
- value = item.get(field, "") or ""
68
- row.append(escape_latex_special_chars(hyphenate_long_strings(value)))
69
- content.append(" & ".join(row) + " \\\\")
70
- return "\n".join(content)
71
-
72
- # Prepare all table contents
73
- notes_content = prepare_table_content(
74
- [{
75
- "date": format_timestamp(n.get("date", "")),
76
- "type": n.get("type", ""),
77
- "text": n.get("text", "")
78
- } for n in patient.get("notes", [])],
79
- 3,
80
- "No notes available"
81
- )
82
-
83
- conditions_content = prepare_table_content(
84
- [{
85
- "id": c.get("id", ""),
86
- "code": c.get("code", ""),
87
- "status": c.get("status", ""),
88
- "onset": format_timestamp(c.get("onset_date", "")),
89
- "verification": c.get("verification_status", "")
90
- } for c in patient.get("conditions", [])],
91
- 5,
92
- "No conditions available"
93
- )
94
-
95
- medications_content = prepare_table_content(
96
- [{
97
- "id": m.get("id", ""),
98
- "name": m.get("name", ""),
99
- "status": m.get("status", ""),
100
- "date": format_timestamp(m.get("prescribed_date", "")),
101
- "dosage": m.get("dosage", "")
102
- } for m in patient.get("medications", [])],
103
- 5,
104
- "No medications available"
105
- )
106
-
107
- encounters_content = prepare_table_content(
108
- [{
109
- "id": e.get("id", ""),
110
- "type": e.get("type", ""),
111
- "status": e.get("status", ""),
112
- "start": format_timestamp(e.get("period", {}).get("start", "")),
113
- "provider": e.get("service_provider", "")
114
- } for e in patient.get("encounters", [])],
115
- 5,
116
- "No encounters available"
117
- )
118
-
119
- # LaTeX template
120
- latex_template = Template(r"""
121
- \documentclass[a4paper,12pt]{article}
122
- \usepackage[utf8]{inputenc}
123
- \usepackage[T1]{fontenc}
124
- \usepackage{geometry}
125
- \geometry{margin=1in}
126
- \usepackage{booktabs,longtable,fancyhdr}
127
- \usepackage{array}
128
- \usepackage{microtype}
129
- \microtypesetup{expansion=false}
130
- \setlength{\headheight}{14.5pt}
131
- \pagestyle{fancy}
132
- \fancyhf{}
133
- \fancyhead[L]{Patient Report}
134
- \fancyhead[R]{Generated: \today}
135
- \fancyfoot[C]{\thepage}
136
- \begin{document}
137
- \begin{center}
138
- \Large\textbf{Patient Medical Report} \\
139
- \vspace{0.2cm}
140
- \textit{Generated on $generated_on}
141
- \end{center}
142
- \section*{Demographics}
143
- \begin{itemize}
144
- \item \textbf{FHIR ID:} $fhir_id
145
- \item \textbf{Full Name:} $full_name
146
- \item \textbf{Gender:} $gender
147
- \item \textbf{Date of Birth:} $dob
148
- \item \textbf{Age:} $age
149
- \item \textbf{Address:} $address
150
- \item \textbf{Marital Status:} $marital_status
151
- \item \textbf{Language:} $language
152
- \end{itemize}
153
- \section*{Clinical Notes}
154
- \begin{longtable}[l]{>{\raggedright\arraybackslash}p{3.5cm}>{\raggedright\arraybackslash}p{3cm}>{\raggedright\arraybackslash}p{6.5cm}}
155
- \caption{Clinical Notes} \\
156
- \toprule
157
- \textbf{Date} & \textbf{Type} & \textbf{Text} \\
158
- \midrule
159
- $notes
160
- \bottomrule
161
- \end{longtable}
162
- \section*{Conditions}
163
- \begin{longtable}[l]{>{\raggedright\arraybackslash}p{2cm}>{\raggedright\arraybackslash}p{3cm}>{\raggedright\arraybackslash}p{2cm}>{\raggedright\arraybackslash}p{3.5cm}>{\raggedright\arraybackslash}p{3cm}}
164
- \caption{Conditions} \\
165
- \toprule
166
- \textbf{ID} & \textbf{Code} & \textbf{Status} & \textbf{Onset} & \textbf{Verification} \\
167
- \midrule
168
- $conditions
169
- \bottomrule
170
- \end{longtable}
171
- \section*{Medications}
172
- \begin{longtable}[l]{>{\raggedright\arraybackslash}p{2cm}>{\raggedright\arraybackslash}p{4cm}>{\raggedright\arraybackslash}p{2cm}>{\raggedright\arraybackslash}p{3.5cm}>{\raggedright\arraybackslash}p{3cm}}
173
- \caption{Medications} \\
174
- \toprule
175
- \textbf{ID} & \textbf{Name} & \textbf{Status} & \textbf{Date} & \textbf{Dosage} \\
176
- \midrule
177
- $medications
178
- \bottomrule
179
- \end{longtable}
180
- \section*{Encounters}
181
- \begin{longtable}[l]{>{\raggedright\arraybackslash}p{2.5cm}>{\raggedright\arraybackslash}p{4.5cm}>{\raggedright\arraybackslash}p{2.5cm}>{\raggedright\arraybackslash}p{4.5cm}>{\raggedright\arraybackslash}p{3.5cm}}
182
- \caption{Encounters} \\
183
- \toprule
184
- \textbf{ID} & \textbf{Type} & \textbf{Status} & \textbf{Start} & \textbf{Provider} \\
185
- \midrule
186
- $encounters
187
- \bottomrule
188
- \end{longtable}
189
- \end{document}
190
- """)
191
-
192
- # Fill template with patient data
193
- latex_filled = latex_template.substitute(
194
- generated_on=datetime.now().strftime("%A, %B %d, %Y at %I:%M %p %Z"),
195
- fhir_id=escape_latex_special_chars(hyphenate_long_strings(patient.get("fhir_id", "") or "")),
196
- full_name=escape_latex_special_chars(patient.get("full_name", "") or ""),
197
- gender=escape_latex_special_chars(patient.get("gender", "") or ""),
198
- dob=escape_latex_special_chars(patient.get("date_of_birth", "") or ""),
199
- age=escape_latex_special_chars(str(calculate_age(patient.get("date_of_birth", "")) or "N/A")),
200
- address=escape_latex_special_chars(", ".join(filter(None, [
201
- patient.get("address", ""),
202
- patient.get("city", ""),
203
- patient.get("state", ""),
204
- patient.get("postal_code", ""),
205
- patient.get("country", "")
206
- ]))),
207
- marital_status=escape_latex_special_chars(patient.get("marital_status", "") or ""),
208
- language=escape_latex_special_chars(patient.get("language", "") or ""),
209
- notes=notes_content,
210
- conditions=conditions_content,
211
- medications=medications_content,
212
- encounters=encounters_content
213
- )
214
-
215
- # Compile LaTeX to PDF
216
- with TemporaryDirectory() as tmpdir:
217
- tex_path = os.path.join(tmpdir, "report.tex")
218
- pdf_path = os.path.join(tmpdir, "report.pdf")
219
-
220
- with open(tex_path, "w", encoding="utf-8") as f:
221
- f.write(latex_filled)
222
-
223
- # Run latexmk twice to ensure proper table rendering
224
- for _ in range(2):
225
- result = subprocess.run(
226
- ["latexmk", "-pdf", "-interaction=nonstopmode", tex_path],
227
- cwd=tmpdir,
228
- check=False,
229
- capture_output=True,
230
- text=True
231
- )
232
-
233
- if result.returncode != 0:
234
- logger.error(f"LaTeX compilation failed: {result.stderr}")
235
- return None
236
-
237
- if os.path.exists(pdf_path):
238
- with open(pdf_path, "rb") as f:
239
- return f.read()
240
- return None
241
-
242
- except Exception as e:
243
- logger.error(f"Error generating PDF bytes: {str(e)}")
244
- return None
245
-
246
- async def generate_and_upload_pdf(patient: dict) -> Optional[Dict]:
247
- """Generate PDF and upload to file.io, returning metadata"""
248
- try:
249
- # Generate PDF bytes
250
- pdf_bytes = await generate_pdf_bytes(patient)
251
- if not pdf_bytes:
252
- logger.error(f"Failed to generate PDF for patient {patient.get('fhir_id')}")
253
- return None
254
-
255
- # Create filename
256
- patient_name = patient.get("full_name", "unknown").replace(" ", "_").lower()
257
- patient_id = patient.get("fhir_id", "unknown")
258
- filename = f"patient_{patient_id}_{patient_name}_report.pdf"
259
-
260
- # Upload to file.io
261
- upload_response = await upload_to_fileio(pdf_bytes, filename)
262
- if not upload_response:
263
- logger.error(f"Failed to upload PDF for patient {patient.get('fhir_id')}")
264
- return None
265
-
266
- # Prepare metadata
267
- metadata = {
268
- "patient_id": patient.get("fhir_id"),
269
- "patient_name": patient.get("full_name"),
270
- "file_key": upload_response.get('key'),
271
- "file_url": upload_response.get('link'),
272
- "expires_at": upload_response.get('expires'),
273
- "generated_at": datetime.utcnow(),
274
- "filename": filename
275
  }
 
276
 
277
- # Store metadata in MongoDB
 
 
278
  db = patients_collection.database
279
- await db[PDF_METADATA_COLLECTION].update_one(
280
- {"patient_id": patient.get("fhir_id")},
281
- {"$set": metadata},
282
  upsert=True
283
  )
284
-
285
- logger.info(f"Successfully uploaded PDF for patient {patient.get('fhir_id')}")
286
- return metadata
287
-
288
  except Exception as e:
289
- logger.error(f"Error in generate_and_upload_pdf: {str(e)}")
290
  return None
291
 
292
- async def generate_all_patient_pdfs() -> List[Dict]:
293
- """Generate and upload PDFs for all patients"""
294
- generated_files = []
295
  try:
296
  cursor = patients_collection.find({})
297
  patients = await cursor.to_list(length=None)
298
-
299
- if not patients:
300
- logger.warning("No patients found in the database")
301
- return []
302
-
303
- logger.info(f"Starting PDF generation for {len(patients)} patients")
304
-
305
- # Process patients in batches
306
- batch_size = 5 # Smaller batch size for API rate limiting
307
- for i in range(0, len(patients), batch_size):
308
- batch = patients[i:i + batch_size]
309
- tasks = [generate_and_upload_pdf(patient) for patient in batch]
310
- results = await asyncio.gather(*tasks)
311
-
312
- for result in results:
313
- if result:
314
- generated_files.append(result)
315
-
316
- logger.info(f"Processed batch {i//batch_size + 1}/{(len(patients)-1)//batch_size + 1}")
317
- await asyncio.sleep(1) # Brief pause between batches
318
-
319
- logger.info(f"Successfully processed {len(generated_files)} patients")
320
- return generated_files
321
-
322
  except Exception as e:
323
- logger.error(f"Error in generate_all_patient_pdfs: {str(e)}")
324
- return generated_files
325
 
326
  async def watch_for_new_patients():
327
- """Watch MongoDB change stream for new patients and generate PDFs"""
328
  try:
329
- logger.info("Starting MongoDB change stream watcher for new patients")
330
-
331
- # Get the database from the collection
332
  db = patients_collection.database
333
-
334
- # Open a change stream on the patients collection
335
  pipeline = [{'$match': {'operationType': 'insert'}}]
336
-
337
  while True:
338
  try:
339
  async with patients_collection.watch(pipeline) as stream:
340
  async for change in stream:
341
  try:
342
  patient = change['fullDocument']
343
- logger.info(f"New patient detected: {patient.get('fhir_id')}")
344
-
345
- # Generate and upload PDF
346
- result = await generate_and_upload_pdf(patient)
347
- if result:
348
- logger.info(f"Generated PDF for new patient {patient.get('fhir_id')}")
349
- else:
350
- logger.error(f"Failed to generate PDF for new patient {patient.get('fhir_id')}")
351
-
352
  except Exception as e:
353
- logger.error(f"Error processing change stream event: {str(e)}")
354
-
355
  except PyMongoError as e:
356
- logger.error(f"MongoDB change stream error: {str(e)}")
357
- await asyncio.sleep(5) # Wait before reconnecting
358
-
359
  except Exception as e:
360
- logger.error(f"Fatal error in watch_for_new_patients: {str(e)}")
361
 
362
  @router.on_event("startup")
363
  async def startup_event():
364
- """Start background tasks on application startup"""
365
- # Start the change stream watcher
366
  asyncio.create_task(watch_for_new_patients())
367
 
368
- @router.post("/generate-all-pdfs", status_code=202)
369
- async def trigger_pdf_generation(
370
  background_tasks: BackgroundTasks,
371
  current_user: dict = Depends(get_current_user)
372
  ):
373
- """Trigger background task to generate PDFs for all patients"""
374
  if current_user.get('role') not in ['admin', 'doctor']:
375
- raise HTTPException(
376
- status_code=403,
377
- detail="Only administrators and doctors can generate PDFs"
378
- )
379
-
380
- background_tasks.add_task(generate_all_patient_pdfs)
381
- return {"status": "accepted", "message": "PDF generation started in the background"}
382
 
383
- @router.get("/list-pdf-links", response_model=List[Dict])
384
- async def list_pdf_links(current_user: dict = Depends(get_current_user)):
385
- """List all stored PDF metadata"""
386
  if current_user.get('role') not in ['admin', 'doctor']:
387
- raise HTTPException(
388
- status_code=403,
389
- detail="Only administrators and doctors can list PDFs"
390
- )
391
-
392
  try:
393
  db = patients_collection.database
394
- cursor = db[PDF_METADATA_COLLECTION].find({})
395
- pdfs = await cursor.to_list(length=None)
396
- return pdfs
397
  except Exception as e:
398
- raise HTTPException(
399
- status_code=500,
400
- detail=f"Error listing PDF files: {str(e)}"
401
- )
402
 
403
- # Export the router as 'pdf' for api.__init__.py
404
- pdf = router
 
5
  from datetime import datetime
6
  from bson import ObjectId
7
  from bson.errors import InvalidId
 
 
 
 
8
  import logging
9
  import asyncio
 
10
  from typing import List, Dict, Optional
 
11
  from pymongo.errors import PyMongoError
 
12
 
13
  # Configure logging
14
  logging.basicConfig(
 
20
  router = APIRouter()
21
 
22
  # Configuration
23
+ SUMMARY_METADATA_COLLECTION = "patient_summary_json"
24
+
25
async def generate_patient_summary_json(patient: dict) -> Dict:
    """Build the JSON-style summary document for one patient record.

    Args:
        patient: Patient document; fields are read with ``dict.get`` so
            missing keys yield ``None`` (or an empty list for the clinical
            sections).

    Returns:
        A dict with ``patient_id``, ``patient_name``, ``generated_at`` (a
        timezone-aware UTC timestamp) and a nested ``summary`` holding
        ``demographics``, ``notes``, ``conditions``, ``medications`` and
        ``encounters``.

    The function awaits nothing itself; it is declared ``async`` so that
    callers can ``await`` it.
    """
    # Function-scope import: the module only imports `datetime` from the
    # datetime package, and `datetime.utcnow()` is deprecated (naive result).
    from datetime import timezone

    # Keep only truthy address components and coerce each to str so that a
    # non-string value (e.g. a numeric postal code) cannot break the join.
    address_fields = ("address", "city", "state", "postal_code", "country")
    address = ", ".join(
        str(part)
        for part in (patient.get(field, "") for field in address_fields)
        if part
    )

    return {
        "patient_id": patient.get("fhir_id"),
        "patient_name": patient.get("full_name"),
        "generated_at": datetime.now(timezone.utc),
        "summary": {
            "demographics": {
                "fhir_id": patient.get("fhir_id"),
                "full_name": patient.get("full_name"),
                "gender": patient.get("gender"),
                "dob": patient.get("date_of_birth"),
                "age": calculate_age(patient.get("date_of_birth")),
                "address": address,
                "marital_status": patient.get("marital_status"),
                "language": patient.get("language"),
            },
            "notes": patient.get("notes", []),
            "conditions": patient.get("conditions", []),
            "medications": patient.get("medications", []),
            "encounters": patient.get("encounters", []),
        },
    }
53
 
54
async def generate_and_store_summary(patient: dict) -> Optional[Dict]:
    """Generate a patient's summary and upsert it into the summary collection.

    Args:
        patient: Patient document passed on to
            ``generate_patient_summary_json``.

    Returns:
        The stored summary dict, or ``None`` when the patient has no
        ``fhir_id`` or when generation/storage raised an exception (the
        exception is logged, never propagated).
    """
    try:
        summary = await generate_patient_summary_json(patient)
        patient_id = summary["patient_id"]

        # Without an id the upsert filter would be {"patient_id": None}, so
        # every id-less patient would overwrite the same single document.
        if patient_id is None:
            logger.warning("Skipping summary for patient without fhir_id")
            return None

        db = patients_collection.database
        await db[SUMMARY_METADATA_COLLECTION].update_one(
            {"patient_id": patient_id},
            {"$set": summary},
            upsert=True,
        )
        logger.info(f"Stored JSON summary for patient {patient_id}")
        return summary
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error generating/storing summary: {str(e)}")
        return None
68
 
69
async def generate_all_patient_summaries() -> List[Dict]:
    """Generate and store a summary for every document in the patients collection.

    Returns:
        The summaries that were stored successfully. If an exception is
        raised part-way through, it is logged and the summaries collected so
        far are returned.
    """
    generated = []
    try:
        all_patients = await patients_collection.find({}).to_list(length=None)
        for record in all_patients:
            stored = await generate_and_store_summary(record)
            if not stored:
                # Falsy result: nothing was stored for this patient.
                continue
            generated.append(stored)
    except Exception as e:
        logger.error(f"Error generating all summaries: {str(e)}")
    return generated
82
 
async def watch_for_new_patients():
    """Watch the patients collection for inserts and store a summary for each.

    Runs until cancelled or until an unexpected exception escapes the loop.
    A ``PyMongoError`` from the change stream is logged and the stream is
    reopened after a 5 second pause. An error while handling a single change
    event is logged and the stream keeps being consumed.
    """
    try:
        logger.info("Starting MongoDB change stream for new summaries")
        # Only insert events are of interest.
        pipeline = [{'$match': {'operationType': 'insert'}}]
        while True:
            try:
                async with patients_collection.watch(pipeline) as stream:
                    async for change in stream:
                        try:
                            patient = change['fullDocument']
                            await generate_and_store_summary(patient)
                        except Exception as e:
                            # One bad event must not tear down the stream.
                            logger.error(f"Change stream processing error: {str(e)}")
            except PyMongoError as e:
                logger.error(f"MongoDB watch error: {str(e)}")
                await asyncio.sleep(5)  # back off before reconnecting
    except Exception as e:
        # The watcher stops here; keep the traceback for diagnosis.
        logger.exception(f"Fatal error in watcher: {str(e)}")
102
 
# Strong references to long-running tasks. The event loop only keeps weak
# references, so a task whose handle is dropped may be garbage-collected
# before it finishes.
_watcher_tasks = set()


@router.on_event("startup")
async def startup_event():
    """Start the change-stream watcher as a background task on startup."""
    task = asyncio.create_task(watch_for_new_patients())
    _watcher_tasks.add(task)
    # Release the reference once the task completes.
    task.add_done_callback(_watcher_tasks.discard)
106
 
107
@router.post("/generate-all-summaries", status_code=202)
async def trigger_summary_generation(
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
):
    """Queue summary generation for all patients as a background task.

    Responds 403 unless the caller's role is ``admin`` or ``doctor``;
    otherwise schedules ``generate_all_patient_summaries`` and returns an
    acceptance payload immediately.
    """
    role = current_user.get('role')
    is_authorized = role in ('admin', 'doctor')
    if not is_authorized:
        raise HTTPException(status_code=403, detail="Only doctors/admins allowed")

    background_tasks.add_task(generate_all_patient_summaries)
    response = {"status": "accepted", "message": "Summary generation started"}
    return response
 
 
 
 
116
 
117
@router.get("/list-summaries", response_model=List[Dict])
async def list_summaries(current_user: dict = Depends(get_current_user)):
    """Return every stored patient summary document.

    Responds 403 unless the caller's role is ``admin`` or ``doctor``, and
    500 if the database query raises.
    """
    if current_user.get('role') not in ['admin', 'doctor']:
        raise HTTPException(status_code=403, detail="Only doctors/admins allowed")
    try:
        db = patients_collection.database
        # Exclude Mongo's `_id`: it is an ObjectId, which the JSON response
        # encoder cannot serialize.
        # NOTE(review): nested patient data could still contain ObjectIds;
        # confirm against the stored schema.
        cursor = db[SUMMARY_METADATA_COLLECTION].find({}, {"_id": 0})
        return await cursor.to_list(length=None)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error listing summaries: {str(e)}",
        ) from e
 
 
 
127
 
128
+ pdf = router