Vrda committed on
Commit
464134d
·
verified ·
1 Parent(s): 3053868

Upload backend.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend.py +428 -428
backend.py CHANGED
@@ -1,428 +1,428 @@
1
- """
2
- Internal Medicine Discharge Letter Error-Check — Backend
3
- Prospective study: AI-assisted error detection in ED discharge letters
4
-
5
- Flow:
6
- 1. Receive Croatian discharge letter from doctor
7
- 2. Translate to English (Gemini 3.1 Flash Lite)
8
- 3. Run concurrent error-detection analysis:
9
- - DeepSeek Reasoner (via DeepSeek API)
10
- - GPT-OSS-120B (via Groq)
11
- 4. Parse structured output and return errors + suggestions
12
- """
13
-
14
- import os
15
- import json
16
- import time
17
- from concurrent.futures import ThreadPoolExecutor
18
- from dataclasses import dataclass, field
19
- from typing import Optional
20
- from dotenv import load_dotenv
21
- from google import genai
22
- from openai import OpenAI
23
- from groq import Groq
24
-
25
- load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), ".env"))
26
-
27
- # ---------------------------------------------------------------------------
28
- # API clients
29
- # ---------------------------------------------------------------------------
30
-
31
- def get_gemini_client() -> genai.Client:
32
- key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
33
- return genai.Client(api_key=key)
34
-
35
-
36
- def get_deepseek_client() -> OpenAI:
37
- return OpenAI(
38
- api_key=os.environ.get("DEEPSEEK_API_KEY"),
39
- base_url="https://api.deepseek.com",
40
- )
41
-
42
-
43
- def get_groq_client() -> Groq:
44
- return Groq(api_key=os.environ.get("GROQ_API_KEY_OSS"))
45
-
46
-
47
- DEEPSEEK_TIMEOUT_SECONDS = 120
48
- DEEPSEEK_MAX_TOKENS = 8192
49
- DEEPSEEK_MAX_ATTEMPTS = 2
50
- DEEPSEEK_RETRY_SLEEP_SECONDS = 2
51
-
52
-
53
- def _log_deepseek(event: str, **kwargs):
54
- parts = [f"{key}={value!r}" for key, value in kwargs.items()]
55
- suffix = f" | {' | '.join(parts)}" if parts else ""
56
- print(f"[DeepSeek] {event}{suffix}", flush=True)
57
-
58
-
59
- def _deepseek_response_meta(response) -> dict:
60
- choice = response.choices[0]
61
- message = choice.message
62
- content = message.content or ""
63
- reasoning = getattr(message, "reasoning_content", "") or ""
64
- return {
65
- "finish_reason": getattr(choice, "finish_reason", None),
66
- "content_len": len(content),
67
- "reasoning_len": len(reasoning),
68
- }
69
-
70
-
71
- # ---------------------------------------------------------------------------
72
- # Prompts
73
- # ---------------------------------------------------------------------------
74
-
75
- TRANSLATION_PROMPT = """You are a medical translator. Translate the following Croatian clinical discharge letter to English.
76
- Preserve ALL medical terminology, values, units, drug names, dosages, and clinical details exactly.
77
- Output ONLY the English translation, nothing else.
78
-
79
- Croatian text:
80
- {text}"""
81
-
82
- ERROR_CHECK_SYSTEM_PROMPT = """You are an expert internal medicine physician reviewing emergency department discharge letters for errors and quality issues.
83
-
84
- Your task: carefully analyze the discharge letter and identify up to 3 ERRORS and up to 2 IMPROVEMENT SUGGESTIONS.
85
- The goal is precision, not forcing findings.
86
-
87
- ERRORS are factual, clinical, or documentation mistakes present in the letter, such as:
88
- - Medication errors (wrong drug, wrong dose, drug interactions, contraindications)
89
- - Diagnostic errors (incorrect diagnosis given the findings, missed diagnosis)
90
- - Dosing errors (incorrect dose for patient weight/age/renal function)
91
- - Lab interpretation errors (misinterpreted lab values, missed abnormal results)
92
- - Documentation errors (inconsistencies, contradictions within the letter)
93
- - Omissions (critical missing information that should be documented)
94
-
95
- SUGGESTIONS are general quality improvements that are NOT necessarily errors, such as:
96
- - Documentation completeness improvements
97
- - Clinical workflow recommendations
98
- - Patient safety enhancements
99
- - Follow-up care suggestions
100
-
101
- For every suggestion you MUST:
102
- - Identify the specific part of the letter that could be improved
103
- - Quote the relevant original text (or note what is missing)
104
- - Provide the exact rewritten version or additional text you would use instead
105
- This makes every suggestion concrete and immediately usable rather than vague or generic.
106
-
107
- CRITICAL RULES:
108
- - Only report genuine errors you are confident about. Do NOT fabricate errors.
109
- - Do NOT force yourself to find 3 errors.
110
- - If you find fewer than 3 errors, report only what you find.
111
- - It is acceptable to find 0 errors. If no clear error is present, return "errors": [].
112
- - When uncertain, prefer returning no error rather than a speculative one.
113
- - You may still provide 0-2 useful improvement suggestions even when errors is empty.
114
- - Be specific: quote the relevant part of the letter for each error and suggestion.
115
- - Categorize each error and suggestion precisely.
116
- - For every suggestion, always include both the original quote and your exact suggested rewrite.
117
-
118
- You MUST respond in the following JSON format and NOTHING else:
119
-
120
- {
121
- "errors": [
122
- {
123
- "description": "Clear description of the error",
124
- "category": "medication_error|diagnostic_error|dosing_error|documentation_error|lab_interpretation_error|contraindication|omission|other",
125
- "severity": "low|medium|high|critical",
126
- "quote": "Exact quote from the letter where the error appears"
127
- }
128
- ],
129
- "suggestions": [
130
- {
131
- "description": "Clear description of the improvement suggestion",
132
- "category": "documentation_quality|clinical_workflow|patient_safety|completeness|other",
133
- "quote": "Exact quote from the letter (or 'N/A' if adding entirely new content)",
134
- "suggested_rewrite": "Exactly how you would have written it differently - the full improved text you recommend"
135
- }
136
- ],
137
- "summary": "One-sentence overall assessment of the discharge letter quality"
138
- }
139
-
140
- Valid zero-error example:
141
- {
142
- "errors": [],
143
- "suggestions": [
144
- {
145
- "description": "Make the follow-up plan more explicit and actionable for the patient and primary care provider.",
146
- "category": "documentation_quality",
147
- "quote": "Follow up with primary care in 1 week.",
148
- "suggested_rewrite": "Please follow up with your primary care physician within 7 days for repeat labs and clinical reassessment. If you experience worsening shortness of breath, chest pain, or fever, return to the emergency department immediately or call the 24-hour advice line at (555) 123-4567."
149
- }
150
- ],
151
- "summary": "No clear clinical or documentation errors were identified, but the discharge letter could be improved with more specific follow-up instructions."
152
- }"""
153
-
154
- ERROR_CHECK_USER_PROMPT = """Analyze the following internal medicine emergency department discharge letter for errors and quality issues.
155
-
156
- DISCHARGE LETTER:
157
- {clinical_text}
158
-
159
- Respond with the JSON format specified in your instructions.
160
- Remember:
161
- - up to 3 errors
162
- - up to 2 suggestions
163
- - only report genuine errors
164
- - if no clear errors are present, return `"errors": []` and optionally provide suggestions"""
165
-
166
-
167
- # ---------------------------------------------------------------------------
168
- # Data classes
169
- # ---------------------------------------------------------------------------
170
-
171
- @dataclass
172
- class ParsedError:
173
- description: str
174
- category: str
175
- severity: str
176
- quote: str
177
-
178
-
179
- @dataclass
180
- class ParsedSuggestion:
181
- description: str
182
- category: str
183
- quote: str = ""
184
- suggested_rewrite: str = ""
185
-
186
-
187
- @dataclass
188
- class ModelResult:
189
- model_name: str
190
- raw_response: str
191
- errors: list = field(default_factory=list)
192
- suggestions: list = field(default_factory=list)
193
- summary: str = ""
194
- success: bool = True
195
- error_message: Optional[str] = None
196
- latency_seconds: float = 0.0
197
-
198
-
199
- @dataclass
200
- class AnalysisResponse:
201
- original_text: str
202
- translated_text: str
203
- model_a_result: ModelResult
204
- model_b_result: ModelResult
205
- translation_latency: float = 0.0
206
-
207
-
208
- # ---------------------------------------------------------------------------
209
- # Translation
210
- # ---------------------------------------------------------------------------
211
-
212
- def translate_to_english(text: str) -> str:
213
- client = get_gemini_client()
214
- response = client.models.generate_content(
215
- model="gemini-3.1-flash-lite-preview",
216
- contents=TRANSLATION_PROMPT.format(text=text),
217
- )
218
- return response.text
219
-
220
-
221
- # ---------------------------------------------------------------------------
222
- # JSON parsing helper
223
- # ---------------------------------------------------------------------------
224
-
225
- def parse_model_json(raw: str) -> dict:
226
- """Extract JSON from model response, handling markdown code fences."""
227
- text = raw.strip()
228
- if text.startswith("```"):
229
- first_newline = text.index("\n")
230
- last_fence = text.rfind("```")
231
- text = text[first_newline + 1 : last_fence].strip()
232
- try:
233
- return json.loads(text)
234
- except json.JSONDecodeError:
235
- start = text.find("{")
236
- end = text.rfind("}") + 1
237
- if start != -1 and end > start:
238
- return json.loads(text[start:end])
239
- raise
240
-
241
-
242
- # ---------------------------------------------------------------------------
243
- # Model calls
244
- # ---------------------------------------------------------------------------
245
-
246
- def _parse_to_result(model_label: str, raw: str, latency: float) -> ModelResult:
247
- parsed = parse_model_json(raw)
248
- errors = [
249
- ParsedError(
250
- description=e.get("description", ""),
251
- category=e.get("category", "other"),
252
- severity=e.get("severity", "medium"),
253
- quote=e.get("quote", ""),
254
- )
255
- for e in parsed.get("errors", [])
256
- ]
257
- suggestions = [
258
- ParsedSuggestion(
259
- description=s.get("description", ""),
260
- category=s.get("category", "other"),
261
- quote=s.get("quote", ""),
262
- suggested_rewrite=s.get("suggested_rewrite", ""),
263
- )
264
- for s in parsed.get("suggestions", [])
265
- ]
266
- return ModelResult(
267
- model_name=model_label,
268
- raw_response=raw,
269
- errors=errors,
270
- suggestions=suggestions,
271
- summary=parsed.get("summary", ""),
272
- success=True,
273
- latency_seconds=round(latency, 2),
274
- )
275
-
276
-
277
- def call_model_a(clinical_text: str) -> ModelResult:
278
- """DeepSeek Reasoner via DeepSeek API."""
279
- start = time.time()
280
- client = get_deepseek_client()
281
- last_error = None
282
-
283
- for attempt in range(1, DEEPSEEK_MAX_ATTEMPTS + 1):
284
- attempt_start = time.time()
285
- try:
286
- _log_deepseek("attempt_start", attempt=attempt)
287
- response = client.chat.completions.create(
288
- model="deepseek-reasoner",
289
- messages=[
290
- {"role": "system", "content": ERROR_CHECK_SYSTEM_PROMPT},
291
- {
292
- "role": "user",
293
- "content": ERROR_CHECK_USER_PROMPT.format(
294
- clinical_text=clinical_text
295
- ),
296
- },
297
- ],
298
- max_tokens=DEEPSEEK_MAX_TOKENS,
299
- timeout=DEEPSEEK_TIMEOUT_SECONDS,
300
- )
301
- meta = _deepseek_response_meta(response)
302
- _log_deepseek("attempt_response", attempt=attempt, **meta)
303
-
304
- raw = response.choices[0].message.content or ""
305
- if not raw.strip():
306
- raise ValueError(
307
- "DeepSeek returned an empty response body "
308
- f"(finish_reason={meta['finish_reason']}, "
309
- f"reasoning_len={meta['reasoning_len']})."
310
- )
311
-
312
- result = _parse_to_result("DeepSeek Reasoner", raw, time.time() - start)
313
- _log_deepseek(
314
- "attempt_success",
315
- attempt=attempt,
316
- elapsed_total=round(time.time() - start, 2),
317
- errors_found=len(result.errors),
318
- suggestions_found=len(result.suggestions),
319
- )
320
- return result
321
- except Exception as exc:
322
- last_error = exc
323
- _log_deepseek(
324
- "attempt_failed",
325
- attempt=attempt,
326
- elapsed_attempt=round(time.time() - attempt_start, 2),
327
- error_type=type(exc).__name__,
328
- error=str(exc),
329
- )
330
- if attempt < DEEPSEEK_MAX_ATTEMPTS:
331
- time.sleep(DEEPSEEK_RETRY_SLEEP_SECONDS)
332
-
333
- return ModelResult(
334
- model_name="DeepSeek Reasoner",
335
- raw_response="",
336
- success=False,
337
- error_message=(
338
- f"{last_error} after {DEEPSEEK_MAX_ATTEMPTS} attempts"
339
- if last_error
340
- else "DeepSeek failed for an unknown reason."
341
- ),
342
- latency_seconds=round(time.time() - start, 2),
343
- )
344
-
345
-
346
- def call_model_b(clinical_text: str) -> ModelResult:
347
- """GPT-OSS-120B via Groq."""
348
- start = time.time()
349
- try:
350
- client = get_groq_client()
351
- response = client.chat.completions.create(
352
- model="openai/gpt-oss-120b",
353
- messages=[
354
- {"role": "system", "content": ERROR_CHECK_SYSTEM_PROMPT},
355
- {"role": "user", "content": ERROR_CHECK_USER_PROMPT.format(clinical_text=clinical_text)},
356
- ],
357
- temperature=0.2,
358
- max_tokens=4096,
359
- )
360
- raw = response.choices[0].message.content
361
- return _parse_to_result("GPT-OSS-120B", raw, time.time() - start)
362
- except Exception as exc:
363
- return ModelResult(
364
- model_name="GPT-OSS-120B",
365
- raw_response="",
366
- success=False,
367
- error_message=str(exc),
368
- latency_seconds=round(time.time() - start, 2),
369
- )
370
-
371
-
372
- # ---------------------------------------------------------------------------
373
- # Main pipeline
374
- # ---------------------------------------------------------------------------
375
-
376
- def run_error_check(croatian_text: str) -> AnalysisResponse:
377
- """Full pipeline: translate, then run both models concurrently."""
378
- t0 = time.time()
379
- english_text = translate_to_english(croatian_text)
380
- translation_latency = round(time.time() - t0, 2)
381
-
382
- with ThreadPoolExecutor(max_workers=2) as pool:
383
- future_a = pool.submit(call_model_a, english_text)
384
- future_b = pool.submit(call_model_b, english_text)
385
- result_a = future_a.result()
386
- result_b = future_b.result()
387
-
388
- return AnalysisResponse(
389
- original_text=croatian_text,
390
- translated_text=english_text,
391
- model_a_result=result_a,
392
- model_b_result=result_b,
393
- translation_latency=translation_latency,
394
- )
395
-
396
-
397
- # ---------------------------------------------------------------------------
398
- # CLI test
399
- # ---------------------------------------------------------------------------
400
-
401
- if __name__ == "__main__":
402
- import sys, io
403
- sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
404
-
405
- sample = """Bolesnik 68 godina, dolazi zbog bolova u prsištu.
406
- Dijagnoza: STEMI prednje stijenke.
407
- Terapija: Aspirin 100mg, Klopidogrel 75mg, Ramipril 5mg, Atorvastatin 40mg.
408
- Preporučen kontrolni pregled za 7 dana."""
409
-
410
- print("=" * 60)
411
- print("ERROR CHECK TEST")
412
- print("=" * 60)
413
-
414
- result = run_error_check(sample)
415
- print(f"\nTranslation ({result.translation_latency}s):")
416
- print(result.translated_text)
417
-
418
- for r in [result.model_a_result, result.model_b_result]:
419
- print(f"\n{'=' * 60}")
420
- print(f"{r.model_name} ({r.latency_seconds}s):")
421
- if r.success:
422
- print(f"Summary: {r.summary}")
423
- for i, e in enumerate(r.errors, 1):
424
- print(f" Error {i}: [{e.category}/{e.severity}] {e.description}")
425
- for i, s in enumerate(r.suggestions, 1):
426
- print(f" Suggestion {i}: [{s.category}] {s.description}")
427
- else:
428
- print(f"ERROR: {r.error_message}")
 
1
+ """
2
+ Internal Medicine Discharge Letter Error-Check — Backend
3
+ Prospective study: AI-assisted error detection in ED discharge letters
4
+
5
+ Flow:
6
+ 1. Receive Croatian discharge letter from doctor
7
+ 2. Translate to English (Gemini 3.1 Flash Lite)
8
+ 3. Run concurrent error-detection analysis:
9
+ - DeepSeek Reasoner (via DeepSeek API)
10
+ - GPT-OSS-120B (via Groq)
11
+ 4. Parse structured output and return errors + suggestions
12
+ """
13
+
14
+ import os
15
+ import json
16
+ import time
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from dataclasses import dataclass, field
19
+ from typing import Optional
20
+ from dotenv import load_dotenv
21
+ from google import genai
22
+ from openai import OpenAI
23
+ from groq import Groq
24
+
25
+ load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), ".env"))
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # API clients
29
+ # ---------------------------------------------------------------------------
30
+
31
def get_gemini_client() -> genai.Client:
    """Build a Gemini client, preferring GOOGLE_API_KEY over GEMINI_API_KEY."""
    api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
    return genai.Client(api_key=api_key)
34
+
35
+
36
def get_deepseek_client() -> OpenAI:
    """Build an OpenAI-compatible client pointed at the DeepSeek API."""
    api_key = os.environ.get("DEEPSEEK_API_KEY")
    return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
41
+
42
+
43
def get_groq_client() -> Groq:
    """Build a Groq client using the GROQ_API_KEY_OSS environment variable."""
    api_key = os.environ.get("GROQ_API_KEY_OSS")
    return Groq(api_key=api_key)
45
+
46
+
47
# Tuning for the DeepSeek Reasoner call (see call_model_a).
DEEPSEEK_TIMEOUT_SECONDS = 120  # per-request timeout passed to the API client
DEEPSEEK_MAX_TOKENS = 8192  # generation budget (reasoning trace + answer)
DEEPSEEK_MAX_ATTEMPTS = 2  # total attempts, not retries after the first
DEEPSEEK_RETRY_SLEEP_SECONDS = 2  # pause between failed attempts
51
+
52
+
53
+ def _log_deepseek(event: str, **kwargs):
54
+ parts = [f"{key}={value!r}" for key, value in kwargs.items()]
55
+ suffix = f" | {' | '.join(parts)}" if parts else ""
56
+ print(f"[DeepSeek] {event}{suffix}", flush=True)
57
+
58
+
59
+ def _deepseek_response_meta(response) -> dict:
60
+ choice = response.choices[0]
61
+ message = choice.message
62
+ content = message.content or ""
63
+ reasoning = getattr(message, "reasoning_content", "") or ""
64
+ return {
65
+ "finish_reason": getattr(choice, "finish_reason", None),
66
+ "content_len": len(content),
67
+ "reasoning_len": len(reasoning),
68
+ }
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Prompts
73
+ # ---------------------------------------------------------------------------
74
+
75
# Prompt for Gemini: faithful Croatian -> English clinical translation.
# {text} is filled with the raw discharge letter via str.format.
TRANSLATION_PROMPT = """You are a medical translator. Translate the following Croatian clinical discharge letter to English.
Preserve ALL medical terminology, values, units, drug names, dosages, and clinical details exactly.
Output ONLY the English translation, nothing else.

Croatian text:
{text}"""
81
+
82
# System prompt shared by both error-check models.  Defines the task (up to
# 3 errors, up to 2 suggestions), the error/suggestion taxonomies, and the
# exact JSON schema that parse_model_json/_parse_to_result expect back.
# NOTE: this is a plain string (never passed through str.format), so the
# literal JSON braces below are safe.
ERROR_CHECK_SYSTEM_PROMPT = """You are an expert internal medicine physician reviewing emergency department discharge letters for errors and quality issues.

Your task: carefully analyze the discharge letter and identify up to 3 ERRORS and up to 2 IMPROVEMENT SUGGESTIONS.
The goal is precision, not forcing findings.

ERRORS are factual, clinical, or documentation mistakes present in the letter, such as:
- Medication errors (wrong drug, wrong dose, drug interactions, contraindications)
- Diagnostic errors (incorrect diagnosis given the findings, missed diagnosis)
- Dosing errors (incorrect dose for patient weight/age/renal function)
- Lab interpretation errors (misinterpreted lab values, missed abnormal results)
- Documentation errors (inconsistencies, contradictions within the letter)
- Omissions (critical missing information that should be documented)

SUGGESTIONS are general quality improvements that are NOT necessarily errors, such as:
- Documentation completeness improvements
- Clinical workflow recommendations
- Patient safety enhancements
- Follow-up care suggestions

For every suggestion you MUST:
- Identify the specific part of the letter that could be improved
- Quote the relevant original text (or note what is missing)
- Provide the exact rewritten version or additional text you would use instead
This makes every suggestion concrete and immediately usable rather than vague or generic.

CRITICAL RULES:
- Only report genuine errors you are confident about. Do NOT fabricate errors.
- Do NOT force yourself to find 3 errors.
- If you find fewer than 3 errors, report only what you find.
- It is acceptable to find 0 errors. If no clear error is present, return "errors": [].
- When uncertain, prefer returning no error rather than a speculative one.
- You may still provide 0-2 useful improvement suggestions even when errors is empty.
- Be specific: quote the relevant part of the letter for each error and suggestion.
- Categorize each error and suggestion precisely.
- For every suggestion, always include both the original quote and your exact suggested rewrite.

You MUST respond in the following JSON format and NOTHING else:

{
"errors": [
{
"description": "Clear description of the error",
"category": "medication_error|diagnostic_error|dosing_error|documentation_error|lab_interpretation_error|contraindication|omission|other",
"severity": "low|medium|high|critical",
"quote": "Exact quote from the letter where the error appears"
}
],
"suggestions": [
{
"description": "Clear description of the improvement suggestion",
"category": "documentation_quality|clinical_workflow|patient_safety|completeness|other",
"quote": "Exact quote from the letter (or 'N/A' if adding entirely new content)",
"suggested_rewrite": "Exactly how you would have written it differently - the full improved text you recommend"
}
],
"summary": "One-sentence overall assessment of the discharge letter quality"
}

Valid zero-error example:
{
"errors": [],
"suggestions": [
{
"description": "Make the follow-up plan more explicit and actionable for the patient and primary care provider.",
"category": "documentation_quality",
"quote": "Follow up with primary care in 1 week.",
"suggested_rewrite": "Please follow up with your primary care physician within 7 days for repeat labs and clinical reassessment. If you experience worsening shortness of breath, chest pain, or fever, return to the emergency department immediately or call the 24-hour advice line at (555) 123-4567."
}
],
"summary": "No clear clinical or documentation errors were identified, but the discharge letter could be improved with more specific follow-up instructions."
}"""
153
+
154
# Per-request user prompt; {clinical_text} receives the English translation
# of the discharge letter via str.format.
ERROR_CHECK_USER_PROMPT = """Analyze the following internal medicine emergency department discharge letter for errors and quality issues.

DISCHARGE LETTER:
{clinical_text}

Respond with the JSON format specified in your instructions.
Remember:
- up to 3 errors
- up to 2 suggestions
- only report genuine errors
- if no clear errors are present, return `"errors": []` and optionally provide suggestions"""
165
+
166
+
167
+ # ---------------------------------------------------------------------------
168
+ # Data classes
169
+ # ---------------------------------------------------------------------------
170
+
171
@dataclass
class ParsedError:
    """One error reported by a model, parsed from its JSON output."""

    description: str  # Human-readable explanation of the error.
    category: str  # One of the error categories from ERROR_CHECK_SYSTEM_PROMPT.
    severity: str  # "low" | "medium" | "high" | "critical".
    quote: str  # Exact quote from the letter where the error appears.
177
+
178
+
179
@dataclass
class ParsedSuggestion:
    """One improvement suggestion reported by a model."""

    description: str  # What could be improved.
    category: str  # One of the suggestion categories from the system prompt.
    quote: str = ""  # Original letter text being improved ('N/A' for additions).
    suggested_rewrite: str = ""  # Exact replacement text proposed by the model.
185
+
186
+
187
@dataclass
class ModelResult:
    """Outcome of one model's error-check run, successful or failed."""

    model_name: str  # Display name, e.g. "DeepSeek Reasoner".
    raw_response: str  # Unparsed model output ("" on failure).
    errors: list = field(default_factory=list)  # list of ParsedError
    suggestions: list = field(default_factory=list)  # list of ParsedSuggestion
    summary: str = ""  # Model's one-sentence overall assessment.
    success: bool = True  # False when the call or JSON parsing failed.
    error_message: Optional[str] = None  # Failure details when success is False.
    latency_seconds: float = 0.0  # Wall-clock seconds for the model call.
197
+
198
+
199
@dataclass
class AnalysisResponse:
    """Full pipeline output: translation plus both models' results."""

    original_text: str  # Croatian input letter, unchanged.
    translated_text: str  # English translation sent to both models.
    model_a_result: ModelResult  # DeepSeek Reasoner result.
    model_b_result: ModelResult  # GPT-OSS-120B (Groq) result.
    translation_latency: float = 0.0  # Seconds spent on the translation step.
206
+
207
+
208
+ # ---------------------------------------------------------------------------
209
+ # Translation
210
+ # ---------------------------------------------------------------------------
211
+
212
def translate_to_english(text: str) -> str:
    """Translate a Croatian discharge letter to English via Gemini.

    Args:
        text: The Croatian source text.

    Returns:
        The English translation as plain text.
    """
    prompt = TRANSLATION_PROMPT.format(text=text)
    gemini = get_gemini_client()
    result = gemini.models.generate_content(
        model="gemini-3.1-flash-lite-preview",
        contents=prompt,
    )
    return result.text
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # JSON parsing helper
223
+ # ---------------------------------------------------------------------------
224
+
225
def parse_model_json(raw: str) -> dict:
    """Extract the JSON object from a model response.

    Handles responses wrapped in markdown code fences (``` or ```json) and,
    as a fallback, responses with extra prose around the JSON object.

    Args:
        raw: Raw text returned by the model.

    Returns:
        The parsed JSON payload.

    Raises:
        json.JSONDecodeError: If no valid JSON can be extracted.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (with optional language tag) and the
        # closing fence.  Guard the two fence edge cases the old code broke
        # on: a fence with no newline (``index`` raised ValueError) and an
        # unclosed fence (``rfind`` matched the *opening* fence, slicing the
        # payload down to an empty string).
        first_newline = text.find("\n")
        if first_newline != -1:
            last_fence = text.rfind("```")
            end = last_fence if last_fence > first_newline else len(text)
            text = text[first_newline + 1 : end].strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fallback: grab the outermost {...} span and try once more.
        start = text.find("{")
        end = text.rfind("}") + 1
        if start != -1 and end > start:
            return json.loads(text[start:end])
        raise
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # Model calls
244
+ # ---------------------------------------------------------------------------
245
+
246
def _parse_to_result(model_label: str, raw: str, latency: float) -> ModelResult:
    """Convert a raw model JSON response into a successful ModelResult.

    Missing JSON fields fall back to defaults; if *raw* contains no parseable
    JSON this propagates the parsing exception, which callers treat as a
    model failure.
    """
    data = parse_model_json(raw)

    errors = []
    for item in data.get("errors", []):
        errors.append(
            ParsedError(
                description=item.get("description", ""),
                category=item.get("category", "other"),
                severity=item.get("severity", "medium"),
                quote=item.get("quote", ""),
            )
        )

    suggestions = []
    for item in data.get("suggestions", []):
        suggestions.append(
            ParsedSuggestion(
                description=item.get("description", ""),
                category=item.get("category", "other"),
                quote=item.get("quote", ""),
                suggested_rewrite=item.get("suggested_rewrite", ""),
            )
        )

    return ModelResult(
        model_name=model_label,
        raw_response=raw,
        errors=errors,
        suggestions=suggestions,
        summary=data.get("summary", ""),
        success=True,
        latency_seconds=round(latency, 2),
    )
275
+
276
+
277
def call_model_a(clinical_text: str) -> ModelResult:
    """DeepSeek Reasoner via DeepSeek API.

    Runs the error-check prompts against deepseek-reasoner, retrying up to
    DEEPSEEK_MAX_ATTEMPTS times on any failure (API error, empty body,
    unparseable JSON).  Never raises: after the final attempt it returns a
    ModelResult with success=False and the last error message.

    Args:
        clinical_text: English discharge letter text to analyze.

    Returns:
        ModelResult with parsed errors/suggestions on success, or a failure
        ModelResult after all attempts are exhausted.
    """
    start = time.time()
    client = get_deepseek_client()
    last_error = None

    for attempt in range(1, DEEPSEEK_MAX_ATTEMPTS + 1):
        attempt_start = time.time()
        try:
            _log_deepseek("attempt_start", attempt=attempt)
            response = client.chat.completions.create(
                model="deepseek-reasoner",
                messages=[
                    {"role": "system", "content": ERROR_CHECK_SYSTEM_PROMPT},
                    {
                        "role": "user",
                        "content": ERROR_CHECK_USER_PROMPT.format(
                            clinical_text=clinical_text
                        ),
                    },
                ],
                max_tokens=DEEPSEEK_MAX_TOKENS,
                timeout=DEEPSEEK_TIMEOUT_SECONDS,
            )
            meta = _deepseek_response_meta(response)
            _log_deepseek("attempt_response", attempt=attempt, **meta)

            raw = response.choices[0].message.content or ""
            # The reasoner can return an empty visible body (e.g. when the
            # token budget is consumed by the reasoning trace); treat that
            # as a retryable failure instead of passing "" to the parser.
            if not raw.strip():
                raise ValueError(
                    "DeepSeek returned an empty response body "
                    f"(finish_reason={meta['finish_reason']}, "
                    f"reasoning_len={meta['reasoning_len']})."
                )

            # Latency is measured from the overall start, so it includes
            # time spent on earlier failed attempts.
            result = _parse_to_result("DeepSeek Reasoner", raw, time.time() - start)
            _log_deepseek(
                "attempt_success",
                attempt=attempt,
                elapsed_total=round(time.time() - start, 2),
                errors_found=len(result.errors),
                suggestions_found=len(result.suggestions),
            )
            return result
        except Exception as exc:
            # Broad catch is deliberate: any API/parse failure should trigger
            # a retry rather than abort the whole analysis pipeline.
            last_error = exc
            _log_deepseek(
                "attempt_failed",
                attempt=attempt,
                elapsed_attempt=round(time.time() - attempt_start, 2),
                error_type=type(exc).__name__,
                error=str(exc),
            )
            if attempt < DEEPSEEK_MAX_ATTEMPTS:
                time.sleep(DEEPSEEK_RETRY_SLEEP_SECONDS)

    # All attempts failed: report the last error; never raise to the caller.
    return ModelResult(
        model_name="DeepSeek Reasoner",
        raw_response="",
        success=False,
        error_message=(
            f"{last_error} after {DEEPSEEK_MAX_ATTEMPTS} attempts"
            if last_error
            else "DeepSeek failed for an unknown reason."
        ),
        latency_seconds=round(time.time() - start, 2),
    )
344
+
345
+
346
def call_model_b(clinical_text: str) -> ModelResult:
    """GPT-OSS-120B via Groq.

    Single attempt, no retries.  Any failure (API error, unparseable JSON)
    is captured and returned as a ModelResult with success=False rather
    than raised.
    """
    started = time.time()
    try:
        groq_client = get_groq_client()
        user_message = ERROR_CHECK_USER_PROMPT.format(clinical_text=clinical_text)
        completion = groq_client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[
                {"role": "system", "content": ERROR_CHECK_SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            temperature=0.2,
            max_tokens=4096,
        )
        return _parse_to_result(
            "GPT-OSS-120B",
            completion.choices[0].message.content,
            time.time() - started,
        )
    except Exception as exc:
        return ModelResult(
            model_name="GPT-OSS-120B",
            raw_response="",
            success=False,
            error_message=str(exc),
            latency_seconds=round(time.time() - started, 2),
        )
370
+
371
+
372
+ # ---------------------------------------------------------------------------
373
+ # Main pipeline
374
+ # ---------------------------------------------------------------------------
375
+
376
def run_error_check(croatian_text: str) -> AnalysisResponse:
    """Full pipeline: translate, then run both models concurrently."""
    translate_start = time.time()
    english_text = translate_to_english(croatian_text)
    translation_latency = round(time.time() - translate_start, 2)

    # Fan out the two model calls; each returns a ModelResult and never
    # raises, so both .result() calls are safe to await in order.
    with ThreadPoolExecutor(max_workers=2) as executor:
        deepseek_future = executor.submit(call_model_a, english_text)
        groq_future = executor.submit(call_model_b, english_text)
        deepseek_result = deepseek_future.result()
        groq_result = groq_future.result()

    return AnalysisResponse(
        original_text=croatian_text,
        translated_text=english_text,
        model_a_result=deepseek_result,
        model_b_result=groq_result,
        translation_latency=translation_latency,
    )
395
+
396
+
397
+ # ---------------------------------------------------------------------------
398
+ # CLI test
399
+ # ---------------------------------------------------------------------------
400
+
401
+ if __name__ == "__main__":
402
+ import sys, io
403
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
404
+
405
+ sample = """Bolesnik 68 godina, dolazi zbog bolova u prsištu.
406
+ Dijagnoza: STEMI prednje stijenke.
407
+ Terapija: Aspirin 100mg, Klopidogrel 75mg, Ramipril 5mg, Atorvastatin 40mg.
408
+ Preporučen kontrolni pregled za 7 dana."""
409
+
410
+ print("=" * 60)
411
+ print("ERROR CHECK TEST")
412
+ print("=" * 60)
413
+
414
+ result = run_error_check(sample)
415
+ print(f"\nTranslation ({result.translation_latency}s):")
416
+ print(result.translated_text)
417
+
418
+ for r in [result.model_a_result, result.model_b_result]:
419
+ print(f"\n{'=' * 60}")
420
+ print(f"{r.model_name} ({r.latency_seconds}s):")
421
+ if r.success:
422
+ print(f"Summary: {r.summary}")
423
+ for i, e in enumerate(r.errors, 1):
424
+ print(f" Error {i}: [{e.category}/{e.severity}] {e.description}")
425
+ for i, s in enumerate(r.suggestions, 1):
426
+ print(f" Suggestion {i}: [{s.category}] {s.description}")
427
+ else:
428
+ print(f"ERROR: {r.error_message}")