Vrda committed on
Commit
dd1a514
·
verified ·
1 Parent(s): a5a4feb

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +104 -37
app.py CHANGED
@@ -8,9 +8,10 @@ import time
8
  import json
9
  import os
10
  import tempfile
 
11
  from datetime import datetime
12
  from pathlib import Path
13
- from backend import run_error_check
14
 
15
  FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"
16
  HF_DATASET_REPO = "Vrda/im-error-check-data"
@@ -173,8 +174,11 @@ Preporučen kontrolni pregled za 14 dana."""
173
 
174
  for key, default in [
175
  ("input_text", ""),
176
- ("result", None),
177
- ("elapsed", 0),
 
 
 
178
  ("run_analysis", False),
179
  ("physician_id", ""),
180
  ]:
@@ -238,15 +242,48 @@ st.text_area(
238
  st.button("Analyze", type="primary", on_click=trigger_analysis)
239
 
240
  # -------------------------------------------------------------------------
241
- # Run analysis
242
  # -------------------------------------------------------------------------
243
 
244
  if st.session_state.run_analysis and st.session_state.input_text.strip():
245
  st.session_state.run_analysis = False
246
- with st.spinner("Running error-check with both AI models (15-45 seconds)..."):
247
- start = time.time()
248
- st.session_state.result = run_error_check(st.session_state.input_text)
249
- st.session_state.elapsed = time.time() - start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  st.rerun()
251
 
252
 
@@ -319,43 +356,63 @@ def render_model_output(result, header_class: str):
319
 
320
 
321
  # -------------------------------------------------------------------------
322
- # Display results
323
  # -------------------------------------------------------------------------
324
 
325
- if st.session_state.result:
326
- r = st.session_state.result
327
 
 
328
  st.markdown("---")
329
  st.header("Analysis Results")
330
- st.success(
331
- f"Completed in {st.session_state.elapsed:.1f}s "
332
- f"(translation: {r.translation_latency}s, "
333
- f"Model A: {r.model_a_result.latency_seconds}s, "
334
- f"Model B: {r.model_b_result.latency_seconds}s)"
335
- )
 
 
 
 
 
 
 
 
336
 
337
  with st.expander("English Translation"):
338
- st.markdown(r.translated_text)
339
 
340
  st.subheader("Model Comparison")
341
 
342
  col_a, col_b = st.columns(2, gap="large")
343
 
344
- with col_a:
345
  st.markdown(
346
- '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
347
- "DeepSeek Reasoner</h4></div>",
348
  unsafe_allow_html=True,
349
  )
350
- render_model_output(r.model_a_result, "model-header-a")
351
 
352
- with col_b:
353
  st.markdown(
354
- '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
355
- "GPT-OSS-120B</h4></div>",
356
  unsafe_allow_html=True,
357
  )
358
- render_model_output(r.model_b_result, "model-header-b")
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  # -----------------------------------------------------------------
361
  # Feedback
@@ -373,10 +430,11 @@ if st.session_state.result:
373
 
374
  feedback_data = {}
375
 
376
- for model_key, model_label, res in [
377
- ("model_a", "DeepSeek Reasoner", r.model_a_result),
378
- ("model_b", "GPT-OSS-120B", r.model_b_result),
379
- ]:
 
380
  st.markdown(f"#### {model_label}")
381
 
382
  error_ratings = []
@@ -438,6 +496,12 @@ if st.session_state.result:
438
 
439
  st.markdown("---")
440
 
 
 
 
 
 
 
441
  # Missed errors
442
  st.markdown("#### Missed Errors")
443
  missed_errors = st.text_area(
@@ -459,17 +523,20 @@ if st.session_state.result:
459
  if not st.session_state.physician_id.strip():
460
  st.warning("Please enter a Physician ID in the sidebar before submitting.")
461
  else:
 
 
462
  entry = {
463
  "timestamp": datetime.now().isoformat(),
464
  "physician_id": st.session_state.physician_id.strip(),
465
  "clinical_input": st.session_state.input_text,
466
- "translation": r.translated_text,
467
- "model_a_output": r.model_a_result.raw_response,
468
- "model_b_output": r.model_b_result.raw_response,
469
- "model_a_latency": r.model_a_result.latency_seconds,
470
- "model_b_latency": r.model_b_result.latency_seconds,
471
- "translation_latency": r.translation_latency,
472
- "total_latency": round(st.session_state.elapsed, 2),
 
473
  "ratings": feedback_data,
474
  "missed_errors": missed_errors,
475
  "comments": comments,
 
8
  import json
9
  import os
10
  import tempfile
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
  from datetime import datetime
13
  from pathlib import Path
14
+ from backend import translate_to_english, call_model_a, call_model_b
15
 
16
  FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"
17
  HF_DATASET_REPO = "Vrda/im-error-check-data"
 
174
 
175
  for key, default in [
176
  ("input_text", ""),
177
+ ("translated_text", None),
178
+ ("model_a_result", None),
179
+ ("model_b_result", None),
180
+ ("translation_latency", 0),
181
+ ("total_elapsed", 0),
182
  ("run_analysis", False),
183
  ("physician_id", ""),
184
  ]:
 
242
  st.button("Analyze", type="primary", on_click=trigger_analysis)
243
 
244
  # -------------------------------------------------------------------------
245
+ # Run analysis (progressive: show GPT-OSS first, DeepSeek when ready)
246
  # -------------------------------------------------------------------------
247
 
248
  if st.session_state.run_analysis and st.session_state.input_text.strip():
249
  st.session_state.run_analysis = False
250
+ st.session_state.model_a_result = None
251
+ st.session_state.model_b_result = None
252
+
253
+ total_start = time.time()
254
+
255
+ with st.spinner("Translating discharge letter..."):
256
+ t0 = time.time()
257
+ st.session_state.translated_text = translate_to_english(st.session_state.input_text)
258
+ st.session_state.translation_latency = round(time.time() - t0, 2)
259
+
260
+ english = st.session_state.translated_text
261
+
262
+ pool = ThreadPoolExecutor(max_workers=2)
263
+ future_a = pool.submit(call_model_a, english)
264
+ future_b = pool.submit(call_model_b, english)
265
+ futures = {future_b: "model_b", future_a: "model_a"}
266
+
267
+ progress_placeholder = st.empty()
268
+ progress_placeholder.info(
269
+ "GPT-OSS-120B responding (~5s)... DeepSeek Reasoner thinking (~60-90s)..."
270
+ )
271
+
272
+ for fut in as_completed(futures):
273
+ key = futures[fut]
274
+ result = fut.result()
275
+ if key == "model_b":
276
+ st.session_state.model_b_result = result
277
+ progress_placeholder.info(
278
+ f"GPT-OSS-120B ready ({result.latency_seconds}s). "
279
+ "Waiting for DeepSeek Reasoner... Review GPT-OSS results below while you wait."
280
+ )
281
+ st.rerun()
282
+ else:
283
+ st.session_state.model_a_result = result
284
+
285
+ pool.shutdown(wait=False)
286
+ st.session_state.total_elapsed = round(time.time() - total_start, 2)
287
  st.rerun()
288
 
289
 
 
356
 
357
 
358
  # -------------------------------------------------------------------------
359
+ # Display results (progressive: GPT-OSS first, DeepSeek when ready)
360
  # -------------------------------------------------------------------------
361
 
362
+ has_any_result = st.session_state.model_b_result is not None
363
+ both_ready = has_any_result and st.session_state.model_a_result is not None
364
 
365
+ if has_any_result:
366
  st.markdown("---")
367
  st.header("Analysis Results")
368
+
369
+ if both_ready:
370
+ st.success(
371
+ f"Both models complete (total: {st.session_state.total_elapsed}s | "
372
+ f"translation: {st.session_state.translation_latency}s | "
373
+ f"DeepSeek: {st.session_state.model_a_result.latency_seconds}s | "
374
+ f"GPT-OSS: {st.session_state.model_b_result.latency_seconds}s)"
375
+ )
376
+ else:
377
+ st.info(
378
+ f"GPT-OSS-120B ready ({st.session_state.model_b_result.latency_seconds}s). "
379
+ "DeepSeek Reasoner is still thinking — review and rate GPT-OSS results below while you wait, "
380
+ "then click **Analyze** again when ready to see DeepSeek results."
381
+ )
382
 
383
  with st.expander("English Translation"):
384
+ st.markdown(st.session_state.translated_text)
385
 
386
  st.subheader("Model Comparison")
387
 
388
  col_a, col_b = st.columns(2, gap="large")
389
 
390
+ with col_b:
391
  st.markdown(
392
+ '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
393
+ "GPT-OSS-120B</h4></div>",
394
  unsafe_allow_html=True,
395
  )
396
+ render_model_output(st.session_state.model_b_result, "model-header-b")
397
 
398
+ with col_a:
399
  st.markdown(
400
+ '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
401
+ "DeepSeek Reasoner</h4></div>",
402
  unsafe_allow_html=True,
403
  )
404
+ if st.session_state.model_a_result is not None:
405
+ render_model_output(st.session_state.model_a_result, "model-header-a")
406
+ else:
407
+ st.markdown(
408
+ '<div style="background:#f7fafc; border:2px dashed #cbd5e0; '
409
+ 'border-radius:8px; padding:2rem; text-align:center; color:#718096;">'
410
+ "<strong>DeepSeek Reasoner</strong> is still processing...<br>"
411
+ "This typically takes 60-90 seconds.<br>"
412
+ "Review and rate GPT-OSS results below while you wait."
413
+ "</div>",
414
+ unsafe_allow_html=True,
415
+ )
416
 
417
  # -----------------------------------------------------------------
418
  # Feedback
 
430
 
431
  feedback_data = {}
432
 
433
+ available_models = [("model_b", "GPT-OSS-120B", st.session_state.model_b_result)]
434
+ if st.session_state.model_a_result is not None:
435
+ available_models.insert(0, ("model_a", "DeepSeek Reasoner", st.session_state.model_a_result))
436
+
437
+ for model_key, model_label, res in available_models:
438
  st.markdown(f"#### {model_label}")
439
 
440
  error_ratings = []
 
496
 
497
  st.markdown("---")
498
 
499
+ if not both_ready:
500
+ st.warning(
501
+ "DeepSeek Reasoner has not finished yet. You can submit partial feedback now "
502
+ "(GPT-OSS only) or wait for both models to complete."
503
+ )
504
+
505
  # Missed errors
506
  st.markdown("#### Missed Errors")
507
  missed_errors = st.text_area(
 
523
  if not st.session_state.physician_id.strip():
524
  st.warning("Please enter a Physician ID in the sidebar before submitting.")
525
  else:
526
+ model_a_res = st.session_state.model_a_result
527
+ model_b_res = st.session_state.model_b_result
528
  entry = {
529
  "timestamp": datetime.now().isoformat(),
530
  "physician_id": st.session_state.physician_id.strip(),
531
  "clinical_input": st.session_state.input_text,
532
+ "translation": st.session_state.translated_text,
533
+ "model_a_output": model_a_res.raw_response if model_a_res else "",
534
+ "model_b_output": model_b_res.raw_response if model_b_res else "",
535
+ "model_a_latency": model_a_res.latency_seconds if model_a_res else None,
536
+ "model_b_latency": model_b_res.latency_seconds if model_b_res else None,
537
+ "translation_latency": st.session_state.translation_latency,
538
+ "total_latency": st.session_state.total_elapsed,
539
+ "both_models_complete": both_ready,
540
  "ratings": feedback_data,
541
  "missed_errors": missed_errors,
542
  "comments": comments,