Vrda committed on
Commit
dd1a514
·
verified ·
1 Parent(s): a5a4feb

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +104 -37
app.py CHANGED
@@ -8,9 +8,10 @@ import time
8
  import json
9
  import os
10
  import tempfile
 
11
  from datetime import datetime
12
  from pathlib import Path
13
- from backend import run_error_check
14
 
15
  FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"
16
  HF_DATASET_REPO = "Vrda/im-error-check-data"
@@ -173,8 +174,11 @@ Preporučen kontrolni pregled za 14 dana."""
173
 
174
  for key, default in [
175
  ("input_text", ""),
176
- ("result", None),
177
- ("elapsed", 0),
 
 
 
178
  ("run_analysis", False),
179
  ("physician_id", ""),
180
  ]:
@@ -238,15 +242,48 @@ st.text_area(
238
  st.button("Analyze", type="primary", on_click=trigger_analysis)
239
 
240
  # -------------------------------------------------------------------------
241
- # Run analysis
242
  # -------------------------------------------------------------------------
243
 
244
  if st.session_state.run_analysis and st.session_state.input_text.strip():
245
  st.session_state.run_analysis = False
246
- with st.spinner("Running error-check with both AI models (15-45 seconds)..."):
247
- start = time.time()
248
- st.session_state.result = run_error_check(st.session_state.input_text)
249
- st.session_state.elapsed = time.time() - start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  st.rerun()
251
 
252
 
@@ -319,43 +356,63 @@ def render_model_output(result, header_class: str):
319
 
320
 
321
  # -------------------------------------------------------------------------
322
- # Display results
323
  # -------------------------------------------------------------------------
324
 
325
- if st.session_state.result:
326
- r = st.session_state.result
327
 
 
328
  st.markdown("---")
329
  st.header("Analysis Results")
330
- st.success(
331
- f"Completed in {st.session_state.elapsed:.1f}s "
332
- f"(translation: {r.translation_latency}s, "
333
- f"Model A: {r.model_a_result.latency_seconds}s, "
334
- f"Model B: {r.model_b_result.latency_seconds}s)"
335
- )
 
 
 
 
 
 
 
 
336
 
337
  with st.expander("English Translation"):
338
- st.markdown(r.translated_text)
339
 
340
  st.subheader("Model Comparison")
341
 
342
  col_a, col_b = st.columns(2, gap="large")
343
 
344
- with col_a:
345
  st.markdown(
346
- '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
347
- "DeepSeek Reasoner</h4></div>",
348
  unsafe_allow_html=True,
349
  )
350
- render_model_output(r.model_a_result, "model-header-a")
351
 
352
- with col_b:
353
  st.markdown(
354
- '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
355
- "GPT-OSS-120B</h4></div>",
356
  unsafe_allow_html=True,
357
  )
358
- render_model_output(r.model_b_result, "model-header-b")
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  # -----------------------------------------------------------------
361
  # Feedback
@@ -373,10 +430,11 @@ if st.session_state.result:
373
 
374
  feedback_data = {}
375
 
376
- for model_key, model_label, res in [
377
- ("model_a", "DeepSeek Reasoner", r.model_a_result),
378
- ("model_b", "GPT-OSS-120B", r.model_b_result),
379
- ]:
 
380
  st.markdown(f"#### {model_label}")
381
 
382
  error_ratings = []
@@ -438,6 +496,12 @@ if st.session_state.result:
438
 
439
  st.markdown("---")
440
 
 
 
 
 
 
 
441
  # Missed errors
442
  st.markdown("#### Missed Errors")
443
  missed_errors = st.text_area(
@@ -459,17 +523,20 @@ if st.session_state.result:
459
  if not st.session_state.physician_id.strip():
460
  st.warning("Please enter a Physician ID in the sidebar before submitting.")
461
  else:
 
 
462
  entry = {
463
  "timestamp": datetime.now().isoformat(),
464
  "physician_id": st.session_state.physician_id.strip(),
465
  "clinical_input": st.session_state.input_text,
466
- "translation": r.translated_text,
467
- "model_a_output": r.model_a_result.raw_response,
468
- "model_b_output": r.model_b_result.raw_response,
469
- "model_a_latency": r.model_a_result.latency_seconds,
470
- "model_b_latency": r.model_b_result.latency_seconds,
471
- "translation_latency": r.translation_latency,
472
- "total_latency": round(st.session_state.elapsed, 2),
 
473
  "ratings": feedback_data,
474
  "missed_errors": missed_errors,
475
  "comments": comments,
 
8
  import json
9
  import os
10
  import tempfile
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
  from datetime import datetime
13
  from pathlib import Path
14
+ from backend import translate_to_english, call_model_a, call_model_b
15
 
16
  FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"
17
  HF_DATASET_REPO = "Vrda/im-error-check-data"
 
174
 
175
  for key, default in [
176
  ("input_text", ""),
177
+ ("translated_text", None),
178
+ ("model_a_result", None),
179
+ ("model_b_result", None),
180
+ ("translation_latency", 0),
181
+ ("total_elapsed", 0),
182
  ("run_analysis", False),
183
  ("physician_id", ""),
184
  ]:
 
242
  st.button("Analyze", type="primary", on_click=trigger_analysis)
243
 
244
  # -------------------------------------------------------------------------
245
+ # Run analysis (progressive: show GPT-OSS first, DeepSeek when ready)
246
  # -------------------------------------------------------------------------
247
 
248
  if st.session_state.run_analysis and st.session_state.input_text.strip():
249
  st.session_state.run_analysis = False
250
+ st.session_state.model_a_result = None
251
+ st.session_state.model_b_result = None
252
+
253
+ total_start = time.time()
254
+
255
+ with st.spinner("Translating discharge letter..."):
256
+ t0 = time.time()
257
+ st.session_state.translated_text = translate_to_english(st.session_state.input_text)
258
+ st.session_state.translation_latency = round(time.time() - t0, 2)
259
+
260
+ english = st.session_state.translated_text
261
+
262
+ pool = ThreadPoolExecutor(max_workers=2)
263
+ future_a = pool.submit(call_model_a, english)
264
+ future_b = pool.submit(call_model_b, english)
265
+ futures = {future_b: "model_b", future_a: "model_a"}
266
+
267
+ progress_placeholder = st.empty()
268
+ progress_placeholder.info(
269
+ "GPT-OSS-120B responding (~5s)... DeepSeek Reasoner thinking (~60-90s)..."
270
+ )
271
+
272
+ for fut in as_completed(futures):
273
+ key = futures[fut]
274
+ result = fut.result()
275
+ if key == "model_b":
276
+ st.session_state.model_b_result = result
277
+ progress_placeholder.info(
278
+ f"GPT-OSS-120B ready ({result.latency_seconds}s). "
279
+ "Waiting for DeepSeek Reasoner... Review GPT-OSS results below while you wait."
280
+ )
281
+ st.rerun()
282
+ else:
283
+ st.session_state.model_a_result = result
284
+
285
+ pool.shutdown(wait=False)
286
+ st.session_state.total_elapsed = round(time.time() - total_start, 2)
287
  st.rerun()
288
 
289
 
 
356
 
357
 
358
  # -------------------------------------------------------------------------
359
+ # Display results (progressive: GPT-OSS first, DeepSeek when ready)
360
  # -------------------------------------------------------------------------
361
 
362
+ has_any_result = st.session_state.model_b_result is not None
363
+ both_ready = has_any_result and st.session_state.model_a_result is not None
364
 
365
+ if has_any_result:
366
  st.markdown("---")
367
  st.header("Analysis Results")
368
+
369
+ if both_ready:
370
+ st.success(
371
+ f"Both models complete (total: {st.session_state.total_elapsed}s | "
372
+ f"translation: {st.session_state.translation_latency}s | "
373
+ f"DeepSeek: {st.session_state.model_a_result.latency_seconds}s | "
374
+ f"GPT-OSS: {st.session_state.model_b_result.latency_seconds}s)"
375
+ )
376
+ else:
377
+ st.info(
378
+ f"GPT-OSS-120B ready ({st.session_state.model_b_result.latency_seconds}s). "
379
+ "DeepSeek Reasoner is still thinking — review and rate GPT-OSS results below while you wait, "
380
+ "then click **Analyze** again when ready to see DeepSeek results."
381
+ )
382
 
383
  with st.expander("English Translation"):
384
+ st.markdown(st.session_state.translated_text)
385
 
386
  st.subheader("Model Comparison")
387
 
388
  col_a, col_b = st.columns(2, gap="large")
389
 
390
+ with col_b:
391
  st.markdown(
392
+ '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
393
+ "GPT-OSS-120B</h4></div>",
394
  unsafe_allow_html=True,
395
  )
396
+ render_model_output(st.session_state.model_b_result, "model-header-b")
397
 
398
+ with col_a:
399
  st.markdown(
400
+ '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
401
+ "DeepSeek Reasoner</h4></div>",
402
  unsafe_allow_html=True,
403
  )
404
+ if st.session_state.model_a_result is not None:
405
+ render_model_output(st.session_state.model_a_result, "model-header-a")
406
+ else:
407
+ st.markdown(
408
+ '<div style="background:#f7fafc; border:2px dashed #cbd5e0; '
409
+ 'border-radius:8px; padding:2rem; text-align:center; color:#718096;">'
410
+ "<strong>DeepSeek Reasoner</strong> is still processing...<br>"
411
+ "This typically takes 60-90 seconds.<br>"
412
+ "Review and rate GPT-OSS results below while you wait."
413
+ "</div>",
414
+ unsafe_allow_html=True,
415
+ )
416
 
417
  # -----------------------------------------------------------------
418
  # Feedback
 
430
 
431
  feedback_data = {}
432
 
433
+ available_models = [("model_b", "GPT-OSS-120B", st.session_state.model_b_result)]
434
+ if st.session_state.model_a_result is not None:
435
+ available_models.insert(0, ("model_a", "DeepSeek Reasoner", st.session_state.model_a_result))
436
+
437
+ for model_key, model_label, res in available_models:
438
  st.markdown(f"#### {model_label}")
439
 
440
  error_ratings = []
 
496
 
497
  st.markdown("---")
498
 
499
+ if not both_ready:
500
+ st.warning(
501
+ "DeepSeek Reasoner has not finished yet. You can submit partial feedback now "
502
+ "(GPT-OSS only) or wait for both models to complete."
503
+ )
504
+
505
  # Missed errors
506
  st.markdown("#### Missed Errors")
507
  missed_errors = st.text_area(
 
523
  if not st.session_state.physician_id.strip():
524
  st.warning("Please enter a Physician ID in the sidebar before submitting.")
525
  else:
526
+ model_a_res = st.session_state.model_a_result
527
+ model_b_res = st.session_state.model_b_result
528
  entry = {
529
  "timestamp": datetime.now().isoformat(),
530
  "physician_id": st.session_state.physician_id.strip(),
531
  "clinical_input": st.session_state.input_text,
532
+ "translation": st.session_state.translated_text,
533
+ "model_a_output": model_a_res.raw_response if model_a_res else "",
534
+ "model_b_output": model_b_res.raw_response if model_b_res else "",
535
+ "model_a_latency": model_a_res.latency_seconds if model_a_res else None,
536
+ "model_b_latency": model_b_res.latency_seconds if model_b_res else None,
537
+ "translation_latency": st.session_state.translation_latency,
538
+ "total_latency": st.session_state.total_elapsed,
539
+ "both_models_complete": both_ready,
540
  "ratings": feedback_data,
541
  "missed_errors": missed_errors,
542
  "comments": comments,