Revert "1. Structure responses with both diagnoses and follow-up questions"

This reverts commit f21d279f5305b3325d8c12144d6035d652b3f769.

src/app.py CHANGED (+55 -64)
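Note: several "-" lines below carry fused fragments of neighbouring code (e.g. queue=Trueanscript_box,). That appears to be the corrupted state of src/app.py introduced by the reverted commit, and it is preserved verbatim on the removal side of the diff.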
@@ -102,17 +102,15 @@ def get_system_specs() -> Dict[str, float]:
         "gpu_vram_gb": gpu_vram_gb
     }
 
-def select_best_model():
+def select_best_model() -> Tuple[str, str]:
     """Select the best model based on system specifications."""
     specs = get_system_specs()
+    print(f"\nSystem specifications:")
+    print(f"RAM: {specs['ram_gb']:.1f} GB")
+    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")
 
-    # Prioritize Mistral if we have API key or sufficient resources
-    if any(k.startswith("mk-") for k in [api_key.value]):  # Check for Mistral API key
-        return "mistral-7b-instruct-v0.1.Q4_K_M.gguf", "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-    elif specs['gpu_vram_gb'] >= 6 or specs['ram_gb'] >= 16:
-        return MODEL_OPTIONS["medium"]["name"], MODEL_OPTIONS["medium"]["repo"]
     # Prioritize GPU if available
-
+    if specs['gpu_vram_gb'] >= 4:  # You have 6GB, so this should work
         model_tier = "small"  # phi-2 should work well on RTX 2060
     elif specs['ram_gb'] >= 8:
         model_tier = "small"
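A note on the restored select_best_model: the new -> Tuple[str, str] annotation presumes from typing import Tuple at the top of src/app.py, and the tier logic presumes the MODEL_OPTIONS table referenced by the removed MODEL_OPTIONS["medium"] lookups. A minimal sketch of the assumed shape, with placeholder entries wherever this diff does not show the real values:

from typing import Dict, Tuple

# Assumed shape of MODEL_OPTIONS, inferred from the removed
# MODEL_OPTIONS["medium"]["name"] / ["repo"] lookups; entries are placeholders.
MODEL_OPTIONS: Dict[str, Dict[str, str]] = {
    "small": {
        "name": "phi-2.Q4_K_M.gguf",                     # placeholder file name
        "repo": "TheBloke/phi-2-GGUF",                   # placeholder repo id
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",  # seen in the removed lines
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    },
}

def resolve_model(model_tier: str) -> Tuple[str, str]:
    """Map a tier chosen by select_best_model() to a (file name, repo id) pair."""
    option = MODEL_OPTIONS[model_tier]
    return option["name"], option["repo"]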
@@ -214,15 +212,12 @@ symptom_index = create_symptom_index()
 print("Index created successfully")
 
 # --- System prompt ---
-SYSTEM_PROMPT = """
-
-
-
-
-
-    "explanation": "Brief explanation of why you're asking this question"
-}
-Keep responses focused and clinical."""
+SYSTEM_PROMPT = """
+You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
+At each turn, EITHER ask one focused clarifying question (e.g. "Is your cough dry or productive?")
+or, if you have enough info, output a final JSON with fields:
+{"diagnoses":[…], "confidences":[…]}.
+"""
 
 def process_speech(audio_data, history):
     """Process speech input and convert to text."""
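The restored SYSTEM_PROMPT allows two reply shapes: a plain-text clarifying question, or a final JSON object with diagnoses and confidences fields. Calling code therefore has to tell the two apart. A minimal sketch of that check, assuming the reply arrives as plain text (illustrative only, not part of this diff):

import json
from typing import Any, Dict, Optional

def parse_final_answer(reply_text: str) -> Optional[Dict[str, Any]]:
    """Return the final {"diagnoses": [...], "confidences": [...]} payload
    if the model emitted one, else None (the reply is a follow-up question)."""
    try:
        payload = json.loads(reply_text)
    except json.JSONDecodeError:
        return None  # plain-text clarifying question
    if isinstance(payload, dict) and "diagnoses" in payload and "confidences" in payload:
        return payload
    return None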
@@ -526,84 +521,80 @@ with gr.Blocks(theme="default") as demo:
             features = process_audio(audio_array, sample_rate)
 
             asr = get_asr_pipeline()
+            result = asr(features)
+
             return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()
         except Exception as e:
-            print(f"Transcription error: {str(e)}")f isinstance(result, dict) else str(result).strip()
-            return ""ion as e:
             print(f"Transcription error: {str(e)}")
+            return ""
+
     microphone.stream(
         fn=update_live_transcription,
         inputs=[microphone],
-        outputs=transcript_box,
+        outputs=transcript_box,
         show_progress="hidden",
-        queue=Trueanscript_box,
-        ) show_progress="hidden",
         queue=True
+    )
+
     clear_btn.click(
         fn=lambda: (None, "", ""),
         outputs=[chatbot, transcript_box, text_input],
-        queue=False(None, "", ""),
-        ) outputs=[chatbot, transcript_box, text_input],
         queue=False
+    )
+
     def cleanup_memory():
         """Release unused memory (placeholder for future memory management)."""
-        import
-        gc.collect()
+        import gc
+        gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-
+
     def process_text_input(text, history):
-        """Process text input with
-        if not text:_input(text, history):
-            return history, ""with interactive follow-up."""
+        """Process text input with memory management."""
         if not text:
-
-
-
-            #
-            prompt = f"""
-
-
-
-
-
+            return history, ""  # Return tuple to clear input
+
+        try:
+            # Process the symptoms using the configured LLM
+            prompt = f"""Given these symptoms: '{text}'
+            Please provide:
+            1. Most likely ICD-10 codes
+            2. Confidence levels for each diagnosis
+            3. Key follow-up questions
+
+            Format as JSON with diagnoses, confidences, and follow_up fields."""
 
-
+            response = llm.complete(prompt)
+
+            try:
+                # Try to parse as JSON first
             result = json.loads(response.text)
         except json.JSONDecodeError:
-
-
-            "
-            "
-            "
-            }
-
-            formatted_response = f"""Possible Diagnoses:
-            {''.join(f'- {d} ({c*100:.0f}%)\n' for d, c in zip(result['diagnoses'], result['confidences']))}
-            formatted_response = f"""Possible Diagnoses:
-            Follow-up Question: {result['follow_up']} c in zip(result['diagnoses'], result['confidences']))}
-            ({result['explanation']})"""
-            Follow-up Question: {result['follow_up']}
+            # If not JSON, wrap in our format
+            result = {
+                "diagnoses": [],
+                "confidences": [],
+                "follow_up": str(response.text)[:1000]  # Limit response length
+            }
+
         new_history = history + [
             {"role": "user", "content": text},
-            {"role": "assistant", "content":
-            ]
-            return new_history, ""
+            {"role": "assistant", "content": format_response_for_user(result)}
+        ]
+        return new_history, ""  # Return empty string to clear input
     except Exception as e:
         print(f"Error processing text: {str(e)}")
-        return history, text
-
+        return history, text  # Keep text on error
+
     # Update the submit button handler
     submit_btn.click(
-        fn=process_text_input,
+        fn=process_text_input,
         inputs=[text_input, chatbot],
         outputs=[chatbot, text_input],
-        queue=
+        queue=True
    ).success(  # Changed from .then to .success for better error handling
        fn=cleanup_memory,
-        inputs=None,
-        outputs=None,
-        queue=False,
-        ) outputs=None,
+        inputs=None,
+        outputs=None,
        queue=False
    )
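The restored handler renders the parsed result with format_response_for_user, which this diff does not define. Judging from the inline formatting it replaces (a "Possible Diagnoses:" list with percentage confidences plus a follow-up question), a hypothetical reconstruction could look like:

from typing import Any, Dict

def format_response_for_user(result: Dict[str, Any]) -> str:
    """Hypothetical stand-in for the helper used above; the real definition
    lives elsewhere in src/app.py and may differ."""
    lines = ["Possible Diagnoses:"]
    for d, c in zip(result.get("diagnoses", []), result.get("confidences", [])):
        lines.append(f"- {d} ({c * 100:.0f}%)")
    if result.get("follow_up"):
        lines.append(f"\nFollow-up Question: {result['follow_up']}")
    return "\n".join(lines)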
|