1. Structure responses with both diagnoses and follow-up questions
2. Maintain context between interactions
3. Use Mistral model (either local or API) when available
4. Format responses in a more interactive way
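For reference, items 1 and 4 above translate into a fixed JSON reply shape, the same one the new SYSTEM_PROMPT in the diff below enforces. A minimal illustration with made-up values:

# Example of the structured reply the app expects from the model.
# Field names match the SYSTEM_PROMPT in the diff below; the values are illustrative only.
example_reply = {
    "diagnoses": ["J45.909 - Unspecified asthma, uncomplicated"],
    "confidences": [0.72],
    "follow_up": "Do the symptoms get worse at night or with exercise?",
    "explanation": "Nocturnal or exertional worsening helps narrow down the diagnosis."
}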
src/app.py CHANGED (+64 -55)
@@ -102,15 +102,17 @@ def get_system_specs() -> Dict[str, float]:
         "gpu_vram_gb": gpu_vram_gb
     }
 
-def select_best_model()
+def select_best_model():
     """Select the best model based on system specifications."""
     specs = get_system_specs()
-    print(f"\nSystem specifications:")
-    print(f"RAM: {specs['ram_gb']:.1f} GB")
-    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")
 
+    # Prioritize Mistral if we have API key or sufficient resources
+    if any(k.startswith("mk-") for k in [api_key.value]):  # Check for Mistral API key
+        return "mistral-7b-instruct-v0.1.Q4_K_M.gguf", "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+    elif specs['gpu_vram_gb'] >= 6 or specs['ram_gb'] >= 16:
+        return MODEL_OPTIONS["medium"]["name"], MODEL_OPTIONS["medium"]["repo"]
     # Prioritize GPU if available
-
+    elif specs['gpu_vram_gb'] >= 4:  # You have 6GB, so this should work
         model_tier = "small"  # phi-2 should work well on RTX 2060
     elif specs['ram_gb'] >= 8:
         model_tier = "small"
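The hunk above checks for a Mistral API key first and then falls back through hardware tiers. A standalone sketch of that ordering, assuming the specs come from psutil and torch (the app's own get_system_specs() and MODEL_OPTIONS are not reproduced here; the thresholds mirror the diff, everything else is illustrative):

import psutil
import torch

def gather_specs():
    """Roughly what get_system_specs() returns; psutil/torch calls are an assumption."""
    ram_gb = psutil.virtual_memory().total / 1024**3
    vram_gb = (torch.cuda.get_device_properties(0).total_memory / 1024**3
               if torch.cuda.is_available() else 0.0)
    return {"ram_gb": ram_gb, "gpu_vram_gb": vram_gb}

def pick_tier(specs, have_mistral_key=False):
    # Same ordering as the hunk: API key first, then the medium Mistral GGUF
    # on a large machine, then the small (phi-2) tier for ~4-6 GB GPUs or >= 8 GB RAM.
    if have_mistral_key:
        return "mistral-api"   # placeholder label, not the app's identifier
    if specs["gpu_vram_gb"] >= 6 or specs["ram_gb"] >= 16:
        return "medium"        # Mistral-7B GGUF tier in the diff
    if specs["gpu_vram_gb"] >= 4 or specs["ram_gb"] >= 8:
        return "small"         # phi-2 tier in the diff
    return "small"             # the branch below these thresholds is outside the hunk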
@@ -212,12 +214,15 @@ symptom_index = create_symptom_index()
 print("Index created successfully")
 
 # --- System prompt ---
-SYSTEM_PROMPT = """
-
-
-
-
-"""
+SYSTEM_PROMPT = """You are a medical assistant using the Mistral model to analyze symptoms and determine ICD-10 codes.
+Your responses should ALWAYS be in this format:
+{
+    "diagnoses": ["ICD10 code - description"],
+    "confidences": [confidence score between 0-1],
+    "follow_up": "ONE specific follow-up question to refine the diagnosis",
+    "explanation": "Brief explanation of why you're asking this question"
+}
+Keep responses focused and clinical."""
 
 def process_speech(audio_data, history):
     """Process speech input and convert to text."""
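Because the prompt pins the reply to four exact fields, a small validation helper can reject malformed model output before it reaches the UI. A sketch: the key names come from the prompt above, everything else is an assumption, not code from the app.

import json

REQUIRED_KEYS = {"diagnoses", "confidences", "follow_up", "explanation"}

def parse_reply(raw_text):
    """Parse a model reply and check it against the SYSTEM_PROMPT's schema."""
    data = json.loads(raw_text)  # raises json.JSONDecodeError on non-JSON text
    missing = REQUIRED_KEYS - data.keys()
    if missing:
        raise ValueError(f"Reply is missing fields: {sorted(missing)}")
    if len(data["diagnoses"]) != len(data["confidences"]):
        raise ValueError("diagnoses and confidences must be the same length")
    return data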
@@ -521,80 +526,84 @@ with gr.Blocks(theme="default") as demo:
             features = process_audio(audio_array, sample_rate)
 
             asr = get_asr_pipeline()
             result = asr(features)
             return result.get("text", "").strip() if isinstance(result, dict) else str(result).strip()
         except Exception as e:
             print(f"Transcription error: {str(e)}")
             return ""
 
     microphone.stream(
         fn=update_live_transcription,
         inputs=[microphone],
         outputs=transcript_box,
         show_progress="hidden",
         queue=True
     )
 
     clear_btn.click(
         fn=lambda: (None, "", ""),
         outputs=[chatbot, transcript_box, text_input],
         queue=False
     )
 
     def cleanup_memory():
         """Release unused memory (placeholder for future memory management)."""
-        import
+        import gc
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
     def process_text_input(text, history):
-        """Process text input with
+        """Process text input with interactive follow-up."""
         if not text:
+            return history, ""
+        try:
+            # Add context from history
+            context = "\n".join([m["content"] for m in history if m["role"] == "user"]) if history else ""
-            prompt = f"""
-            Format as JSON with diagnoses, confidences, and follow_up fields."""
+            prompt = f"""{SYSTEM_PROMPT}
+Previous context: {context}
+Current symptoms: {text}
+Analyze and respond with likely diagnoses and ONE key follow-up question."""
 
+            response = llm.complete(prompt)
+
             try:
-                # Try to parse as JSON first
                 result = json.loads(response.text)
             except json.JSONDecodeError:
+                result = {
+                    "diagnoses": ["R69 - Illness, unspecified"],
+                    "confidences": [0.5],
+                    "follow_up": str(response.text)[:200],
+                    "explanation": "Could not parse response"
+                }
 
+            formatted_response = f"""Possible Diagnoses:
+{''.join(f'- {d} ({c*100:.0f}%)\n' for d, c in zip(result['diagnoses'], result['confidences']))}
+Follow-up Question: {result['follow_up']}
+({result['explanation']})"""
 
             new_history = history + [
                 {"role": "user", "content": text},
-                {"role": "assistant", "content":
+                {"role": "assistant", "content": formatted_response}
             ]
             return new_history, ""
         except Exception as e:
             print(f"Error processing text: {str(e)}")
             return history, text
 
     # Update the submit button handler
     submit_btn.click(
         fn=process_text_input,
         inputs=[text_input, chatbot],
         outputs=[chatbot, text_input],
-        queue=
+        queue=True
     ).success(  # Changed from .then to .success for better error handling
         fn=cleanup_memory,
         inputs=None,
         outputs=None,
         queue=False
    )
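The new process_text_input relies on a parse-or-fallback pattern: try json.loads on the model output, and if it is not valid JSON, wrap the raw text in a safe default so the chat turn still completes. A condensed sketch of that pattern (raw stands in for response.text; the fallback shape mirrors the diff's R69 default):

import json

def parse_or_fallback(raw):
    """Return a structured result, falling back to a safe default on parse errors."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Same shape as the diff's fallback: generic code, low confidence,
        # and the raw text (truncated) surfaced as the follow-up.
        return {
            "diagnoses": ["R69 - Illness, unspecified"],
            "confidences": [0.5],
            "follow_up": raw[:200],
            "explanation": "Could not parse response",
        }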
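The submit handler now chains cleanup through .success() instead of .then(), so memory is only released after process_text_input returns without raising. A minimal self-contained Gradio sketch of that chaining; the components and functions here are illustrative, not the app's:

import gc
import gradio as gr

def respond(text, history):
    # Stand-in for process_text_input: append a user/assistant turn and clear the box.
    history = (history or []) + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": f"Echo: {text}"},
    ]
    return history, ""

def cleanup():
    gc.collect()  # place to free model / CUDA memory after a successful turn

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox()
    send = gr.Button("Send")
    # .success() only fires if respond() did not raise, unlike .then()
    send.click(fn=respond, inputs=[box, chat], outputs=[chat, box], queue=True
               ).success(fn=cleanup, inputs=None, outputs=None, queue=False)

# demo.launch()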