salmasoma committed on
Commit
feb0b0a
·
1 Parent(s): a11e8f7

Use local foundation MedGemma generation when remote API fails

Browse files
src/demo_backend/foundation_embeddings.py CHANGED
@@ -86,6 +86,7 @@ def extract_foundation_embeddings(
86
  status: Dict[str, str] = {}
87
  siglib_embedding: Optional[torch.Tensor] = None
88
  gemma_embedding: Optional[torch.Tensor] = None
 
89
  use_cache = _cache_foundation_models()
90
 
91
  # Extract MedGemma first (typically larger memory footprint), then release.
@@ -118,6 +119,21 @@ def extract_foundation_embeddings(
118
  gemma_embedding = clinical_encoder.extract_embeddings([narrative], device=device).float()
119
  model_type = getattr(clinical_encoder, "model_type", "unknown")
120
  status["medgemma"] = f"{model_type}:{medgemma_model_name}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  if require_true_hf_models and model_type != "medgemma":
122
  raise RuntimeError(
123
  f"Expected MedGemma but got fallback model_type='{model_type}' "
@@ -165,5 +181,6 @@ def extract_foundation_embeddings(
165
  return {
166
  "siglib_embedding": siglib_embedding,
167
  "gemma_embedding": gemma_embedding,
 
168
  "status": status,
169
  }
 
86
  status: Dict[str, str] = {}
87
  siglib_embedding: Optional[torch.Tensor] = None
88
  gemma_embedding: Optional[torch.Tensor] = None
89
+ medgemma_local_output: Optional[str] = None
90
  use_cache = _cache_foundation_models()
91
 
92
  # Extract MedGemma first (typically larger memory footprint), then release.
 
119
  gemma_embedding = clinical_encoder.extract_embeddings([narrative], device=device).float()
120
  model_type = getattr(clinical_encoder, "model_type", "unknown")
121
  status["medgemma"] = f"{model_type}:{medgemma_model_name}"
122
+
123
+ if model_type == "medgemma" and _is_true(os.getenv("HF_LOCAL_MEDGEMMA_REPORT"), default=True):
124
+ try:
125
+ local_prompt = (
126
+ "Given this patient summary and class probabilities, write a concise clinical report "
127
+ "with key evidence and one-line impression.\n\n"
128
+ f"Patient summary:\n{narrative}"
129
+ )
130
+ medgemma_local_output = clinical_encoder.generate_local_report(
131
+ prompt=local_prompt,
132
+ device=device,
133
+ max_new_tokens=160,
134
+ )
135
+ except Exception as local_exc:
136
+ status["medgemma_local_generation"] = f"error:{type(local_exc).__name__}: {_short_error(local_exc)}"
137
  if require_true_hf_models and model_type != "medgemma":
138
  raise RuntimeError(
139
  f"Expected MedGemma but got fallback model_type='{model_type}' "
 
181
  return {
182
  "siglib_embedding": siglib_embedding,
183
  "gemma_embedding": gemma_embedding,
184
+ "medgemma_local_output": medgemma_local_output,
185
  "status": status,
186
  }
src/demo_backend/neurofusion/medgemma_encoder.py CHANGED
@@ -293,6 +293,42 @@ class MedGemmaEncoder(nn.Module):
293
  self.eval()
294
  return self.encode_text(narratives, device).float()
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  class StructuredClinicalEncoder(nn.Module):
298
  """MLP encoder for structured clinical features (demographics + health history).
 
293
  self.eval()
294
  return self.encode_text(narratives, device).float()
295
 
296
@torch.no_grad()
def generate_local_report(
    self,
    prompt: str,
    device: torch.device,
    max_new_tokens: int = 160,
) -> str:
    """Generate text locally with MedGemma when remote inference is unavailable.

    Args:
        prompt: Instruction plus patient narrative to condition generation on.
        device: Device the tokenized inputs are moved to (must match the model).
        max_new_tokens: Upper bound on tokens generated beyond the prompt.

    Returns:
        The generated continuation with the prompt excluded, or "" when the
        local MedGemma backbone/tokenizer is not loaded.
    """
    # Local generation is only meaningful with the true MedGemma backbone;
    # fallback encoders have no causal LM head to generate with.
    if self.model_type != "medgemma" or self.tokenizer is None or self.lm_backbone is None:
        return ""

    # Some causal-LM tokenizers ship without a pad token; reuse EOS for padding
    # so `generate` does not fail on a missing pad_token_id.
    if self.tokenizer.pad_token_id is None and self.tokenizer.eos_token_id is not None:
        self.tokenizer.pad_token = self.tokenizer.eos_token

    inputs = self.tokenizer(
        prompt,
        truncation=True,
        max_length=self.max_length,
        return_tensors="pt",
    ).to(device)

    # NOTE: `temperature` is intentionally omitted — it is ignored (and warned
    # about, or rejected by newer transformers) when do_sample=False.
    generated = self.lm_backbone.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # deterministic greedy decoding
        pad_token_id=self.tokenizer.pad_token_id,
        eos_token_id=self.tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Slicing by the input length is
    # robust even when decode(encode(prompt)) does not reproduce the prompt
    # verbatim, which would defeat a startswith-based strip.
    prompt_len = inputs["input_ids"].shape[-1]
    text = self.tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
    return text.strip()
331
+
332
 
333
  class StructuredClinicalEncoder(nn.Module):
334
  """MLP encoder for structured clinical features (demographics + health history).
src/demo_backend/pipeline.py CHANGED
@@ -67,6 +67,7 @@ def run_full_inference(
67
  foundation = {
68
  "siglib_embedding": None,
69
  "gemma_embedding": None,
 
70
  "status": {"medsiglip": "disabled", "medgemma": "disabled"},
71
  }
72
  if use_hf_foundation_embeddings:
@@ -101,6 +102,7 @@ def run_full_inference(
101
  prediction=prediction,
102
  enable_remote_llm=enable_remote_medgemma_report,
103
  foundation_status=foundation["status"],
 
104
  )
105
 
106
  final_payload = {
@@ -113,6 +115,7 @@ def run_full_inference(
113
  "avra_scores": avra_scores,
114
  "clinical_narrative": narrative,
115
  "foundation_embeddings": foundation["status"],
 
116
  "medgemma_report": report,
117
  "prediction": prediction,
118
  }
 
67
  foundation = {
68
  "siglib_embedding": None,
69
  "gemma_embedding": None,
70
+ "medgemma_local_output": None,
71
  "status": {"medsiglip": "disabled", "medgemma": "disabled"},
72
  }
73
  if use_hf_foundation_embeddings:
 
102
  prediction=prediction,
103
  enable_remote_llm=enable_remote_medgemma_report,
104
  foundation_status=foundation["status"],
105
+ local_medgemma_output=foundation.get("medgemma_local_output"),
106
  )
107
 
108
  final_payload = {
 
115
  "avra_scores": avra_scores,
116
  "clinical_narrative": narrative,
117
  "foundation_embeddings": foundation["status"],
118
+ "medgemma_local_output": foundation.get("medgemma_local_output"),
119
  "medgemma_report": report,
120
  "prediction": prediction,
121
  }
src/demo_backend/reporting.py CHANGED
@@ -117,6 +117,7 @@ def generate_medgemma_report(
117
  prediction: Dict,
118
  enable_remote_llm: bool = True,
119
  foundation_status: Optional[Mapping[str, str]] = None,
 
120
  ) -> Dict[str, str]:
121
  """Generate clinical report.
122
 
@@ -138,6 +139,16 @@ def generate_medgemma_report(
138
  "medgemma_available": "false",
139
  }
140
 
 
 
 
 
 
 
 
 
 
 
141
  token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
142
  configured = os.getenv("MEDGEMMA_MODEL_ID", "").strip()
143
  model_candidates = _build_model_candidates(configured)
 
117
  prediction: Dict,
118
  enable_remote_llm: bool = True,
119
  foundation_status: Optional[Mapping[str, str]] = None,
120
+ local_medgemma_output: Optional[str] = None,
121
  ) -> Dict[str, str]:
122
  """Generate clinical report.
123
 
 
139
  "medgemma_available": "false",
140
  }
141
 
142
+ # If local MedGemma generation is available from the foundation encoder, use it.
143
+ if local_medgemma_output and local_medgemma_output.strip():
144
+ med_out = local_medgemma_output.strip()
145
+ return {
146
+ "report": _compose_report(base_narrative, med_out, prediction),
147
+ "source": "local_foundation_medgemma",
148
+ "medgemma_output": med_out,
149
+ "medgemma_available": "true",
150
+ }
151
+
152
  token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
153
  configured = os.getenv("MEDGEMMA_MODEL_ID", "").strip()
154
  model_candidates = _build_model_candidates(configured)