root committed on
Commit
0d2448c
·
1 Parent(s): ae51aec

Feature: Integrate google/MedASR for speech to text prompting for MedGemma

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -0
  2. app/main.py +23 -0
  3. frontend/dashboard.py +23 -0
  4. requirements.txt +5 -1
Dockerfile CHANGED
@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
6
  curl \
7
  gcc \
8
  musl \
 
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # 2. CREATE THE BRIDGE (The critical step)
 
6
  curl \
7
  gcc \
8
  musl \
9
+ libsndfile1 \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # 2. CREATE THE BRIDGE (The critical step)
app/main.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, HTTPException
2
  from contextlib import asynccontextmanager
3
  from app.schemas import ClinicalNoteRequest, DiagnosisResponse
4
  from app.model import predictor
 
5
  import os
6
 
7
  # 1. Define the lifespan (startup/shutdown logic)
@@ -22,6 +23,9 @@ async def lifespan(app: FastAPI):
22
  # --- SHUTDOWN ---
23
  print("🛑 Cleaning up...")
24
 
 
 
 
25
  # 2. Create the 'app' object
26
  app = FastAPI(lifespan=lifespan, title="Med-Gemma Impact API")
27
 
@@ -37,6 +41,25 @@ def check_emergency(notes: str, symptoms: list) -> bool:
37
  return True
38
  return False
39
  # ---------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # 3. Define the endpoints
42
  @app.get("/")
 
2
  from contextlib import asynccontextmanager
3
  from app.schemas import ClinicalNoteRequest, DiagnosisResponse
4
  from app.model import predictor
5
+ from transformers import pipeline
6
  import os
7
 
8
  # 1. Define the lifespan (startup/shutdown logic)
 
23
  # --- SHUTDOWN ---
24
  print("🛑 Cleaning up...")
25
 
26
+
27
+
28
+
29
  # 2. Create the 'app' object
30
  app = FastAPI(lifespan=lifespan, title="Med-Gemma Impact API")
31
 
 
41
  return True
42
  return False
43
  # ---------------------------------
44
# NOTE(review): UploadFile and File were used in the endpoint signature but
# never imported in this commit — the module would fail with NameError at
# import time. Bring them into scope here.
from fastapi import File, UploadFile
import tempfile

# Initialize the MedASR pipeline (Ears).
# The CPU-only torch wheel is installed, so pin the pipeline to CPU.
med_asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="google/medasr",
    device="cpu",
)


@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with MedASR.

    Returns:
        dict: ``{"transcription": <recognized text>}``.
    """
    # A fixed filename ("temp_audio.wav") would be clobbered by concurrent
    # requests and was never cleaned up; write to a unique temp file and
    # remove it when done.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # Transcribe using MedASR
        result = med_asr_pipe(tmp_path)
        return {"transcription": result["text"]}
    finally:
        os.remove(tmp_path)
63
 
64
  # 3. Define the endpoints
65
  @app.get("/")
frontend/dashboard.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  import requests
3
  import json
4
  import os
@@ -10,6 +11,28 @@ st.set_page_config(page_title="Med-Gemma Triage", page_icon="🏥")
10
 
11
  # 2. The UI Layout
12
  st.title("🏥 Med-Gemma: Clinical Triage Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  st.markdown("---")
14
 
15
  col1, col2 = st.columns(2)
 
1
  import streamlit as st
2
+ from streamlit_mic_recorder import mic_recorder
3
  import requests
4
  import json
5
  import os
 
11
 
12
  # 2. The UI Layout
13
  st.title("🏥 Med-Gemma: Clinical Triage Assistant")
14
+
15
# Add a voice recording section
st.subheader("Dictate Symptoms")
audio = mic_recorder(
    start_prompt="⏺️ Start Dictation",
    stop_prompt="⏹️ Stop",
    key='recorder'
)

# Process the audio if recorded
if audio:
    with st.spinner("Transcribing medical dictation..."):
        # Send audio bytes to our new /transcribe endpoint
        files = {'file': audio['bytes']}
        # CPU transcription can be slow, but without a timeout a stalled
        # backend would hang the Streamlit rerun forever.
        response = requests.post(
            f"{st.secrets['API_URL']}/transcribe",
            files=files,
            timeout=120,
        )

    if response.status_code == 200:
        transcription = response.json().get("transcription")
        # Fill the symptoms box with the transcription
        st.session_state.symptoms = transcription
        st.success("Transcription complete!")
    else:
        # Surface backend failures instead of failing silently
        st.error(f"Transcription failed (HTTP {response.status_code}).")
36
  st.markdown("---")
37
 
38
  col1, col2 = st.columns(2)
requirements.txt CHANGED
@@ -2,4 +2,8 @@ fastapi
2
  uvicorn[standard]
3
  pydantic
4
  huggingface_hub
5
- llama-cpp-python
 
 
 
 
 
2
  uvicorn[standard]
3
  pydantic
4
  huggingface_hub
5
+ llama-cpp-python
6
+ transformers
7
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ torch
8
+ librosa
9
+ streamlit-mic-recorder