root committed on
Commit
0d2448c
·
1 Parent(s): ae51aec

Feature: Integrate google/MedASR for speech to text prompting for MedGemma

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -0
  2. app/main.py +23 -0
  3. frontend/dashboard.py +23 -0
  4. requirements.txt +5 -1
Dockerfile CHANGED
@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
6
  curl \
7
  gcc \
8
  musl \
 
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # 2. CREATE THE BRIDGE (The critical step)
 
6
  curl \
7
  gcc \
8
  musl \
9
+ libsndfile1 \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # 2. CREATE THE BRIDGE (The critical step)
app/main.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, HTTPException
2
  from contextlib import asynccontextmanager
3
  from app.schemas import ClinicalNoteRequest, DiagnosisResponse
4
  from app.model import predictor
 
5
  import os
6
 
7
  # 1. Define the lifespan (startup/shutdown logic)
@@ -22,6 +23,9 @@ async def lifespan(app: FastAPI):
22
  # --- SHUTDOWN ---
23
  print("🛑 Cleaning up...")
24
 
 
 
 
25
  # 2. Create the 'app' object
26
  app = FastAPI(lifespan=lifespan, title="Med-Gemma Impact API")
27
 
@@ -37,6 +41,25 @@ def check_emergency(notes: str, symptoms: list) -> bool:
37
  return True
38
  return False
39
  # ---------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # 3. Define the endpoints
42
  @app.get("/")
 
2
  from contextlib import asynccontextmanager
3
  from app.schemas import ClinicalNoteRequest, DiagnosisResponse
4
  from app.model import predictor
5
+ from transformers import pipeline
6
  import os
7
 
8
  # 1. Define the lifespan (startup/shutdown logic)
 
23
  # --- SHUTDOWN ---
24
  print("🛑 Cleaning up...")
25
 
26
+
27
+
28
+
29
  # 2. Create the 'app' object
30
  app = FastAPI(lifespan=lifespan, title="Med-Gemma Impact API")
31
 
 
41
  return True
42
  return False
43
  # ---------------------------------
44
# NOTE(review): UploadFile and File were used in the endpoint signature but
# never imported in this commit — the module would fail with NameError at
# import time. Bring them into scope here.
from fastapi import File, UploadFile
import tempfile

# Initialize the MedASR pipeline (Ears).
# The CPU-only torch wheel is installed, so pin the pipeline to CPU.
med_asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="google/medasr",
    device="cpu",
)


@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with MedASR.

    Returns:
        dict: ``{"transcription": <recognized text>}``.
    """
    # A fixed filename ("temp_audio.wav") would be clobbered by concurrent
    # requests and was never cleaned up; write to a unique temp file and
    # remove it when done.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        # Transcribe using MedASR
        result = med_asr_pipe(tmp_path)
        return {"transcription": result["text"]}
    finally:
        os.remove(tmp_path)
63
 
64
  # 3. Define the endpoints
65
  @app.get("/")
frontend/dashboard.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  import requests
3
  import json
4
  import os
@@ -10,6 +11,28 @@ st.set_page_config(page_title="Med-Gemma Triage", page_icon="🏥")
10
 
11
  # 2. The UI Layout
12
  st.title("🏥 Med-Gemma: Clinical Triage Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  st.markdown("---")
14
 
15
  col1, col2 = st.columns(2)
 
1
  import streamlit as st
2
+ from streamlit_mic_recorder import mic_recorder
3
  import requests
4
  import json
5
  import os
 
11
 
12
  # 2. The UI Layout
13
  st.title("🏥 Med-Gemma: Clinical Triage Assistant")
14
+
15
# Add a voice recording section
st.subheader("Dictate Symptoms")
audio = mic_recorder(
    start_prompt="⏺️ Start Dictation",
    stop_prompt="⏹️ Stop",
    key='recorder'
)

# Process the audio if recorded
if audio:
    with st.spinner("Transcribing medical dictation..."):
        # Send audio bytes to our new /transcribe endpoint
        files = {'file': audio['bytes']}
        # CPU transcription can be slow, but without a timeout a stalled
        # backend would hang the Streamlit rerun forever.
        response = requests.post(
            f"{st.secrets['API_URL']}/transcribe",
            files=files,
            timeout=120,
        )

    if response.status_code == 200:
        transcription = response.json().get("transcription")
        # Fill the symptoms box with the transcription
        st.session_state.symptoms = transcription
        st.success("Transcription complete!")
    else:
        # Surface backend failures instead of failing silently
        st.error(f"Transcription failed (HTTP {response.status_code}).")
36
  st.markdown("---")
37
 
38
  col1, col2 = st.columns(2)
requirements.txt CHANGED
@@ -2,4 +2,8 @@ fastapi
2
  uvicorn[standard]
3
  pydantic
4
  huggingface_hub
5
- llama-cpp-python
 
 
 
 
 
2
  uvicorn[standard]
3
  pydantic
4
  huggingface_hub
5
+ llama-cpp-python
6
+ transformers
7
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ torch
8
+ librosa
9
+ streamlit-mic-recorder