Akis Giannoukos committed on
Commit
d517324
·
1 Parent(s): 497441d

Added GPU decorator

Browse files
Files changed (2) hide show
  1. app.py +60 -50
  2. requirements.txt +1 -0
app.py CHANGED
@@ -19,6 +19,7 @@ from transformers import (
19
  pipeline,
20
  )
21
  from gtts import gTTS
 
22
 
23
 
24
  # ---------------------------
@@ -39,13 +40,17 @@ _gen_pipe = None
39
  _tokenizer = None
40
 
41
 
 
 
 
 
42
  def get_asr_pipeline():
43
  global _asr_pipe
44
  if _asr_pipe is None:
45
  _asr_pipe = pipeline(
46
  "automatic-speech-recognition",
47
  model=DEFAULT_ASR_MODEL_ID,
48
- device=-1,
49
  )
50
  return _asr_pipe
51
 
@@ -58,8 +63,8 @@ def get_textgen_pipeline():
58
  task="text-generation",
59
  model=DEFAULT_CHAT_MODEL_ID,
60
  tokenizer=DEFAULT_CHAT_MODEL_ID,
61
- device=-1,
62
- torch_dtype=torch.float32,
63
  )
64
  return _gen_pipe
65
 
@@ -334,6 +339,7 @@ def init_state() -> Tuple[List[Tuple[str, str]], Dict[str, Any], Dict[str, Any],
334
  return chat_history, scores, meta, finished, turns
335
 
336
 
 
337
  def process_turn(
338
  audio_path: Optional[str],
339
  text_input: Optional[str],
@@ -454,57 +460,61 @@ def reset_app():
454
  # ---------------------------
455
  # UI
456
  # ---------------------------
457
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
458
- gr.Markdown(
459
- """
460
- ### PHQ-9 Conversational Clinician Agent
461
- Engage in a brief, empathetic conversation. Your audio is transcribed, analyzed, and used to infer PHQ-9 scores.
462
- The system stops when confidence is high enough or any safety risk is detected. It does not provide therapy or emergency counseling.
463
- """
464
- )
 
 
465
 
466
- with gr.Row():
467
- chatbot = gr.Chatbot(height=400, type="tuples")
468
- with gr.Column():
469
- score_json = gr.JSON(label="PHQ-9 Assessment (live)")
470
- severity_label = gr.Label(label="Severity")
471
- threshold = gr.Slider(0.5, 1.0, value=CONFIDENCE_THRESHOLD_DEFAULT, step=0.05, label="Confidence Threshold (stop when min ≥ τ)")
472
- tts_enable = gr.Checkbox(label="Speak clinician responses (TTS)", value=USE_TTS_DEFAULT)
473
- tts_audio = gr.Audio(label="Clinician voice", interactive=False)
474
-
475
- with gr.Row():
476
- audio = gr.Audio(sources=["microphone"], type="filepath", label="Speak your response (or use text)")
477
- text = gr.Textbox(lines=2, placeholder="Optional: type your response instead of audio")
478
-
479
- with gr.Row():
480
- send_btn = gr.Button("Send")
481
- reset_btn = gr.Button("Reset")
482
-
483
- # App state
484
- chat_state = gr.State()
485
- scores_state = gr.State()
486
- meta_state = gr.State()
487
- finished_state = gr.State()
488
- turns_state = gr.State()
489
-
490
- # Initialize on load
491
- def _on_load():
492
- return init_state()
493
-
494
- demo.load(_on_load, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
495
-
496
- # Wire interactions
497
- send_btn.click(
498
- fn=process_turn,
499
- inputs=[audio, text, chatbot, threshold, tts_enable, finished_state, turns_state, scores_state, meta_state],
500
- outputs=[chatbot, score_json, severity_label, finished_state, turns_state, audio, text, tts_audio],
501
- queue=True,
502
- api_name="message",
503
- )
504
 
505
- reset_btn.click(fn=reset_app, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
506
 
 
507
 
 
508
  if __name__ == "__main__":
509
  # For local dev
510
  demo.queue(max_size=16).launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
19
  pipeline,
20
  )
21
  from gtts import gTTS
22
+ import spaces
23
 
24
 
25
  # ---------------------------
 
40
  _tokenizer = None
41
 
42
 
43
+ def _hf_device() -> int:
44
+ return 0 if torch.cuda.is_available() else -1
45
+
46
+
47
  def get_asr_pipeline():
48
  global _asr_pipe
49
  if _asr_pipe is None:
50
  _asr_pipe = pipeline(
51
  "automatic-speech-recognition",
52
  model=DEFAULT_ASR_MODEL_ID,
53
+ device=_hf_device(),
54
  )
55
  return _asr_pipe
56
 
 
63
  task="text-generation",
64
  model=DEFAULT_CHAT_MODEL_ID,
65
  tokenizer=DEFAULT_CHAT_MODEL_ID,
66
+ device=_hf_device(),
67
+ torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
68
  )
69
  return _gen_pipe
70
 
 
339
  return chat_history, scores, meta, finished, turns
340
 
341
 
342
+ @spaces.GPU
343
  def process_turn(
344
  audio_path: Optional[str],
345
  text_input: Optional[str],
 
460
  # ---------------------------
461
  # UI
462
  # ---------------------------
463
+ @spaces.GPU
464
+ def create_demo():
465
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
466
+ gr.Markdown(
467
+ """
468
+ ### PHQ-9 Conversational Clinician Agent
469
+ Engage in a brief, empathetic conversation. Your audio is transcribed, analyzed, and used to infer PHQ-9 scores.
470
+ The system stops when confidence is high enough or any safety risk is detected. It does not provide therapy or emergency counseling.
471
+ """
472
+ )
473
 
474
+ with gr.Row():
475
+ chatbot = gr.Chatbot(height=400, type="tuples")
476
+ with gr.Column():
477
+ score_json = gr.JSON(label="PHQ-9 Assessment (live)")
478
+ severity_label = gr.Label(label="Severity")
479
+ threshold = gr.Slider(0.5, 1.0, value=CONFIDENCE_THRESHOLD_DEFAULT, step=0.05, label="Confidence Threshold (stop when min ≥ τ)")
480
+ tts_enable = gr.Checkbox(label="Speak clinician responses (TTS)", value=USE_TTS_DEFAULT)
481
+ tts_audio = gr.Audio(label="Clinician voice", interactive=False)
482
+
483
+ with gr.Row():
484
+ audio = gr.Audio(sources=["microphone"], type="filepath", label="Speak your response (or use text)")
485
+ text = gr.Textbox(lines=2, placeholder="Optional: type your response instead of audio")
486
+
487
+ with gr.Row():
488
+ send_btn = gr.Button("Send")
489
+ reset_btn = gr.Button("Reset")
490
+
491
+ # App state
492
+ chat_state = gr.State()
493
+ scores_state = gr.State()
494
+ meta_state = gr.State()
495
+ finished_state = gr.State()
496
+ turns_state = gr.State()
497
+
498
+ # Initialize on load
499
+ def _on_load():
500
+ return init_state()
501
+
502
+ demo.load(_on_load, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
503
+
504
+ # Wire interactions
505
+ send_btn.click(
506
+ fn=process_turn,
507
+ inputs=[audio, text, chatbot, threshold, tts_enable, finished_state, turns_state, scores_state, meta_state],
508
+ outputs=[chatbot, score_json, severity_label, finished_state, turns_state, audio, text, tts_audio],
509
+ queue=True,
510
+ api_name="message",
511
+ )
512
 
513
+ reset_btn.click(fn=reset_app, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
514
 
515
+ return demo
516
 
517
+ demo = create_demo()
518
  if __name__ == "__main__":
519
  # For local dev
520
  demo.queue(max_size=16).launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
requirements.txt CHANGED
@@ -9,4 +9,5 @@ numpy>=1.26.4
9
  scipy>=1.11.4
10
  protobuf>=4.25.3
11
  gTTS>=2.5.3
 
12
 
 
9
  scipy>=1.11.4
10
  protobuf>=4.25.3
11
  gTTS>=2.5.3
12
+ spaces>=0.27.1
13