timgremore committed on
Commit
7494a1c
·
1 Parent(s): 93ae114

feat: Try lighter-weight and more accurate whisper

Browse files
Files changed (2) hide show
  1. fly.toml +1 -1
  2. lib/medicode/serving_supervisor.ex +33 -3
fly.toml CHANGED
@@ -24,7 +24,7 @@ kill_signal = 'SIGTERM'
24
  [[mounts]]
25
  source = 'data'
26
  destination = '/data'
27
- initial_size = '100gb'
28
 
29
  [http_service]
30
  internal_port = 8080
 
24
  [[mounts]]
25
  source = 'data'
26
  destination = '/data'
27
+ initial_size = '40gb'
28
 
29
  [http_service]
30
  internal_port = 8080
lib/medicode/serving_supervisor.ex CHANGED
@@ -7,7 +7,8 @@ defmodule Medicode.ServingSupervisor do
7
 
8
  alias AudioTagger.{KeywordFinder, Transcriber, Vectors}
9
 
10
- @model_name "openai/whisper-small"
 
11
  @question_answer_model_name "distilbert-base-cased-distilled-squad"
12
 
13
  def start_link(init_arg) do
@@ -20,14 +21,20 @@ defmodule Medicode.ServingSupervisor do
20
  transcription_spec(),
21
  token_classification_spec(),
22
  text_embedding_spec(),
23
- question_answer_spec(),
24
  ]
25
 
26
  Supervisor.init(children, strategy: :one_for_one)
27
  end
28
 
29
  defp transcription_spec do
30
- Transcriber.child_spec(Medicode.TranscriptionServing, @model_name)
 
 
 
 
 
 
31
  end
32
 
33
  defp token_classification_spec do
@@ -53,4 +60,27 @@ defmodule Medicode.ServingSupervisor do
53
  serving: serving, name: Medicode.QAServing, batch_size: 1, batch_timeout: 100
54
  }
55
  end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  end
 
7
 
8
  alias AudioTagger.{KeywordFinder, Transcriber, Vectors}
9
 
10
+ # @model_name "openai/whisper-small"
11
+ @model_name "distil-whisper/distil-medium.en"
12
  @question_answer_model_name "distilbert-base-cased-distilled-squad"
13
 
14
  def start_link(init_arg) do
 
21
  transcription_spec(),
22
  token_classification_spec(),
23
  text_embedding_spec(),
24
+ question_answer_spec()
25
  ]
26
 
27
  Supervisor.init(children, strategy: :one_for_one)
28
  end
29
 
30
  defp transcription_spec do
31
+ {:ok, featurizer} = Bumblebee.load_featurizer({:hf, @model_name})
32
+ serving = serving_with_featurizer(featurizer, @model_name)
33
+
34
+ {
35
+ Nx.Serving,
36
+ serving: serving, name: Medicode.TranscriptionServing, batch_size: 4, batch_timeout: 100
37
+ }
38
  end
39
 
40
  defp token_classification_spec do
 
60
  serving: serving, name: Medicode.QAServing, batch_size: 1, batch_timeout: 100
61
  }
62
  end
63
+
64
+ @doc "Creates an Nx.Serving to perform speech-to-text tasks, using the passed featurizer. This is helpful for direct use from Livebook where the featurizer is needed to define the Kino audio input."
65
+ def serving_with_featurizer(featurizer, model_name) do
66
+ {:ok, model_info} = Bumblebee.load_model({:hf, model_name})
67
+ {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_name})
68
+ {:ok, generation_config} = Bumblebee.load_generation_config({:hf, model_name})
69
+ generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
70
+
71
+ # Docs: https://hexdocs.pm/bumblebee/Bumblebee.Audio.html#speech_to_text_whisper/5
72
+ Bumblebee.Audio.speech_to_text_whisper(
73
+ model_info,
74
+ featurizer,
75
+ tokenizer,
76
+ generation_config,
77
+ task: nil,
78
+ compile: [batch_size: 4],
79
+ chunk_num_seconds: 30,
80
+ # context_num_seconds: 5, # Defaults to 1/6 of :chunk_num_seconds
81
+ timestamps: :segments,
82
+ stream: true,
83
+ defn_options: [compiler: EXLA]
84
+ )
85
+ end
86
  end