timgremore committed on
Commit
50d1bef
·
1 Parent(s): 84e495d

feat: Store vector representation of chunk text

Browse files
lib/medical_transcription/transcription_server.ex CHANGED
@@ -62,6 +62,7 @@ defmodule Medicode.TranscriptionServer do
62
  Transcriptions.create_chunk(%{
63
  transcription_id: id,
64
  text: String.trim(result.text),
 
65
  start_mark: result.start_mark,
66
  end_mark: result.end_mark
67
  })
 
62
  Transcriptions.create_chunk(%{
63
  transcription_id: id,
64
  text: String.trim(result.text),
65
+ text_vector: Medicode.Coding.compute_vector_as_list(result.text),
66
  start_mark: result.start_mark,
67
  end_mark: result.end_mark
68
  })
lib/medical_transcription/transcriptions/transcription_chunk.ex CHANGED
@@ -1,4 +1,8 @@
1
  defmodule Medicode.Transcriptions.TranscriptionChunk do
 
 
 
 
2
  use Ecto.Schema
3
  import Ecto.Changeset
4
 
@@ -6,6 +10,7 @@ defmodule Medicode.Transcriptions.TranscriptionChunk do
6
  @foreign_key_type :binary_id
7
  schema "transcription_chunks" do
8
  field(:text, :string)
 
9
  field(:start_mark, :string)
10
  field(:end_mark, :string)
11
 
@@ -31,7 +36,7 @@ defmodule Medicode.Transcriptions.TranscriptionChunk do
31
  @doc false
32
  def changeset(transcription_chunk, attrs) do
33
  transcription_chunk
34
- |> cast(attrs, [:transcription_id, :text, :start_mark, :end_mark])
35
- |> validate_required([:transcription_id, :text, :start_mark, :end_mark])
36
  end
37
  end
 
1
  defmodule Medicode.Transcriptions.TranscriptionChunk do
2
+ @moduledoc """
3
+ Represents a portion of a transcription, along with a vector embedding for its text.
4
+ """
5
+
6
  use Ecto.Schema
7
  import Ecto.Changeset
8
 
 
10
  @foreign_key_type :binary_id
11
  schema "transcription_chunks" do
12
  field(:text, :string)
13
+ field(:text_vector, Pgvector.Ecto.Vector, redact: true)
14
  field(:start_mark, :string)
15
  field(:end_mark, :string)
16
 
 
36
  @doc false
37
  def changeset(transcription_chunk, attrs) do
38
  transcription_chunk
39
+ |> cast(attrs, [:transcription_id, :text, :text_vector, :start_mark, :end_mark])
40
+ |> validate_required([:transcription_id, :text, :text_vector, :start_mark, :end_mark])
41
  end
42
  end
lib/medical_transcription_web/components/transcription_text_component.ex CHANGED
@@ -134,7 +134,8 @@ defmodule MedicodeWeb.Components.TranscriptionTextComponent do
134
  else
135
  {:ok, chunk} =
136
  Transcriptions.update_transcription_chunk(socket.assigns.chunk, %{
137
- text: new_chunk_text_trimmed
 
138
  })
139
 
140
  Medicode.ClassificationSupervisor.start_classification(chunk)
 
134
  else
135
  {:ok, chunk} =
136
  Transcriptions.update_transcription_chunk(socket.assigns.chunk, %{
137
+ text: new_chunk_text_trimmed,
138
+ text_vector: Medicode.Coding.compute_vector_as_list(new_chunk_text_trimmed)
139
  })
140
 
141
  Medicode.ClassificationSupervisor.start_classification(chunk)
priv/repo/migrations/20240228015523_add_text_vector_to_transcription_chunks.exs ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ defmodule Medicode.Repo.Migrations.AddTextVectorToTranscriptionChunks do
2
+ use Ecto.Migration
3
+
4
+ def change do
5
+ # TODO: text_vector should not be nullable
6
+ alter table(:transcription_chunks) do
7
+ add :text_vector, :vector, size: 384, null: true
8
+ end
9
+ end
10
+ end