Commit ·
50d1bef
1
Parent(s): 84e495d
feat: Store vector representation of chunk text
Browse files
lib/medical_transcription/transcription_server.ex
CHANGED
|
@@ -62,6 +62,7 @@ defmodule Medicode.TranscriptionServer do
|
|
| 62 |
Transcriptions.create_chunk(%{
|
| 63 |
transcription_id: id,
|
| 64 |
text: String.trim(result.text),
|
|
|
|
| 65 |
start_mark: result.start_mark,
|
| 66 |
end_mark: result.end_mark
|
| 67 |
})
|
|
|
|
| 62 |
Transcriptions.create_chunk(%{
|
| 63 |
transcription_id: id,
|
| 64 |
text: String.trim(result.text),
|
| 65 |
+
text_vector: Medicode.Coding.compute_vector_as_list(result.text),
|
| 66 |
start_mark: result.start_mark,
|
| 67 |
end_mark: result.end_mark
|
| 68 |
})
|
lib/medical_transcription/transcriptions/transcription_chunk.ex
CHANGED
|
@@ -1,4 +1,8 @@
|
|
| 1 |
defmodule Medicode.Transcriptions.TranscriptionChunk do
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
use Ecto.Schema
|
| 3 |
import Ecto.Changeset
|
| 4 |
|
|
@@ -6,6 +10,7 @@ defmodule Medicode.Transcriptions.TranscriptionChunk do
|
|
| 6 |
@foreign_key_type :binary_id
|
| 7 |
schema "transcription_chunks" do
|
| 8 |
field(:text, :string)
|
|
|
|
| 9 |
field(:start_mark, :string)
|
| 10 |
field(:end_mark, :string)
|
| 11 |
|
|
@@ -31,7 +36,7 @@ defmodule Medicode.Transcriptions.TranscriptionChunk do
|
|
| 31 |
@doc false
|
| 32 |
def changeset(transcription_chunk, attrs) do
|
| 33 |
transcription_chunk
|
| 34 |
-
|> cast(attrs, [:transcription_id, :text, :start_mark, :end_mark])
|
| 35 |
-
|> validate_required([:transcription_id, :text, :start_mark, :end_mark])
|
| 36 |
end
|
| 37 |
end
|
|
|
|
| 1 |
defmodule Medicode.Transcriptions.TranscriptionChunk do
|
| 2 |
+
@moduledoc """
|
| 3 |
+
Represents a portion of a transcription, along with a vector embedding for its text.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
use Ecto.Schema
|
| 7 |
import Ecto.Changeset
|
| 8 |
|
|
|
|
| 10 |
@foreign_key_type :binary_id
|
| 11 |
schema "transcription_chunks" do
|
| 12 |
field(:text, :string)
|
| 13 |
+
field(:text_vector, Pgvector.Ecto.Vector, redact: true)
|
| 14 |
field(:start_mark, :string)
|
| 15 |
field(:end_mark, :string)
|
| 16 |
|
|
|
|
| 36 |
@doc false
|
| 37 |
def changeset(transcription_chunk, attrs) do
|
| 38 |
transcription_chunk
|
| 39 |
+
|> cast(attrs, [:transcription_id, :text, :text_vector, :start_mark, :end_mark])
|
| 40 |
+
|> validate_required([:transcription_id, :text, :text_vector, :start_mark, :end_mark])
|
| 41 |
end
|
| 42 |
end
|
lib/medical_transcription_web/components/transcription_text_component.ex
CHANGED
|
@@ -134,7 +134,8 @@ defmodule MedicodeWeb.Components.TranscriptionTextComponent do
|
|
| 134 |
else
|
| 135 |
{:ok, chunk} =
|
| 136 |
Transcriptions.update_transcription_chunk(socket.assigns.chunk, %{
|
| 137 |
-
text: new_chunk_text_trimmed
|
|
|
|
| 138 |
})
|
| 139 |
|
| 140 |
Medicode.ClassificationSupervisor.start_classification(chunk)
|
|
|
|
| 134 |
else
|
| 135 |
{:ok, chunk} =
|
| 136 |
Transcriptions.update_transcription_chunk(socket.assigns.chunk, %{
|
| 137 |
+
text: new_chunk_text_trimmed,
|
| 138 |
+
text_vector: Medicode.Coding.compute_vector_as_list(new_chunk_text_trimmed)
|
| 139 |
})
|
| 140 |
|
| 141 |
Medicode.ClassificationSupervisor.start_classification(chunk)
|
priv/repo/migrations/20240228015523_add_text_vector_to_transcription_chunks.exs
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defmodule Medicode.Repo.Migrations.AddTextVectorToTranscriptionChunks do
|
| 2 |
+
use Ecto.Migration
|
| 3 |
+
|
| 4 |
+
def change do
|
| 5 |
+
# TODO: text_vector should not be nullable
|
| 6 |
+
alter table(:transcription_chunks) do
|
| 7 |
+
add :text_vector, :vector, size: 384, null: true
|
| 8 |
+
end
|
| 9 |
+
end
|
| 10 |
+
end
|