timgremore committed on
Commit
5579301
·
1 Parent(s): 5676fb5

feat: Populate code vectors from cache csv file

Browse files
code_vectors.csv ADDED
The diff for this file is too large to render. See raw diff
 
mix.exs CHANGED
@@ -65,7 +65,8 @@ defmodule MedicalTranscription.MixProject do
65
  {:progress_bar, "~> 3.0"},
66
  {:membrane_core, "~> 1.0"},
67
  {:membrane_raw_audio_format, "~> 0.12.0"},
68
- {:kino, "~> 0.12.3"}
 
69
  # {:membrane_portaudio_plugin, "~> 0.18.0"}
70
  ]
71
  end
 
65
  {:progress_bar, "~> 3.0"},
66
  {:membrane_core, "~> 1.0"},
67
  {:membrane_raw_audio_format, "~> 0.12.0"},
68
+ {:kino, "~> 0.12.3"},
69
+ {:csv, "~> 3.2"}
70
  # {:membrane_portaudio_plugin, "~> 0.18.0"}
71
  ]
72
  end
mix.lock CHANGED
@@ -17,6 +17,7 @@
17
  "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"},
18
  "cowlib": {:hex, :cowlib, "2.12.1", "a9fa9a625f1d2025fe6b462cb865881329b5caff8f1854d1cbc9f9533f00e1e1", [:make, :rebar3], [], "hexpm", "163b73f6367a7341b33c794c4e88e7dbfe6498ac42dcd69ef44c5bc5507c8db0"},
19
  "credo": {:hex, :credo, "1.7.3", "05bb11eaf2f2b8db370ecaa6a6bda2ec49b2acd5e0418bc106b73b07128c0436", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "35ea675a094c934c22fb1dca3696f3c31f2728ae6ef5a53b5d648c11180a4535"},
 
20
  "db_connection": {:hex, :db_connection, "2.6.0", "77d835c472b5b67fc4f29556dee74bf511bbafecdcaf98c27d27fa5918152086", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c2f992d15725e721ec7fbc1189d4ecdb8afef76648c746a8e1cad35e3b8a35f3"},
21
  "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
22
  "dns_cluster": {:hex, :dns_cluster, "0.1.1", "73b4b2c3ec692f8a64276c43f8c929733a9ab9ac48c34e4c0b3d9d1b5cd69155", [:mix], [], "hexpm", "03a3f6ff16dcbb53e219b99c7af6aab29eb6b88acf80164b4bd76ac18dc890b3"},
 
17
  "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"},
18
  "cowlib": {:hex, :cowlib, "2.12.1", "a9fa9a625f1d2025fe6b462cb865881329b5caff8f1854d1cbc9f9533f00e1e1", [:make, :rebar3], [], "hexpm", "163b73f6367a7341b33c794c4e88e7dbfe6498ac42dcd69ef44c5bc5507c8db0"},
19
  "credo": {:hex, :credo, "1.7.3", "05bb11eaf2f2b8db370ecaa6a6bda2ec49b2acd5e0418bc106b73b07128c0436", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "35ea675a094c934c22fb1dca3696f3c31f2728ae6ef5a53b5d648c11180a4535"},
20
+ "csv": {:hex, :csv, "3.2.1", "6d401f1ed33acb2627682a9ab6021e96d33ca6c1c6bccc243d8f7e2197d032f5", [:mix], [], "hexpm", "8f55a0524923ae49e97ff2642122a2ce7c61e159e7fe1184670b2ce847aee6c8"},
21
  "db_connection": {:hex, :db_connection, "2.6.0", "77d835c472b5b67fc4f29556dee74bf511bbafecdcaf98c27d27fa5918152086", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c2f992d15725e721ec7fbc1189d4ecdb8afef76648c746a8e1cad35e3b8a35f3"},
22
  "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
23
  "dns_cluster": {:hex, :dns_cluster, "0.1.1", "73b4b2c3ec692f8a64276c43f8c929733a9ab9ac48c34e4c0b3d9d1b5cd69155", [:mix], [], "hexpm", "03a3f6ff16dcbb53e219b99c7af6aab29eb6b88acf80164b4bd76ac18dc890b3"},
priv/repo/seeds.exs CHANGED
@@ -9,3 +9,31 @@
9
  #
10
  # We recommend using the bang functions (`insert!`, `update!`
11
  # and so on) as they will fail if something goes wrong.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  #
10
  # We recommend using the bang functions (`insert!`, `update!`
11
  # and so on) as they will fail if something goes wrong.
12
+
13
# Populate code_vectors with cached version of ICD-9 codes.
#
# Reads code_vectors.csv from the project root (two directories above this
# script), turns every row into a map with `:code`, `:description` and
# `:description_vector`, and bulk-inserts the result with `Repo.insert_all/2`.
# The `description_vector` column is expected to hold a bracketed,
# comma-separated list of numbers such as "[0.12,-0.5,1]".
#
# NOTE(review): every row is sent in a single INSERT statement. If the CSV
# grows large enough to hit the database's bind-parameter limit, this will
# need to be chunked — confirm against the real file size.

# Converts one serialized vector component into a float.
#
# `Float.parse/1` is used rather than `String.to_float/1` because the latter
# raises on components written without a decimal point (e.g. "1") and on
# surrounding whitespace (e.g. " 0.5" when the list is separated by ", ").
# Anything that is not entirely a number still fails loudly.
parse_component = fn component ->
  case component |> String.trim() |> Float.parse() do
    {value, ""} ->
      value

    _invalid ->
      raise ArgumentError,
            "invalid description_vector component: #{inspect(component)}"
  end
end

code_vectors =
  "../../code_vectors.csv"
  |> Path.expand(__DIR__)
  |> File.stream!()
  |> CSV.decode(headers: true)
  |> Enum.map(fn {:ok,
                  %{
                    "code" => code,
                    "description" => description,
                    "description_vector" => description_vector
                  }} ->
    vector =
      description_vector
      |> String.trim()
      |> String.replace_prefix("[", "")
      |> String.replace_suffix("]", "")
      |> String.split(",")
      |> Enum.map(parse_component)
      |> Pgvector.new()

    %{code: code, description: description, description_vector: vector}
  end)

MedicalTranscription.Repo.insert_all(
  MedicalTranscription.Coding.CodeVector,
  code_vectors
)
test/support/fixtures/code_vectors_fixtures.ex ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
defmodule MedicalTranscription.CodeVectorsFixtures do
  @moduledoc """
  This module defines test helpers for inserting `CodeVector` rows.

  Rows are read from the cached `code_vectors.csv` file at the project root
  and written directly with `MedicalTranscription.Repo.insert_all/2`.
  """

  alias MedicalTranscription.Coding.CodeVector
  alias MedicalTranscription.Repo

  # Number of CSV rows loaded per call; keeps the fixture set small.
  @fixture_count 5

  @doc """
  Insert code vectors from cached csv file.

  Only the first #{@fixture_count} rows of the CSV are inserted. The `attrs`
  argument is accepted for call-site compatibility but is currently ignored.

  Returns the result of `Repo.insert_all/2`.

  Raises `ArgumentError` if a `description_vector` component is not a number,
  and `FunctionClauseError` if a CSV row fails to decode or lacks one of the
  `code`, `description`, `description_vector` columns.
  """
  @spec insert_code_vector_fixtures(map()) :: {non_neg_integer(), nil | [term()]}
  def insert_code_vector_fixtures(_attrs \\ %{}) do
    code_vectors =
      "../../../code_vectors.csv"
      |> Path.expand(__DIR__)
      |> File.stream!()
      |> CSV.decode(headers: true)
      |> Enum.take(@fixture_count)
      |> Enum.map(fn {:ok,
                      %{
                        "code" => code,
                        "description" => description,
                        "description_vector" => description_vector
                      }} ->
        %{
          code: code,
          description: description,
          description_vector: parse_vector(description_vector)
        }
      end)

    Repo.insert_all(CodeVector, code_vectors)
  end

  # Turns a serialized vector such as "[0.12,-0.5,1]" into a Pgvector value.
  defp parse_vector(serialized) do
    serialized
    |> String.trim()
    |> String.replace_prefix("[", "")
    |> String.replace_suffix("]", "")
    |> String.split(",")
    |> Enum.map(&parse_component/1)
    |> Pgvector.new()
  end

  # Parses a single component into a float. `Float.parse/1` is used instead of
  # `String.to_float/1`, which raises on components without a decimal point
  # (e.g. "1") or with surrounding whitespace (e.g. " 0.5").
  defp parse_component(component) do
    case component |> String.trim() |> Float.parse() do
      {value, ""} ->
        value

      _invalid ->
        raise ArgumentError,
              "invalid description_vector component: #{inspect(component)}"
    end
  end
end