hkai20000 committed on
Commit
ee0bd33
·
verified ·
1 Parent(s): 76ce598

Update faq_store.py

Browse files
Files changed (1) hide show
  1. faq_store.py +2 -7
faq_store.py CHANGED
@@ -5,9 +5,8 @@ import numpy as np
5
  from openai import OpenAI
6
 
7
  _current_dir = os.path.dirname(__file__)
8
- _parent_dir = os.path.dirname(_current_dir)
9
- _FAQS_FILE = os.path.join(_parent_dir, "data", "medical_faqs.jsonl")
10
- _EMBED_CACHE_FILE = os.path.join(_parent_dir, "data", "medical_faqs_embeddings.pkl")
11
  _EMBED_MODEL = "text-embedding-3-small"
12
 
13
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -21,14 +20,12 @@ def initialize_faq_store():
21
  FAQ_ENTRIES.clear()
22
  FAQ_VECS.clear()
23
 
24
- # Load cached embeddings if present
25
  cached_vecs: dict[str, np.ndarray] = {}
26
  if os.path.exists(_EMBED_CACHE_FILE):
27
  with open(_EMBED_CACHE_FILE, "rb") as f:
28
  cached_entries, cached_vecs = pickle.load(f)
29
  FAQ_ENTRIES.update(cached_entries)
30
 
31
- # Load (or reload) FAQ entries from JSONL
32
  with open(_FAQS_FILE, "r", encoding="utf-8") as f:
33
  for line in f:
34
  line = line.strip()
@@ -37,7 +34,6 @@ def initialize_faq_store():
37
  obj = json.loads(line)
38
  FAQ_ENTRIES[obj["id"]] = obj
39
 
40
- # Embed any entries not yet in cache
41
  new_vecs: dict[str, np.ndarray] = {}
42
  for fid, entry in FAQ_ENTRIES.items():
43
  if fid in cached_vecs:
@@ -48,7 +44,6 @@ def initialize_faq_store():
48
  new_vecs[fid] = vec
49
  FAQ_VECS.append((fid, vec))
50
 
51
- # Persist updated cache
52
  all_vecs = {**cached_vecs, **new_vecs}
53
  with open(_EMBED_CACHE_FILE, "wb") as f:
54
  pickle.dump((FAQ_ENTRIES, all_vecs), f)
 
5
  from openai import OpenAI
6
 
7
  _current_dir = os.path.dirname(__file__)
8
+ _FAQS_FILE = os.path.join(_current_dir, "medical_faqs.jsonl")
9
+ _EMBED_CACHE_FILE = os.path.join(_current_dir, "medical_faqs_embeddings.pkl")
 
10
  _EMBED_MODEL = "text-embedding-3-small"
11
 
12
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
20
  FAQ_ENTRIES.clear()
21
  FAQ_VECS.clear()
22
 
 
23
  cached_vecs: dict[str, np.ndarray] = {}
24
  if os.path.exists(_EMBED_CACHE_FILE):
25
  with open(_EMBED_CACHE_FILE, "rb") as f:
26
  cached_entries, cached_vecs = pickle.load(f)
27
  FAQ_ENTRIES.update(cached_entries)
28
 
 
29
  with open(_FAQS_FILE, "r", encoding="utf-8") as f:
30
  for line in f:
31
  line = line.strip()
 
34
  obj = json.loads(line)
35
  FAQ_ENTRIES[obj["id"]] = obj
36
 
 
37
  new_vecs: dict[str, np.ndarray] = {}
38
  for fid, entry in FAQ_ENTRIES.items():
39
  if fid in cached_vecs:
 
44
  new_vecs[fid] = vec
45
  FAQ_VECS.append((fid, vec))
46
 
 
47
  all_vecs = {**cached_vecs, **new_vecs}
48
  with open(_EMBED_CACHE_FILE, "wb") as f:
49
  pickle.dump((FAQ_ENTRIES, all_vecs), f)