Update faq_store.py
Browse files
- faq_store.py +2 -7
faq_store.py
CHANGED
|
@@ -5,9 +5,8 @@ import numpy as np
|
|
| 5 |
from openai import OpenAI
|
| 6 |
|
| 7 |
_current_dir = os.path.dirname(__file__)
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
_EMBED_CACHE_FILE = os.path.join(_parent_dir, "data", "medical_faqs_embeddings.pkl")
|
| 11 |
_EMBED_MODEL = "text-embedding-3-small"
|
| 12 |
|
| 13 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
@@ -21,14 +20,12 @@ def initialize_faq_store():
|
|
| 21 |
FAQ_ENTRIES.clear()
|
| 22 |
FAQ_VECS.clear()
|
| 23 |
|
| 24 |
-
# Load cached embeddings if present
|
| 25 |
cached_vecs: dict[str, np.ndarray] = {}
|
| 26 |
if os.path.exists(_EMBED_CACHE_FILE):
|
| 27 |
with open(_EMBED_CACHE_FILE, "rb") as f:
|
| 28 |
cached_entries, cached_vecs = pickle.load(f)
|
| 29 |
FAQ_ENTRIES.update(cached_entries)
|
| 30 |
|
| 31 |
-
# Load (or reload) FAQ entries from JSONL
|
| 32 |
with open(_FAQS_FILE, "r", encoding="utf-8") as f:
|
| 33 |
for line in f:
|
| 34 |
line = line.strip()
|
|
@@ -37,7 +34,6 @@ def initialize_faq_store():
|
|
| 37 |
obj = json.loads(line)
|
| 38 |
FAQ_ENTRIES[obj["id"]] = obj
|
| 39 |
|
| 40 |
-
# Embed any entries not yet in cache
|
| 41 |
new_vecs: dict[str, np.ndarray] = {}
|
| 42 |
for fid, entry in FAQ_ENTRIES.items():
|
| 43 |
if fid in cached_vecs:
|
|
@@ -48,7 +44,6 @@ def initialize_faq_store():
|
|
| 48 |
new_vecs[fid] = vec
|
| 49 |
FAQ_VECS.append((fid, vec))
|
| 50 |
|
| 51 |
-
# Persist updated cache
|
| 52 |
all_vecs = {**cached_vecs, **new_vecs}
|
| 53 |
with open(_EMBED_CACHE_FILE, "wb") as f:
|
| 54 |
pickle.dump((FAQ_ENTRIES, all_vecs), f)
|
|
|
|
| 5 |
from openai import OpenAI
|
| 6 |
|
| 7 |
_current_dir = os.path.dirname(__file__)
|
| 8 |
+
_FAQS_FILE = os.path.join(_current_dir, "medical_faqs.jsonl")
|
| 9 |
+
_EMBED_CACHE_FILE = os.path.join(_current_dir, "medical_faqs_embeddings.pkl")
|
|
|
|
| 10 |
_EMBED_MODEL = "text-embedding-3-small"
|
| 11 |
|
| 12 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
| 20 |
FAQ_ENTRIES.clear()
|
| 21 |
FAQ_VECS.clear()
|
| 22 |
|
|
|
|
| 23 |
cached_vecs: dict[str, np.ndarray] = {}
|
| 24 |
if os.path.exists(_EMBED_CACHE_FILE):
|
| 25 |
with open(_EMBED_CACHE_FILE, "rb") as f:
|
| 26 |
cached_entries, cached_vecs = pickle.load(f)
|
| 27 |
FAQ_ENTRIES.update(cached_entries)
|
| 28 |
|
|
|
|
| 29 |
with open(_FAQS_FILE, "r", encoding="utf-8") as f:
|
| 30 |
for line in f:
|
| 31 |
line = line.strip()
|
|
|
|
| 34 |
obj = json.loads(line)
|
| 35 |
FAQ_ENTRIES[obj["id"]] = obj
|
| 36 |
|
|
|
|
| 37 |
new_vecs: dict[str, np.ndarray] = {}
|
| 38 |
for fid, entry in FAQ_ENTRIES.items():
|
| 39 |
if fid in cached_vecs:
|
|
|
|
| 44 |
new_vecs[fid] = vec
|
| 45 |
FAQ_VECS.append((fid, vec))
|
| 46 |
|
|
|
|
| 47 |
all_vecs = {**cached_vecs, **new_vecs}
|
| 48 |
with open(_EMBED_CACHE_FILE, "wb") as f:
|
| 49 |
pickle.dump((FAQ_ENTRIES, all_vecs), f)
|