hkai20000 commited on
Commit
97bec02
·
verified ·
1 Parent(s): baec8cb

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +73 -73
rag.py CHANGED
@@ -1,73 +1,73 @@
1
- import os
2
- from typing import List, Dict, Tuple
3
- import numpy as np
4
- from openai import OpenAI
5
- from services.faq_store import FAQ_ENTRIES, FAQ_VECS
6
-
7
- RAG_CONFIDENCE_THRESHOLD = 0.6
8
- MAX_FAQ_MATCHES = 3
9
- _EMBED_MODEL = "text-embedding-3-small"
10
- _CHAT_MODEL = "gpt-4o-mini"
11
-
12
- SYSTEM_PROMPT = (
13
- "You are a helpful assistant for ScanAssured, a medical document OCR and NER app. "
14
- "Answer only based on the provided FAQ context. "
15
- "You do NOT have access to any user scan results or personal medical data. "
16
- "For personal medical advice, always direct users to a qualified healthcare professional. "
17
- "Keep answers concise and clear."
18
- )
19
- FALLBACK_MESSAGE = (
20
- "I'm not certain about that. Please consult a qualified healthcare professional "
21
- "for personal medical advice, or refer to the app documentation for usage questions."
22
- )
23
-
24
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
25
-
26
- # In-memory embedding cache for repeated queries
27
- _query_cache: dict[str, np.ndarray] = {}
28
-
29
-
30
- def cosine(a: np.ndarray, b: np.ndarray) -> float:
31
- return float(a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b)))
32
-
33
-
34
- async def get_answer(question: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
35
- # Embed query (with in-memory cache)
36
- if question in _query_cache:
37
- vec = _query_cache[question]
38
- else:
39
- resp = client.embeddings.create(model=_EMBED_MODEL, input=question)
40
- vec = np.array(resp.data[0].embedding, dtype=np.float32)
41
- _query_cache[question] = vec
42
-
43
- # Cosine similarity against all FAQ vectors
44
- scores = [(fid, cosine(vec, fvec)) for fid, fvec in FAQ_VECS]
45
- scores.sort(key=lambda x: x[1], reverse=True)
46
-
47
- # Fallback if no FAQ meets the confidence threshold
48
- if not scores or scores[0][1] < RAG_CONFIDENCE_THRESHOLD:
49
- return FALLBACK_MESSAGE, []
50
-
51
- # Gather top matches
52
- matches = []
53
- for fid, score in scores[:MAX_FAQ_MATCHES]:
54
- faq = FAQ_ENTRIES[fid]
55
- matches.append({"id": fid, "answer": faq["answer"], "source": faq["source"], "score": score})
56
-
57
- # Build message list for GPT
58
- messages: List[Dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
59
- for msg in history:
60
- messages.append({"role": msg["role"], "content": msg["content"]})
61
- for faq in matches:
62
- messages.append({"role": "system", "content": faq["answer"]})
63
- messages.append({"role": "user", "content": question})
64
-
65
- chat_resp = client.chat.completions.create(
66
- model=_CHAT_MODEL,
67
- messages=messages,
68
- stream=False,
69
- )
70
- answer = chat_resp.choices[0].message.content
71
-
72
- citations = [{"id": faq["id"], "source": faq["source"]} for faq in matches]
73
- return answer, citations
 
1
+ import os
2
+ from typing import List, Dict, Tuple
3
+ import numpy as np
4
+ from openai import OpenAI
5
+ from services.faq_store import FAQ_ENTRIES, FAQ_VECS
6
+
7
# Minimum cosine similarity an FAQ match must reach before we answer;
# below this the fallback message is returned instead of guessing.
RAG_CONFIDENCE_THRESHOLD = 0.6
# Number of top-scoring FAQ entries injected as context for the chat model.
MAX_FAQ_MATCHES = 3
# OpenAI model names: one for query embeddings, one for answer generation.
_EMBED_MODEL = "text-embedding-3-small"
_CHAT_MODEL = "gpt-4o-mini"

# System persona: scopes the assistant to the provided FAQ content only and
# directs personal medical questions to a healthcare professional.
SYSTEM_PROMPT = (
    "You are a helpful assistant for ScanAssured, a medical document OCR and NER app. "
    "Answer only based on the provided FAQ context. "
    "You do NOT have access to any user scan results or personal medical data. "
    "For personal medical advice, always direct users to a qualified healthcare professional. "
    "Keep answers concise and clear."
)
# Returned verbatim when no FAQ entry meets RAG_CONFIDENCE_THRESHOLD.
FALLBACK_MESSAGE = (
    "I'm not certain about that. Please consult a qualified healthcare professional "
    "for personal medical advice, or refer to the app documentation for usage questions."
)

# Shared OpenAI client; reads the API key from the environment at import time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# In-memory embedding cache for repeated queries
# NOTE(review): unbounded — grows by one entry per distinct question for the
# life of the process; consider capping or an LRU policy.
_query_cache: dict[str, np.ndarray] = {}
28
+
29
+
30
def cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    Returns 0.0 when either vector has zero norm (the original form would
    divide by zero, yielding NaN/inf under numpy error settings).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        # A zero embedding carries no direction; treat it as no similarity.
        return 0.0
    return float(a.dot(b) / denom)
32
+
33
+
34
async def get_answer(question: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
    """Answer *question* with RAG over the FAQ store.

    Embeds the question (cached per exact question string), ranks all FAQ
    vectors by cosine similarity, and asks the chat model to answer grounded
    in the top matches.

    Args:
        question: The user's question text.
        history: Prior chat turns, each a dict with "role" and "content" keys.

    Returns:
        ``(answer, citations)`` where ``citations`` is a list of
        ``{"id": ..., "source": ...}`` dicts; both fall back to
        ``(FALLBACK_MESSAGE, [])`` when no FAQ entry reaches
        ``RAG_CONFIDENCE_THRESHOLD``.
    """
    import asyncio  # local import: only needed to off-load blocking SDK calls

    # Embed the query, consulting the in-memory cache first.
    if question in _query_cache:
        vec = _query_cache[question]
    else:
        # The OpenAI SDK call here is synchronous/blocking; run it in a
        # worker thread so this async function does not stall the event loop.
        resp = await asyncio.to_thread(
            client.embeddings.create, model=_EMBED_MODEL, input=question
        )
        vec = np.array(resp.data[0].embedding, dtype=np.float32)
        # Bound the cache so a long-running process cannot grow it without
        # limit; a full clear is crude but keeps the hot path simple.
        if len(_query_cache) >= 1024:
            _query_cache.clear()
        _query_cache[question] = vec

    # Cosine similarity against all FAQ vectors, ranked best-first.
    scores = [(fid, cosine(vec, fvec)) for fid, fvec in FAQ_VECS]
    scores.sort(key=lambda pair: pair[1], reverse=True)

    # Fallback if no FAQ meets the confidence threshold.
    if not scores or scores[0][1] < RAG_CONFIDENCE_THRESHOLD:
        return FALLBACK_MESSAGE, []

    # Gather the top matches for context and citations.
    matches = []
    for fid, score in scores[:MAX_FAQ_MATCHES]:
        faq = FAQ_ENTRIES[fid]
        matches.append(
            {"id": fid, "answer": faq["answer"], "source": faq["source"], "score": score}
        )

    # Build the prompt: system persona, prior turns, FAQ context, question.
    messages: List[Dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    for faq in matches:
        messages.append({"role": "system", "content": faq["answer"]})
    messages.append({"role": "user", "content": question})

    # Blocking chat-completion call, also off-loaded to a worker thread.
    chat_resp = await asyncio.to_thread(
        client.chat.completions.create,
        model=_CHAT_MODEL,
        messages=messages,
        stream=False,
    )
    # message.content is Optional in the SDK; never return None from a
    # function annotated to return str.
    answer = chat_resp.choices[0].message.content or FALLBACK_MESSAGE

    citations = [{"id": faq["id"], "source": faq["source"]} for faq in matches]
    return answer, citations