import re
from dataclasses import dataclass
from typing import List, Tuple
from data import RAW_DATA, GENERIC_HELP, EXAMPLES
import gradio as gr
# ======= Retrieval utilities (dependency-free) =======
@dataclass
class QA:
    """One canned question/answer pair plus keyword hints used for retrieval."""

    # Canonical question text for this entry.
    question: str
    # Canned answer returned to the user when this entry is the best match.
    answer: str
    # Extra hint tokens (populated by build_qa_bank) that boost score_query.
    keywords: List[str]
def normalize(text: str) -> str:
    """Lowercase *text*, blank out disallowed characters, and collapse whitespace."""
    lowered = text.lower()
    # Everything outside the allowed character set becomes a space.
    cleaned = re.sub(r"[^a-z0-9\s\(\)\-\'&/]", " ", lowered)
    return re.sub(r"\s+", " ", cleaned).strip()
# A simplified tokenizer to reduce latency
def tokenize(text: str) -> List[str]:
    """Return the whitespace-separated tokens of the normalized form of *text*."""
    normalized = normalize(text)
    return normalized.split()
# Other similarity measures could be used, but jaccard is simple enough and it works
def jaccard(a: List[str], b: List[str]) -> float:
    """Jaccard similarity of the token lists *a* and *b*; 0.0 when both are empty."""
    left = set(a)
    right = set(b)
    if left or right:
        return len(left & right) / len(left | right)
    return 0.0
def seq_ratio(a: str, b: str) -> float:
    """Character-set overlap of *a* and *b*, scaled by the larger set.

    Lightweight character-overlap ratio (no external dependencies).
    """
    chars_a = set(a)
    chars_b = set(b)
    denom = max(len(chars_a), len(chars_b))
    # denom == 0 only when both strings are empty.
    if denom == 0:
        return 0.0
    return len(chars_a & chars_b) / denom
def contains_any(text: str, needles: List[str]) -> int:
    """Count how many of *needles* occur as substrings of the normalized text."""
    haystack = normalize(text)
    hits = 0
    for needle in needles:
        if needle in haystack:
            hits += 1
    return hits
# Trigger substrings (matched against the lowercased question) and the keyword
# hints each one contributes. Evaluated in order so keyword order is stable.
_KEYWORD_RULES: List[Tuple[Tuple[str, ...], List[str]]] = [
    (("eva",), ["eva", "eligibility", "benefits", "verification"]),
    (("cam",), ["cam", "claims", "processing", "reimbursement"]),
    (("phil",), ["phil", "payment", "posting", "reconciliation"]),
    (("agents", "thoughtful ai"), ["agents", "thoughtful ai", "suite", "automation", "healthcare"]),
]


def build_qa_bank(raw) -> List[QA]:
    """Build QA entries from raw["questions"], attaching retrieval keyword hints."""
    entries: List[QA] = []
    for record in raw["questions"]:
        question = record["question"]
        answer = record["answer"]
        lowered = question.lower()
        hints: List[str] = []
        for triggers, extra in _KEYWORD_RULES:
            if any(trigger in lowered for trigger in triggers):
                hints.extend(extra)
        entries.append(QA(question, answer, hints))
    return entries
# Module-level bank of canned Q&A entries, built once at import time.
QA_BANK = build_qa_bank(RAW_DATA)
def score_query(user_msg: str, qa: QA) -> float:
    """Return a confidence score for how well `qa` answers `user_msg`.

    Blends word overlap, character overlap against both the question and the
    answer, keyword hits, and a small bump when the user mentions "agent".
    The result is capped at 1.5.
    """
    msg_norm = normalize(user_msg)
    msg_tokens = tokenize(msg_norm)
    entry_tokens = tokenize(qa.question + " " + qa.answer)

    # Weighted blend of the individual signals.
    word_overlap = 0.5 * jaccard(msg_tokens, entry_tokens)
    char_vs_question = 0.25 * seq_ratio(msg_norm, normalize(qa.question))
    char_vs_answer = 0.15 * seq_ratio(msg_norm, normalize(qa.answer))
    keyword_bonus = 0.06 * contains_any(msg_norm, qa.keywords)
    agent_bonus = 0.03 if "agent" in msg_norm else 0.0

    total = word_overlap + char_vs_question + char_vs_answer + keyword_bonus + agent_bonus
    return min(total, 1.5)
def retrieve_best_answer(user_msg: str) -> Tuple[str, str, float]:
    """Return (matched_question, answer, score) for the best-scoring QA entry.

    Raises:
        ValueError: if QA_BANK is empty. (Previously an empty bank crashed
        with an opaque ``AttributeError`` on ``None``.)
    """
    if not QA_BANK:
        raise ValueError("QA bank is empty; cannot retrieve an answer.")
    best = None
    best_score = -1.0
    for qa in QA_BANK:
        s = score_query(user_msg, qa)
        # Strict '>' keeps the first entry on ties (stable ordering).
        if s > best_score:
            best, best_score = qa, s
    return best.question, best.answer, best_score
# ======= Chat logic =======
def chat_step(user_msg: str, history: List[Tuple[str, str]], show_conf: bool):
    """
    Stateless step function for the UI.
    Returns updated history and an empty textbox string.
    """
    try:
        user_msg = (user_msg or "").strip()
        if not user_msg:
            # gentle nudge without crashing the flow
            reply = "Please enter a question about Thoughtful AI’s agents (EVA, CAM, PHIL)."
            return history + [(user_msg, reply)], ""

        matched_q, answer, score = retrieve_best_answer(user_msg)
        # Arbitrarily setting the matching score to 0.18
        if score >= 0.18:
            reply = f"**Answer:** {answer}"
            # NOTE(review): confidence footer reconstructed as applying only to
            # matched answers — confirm against the original indentation.
            if show_conf:
                reply += (
                    f"\n\n_Matched topic:_ “{matched_q}” \n"
                    f"_Confidence:_ {score:.2f}"
                )
        else:
            reply = (
                f"Here’s a quick overview:\n\n{GENERIC_HELP}\n\n"
                f"_Tip: mention an agent name like EVA, CAM, or PHIL for a precise answer._"
            )
        return history + [(user_msg, reply)], ""
    except Exception as exc:
        # UI Robustness
        reply = (
            "Sorry — I ran into an unexpected error while processing that. "
            "Please try again or rephrase your question."
        )
        # In a real setting, I would log `exc` to a file/monitoring system.
        print(exc)
        return history + [(user_msg or "", reply)], ""
# ======= UI =======
CSS = """
#app-title {font-size: 28px; font-weight: 700; margin-bottom: 2px;}
#app-sub {opacity: 0.8; margin-bottom: 16px;}
"""

with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "<div id='app-title'>Thoughtful AI – Support Assistant</div>"
        "<div id='app-sub'>Ask about EVA, CAM, PHIL, or general benefits.</div>"
    )
    with gr.Row():
        show_conf = gr.Checkbox(label="Show match & confidence", value=True)
    chatbot = gr.Chatbot(type='tuples', height=380)
    with gr.Row():
        inp = gr.Textbox(placeholder="Ask a question about Thoughtful AI…", lines=2)
    with gr.Row():
        submit = gr.Button("Ask", variant="primary")
        clear = gr.Button("Clear Chat")
    gr.Examples(examples=EXAMPLES, inputs=inp, label="Try these")
    state = gr.State([])  # chat history

    # chat_step already returns (new_history, cleared_textbox), so both the
    # button click and textbox Enter can route straight to it.
    submit.click(chat_step, inputs=[inp, state, show_conf], outputs=[chatbot, inp])
    inp.submit(chat_step, inputs=[inp, state, show_conf], outputs=[chatbot, inp])

    # Reset both the visible chat and the input box.
    clear.click(lambda: ([], ""), outputs=[chatbot, inp])

    # keep state in sync with what's shown
    chatbot.change(lambda chat_history: chat_history, inputs=[chatbot], outputs=[state])

if __name__ == "__main__":
    demo.launch()