import re
from dataclasses import dataclass
from typing import List, Tuple
from data import RAW_DATA, GENERIC_HELP, EXAMPLES
import gradio as gr
# ======= Retrieval utilities (dependency-free) =======
@dataclass
class QA:
    """One canned question/answer pair plus keyword hints used for retrieval."""

    # Canonical question text for this entry.
    question: str
    # Canned answer returned to the user when this entry is the best match.
    answer: str
    # Extra hint tokens (populated by build_qa_bank) that boost score_query.
    keywords: List[str]
def normalize(text: str) -> str:
    """Lowercase *text*, blank out disallowed characters, and collapse whitespace."""
    lowered = text.lower()
    # Everything outside the allowed character set becomes a space.
    cleaned = re.sub(r"[^a-z0-9\s\(\)\-\'&/]", " ", lowered)
    return re.sub(r"\s+", " ", cleaned).strip()
# A simplified tokenizer to reduce latency
def tokenize(text: str) -> List[str]:
    """Return the whitespace-separated tokens of the normalized form of *text*."""
    normalized = normalize(text)
    return normalized.split()
# Other similarity measures could be used, but jaccard is simple enough and it works
def jaccard(a: List[str], b: List[str]) -> float:
    """Jaccard similarity of the token lists *a* and *b*; 0.0 when both are empty."""
    left = set(a)
    right = set(b)
    if left or right:
        return len(left & right) / len(left | right)
    return 0.0
def seq_ratio(a: str, b: str) -> float:
    """Character-set overlap of *a* and *b*, scaled by the larger set.

    Lightweight character-overlap ratio (no external dependencies).
    """
    chars_a = set(a)
    chars_b = set(b)
    denom = max(len(chars_a), len(chars_b))
    # denom == 0 only when both strings are empty.
    if denom == 0:
        return 0.0
    return len(chars_a & chars_b) / denom
def contains_any(text: str, needles: List[str]) -> int:
    """Count how many of *needles* occur as substrings of the normalized text."""
    haystack = normalize(text)
    hits = 0
    for needle in needles:
        if needle in haystack:
            hits += 1
    return hits
# Trigger substrings (matched against the lowercased question) and the keyword
# hints each one contributes. Evaluated in order so keyword order is stable.
_KEYWORD_RULES: List[Tuple[Tuple[str, ...], List[str]]] = [
    (("eva",), ["eva", "eligibility", "benefits", "verification"]),
    (("cam",), ["cam", "claims", "processing", "reimbursement"]),
    (("phil",), ["phil", "payment", "posting", "reconciliation"]),
    (("agents", "thoughtful ai"), ["agents", "thoughtful ai", "suite", "automation", "healthcare"]),
]


def build_qa_bank(raw) -> List[QA]:
    """Build QA entries from raw["questions"], attaching retrieval keyword hints."""
    entries: List[QA] = []
    for record in raw["questions"]:
        question = record["question"]
        answer = record["answer"]
        lowered = question.lower()
        hints: List[str] = []
        for triggers, extra in _KEYWORD_RULES:
            if any(trigger in lowered for trigger in triggers):
                hints.extend(extra)
        entries.append(QA(question, answer, hints))
    return entries
# Module-level bank of canned Q&A entries, built once at import time.
QA_BANK = build_qa_bank(RAW_DATA)
def score_query(user_msg: str, qa: QA) -> float:
    """Return a confidence score for how well `qa` answers `user_msg`.

    Blends word overlap, character overlap against both the question and the
    answer, keyword hits, and a small bump when the user mentions "agent".
    The result is capped at 1.5.
    """
    msg_norm = normalize(user_msg)
    msg_tokens = tokenize(msg_norm)
    entry_tokens = tokenize(qa.question + " " + qa.answer)

    # Weighted blend of the individual signals.
    word_overlap = 0.5 * jaccard(msg_tokens, entry_tokens)
    char_vs_question = 0.25 * seq_ratio(msg_norm, normalize(qa.question))
    char_vs_answer = 0.15 * seq_ratio(msg_norm, normalize(qa.answer))
    keyword_bonus = 0.06 * contains_any(msg_norm, qa.keywords)
    agent_bonus = 0.03 if "agent" in msg_norm else 0.0

    total = word_overlap + char_vs_question + char_vs_answer + keyword_bonus + agent_bonus
    return min(total, 1.5)
def retrieve_best_answer(user_msg: str) -> Tuple[str, str, float]:
    """Return (matched_question, answer, score) for the best-scoring QA entry.

    Raises:
        ValueError: if QA_BANK is empty. (Previously an empty bank crashed
        with an opaque ``AttributeError`` on ``None``.)
    """
    if not QA_BANK:
        raise ValueError("QA bank is empty; cannot retrieve an answer.")
    best = None
    best_score = -1.0
    for qa in QA_BANK:
        s = score_query(user_msg, qa)
        # Strict '>' keeps the first entry on ties (stable ordering).
        if s > best_score:
            best, best_score = qa, s
    return best.question, best.answer, best_score
# ======= Chat logic =======
def chat_step(user_msg: str, history: List[Tuple[str, str]], show_conf: bool):
    """
    Stateless step function for the UI.
    Returns updated history and an empty textbox string.
    """
    try:
        user_msg = (user_msg or "").strip()
        if not user_msg:
            # gentle nudge without crashing the flow
            reply = "Please enter a question about Thoughtful AI’s agents (EVA, CAM, PHIL)."
            return history + [(user_msg, reply)], ""

        matched_q, answer, score = retrieve_best_answer(user_msg)
        # Arbitrarily setting the matching score to 0.18
        if score >= 0.18:
            reply = f"**Answer:** {answer}"
            # NOTE(review): confidence footer reconstructed as applying only to
            # matched answers — confirm against the original indentation.
            if show_conf:
                reply += (
                    f"\n\n_Matched topic:_ “{matched_q}” \n"
                    f"_Confidence:_ {score:.2f}"
                )
        else:
            reply = (
                f"Here’s a quick overview:\n\n{GENERIC_HELP}\n\n"
                f"_Tip: mention an agent name like EVA, CAM, or PHIL for a precise answer._"
            )
        return history + [(user_msg, reply)], ""
    except Exception as exc:
        # UI Robustness
        reply = (
            "Sorry — I ran into an unexpected error while processing that. "
            "Please try again or rephrase your question."
        )
        # In a real setting, I would log `exc` to a file/monitoring system.
        print(exc)
        return history + [(user_msg or "", reply)], ""
# ======= UI =======
CSS = """
#app-title {font-size: 28px; font-weight: 700; margin-bottom: 2px;}
#app-sub {opacity: 0.8; margin-bottom: 16px;}
"""

with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "<div id='app-title'>Thoughtful AI – Support Assistant</div>"
        "<div id='app-sub'>Ask about EVA, CAM, PHIL, or general benefits.</div>"
    )
    with gr.Row():
        show_conf = gr.Checkbox(label="Show match & confidence", value=True)
    chatbot = gr.Chatbot(type='tuples', height=380)
    with gr.Row():
        inp = gr.Textbox(placeholder="Ask a question about Thoughtful AI…", lines=2)
    with gr.Row():
        submit = gr.Button("Ask", variant="primary")
        clear = gr.Button("Clear Chat")
    gr.Examples(examples=EXAMPLES, inputs=inp, label="Try these")
    state = gr.State([])  # chat history

    # chat_step already returns (new_history, cleared_textbox), so both the
    # button click and textbox Enter can route straight to it.
    submit.click(chat_step, inputs=[inp, state, show_conf], outputs=[chatbot, inp])
    inp.submit(chat_step, inputs=[inp, state, show_conf], outputs=[chatbot, inp])

    # Reset both the visible chat and the input box.
    clear.click(lambda: ([], ""), outputs=[chatbot, inp])

    # keep state in sync with what's shown
    chatbot.change(lambda chat_history: chat_history, inputs=[chatbot], outputs=[state])

if __name__ == "__main__":
    demo.launch()