File size: 3,660 Bytes
71245aa
5bad468
71245aa
 
5bad468
71245aa
5bad468
 
71245aa
5bad468
 
71245aa
 
5bad468
 
71245aa
5bad468
71245aa
 
 
5bad468
 
 
 
 
 
 
71245aa
5bad468
 
 
 
 
 
 
 
 
9eff4e9
5bad468
9eff4e9
5bad468
 
 
 
e9be0e7
 
5bad468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71245aa
 
5bad468
71245aa
 
 
 
 
 
 
 
5bad468
71245aa
5bad468
 
71245aa
 
5bad468
71245aa
 
 
 
 
 
5bad468
 
 
 
 
 
 
 
71245aa
 
 
 
5bad468
71245aa
5bad468
 
 
71245aa
5bad468
 
71245aa
5bad468
 
71245aa
 
5bad468
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import json
import os
import gradio as gr
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import InferenceClient

# Load the sentence-embedding model once at import time; it is reused for
# the FAQ questions below and for every incoming query in chat().
print("Loading embedding model...")
model = SentenceTransformer("BAAI/bge-small-zh-v1.5")

# Read the FAQ entries from faqs.json (path is relative to the current
# working directory). Entries are indexed by "q" here and by "a" and
# "category" elsewhere in this file.
print("Loading FAQs...")
with open("faqs.json", "r", encoding="utf-8") as f:
    faqs = json.load(f)

# Pre-compute one normalized embedding per FAQ question so that answering a
# query needs only a single encode call plus a similarity lookup.
print(f"Encoding {len(faqs)} FAQ questions...")
questions = [item["q"] for item in faqs]
faq_embeddings = model.encode(questions, normalize_embeddings=True)
print("Ready!")

# Minimum top-1 cosine similarity for chat() to hand the query to the LLM;
# below it, chat() returns a canned "could not understand" reply instead.
THRESHOLD = 0.55

# --- LLM ---
# System prompt (written in Chinese) sent as the "system" message by
# llm_answer(). It instructs the model to answer strictly from the reference
# material, to say so when the material lacks the answer, to use natural
# conversational Chinese, and to stay within three sentences.
SYSTEM_PROMPT = """你是一个友好、简洁的 AI 学习答疑助手。
规则:
1. 严格基于"参考资料"回答,不要编造
2. 资料里没有的内容,直接说"我暂时没这方面的资料"
3. 用自然、口语化的中文,避免生硬复读资料原文
4. 控制在 3 句话以内"""

# User-message template; llm_answer() fills {context} with the retrieved FAQ
# Q/A pairs and {question} with the user's question.
USER_PROMPT_TEMPLATE = """【参考资料】
{context}

【用户问题】
{question}

请基于资料用自然语言回答。"""

# Shared Hugging Face inference client used by llm_answer(). The token comes
# from the HF_TOKEN environment variable (None when unset); llm_answer()
# checks for the token itself before calling the client.
client = InferenceClient(
    model="Qwen/Qwen2.5-72B-Instruct",
    token=os.environ.get("HF_TOKEN"),
    timeout=20,
)


def llm_answer(question, top_faqs):
    """Phrase an answer to *question* using the retrieved FAQ entries.

    Args:
        question: The user's question text.
        top_faqs: Retrieved FAQ entries, best match first; each entry is
            indexed by ``"q"`` and ``"a"``.

    Returns:
        The content of the first chat-completion choice. If ``HF_TOKEN`` is
        not set in the environment, or the completion call raises, the best
        match's stored answer is returned with an italic note appended that
        explains why the LLM was not used.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        # No credentials: skip the remote call and serve the stored answer.
        return top_faqs[0]["a"] + "\n\n_(需要在 Space Secrets 设置 HF_TOKEN 以启用 LLM)_"

    # Render every retrieved FAQ as a "Q: ... / A: ..." pair for the prompt.
    reference_entries = [f"Q: {faq['q']}\nA: {faq['a']}" for faq in top_faqs]
    context = "\n\n".join(reference_entries)
    user_prompt = USER_PROMPT_TEMPLATE.format(question=question, context=context)

    try:
        request = {
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            "max_tokens": 200,
            "temperature": 0.3,
        }
        completion = client.chat_completion(**request)
        return completion.choices[0].message.content
    except Exception as err:
        # Best-effort: any failure degrades to the stored answer plus a note.
        return top_faqs[0]["a"] + f"\n\n_(LLM 暂不可用:{err})_"


# --- Gradio ---
def chat(query):
    """Answer a user question from the FAQ knowledge base.

    Encodes the query, ranks the FAQ questions by cosine similarity, and
    either asks the LLM to phrase an answer from the three best matches or,
    when the best score is below ``THRESHOLD``, returns a canned
    "could not understand" reply.

    Args:
        query: Raw text from the input box; may be ``None`` or blank.

    Returns:
        A ``(reply, info, related)`` tuple of Markdown strings: the answer,
        a summary line for the best match, and a bullet list of the
        remaining top matches.
    """
    if not (query and query.strip()):
        return "请输入您的问题", "", ""

    query_vec = model.encode(query, normalize_embeddings=True)
    similarities = util.cos_sim(query_vec, faq_embeddings)[0]
    # Indices of the (up to) three most similar FAQ questions, best first.
    ranked = similarities.argsort(descending=True)[:3].tolist()
    best = ranked[0]
    best_score = float(similarities[best])

    reply = (
        "抱歉,我暂时无法理解您的问题。建议换个说法,或查看下方相关问题。"
        if best_score < THRESHOLD
        else llm_answer(query, [faqs[i] for i in ranked])
    )

    # The details line always describes the best match, even when it fell
    # below the threshold.
    best_faq = faqs[best]
    info = f"**类别**: {best_faq['category']} | **匹配度**: {best_score:.2f} | **匹配的问题**: {best_faq['q']}"

    bullets = [
        f"- {faqs[i]['q']} _(相似度 {float(similarities[i]):.2f})_\n"
        for i in ranked[1:]
    ]
    related = "### 您可能也想问:\n" + "".join(bullets)
    return reply, info, related


# Sample questions offered in the UI. The last one is unrelated to the FAQ
# topics -- presumably there to demonstrate the low-similarity fallback.
examples = [
    "embedding 是什么意思?",
    "中文应该用哪个向量模型?",
    "BERT 和 GPT 有什么不一样?",
    "pipeline 是干什么用的?",
    "AI 怎么知道两句话意思一样?",
    "怎么把模型跑到 GPU 上?",
    "为什么 LLM 会胡说八道?",
    "今天天气怎么样?",
]

# Gradio UI: one text input wired to chat(), whose three return values are
# rendered as Markdown panels (answer, retrieval details, related questions).
iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(label="您的问题", placeholder="例如:embedding 是什么?", lines=2),
    outputs=[
        gr.Markdown(label="答案"),
        gr.Markdown(label="检索详情"),
        gr.Markdown(label="相关问题"),
    ],
    title="🤖 AI 学习 FAQ 机器人(RAG)",
    # The description names the generation model the InferenceClient is
    # actually configured with (Qwen2.5-72B-Instruct) and reports the number
    # of FAQ entries really loaded instead of a hard-coded count.
    description=f"基于 BAAI/bge-small-zh-v1.5 检索 + Qwen2.5-72B-Instruct 生成 · {len(faqs)} 条 AI 学习 FAQ",
    examples=examples,
    flagging_mode="never",
    theme="soft",
)

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    iface.launch()