fsojni committed on
Commit
71db3d7
·
verified ·
1 Parent(s): 2756958
Files changed (1) hide show
  1. app.py +25 -23
app.py CHANGED
@@ -73,6 +73,19 @@ def add_docs(user_id: str, docs: list[str]) -> int:
73
  else torch.cat([store["vecs"], new_vecs])
74
  )
75
  return len(docs)
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # ---------- 3. FastAPI layer --------------------------------------------------
78
  class IngestReq(BaseModel):
@@ -104,13 +117,7 @@ def rag(req:QueryReq):
104
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
105
  context = "\n".join(store["texts"][i] for i in topk.tolist())
106
 
107
- prompt = f"""You are an email assistant.
108
- Use the context to answer.
109
- Context:
110
- {context}
111
-
112
- User question: {req.question}
113
- Assistant:"""
114
 
115
  load_chat()
116
  inputs = tokenizer(prompt, return_tensors="pt").to(chat_model.device)
@@ -128,35 +135,30 @@ def store_doc(doc_text: str, user_id="demo"):
128
  return f"📚 Stored ✅ — KB now has {len(kb[user_id]['texts'])} passage(s)."
129
 
130
  def answer(question: str, user_id="demo"):
131
- """UI callback: retrieve, build prompt, generate answer."""
132
  if not question.strip():
133
  return "⚠️ Please ask a question."
134
  if not kb[user_id]["texts"]:
135
  return "⚠️ No reference passage yet. Add one first."
136
 
137
- # 1️⃣ Retrieve top-k similar chunks (k ≤ #chunks)
138
  q_vec = embed(question)
139
  store = kb[user_id]
140
- sims = torch.matmul(store["vecs"], q_vec) # [N]
141
  k = min(4, sims.numel())
142
  idxs = torch.topk(sims, k=k).indices.tolist()
143
  context = "\n".join(store["texts"][i] for i in idxs)
144
 
145
- # 2️⃣ Build prompt
146
- prompt = f"""You are an email assistant.
147
- Use ONLY the context below to answer.
148
- Context:
149
- {context}
150
 
151
- Question: {question}
152
- Answer:"""
153
-
154
- # 3️⃣ Generate
155
  load_chat()
156
- inputs = tokenizer(prompt, return_tensors="pt").to(chat_model.device)
157
- output = chat_model.generate(**inputs, max_new_tokens=512)
158
- reply = tokenizer.decode(output[0], skip_special_tokens=True)
159
- return reply.split("Answer:", 1)[-1].strip()
 
160
 
161
  # ---- UI layout (feel free to tweak cosmetics) -----------------------------
162
  with gr.Blocks() as demo:
 
73
  else torch.cat([store["vecs"], new_vecs])
74
  )
75
  return len(docs)
76
+ # ----- Qwen-chat prompt helper ---------------------------------------------
77
+ def build_qwen_prompt(context: str, user_question: str) -> str:
78
+ """Return a string that follows Qwen-Chat’s template."""
79
+ conversation = [
80
+ {"role": "system",
81
+ "content": "You are an email assistant. Use ONLY the context provided."},
82
+ {"role": "user",
83
+ "content": f"Context:\n{context}\n\n{user_question}"}
84
+ ]
85
+ # add_generation_prompt=True appends the assistant tag
86
+ return tokenizer.apply_chat_template(
87
+ conversation, tokenize=False, add_generation_prompt=True
88
+ )
89
 
90
  # ---------- 3. FastAPI layer --------------------------------------------------
91
  class IngestReq(BaseModel):
 
117
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
118
  context = "\n".join(store["texts"][i] for i in topk.tolist())
119
 
120
+ prompt = build_qwen_prompt(context, req.question)
 
 
 
 
 
 
121
 
122
  load_chat()
123
  inputs = tokenizer(prompt, return_tensors="pt").to(chat_model.device)
 
135
  return f"📚 Stored ✅ — KB now has {len(kb[user_id]['texts'])} passage(s)."
136
 
137
  def answer(question: str, user_id="demo"):
138
+ """UI callback: retrieve, build prompt with Qwen tags, generate answer."""
139
  if not question.strip():
140
  return "⚠️ Please ask a question."
141
  if not kb[user_id]["texts"]:
142
  return "⚠️ No reference passage yet. Add one first."
143
 
144
+ # 1️⃣ Retrieve top-k similar passages
145
  q_vec = embed(question)
146
  store = kb[user_id]
147
+ sims = torch.matmul(store["vecs"], q_vec) # [N]
148
  k = min(4, sims.numel())
149
  idxs = torch.topk(sims, k=k).indices.tolist()
150
  context = "\n".join(store["texts"][i] for i in idxs)
151
 
152
+ # 2️⃣ Build a Qwen-chat prompt (helper defined earlier)
153
+ prompt = build_qwen_prompt(context, question)
 
 
 
154
 
155
+ # 3️⃣ Generate and strip everything before the assistant tag
 
 
 
156
  load_chat()
157
+ inputs = tokenizer(prompt, return_tensors="pt").to(chat_model.device)
158
+ output = chat_model.generate(**inputs, max_new_tokens=512)
159
+ full = tokenizer.decode(output[0], skip_special_tokens=True)
160
+ reply = full.split("<|im_start|>assistant")[-1].strip()
161
+ return reply
162
 
163
  # ---- UI layout (feel free to tweak cosmetics) -----------------------------
164
  with gr.Blocks() as demo: