fsojni committed on
Commit
50e96a1
·
verified ·
1 Parent(s): 4586eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -133,27 +133,30 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
133
  # 2. Build a Qwen-chat prompt (helper defined earlier)
134
  prompt = build_qwen_prompt(system, context_list, question)
135
 
136
- # 3. Generate and strip everything before the assistant tag
137
  load_chat()
138
  tokens = tokenizer(
139
  prompt,
140
  return_tensors="pt",
141
- add_special_tokens=False, # important – we already built chat template
142
  )
143
- if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
 
144
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
145
 
146
- tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
147
 
148
- # --- generate ------------------------------------------------------
149
- output = chat_model.generate(
150
- **tokens,
151
- max_new_tokens=512,
152
- max_length=MAX_PROMPT_TOKENS + 512,
153
- )
154
- full = tokenizer.decode(output[0], skip_special_tokens=True)
155
- reply = full.split("<|im_start|>assistant")[-1].strip()
156
- return reply
 
 
157
  except Exception as e:
158
  return f"Error in app.py: {e}"
159
  finally:
@@ -232,17 +235,17 @@ def rag(req:QueryReq):
232
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
233
  context = "\n".join(store["texts"][i] for i in topk.tolist())
234
 
235
- SYSTEM_PROMPT = "You are a helpful assistant."
236
  prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
237
 
238
-
239
  load_chat()
240
  tokens = tokenizer(
241
- prompt,
242
- return_tensors="pt",
243
- add_special_tokens=False,
244
  )
245
- if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
 
246
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
247
 
248
  tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
@@ -253,6 +256,7 @@ def rag(req:QueryReq):
253
  max_length=MAX_PROMPT_TOKENS + 512,
254
  )
255
 
 
256
  full = tokenizer.decode(out[0], skip_special_tokens=True)
257
  ans = full.split("<|im_start|>assistant")[-1].strip()
258
  return {"answer": ans}
 
133
  # 2. Build a Qwen-chat prompt (helper defined earlier)
134
  prompt = build_qwen_prompt(system, context_list, question)
135
 
136
+ # 3. Tokenise & cap
137
  load_chat()
138
  tokens = tokenizer(
139
  prompt,
140
  return_tensors="pt",
141
+ add_special_tokens=False, # we built the chat template ourselves
142
  )
143
+
144
+ if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
145
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
146
 
147
+ tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
148
 
149
+ # --- generate ------------------------------------------------------
150
+ output = chat_model.generate(
151
+ **tokens,
152
+ max_new_tokens=512,
153
+ max_length=MAX_PROMPT_TOKENS + 512,
154
+ )
155
+ full = tokenizer.decode(output[0], skip_special_tokens=True)
156
+ reply = full.split("<|im_start|>assistant")[-1].strip()
157
+ return reply
158
+
159
+
160
  except Exception as e:
161
  return f"Error in app.py: {e}"
162
  finally:
 
235
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
236
  context = "\n".join(store["texts"][i] for i in topk.tolist())
237
 
238
+ SYSTEM_PROMPT = "You are a helpful assistant."
239
  prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
240
 
 
241
  load_chat()
242
  tokens = tokenizer(
243
+ prompt,
244
+ return_tensors="pt",
245
+ add_special_tokens=False,
246
  )
247
+
248
+ if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
249
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
250
 
251
  tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
 
256
  max_length=MAX_PROMPT_TOKENS + 512,
257
  )
258
 
259
+
260
  full = tokenizer.decode(out[0], skip_special_tokens=True)
261
  ans = full.split("<|im_start|>assistant")[-1].strip()
262
  return {"answer": ans}