Spaces:

Email-addon
/

GmailAddOn

Sleeping

fsojni committed on May 23, 2025

Commit

6947209

verified ·

1 Parent(s): d52709d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -130,7 +130,23 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
             context_list += store["texts"]
         # 2.  Build a Qwen-chat prompt (helper defined earlier)
-        prompt = build_qwen_prompt(system, context_list, question)
         # 3.  Generate and strip everything before the assistant tag
         load_chat()

             context_list += store["texts"]
         # 2.  Build a Qwen-chat prompt (helper defined earlier)
+        MAX_PROMPT_TOKENS = 8192          # 8 k is ~4 GB KV-cache
+    prompt = build_qwen_prompt(system, context_list, question)
+    tokens = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
+    if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
+    # keep the last MAX_PROMPT_TOKENS tokens (most recent content)
+        tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
+    tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
+    output = chat_model.generate(
+        **tokens,
+        max_new_tokens=512,
+        max_length=MAX_PROMPT_TOKENS + 512,
+    )
         # 3.  Generate and strip everything before the assistant tag
         load_chat()