Cyantist8208 committed on
Commit
c0c0f5a
·
1 Parent(s): 2102b2f
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -116,19 +116,12 @@ def build_llm_prompt(system: str, context: list[str], user_question: str) -> str
116
  conversation.append({"role": "user", "content": user_question.strip()})
117
 
118
  # 套用 LLaMA-style prompt 格式
119
- input_token = tokenizer.apply_chat_template(
120
  conversation,
121
- add_generation_prompt=True,
122
- return_tensors="pt"
123
  )
124
 
125
- terminators = [
126
- tokenizer.eos_token_id,
127
- tokenizer.convert_tokens_to_ids("<|eot_id|>")
128
- ]
129
-
130
- return input_token, terminators
131
-
132
  # ---------- 4. Gradio playground (same UI as before) --------------------------
133
  def store_doc(doc_text: str,user_id="demo",chunk_size=DEFAULT_CHUNK_SIZE,chunk_overlap=DEFAULT_CHUNK_OVERLAP):
134
  try:
@@ -171,13 +164,25 @@ def answer(system: str, context: str, question: str,
171
  context_list += store["texts"]
172
 
173
  # 2. Build a Qwen-chat prompt (helper defined earlier)
174
- input_ids, terminators = build_llm_prompt(system, context_list, question)
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  # --- generate ------------------------------------------------------
177
  output = chat_model.generate(
178
- input_ids,
179
  max_new_tokens=512,
180
- eos_token_id=terminators,
181
  max_length=MAX_PROMPT_TOKENS + 512,
182
  do_sample=True,
183
  temperature=temperature,
@@ -185,7 +190,7 @@ def answer(system: str, context: str, question: str,
185
  top_k=top_k_tok
186
  )
187
  full = tokenizer.decode(output[0], skip_special_tokens=True)
188
- reply = full.split("<|im_start|>assistant")[-1].strip()
189
  return reply
190
 
191
  except Exception as e:
 
116
  conversation.append({"role": "user", "content": user_question.strip()})
117
 
118
  # 套用 LLaMA-style prompt 格式
119
+ return tokenizer.apply_chat_template(
120
  conversation,
121
+ tokenize=False,
122
+ add_generation_prompt=False
123
  )
124
 
 
 
 
 
 
 
 
125
  # ---------- 4. Gradio playground (same UI as before) --------------------------
126
  def store_doc(doc_text: str,user_id="demo",chunk_size=DEFAULT_CHUNK_SIZE,chunk_overlap=DEFAULT_CHUNK_OVERLAP):
127
  try:
 
164
  context_list += store["texts"]
165
 
166
  # 2. Build a Qwen-chat prompt (helper defined earlier)
167
+ prompt = build_llm_prompt(system, context_list, question)
168
+
169
+ # 3. Tokenise & cap
170
+ load_chat()
171
+ tokens = tokenizer(
172
+ prompt,
173
+ return_tensors="pt",
174
+ add_special_tokens=False, # we built the chat template ourselves
175
+ )
176
+
177
+ if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
178
+ tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
179
+
180
+ tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
181
 
182
  # --- generate ------------------------------------------------------
183
  output = chat_model.generate(
184
+ **tokens,
185
  max_new_tokens=512,
 
186
  max_length=MAX_PROMPT_TOKENS + 512,
187
  do_sample=True,
188
  temperature=temperature,
 
190
  top_k=top_k_tok
191
  )
192
  full = tokenizer.decode(output[0], skip_special_tokens=True)
193
+ reply = full.split("<|im_start|>assistant")[-1].strip() + tokenizer.chat_template
194
  return reply
195
 
196
  except Exception as e: