Update app.py
app.py CHANGED
@@ -133,27 +133,30 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
         # 2. Build a Qwen-chat prompt (helper defined earlier)
         prompt = build_qwen_prompt(system, context_list, question)
 
-        # 3.
+        # 3. Tokenise & cap
         load_chat()
         tokens = tokenizer(
             prompt,
             return_tensors="pt",
-            add_special_tokens=False,
+            add_special_tokens=False,  # we built the chat template ourselves
         )
-
+
+        if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
             tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
 
-
+        tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
 
-        # --- generate ------------------------------------------------------
-
-
-
-
-
-
-
+        # --- generate ------------------------------------------------------
+        output = chat_model.generate(
+            **tokens,
+            max_new_tokens=512,
+            max_length=MAX_PROMPT_TOKENS + 512,
+        )
+        full = tokenizer.decode(output[0], skip_special_tokens=True)
+        reply = full.split("<|im_start|>assistant")[-1].strip()
+        return reply
+
+
     except Exception as e:
         return f"Error in app.py: {e}"
     finally:

@@ -232,17 +235,17 @@ def rag(req:QueryReq):
     topk = torch.topk(sims, k=min(4, sims.size(0))).indices
     context = "\n".join(store["texts"][i] for i in topk.tolist())
 
-
+    SYSTEM_PROMPT = "You are a helpful assistant."
     prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
 
-
     load_chat()
     tokens = tokenizer(
-
-
-
+        prompt,
+        return_tensors="pt",
+        add_special_tokens=False,
     )
-
+
+    if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
         tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
 
     tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}

@@ -253,6 +256,7 @@ def rag(req:QueryReq):
         max_length=MAX_PROMPT_TOKENS + 512,
     )
 
+
     full = tokenizer.decode(out[0], skip_special_tokens=True)
     ans = full.split("<|im_start|>assistant")[-1].strip()
     return {"answer": ans}
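
A few notes on the changed code. The build_qwen_prompt helper is referenced but not shown in this diff. Since the code later splits the decoded text on <|im_start|>assistant, it presumably emits Qwen's ChatML template; a minimal sketch under that assumption (the real helper in app.py may differ):

def build_qwen_prompt(system: str, contexts: list[str], question: str) -> str:
    # Hypothetical reconstruction: ChatML roles with the retrieved context
    # folded into the user turn, ending with an open assistant turn.
    context_block = "\n\n".join(contexts)
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\nContext:\n{context_block}\n\n"
        f"Question: {question}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )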
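
The cap v[:, -MAX_PROMPT_TOKENS:] keeps the trailing tokens of an over-long prompt, i.e. it truncates from the left. Assuming a standard Hugging Face tokenizer, the same effect can be obtained at tokenisation time, which keeps input_ids and attention_mask consistent by construction:

tokenizer.truncation_side = "left"  # drop tokens from the start, not the end
tokens = tokenizer(
    prompt,
    return_tensors="pt",
    add_special_tokens=False,
    truncation=True,
    max_length=MAX_PROMPT_TOKENS,
)

Either way, left truncation can slice off the <|im_start|>system header when the retrieved context is long, which is worth keeping in mind when tuning MAX_PROMPT_TOKENS.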
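
Two small caveats in the new generate/decode block. First, max_new_tokens and max_length are both passed; in transformers, max_new_tokens takes precedence and the combination triggers a warning, so one of the two is redundant. Second, splitting the decoded text on <|im_start|>assistant only works if that marker survives decoding: when <|im_start|> is registered as a special token, skip_special_tokens=True strips it and the split returns the whole text unchanged. A more robust extraction, reusing the variable names from the diff, is to decode only the newly generated ids:

prompt_len = tokens["input_ids"].shape[1]  # generate() echoes the prompt first
reply = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()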
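
In the rag endpoint, sims and store are built earlier in the file and not shown in this diff. A plausible layout, assumed here purely for illustration: store["texts"] holds the chunk strings alongside a parallel tensor of chunk embeddings, with sims the cosine similarities against the query embedding:

import torch
import torch.nn.functional as F

# Assumed layout: store["embs"] is (num_chunks, dim); embed_query() is a
# hypothetical stand-in for however app.py embeds the incoming question.
query_emb = F.normalize(embed_query(req.question), dim=-1)
chunk_embs = F.normalize(store["embs"], dim=-1)
sims = chunk_embs @ query_emb                            # (num_chunks,)
topk = torch.topk(sims, k=min(4, sims.size(0))).indices  # top-4 chunks, as in the diff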