fsojni committed on
Commit
50e96a1
·
verified ·
1 Parent(s): 4586eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -133,27 +133,30 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
133
  # 2. Build a Qwen-chat prompt (helper defined earlier)
134
  prompt = build_qwen_prompt(system, context_list, question)
135
 
136
- # 3. Generate and strip everything before the assistant tag
137
  load_chat()
138
  tokens = tokenizer(
139
  prompt,
140
  return_tensors="pt",
141
- add_special_tokens=False, # important – we already built chat template
142
  )
143
- if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
 
144
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
145
 
146
- tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
147
 
148
- # --- generate ------------------------------------------------------
149
- output = chat_model.generate(
150
- **tokens,
151
- max_new_tokens=512,
152
- max_length=MAX_PROMPT_TOKENS + 512,
153
- )
154
- full = tokenizer.decode(output[0], skip_special_tokens=True)
155
- reply = full.split("<|im_start|>assistant")[-1].strip()
156
- return reply
 
 
157
  except Exception as e:
158
  return f"Error in app.py: {e}"
159
  finally:
@@ -232,17 +235,17 @@ def rag(req:QueryReq):
232
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
233
  context = "\n".join(store["texts"][i] for i in topk.tolist())
234
 
235
- SYSTEM_PROMPT = "You are a helpful assistant."
236
  prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
237
 
238
-
239
  load_chat()
240
  tokens = tokenizer(
241
- prompt,
242
- return_tensors="pt",
243
- add_special_tokens=False,
244
  )
245
- if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
 
246
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
247
 
248
  tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
@@ -253,6 +256,7 @@ def rag(req:QueryReq):
253
  max_length=MAX_PROMPT_TOKENS + 512,
254
  )
255
 
 
256
  full = tokenizer.decode(out[0], skip_special_tokens=True)
257
  ans = full.split("<|im_start|>assistant")[-1].strip()
258
  return {"answer": ans}
 
133
  # 2. Build a Qwen-chat prompt (helper defined earlier)
134
  prompt = build_qwen_prompt(system, context_list, question)
135
 
136
+ # 3. Tokenise & cap
137
  load_chat()
138
  tokens = tokenizer(
139
  prompt,
140
  return_tensors="pt",
141
+ add_special_tokens=False, # we built the chat template ourselves
142
  )
143
+
144
+ if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
145
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
146
 
147
+ tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
148
 
149
+ # --- generate ------------------------------------------------------
150
+ output = chat_model.generate(
151
+ **tokens,
152
+ max_new_tokens=512,
153
+ max_length=MAX_PROMPT_TOKENS + 512,
154
+ )
155
+ full = tokenizer.decode(output[0], skip_special_tokens=True)
156
+ reply = full.split("<|im_start|>assistant")[-1].strip()
157
+ return reply
158
+
159
+
160
  except Exception as e:
161
  return f"Error in app.py: {e}"
162
  finally:
 
235
  topk = torch.topk(sims, k=min(4, sims.size(0))).indices
236
  context = "\n".join(store["texts"][i] for i in topk.tolist())
237
 
238
+ SYSTEM_PROMPT = "You are a helpful assistant."
239
  prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
240
 
 
241
  load_chat()
242
  tokens = tokenizer(
243
+ prompt,
244
+ return_tensors="pt",
245
+ add_special_tokens=False,
246
  )
247
+
248
+ if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
249
  tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
250
 
251
  tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
 
256
  max_length=MAX_PROMPT_TOKENS + 512,
257
  )
258
 
259
+
260
  full = tokenizer.decode(out[0], skip_special_tokens=True)
261
  ans = full.split("<|im_start|>assistant")[-1].strip()
262
  return {"answer": ans}