fsojni committed
Commit 4586eff · verified · 1 Parent(s): d926ec5

Update app.py

Files changed (1)
  1. app.py +24 -24
app.py CHANGED
@@ -136,24 +136,24 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
         # 3. Generate and strip everything before the assistant tag
         load_chat()
         tokens = tokenizer(
-            prompt,
-            return_tensors="pt",
-            add_special_tokens=False,  # important – we already built chat template
-        )
-        if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
-            tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
+            prompt,
+            return_tensors="pt",
+            add_special_tokens=False,  # important – we already built chat template
+        )
+        if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
+            tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
 
-        tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
+        tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
 
         # --- generate ------------------------------------------------------
-        output = chat_model.generate(
-            **tokens,
-            max_new_tokens=512,
-            max_length=MAX_PROMPT_TOKENS + 512,
-        )
-        full = tokenizer.decode(output[0], skip_special_tokens=True)
-        reply = full.split("<|im_start|>assistant")[-1].strip()
-        return reply
+        output = chat_model.generate(
+            **tokens,
+            max_new_tokens=512,
+            max_length=MAX_PROMPT_TOKENS + 512,
+        )
+        full = tokenizer.decode(output[0], skip_special_tokens=True)
+        reply = full.split("<|im_start|>assistant")[-1].strip()
+        return reply
     except Exception as e:
         return f"Error in app.py: {e}"
     finally:
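
For reference, here is a minimal standalone sketch of the pattern in this hunk: tokenize an already-templated prompt, left-truncate to a token budget, generate, then recover only the reply. The model id, token budget, and prompt are placeholder assumptions, not the values app.py uses. The sketch also slices off the prompt tokens before decoding rather than splitting on the <|im_start|>assistant tag, because skip_special_tokens=True strips that tag from the decoded text.

# Minimal sketch, not app.py's actual setup: model id, budget, and prompt
# below are illustrative assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder ChatML-style model
MAX_PROMPT_TOKENS = 2048                 # placeholder budget

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
chat_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Build the prompt with the chat template, so special tokens are already in place.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What is retrieval-augmented generation?"}],
    tokenize=False,
    add_generation_prompt=True,
)

tokens = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
    # Keep the most recent tokens so the question at the end survives truncation.
    tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}

output = chat_model.generate(**tokens, max_new_tokens=512)

# Decode only the newly generated tokens; splitting the full decode on
# "<|im_start|>assistant" fails once skip_special_tokens=True removes the tag.
prompt_len = tokens["input_ids"].size(1)
reply = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(reply)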
@@ -241,17 +241,17 @@ def rag(req:QueryReq):
         prompt,
         return_tensors="pt",
         add_special_tokens=False,
-    )
-    if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
-        tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
+    )
+    if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
+        tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
 
-    tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
+    tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
 
-    out = chat_model.generate(
-        **tokens,
-        max_new_tokens=512,
-        max_length=MAX_PROMPT_TOKENS + 512,
-    )
+    out = chat_model.generate(
+        **tokens,
+        max_new_tokens=512,
+        max_length=MAX_PROMPT_TOKENS + 512,
+    )
 
     full = tokenizer.decode(out[0], skip_special_tokens=True)
     ans = full.split("<|im_start|>assistant")[-1].strip()
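
The second hunk applies the same change inside the rag endpoint. For orientation, here is a hypothetical sketch of the FastAPI route shape implied by def rag(req:QueryReq); the QueryReq fields and the response shape are assumptions, not app.py's actual schema.

# Hypothetical route skeleton; field names and response shape are assumptions.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class QueryReq(BaseModel):
    question: str
    user_id: str = "demo"

@app.post("/rag")
def rag(req: QueryReq):
    # In app.py this is where retrieval, prompt building, and the
    # tokenize / truncate / generate steps from the hunk above would run.
    ans = f"stub answer for: {req.question}"
    return {"answer": ans}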
 