Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -136,24 +136,24 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
|
|
| 136 |
# 3. Generate and strip everything before the assistant tag
|
| 137 |
load_chat()
|
| 138 |
tokens = tokenizer(
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
)
|
| 143 |
-
if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
|
| 144 |
-
|
| 145 |
|
| 146 |
-
tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
|
| 147 |
|
| 148 |
# --- generate ------------------------------------------------------
|
| 149 |
-
output = chat_model.generate(
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
)
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
except Exception as e:
|
| 158 |
return f"Error in app.py: {e}"
|
| 159 |
finally:
|
|
@@ -241,17 +241,17 @@ def rag(req:QueryReq):
|
|
| 241 |
prompt,
|
| 242 |
return_tensors="pt",
|
| 243 |
add_special_tokens=False,
|
| 244 |
-
)
|
| 245 |
-
if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
|
| 246 |
-
|
| 247 |
|
| 248 |
-
tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
|
| 249 |
|
| 250 |
-
out = chat_model.generate(
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
)
|
| 255 |
|
| 256 |
full = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 257 |
ans = full.split("<|im_start|>assistant")[-1].strip()
|
|
|
|
| 136 |
# 3. Generate and strip everything before the assistant tag
|
| 137 |
load_chat()
|
| 138 |
tokens = tokenizer(
|
| 139 |
+
prompt,
|
| 140 |
+
return_tensors="pt",
|
| 141 |
+
add_special_tokens=False, # important – the chat template has already been applied to the prompt
|
| 142 |
+
)
|
| 143 |
+
if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
|
| 144 |
+
tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
|
| 145 |
|
| 146 |
+
tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
|
| 147 |
|
| 148 |
# --- generate ------------------------------------------------------
|
| 149 |
+
output = chat_model.generate(
|
| 150 |
+
**tokens,
|
| 151 |
+
max_new_tokens=512,
|
| 152 |
+
max_length=MAX_PROMPT_TOKENS + 512,
|
| 153 |
+
)
|
| 154 |
+
full = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 155 |
+
reply = full.split("<|im_start|>assistant")[-1].strip()
|
| 156 |
+
return reply
|
| 157 |
except Exception as e:
|
| 158 |
return f"Error in app.py: {e}"
|
| 159 |
finally:
|
|
|
|
| 241 |
prompt,
|
| 242 |
return_tensors="pt",
|
| 243 |
add_special_tokens=False,
|
| 244 |
+
)
|
| 245 |
+
if tokens.input_ids.size(1) > MAX_PROMPT_TOKENS:
|
| 246 |
+
tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
|
| 247 |
|
| 248 |
+
tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
|
| 249 |
|
| 250 |
+
out = chat_model.generate(
|
| 251 |
+
**tokens,
|
| 252 |
+
max_new_tokens=512,
|
| 253 |
+
max_length=MAX_PROMPT_TOKENS + 512,
|
| 254 |
+
)
|
| 255 |
|
| 256 |
full = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 257 |
ans = full.split("<|im_start|>assistant")[-1].strip()
|