Update src/qa.py
src/qa.py CHANGED

```diff
@@ -160,20 +160,29 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     )
 
     try:
+        # 🧠 Adaptive length for factual mode (reasoning_mode=False)
+        if reasoning_mode:
+            max_tokens = 180  # keep reasoning slightly longer
+        else:
+            max_tokens = 120 if len(query.split()) < 6 else 180  # short Qs stay fast
+
         result = _answer_model(
             prompt,
-            max_new_tokens=
+            max_new_tokens=max_tokens,
             temperature=0.6 if reasoning_mode else 0.3,
             do_sample=reasoning_mode,
             early_stopping=True,
             pad_token_id=_tokenizer.eos_token_id,
         )
+
         text = result[0]["generated_text"].strip()
         return text.split("Answer:")[-1].strip() if "Answer:" in text else text
+
     except Exception as e:
         print(f"⚠️ Generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================
```
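For context, the heuristic this diff adds can be factored into a small standalone helper, which makes the thresholds easy to unit-test without loading the model. This is a minimal sketch, not part of the actual change: `pick_max_tokens` is a hypothetical name, and only the word-count cutoff (6) and token budgets (120/180) are taken from the diff.

```python
# Hypothetical helper mirroring the adaptive-length logic in the diff above.
# Only the thresholds (6 words, 120/180 tokens) come from the change itself.

def pick_max_tokens(query: str, reasoning_mode: bool) -> int:
    """Return a max_new_tokens budget for the given query."""
    if reasoning_mode:
        return 180  # reasoning answers keep the longer budget
    # Factual mode: short questions get a smaller, faster budget.
    return 120 if len(query.split()) < 6 else 180


if __name__ == "__main__":
    assert pick_max_tokens("What is RAG?", reasoning_mode=False) == 120
    assert pick_max_tokens("How does the retriever handle overlapping chunk boundaries?",
                           reasoning_mode=False) == 180
    assert pick_max_tokens("What is RAG?", reasoning_mode=True) == 180
    print("thresholds behave as expected")
```

Note that `len(query.split()) < 6` counts whitespace-separated words, not model tokens, so the smaller 120-token budget only kicks in for genuinely short questions.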