Update app.py
Browse files
app.py
CHANGED
|
@@ -349,8 +349,14 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
|
|
| 349 |
return all_results
|
| 350 |
|
| 351 |
def estimate_tokens(text):
|
| 352 |
-
#
|
| 353 |
-
return len(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
|
| 356 |
if not question:
|
|
@@ -370,7 +376,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 370 |
|
| 371 |
max_attempts = 3
|
| 372 |
context_reduction_factor = 0.7
|
| 373 |
-
|
|
|
|
| 374 |
|
| 375 |
if web_search:
|
| 376 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|
|
@@ -432,23 +439,29 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 432 |
entities=json.dumps(current_entities)
|
| 433 |
)
|
| 434 |
|
| 435 |
-
estimated_tokens =
|
| 436 |
|
| 437 |
-
if estimated_tokens <=
|
| 438 |
break
|
| 439 |
|
| 440 |
-
|
| 441 |
-
|
|
|
|
| 442 |
current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
|
| 443 |
current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
|
| 444 |
|
| 445 |
-
if
|
| 446 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 447 |
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
except ValueError as ve:
|
| 454 |
print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
|
|
@@ -496,18 +509,22 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 496 |
|
| 497 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 498 |
|
| 499 |
-
if estimated_tokens <=
|
| 500 |
break
|
| 501 |
|
| 502 |
-
context_str = context_str
|
| 503 |
|
| 504 |
-
if
|
| 505 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 506 |
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
except ValueError as ve:
|
| 513 |
print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
|
|
|
|
| 349 |
return all_results
|
| 350 |
|
| 351 |
def estimate_tokens(text):
    """Roughly estimate the token count of *text*.

    Uses the whitespace-delimited word count as the estimate — still a
    heuristic, but closer to real tokenizer output than the old
    1-token-per-character scheme.
    """
    # str.split() with no argument collapses any run of whitespace,
    # so empty/blank strings yield 0.
    word_list = text.split()
    return len(word_list)
|
| 354 |
+
def truncate_text(text, max_tokens):
    """Trim *text* to at most *max_tokens* whitespace-delimited words.

    If the text already fits, it is returned unchanged (original
    whitespace preserved); otherwise the first ``max_tokens`` words are
    rejoined with single spaces.
    """
    tokens = text.split()
    if len(tokens) > max_tokens:
        # Rebuild from the kept words only — internal whitespace runs
        # are collapsed to single spaces in the truncated form.
        return ' '.join(tokens[:max_tokens])
    return text
|
| 360 |
|
| 361 |
def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
|
| 362 |
if not question:
|
|
|
|
| 376 |
|
| 377 |
max_attempts = 3
|
| 378 |
context_reduction_factor = 0.7
|
| 379 |
+
max_input_tokens = 31000 # Leave room for the model's response
|
| 380 |
+
max_output_tokens = 1000
|
| 381 |
|
| 382 |
if web_search:
|
| 383 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|
|
|
|
| 439 |
entities=json.dumps(current_entities)
|
| 440 |
)
|
| 441 |
|
| 442 |
+
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 443 |
|
| 444 |
+
if estimated_tokens <= max_input_tokens:
|
| 445 |
break
|
| 446 |
|
| 447 |
+
# Reduce context sizes
|
| 448 |
+
current_context = truncate_text(current_context, int(estimate_tokens(current_context) * context_reduction_factor))
|
| 449 |
+
current_conv_context = truncate_text(current_conv_context, int(estimate_tokens(current_conv_context) * context_reduction_factor))
|
| 450 |
current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
|
| 451 |
current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
|
| 452 |
|
| 453 |
+
if estimate_tokens(current_context) + estimate_tokens(current_conv_context) + estimate_tokens(", ".join(current_topics)) + estimate_tokens(json.dumps(current_entities)) < 100:
|
| 454 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 455 |
|
| 456 |
+
try:
|
| 457 |
+
full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
|
| 458 |
+
answer = extract_answer(full_response, user_instructions)
|
| 459 |
+
all_answers.append(answer)
|
| 460 |
+
break
|
| 461 |
+
except Exception as e:
|
| 462 |
+
print(f"Error in generate_chunked_response: {e}")
|
| 463 |
+
if attempt == max_attempts - 1:
|
| 464 |
+
all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")
|
| 465 |
|
| 466 |
except ValueError as ve:
|
| 467 |
print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
|
|
|
|
| 509 |
|
| 510 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 511 |
|
| 512 |
+
if estimated_tokens <= max_input_tokens:
|
| 513 |
break
|
| 514 |
|
| 515 |
+
context_str = truncate_text(context_str, int(estimate_tokens(context_str) * context_reduction_factor))
|
| 516 |
|
| 517 |
+
if estimate_tokens(context_str) < 100:
|
| 518 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 519 |
|
| 520 |
+
try:
|
| 521 |
+
full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
|
| 522 |
+
answer = extract_answer(full_response, user_instructions)
|
| 523 |
+
return answer
|
| 524 |
+
except Exception as e:
|
| 525 |
+
print(f"Error in generate_chunked_response: {e}")
|
| 526 |
+
if attempt == max_attempts - 1:
|
| 527 |
+
return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."
|
| 528 |
|
| 529 |
except ValueError as ve:
|
| 530 |
print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
|