Update app.py
Browse files
app.py
CHANGED
|
@@ -347,7 +347,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 347 |
|
| 348 |
max_attempts = 5
|
| 349 |
context_reduction_factor = 0.7
|
| 350 |
-
|
| 351 |
|
| 352 |
if web_search:
|
| 353 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
|
@@ -403,7 +403,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 403 |
# Estimate token count
|
| 404 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 405 |
|
| 406 |
- if estimated_tokens <=
|
| 407 |
break
|
| 408 |
|
| 409 |
# Reduce context if estimated token count is too high
|
|
@@ -415,7 +415,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 415 |
if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
|
| 416 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 417 |
|
| 418 |
- full_response = generate_chunked_response(model, formatted_prompt)
|
| 419 |
answer = extract_answer(full_response, instructions)
|
| 420 |
all_answers.append(answer)
|
| 421 |
break
|
|
@@ -464,7 +464,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 464 |
|
| 465 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 466 |
|
| 467 |
- if estimated_tokens <=
|
| 468 |
break
|
| 469 |
|
| 470 |
# Reduce context if estimated token count is too high
|
|
@@ -473,7 +473,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 473 |
if len(context_str) < 100:
|
| 474 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 475 |
|
| 476 |
- full_response = generate_chunked_response(model, formatted_prompt)
|
| 477 |
answer = extract_answer(full_response)
|
| 478 |
|
| 479 |
return answer
|
|
|
|
| 347 |
|
| 348 |
max_attempts = 5
|
| 349 |
context_reduction_factor = 0.7
|
| 350 |
+ max_tokens = 32000  # Maximum tokens allowed by the model
|
| 351 |
|
| 352 |
if web_search:
|
| 353 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
|
|
|
| 403 |
# Estimate token count
|
| 404 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 405 |
|
| 406 |
+ if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
|
| 407 |
break
|
| 408 |
|
| 409 |
# Reduce context if estimated token count is too high
|
|
|
|
| 415 |
if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
|
| 416 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 417 |
|
| 418 |
+ full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
|
| 419 |
answer = extract_answer(full_response, instructions)
|
| 420 |
all_answers.append(answer)
|
| 421 |
break
|
|
|
|
| 464 |
|
| 465 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
| 466 |
|
| 467 |
+ if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
|
| 468 |
break
|
| 469 |
|
| 470 |
# Reduce context if estimated token count is too high
|
|
|
|
| 473 |
if len(context_str) < 100:
|
| 474 |
raise ValueError("Context reduced too much. Unable to process the query.")
|
| 475 |
|
| 476 |
+ full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
|
| 477 |
answer = extract_answer(full_response)
|
| 478 |
|
| 479 |
return answer
|