Update app.py
Browse files
app.py
CHANGED
|
@@ -217,14 +217,14 @@ def get_model(temperature, top_p, repetition_penalty):
|
|
| 217 |
"temperature": temperature,
|
| 218 |
"top_p": top_p,
|
| 219 |
"repetition_penalty": repetition_penalty,
|
| 220 |
-
"max_length":
|
| 221 |
},
|
| 222 |
huggingfacehub_api_token=huggingface_token
|
| 223 |
)
|
| 224 |
|
| 225 |
-
MAX_PROMPT_CHARS =
|
| 226 |
|
| 227 |
-
def chunk_text(text: str, max_chunk_size: int =
|
| 228 |
chunks = []
|
| 229 |
current_chunk = ""
|
| 230 |
for sentence in re.split(r'(?<=[.!?])\s+', text):
|
|
@@ -244,7 +244,7 @@ def get_most_relevant_chunks(question: str, chunks: List[str], top_k: int = 3) -
|
|
| 244 |
top_indices = np.argsort(similarities)[-top_k:]
|
| 245 |
return [chunks[i] for i in top_indices]
|
| 246 |
|
| 247 |
-
def generate_chunked_response(model, prompt, max_tokens=
|
| 248 |
full_response = ""
|
| 249 |
for i in range(max_chunks):
|
| 250 |
try:
|
|
@@ -395,8 +395,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 395 |
database = None
|
| 396 |
|
| 397 |
max_attempts = 3
|
| 398 |
-
max_input_tokens =
|
| 399 |
-
max_output_tokens =
|
| 400 |
|
| 401 |
if web_search:
|
| 402 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|
|
|
|
| 217 |
"temperature": temperature,
|
| 218 |
"top_p": top_p,
|
| 219 |
"repetition_penalty": repetition_penalty,
|
| 220 |
+
"max_length": 800
|
| 221 |
},
|
| 222 |
huggingfacehub_api_token=huggingface_token
|
| 223 |
)
|
| 224 |
|
| 225 |
+
MAX_PROMPT_CHARS = 20000 # Adjust based on your model's limitations
|
| 226 |
|
| 227 |
+
def chunk_text(text: str, max_chunk_size: int = 800) -> List[str]:
|
| 228 |
chunks = []
|
| 229 |
current_chunk = ""
|
| 230 |
for sentence in re.split(r'(?<=[.!?])\s+', text):
|
|
|
|
| 244 |
top_indices = np.argsort(similarities)[-top_k:]
|
| 245 |
return [chunks[i] for i in top_indices]
|
| 246 |
|
| 247 |
+
def generate_chunked_response(model, prompt, max_tokens=800, max_chunks=5):
|
| 248 |
full_response = ""
|
| 249 |
for i in range(max_chunks):
|
| 250 |
try:
|
|
|
|
| 395 |
database = None
|
| 396 |
|
| 397 |
max_attempts = 3
|
| 398 |
+
max_input_tokens = 20000 # Leave room for the model's response
|
| 399 |
+
max_output_tokens = 800
|
| 400 |
|
| 401 |
if web_search:
|
| 402 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|