Update app.py
Browse files
app.py
CHANGED
|
@@ -471,6 +471,12 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
|
|
| 471 |
|
| 472 |
logging.info("Finished generating response for Excel data")
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
|
| 475 |
logging.info(f"Getting response from Llama using model: {model}")
|
| 476 |
|
|
@@ -492,6 +498,9 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
|
|
| 492 |
relevant_docs = retriever.get_relevant_documents(query)
|
| 493 |
context = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
|
| 494 |
|
|
|
|
|
|
|
|
|
|
| 495 |
prompt = f"{system_instruction}\n\nBased on the following data extracted from Excel spreadsheets:\n{context}\n\nPlease provide the Python code needed to execute the following task: '{query}'. Ensure that the code is derived directly from the dataset. If a chart is requested, use the matplotlib library to generate the appropriate visualization."
|
| 496 |
|
| 497 |
elif file_type == "pdf":
|
|
@@ -504,6 +513,9 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
|
|
| 504 |
|
| 505 |
context_str = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
|
| 506 |
|
|
|
|
|
|
|
|
|
|
| 507 |
system_instruction = """You are a highly specialized financial analyst assistant with expertise in analyzing and summarizing financial documents.
|
| 508 |
Your goal is to provide accurate, detailed, and precise summaries based on the context provided.
|
| 509 |
Avoid making assumptions or adding information that is not explicitly supported by the context from the PDF documents."""
|
|
@@ -519,9 +531,12 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
|
|
| 519 |
# Generate content with streaming enabled
|
| 520 |
for response in client.text_generation(
|
| 521 |
prompt=prompt,
|
| 522 |
-
max_new_tokens=
|
| 523 |
temperature=temperature,
|
| 524 |
stream=True,
|
|
|
|
|
|
|
|
|
|
| 525 |
):
|
| 526 |
if response.token.text:
|
| 527 |
chunk = response.token.text
|
|
|
|
| 471 |
|
| 472 |
logging.info("Finished generating response for Excel data")
|
| 473 |
|
| 474 |
+
def truncate_context(context, max_chars=24000):
    """Truncate context so the result never exceeds max_chars characters.

    When the text has to be shortened, a trailing "..." marks the cut and
    is counted against the limit, so the returned string is at most
    max_chars characters long.

    Args:
        context: The text to bound.
        max_chars: Maximum length of the returned string, marker included.

    Returns:
        context unchanged when it already fits; otherwise a prefix of
        context followed by "...". When max_chars is too small to hold the
        marker plus any content, a bare prefix of at most max_chars
        characters is returned instead.
    """
    ellipsis = "..."
    if len(context) <= max_chars:
        return context
    if max_chars <= len(ellipsis):
        # No room for the marker and content together; hard-cut so the
        # limit is still honoured (a non-positive limit yields "").
        return context[:max(max_chars, 0)]
    # Reserve space for the marker so the total stays within max_chars.
    return context[:max_chars - len(ellipsis)] + ellipsis
|
| 479 |
+
|
| 480 |
def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
|
| 481 |
logging.info(f"Getting response from Llama using model: {model}")
|
| 482 |
|
|
|
|
| 498 |
relevant_docs = retriever.get_relevant_documents(query)
|
| 499 |
context = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
|
| 500 |
|
| 501 |
+
# Truncate context
|
| 502 |
+
context = truncate_context(context)
|
| 503 |
+
|
| 504 |
prompt = f"{system_instruction}\n\nBased on the following data extracted from Excel spreadsheets:\n{context}\n\nPlease provide the Python code needed to execute the following task: '{query}'. Ensure that the code is derived directly from the dataset. If a chart is requested, use the matplotlib library to generate the appropriate visualization."
|
| 505 |
|
| 506 |
elif file_type == "pdf":
|
|
|
|
| 513 |
|
| 514 |
context_str = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
|
| 515 |
|
| 516 |
+
# Truncate context
|
| 517 |
+
context_str = truncate_context(context_str)
|
| 518 |
+
|
| 519 |
system_instruction = """You are a highly specialized financial analyst assistant with expertise in analyzing and summarizing financial documents.
|
| 520 |
Your goal is to provide accurate, detailed, and precise summaries based on the context provided.
|
| 521 |
Avoid making assumptions or adding information that is not explicitly supported by the context from the PDF documents."""
|
|
|
|
| 531 |
# Generate content with streaming enabled
|
| 532 |
for response in client.text_generation(
|
| 533 |
prompt=prompt,
|
| 534 |
+
max_new_tokens=1000, # Reduced to ensure we stay within token limits
|
| 535 |
temperature=temperature,
|
| 536 |
stream=True,
|
| 537 |
+
repetition_penalty=1.1,
|
| 538 |
+
top_k=50,
|
| 539 |
+
top_p=0.9,
|
| 540 |
):
|
| 541 |
if response.token.text:
|
| 542 |
chunk = response.token.text
|