Update app.py
Browse files
app.py
CHANGED
|
@@ -98,6 +98,25 @@ def format_prompt(prompt,retrieved_documents,k):
|
|
| 98 |
PROMPT+= f"{retrieved_documents['0'][idx]}\n"
|
| 99 |
return PROMPT
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
# Called by the talk function to add retrieved documents to the prompt. Keeps appending the text of the documents that are retrieved to the string.
|
| 102 |
|
| 103 |
def talk(prompt, history):
|
|
@@ -109,8 +128,10 @@ def talk(prompt, history):
|
|
| 109 |
print("check5")
|
| 110 |
print(retrieved_documents['0'])
|
| 111 |
print(formatted_prompt)
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
|
| 115 |
# binding the system context and new prompt for LLM
|
| 116 |
# the chat template structure should be based on text generation model format
|
|
@@ -130,7 +151,7 @@ def talk(prompt, history):
|
|
| 130 |
]
|
| 131 |
# indicates the end of a sequence
|
| 132 |
text = ""
|
| 133 |
-
stream = model(
|
| 134 |
for output in stream:
|
| 135 |
text += output["choices"][0]["text"]
|
| 136 |
yield text
|
|
|
|
| 98 |
PROMPT+= f"{retrieved_documents['0'][idx]}\n"
|
| 99 |
return PROMPT
|
| 100 |
|
| 101 |
+
#def add_history(formatted_prompt, history, memory_limit=3):
|
| 102 |
+
# always keep len(history) <= memory_limit
|
| 103 |
+
# if len(history) > memory_limit:
|
| 104 |
+
# history = history[-memory_limit:]
|
| 105 |
+
|
| 106 |
+
# if len(history) == 0:
|
| 107 |
+
# return PROMPT + f"{formatted_prompt} [/INST]"
|
| 108 |
+
|
| 109 |
+
#formatted_message = PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
|
| 110 |
+
|
| 111 |
+
# Handle conversation history
|
| 112 |
+
# for user_msg, model_answer in history[1:]:
|
| 113 |
+
# formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
|
| 114 |
+
|
| 115 |
+
# # Handle the current message
|
| 116 |
+
# formatted_message += f"<s>[INST] {formatted_prompt} [/INST]"
|
| 117 |
+
|
| 118 |
+
#return formatted_message
|
| 119 |
+
|
| 120 |
# Called by the talk function to add retrieved documents to the prompt. Keeps appending the text of the documents that are retrieved to the string.
|
| 121 |
|
| 122 |
def talk(prompt, history):
|
|
|
|
| 128 |
print("check5")
|
| 129 |
print(retrieved_documents['0'])
|
| 130 |
print(formatted_prompt)
|
| 131 |
+
# formatted_prompt_with_history = add_history(formatted_prompt, history)
|
| 132 |
+
|
| 133 |
+
# formatted_prompt_with_history = formatted_prompt_with_history[:600] # to avoid memory issue
|
| 134 |
+
print(formatted_prompt_with_history)
|
| 135 |
messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
|
| 136 |
# binding the system context and new prompt for LLM
|
| 137 |
# the chat template structure should be based on text generation model format
|
|
|
|
| 151 |
]
|
| 152 |
# indicates the end of a sequence
|
| 153 |
text = ""
|
| 154 |
+
stream = model(messages, max_tokens=1000, stop=["</s>"], stream=True)
|
| 155 |
for output in stream:
|
| 156 |
text += output["choices"][0]["text"]
|
| 157 |
yield text
|