Update app.py
Browse files
app.py
CHANGED
|
@@ -98,6 +98,7 @@ else:
|
|
| 98 |
persist_directory=persist_directory
|
| 99 |
)
|
| 100 |
print("Created new vector store and persisted embeddings.")
|
|
|
|
| 101 |
# Create a BM25 retriever from the document splits
|
| 102 |
bm25_retriever = BM25Retriever.from_documents(all_splits)
|
| 103 |
|
|
@@ -112,38 +113,19 @@ retriever = ensemble_retriever
|
|
| 112 |
# Prepare Retrieval and Generation Chain
|
| 113 |
# -------------------------------
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
# Prompt
|
| 118 |
-
prompt = hub.pull("rlm/rag-prompt")
|
| 119 |
-
|
| 120 |
-
# LLM
|
| 121 |
-
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
|
| 122 |
-
|
| 123 |
-
# Post-processing
|
| 124 |
-
def format_docs(docs):
|
| 125 |
-
return "\n\n".join(doc.page_content for doc in docs)
|
| 126 |
-
|
| 127 |
-
# Chain
|
| 128 |
-
rag_chain = (
|
| 129 |
-
{"context": retriever | format_docs, "question": RunnablePassthrough()}
|
| 130 |
-
| prompt
|
| 131 |
-
| llm
|
| 132 |
-
| StrOutputParser()
|
| 133 |
-
)
|
| 134 |
system_prompt = (
|
| 135 |
-
"You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations "
|
| 136 |
-
"and
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
-
"When answering, include the full name of the organization, a brief (1-2 sentence) description, and a link to its website or social media "
|
| 140 |
-
"(as provided under the website column; please do not alter or normalize the URL). "
|
| 141 |
-
"Also include suggestions for engagement such as donating, volunteering, or learning more. "
|
| 142 |
-
"If a company's personal website is unavailable, navigate to the LA2050 URLs. "
|
| 143 |
"Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
|
| 144 |
"Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
|
| 145 |
-
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times
|
|
|
|
|
|
|
| 146 |
)
|
|
|
|
| 147 |
prompt = ChatPromptTemplate.from_messages(
|
| 148 |
[
|
| 149 |
("system", system_prompt),
|
|
@@ -151,10 +133,14 @@ prompt = ChatPromptTemplate.from_messages(
|
|
| 151 |
]
|
| 152 |
)
|
| 153 |
|
| 154 |
-
|
| 155 |
-
question_answer_chain = create_stuff_documents_chain(
|
| 156 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
green_theme = gr.themes.Base(
|
| 159 |
primary_hue=gr.themes.Color(
|
| 160 |
c50="#00A168",
|
|
@@ -186,23 +172,29 @@ green_theme = gr.themes.Base(
|
|
| 186 |
button_secondary_text_color='#000000'
|
| 187 |
)
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
# integrate gradio with RAG logic
|
| 192 |
def message_and_history(message, history):
|
|
|
|
| 193 |
history = history or [{"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Welcome to the LA2050 ideas hub! How can I help you today?"}]
|
| 194 |
-
|
| 195 |
-
|
|
|
|
| 196 |
time.sleep(1)
|
| 197 |
|
| 198 |
-
|
| 199 |
-
if not
|
| 200 |
history.append({"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Please enter a valid message."})
|
| 201 |
yield history, history
|
| 202 |
return
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
try:
|
| 205 |
-
response = rag_chain.invoke(
|
| 206 |
answer = response["answer"]
|
| 207 |
except Exception as e:
|
| 208 |
answer = f"An error occurred: {e}"
|
|
@@ -210,40 +202,39 @@ def message_and_history(message, history):
|
|
| 210 |
dynamic_message = {"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> "}
|
| 211 |
history.append(dynamic_message)
|
| 212 |
|
|
|
|
| 213 |
for character in answer:
|
| 214 |
dynamic_message["content"] += character
|
| 215 |
yield history, history
|
| 216 |
|
|
|
|
| 217 |
history[-1]["content"] = f"<b>LA2050 Navigator:</b><br> {answer}"
|
| 218 |
yield history, history
|
| 219 |
|
| 220 |
|
| 221 |
-
#
|
| 222 |
js_func = """
|
| 223 |
function refresh() {
|
| 224 |
const url = new URL(window.location);
|
| 225 |
-
|
| 226 |
if (url.searchParams.get('__theme') !== 'light') {
|
| 227 |
url.searchParams.set('__theme', 'light');
|
| 228 |
window.location.href = url.href;
|
| 229 |
}
|
| 230 |
}
|
| 231 |
"""
|
| 232 |
-
|
| 233 |
-
|
|
|
|
| 234 |
text-color: #FFFFFF;
|
| 235 |
text-align: center;
|
| 236 |
-
|
| 237 |
-
|
| 238 |
color: #FFFFFF;
|
| 239 |
text-align: center;
|
| 240 |
-
}
|
| 241 |
-
|
| 242 |
}
|
| 243 |
"""
|
| 244 |
|
| 245 |
-
#
|
| 246 |
-
|
| 247 |
with gr.Blocks(theme=green_theme, js=js_func, css=css) as block:
|
| 248 |
gr.HTML('<div class="chat-header"><h1>LA2050 Navigator</h1></div>')
|
| 249 |
|
|
@@ -265,6 +256,7 @@ with gr.Blocks(theme=green_theme, js=js_func, css=css) as block:
|
|
| 265 |
show_label=False
|
| 266 |
)
|
| 267 |
|
|
|
|
| 268 |
message.submit(
|
| 269 |
message_and_history,
|
| 270 |
inputs=[message, state],
|
|
@@ -274,3 +266,4 @@ with gr.Blocks(theme=green_theme, js=js_func, css=css) as block:
|
|
| 274 |
)
|
| 275 |
|
| 276 |
block.launch(debug=True, share=True)
|
|
|
|
|
|
| 98 |
persist_directory=persist_directory
|
| 99 |
)
|
| 100 |
print("Created new vector store and persisted embeddings.")
|
| 101 |
+
|
| 102 |
# Create a BM25 retriever from the document splits
|
| 103 |
bm25_retriever = BM25Retriever.from_documents(all_splits)
|
| 104 |
|
|
|
|
| 113 |
# Prepare Retrieval and Generation Chain
|
| 114 |
# -------------------------------
|
| 115 |
|
| 116 |
+
# Update the system prompt to instruct the model on using provided context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
system_prompt = (
|
| 118 |
+
"You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
|
| 119 |
+
"Your role is to provide personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
|
| 120 |
+
"When answering, include the full name of the organization, a brief (1-2 sentence) description, and a link to its website or social media (as provided under the website column; please do not alter or normalize the URL). "
|
| 121 |
+
"Also include suggestions for engagement such as donating, volunteering, or learning more. If a company's personal website is unavailable, navigate to the LA2050 URLs. "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
"Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
|
| 123 |
"Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
|
| 124 |
+
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
|
| 125 |
+
"If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
|
| 126 |
+
"\n\n{context}"
|
| 127 |
)
|
| 128 |
+
|
| 129 |
prompt = ChatPromptTemplate.from_messages(
|
| 130 |
[
|
| 131 |
("system", system_prompt),
|
|
|
|
| 133 |
]
|
| 134 |
)
|
| 135 |
|
| 136 |
+
# Assemble the retrieval-augmented generation pipeline in two steps:
# first a document-combining chain built from the chat model and the prompt,
# then a retrieval chain that pairs it with the retriever.
question_answer_chain = create_stuff_documents_chain(
    llm=ChatOpenAI(model_name="gpt-4o-mini", temperature=0),
    prompt=prompt,
)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)
|
| 139 |
|
| 140 |
+
# -------------------------------
|
| 141 |
+
# Gradio Interface and Conversation Handling
|
| 142 |
+
# -------------------------------
|
| 143 |
+
|
| 144 |
green_theme = gr.themes.Base(
|
| 145 |
primary_hue=gr.themes.Color(
|
| 146 |
c50="#00A168",
|
|
|
|
| 172 |
button_secondary_text_color='#000000'
|
| 173 |
)
|
| 174 |
|
| 175 |
+
# Here we update the conversation function so that each new query includes recent conversation history.
|
|
|
|
|
|
|
| 176 |
def message_and_history(message, history):
    """Handle one chat submission and stream the assistant's reply.

    This is a generator registered as the ``message.submit`` handler. It
    validates the submitted text, sends the recent conversation to
    ``rag_chain``, and yields the growing history so the reply appears
    progressively.

    Args:
        message: The submitted input. Normally a dict carrying the typed
            text under the ``"text"`` key; a plain string or ``None`` is
            tolerated as well.
        history: List of ``{"role": ..., "content": ...}`` message dicts for
            the conversation so far, or a falsy value on the first turn.

    Yields:
        ``(history, history)`` tuples, one per streamed character plus a
        final one with the complete answer.
        NOTE(review): presumably bound to the chat display and the state
        component; the ``outputs=`` list is not visible here -- confirm.
    """
    # If no conversation exists yet, initialize with a welcome message.
    history = history or [{"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Welcome to the LA2050 ideas hub! How can I help you today?"}]

    # Accept either the dict form ({"text": ...}) or a bare string.
    raw_text = message.get("text") if isinstance(message, dict) else message
    user_text = str(raw_text) if raw_text else ""

    # Validate BEFORE recording the turn or pausing: an empty or
    # whitespace-only submission must not be stored as a user message,
    # otherwise it leaks into the context built for later queries.
    if not user_text.strip():
        history.append({"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Please enter a valid message."})
        yield history, history
        return

    history.append({"role": "user", "content": user_text})

    # NOTE(review): fixed one-second pause kept from the original code;
    # its purpose is not evident from this file -- confirm it is still wanted.
    time.sleep(1)

    # Combine the most recent conversation turns (the last 6 messages,
    # including the one just added) into a single query string so that
    # short follow-ups such as "yes" carry their context.
    conversation_context = "\n".join(
        f"{msg['role']}: {msg['content']}" for msg in history[-6:]
    )
    chain_input = {"input": conversation_context}

    try:
        response = rag_chain.invoke(chain_input)
        answer = response["answer"]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing
        # the handler.
        answer = f"An error occurred: {e}"

    dynamic_message = {"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> "}
    history.append(dynamic_message)

    # Stream the answer character by character.
    for character in answer:
        dynamic_message["content"] += character
        yield history, history

    # Finalize the answer. This also guarantees one yield when the answer
    # is empty and the loop above produced nothing.
    history[-1]["content"] = f"<b>LA2050 Navigator:</b><br> {answer}"
    yield history, history
|
| 213 |
|
| 214 |
|
| 215 |
+
# Set Gradio to light mode via JavaScript
|
| 216 |
js_func = """
|
| 217 |
function refresh() {
|
| 218 |
const url = new URL(window.location);
|
|
|
|
| 219 |
if (url.searchParams.get('__theme') !== 'light') {
|
| 220 |
url.searchParams.set('__theme', 'light');
|
| 221 |
window.location.href = url.href;
|
| 222 |
}
|
| 223 |
}
|
| 224 |
"""
|
| 225 |
+
|
| 226 |
+
# Custom CSS for the page header. Both rules centre the header and render its
# text in white. The standard property is `color`; `text-color` is not a CSS
# property and is ignored by browsers.
css = """
.chat-header {
    color: #FFFFFF;
    text-align: center;
}
.gradio-container .prose .chat-header h1 {
    color: #FFFFFF;
    text-align: center;
}
"""
|
| 236 |
|
| 237 |
+
# Setup Gradio interface with the custom theme, JS, and CSS
|
|
|
|
| 238 |
with gr.Blocks(theme=green_theme, js=js_func, css=css) as block:
|
| 239 |
gr.HTML('<div class="chat-header"><h1>LA2050 Navigator</h1></div>')
|
| 240 |
|
|
|
|
| 256 |
show_label=False
|
| 257 |
)
|
| 258 |
|
| 259 |
+
# When a message is submitted, the function now sends the recent conversation history along with the new input.
|
| 260 |
message.submit(
|
| 261 |
message_and_history,
|
| 262 |
inputs=[message, state],
|
|
|
|
| 266 |
)
|
| 267 |
|
| 268 |
block.launch(debug=True, share=True)
|
| 269 |
+
|