Update app.py
Browse files
app.py
CHANGED
|
@@ -126,8 +126,8 @@ docs = load_and_process_data(file_path)
|
|
| 126 |
# (If you find that key fields are getting split, consider implementing a custom splitter.)
|
| 127 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 128 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 129 |
-
chunk_size=
|
| 130 |
-
chunk_overlap=
|
| 131 |
add_start_index=True
|
| 132 |
)
|
| 133 |
all_splits = text_splitter.split_documents(docs)
|
|
@@ -157,8 +157,8 @@ bm25_retriever = BM25Retriever.from_documents(all_splits)
|
|
| 157 |
|
| 158 |
# Combine the retrievers using an ensemble approach.
|
| 159 |
ensemble_retriever = EnsembleRetriever(
|
| 160 |
-
retrievers=[vectorstore.as_retriever(search_kwargs={"k":
|
| 161 |
-
weights=[0.
|
| 162 |
)
|
| 163 |
retriever = ensemble_retriever
|
| 164 |
|
|
@@ -166,17 +166,24 @@ retriever = ensemble_retriever
|
|
| 166 |
# Prepare Retrieval and Generation Chain
|
| 167 |
# -------------------------------
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
"Your
|
| 172 |
-
|
| 173 |
-
"When answering, include the full name of the organization, a brief (1-2 sentence) description, and
|
|
|
|
| 174 |
"If a company's personal website is unavailable, navigate to the LA2050 URLs. "
|
| 175 |
-
|
| 176 |
-
"
|
|
|
|
|
|
|
|
|
|
| 177 |
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
|
| 178 |
-
|
| 179 |
-
)
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
prompt = ChatPromptTemplate.from_messages(
|
| 182 |
[
|
|
|
|
| 126 |
# (If you find that key fields are getting split, consider implementing a custom splitter.)
|
| 127 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 128 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 129 |
+
chunk_size=1600,
|
| 130 |
+
chunk_overlap=100,
|
| 131 |
add_start_index=True
|
| 132 |
)
|
| 133 |
all_splits = text_splitter.split_documents(docs)
|
|
|
|
| 157 |
|
| 158 |
# Combine the retrievers using an ensemble approach.
|
| 159 |
ensemble_retriever = EnsembleRetriever(
|
| 160 |
+
retrievers=[vectorstore.as_retriever(search_kwargs={"k": 5}), bm25_retriever],
|
| 161 |
+
weights=[0.2, 0.8]
|
| 162 |
)
|
| 163 |
retriever = ensemble_retriever
|
| 164 |
|
|
|
|
| 166 |
# Prepare Retrieval and Generation Chain
|
| 167 |
# -------------------------------
|
| 168 |
|
| 169 |
+
"You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
|
| 170 |
+
|
| 171 |
+
"Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
|
| 172 |
+
|
| 173 |
+
"When answering, include the full name of the organization, a brief (1-2 sentence) description, and a link to its website or social media (as provided under the website column; please do not alter or normalize the URL). "
|
| 174 |
+
|
| 175 |
"If a company's personal website is unavailable, navigate to the LA2050 URLs. "
|
| 176 |
+
|
| 177 |
+
"Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
|
| 178 |
+
|
| 179 |
+
"Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
|
| 180 |
+
|
| 181 |
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
|
| 182 |
+
|
| 183 |
+
"If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
|
| 184 |
+
|
| 185 |
+
"\n\n{context}"
|
| 186 |
+
|
| 187 |
|
| 188 |
prompt = ChatPromptTemplate.from_messages(
|
| 189 |
[
|