anl139 committed on
Commit
7b879ab
·
verified ·
1 Parent(s): 45cee9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -13
app.py CHANGED
@@ -126,8 +126,8 @@ docs = load_and_process_data(file_path)
126
  # (If you find that key fields are getting split, consider implementing a custom splitter.)
127
  from langchain_text_splitters import RecursiveCharacterTextSplitter
128
  text_splitter = RecursiveCharacterTextSplitter(
129
- chunk_size=1700,
130
- chunk_overlap=150,
131
  add_start_index=True
132
  )
133
  all_splits = text_splitter.split_documents(docs)
@@ -157,8 +157,8 @@ bm25_retriever = BM25Retriever.from_documents(all_splits)
157
 
158
  # Combine the retrievers using an ensemble approach.
159
  ensemble_retriever = EnsembleRetriever(
160
- retrievers=[vectorstore.as_retriever(search_kwargs={"k": 6}), bm25_retriever],
161
- weights=[0.8, 0.4]
162
  )
163
  retriever = ensemble_retriever
164
 
@@ -166,17 +166,24 @@ retriever = ensemble_retriever
166
  # Prepare Retrieval and Generation Chain
167
  # -------------------------------
168
 
169
- system_prompt = (
170
- "You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
171
- "Your responses must be based solely on the context provided from the JSON dataset below. Do not add or hallucinate any information that is not present in the provided data. "
172
- "If a piece of information is not found in the context, clearly state that the information is unavailable. "
173
- "When answering, include the full name of the organization, a brief (1-2 sentence) description, and links to its website or social media (as provided under the website column; please do not alter or normalize the URL). "
 
174
  "If a company's personal website is unavailable, navigate to the LA2050 URLs. "
175
- "Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'Winner' under the ranking field) and those with multiple proposal submissions. "
176
- "Use the data files as your primary source of information. "
 
 
 
177
  "Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
178
- "If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information.\n\n{context}"
179
- )
 
 
 
180
 
181
  prompt = ChatPromptTemplate.from_messages(
182
  [
 
126
  # (If you find that key fields are getting split, consider implementing a custom splitter.)
127
  from langchain_text_splitters import RecursiveCharacterTextSplitter
128
  text_splitter = RecursiveCharacterTextSplitter(
129
+ chunk_size=1600,
130
+ chunk_overlap=100,
131
  add_start_index=True
132
  )
133
  all_splits = text_splitter.split_documents(docs)
 
157
 
158
  # Combine the retrievers using an ensemble approach.
159
  ensemble_retriever = EnsembleRetriever(
160
+ retrievers=[vectorstore.as_retriever(search_kwargs={"k": 5}), bm25_retriever],
161
+ weights=[0.2, 0.8]
162
  )
163
  retriever = ensemble_retriever
164
 
 
166
  # Prepare Retrieval and Generation Chain
167
  # -------------------------------
168
 
169
+ "You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
170
+
171
+ "Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
172
+
173
+ "When answering, include the full name of the organization, a brief (1-2 sentence) description, and a link to its website or social media (as provided under the website column; please do not alter or normalize the URL). "
174
+
175
  "If a company's personal website is unavailable, navigate to the LA2050 URLs. "
176
+
177
+ "Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
178
+
179
+ "Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
180
+
181
  "Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
182
+
183
+ "If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
184
+
185
+ "\n\n{context}"
186
+
187
 
188
  prompt = ChatPromptTemplate.from_messages(
189
  [