anl139 committed on
Commit
2098069
·
verified ·
1 Parent(s): caca53e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -38,6 +38,8 @@ def flatten_metadata(metadata):
38
  return str(metadata) # If it's not a dict, just return the string version
39
 
40
  def metadata_func(record, additional_fields=None):
 
 
41
  return {
42
  "title": record.get("Title", ""),
43
  "organization": record.get("Organization", ""),
@@ -59,9 +61,9 @@ def metadata_func(record, additional_fields=None):
59
  "facebook": record.get("FaceBook", "")
60
  }),
61
  "working_area": record.get("Working Areas in LA", ""),
62
- "zipcode": record.get("Zipcode", "")
 
63
  }
64
-
65
  # Load the JSON data with custom metadata and content key
66
  loader = JSONLoader(
67
  file_path='data.json',
@@ -78,10 +80,15 @@ data = loader.load()
78
  from langchain_text_splitters import RecursiveCharacterTextSplitter
79
  text_splitter = RecursiveCharacterTextSplitter(
80
  chunk_size=760,
81
- chunk_overlap=100,
82
  add_start_index=True
83
  )
84
  all_splits = text_splitter.split_documents(data)
 
 
 
 
 
85
 
86
  # -------------------------------
87
  # Set Up Retrievers
@@ -97,7 +104,7 @@ if os.path.exists(persist_directory) and os.listdir(persist_directory):
97
  print("Loaded vector store from persist directory.")
98
  else:
99
  vectorstore = Chroma.from_documents(
100
- documents=all_splits,
101
  embedding=OpenAIEmbeddings(),
102
  persist_directory=persist_directory
103
  )
@@ -118,7 +125,6 @@ retriever = ensemble_retriever
118
  # -------------------------------
119
 
120
  system_prompt = (
121
-
122
  "You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
123
 
124
  "Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
@@ -129,6 +135,8 @@ system_prompt = (
129
 
130
  "Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
131
 
 
 
132
  "Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
133
 
134
  "Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
@@ -136,11 +144,11 @@ system_prompt = (
136
  "If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
137
 
138
  "\n\n{context}"
139
-
140
  )
141
 
142
 
143
 
 
144
  prompt = ChatPromptTemplate.from_messages(
145
  [
146
  ("system", system_prompt),
 
38
  return str(metadata) # If it's not a dict, just return the string version
39
 
40
  def metadata_func(record, additional_fields=None):
41
+ is_winner = record.get("Ranking", "").lower() == "winner"
42
+
43
  return {
44
  "title": record.get("Title", ""),
45
  "organization": record.get("Organization", ""),
 
61
  "facebook": record.get("FaceBook", "")
62
  }),
63
  "working_area": record.get("Working Areas in LA", ""),
64
+ "zipcode": record.get("Zipcode", ""),
65
+ "priority": 1 if is_winner else 0 # Assign a priority value
66
  }
 
67
  # Load the JSON data with custom metadata and content key
68
  loader = JSONLoader(
69
  file_path='data.json',
 
80
  from langchain_text_splitters import RecursiveCharacterTextSplitter
81
  text_splitter = RecursiveCharacterTextSplitter(
82
  chunk_size=760,
83
+ chunk_overlap=50,
84
  add_start_index=True
85
  )
86
  all_splits = text_splitter.split_documents(data)
87
+ priority_docs = [doc for doc in all_splits if doc.metadata.get("priority", 0) == 1]
88
+ other_docs = [doc for doc in all_splits if doc.metadata.get("priority", 0) == 0]
89
+
90
  # Combine priority documents with other documents, listing records ranked 'winner' (priority == 1) first; no year filter is applied here
91
+ all_docs = priority_docs + other_docs
92
 
93
  # -------------------------------
94
  # Set Up Retrievers
 
104
  print("Loaded vector store from persist directory.")
105
  else:
106
  vectorstore = Chroma.from_documents(
107
+ documents=all_docs,
108
  embedding=OpenAIEmbeddings(),
109
  persist_directory=persist_directory
110
  )
 
125
  # -------------------------------
126
 
127
  system_prompt = (
 
128
  "You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
129
 
130
  "Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
 
135
 
136
  "Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
137
 
138
+ "If the user asks about the LA2050 grant winners for a specific year, be sure to reference the 'Year' and 'LA 2050 Grant Status' fields in your responses. If the organization was awarded the grant in that year, mention it explicitly."
139
+
140
  "Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
141
 
142
  "Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
 
144
  "If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
145
 
146
  "\n\n{context}"
 
147
  )
148
 
149
 
150
 
151
+
152
  prompt = ChatPromptTemplate.from_messages(
153
  [
154
  ("system", system_prompt),