Update app.py
Browse files
app.py
CHANGED
|
@@ -38,6 +38,8 @@ def flatten_metadata(metadata):
|
|
| 38 |
return str(metadata) # If it's not a dict, just return the string version
|
| 39 |
|
| 40 |
def metadata_func(record, additional_fields=None):
|
|
|
|
|
|
|
| 41 |
return {
|
| 42 |
"title": record.get("Title", ""),
|
| 43 |
"organization": record.get("Organization", ""),
|
|
@@ -59,9 +61,9 @@ def metadata_func(record, additional_fields=None):
|
|
| 59 |
"facebook": record.get("FaceBook", "")
|
| 60 |
}),
|
| 61 |
"working_area": record.get("Working Areas in LA", ""),
|
| 62 |
-
"zipcode": record.get("Zipcode", "")
|
|
|
|
| 63 |
}
|
| 64 |
-
|
| 65 |
# Load the JSON data with custom metadata and content key
|
| 66 |
loader = JSONLoader(
|
| 67 |
file_path='data.json',
|
|
@@ -78,10 +80,15 @@ data = loader.load()
|
|
| 78 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 79 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 80 |
chunk_size=760,
|
| 81 |
-
chunk_overlap=
|
| 82 |
add_start_index=True
|
| 83 |
)
|
| 84 |
all_splits = text_splitter.split_documents(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
# -------------------------------
|
| 87 |
# Set Up Retrievers
|
|
@@ -97,7 +104,7 @@ if os.path.exists(persist_directory) and os.listdir(persist_directory):
|
|
| 97 |
print("Loaded vector store from persist directory.")
|
| 98 |
else:
|
| 99 |
vectorstore = Chroma.from_documents(
|
| 100 |
-
documents=
|
| 101 |
embedding=OpenAIEmbeddings(),
|
| 102 |
persist_directory=persist_directory
|
| 103 |
)
|
|
@@ -118,7 +125,6 @@ retriever = ensemble_retriever
|
|
| 118 |
# -------------------------------
|
| 119 |
|
| 120 |
system_prompt = (
|
| 121 |
-
|
| 122 |
"You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
|
| 123 |
|
| 124 |
"Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
|
|
@@ -129,6 +135,8 @@ system_prompt = (
|
|
| 129 |
|
| 130 |
"Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
|
| 131 |
|
|
|
|
|
|
|
| 132 |
"Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
|
| 133 |
|
| 134 |
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
|
|
@@ -136,11 +144,11 @@ system_prompt = (
|
|
| 136 |
"If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
|
| 137 |
|
| 138 |
"\n\n{context}"
|
| 139 |
-
|
| 140 |
)
|
| 141 |
|
| 142 |
|
| 143 |
|
|
|
|
| 144 |
prompt = ChatPromptTemplate.from_messages(
|
| 145 |
[
|
| 146 |
("system", system_prompt),
|
|
|
|
| 38 |
return str(metadata) # If it's not a dict, just return the string version
|
| 39 |
|
| 40 |
def metadata_func(record, additional_fields=None):
|
| 41 |
+
is_winner = record.get("Ranking", "").lower() == "winner"
|
| 42 |
+
|
| 43 |
return {
|
| 44 |
"title": record.get("Title", ""),
|
| 45 |
"organization": record.get("Organization", ""),
|
|
|
|
| 61 |
"facebook": record.get("FaceBook", "")
|
| 62 |
}),
|
| 63 |
"working_area": record.get("Working Areas in LA", ""),
|
| 64 |
+
"zipcode": record.get("Zipcode", ""),
|
| 65 |
+
"priority": 1 if is_winner else 0 # Assign a priority value
|
| 66 |
}
|
|
|
|
| 67 |
# Load the JSON data with custom metadata and content key
|
| 68 |
loader = JSONLoader(
|
| 69 |
file_path='data.json',
|
|
|
|
| 80 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 81 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 82 |
chunk_size=760,
|
| 83 |
+
chunk_overlap=50,
|
| 84 |
add_start_index=True
|
| 85 |
)
|
| 86 |
all_splits = text_splitter.split_documents(data)
|
| 87 |
+
priority_docs = [doc for doc in all_splits if doc.metadata.get("priority", 0) == 1]
|
| 88 |
+
other_docs = [doc for doc in all_splits if doc.metadata.get("priority", 0) == 0]
|
| 89 |
+
|
| 90 |
+
# Combine priority documents with other documents so that winners (priority == 1) come first
|
| 91 |
+
all_docs = priority_docs + other_docs
|
| 92 |
|
| 93 |
# -------------------------------
|
| 94 |
# Set Up Retrievers
|
|
|
|
| 104 |
print("Loaded vector store from persist directory.")
|
| 105 |
else:
|
| 106 |
vectorstore = Chroma.from_documents(
|
| 107 |
+
documents=all_docs,
|
| 108 |
embedding=OpenAIEmbeddings(),
|
| 109 |
persist_directory=persist_directory
|
| 110 |
)
|
|
|
|
| 125 |
# -------------------------------
|
| 126 |
|
| 127 |
system_prompt = (
|
|
|
|
| 128 |
"You are the LA2050 Navigator, an AI-powered chatbot designed to help users explore organizations and community initiatives within the Goldhirsh Foundation’s LA2050 Ideas Hub. "
|
| 129 |
|
| 130 |
"Your role is to provide concise, personalized recommendations, guide users toward supporting these organizations and initiatives, and answer relevant questions about the Goldhirsh Foundation, LA2050, and its projects. "
|
|
|
|
| 135 |
|
| 136 |
"Prioritize nonprofit organizations awarded by the Goldhirsh Foundation (designated 'winner') and those with multiple proposal submissions. "
|
| 137 |
|
| 138 |
+
"If the user asks about the LA2050 grant winners for a specific year, be sure to reference the 'Year' and 'LA 2050 Grant Status' fields in your responses. If the organization was awarded the grant in that year, mention it explicitly. "
|
| 139 |
+
|
| 140 |
"Use the data files as your primary source of information. If information is unavailable, acknowledge it and guide the user to relevant resources. "
|
| 141 |
|
| 142 |
"Maintain a polite, helpful, respectful, and enthusiastic tone at all times. "
|
|
|
|
| 144 |
"If the user responds with a follow-up confirmation (e.g. 'yes') after a previous answer, please expand on that topic with additional information. "
|
| 145 |
|
| 146 |
"\n\n{context}"
|
|
|
|
| 147 |
)
|
| 148 |
|
| 149 |
|
| 150 |
|
| 151 |
+
|
| 152 |
prompt = ChatPromptTemplate.from_messages(
|
| 153 |
[
|
| 154 |
("system", system_prompt),
|