Spaces:
Running
Running
Update custom_utils.py
Browse files- custom_utils.py +39 -18
custom_utils.py
CHANGED
|
@@ -25,7 +25,13 @@ def rag_ingestion(collection):
|
|
| 25 |
collection.insert_many(dataset)
|
| 26 |
return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
|
| 27 |
|
| 28 |
-
def rag_retrieval(openai_api_key,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
###
|
| 30 |
### Pre-retrieval processing: index filter
|
| 31 |
### Post-retrieval processing: result filter
|
|
@@ -108,7 +114,15 @@ def rag_retrieval(openai_api_key, prompt, db, collection, vector_index="vector_i
|
|
| 108 |
###
|
| 109 |
###
|
| 110 |
|
| 111 |
-
get_knowledge = vector_search(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
if not get_knowledge:
|
| 114 |
return "No results found.", "No source information available."
|
|
@@ -119,7 +133,9 @@ def rag_retrieval(openai_api_key, prompt, db, collection, vector_index="vector_i
|
|
| 119 |
|
| 120 |
return get_knowledge
|
| 121 |
|
| 122 |
-
def rag_inference(openai_api_key,
|
|
|
|
|
|
|
| 123 |
openai.api_key = openai_api_key
|
| 124 |
|
| 125 |
content = f"Answer this user question: {prompt} with the following context:\n{search_results}"
|
|
@@ -139,39 +155,44 @@ def rag_inference(openai_api_key, prompt, search_results):
|
|
| 139 |
|
| 140 |
return completion.choices[0].message.content
|
| 141 |
|
| 142 |
-
def vector_search(openai_api_key,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
query_embedding = get_text_embedding(openai_api_key, user_query)
|
| 144 |
|
| 145 |
if query_embedding is None:
|
| 146 |
return "Invalid query or embedding generation failed."
|
| 147 |
|
| 148 |
-
vector_search_stage = {
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
}
|
| 157 |
|
| 158 |
-
""" filter
|
| 159 |
vector_search_stage = {
|
| 160 |
"$vectorSearch": {
|
| 161 |
"index": vector_index,
|
| 162 |
"queryVector": query_embedding,
|
| 163 |
"path": "description_embedding",
|
| 164 |
"numCandidates": 150,
|
| 165 |
-
"limit":
|
| 166 |
"filter": {
|
| 167 |
"$and": [
|
| 168 |
-
{"accommodates": {"$eq":
|
| 169 |
-
{"bedrooms": {"$eq":
|
| 170 |
]
|
| 171 |
},
|
| 172 |
}
|
| 173 |
}
|
| 174 |
-
"""
|
| 175 |
|
| 176 |
remove_embedding_stage = {
|
| 177 |
"$unset": "description_embedding"
|
|
|
|
| 25 |
collection.insert_many(dataset)
|
| 26 |
return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
|
| 27 |
|
| 28 |
+
def rag_retrieval(openai_api_key,
|
| 29 |
+
prompt,
|
| 30 |
+
accomodates,
|
| 31 |
+
bedrooms,
|
| 32 |
+
db,
|
| 33 |
+
collection,
|
| 34 |
+
vector_index="vector_index"):
|
| 35 |
###
|
| 36 |
### Pre-retrieval processing: index filter
|
| 37 |
### Post-retrieval processing: result filter
|
|
|
|
| 114 |
###
|
| 115 |
###
|
| 116 |
|
| 117 |
+
get_knowledge = vector_search(
|
| 118 |
+
openai_api_key,
|
| 119 |
+
prompt,
|
| 120 |
+
accomodates,
|
| 121 |
+
bedrooms,
|
| 122 |
+
db,
|
| 123 |
+
collection,
|
| 124 |
+
additional_stages,
|
| 125 |
+
vector_index)
|
| 126 |
|
| 127 |
if not get_knowledge:
|
| 128 |
return "No results found.", "No source information available."
|
|
|
|
| 133 |
|
| 134 |
return get_knowledge
|
| 135 |
|
| 136 |
+
def rag_inference(openai_api_key,
|
| 137 |
+
prompt,
|
| 138 |
+
search_results):
|
| 139 |
openai.api_key = openai_api_key
|
| 140 |
|
| 141 |
content = f"Answer this user question: {prompt} with the following context:\n{search_results}"
|
|
|
|
| 155 |
|
| 156 |
return completion.choices[0].message.content
|
| 157 |
|
| 158 |
+
def vector_search(openai_api_key,
|
| 159 |
+
user_query,
|
| 160 |
+
accommodates,
|
| 161 |
+
bedrooms,
|
| 162 |
+
db,
|
| 163 |
+
collection,
|
| 164 |
+
additional_stages=[],
|
| 165 |
+
vector_index="vector_index"):
|
| 166 |
query_embedding = get_text_embedding(openai_api_key, user_query)
|
| 167 |
|
| 168 |
if query_embedding is None:
|
| 169 |
return "Invalid query or embedding generation failed."
|
| 170 |
|
| 171 |
+
#vector_search_stage = {
|
| 172 |
+
# "$vectorSearch": {
|
| 173 |
+
# "index": vector_index,
|
| 174 |
+
# "queryVector": query_embedding,
|
| 175 |
+
# "path": "description_embedding",
|
| 176 |
+
# "numCandidates": 150,
|
| 177 |
+
# "limit": 3,
|
| 178 |
+
# }
|
| 179 |
+
#}
|
| 180 |
|
|
|
|
| 181 |
vector_search_stage = {
|
| 182 |
"$vectorSearch": {
|
| 183 |
"index": vector_index,
|
| 184 |
"queryVector": query_embedding,
|
| 185 |
"path": "description_embedding",
|
| 186 |
"numCandidates": 150,
|
| 187 |
+
"limit": 10,
|
| 188 |
"filter": {
|
| 189 |
"$and": [
|
| 190 |
+
{"accommodates": {"$eq": accommodates}},
|
| 191 |
+
{"bedrooms": {"$eq": bedrooms}}
|
| 192 |
]
|
| 193 |
},
|
| 194 |
}
|
| 195 |
}
|
|
|
|
| 196 |
|
| 197 |
remove_embedding_stage = {
|
| 198 |
"$unset": "description_embedding"
|