Spaces:
Runtime error
Runtime error
moved length checking upstream to retrieval
Browse files
RAG.py
CHANGED
|
@@ -70,6 +70,9 @@ def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 1000) -> Tuple
|
|
| 70 |
documents = []
|
| 71 |
scores = []
|
| 72 |
for res, score in results:
|
|
|
|
|
|
|
|
|
|
| 73 |
documents.append(res)
|
| 74 |
scores.append(score)
|
| 75 |
logging.info(f"Finished Retrieval: {time.time() - start}")
|
|
@@ -233,9 +236,6 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
|
|
| 233 |
|
| 234 |
# Generate response
|
| 235 |
ans_prompt = answer_template.invoke({"context": context, "query": query})
|
| 236 |
-
# Max input tokens is 10,000 for 4o-mini. This is a quick and dirty solution
|
| 237 |
-
if len(ans_prompt) > 30000:
|
| 238 |
-
ans_prompt = ans_prompt[:30000]
|
| 239 |
response = llm.invoke(ans_prompt)
|
| 240 |
|
| 241 |
# Parse and return response
|
|
|
|
| 70 |
documents = []
|
| 71 |
scores = []
|
| 72 |
for res, score in results:
|
| 73 |
+
# check to make sure the retrieved document isn't too long for the context window of 4o-mini
|
| 74 |
+
if len(res.page_content) > 4000:
|
| 75 |
+
res.page_content = res.page_content[:4000]
|
| 76 |
documents.append(res)
|
| 77 |
scores.append(score)
|
| 78 |
logging.info(f"Finished Retrieval: {time.time() - start}")
|
|
|
|
| 236 |
|
| 237 |
# Generate response
|
| 238 |
ans_prompt = answer_template.invoke({"context": context, "query": query})
|
|
|
|
|
|
|
|
|
|
| 239 |
response = llm.invoke(ans_prompt)
|
| 240 |
|
| 241 |
# Parse and return response
|