Spaces:
Runtime error
Runtime error
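More robust timing analysis for reranking: log how long metadata retrieval takes in rerank() and log the original and rewritten queries in RAG()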
Browse files
RAG.py
CHANGED
|
@@ -68,6 +68,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
|
|
| 68 |
return []
|
| 69 |
|
| 70 |
full_docs = []
|
|
|
|
| 71 |
for doc in documents:
|
| 72 |
if not doc.metadata.get('source'):
|
| 73 |
continue
|
|
@@ -79,7 +80,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
|
|
| 79 |
text_content = extract_text_from_json(json_data)
|
| 80 |
if text_content: # Only add documents with actual content
|
| 81 |
full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
|
| 82 |
-
|
| 83 |
# If no valid documents were processed, return empty list
|
| 84 |
if not full_docs:
|
| 85 |
return []
|
|
@@ -150,7 +151,7 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
|
|
| 150 |
query_prompt = query_template.invoke({"query":query})
|
| 151 |
query_response = llm.invoke(query_prompt)
|
| 152 |
new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
|
| 153 |
-
|
| 154 |
|
| 155 |
retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
|
| 156 |
if not retrieved:
|
|
|
|
| 68 |
return []
|
| 69 |
|
| 70 |
full_docs = []
|
| 71 |
+
meta_start = time.time()
|
| 72 |
for doc in documents:
|
| 73 |
if not doc.metadata.get('source'):
|
| 74 |
continue
|
|
|
|
| 80 |
text_content = extract_text_from_json(json_data)
|
| 81 |
if text_content: # Only add documents with actual content
|
| 82 |
full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
|
| 83 |
+
logging.info(f"Took {time.time()-meta_start} seconds to retrieve all metadata")
|
| 84 |
# If no valid documents were processed, return empty list
|
| 85 |
if not full_docs:
|
| 86 |
return []
|
|
|
|
| 151 |
query_prompt = query_template.invoke({"query":query})
|
| 152 |
query_response = llm.invoke(query_prompt)
|
| 153 |
new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
|
| 154 |
+
logging.info(f"Old_Query: {query},New_Query: {new_query}")
|
| 155 |
|
| 156 |
retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
|
| 157 |
if not retrieved:
|