Spaces:
Runtime error
Runtime error
Test additional metadata fields (genre_basic_ssim, date_tsim) for reranking and increase the number of initially retrieved documents
Browse files
RAG.py
CHANGED
|
@@ -56,7 +56,7 @@ class RunLogger:
|
|
| 56 |
"""Ensure logs are output if logger is garbage collected"""
|
| 57 |
self.output_logs()
|
| 58 |
|
| 59 |
-
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int =
|
| 60 |
start = time.time()
|
| 61 |
# pinecone_api_key = os.getenv("PINECONE_API_KEY")
|
| 62 |
# pc = Pinecone(api_key=pinecone_api_key)
|
|
@@ -97,7 +97,7 @@ def extract_text_from_json(json_data: Dict) -> str:
|
|
| 97 |
text_parts = []
|
| 98 |
|
| 99 |
# Handle direct text fields
|
| 100 |
-
text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_specific_ssim"]
|
| 101 |
for field in text_fields:
|
| 102 |
if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
|
| 103 |
# print(json_data[field])
|
|
@@ -162,7 +162,7 @@ def parse_xml_and_check(xml_string: str) -> str:
|
|
| 162 |
|
| 163 |
return parsed_response.get('RESPONSE', "No response found in the output")
|
| 164 |
|
| 165 |
-
def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int =
|
| 166 |
"""Main RAG function with improved error handling and validation."""
|
| 167 |
start = time.time()
|
| 168 |
try:
|
|
|
|
| 56 |
"""Ensure logs are output if logger is garbage collected"""
|
| 57 |
self.output_logs()
|
| 58 |
|
| 59 |
+
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 100) -> Tuple[List[Document], List[float]]:
|
| 60 |
start = time.time()
|
| 61 |
# pinecone_api_key = os.getenv("PINECONE_API_KEY")
|
| 62 |
# pc = Pinecone(api_key=pinecone_api_key)
|
|
|
|
| 97 |
text_parts = []
|
| 98 |
|
| 99 |
# Handle direct text fields
|
| 100 |
+
text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_basic_ssim","genre_specific_ssim","date_tsim"]
|
| 101 |
for field in text_fields:
|
| 102 |
if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
|
| 103 |
# print(json_data[field])
|
|
|
|
| 162 |
|
| 163 |
return parsed_response.get('RESPONSE', "No response found in the output")
|
| 164 |
|
| 165 |
+
def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int = 1000) -> Tuple[str, List[Document]]:
|
| 166 |
"""Main RAG function with improved error handling and validation."""
|
| 167 |
start = time.time()
|
| 168 |
try:
|