MrSimple07 committed on
Commit
a313838
1 Parent(s): 07e9959

Fixed debug logging of retrieved nodes

Browse files
Files changed (1) hide show
  1. utils.py +17 -9
utils.py CHANGED
@@ -197,9 +197,7 @@ def debug_search_tables(vector_index, search_term="小-25"):
197
 
198
  from documents_prep import normalize_text
199
 
200
- # MODIFIED: Update answer_question function signature
201
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
202
- # NORMALIZE the question to convert C to 小
203
  normalized_question = normalize_text(question)
204
 
205
  if query_engine is None:
@@ -207,23 +205,33 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
207
 
208
  try:
209
  start_time = time.time()
210
- # Use NORMALIZED question for retrieval
211
  retrieved_nodes = query_engine.retriever.retrieve(normalized_question)
212
  log_message(f"user query: {question}")
213
  log_message(f"normalized query: {normalized_question}")
214
-
215
 
216
  log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
217
 
218
  unique_retrieved = deduplicate_nodes(retrieved_nodes)
219
 
220
- # DEBUG: Log what was retrieved
221
  log_message(f"RETRIEVED: unique {len(unique_retrieved)} nodes")
222
- for i, node in enumerate(unique_retrieved): # All debug
223
- table_num = node.metadata.get('table_number', 'N/A')
224
- table_title = node.metadata.get('table_title', 'N/A')
225
  doc_id = node.metadata.get('document_id', 'N/A')
226
- log_message(f" [{i+1}] {doc_id} - Table {table_num}: {table_title[:50]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  log_message(f"UNIQUE NODES: {len(unique_retrieved)} nodes")
228
 
229
  # Simple reranking with NORMALIZED question and PARAMETERIZED top_k
 
197
 
198
  from documents_prep import normalize_text
199
 
 
200
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None, rerank_top_k=20):
 
201
  normalized_question = normalize_text(question)
202
 
203
  if query_engine is None:
 
205
 
206
  try:
207
  start_time = time.time()
 
208
  retrieved_nodes = query_engine.retriever.retrieve(normalized_question)
209
  log_message(f"user query: {question}")
210
  log_message(f"normalized query: {normalized_question}")
 
211
 
212
  log_message(f"RETRIEVED: {len(retrieved_nodes)} nodes")
213
 
214
  unique_retrieved = deduplicate_nodes(retrieved_nodes)
215
 
216
+ # IMPROVED DEBUG: Log what was actually retrieved with FULL metadata
217
  log_message(f"RETRIEVED: unique {len(unique_retrieved)} nodes")
218
+ for i, node in enumerate(unique_retrieved):
219
+ node_type = node.metadata.get('type', 'text')
 
220
  doc_id = node.metadata.get('document_id', 'N/A')
221
+
222
+ if node_type == 'table':
223
+ table_num = node.metadata.get('table_number', 'N/A')
224
+ table_id = node.metadata.get('table_identifier', 'N/A')
225
+ table_title = node.metadata.get('table_title', 'N/A')
226
+ # Show first 200 chars of content to verify it's the right table
227
+ content_preview = node.text[:200].replace('\n', ' ')
228
+ log_message(f" [{i+1}] {doc_id} - Table {table_num} | ID: {table_id}")
229
+ log_message(f" Title: {table_title[:80]}")
230
+ log_message(f" Content: {content_preview}...")
231
+ else:
232
+ section = node.metadata.get('section_id', 'N/A')
233
+ log_message(f" [{i+1}] {doc_id} - Text section {section}")
234
+
235
  log_message(f"UNIQUE NODES: {len(unique_retrieved)} nodes")
236
 
237
  # Simple reranking with NORMALIZED question and PARAMETERIZED top_k