Kazel committed on
Commit
cfd58b0
·
1 Parent(s): af476a6
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -688,11 +688,13 @@ class PDFSearchApp:
688
  print(f"πŸ” Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
689
 
690
  for i, (score, doc_id) in enumerate(selected_results):
691
- # Use the index as page number since doc_id is just an identifier
692
- # This ensures we look for page_1.png, page_2.png, etc.
693
- display_page_num = i + 1
694
  coll_num = collection_name # Use the current collection name
695
 
 
 
696
  # Use debug function to get paths and check existence
697
  img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
698
 
@@ -701,7 +703,7 @@ class PDFSearchApp:
701
  all_paths.append(path)
702
  page_scores.append(score)
703
  cited_pages.append(f"Page {display_page_num} from {coll_num}")
704
- print(f"βœ… Retrieved page {i+1}: {img_path} (Score: {score:.3f})")
705
  else:
706
  print(f"❌ Image file not found: {img_path}")
707
  # Try alternative paths with better fallback logic
@@ -728,6 +730,7 @@ class PDFSearchApp:
728
  all_paths.append(alt_path.replace(".png", ""))
729
  page_scores.append(score)
730
  cited_pages.append(f"Page {display_page_num} from {coll_num}")
 
731
  break
732
  else:
733
  print(f"❌ No alternative path found for page {display_page_num}")
@@ -742,7 +745,7 @@ class PDFSearchApp:
742
  for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
743
  # Extract page number from path
744
  page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
745
- print(f" {i}. {page_num} - Score: {score:.4f}")
746
 
747
  if page_scores:
748
  final_avg_score = sum(page_scores) / len(page_scores)
 
688
  print(f"πŸ” Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
689
 
690
  for i, (score, doc_id) in enumerate(selected_results):
691
+ # 🎯 FIX: Use the actual page number from doc_id, not the index position
692
+ # doc_id represents the actual page number in the document
693
+ display_page_num = doc_id + 1 # Convert 0-based doc_id to 1-based page number
694
  coll_num = collection_name # Use the current collection name
695
 
696
+ print(f"πŸ” Processing result {i+1}: doc_id={doc_id}, actual_page={display_page_num}, score={score:.4f}")
697
+
698
  # Use debug function to get paths and check existence
699
  img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
700
 
 
703
  all_paths.append(path)
704
  page_scores.append(score)
705
  cited_pages.append(f"Page {display_page_num} from {coll_num}")
706
+ print(f"βœ… Retrieved page {display_page_num}: {img_path} (Score: {score:.3f})")
707
  else:
708
  print(f"❌ Image file not found: {img_path}")
709
  # Try alternative paths with better fallback logic
 
730
  all_paths.append(alt_path.replace(".png", ""))
731
  page_scores.append(score)
732
  cited_pages.append(f"Page {display_page_num} from {coll_num}")
733
+ print(f"βœ… Retrieved page {display_page_num}: {alt_path} (Score: {score:.3f})")
734
  break
735
  else:
736
  print(f"❌ No alternative path found for page {display_page_num}")
 
745
  for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
746
  # Extract page number from path
747
  page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
748
+ print(f" {i}. Page {page_num} - Score: {score:.4f}")
749
 
750
  if page_scores:
751
  final_avg_score = sum(page_scores) / len(page_scores)