Spaces:
Runtime error
Runtime error
logging
Browse files
app.py
CHANGED
|
@@ -688,11 +688,13 @@ class PDFSearchApp:
|
|
| 688 |
print(f"π Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
|
| 689 |
|
| 690 |
for i, (score, doc_id) in enumerate(selected_results):
|
| 691 |
-
# Use the
|
| 692 |
-
#
|
| 693 |
-
display_page_num =
|
| 694 |
coll_num = collection_name # Use the current collection name
|
| 695 |
|
|
|
|
|
|
|
| 696 |
# Use debug function to get paths and check existence
|
| 697 |
img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
|
| 698 |
|
|
@@ -701,7 +703,7 @@ class PDFSearchApp:
|
|
| 701 |
all_paths.append(path)
|
| 702 |
page_scores.append(score)
|
| 703 |
cited_pages.append(f"Page {display_page_num} from {coll_num}")
|
| 704 |
-
print(f"✅ Retrieved page {
|
| 705 |
else:
|
| 706 |
print(f"❌ Image file not found: {img_path}")
|
| 707 |
# Try alternative paths with better fallback logic
|
|
@@ -728,6 +730,7 @@ class PDFSearchApp:
|
|
| 728 |
all_paths.append(alt_path.replace(".png", ""))
|
| 729 |
page_scores.append(score)
|
| 730 |
cited_pages.append(f"Page {display_page_num} from {coll_num}")
|
|
|
|
| 731 |
break
|
| 732 |
else:
|
| 733 |
print(f"❌ No alternative path found for page {display_page_num}")
|
|
@@ -742,7 +745,7 @@ class PDFSearchApp:
|
|
| 742 |
for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
|
| 743 |
# Extract page number from path
|
| 744 |
page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
|
| 745 |
-
print(f" {i}. {page_num} - Score: {score:.4f}")
|
| 746 |
|
| 747 |
if page_scores:
|
| 748 |
final_avg_score = sum(page_scores) / len(page_scores)
|
|
|
|
| 688 |
print(f"π Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
|
| 689 |
|
| 690 |
for i, (score, doc_id) in enumerate(selected_results):
|
| 691 |
+
# 🎯 FIX: Use the actual page number from doc_id, not the index position
|
| 692 |
+
# doc_id represents the actual page number in the document
|
| 693 |
+
display_page_num = doc_id + 1 # Convert 0-based doc_id to 1-based page number
|
| 694 |
coll_num = collection_name # Use the current collection name
|
| 695 |
|
| 696 |
+
print(f"π Processing result {i+1}: doc_id={doc_id}, actual_page={display_page_num}, score={score:.4f}")
|
| 697 |
+
|
| 698 |
# Use debug function to get paths and check existence
|
| 699 |
img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
|
| 700 |
|
|
|
|
| 703 |
all_paths.append(path)
|
| 704 |
page_scores.append(score)
|
| 705 |
cited_pages.append(f"Page {display_page_num} from {coll_num}")
|
| 706 |
+
print(f"✅ Retrieved page {display_page_num}: {img_path} (Score: {score:.3f})")
|
| 707 |
else:
|
| 708 |
print(f"❌ Image file not found: {img_path}")
|
| 709 |
# Try alternative paths with better fallback logic
|
|
|
|
| 730 |
all_paths.append(alt_path.replace(".png", ""))
|
| 731 |
page_scores.append(score)
|
| 732 |
cited_pages.append(f"Page {display_page_num} from {coll_num}")
|
| 733 |
+
print(f"✅ Retrieved page {display_page_num}: {alt_path} (Score: {score:.3f})")
|
| 734 |
break
|
| 735 |
else:
|
| 736 |
print(f"❌ No alternative path found for page {display_page_num}")
|
|
|
|
| 745 |
for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
|
| 746 |
# Extract page number from path
|
| 747 |
page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
|
| 748 |
+
print(f" {i}. Page {page_num} - Score: {score:.4f}")
|
| 749 |
|
| 750 |
if page_scores:
|
| 751 |
final_avg_score = sum(page_scores) / len(page_scores)
|