Spaces:

Kazel
/

demo-updated

Runtime error

App Files Community

Kazel committed on Sep 7

Commit

cfd58b0

1 Parent(s): af476a6

logging

Browse files

Files changed (1) hide show

app.py +8 -5

app.py CHANGED Viewed

@@ -688,11 +688,13 @@ class PDFSearchApp:
             print(f"🔍 Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
             for i, (score, doc_id) in enumerate(selected_results):
-                # Use the index as page number since doc_id is just an identifier
-                # This ensures we look for page_1.png, page_2.png, etc.
-                display_page_num = i + 1
                 coll_num = collection_name  # Use the current collection name
                 # Use debug function to get paths and check existence
                 img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
@@ -701,7 +703,7 @@ class PDFSearchApp:
                     all_paths.append(path)
                     page_scores.append(score)
                     cited_pages.append(f"Page {display_page_num} from {coll_num}")
-                    print(f"✅ Retrieved page {i+1}: {img_path} (Score: {score:.3f})")
                 else:
                     print(f"❌ Image file not found: {img_path}")
                     # Try alternative paths with better fallback logic
@@ -728,6 +730,7 @@ class PDFSearchApp:
                             all_paths.append(alt_path.replace(".png", ""))
                             page_scores.append(score)
                             cited_pages.append(f"Page {display_page_num} from {coll_num}")
                             break
                     else:
                         print(f"❌ No alternative path found for page {display_page_num}")
@@ -742,7 +745,7 @@ class PDFSearchApp:
                 for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
                     # Extract page number from path
                     page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
-                    print(f"   {i}. {page_num} - Score: {score:.4f}")
                 if page_scores:
                     final_avg_score = sum(page_scores) / len(page_scores)

             print(f"🔍 Environment: {'Hugging Face Spaces' if self._is_huggingface_spaces() else 'Local Development'}")
             for i, (score, doc_id) in enumerate(selected_results):
+                # 🎯 FIX: Use the actual page number from doc_id, not the index position
+                # doc_id represents the actual page number in the document
+                display_page_num = doc_id + 1  # Convert 0-based doc_id to 1-based page number
                 coll_num = collection_name  # Use the current collection name
+                print(f"🔍 Processing result {i+1}: doc_id={doc_id}, actual_page={display_page_num}, score={score:.4f}")
                 # Use debug function to get paths and check existence
                 img_path, path, file_exists = self._debug_file_paths(base_output_dir, coll_num, display_page_num)
                     all_paths.append(path)
                     page_scores.append(score)
                     cited_pages.append(f"Page {display_page_num} from {coll_num}")
+                    print(f"✅ Retrieved page {display_page_num}: {img_path} (Score: {score:.3f})")
                 else:
                     print(f"❌ Image file not found: {img_path}")
                     # Try alternative paths with better fallback logic
                             all_paths.append(alt_path.replace(".png", ""))
                             page_scores.append(score)
                             cited_pages.append(f"Page {display_page_num} from {coll_num}")
+                            print(f"✅ Retrieved page {display_page_num}: {alt_path} (Score: {score:.3f})")
                             break
                     else:
                         print(f"❌ No alternative path found for page {display_page_num}")
                 for i, (img_path, score) in enumerate(zip(img_paths, page_scores), 1):
                     # Extract page number from path
                     page_num = img_path.split('page_')[1].split('.png')[0] if 'page_' in img_path else f"Page {i}"
+                    print(f"   {i}. Page {page_num} - Score: {score:.4f}")
                 if page_scores:
                     final_avg_score = sum(page_scores) / len(page_scores)