Update app.py
Browse files
app.py
CHANGED
|
@@ -460,7 +460,7 @@ def display_source_documents_with_images(source_documents, query):
|
|
| 460 |
highlighted_snippet = highlight_query_words(snippet, query)
|
| 461 |
|
| 462 |
st.markdown(f'<div class="source-content">{highlighted_snippet}</div>', unsafe_allow_html=True)
|
| 463 |
-
|
| 464 |
|
| 465 |
logger.debug(f"Successfully displayed content for {pdf_name}, page {page_number + 1}")
|
| 466 |
|
|
@@ -519,41 +519,33 @@ def get_pdf_details(filename, page_number):
|
|
| 519 |
"""Get details of a specific PDF page."""
|
| 520 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
| 521 |
try:
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
data_path = 'data' # Local storage
|
| 527 |
-
|
| 528 |
file_path = os.path.join(data_path, filename)
|
| 529 |
|
| 530 |
-
# Ensure file exists
|
| 531 |
-
if not os.path.exists(file_path):
|
| 532 |
-
logger.error(f"File does not exist at {file_path}")
|
| 533 |
-
st.error(f"File not found at {file_path}")
|
| 534 |
-
return
|
| 535 |
-
|
| 536 |
# Open the PDF
|
| 537 |
logger.debug(f"Opening PDF file: {file_path}")
|
| 538 |
doc = fitz.open(file_path)
|
| 539 |
-
|
| 540 |
# Extract full PDF text
|
| 541 |
full_text = ""
|
| 542 |
for page in doc:
|
| 543 |
full_text += page.get_text()
|
| 544 |
-
|
| 545 |
# Get PDF metadata
|
| 546 |
pdf_metadata = doc.metadata or {}
|
| 547 |
-
|
| 548 |
# Extract page text and render page image
|
| 549 |
page = doc.load_page(page_number)
|
| 550 |
page_text = page.get_text()
|
| 551 |
-
|
| 552 |
# Render page as image
|
| 553 |
pix = page.get_pixmap()
|
| 554 |
img_bytes = pix.tobytes("png")
|
| 555 |
page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 556 |
-
|
| 557 |
# Detect language
|
| 558 |
try:
|
| 559 |
lang_code = detect(page_text)
|
|
@@ -561,7 +553,7 @@ def get_pdf_details(filename, page_number):
|
|
| 561 |
except Exception as e:
|
| 562 |
logger.warning(f"Language detection failed: {str(e)}")
|
| 563 |
language = 'Unknown'
|
| 564 |
-
|
| 565 |
# Prepare response
|
| 566 |
return {
|
| 567 |
"file_path": file_path,
|
|
@@ -590,20 +582,12 @@ def get_romanized_text(filename):
|
|
| 590 |
"""Get romanized text from a PDF."""
|
| 591 |
logger.info(f"Processing romanized text for file: {filename}")
|
| 592 |
try:
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
data_path = 'data' # Use local directory
|
| 598 |
-
|
| 599 |
file_path = os.path.join(data_path, filename)
|
| 600 |
|
| 601 |
-
# Ensure file exists
|
| 602 |
-
if not os.path.exists(file_path):
|
| 603 |
-
logger.error(f"File does not exist at {file_path}")
|
| 604 |
-
st.error(f"File not found at {file_path}")
|
| 605 |
-
return
|
| 606 |
-
|
| 607 |
# Open the PDF
|
| 608 |
logger.debug(f"Opening PDF file for romanization: {file_path}")
|
| 609 |
doc = fitz.open(file_path)
|
|
|
|
| 460 |
highlighted_snippet = highlight_query_words(snippet, query)
|
| 461 |
|
| 462 |
st.markdown(f'<div class="source-content">{highlighted_snippet}</div>', unsafe_allow_html=True)
|
| 463 |
+
st.markdown(f"[View other results in this book](?page=pdf_details&filename={pdf_name}&page_number={page_number})", unsafe_allow_html=True)
|
| 464 |
|
| 465 |
logger.debug(f"Successfully displayed content for {pdf_name}, page {page_number + 1}")
|
| 466 |
|
|
|
|
| 519 |
"""Get details of a specific PDF page."""
|
| 520 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
| 521 |
try:
|
| 522 |
+
with open(CONFIG_FILE, 'r') as f:
|
| 523 |
+
config = json.load(f)
|
| 524 |
+
|
| 525 |
+
data_path = config.get('data_path', '/tmp/data')
|
|
|
|
|
|
|
| 526 |
file_path = os.path.join(data_path, filename)
|
| 527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
# Open the PDF
|
| 529 |
logger.debug(f"Opening PDF file: {file_path}")
|
| 530 |
doc = fitz.open(file_path)
|
| 531 |
+
|
| 532 |
# Extract full PDF text
|
| 533 |
full_text = ""
|
| 534 |
for page in doc:
|
| 535 |
full_text += page.get_text()
|
| 536 |
+
|
| 537 |
# Get PDF metadata
|
| 538 |
pdf_metadata = doc.metadata or {}
|
| 539 |
+
|
| 540 |
# Extract page text and render page image
|
| 541 |
page = doc.load_page(page_number)
|
| 542 |
page_text = page.get_text()
|
| 543 |
+
|
| 544 |
# Render page as image
|
| 545 |
pix = page.get_pixmap()
|
| 546 |
img_bytes = pix.tobytes("png")
|
| 547 |
page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 548 |
+
|
| 549 |
# Detect language
|
| 550 |
try:
|
| 551 |
lang_code = detect(page_text)
|
|
|
|
| 553 |
except Exception as e:
|
| 554 |
logger.warning(f"Language detection failed: {str(e)}")
|
| 555 |
language = 'Unknown'
|
| 556 |
+
|
| 557 |
# Prepare response
|
| 558 |
return {
|
| 559 |
"file_path": file_path,
|
|
|
|
| 582 |
"""Get romanized text from a PDF."""
|
| 583 |
logger.info(f"Processing romanized text for file: {filename}")
|
| 584 |
try:
|
| 585 |
+
with open(CONFIG_FILE, 'r') as f:
|
| 586 |
+
config = json.load(f)
|
| 587 |
+
|
| 588 |
+
data_path = config.get('data_path', '/tmp/data')
|
|
|
|
|
|
|
| 589 |
file_path = os.path.join(data_path, filename)
|
| 590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
# Open the PDF
|
| 592 |
logger.debug(f"Opening PDF file for romanization: {file_path}")
|
| 593 |
doc = fitz.open(file_path)
|