Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,6 +37,10 @@ KEYWORD_COLORS = [
|
|
| 37 |
'#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
|
| 38 |
]
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
class KeywordExtractionManager:
|
| 41 |
def __init__(self):
|
| 42 |
self.rake_extractor = None
|
|
@@ -453,7 +457,7 @@ def get_relevance_level(score, max_score):
|
|
| 453 |
else:
|
| 454 |
return 'low'
|
| 455 |
|
| 456 |
-
def create_highlighted_html(text, keywords):
|
| 457 |
"""Create HTML with highlighted keywords in the text"""
|
| 458 |
if not keywords:
|
| 459 |
return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
|
|
@@ -472,12 +476,15 @@ def create_highlighted_html(text, keywords):
|
|
| 472 |
color = get_score_color(score, max_score)
|
| 473 |
relevance = get_relevance_level(score, max_score)
|
| 474 |
|
|
|
|
|
|
|
|
|
|
| 475 |
# Create regex pattern for whole word matching (case-insensitive)
|
| 476 |
pattern = r'\b' + re.escape(keyword) + r'\b'
|
| 477 |
|
| 478 |
# Replace with highlighted version
|
| 479 |
-
replacement = f'<span
|
| 480 |
-
f'border-radius: 3px; margin: 0 1px; ' \
|
| 481 |
f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
|
| 482 |
f'title="Score: {score:.3f}">{keyword}</span>'
|
| 483 |
|
|
@@ -486,11 +493,11 @@ def create_highlighted_html(text, keywords):
|
|
| 486 |
return f"""
|
| 487 |
<div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
|
| 488 |
<h4 style='margin: 0 0 15px 0; color: #333;'>π Text with Highlighted Keywords</h4>
|
| 489 |
-
<div
|
| 490 |
</div>
|
| 491 |
"""
|
| 492 |
|
| 493 |
-
def create_keyword_table_html(keywords):
|
| 494 |
"""Create HTML table for keywords with filtering capability"""
|
| 495 |
if not keywords:
|
| 496 |
return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
|
|
@@ -499,10 +506,22 @@ def create_keyword_table_html(keywords):
|
|
| 499 |
sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
|
| 500 |
max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
|
| 501 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
table_html = """
|
| 503 |
<div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
|
| 504 |
<h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
|
| 505 |
-
<table
|
| 506 |
<thead>
|
| 507 |
<tr style="background-color: #4ECDC4; color: white;">
|
| 508 |
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
|
|
@@ -518,7 +537,6 @@ def create_keyword_table_html(keywords):
|
|
| 518 |
for i, kw_data in enumerate(sorted_keywords):
|
| 519 |
score = kw_data['score']
|
| 520 |
color = get_score_color(score, max_score)
|
| 521 |
-
relevance = get_relevance_level(score, max_score)
|
| 522 |
|
| 523 |
# Create relevance bar
|
| 524 |
bar_width = int((score / max_score) * 100) if max_score > 0 else 0
|
|
@@ -529,7 +547,7 @@ def create_keyword_table_html(keywords):
|
|
| 529 |
"""
|
| 530 |
|
| 531 |
table_html += f"""
|
| 532 |
-
<tr
|
| 533 |
<td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
|
| 534 |
<td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
|
| 535 |
<td style="padding: 10px; border: 1px solid #ddd;">
|
|
@@ -553,87 +571,23 @@ def create_keyword_table_html(keywords):
|
|
| 553 |
return table_html
|
| 554 |
|
| 555 |
def create_legend_html():
|
| 556 |
-
"""Create
|
| 557 |
html = """
|
| 558 |
<div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
|
| 559 |
-
<h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend
|
|
|
|
| 560 |
<div style='display: flex; flex-wrap: wrap; gap: 15px;'>
|
| 561 |
-
<
|
| 562 |
-
style='background-color: #6c757d; padding: 8px 16px; border-radius: 15px;
|
| 563 |
-
color: white; font-weight: bold; border: none; cursor: pointer;
|
| 564 |
-
transition: all 0.3s ease;'>
|
| 565 |
-
Show All
|
| 566 |
-
</button>
|
| 567 |
-
<button onclick="filterByRelevance('high')"
|
| 568 |
-
style='background-color: #00B894; padding: 8px 16px; border-radius: 15px;
|
| 569 |
-
color: white; font-weight: bold; border: none; cursor: pointer;
|
| 570 |
-
transition: all 0.3s ease;'>
|
| 571 |
High Relevance (70%+)
|
| 572 |
-
</
|
| 573 |
-
<
|
| 574 |
-
style='background-color: #F9CA24; padding: 8px 16px; border-radius: 15px;
|
| 575 |
-
color: white; font-weight: bold; border: none; cursor: pointer;
|
| 576 |
-
transition: all 0.3s ease;'>
|
| 577 |
Medium Relevance (40-70%)
|
| 578 |
-
</
|
| 579 |
-
<
|
| 580 |
-
style='background-color: #FF6B6B; padding: 8px 16px; border-radius: 15px;
|
| 581 |
-
color: white; font-weight: bold; border: none; cursor: pointer;
|
| 582 |
-
transition: all 0.3s ease;'>
|
| 583 |
Low Relevance (<40%)
|
| 584 |
-
</
|
| 585 |
</div>
|
| 586 |
</div>
|
| 587 |
-
|
| 588 |
-
<script>
|
| 589 |
-
function filterByRelevance(level) {
|
| 590 |
-
const table = document.getElementById('keywords-table');
|
| 591 |
-
const rows = table.getElementsByClassName('keyword-row');
|
| 592 |
-
const textContainer = document.getElementById('highlighted-text');
|
| 593 |
-
const keywords = textContainer.getElementsByTagName('span');
|
| 594 |
-
|
| 595 |
-
// Filter table rows
|
| 596 |
-
for (let row of rows) {
|
| 597 |
-
if (level === 'all') {
|
| 598 |
-
row.style.display = '';
|
| 599 |
-
} else {
|
| 600 |
-
if (row.classList.contains('relevance-' + level)) {
|
| 601 |
-
row.style.display = '';
|
| 602 |
-
} else {
|
| 603 |
-
row.style.display = 'none';
|
| 604 |
-
}
|
| 605 |
-
}
|
| 606 |
-
}
|
| 607 |
-
|
| 608 |
-
// Highlight keywords in text
|
| 609 |
-
for (let keyword of keywords) {
|
| 610 |
-
if (level === 'all') {
|
| 611 |
-
keyword.style.opacity = '1';
|
| 612 |
-
keyword.style.filter = 'none';
|
| 613 |
-
} else {
|
| 614 |
-
if (keyword.classList.contains('keyword-' + level)) {
|
| 615 |
-
keyword.style.opacity = '1';
|
| 616 |
-
keyword.style.filter = 'none';
|
| 617 |
-
} else {
|
| 618 |
-
keyword.style.opacity = '0.3';
|
| 619 |
-
keyword.style.filter = 'grayscale(100%)';
|
| 620 |
-
}
|
| 621 |
-
}
|
| 622 |
-
}
|
| 623 |
-
|
| 624 |
-
// Update button styles
|
| 625 |
-
const buttons = document.querySelectorAll('button');
|
| 626 |
-
buttons.forEach(button => {
|
| 627 |
-
if (button.onclick && button.onclick.toString().includes(level)) {
|
| 628 |
-
button.style.transform = 'scale(1.1)';
|
| 629 |
-
button.style.boxShadow = '0 4px 8px rgba(0,0,0,0.2)';
|
| 630 |
-
} else {
|
| 631 |
-
button.style.transform = 'scale(1)';
|
| 632 |
-
button.style.boxShadow = 'none';
|
| 633 |
-
}
|
| 634 |
-
});
|
| 635 |
-
}
|
| 636 |
-
</script>
|
| 637 |
"""
|
| 638 |
return html
|
| 639 |
|
|
@@ -643,11 +597,16 @@ keyword_manager = KeywordExtractionManager()
|
|
| 643 |
|
| 644 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
| 645 |
"""Main processing function for Gradio interface with progress tracking"""
|
|
|
|
|
|
|
| 646 |
if not text.strip():
|
| 647 |
-
return "β Please enter some text to analyse", "", ""
|
| 648 |
|
| 649 |
progress(0.1, desc="Initialising...")
|
| 650 |
|
|
|
|
|
|
|
|
|
|
| 651 |
# Extract keywords
|
| 652 |
progress(0.2, desc="Extracting keywords...")
|
| 653 |
keywords = keyword_manager.extract_keywords(
|
|
@@ -659,7 +618,10 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
|
|
| 659 |
)
|
| 660 |
|
| 661 |
if not keywords:
|
| 662 |
-
return "β No keywords found. Try adjusting the parameters.", "", ""
|
|
|
|
|
|
|
|
|
|
| 663 |
|
| 664 |
progress(0.8, desc="Processing results...")
|
| 665 |
|
|
@@ -683,7 +645,26 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
|
|
| 683 |
|
| 684 |
progress(1.0, desc="Complete!")
|
| 685 |
|
| 686 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
|
| 688 |
# Create Gradio interface
|
| 689 |
def create_interface():
|
|
@@ -698,7 +679,7 @@ def create_interface():
|
|
| 698 |
2. **🎯 Select a model** from the dropdown for keyword extraction
|
| 699 |
3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
|
| 700 |
4. **π Click "Extract Keywords"** to see results with organized output
|
| 701 |
-
5. **🎨
|
| 702 |
""")
|
| 703 |
|
| 704 |
# Add tip box
|
|
@@ -788,6 +769,21 @@ def create_interface():
|
|
| 788 |
|
| 789 |
extract_btn = gr.Button("π Extract Keywords", variant="primary", size="lg")
|
| 790 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
# Output sections
|
| 792 |
with gr.Row():
|
| 793 |
summary_output = gr.Markdown(label="Summary")
|
|
@@ -811,7 +807,14 @@ def create_interface():
|
|
| 811 |
ngram_min,
|
| 812 |
ngram_max
|
| 813 |
],
|
| 814 |
-
outputs=[summary_output, highlighted_output, results_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
)
|
| 816 |
|
| 817 |
gr.Examples(
|
|
|
|
| 37 |
'#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
|
| 38 |
]
|
| 39 |
|
| 40 |
+
# Global variable to store current keywords for filtering
|
| 41 |
+
current_keywords = []
|
| 42 |
+
current_text = ""
|
| 43 |
+
|
| 44 |
class KeywordExtractionManager:
|
| 45 |
def __init__(self):
|
| 46 |
self.rake_extractor = None
|
|
|
|
| 457 |
else:
|
| 458 |
return 'low'
|
| 459 |
|
| 460 |
+
def create_highlighted_html(text, keywords, filter_level='all'):
|
| 461 |
"""Create HTML with highlighted keywords in the text"""
|
| 462 |
if not keywords:
|
| 463 |
return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
|
|
|
|
| 476 |
color = get_score_color(score, max_score)
|
| 477 |
relevance = get_relevance_level(score, max_score)
|
| 478 |
|
| 479 |
+
# Apply filtering based on filter_level
|
| 480 |
+
opacity = '1' if filter_level == 'all' or relevance == filter_level else '0.3'
|
| 481 |
+
|
| 482 |
# Create regex pattern for whole word matching (case-insensitive)
|
| 483 |
pattern = r'\b' + re.escape(keyword) + r'\b'
|
| 484 |
|
| 485 |
# Replace with highlighted version
|
| 486 |
+
replacement = f'<span style="background-color: {color}; padding: 2px 4px; ' \
|
| 487 |
+
f'border-radius: 3px; margin: 0 1px; opacity: {opacity}; ' \
|
| 488 |
f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
|
| 489 |
f'title="Score: {score:.3f}">{keyword}</span>'
|
| 490 |
|
|
|
|
| 493 |
return f"""
|
| 494 |
<div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
|
| 495 |
<h4 style='margin: 0 0 15px 0; color: #333;'>π Text with Highlighted Keywords</h4>
|
| 496 |
+
<div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_text}</div>
|
| 497 |
</div>
|
| 498 |
"""
|
| 499 |
|
| 500 |
+
def create_keyword_table_html(keywords, filter_level='all'):
|
| 501 |
"""Create HTML table for keywords with filtering capability"""
|
| 502 |
if not keywords:
|
| 503 |
return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
|
|
|
|
| 506 |
sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
|
| 507 |
max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
|
| 508 |
|
| 509 |
+
# Filter keywords based on filter_level
|
| 510 |
+
if filter_level != 'all':
|
| 511 |
+
filtered_keywords = []
|
| 512 |
+
for kw in sorted_keywords:
|
| 513 |
+
relevance = get_relevance_level(kw['score'], max_score)
|
| 514 |
+
if relevance == filter_level:
|
| 515 |
+
filtered_keywords.append(kw)
|
| 516 |
+
sorted_keywords = filtered_keywords
|
| 517 |
+
|
| 518 |
+
if not sorted_keywords:
|
| 519 |
+
return f"<p style='text-align: center; padding: 20px;'>No {filter_level} relevance keywords found.</p>"
|
| 520 |
+
|
| 521 |
table_html = """
|
| 522 |
<div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
|
| 523 |
<h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
|
| 524 |
+
<table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; background-color: white;">
|
| 525 |
<thead>
|
| 526 |
<tr style="background-color: #4ECDC4; color: white;">
|
| 527 |
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
|
|
|
|
| 537 |
for i, kw_data in enumerate(sorted_keywords):
|
| 538 |
score = kw_data['score']
|
| 539 |
color = get_score_color(score, max_score)
|
|
|
|
| 540 |
|
| 541 |
# Create relevance bar
|
| 542 |
bar_width = int((score / max_score) * 100) if max_score > 0 else 0
|
|
|
|
| 547 |
"""
|
| 548 |
|
| 549 |
table_html += f"""
|
| 550 |
+
<tr style="background-color: #fff;">
|
| 551 |
<td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
|
| 552 |
<td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
|
| 553 |
<td style="padding: 10px; border: 1px solid #ddd;">
|
|
|
|
| 571 |
return table_html
|
| 572 |
|
| 573 |
def create_legend_html():
|
| 574 |
+
"""Create a legend showing score colors"""
|
| 575 |
html = """
|
| 576 |
<div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
|
| 577 |
+
<h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend</h4>
|
| 578 |
+
<p style='font-size: 14px; color: #666; margin-bottom: 15px;'>Use the radio buttons below to filter keywords by relevance level</p>
|
| 579 |
<div style='display: flex; flex-wrap: wrap; gap: 15px;'>
|
| 580 |
+
<span style='background-color: #00B894; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
High Relevance (70%+)
|
| 582 |
+
</span>
|
| 583 |
+
<span style='background-color: #F9CA24; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
|
|
|
|
|
|
|
|
|
|
| 584 |
Medium Relevance (40-70%)
|
| 585 |
+
</span>
|
| 586 |
+
<span style='background-color: #FF6B6B; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
|
|
|
|
|
|
|
|
|
|
| 587 |
Low Relevance (<40%)
|
| 588 |
+
</span>
|
| 589 |
</div>
|
| 590 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
"""
|
| 592 |
return html
|
| 593 |
|
|
|
|
| 597 |
|
| 598 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
| 599 |
"""Main processing function for Gradio interface with progress tracking"""
|
| 600 |
+
global current_keywords, current_text
|
| 601 |
+
|
| 602 |
if not text.strip():
|
| 603 |
+
return "β Please enter some text to analyse", "", "", gr.update(visible=False)
|
| 604 |
|
| 605 |
progress(0.1, desc="Initialising...")
|
| 606 |
|
| 607 |
+
# Store the text globally for filtering
|
| 608 |
+
current_text = text
|
| 609 |
+
|
| 610 |
# Extract keywords
|
| 611 |
progress(0.2, desc="Extracting keywords...")
|
| 612 |
keywords = keyword_manager.extract_keywords(
|
|
|
|
| 618 |
)
|
| 619 |
|
| 620 |
if not keywords:
|
| 621 |
+
return "β No keywords found. Try adjusting the parameters.", "", "", gr.update(visible=False)
|
| 622 |
+
|
| 623 |
+
# Store keywords globally for filtering
|
| 624 |
+
current_keywords = keywords
|
| 625 |
|
| 626 |
progress(0.8, desc="Processing results...")
|
| 627 |
|
|
|
|
| 645 |
|
| 646 |
progress(1.0, desc="Complete!")
|
| 647 |
|
| 648 |
+
# Make filter controls visible
|
| 649 |
+
return summary, legend_html + highlighted_html, results_html, gr.update(visible=True)
|
| 650 |
+
|
| 651 |
+
def filter_results(filter_level):
|
| 652 |
+
"""Filter the results based on relevance level"""
|
| 653 |
+
global current_keywords, current_text
|
| 654 |
+
|
| 655 |
+
if not current_keywords:
|
| 656 |
+
return "", ""
|
| 657 |
+
|
| 658 |
+
# Update highlighted text
|
| 659 |
+
highlighted_html = create_highlighted_html(current_text, current_keywords, filter_level)
|
| 660 |
+
|
| 661 |
+
# Update table
|
| 662 |
+
results_html = create_keyword_table_html(current_keywords, filter_level)
|
| 663 |
+
|
| 664 |
+
# Add legend to highlighted output
|
| 665 |
+
legend_html = create_legend_html()
|
| 666 |
+
|
| 667 |
+
return legend_html + highlighted_html, results_html
|
| 668 |
|
| 669 |
# Create Gradio interface
|
| 670 |
def create_interface():
|
|
|
|
| 679 |
2. **🎯 Select a model** from the dropdown for keyword extraction
|
| 680 |
3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
|
| 681 |
4. **π Click "Extract Keywords"** to see results with organized output
|
| 682 |
+
5. **🎨 Use the filter buttons** to show keywords by relevance level
|
| 683 |
""")
|
| 684 |
|
| 685 |
# Add tip box
|
|
|
|
| 769 |
|
| 770 |
extract_btn = gr.Button("π Extract Keywords", variant="primary", size="lg")
|
| 771 |
|
| 772 |
+
# Filter controls (initially hidden)
|
| 773 |
+
with gr.Row(visible=False) as filter_row:
|
| 774 |
+
gr.Markdown("### 🎯 Filter by Relevance Level:")
|
| 775 |
+
filter_radio = gr.Radio(
|
| 776 |
+
choices=[
|
| 777 |
+
("Show All", "all"),
|
| 778 |
+
("High Relevance (70%+)", "high"),
|
| 779 |
+
("Medium Relevance (40-70%)", "medium"),
|
| 780 |
+
("Low Relevance (<40%)", "low")
|
| 781 |
+
],
|
| 782 |
+
value="all",
|
| 783 |
+
label="",
|
| 784 |
+
interactive=True
|
| 785 |
+
)
|
| 786 |
+
|
| 787 |
# Output sections
|
| 788 |
with gr.Row():
|
| 789 |
summary_output = gr.Markdown(label="Summary")
|
|
|
|
| 807 |
ngram_min,
|
| 808 |
ngram_max
|
| 809 |
],
|
| 810 |
+
outputs=[summary_output, highlighted_output, results_output, filter_row]
|
| 811 |
+
)
|
| 812 |
+
|
| 813 |
+
# Connect filter radio to filter function
|
| 814 |
+
filter_radio.change(
|
| 815 |
+
fn=filter_results,
|
| 816 |
+
inputs=[filter_radio],
|
| 817 |
+
outputs=[highlighted_output, results_output]
|
| 818 |
)
|
| 819 |
|
| 820 |
gr.Examples(
|