Update app.py
Browse files
app.py
CHANGED
|
@@ -37,10 +37,6 @@ KEYWORD_COLORS = [
|
|
| 37 |
'#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
|
| 38 |
]
|
| 39 |
|
| 40 |
-
# Global variable to store current keywords for filtering
|
| 41 |
-
current_keywords = []
|
| 42 |
-
current_text = ""
|
| 43 |
-
|
| 44 |
class KeywordExtractionManager:
|
| 45 |
def __init__(self):
|
| 46 |
self.rake_extractor = None
|
|
@@ -457,7 +453,7 @@ def get_relevance_level(score, max_score):
|
|
| 457 |
else:
|
| 458 |
return 'low'
|
| 459 |
|
| 460 |
-
def create_highlighted_html(text, keywords, filter_level='all'):
|
| 461 |
"""Create HTML with highlighted keywords in the text"""
|
| 462 |
if not keywords:
|
| 463 |
return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
|
|
@@ -474,17 +470,13 @@ def create_highlighted_html(text, keywords, filter_level='all'):
|
|
| 474 |
keyword = kw_data['keyword']
|
| 475 |
score = kw_data['score']
|
| 476 |
color = get_score_color(score, max_score)
|
| 477 |
-
relevance = get_relevance_level(score, max_score)
|
| 478 |
-
|
| 479 |
-
# Apply filtering based on filter_level
|
| 480 |
-
opacity = '1' if filter_level == 'all' or relevance == filter_level else '0.3'
|
| 481 |
|
| 482 |
# Create regex pattern for whole word matching (case-insensitive)
|
| 483 |
pattern = r'\b' + re.escape(keyword) + r'\b'
|
| 484 |
|
| 485 |
# Replace with highlighted version
|
| 486 |
replacement = f'<span style="background-color: {color}; padding: 2px 4px; ' \
|
| 487 |
-
f'border-radius: 3px; margin: 0 1px;
|
| 488 |
f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
|
| 489 |
f'title="Score: {score:.3f}">{keyword}</span>'
|
| 490 |
|
|
@@ -497,8 +489,8 @@ def create_highlighted_html(text, keywords, filter_level='all'):
|
|
| 497 |
</div>
|
| 498 |
"""
|
| 499 |
|
| 500 |
-
def create_keyword_table_html(keywords, filter_level='all'):
|
| 501 |
-
"""Create HTML table for keywords
|
| 502 |
if not keywords:
|
| 503 |
return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
|
| 504 |
|
|
@@ -506,18 +498,6 @@ def create_keyword_table_html(keywords, filter_level='all'):
|
|
| 506 |
sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
|
| 507 |
max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
|
| 508 |
|
| 509 |
-
# Filter keywords based on filter_level
|
| 510 |
-
if filter_level != 'all':
|
| 511 |
-
filtered_keywords = []
|
| 512 |
-
for kw in sorted_keywords:
|
| 513 |
-
relevance = get_relevance_level(kw['score'], max_score)
|
| 514 |
-
if relevance == filter_level:
|
| 515 |
-
filtered_keywords.append(kw)
|
| 516 |
-
sorted_keywords = filtered_keywords
|
| 517 |
-
|
| 518 |
-
if not sorted_keywords:
|
| 519 |
-
return f"<p style='text-align: center; padding: 20px;'>No {filter_level} relevance keywords found.</p>"
|
| 520 |
-
|
| 521 |
table_html = """
|
| 522 |
<div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
|
| 523 |
<h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
|
|
@@ -575,7 +555,6 @@ def create_legend_html():
|
|
| 575 |
html = """
|
| 576 |
<div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
|
| 577 |
<h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend</h4>
|
| 578 |
-
<p style='font-size: 14px; color: #666; margin-bottom: 15px;'>Use the radio buttons below to filter keywords by relevance level</p>
|
| 579 |
<div style='display: flex; flex-wrap: wrap; gap: 15px;'>
|
| 580 |
<span style='background-color: #00B894; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
|
| 581 |
High Relevance (70%+)
|
|
@@ -597,16 +576,12 @@ keyword_manager = KeywordExtractionManager()
|
|
| 597 |
|
| 598 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
| 599 |
"""Main processing function for Gradio interface with progress tracking"""
|
| 600 |
-
global current_keywords, current_text
|
| 601 |
|
| 602 |
if not text.strip():
|
| 603 |
-
return "β Please enter some text to analyse", "", ""
|
| 604 |
|
| 605 |
progress(0.1, desc="Initialising...")
|
| 606 |
|
| 607 |
-
# Store the text globally for filtering
|
| 608 |
-
current_text = text
|
| 609 |
-
|
| 610 |
# Extract keywords
|
| 611 |
progress(0.2, desc="Extracting keywords...")
|
| 612 |
keywords = keyword_manager.extract_keywords(
|
|
@@ -618,10 +593,7 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
|
|
| 618 |
)
|
| 619 |
|
| 620 |
if not keywords:
|
| 621 |
-
return "β No keywords found. Try adjusting the parameters.", "", ""
|
| 622 |
-
|
| 623 |
-
# Store keywords globally for filtering
|
| 624 |
-
current_keywords = keywords
|
| 625 |
|
| 626 |
progress(0.8, desc="Processing results...")
|
| 627 |
|
|
@@ -645,26 +617,7 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
|
|
| 645 |
|
| 646 |
progress(1.0, desc="Complete!")
|
| 647 |
|
| 648 |
-
|
| 649 |
-
return summary, legend_html + highlighted_html, results_html, gr.update(visible=True)
|
| 650 |
-
|
| 651 |
-
def filter_results(filter_level):
|
| 652 |
-
"""Filter the results based on relevance level"""
|
| 653 |
-
global current_keywords, current_text
|
| 654 |
-
|
| 655 |
-
if not current_keywords:
|
| 656 |
-
return "", ""
|
| 657 |
-
|
| 658 |
-
# Update highlighted text
|
| 659 |
-
highlighted_html = create_highlighted_html(current_text, current_keywords, filter_level)
|
| 660 |
-
|
| 661 |
-
# Update table
|
| 662 |
-
results_html = create_keyword_table_html(current_keywords, filter_level)
|
| 663 |
-
|
| 664 |
-
# Add legend to highlighted output
|
| 665 |
-
legend_html = create_legend_html()
|
| 666 |
-
|
| 667 |
-
return legend_html + highlighted_html, results_html
|
| 668 |
|
| 669 |
# Create Gradio interface
|
| 670 |
def create_interface():
|
|
@@ -679,7 +632,6 @@ def create_interface():
|
|
| 679 |
2. **🎯 Select a model** from the dropdown for keyword extraction
|
| 680 |
3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
|
| 681 |
4. **π Click "Extract Keywords"** to see results with organized output
|
| 682 |
-
5. **🎨 Use the filter buttons** to show keywords by relevance level
|
| 683 |
""")
|
| 684 |
|
| 685 |
# Add tip box
|
|
@@ -769,21 +721,6 @@ def create_interface():
|
|
| 769 |
|
| 770 |
extract_btn = gr.Button("π Extract Keywords", variant="primary", size="lg")
|
| 771 |
|
| 772 |
-
# Filter controls (initially hidden)
|
| 773 |
-
with gr.Row(visible=False) as filter_row:
|
| 774 |
-
gr.Markdown("### 🎯 Filter by Relevance Level:")
|
| 775 |
-
filter_radio = gr.Radio(
|
| 776 |
-
choices=[
|
| 777 |
-
("Show All", "all"),
|
| 778 |
-
("High Relevance (70%+)", "high"),
|
| 779 |
-
("Medium Relevance (40-70%)", "medium"),
|
| 780 |
-
("Low Relevance (<40%)", "low")
|
| 781 |
-
],
|
| 782 |
-
value="all",
|
| 783 |
-
label="",
|
| 784 |
-
interactive=True
|
| 785 |
-
)
|
| 786 |
-
|
| 787 |
# Output sections
|
| 788 |
with gr.Row():
|
| 789 |
summary_output = gr.Markdown(label="Summary")
|
|
@@ -807,14 +744,7 @@ def create_interface():
|
|
| 807 |
ngram_min,
|
| 808 |
ngram_max
|
| 809 |
],
|
| 810 |
-
outputs=[summary_output, highlighted_output, results_output
|
| 811 |
-
)
|
| 812 |
-
|
| 813 |
-
# Connect filter radio to filter function
|
| 814 |
-
filter_radio.change(
|
| 815 |
-
fn=filter_results,
|
| 816 |
-
inputs=[filter_radio],
|
| 817 |
-
outputs=[highlighted_output, results_output]
|
| 818 |
)
|
| 819 |
|
| 820 |
gr.Examples(
|
|
|
|
| 37 |
'#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
|
| 38 |
]
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
class KeywordExtractionManager:
|
| 41 |
def __init__(self):
|
| 42 |
self.rake_extractor = None
|
|
|
|
| 453 |
else:
|
| 454 |
return 'low'
|
| 455 |
|
| 456 |
+
def create_highlighted_html(text, keywords):
|
| 457 |
"""Create HTML with highlighted keywords in the text"""
|
| 458 |
if not keywords:
|
| 459 |
return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
|
|
|
|
| 470 |
keyword = kw_data['keyword']
|
| 471 |
score = kw_data['score']
|
| 472 |
color = get_score_color(score, max_score)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
|
| 474 |
# Create regex pattern for whole word matching (case-insensitive)
|
| 475 |
pattern = r'\b' + re.escape(keyword) + r'\b'
|
| 476 |
|
| 477 |
# Replace with highlighted version
|
| 478 |
replacement = f'<span style="background-color: {color}; padding: 2px 4px; ' \
|
| 479 |
+
f'border-radius: 3px; margin: 0 1px; ' \
|
| 480 |
f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
|
| 481 |
f'title="Score: {score:.3f}">{keyword}</span>'
|
| 482 |
|
|
|
|
| 489 |
</div>
|
| 490 |
"""
|
| 491 |
|
| 492 |
+
def create_keyword_table_html(keywords):
|
| 493 |
+
"""Create HTML table for keywords"""
|
| 494 |
if not keywords:
|
| 495 |
return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
|
| 496 |
|
|
|
|
| 498 |
sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
|
| 499 |
max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
table_html = """
|
| 502 |
<div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
|
| 503 |
<h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
|
|
|
|
| 555 |
html = """
|
| 556 |
<div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
|
| 557 |
<h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend</h4>
|
|
|
|
| 558 |
<div style='display: flex; flex-wrap: wrap; gap: 15px;'>
|
| 559 |
<span style='background-color: #00B894; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
|
| 560 |
High Relevance (70%+)
|
|
|
|
| 576 |
|
| 577 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
| 578 |
"""Main processing function for Gradio interface with progress tracking"""
|
|
|
|
| 579 |
|
| 580 |
if not text.strip():
|
| 581 |
+
return "β Please enter some text to analyse", "", ""
|
| 582 |
|
| 583 |
progress(0.1, desc="Initialising...")
|
| 584 |
|
|
|
|
|
|
|
|
|
|
| 585 |
# Extract keywords
|
| 586 |
progress(0.2, desc="Extracting keywords...")
|
| 587 |
keywords = keyword_manager.extract_keywords(
|
|
|
|
| 593 |
)
|
| 594 |
|
| 595 |
if not keywords:
|
| 596 |
+
return "β No keywords found. Try adjusting the parameters.", "", ""
|
|
|
|
|
|
|
|
|
|
| 597 |
|
| 598 |
progress(0.8, desc="Processing results...")
|
| 599 |
|
|
|
|
| 617 |
|
| 618 |
progress(1.0, desc="Complete!")
|
| 619 |
|
| 620 |
+
return summary, legend_html + highlighted_html, results_html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
# Create Gradio interface
|
| 623 |
def create_interface():
|
|
|
|
| 632 |
2. **🎯 Select a model** from the dropdown for keyword extraction
|
| 633 |
3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
|
| 634 |
4. **π Click "Extract Keywords"** to see results with organized output
|
|
|
|
| 635 |
""")
|
| 636 |
|
| 637 |
# Add tip box
|
|
|
|
| 721 |
|
| 722 |
extract_btn = gr.Button("π Extract Keywords", variant="primary", size="lg")
|
| 723 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
# Output sections
|
| 725 |
with gr.Row():
|
| 726 |
summary_output = gr.Markdown(label="Summary")
|
|
|
|
| 744 |
ngram_min,
|
| 745 |
ngram_max
|
| 746 |
],
|
| 747 |
+
outputs=[summary_output, highlighted_output, results_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 748 |
)
|
| 749 |
|
| 750 |
gr.Examples(
|