SorrelC committed on
Commit
5311f88
·
verified ·
1 Parent(s): 05d6a22

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pke
3
+ import nltk
4
+ import re
5
+
6
# Fetch the NLTK stopword corpus at import time so it is available before any
# extraction runs (presumably needed by pke's stopword filtering -- confirm
# against the installed pke version).
nltk.download('stopwords')
7
+
8
# Identifiers of the keyword-extraction models offered in the UI dropdown.
# The first entry is used as the dropdown's default selection.
AVAILABLE_MODELS = [
    "kw_pke_multipartiterank",
    "kw_pke_singlerank",
    "kw_pke_tfidf",
    "kw_pke_topicrank",
    "kw_pke_textrank",
    "kw_pke_positionrank",
]
17
+
18
# Maps each selectable model identifier to the name of the extractor class
# looked up on ``pke.unsupervised``. Names are resolved lazily so an unknown
# identifier is rejected before pke is touched.
_PKE_EXTRACTOR_CLASSES = {
    "kw_pke_multipartiterank": "MultipartiteRank",
    "kw_pke_singlerank": "SingleRank",
    "kw_pke_tfidf": "TfIdf",
    "kw_pke_topicrank": "TopicRank",
    "kw_pke_textrank": "TextRank",
    "kw_pke_positionrank": "PositionRank",
}


def extract_keywords_pke(text, model_choice, num_keywords):
    """Extract keyphrases from ``text`` with the selected pke model.

    Args:
        text: Raw document text handed to the extractor's ``load_document``.
        model_choice: Model identifier, e.g. ``"kw_pke_topicrank"``.
        num_keywords: Maximum number of keyphrases to return. Coerced to
            ``int`` because a UI slider may deliver the value as a float.

    Returns:
        A list of keyphrase strings in the order produced by the extractor's
        ``get_n_best``. For an unrecognised ``model_choice`` the single-item
        list ``["Error: Unknown model"]`` is returned instead.
    """
    class_name = _PKE_EXTRACTOR_CLASSES.get(model_choice)
    if class_name is None:
        return ["Error: Unknown model"]

    extractor = getattr(pke.unsupervised, class_name)()
    extractor.load_document(input=text, language='en', normalization=None)

    # TfIdf is the only model whose candidate selection is called with an
    # explicit ``n`` (presumably the maximum n-gram length -- see pke docs);
    # every other model uses its own defaults.
    if model_choice == "kw_pke_tfidf":
        extractor.candidate_selection(n=3)
    else:
        extractor.candidate_selection()

    extractor.candidate_weighting()

    ranked = extractor.get_n_best(n=int(num_keywords))
    return [phrase for phrase, _score in ranked]
48
+
49
+
50
def highlight_keywords(text, keywords):
    """Return ``text`` as HTML with keyword occurrences wrapped in ``<mark>``.

    Matching is case-insensitive and works on substrings. All keywords are
    combined into one alternation and applied in a single pass over the
    original text, so a keyword can never match inside markup inserted for
    another keyword. The highlighted span keeps the casing found in ``text``.
    Text outside and inside the highlights is HTML-escaped because the result
    is rendered as HTML.

    Args:
        text: The source text to annotate.
        keywords: Iterable of keyword strings; empty strings are ignored.

    Returns:
        An HTML string. When there is nothing to highlight, the escaped text
        is returned without any ``<mark>`` elements.
    """
    import html  # local import keeps this block self-contained

    def escape(fragment):
        return html.escape(fragment, quote=False)

    # Longest first so that, at any position, "machine learning" wins over
    # "machine" in the alternation.
    ordered = sorted((kw for kw in keywords or () if kw), key=len, reverse=True)
    if not ordered:
        return escape(text)

    open_tag = (
        '<mark style="background-color:#FFD54F; padding:2px 4px; '
        'border-radius:4px;">'
    )
    pattern = re.compile("|".join(re.escape(kw) for kw in ordered), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(escape(text[cursor:match.start()]))
        pieces.append(f"{open_tag}{escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(escape(text[cursor:]))
    return "".join(pieces)
62
+
63
+
64
def process_text(text, model_choice, num_keywords):
    """Run keyword extraction and build the three outputs shown in the UI.

    Args:
        text: Text entered by the user; ``None``, empty, or whitespace-only
            input is rejected with a message instead of being analysed.
        model_choice: Model identifier passed to ``extract_keywords_pke``.
        num_keywords: Requested number of keywords.

    Returns:
        A ``(summary_markdown, highlighted_html, keyword_list_html)`` tuple.
        For rejected input the tuple is the error message followed by two
        empty strings.
    """
    import html  # local import keeps this block self-contained

    if not text or not text.strip():
        return "❌ Please enter text to analyse.", "", ""

    keywords = extract_keywords_pke(text, model_choice, num_keywords)
    highlighted_html = highlight_keywords(text, keywords)

    # Built line by line (no leading indentation) so Markdown does not treat
    # the summary as an indented code block.
    summary = (
        "\n"
        "## 📊 Keyword Extraction Summary\n"
        f"- **Model Used:** {model_choice}\n"
        f"- **Keywords Found:** {len(keywords)}\n"
        "- **Displayed in Context Below**\n"
    )

    # Keywords originate from user text, so escape them before embedding
    # them in HTML.
    items = "".join(f"<li>{html.escape(kw)}</li>" for kw in keywords)
    keyword_list_html = f"<ul>{items}</ul>"

    return summary, highlighted_html, keyword_list_html
81
+
82
+
83
def create_interface():
    """Build the Gradio Blocks UI for the Keyword Explorer Tool.

    The interface has a text box, a model dropdown, a keyword-count slider and
    a button that calls ``process_text`` and fills the summary, highlighted
    text and keyword list outputs.

    Returns:
        The constructed ``gr.Blocks`` instance; it is not launched here.
    """
    # NOTE(review): the nesting of components inside each ``gr.Row`` below is
    # reconstructed from statement order -- confirm against the intended layout.
    with gr.Blocks(title="Keyword Explorer Tool") as demo:
        gr.Markdown("# 🔑 Keyword Explorer Tool\n\nExtract and explore keywords using multiple extraction models.")

        text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=8)

        with gr.Row():
            model_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                label="Select Keyword Extraction Model"
            )

            num_keywords_slider = gr.Slider(
                minimum=5,
                maximum=50,
                value=10,
                step=1,
                label="Number of Keywords"
            )

        analyse_btn = gr.Button("🚀 Extract Keywords")

        with gr.Row():
            summary_output = gr.Markdown(label="Summary")

        with gr.Row():
            highlighted_output = gr.HTML(label="Highlighted Text")

        with gr.Row():
            gr.Markdown("### 📋 Extracted Keywords List")
            keyword_list_output = gr.HTML(label="Keywords List")

        # Input and output order must match process_text's parameters and
        # its returned 3-tuple.
        analyse_btn.click(
            fn=process_text,
            inputs=[text_input, model_dropdown, num_keywords_slider],
            outputs=[summary_output, highlighted_output, keyword_list_output]
        )

        # Static attribution footer.
        gr.HTML("""
        <hr style="margin-top: 40px; margin-bottom: 20px;">
        <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px; text-align: center;">
        <p style="font-size: 14px; line-height: 1.8; margin: 0;">
        This <strong>Keyword Explorer Tool</strong> was created as part of the
        <a href="https://digitalscholarship.web.ox.ac.uk/" target="_blank" style="color: #1976d2;">
        Digital Scholarship at Oxford (DiSc)
        </a>
        funded research project:
        <em>Extracting Keywords from Crowdsourced Collections</em>.
        </p>
        </div>
        """)

    return demo
137
+
138
+
139
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    demo = create_interface()
    demo.launch()