Spaces:
Build error
Build error
Commit
·
4dad73d
1
Parent(s):
6f7c5de
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import the necessary libraries
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from keybert import KeyBERT
|
| 7 |
+
from keyphrase_vectorizers import KeyphraseCountVectorizer
|
| 8 |
+
|
| 9 |
+
# Defining a function to read in the text file
|
| 10 |
+
|
| 11 |
+
def read_in_text(url):
    """Return the full contents of a text file as a single string.

    Args:
        url: Filesystem path of the file to read. Despite the name, the
            value is passed straight to ``open()``, so it must be a local
            path rather than a remote URL.

    Returns:
        The entire file content as one ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Pin the encoding so decoding does not vary with the host's locale.
    with open(url, "r", encoding="utf-8") as file:
        return file.read()
|
| 15 |
+
|
| 16 |
+
# Build the embedding model and the KeyBERT extractor once at import time so
# that every call to the keyword-extraction function reuses the same objects.
tmp_model = SentenceTransformer("valurank/MiniLM-L6-Keyword-Extraction")
kw_extractor = KeyBERT(model=tmp_model)
|
| 18 |
+
|
| 19 |
+
def get_keybert_results_with_vectorizer(file, number_of_results=20):
    """Extract keyphrases from a text file and return them as a table.

    Args:
        file: Object whose ``name`` attribute is the path of the text file
            to analyse (presumably the upload object handed over by the
            Gradio file input -- confirm against the installed version).
        number_of_results: Maximum number of keyphrases to request from
            KeyBERT and to return.

    Returns:
        A ``pandas.DataFrame`` with a single ``keyword`` column holding the
        phrases whose score is at least 0.25, in the order KeyBERT returned
        them. If any step fails, the string ``"Error"`` is returned instead.
    """
    # Phrases scoring below this threshold are dropped from the output.
    min_score = 0.25
    try:
        text = read_in_text(file.name)
        scored_phrases = kw_extractor.extract_keywords(
            text,
            vectorizer=KeyphraseCountVectorizer(),
            stop_words=None,
            top_n=number_of_results,
        )
        phrases = [
            phrase for phrase, score in scored_phrases if score >= min_score
        ]

        output_df = pd.DataFrame({"keyword": np.array(phrases)})
        # Cap at the requested count rather than a fixed 20, so callers that
        # ask for more than 20 results are not silently truncated.
        return output_df.head(number_of_results)
    except Exception:
        # Keep the "Error" sentinel callers rely on, but record the
        # traceback so the failure can be diagnosed from the logs.
        import logging

        logging.getLogger(__name__).exception("Keyword extraction failed")
        return "Error"
|
| 34 |
+
|
| 35 |
+
# `gr` is used below but was never imported anywhere in this file, which
# raises NameError as soon as the module is executed. Imported here, next to
# its only use, so this block stands on its own.
import gradio as gr

# Web UI: one file upload in, one table of keywords out.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated in
# newer Gradio releases -- confirm against the version pinned for this app.
demo = gr.Interface(
    get_keybert_results_with_vectorizer,
    inputs=gr.inputs.File(),
    outputs=gr.outputs.Dataframe(),
    title="Keyword Extraction",
)

if __name__ == "__main__":
    demo.launch(debug=True)
|