Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,17 +2,79 @@ import streamlit as st
|
|
| 2 |
import spacy
|
| 3 |
import numpy as np
|
| 4 |
from gensim import corpora, models
|
| 5 |
-
from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
|
| 6 |
from itertools import chain
|
| 7 |
from sklearn.preprocessing import MultiLabelBinarizer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
| 9 |
|
| 10 |
nlp = spacy.load('en_core_web_sm')
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def print_list(lst):
    """Render every element of ``lst`` as a Markdown bullet via Streamlit.

    Args:
        lst: Iterable of items to display. Items are formatted with
            ``str()`` semantics, so non-string values (numbers, tuples, ...)
            are rendered instead of raising ``TypeError``.
    """
    for item in lst:
        # f-string formatting accepts any object, unlike `"- " + item`.
        st.markdown(f"- {item}")
|
| 15 |
|
|
|
|
| 16 |
st.subheader("Topic Modeling with Segmentation")
|
| 17 |
uploaded_file = st.file_uploader("choose a text file", type=["txt"])
|
| 18 |
if uploaded_file is not None:
|
|
|
|
| 2 |
import spacy
|
| 3 |
import numpy as np
|
| 4 |
from gensim import corpora, models
|
| 5 |
+
# from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
|
| 6 |
from itertools import chain
|
| 7 |
from sklearn.preprocessing import MultiLabelBinarizer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
from itertools import islice
|
| 10 |
+
from scipy.signal import argrelmax
|
| 11 |
|
| 12 |
nlp = spacy.load('en_core_web_sm')
|
| 13 |
|
| 14 |
+
|
| 15 |
+
def window(seq, n=3):
    """Yield overlapping tuples of ``n`` consecutive items from ``seq``.

    Each yielded tuple is the previous one shifted forward by one item.
    Nothing is yielded when ``seq`` holds fewer than ``n`` items.

    Args:
        seq: Any iterable.
        n: Window width; must be at least 1.

    Yields:
        Tuples of length ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        # n == 0 used to yield an empty tuple followed by 1-tuples, which is
        # never a meaningful sliding window.
        raise ValueError("window size n must be at least 1")

    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        # Drop the oldest item and append the newest one.
        result = result[1:] + (elem,)
        yield result
|
| 23 |
+
|
| 24 |
+
def _running_peaks(values):
    """Return, per position, the top of the strictly rising run towards index 0.

    Starting at a position and stepping left while the neighbour is strictly
    greater ends on the same value as the previous position's peak whenever
    that first step is possible, so one forward pass is enough.
    """
    peaks = []
    for idx, value in enumerate(values):
        if idx > 0 and values[idx - 1] > value:
            peaks.append(peaks[-1])
        else:
            peaks.append(value)
    return peaks


def get_depths(scores):
    """Compute a depth score for every position of ``scores``.

    For each position the nearest peak on either side is found by climbing
    while the neighbouring value is strictly greater. The depth is
    ``0.5 * (left_peak + right_peak - 2 * score)``.

    Args:
        scores: One-dimensional sequence of numbers (list or NumPy array).

    Returns:
        ``numpy.ndarray`` with one depth value per input score; empty when
        ``scores`` is empty.
    """
    values = list(scores)
    left_peaks = _running_peaks(values)
    # The right-hand climb is the left-hand climb on the reversed sequence.
    right_peaks = _running_peaks(values[::-1])[::-1]

    depths = [
        0.5 * (l_peak + r_peak - (2 * score))
        for score, l_peak, r_peak in zip(values, left_peaks, right_peaks)
    ]
    return np.array(depths)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_local_maxima(depth_scores, order=1):
    """Keep only the local maxima of ``depth_scores`` and zero everything else.

    Args:
        depth_scores: One-dimensional array-like of numbers. Lists are
            accepted and converted to a NumPy array.
        order: Passed through to ``scipy.signal.argrelmax`` as ``order``.

    Returns:
        Float ``numpy.ndarray`` of the same length as ``depth_scores``: the
        original value at every index reported by ``argrelmax``, ``0``
        elsewhere.
    """
    # Coerce first: fancy indexing and argrelmax both need an ndarray.
    depth_scores = np.asarray(depth_scores)
    filtered_scores = np.zeros(len(depth_scores))
    if len(depth_scores) == 0:
        return filtered_scores

    maxima_ids = argrelmax(depth_scores, order=order)[0]
    filtered_scores[maxima_ids] = depth_scores[maxima_ids]
    return filtered_scores
|
| 62 |
+
|
| 63 |
+
def compute_threshold(scores):
    """Derive a cut-off from the non-zero entries of ``scores``.

    The threshold is ``mean - std / 2`` computed over the non-zero values
    only.

    Args:
        scores: Array-like of numbers. Lists are accepted and converted to a
            NumPy array.

    Returns:
        The threshold as a NumPy float, or NaN when ``scores`` has no
        non-zero entry (the same value as before, but without taking the
        mean of an empty slice).
    """
    scores = np.asarray(scores)
    nonzero_scores = scores[np.nonzero(scores)]
    if nonzero_scores.size == 0:
        # No candidates: mean/std are undefined, so report NaN explicitly.
        return np.float64(np.nan)
    return np.mean(nonzero_scores) - (np.std(nonzero_scores) / 2)
|
| 67 |
+
|
| 68 |
+
def get_threshold_segments(scores, threshold=0.1):
    """Return the indices whose score is greater than or equal to ``threshold``.

    Args:
        scores: Array-like of numbers. Lists are accepted and converted to a
            NumPy array.
        threshold: Inclusive lower bound a score must reach to be selected.

    Returns:
        ``numpy.ndarray`` of integer indices along the first axis, in
        ascending order.
    """
    # Coerce so the element-wise comparison also works for plain lists.
    scores = np.asarray(scores)
    segment_ids = np.where(scores >= threshold)[0]
    return segment_ids
|
| 71 |
+
|
| 72 |
+
|
| 73 |
def print_list(lst):
    """Render every element of ``lst`` as a Markdown bullet via Streamlit.

    Args:
        lst: Iterable of items to display. Items are formatted with
            ``str()`` semantics, so non-string values (numbers, tuples, ...)
            are rendered instead of raising ``TypeError``.
    """
    for item in lst:
        # f-string formatting accepts any object, unlike `"- " + item`.
        st.markdown(f"- {item}")
|
| 76 |
|
| 77 |
+
|
| 78 |
st.subheader("Topic Modeling with Segmentation")
|
| 79 |
uploaded_file = st.file_uploader("choose a text file", type=["txt"])
|
| 80 |
if uploaded_file is not None:
|