debug
Browse files
app.py
CHANGED
|
@@ -133,6 +133,7 @@ def split_corpus(corpus, n):
|
|
| 133 |
yield corpus_split[i:i + n]
|
| 134 |
|
| 135 |
def compute_coherence_values_base_lda(dictionary, corpus, texts, limit, coherence, start=2, step=1):
|
|
|
|
| 136 |
coherence_values = []
|
| 137 |
model_list = []
|
| 138 |
for num_topics in range(start, limit, step):
|
|
@@ -274,7 +275,6 @@ def full_lda(df):
|
|
| 274 |
training_corpus = corpus_split
|
| 275 |
training_corpus.remove(training_corpus[i])
|
| 276 |
# print(training_corpus[i])
|
| 277 |
-
print(training_corpus)
|
| 278 |
model_list, coherence_values = compute_coherence_values_base_lda(dictionary=id2word,
|
| 279 |
corpus=training_corpus,
|
| 280 |
texts=df['lemma_tokens'],
|
|
@@ -282,7 +282,7 @@ def full_lda(df):
|
|
| 282 |
limit=10,
|
| 283 |
step=1,
|
| 284 |
coherence='c_v')
|
| 285 |
-
print(model_list + str(i))
|
| 286 |
print(coherence_values + str(i))
|
| 287 |
for j in range(len(coherence_values)):
|
| 288 |
coherence_averages[j] += coherence_values[j]
|
|
|
|
| 133 |
yield corpus_split[i:i + n]
|
| 134 |
|
| 135 |
def compute_coherence_values_base_lda(dictionary, corpus, texts, limit, coherence, start=2, step=1):
|
| 136 |
+
print('compute coherence values base lda')
|
| 137 |
coherence_values = []
|
| 138 |
model_list = []
|
| 139 |
for num_topics in range(start, limit, step):
|
|
|
|
| 275 |
training_corpus = corpus_split
|
| 276 |
training_corpus.remove(training_corpus[i])
|
| 277 |
# print(training_corpus[i])
|
|
|
|
| 278 |
model_list, coherence_values = compute_coherence_values_base_lda(dictionary=id2word,
|
| 279 |
corpus=training_corpus,
|
| 280 |
texts=df['lemma_tokens'],
|
|
|
|
| 282 |
limit=10,
|
| 283 |
step=1,
|
| 284 |
coherence='c_v')
|
| 285 |
+
# print(model_list + str(i))
|
| 286 |
print(coherence_values + str(i))
|
| 287 |
for j in range(len(coherence_values)):
|
| 288 |
coherence_averages[j] += coherence_values[j]
|