Spaces:
Runtime error
Runtime error
Optimizations and more interpretation
Browse files
app.py
CHANGED
|
@@ -66,6 +66,7 @@ def calculate_diversity(text):
|
|
| 66 |
tokenized_text = word_tokenize(text)
|
| 67 |
|
| 68 |
tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
|
|
|
|
| 69 |
sim_words = {}
|
| 70 |
if len(tokenized_text) <= 1:
|
| 71 |
return 1, "More Text Required"
|
|
@@ -85,7 +86,7 @@ def calculate_diversity(text):
|
|
| 85 |
if not comp.isalpha():
|
| 86 |
continue
|
| 87 |
try:
|
| 88 |
-
if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .
|
| 89 |
vocab.append(comp)
|
| 90 |
except KeyError:
|
| 91 |
continue
|
|
@@ -96,9 +97,6 @@ def calculate_diversity(text):
|
|
| 96 |
if len(value) == 1:
|
| 97 |
scores[key] = -1
|
| 98 |
continue
|
| 99 |
-
# if len(value) == 2:
|
| 100 |
-
# scores[key] = -1
|
| 101 |
-
# continue
|
| 102 |
t_sim = len(value)
|
| 103 |
t_rep = (len(value)) - (len(set(value)))
|
| 104 |
|
|
@@ -108,24 +106,39 @@ def calculate_diversity(text):
|
|
| 108 |
|
| 109 |
mean_score = 0
|
| 110 |
total = 0
|
| 111 |
-
|
| 112 |
for value in scores.values():
|
| 113 |
if value == -1:
|
| 114 |
continue
|
| 115 |
mean_score += value
|
| 116 |
total += 1
|
| 117 |
-
|
| 118 |
-
return scores, {"Diversity Score": mean_score / total}
|
| 119 |
-
except ZeroDivisionError:
|
| 120 |
-
return scores, {"Dviersity Score": "Not Enough Data"}
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
def dict_to_list(dictionary, max_size=10):
|
|
@@ -314,16 +327,6 @@ def plot():
|
|
| 314 |
return heatmap(diversity, df)
|
| 315 |
|
| 316 |
|
| 317 |
-
def diversity_inter(text):
|
| 318 |
-
words = word_tokenize(text)
|
| 319 |
-
scores = get_scores(text)
|
| 320 |
-
interpret_values = [('', 0.0)]
|
| 321 |
-
for key, value in scores.items():
|
| 322 |
-
interpret_values.append((words[key], value))
|
| 323 |
-
interpret_values.append(('', 0.0))
|
| 324 |
-
print(interpret_values)
|
| 325 |
-
return {'original': text, 'interpretation': interpret_values}
|
| 326 |
-
|
| 327 |
|
| 328 |
def sliding_window(text):
|
| 329 |
words = word_tokenize(text)
|
|
@@ -374,25 +377,16 @@ def sliding_window(text):
|
|
| 374 |
ax.set_facecolor('w')
|
| 375 |
fig = plt.gcf()
|
| 376 |
|
| 377 |
-
|
| 378 |
maxy = max(inter_scores)
|
| 379 |
miny = min(inter_scores)
|
| 380 |
spread = maxy - miny
|
| 381 |
|
| 382 |
for idx, i in enumerate(words):
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
return fig, map
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
def get_plot(text):
|
| 390 |
-
return sliding_window(text)[0]
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
def get_dif_inter(text):
|
| 394 |
-
return {'original': text, 'interpretation': sliding_window(text)[1]}
|
| 395 |
|
|
|
|
| 396 |
|
| 397 |
def speech_to_text(speech, target):
|
| 398 |
text = p(speech)["text"]
|
|
@@ -460,6 +454,11 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
|
|
| 460 |
div_output = gr.Label(label='Diversity Score', show_label=False)
|
| 461 |
gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
|
| 462 |
interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
with gr.Box():
|
| 464 |
gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
|
| 465 |
interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
|
|
@@ -498,11 +497,10 @@ with gr.Blocks(title="Automatic Literacy and Speech Assesmen") as demo:
|
|
| 498 |
|
| 499 |
|
| 500 |
grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
|
| 501 |
-
grade.click(get_mean_score, inputs=in_text, outputs=div_output)
|
| 502 |
-
grade.click(
|
| 503 |
-
grade.click(get_dif_inter, inputs=in_text, outputs=interpretation2)
|
| 504 |
-
grade.click(get_plot, inputs=in_text, outputs=plotter)
|
| 505 |
grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
|
| 506 |
b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
|
| 507 |
get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
|
|
|
|
| 508 |
demo.launch(debug=True)
|
|
|
|
| 66 |
tokenized_text = word_tokenize(text)
|
| 67 |
|
| 68 |
tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
|
| 69 |
+
global sim_words
|
| 70 |
sim_words = {}
|
| 71 |
if len(tokenized_text) <= 1:
|
| 72 |
return 1, "More Text Required"
|
|
|
|
| 86 |
if not comp.isalpha():
|
| 87 |
continue
|
| 88 |
try:
|
| 89 |
+
if cosine_similarity(w2v[anc].reshape(1, -1), w2v[comp].reshape(1, -1)) > .75 or comp in wn_syns(anc):
|
| 90 |
vocab.append(comp)
|
| 91 |
except KeyError:
|
| 92 |
continue
|
|
|
|
| 97 |
if len(value) == 1:
|
| 98 |
scores[key] = -1
|
| 99 |
continue
|
|
|
|
|
|
|
|
|
|
| 100 |
t_sim = len(value)
|
| 101 |
t_rep = (len(value)) - (len(set(value)))
|
| 102 |
|
|
|
|
| 106 |
|
| 107 |
mean_score = 0
|
| 108 |
total = 0
|
| 109 |
+
|
| 110 |
for value in scores.values():
|
| 111 |
if value == -1:
|
| 112 |
continue
|
| 113 |
mean_score += value
|
| 114 |
total += 1
|
| 115 |
+
words = word_tokenize(text)
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
interpret_values = [('', 0.0)]
|
| 118 |
+
|
| 119 |
+
for key, value in scores.items():
|
| 120 |
+
interpret_values.append((words[key], value))
|
| 121 |
|
| 122 |
+
interpret_values.append(('', 0.0))
|
| 123 |
+
print(interpret_values)
|
| 124 |
+
int_vals = {'original': text, 'interpretation': interpret_values}
|
| 125 |
+
try:
|
| 126 |
|
| 127 |
+
return int_vals, {"Diversity Score": mean_score / total}
|
| 128 |
+
except ZeroDivisionError:
|
| 129 |
|
| 130 |
+
return int_vals, {"Dviersity Score": "Not Enough Data"}
|
| 131 |
+
|
| 132 |
+
def get_sim_words(text, word):
    """Return the similar-word entries stored for ``word``, joined by ", ".

    ``word`` is stripped of surrounding whitespace and compared exactly
    (case-sensitively) with each token of ``text``. The position of the
    first matching token is used as the key into the module-level
    ``sim_words`` mapping, which ``calculate_diversity`` rebinds through
    ``global sim_words``.

    Args:
        text: The analysed text; tokenized with ``word_tokenize``.
        word: A single word expected to occur in ``text``.

    Returns:
        A string. Normally the entries stored under the matched token
        position joined with ", " (presumably the similar words collected
        for that token -- confirm against ``calculate_diversity``). A short
        explanatory message is returned instead when no analysis has been
        run yet, when ``word`` does not occur in ``text``, or when no entry
        exists for the matched position.
    """
    word = word.strip()

    # ``sim_words`` only exists after calculate_diversity has executed its
    # ``global sim_words`` assignment; avoid a NameError before that.
    try:
        similar_by_index = sim_words
    except NameError:
        return "No analysis available yet"

    # Debug output retained from the original implementation.
    print(similar_by_index)

    tokens = word_tokenize(text)
    # Position of the first exact match, or None. The previous code fell
    # back to index 0 and reported the first token's entries instead.
    index = next(
        (idx for idx, token in enumerate(tokens) if token == word),
        None,
    )
    if index is None:
        return "Word not found in text"

    try:
        matches = similar_by_index[index]
    except KeyError:
        # No entry was recorded for this token position.
        return "No similar words found"
    return ', '.join(matches)
|
| 142 |
|
| 143 |
|
| 144 |
def dict_to_list(dictionary, max_size=10):
|
|
|
|
| 327 |
return heatmap(diversity, df)
|
| 328 |
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
def sliding_window(text):
|
| 332 |
words = word_tokenize(text)
|
|
|
|
| 377 |
ax.set_facecolor('w')
|
| 378 |
fig = plt.gcf()
|
| 379 |
|
| 380 |
+
mapd = [('', 0)]
|
| 381 |
maxy = max(inter_scores)
|
| 382 |
miny = min(inter_scores)
|
| 383 |
spread = maxy - miny
|
| 384 |
|
| 385 |
for idx, i in enumerate(words):
|
| 386 |
+
mapd.append((i, (inter_scores[idx] - miny) / spread))
|
| 387 |
+
mapd.append(('', 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
+
return fig, {'original': text, 'interpretation': mapd}
|
| 390 |
|
| 391 |
def speech_to_text(speech, target):
|
| 392 |
text = p(speech)["text"]
|
|
|
|
| 454 |
div_output = gr.Label(label='Diversity Score', show_label=False)
|
| 455 |
gr.Markdown("Diversity Heatmap | Blue cells are omitted from score | Darker = More Diverse")
|
| 456 |
interpretation = gr.components.Interpretation(in_text, label="Diversity Heatmap")
|
| 457 |
+
|
| 458 |
+
gr.Markdown("Find Similar Words | Word must be part of analysis text | Enter only one word at a time")
|
| 459 |
+
words1 = gr.Textbox(label="Word For Similarity")
|
| 460 |
+
find_sim = gr.Button("Find Similar Words")
|
| 461 |
+
sims = gr.Label()
|
| 462 |
with gr.Box():
|
| 463 |
gr.Markdown("Relative Difficulty Heatmap- How confusing the text is in that area")
|
| 464 |
interpretation2 = gr.components.Interpretation(in_text, label="Difficulty Heatmap")
|
|
|
|
| 497 |
|
| 498 |
|
| 499 |
grade.click(reading_difficulty, inputs=in_text, outputs=diff_output)
|
| 500 |
+
grade.click(get_mean_score, inputs=in_text, outputs=[interpretation, div_output])
|
| 501 |
+
grade.click(get_dif_inter, inputs=in_text, outputs=[plotter, interpretation2])
|
|
|
|
|
|
|
| 502 |
grade1.click(speech_to_score, inputs=audio_file, outputs=diff_output)
|
| 503 |
b1.click(speech_to_text, inputs=[audio_file1, target], outputs=[text, some_val, phones])
|
| 504 |
get_syns.click(gen_syns, inputs=[words, lvl], outputs=reccos)
|
| 505 |
+
find_sim.click(get_sim_words, inputs=[in_text, words1], outputs=sims)
|
| 506 |
demo.launch(debug=True)
|