kaushikbar
committed on
Commit
·
4198959
1
Parent(s):
d3269c3
cleaned up
Browse files
app.py
CHANGED
|
@@ -108,50 +108,8 @@ def prep_examples():
|
|
| 108 |
|
| 109 |
return examples
|
| 110 |
|
| 111 |
-
def detect_lang(sequence, labels):
|
| 112 |
-
DetectorFactory.seed = 0
|
| 113 |
-
seq_lang = 'en'
|
| 114 |
-
|
| 115 |
-
sequence = sequence.replace('\n', ' ')
|
| 116 |
-
|
| 117 |
-
try:
|
| 118 |
-
#seq_lang = detect(sequence)
|
| 119 |
-
#lbl_lang = detect(labels)
|
| 120 |
-
seq_lang = fasttext_model.predict(sequence, k=1)[0][0].split("__label__")[1]
|
| 121 |
-
lbl_lang = fasttext_model.predict(labels, k=1)[0][0].split("__label__")[1]
|
| 122 |
-
except:
|
| 123 |
-
print("Language detection failed!",
|
| 124 |
-
"Date:{}, Sequence:{}, Labels:{}".format(
|
| 125 |
-
str(datetime.datetime.now()),
|
| 126 |
-
labels))
|
| 127 |
-
|
| 128 |
-
if seq_lang != lbl_lang:
|
| 129 |
-
print("Different languages detected for sequence and labels!",
|
| 130 |
-
"Date:{}, Sequence:{}, Labels:{}, Sequence Language:{}, Label Language:{}".format(
|
| 131 |
-
str(datetime.datetime.now()),
|
| 132 |
-
sequence,
|
| 133 |
-
labels,
|
| 134 |
-
seq_lang,
|
| 135 |
-
lbl_lang))
|
| 136 |
-
|
| 137 |
-
if seq_lang in models:
|
| 138 |
-
print("Sequence Language detected.",
|
| 139 |
-
"Date:{}, Sequence:{}, Sequence Language:{}".format(
|
| 140 |
-
str(datetime.datetime.now()),
|
| 141 |
-
sequence,
|
| 142 |
-
seq_lang))
|
| 143 |
-
else:
|
| 144 |
-
print("Language not supported. Defaulting to English!",
|
| 145 |
-
"Date:{}, Sequence:{}, Sequence Language:{}".format(
|
| 146 |
-
str(datetime.datetime.now()),
|
| 147 |
-
sequence,
|
| 148 |
-
seq_lang))
|
| 149 |
-
seq_lang = 'en'
|
| 150 |
-
|
| 151 |
-
return seq_lang
|
| 152 |
-
|
| 153 |
def sequence_to_classify(sequence, labels, multi_label):
|
| 154 |
-
lang = 'en'
|
| 155 |
classifier = classifiers[lang]
|
| 156 |
|
| 157 |
label_clean = str(labels).split(";;")
|
|
|
|
| 108 |
|
| 109 |
return examples
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
def sequence_to_classify(sequence, labels, multi_label):
|
| 112 |
+
lang = 'en'
|
| 113 |
classifier = classifiers[lang]
|
| 114 |
|
| 115 |
label_clean = str(labels).split(";;")
|