| import fasttext | |
| import issai_lang_id as lang_id | |
# Load the three fastText classifiers once, at import time.  The paths are
# relative, so they are looked up from the current working directory.
model, model_10langs, model_9langs = (
    fasttext.load_model(model_path)
    for model_path in ("11langs.bin", "10langs.bin", "9langs.bin")
)
| # print(model.labels) | |
def _top_prediction(classifier, sentence):
    """Return the first ``(label, probability)`` pair predicted by a model.

    Every ``__label__`` marker is removed from the label name.
    """
    # k=2 matches the original calls; only the first entry is ever used.
    labels, probabilities = classifier.predict(sentence, k=2)
    return labels[0].replace('__label__', ''), probabilities[0]


def get_lang(text):
    """Return the language label predicted for *text*.

    The text is stripped, lower-cased and flattened to a single line, then
    classified with ``model``; that guess is passed through ``lang_id.get``
    together with its probability.  Two checks can then override the result:

    * label ``'sim'`` while ``lang_id.contains_chinese`` is false: the label
      is replaced by the first prediction of ``model_10langs``;
    * label ``'arb'`` while ``lang_id.not_contains_arabic`` is true: the
      label is replaced by the first prediction of ``model_9langs``.

    The second check also applies to a label produced by the first one.

    Args:
        text: String to classify.  Embedded newlines are turned into spaces
            because fastText's ``predict`` raises ``ValueError`` on
            multi-line input.

    Returns:
        The raw model label (``__label__`` marker removed) when a fallback
        model was used; otherwise whatever ``lang_id.get`` returned
        (presumably the same kind of label string -- confirm against
        ``issai_lang_id``).
    """
    # strip() only removes leading/trailing whitespace; interior newlines
    # would make fastText's predict() raise, so flatten them to spaces.
    s = text.strip().lower().replace('\n', ' ')

    yp, confidence = _top_prediction(model, s)
    yp = lang_id.get(s, yp, confidence)

    if yp == 'sim' and not lang_id.contains_chinese(s):
        yp_10, confidence_10 = _top_prediction(model_10langs, s)
        # NOTE(review): the value returned by lang_id.get here has always
        # been discarded in favour of the raw model label.  The call is kept
        # in case it has side effects; confirm which result should win.
        lang_id.get(s, yp_10, confidence_10)
        yp = yp_10

    if yp == 'arb' and lang_id.not_contains_arabic(s):
        yp_9, confidence_9 = _top_prediction(model_9langs, s)
        # NOTE(review): same as above -- the lang_id.get result is unused.
        lang_id.get(s, yp_9, confidence_9)
        yp = yp_9

    return yp
| # text = "这是一个测试句子。" | |
| # print(get_lang(text)) | |
| # text = "This is a test sentence." | |
| # print(get_lang(text)) | |
| # text = "هذا جملة اختبار." | |
| # print(get_lang(text)) |