mamyrbek
/

lang_identification

Model card Files Files and versions

lang_identification / README.md

mamyrbek's picture

Update README.md

53b7799 verified about 1 month ago

|

history blame contribute delete

1.34 kB

	import fasttext
	import issai_lang_id as lang_id

	# Load the three fastText classifiers used by get_lang(): the main model
	# first, then the two fallback models (the file names suggest they cover
	# 11, 10 and 9 languages respectively -- confirm against the model card).
	model, model_10langs, model_9langs = (
	    fasttext.load_model(path)
	    for path in ("11langs.bin", "10langs.bin", "9langs.bin")
	)
	# Tip: print(model.labels) shows the labels the main model can predict.

	def _top_label(classifier, sentence):
	    """Return ``(label, probability)`` for the best prediction of *classifier*.

	    The ``__label__`` marker is removed from the returned label.
	    """
	    labels, probabilities = classifier.predict(sentence, k=2)
	    return labels[0].replace('__label__', ''), probabilities[0]


	def get_lang(text):
	    """Identify the language of *text* and return its label.

	    The text is normalised (line breaks replaced by spaces, surrounding
	    whitespace stripped, lower-cased) and then classified in up to three
	    steps:

	    1. The main model's top label and probability are passed through
	       ``lang_id.get`` and its result becomes the current label.
	    2. If that label is ``'sim'`` but ``lang_id.contains_chinese`` is false
	       for the text, the text is re-classified with ``model_10langs`` and
	       that model's top label is used instead.
	    3. If the label is ``'arb'`` and ``lang_id.not_contains_arabic`` is true
	       for the text, the text is re-classified with ``model_9langs`` and
	       that model's top label is used instead.

	    Args:
	        text: The sentence to classify (a ``str``).

	    Returns:
	        The label selected by the steps above, without the ``__label__``
	        prefix.
	    """
	    # fastText's predict() raises ValueError when the input contains a
	    # newline character, and strip() only removes the ones at the ends, so
	    # interior line breaks are turned into spaces first.
	    s = text.replace('\n', ' ').strip().lower()

	    label, probability = _top_label(model, s)
	    yp = lang_id.get(s, label, probability)

	    # NOTE(review): in both fallbacks below the fallback model's label is
	    # used as-is. Earlier revisions also called lang_id.get() on it but
	    # discarded the result; route the label through lang_id.get() here if
	    # that correction is meant to apply to fallback predictions as well.
	    if yp == 'sim' and not lang_id.contains_chinese(s):
	        # 'sim' predicted for text without Chinese characters: ask the
	        # 10-language model instead.
	        yp, _ = _top_label(model_10langs, s)
	    if yp == 'arb' and lang_id.not_contains_arabic(s):
	        # 'arb' predicted for text without Arabic characters: ask the
	        # 9-language model instead.
	        yp, _ = _top_label(model_9langs, s)
	    return yp
	# text = "这是一个测试句子。"
	# print(get_lang(text))
	# text = "This is a test sentence."
	# print(get_lang(text))
	# text = "هذه جملة اختبار."
	# print(get_lang(text))