Spaces:
Sleeping
Sleeping
File size: 1,259 Bytes
e5aa1e6 aabc2fd d45489d aabc2fd e5aa1e6 85b47f8 aabc2fd e5aa1e6 aabc2fd e5aa1e6 aabc2fd 679d180 aabc2fd 6a234aa aabc2fd e5aa1e6 679d180 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# app/model/model.py
import pickle
import re
from pathlib import Path
from sklearn.feature_extraction.text import CountVectorizer
# NOTE(review): this freshly constructed (unfitted) vectorizer is not used by
# any code visible in this module. It is kept so that anything importing
# ``cv`` from here keeps working -- confirm whether it can be removed.
cv = CountVectorizer(max_features=1500)

__version__ = '01'

# Directory containing this module; the pickled artifacts are read from it.
BASE_DIR = Path(__file__).resolve(strict=True).parent

# SECURITY: ``pickle.load`` can execute arbitrary code during deserialization.
# Only ship a ``filename.pkl`` that was produced by a trusted training run.
with open(BASE_DIR / "filename.pkl", "rb") as f:
    data = pickle.load(f)

# The pickle is a dict holding the classifier and the text vectorizer.
model = data['model']
# Despite its name, ``predict_language`` calls ``.transform()`` on this object,
# so it is a fitted vectorizer-like object rather than a bare vocabulary.
vocab = data['vocab']

# Class labels, indexed by the integer that ``model.predict`` returns.
# NOTE(review): the order presumably mirrors the label encoding used at
# training time -- confirm before reordering. The spellings 'Portugeese' and
# 'Sweedish' are kept as-is because they are also the keys of ``dict_por``.
classes = [
    'Arabic', 'Danish', 'Dutch', 'English', 'French', 'German',
    'Greek', 'Hindi', 'Italian', 'Kannada', 'Malayalam', 'Portugeese',
    'Russian', 'Spanish', 'Sweedish', 'Tamil', 'Turkish',
]

# Maps each entry of ``classes`` to the Portuguese name returned to callers.
dict_por = {
    'Arabic': 'Árabe',
    'Danish': 'Dinamarquês',
    'Dutch': 'Holandês',
    'English': 'Inglês',
    'French': 'Francês',
    'German': 'Alemão',
    'Greek': 'Grego',
    'Hindi': 'Hindi',
    'Italian': 'Italiano',
    'Kannada': 'Kannada',
    'Malayalam': 'Malaiala',
    'Portugeese': 'Português',
    'Russian': 'Russo',
    'Spanish': 'Espanhol',
    'Sweedish': 'Sueco',
    'Tamil': 'Tâmil',
    'Turkish': 'Turco',
}
def predict_language(text):
    """Predict the language of ``text`` and return its Portuguese name.

    The input is cleaned in two passes (punctuation, digits and newlines
    first, then square brackets -- each matched character becomes a single
    space), lower-cased, vectorized with the module-level ``vocab`` and
    classified with the module-level ``model``.

    Args:
        text: The string whose language should be detected.

    Returns:
        The ``dict_por`` entry for the predicted class, i.e. the language
        name in Portuguese (for example ``'Inglês'``).
    """
    cleaned = text
    # Same two substitutions as before, applied in the same order.
    for pattern in (r'[!@#$(),\n"%^*?\:;~`0-9]', r'[\[\]]'):
        cleaned = re.sub(pattern, ' ', cleaned)

    # The vectorizer takes a list of documents; we pass a single one and
    # convert the result to a dense array before handing it to the model.
    features = vocab.transform([cleaned.lower()]).toarray()

    # ``predict`` returns one label index per document; take the only one.
    label_index = model.predict(features)[0]
    language = classes[label_index]
    return dict_por[language]
|