Spaces:
Sleeping
Sleeping
| # app/model/model.py | |
| import pickle | |
| import re | |
| from pathlib import Path | |
| from sklearn.feature_extraction.text import CountVectorizer | |
# NOTE(review): this freshly constructed (unfitted) vectorizer is not used
# anywhere in the visible code; predictions go through the ``vocab`` object
# loaded from the pickle below. It is kept because other modules may import
# it -- confirm and remove if nothing does.
cv = CountVectorizer(max_features=1500)

__version__ = '01'

# Directory that contains this module; the pickled artifact is read from it.
BASE_DIR = Path(__file__).resolve(strict=True).parent

# SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file.
# Only ship/load a ``filename.pkl`` produced by a trusted training pipeline.
with (BASE_DIR / "filename.pkl").open("rb") as f:
    data = pickle.load(f)

# The pickle holds a dict; these two entries are what prediction needs.
model = data['model']  # object exposing ``predict`` (used by predict_language)
vocab = data['vocab']  # object exposing ``transform`` (used by predict_language)
# Single ordered source for both lookup tables: (label, Portuguese name).
# The order is significant -- predict_language indexes ``classes`` with the
# value returned by the model. The spellings 'Portugeese' and 'Sweedish' are
# lookup keys shared by both tables; they must stay exactly as written.
_LANGUAGE_PAIRS = (
    ('Arabic', 'Árabe'),
    ('Danish', 'Dinamarquês'),
    ('Dutch', 'Holandês'),
    ('English', 'Inglês'),
    ('French', 'Francês'),
    ('German', 'Alemão'),
    ('Greek', 'Grego'),
    ('Hindi', 'Hindi'),
    ('Italian', 'Italiano'),
    ('Kannada', 'Kannada'),
    ('Malayalam', 'Malaiala'),
    ('Portugeese', 'Português'),
    ('Russian', 'Russo'),
    ('Spanish', 'Espanhol'),
    ('Sweedish', 'Sueco'),
    ('Tamil', 'Tâmil'),
    ('Turkish', 'Turco'),
)

# Labels in positional order (position i corresponds to model output i).
classes = [label for label, _ in _LANGUAGE_PAIRS]

# Label -> Portuguese display name.
dict_por = dict(_LANGUAGE_PAIRS)
def predict_language(text):
    """Return the Portuguese name of the language predicted for *text*.

    The input is cleaned by replacing a fixed set of punctuation characters,
    newlines, digits and square brackets with spaces, then lower-cased. The
    cleaned string is vectorised with the module-level ``vocab`` object and
    passed to the module-level ``model``. The first element of the prediction
    is used as an index into ``classes``, and the resulting label is
    translated through ``dict_por``.

    Args:
        text: The string whose language should be identified.

    Returns:
        The ``dict_por`` entry for the predicted label.
    """
    # Applied in this order; every match becomes a single space.
    strip_patterns = (
        r'[!@#$(),\n"%^*?\:;~`0-9]',
        r'[\[\]]',
    )
    cleaned = text
    for pattern in strip_patterns:
        cleaned = re.sub(pattern, ' ', cleaned)

    # One-document batch -> dense feature array for the model.
    features = vocab.transform([cleaned.lower()]).toarray()

    predicted_index = model.predict(features)[0]
    label = classes[predicted_index]
    return dict_por[label]