Spaces:
Sleeping
Sleeping
| import spacy | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.preprocessing import FunctionTransformer | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.metrics import classification_report | |
# Load the spaCy pipeline once at module level; preprocesser() calls it on
# every text to obtain tokens with stop-word/punctuation flags and lemmas.
nlp = spacy.load("en_core_web_sm")
def preprocesser(text_array):
    """Return a cleaned, lemmatized copy of every text in ``text_array``.

    Each text is run through the module-level spaCy pipeline ``nlp``.
    Tokens flagged as stop words or punctuation are dropped, and the lemmas
    of the remaining tokens are joined with single spaces.

    Args:
        text_array: Iterable of strings to clean.

    Returns:
        A list of cleaned strings, one per input text, in input order.
    """
    # NOTE(review): the function name is presumably referenced by the pickled
    # model loaded in this file, so it should not be renamed -- confirm.

    def _lemmatize(raw_text):
        # Keep only content-bearing tokens and reduce them to their lemma.
        kept_lemmas = [
            tok.lemma_
            for tok in nlp(raw_text)
            if not (tok.is_stop or tok.is_punct)
        ]
        return " ".join(kept_lemmas)

    return [_lemmatize(raw_text) for raw_text in text_array]
| import joblib | |
# Load the previously saved model from disk (nothing is saved here).
# NOTE: joblib.load unpickles arbitrary Python objects, so this file must
# come from a trusted source.
# NOTE(review): unpickling presumably needs `preprocesser` to be defined
# under the same name before this line runs -- confirm.
loaded_model = joblib.load('models/Logisticmainmodel.pkl')
def pd(text):
    """Predict, print and return the category name for a single text.

    The text is wrapped in a one-element list and passed to
    ``loaded_model.predict``; the resulting numeric label is translated to
    its category name.

    Args:
        text: The text to classify.

    Returns:
        The predicted category name.

    Raises:
        KeyError: If the model returns a label that is not in the mapping.
    """
    # Numeric label -> category name; position in the tuple is the label id.
    label_to_category = dict(enumerate((
        'BUSINESS-MONEY',
        'EMPOWERED VOICES',
        'ENVIRONMENT',
        'GENERAL',
        'LIFESTYLE AND WELLNESS',
        'MISC',
        'PARENTING AND EDUCATION',
        'POLITICS',
        'SCIENCE AND TECH',
        'SPORTS AND ENTERTAINMENT',
        'TRAVEL-TOURISM & ART-CULTURE',
        'U.S. NEWS',
        'WORLDNEWS',
    )))

    # The model expects a batch, so send a batch of one and take its result.
    predicted_label = loaded_model.predict([text])[0]
    predicted_category = label_to_category[predicted_label]

    print(f"Text: {text}\nPredicted Category: {predicted_category}\n")
    return predicted_category
# Example call executed whenever this module runs; pd() prints the input
# text together with its predicted category.
pd("election")