| |
|
|
| |
| import pandas as pd |
| from sklearn.model_selection import train_test_split |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.naive_bayes import MultinomialNB |
| import joblib |
| import gradio as gr |
| import datasets |
|
|
| |
# Load the two source CSVs as Hugging Face datasets: one file of genuine
# articles and one of fabricated articles.
dataset_true = datasets.load_dataset('csv', data_files='Truefalsenews/True.csv', split='train')
dataset_fake = datasets.load_dataset('csv', data_files='Truefalsenews/Fake.csv', split='train')

# Convert each dataset to a DataFrame before merging so that a label can be
# attached per source file.
df_true = pd.DataFrame(dataset_true)
df_fake = pd.DataFrame(dataset_fake)

# The files are split by class, so they may not carry an explicit 'label'
# column. When it is missing, derive it from the source file (1 = true,
# 0 = fake, the same encoding the prediction function checks against)
# instead of failing with a KeyError on the lookup below. An existing
# 'label' column is left untouched.
if 'label' not in df_true.columns:
    df_true['label'] = 1
if 'label' not in df_fake.columns:
    df_fake['label'] = 0

# Stack both classes into a single frame and discard rows that lack text or
# a label: a missing value cannot be cast to int or vectorised later on.
df_combined = pd.concat([df_true, df_fake]).dropna(subset=['text', 'label'])

# Normalise the label column to integers (assign returns a fresh frame, which
# avoids chained-assignment warnings on the filtered result).
df_combined = df_combined.assign(label=df_combined['label'].astype(int))
|
|
| |
# Features are the raw article text; targets are the integer labels.
X, y = df_combined['text'], df_combined['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a TF-IDF vocabulary (capped at 5000 terms) from the training text
# only, then map the held-out text into that same feature space.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Train a multinomial Naive Bayes classifier on the TF-IDF features
# (fit() returns the fitted estimator itself).
clf = MultinomialNB().fit(X_train_tfidf, y_train)

# Persist the fitted classifier and vectorizer to disk.
for _artifact, _path in (
    (clf, 'fake_news_classifier_model.pkl'),
    (tfidf_vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(_artifact, _path)
|
|
| |
def predict_fake_or_true_news(text):
    """Classify a news article as "True" or "Fake".

    Args:
        text: Raw article text entered by the user.

    Returns:
        "True" when the classifier predicts label 1, "Fake" for any other
        predicted label, or an empty string when no text was supplied.
    """
    # The interface runs in live mode, so this function is invoked while the
    # input box is still empty. Blank input contains no terms to score, so
    # any label returned for it would not reflect the text; return nothing.
    if text is None or not text.strip():
        return ""

    # Vectorise the single document with the fitted TF-IDF vocabulary and
    # let the trained classifier pick a label.
    text_tfidf = tfidf_vectorizer.transform([text])
    prediction = clf.predict(text_tfidf)
    return "True" if prediction[0] == 1 else "Fake"
|
|
| |
# Settings for the web UI: a single free-text input, a single text output,
# and live mode so the prediction is refreshed as the input changes.
interface_options = dict(
    fn=predict_fake_or_true_news,
    inputs="text",
    outputs="text",
    live=True,
    title="Fake or True News Classifier",
    description="Enter a news article text to classify as 'Fake' or 'True'.",
)
iface = gr.Interface(**interface_options)

# Start serving the interface.
iface.launch()
|
|