# trflnews / app.py
# aymen12's picture
# init
# fe64fe7
# app.py
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import joblib
import gradio as gr
import datasets
# Load True and Fake datasets into Hugging Face Datasets from the repository
dataset_true = datasets.load_dataset('csv', data_files='Truefalsenews/True.csv', split='train')
dataset_fake = datasets.load_dataset('csv', data_files='Truefalsenews/Fake.csv', split='train')

# Keep one DataFrame per source file so each can be labelled by its origin
df_true = pd.DataFrame(dataset_true)
df_fake = pd.DataFrame(dataset_fake)

# Add labels to the DataFrames: 1 = true news, 0 = fake news. This matches
# predict_fake_or_true_news, which reports "True" for class 1.
# The column is only created when a file does not already provide one, so
# CSVs that ship their own 'label' column keep it; without this step the
# cast below raises KeyError for unlabelled files.
if 'label' not in df_true.columns:
    df_true['label'] = 1
if 'label' not in df_fake.columns:
    df_fake['label'] = 0

# Combine datasets into a single Pandas DataFrame; ignore_index avoids
# duplicate row indices coming from the two source frames.
df_combined = pd.concat([df_true, df_fake], ignore_index=True)
df_combined['label'] = df_combined['label'].astype(int)

# Split the data into training and testing sets.
# Missing article bodies are replaced by empty strings because the TF-IDF
# vectorizer only accepts string documents.
X = df_combined['text'].fillna('').astype(str)
y = df_combined['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the TF-IDF vectorizer (fit on the training split only, then
# reuse the learned vocabulary for the test split)
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Train the classifier
clf = MultinomialNB()
clf.fit(X_train_tfidf, y_train)

# Save the model and TF-IDF vectorizer
joblib.dump(clf, 'fake_news_classifier_model.pkl')
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
# Define the prediction function
def predict_fake_or_true_news(text) -> str:
    """Classify a piece of news text as "True" or "Fake".

    The text is vectorized with the module-level ``tfidf_vectorizer`` and
    passed to the module-level ``clf`` for a single-sample prediction.

    Args:
        text: The article text to classify. ``None`` is treated as an
            empty string.

    Returns:
        "True" when the predicted class is 1, otherwise "Fake".
    """
    # The vectorizer cannot process None, which a UI callback may deliver
    # when the text box has no value yet; normalise it to an empty document.
    if text is None:
        text = ""
    text_tfidf = tfidf_vectorizer.transform([text])
    prediction = clf.predict(text_tfidf)
    return "True" if prediction[0] == 1 else "Fake"
# Build the Gradio UI: one text input, one text output, wired to the
# prediction function. live=True is passed so the interface is created in
# live mode, exactly as before.
iface = gr.Interface(
    title="Fake or True News Classifier",
    description="Enter a news article text to classify as 'Fake' or 'True'.",
    fn=predict_fake_or_true_news,
    inputs="text",
    outputs="text",
    live=True,
)

# Start serving the interface
iface.launch()