| !pip3 install numpy |
| !pip3 install pandas |
| !pip3 install sklearn |
| !pip3 install nltk |
|
|
|
|
|
|
| import numpy as np |
| import pandas as pd |
|
|
|
|
|
|
|
|
|
|
| import pandas as pd |
| import numpy as np |
| import re |
| import nltk |
| from nltk.corpus import stopwords |
| from nltk.stem.porter import PorterStemmer |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.model_selection import train_test_split |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.metrics import accuracy_score |
|
|
|
|
|
|
# Download the NLTK stop-word corpus used by the text-cleaning step.
nltk.download('stopwords')

# Show the English stop words that will be filtered out of the articles.
english_stop_words = stopwords.words('english')
print(english_stop_words)
|
|
|
|
# Mount Google Drive; the training CSV is read from a Drive folder below.
from google.colab import drive

drive.mount('/content/drive')

# Load the training data into a DataFrame.
TRAIN_CSV_PATH = '/content/drive/MyDrive/Mini project/train.csv'
news_df = pd.read_csv(TRAIN_CSV_PATH)

# Quick inspection: first rows, (rows, columns), per-column summary, and the
# number of missing values in each column.
news_df.head()
news_df.shape
news_df.info()
news_df.isna().sum()
|
|
|
|
# Replace missing values with empty strings so the string concatenation
# below never produces NaN.
news_df = news_df.fillna('')

# Combine title and author into a single text field. The space separator
# keeps the last word of the title from fusing with the first word of the
# author name into one meaningless token.
news_df['article'] = news_df['title'] + ' ' + news_df['author']
news_df

# Drop the 'id' column; only 'article' and 'label' are used for modelling.
news_df.drop(columns=['id'], inplace=True)
news_df

# Number of rows per distinct author value.
news_df["author"].value_counts()
|
|
|
|
|
|
# Preview of the feature/label separation: the 'label' column on its own
# versus every other column.
Y = news_df['label']
X = news_df.drop(columns=['label'])
X
Y

# Porter stemmer instance used by stemming().
p_stemming = PorterStemmer()
|
|
|
|
|
|
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return the English stop words as a frozenset, loading them on first use."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        # Loaded lazily so the corpus is only read after it has been downloaded.
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def stemming(content):
    """Normalise *content* into a space-joined string of stemmed words.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is reduced to its Porter stem.

    Args:
        content: Raw text to clean.

    Returns:
        The stemmed, stop-word-free words joined by single spaces.
    """
    # Fetch the stop-word set once per call instead of re-reading the word
    # list (and scanning it linearly) for every single word.
    stop_words = _english_stop_words()
    words = re.sub(r'[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(
        p_stemming.stem(word) for word in words if word not in stop_words
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run every combined title/author string through stemming(), then show the
# cleaned column.
news_df['article'] = news_df['article'].map(stemming)
news_df['article']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Cleaned article text is the feature; 'label' is the target.
X = news_df['article'].values
Y = news_df['label'].values

# Split the raw text BEFORE vectorising so the held-out rows play no part in
# fitting TF-IDF (fitting on the full dataset leaks test-set statistics into
# the training features). 80/20 split, stratified on the label, fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=1
)

# Learn the vocabulary and IDF weights from the training text only, then
# apply that same fitted transform to the test text.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Leave X holding the TF-IDF matrix for the whole dataset, so any later cell
# that expects a vectorised X keeps working.
X = vectorizer.transform(X)
|
|
|
|
|
|
|
|
# Create a logistic-regression classifier with default settings and fit it
# on the training split (fit() returns the fitted estimator itself).
ml_model = LogisticRegression().fit(X_train, Y_train)
|
|
|
|
|
|
|
|
# Accuracy on the data the model was trained on.
# accuracy_score expects the true labels first and the predictions second.
X_train_predict = ml_model.predict(X_train)
train_data_accuracy = accuracy_score(Y_train, X_train_predict)
percent_tr_accuracy = train_data_accuracy * 100
print("Accuracy for Train data: ", percent_tr_accuracy)

# Accuracy on the held-out test split.
X_test_predict = ml_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_predict)
percent_test_accuracy = test_data_accuracy * 100
print("Accuracy for Test data: ", percent_test_accuracy)
|
|
|
|
|
|
|
|
|
|
def Detection(index):
    """Classify the test-set article at position *index*.

    Args:
        index: Row position in ``X_test``. It is passed through ``int()``,
            so float values are accepted and truncated.

    Returns:
        "The News is real" when the model predicts label 0, otherwise
        "The News is fake".
    """
    row = X_test[int(index)]
    prediction = ml_model.predict(row)
    return "The News is real" if prediction[0] == 0 else "The News is fake"
|
|
|
|
|
|
|
|
|
|
| pip install gradio |
| import gradio as gr |
| demo = gr.Interface(fn=Detection, inputs='number', outputs="text") |
| demo.launch(share=True) |