Shreyansh49 committed on
Commit
e2fa5d5
·
1 Parent(s): 2d9aa3e

Create Hoaxie.py

Browse files
Files changed (1) hide show
  1. Hoaxie.py +191 -0
Hoaxie.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !pip3 install numpy
2
+ !pip3 install pandas
3
+ !pip3 install sklearn
4
+ !pip3 install nltk
5
+
6
+
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+
12
+
13
+
14
+
15
+ import pandas as pd
16
+ import numpy as np
17
+ import re
18
+ import nltk
19
+ from nltk.corpus import stopwords
20
+ from nltk.stem.porter import PorterStemmer
21
+ from sklearn.feature_extraction.text import TfidfVectorizer
22
+ from sklearn.model_selection import train_test_split
23
+ from sklearn.linear_model import LogisticRegression
24
+ from sklearn.metrics import accuracy_score
25
+
26
+
27
+
28
# Download the NLTK stop-word corpus used by the text-cleaning step and print
# the English list for reference.
nltk.download('stopwords')

print(stopwords.words('english'))

# Colab-only: mount Google Drive so the training CSV below is reachable.
from google.colab import drive
drive.mount('/content/drive')

news_df = pd.read_csv('/content/drive/MyDrive/Mini project/train.csv')

# Quick inspection of the data. Bare expressions such as ``news_df.head()``
# only render output when they are the last statement of a notebook cell.
news_df.head()

news_df.shape

news_df.info()

# Missing values per column, checked before they are filled in below.
news_df.isna().sum()

# Replace missing values with empty strings before the text columns are
# concatenated.
news_df = news_df.fillna('')
# Join title and author with a space: without a separator the last word of
# the title and the first word of the author fuse into a single token.
news_df['article'] = news_df['title'] + ' ' + news_df['author']
news_df

# The row identifier is not used as a feature.
news_df.drop(columns=['id'], inplace=True)

news_df

news_df["author"].value_counts()

# Preliminary feature/label split; X and Y are reassigned later once the
# article text has been stemmed.
X = news_df.drop(columns='label')
Y = news_df['label']

X

Y

# Shared stemmer instance used by stemming().
p_stemming = PorterStemmer()
83
+
84
+
85
+
86
# Cache of stop-word sets keyed by language, filled on first use.
_STOPWORD_SETS = {}


def _stopword_set(language):
    """Return the NLTK stop words for *language* as a frozenset, loaded once."""
    if language not in _STOPWORD_SETS:
        _STOPWORD_SETS[language] = frozenset(stopwords.words(language))
    return _STOPWORD_SETS[language]


def stemming(content):
    """Normalise a piece of text into space-separated Porter stems.

    Every character other than an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is stemmed with ``p_stemming``.

    Args:
        content: The raw text to clean.

    Returns:
        The stemmed words joined by single spaces (an empty string when no
        word survives the filtering).
    """
    # Look the stop words up in a cached set instead of re-reading the corpus
    # list and scanning it linearly for every single word.
    stop_words = _stopword_set('english')
    words = re.sub(r'[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(
        p_stemming.stem(word) for word in words if word not in stop_words
    )
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
# Clean and stem every article text in place.
news_df['article'] = news_df['article'].apply(stemming)

news_df['article']

# Raw (stemmed) text and labels as arrays.
X = news_df['article'].values
X

Y = news_df['label'].values
Y

# Split the raw text BEFORE fitting the vectorizer so that the TF-IDF
# vocabulary and IDF weights are learned from the training portion only;
# fitting on the full dataset leaks information from the test rows into the
# features. The split parameters are unchanged (80/20, stratified on the
# label, random_state=1).
text_train, text_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=1
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Keep X as the TF-IDF matrix of the whole dataset, as before, for any later
# code that still refers to it.
X = vectorizer.transform(X)
X
142
+
143
+
144
+
145
+
146
# Fit a logistic-regression classifier on the training features.
ml_model = LogisticRegression()

ml_model.fit(X_train, Y_train)

# Accuracy on the data the model was trained on.
# accuracy_score expects (y_true, y_pred), so the labels go first.
X_train_predict = ml_model.predict(X_train)
train_data_accuracy = accuracy_score(Y_train, X_train_predict)
percent_tr_accuracy = train_data_accuracy * 100
print("Accuracy for Train data: ", percent_tr_accuracy)

# Accuracy on the held-out test split.
X_test_predict = ml_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_predict)
percent_test_accuracy = test_data_accuracy * 100
print("Accuracy for Test data: ", percent_test_accuracy)
168
+
169
+
170
+
171
+
172
+
173
def Detection(index):
    """Classify one row of the held-out test matrix as real or fake news.

    Args:
        index: Row position in ``X_test``. Any value accepted by ``int()``
            works, so a float such as ``3.0`` is fine.

    Returns:
        ``"The News is real"`` when the model predicts label 0, otherwise
        ``"The News is fake"``.

    Raises:
        IndexError: If the row position lies outside ``X_test``.
    """
    row = int(index)
    n_rows = X_test.shape[0]
    # Fail with an explicit message instead of an opaque indexing error from
    # the sparse matrix when the requested row does not exist.
    if not -n_rows <= row < n_rows:
        raise IndexError(
            "index {} is out of range for {} test rows".format(row, n_rows)
        )
    prediction = ml_model.predict(X_test[row])
    return "The News is real" if prediction[0] == 0 else "The News is fake"
180
+
181
+
182
+
183
+
184
+
185
+ pip install gradio
186
+
187
+
188
+
189
+ import gradio as gr
190
+ demo = gr.Interface(fn=Detection, inputs='number', outputs="text")
191
+ demo.launch(share=True)