Afeezee committed on
Commit
a402109
·
verified ·
1 Parent(s): 24cb571

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -155
app.py CHANGED
@@ -1,159 +1,52 @@
1
  import gradio as gr
2
  import tweepy
3
- import joblib
4
- import torch
5
- from transformers import BertTokenizer
6
- from transformers import AutoModel, BertTokenizerFast
7
- from sklearn.model_selection import train_test_split
8
- import numpy as np
9
- import pandas as pd
10
- import torch.nn as nn
11
 
12
- data = pd.read_csv('Twitter_Analysis.csv')
13
-
14
- # Train-Validation-Test set split into 70:15:15 ratio
15
- # Train-Temp split
16
- train_text, temp_text, train_labels, temp_labels = train_test_split(data['tweet'], data['BinaryNumTarget'],
17
- random_state=2018, test_size=0.3,
18
- stratify=data['majority_target'])
19
- # Validation-Test split.
20
-
21
- val_text, test_text, val_labels, test_labels = train_test_split(temp_text, temp_labels,
22
- random_state=2018,
23
- test_size=0.5,
24
- stratify=temp_labels)
25
- temp_labels.head()
26
-
27
-
28
- # Load BERT model and tokenizer via HuggingFace Transformers
29
- bert = AutoModel.from_pretrained('bert-base-uncased')
30
- tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
31
-
32
- # Majority of titles above have word length under 60. So, we set max title length as 60
33
- MAX_LENGHT = 60
34
- # Tokenize and encode sequences in the train set
35
- tokens_train = tokenizer.batch_encode_plus(
36
- train_text.tolist(),
37
- max_length = MAX_LENGHT,
38
- padding=True,
39
- truncation=True
40
- )
41
- # tokenize and encode sequences in the validation set
42
- tokens_val = tokenizer.batch_encode_plus(
43
- val_text.tolist(),
44
- max_length = MAX_LENGHT,
45
- padding=True,
46
- truncation=True
47
- )
48
- # tokenize and encode sequences in the test set
49
- tokens_test = tokenizer.batch_encode_plus(
50
- test_text.tolist(),
51
- max_length = MAX_LENGHT,
52
- padding=True,
53
- truncation=True
54
- )
55
-
56
- # Convert lists to tensors
57
- train_seq = torch.tensor(tokens_train['input_ids'])
58
- train_mask = torch.tensor(tokens_train['attention_mask'])
59
- train_y = torch.tensor(train_labels.tolist())
60
-
61
- val_seq = torch.tensor(tokens_val['input_ids'])
62
- val_mask = torch.tensor(tokens_val['attention_mask'])
63
- val_y = torch.tensor(val_labels.tolist())
64
-
65
- test_seq = torch.tensor(tokens_test['input_ids'])
66
- test_mask = torch.tensor(tokens_test['attention_mask'])
67
- test_y = torch.tensor(test_labels.tolist())
68
-
69
-
70
- # Data Loader structure definition
71
- from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
72
- batch_size = 32 #define a batch size
73
-
74
- train_data = TensorDataset(train_seq, train_mask, train_y) # wrap tensors
75
- train_sampler = RandomSampler(train_data) # sampler for sampling the data during training
76
- train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
77
- # dataLoader for train set
78
- val_data = TensorDataset(val_seq, val_mask, val_y) # wrap tensors
79
- val_sampler = SequentialSampler(val_data) # sampler for sampling the data during training
80
- val_dataloader = DataLoader(val_data, sampler = val_sampler, batch_size=batch_size)
81
- # dataLoader for validation set
82
-
83
- # Freezing the parameters and defining trainable BERT structure
84
- for param in bert.parameters():
85
- param.requires_grad = False # false here means gradient need not be computed
86
-
87
- class BERT_Arch(nn.Module):
88
- def __init__(self, bert):
89
- super(BERT_Arch, self).__init__()
90
- self.bert = bert
91
- self.dropout = nn.Dropout(0.1) # dropout layer
92
- self.relu = nn.ReLU() # relu activation function
93
- self.fc1 = nn.Linear(768,512) # dense layer 1
94
- self.fc2 = nn.Linear(512,2) # dense layer 2 (Output layer)
95
- self.softmax = nn.LogSoftmax(dim=1) # softmax activation function
96
- def forward(self, sent_id, mask): # define the forward pass
97
- cls_hs = self.bert(sent_id, attention_mask=mask)['pooler_output']
98
- # pass the inputs to the model
99
- x = self.fc1(cls_hs)
100
- x = self.relu(x)
101
- x = self.dropout(x)
102
- x = self.fc2(x) # output layer
103
- x = self.softmax(x) # apply softmax activation
104
- return x
105
-
106
- model = BERT_Arch(bert)
107
- # Defining the hyperparameters (optimizer, weights of the classes and the epochs)
108
- # Define the optimizer
109
- from transformers import AdamW
110
- optimizer = AdamW(model.parameters(),
111
- lr = 1e-5) # learning rate
112
- # Define the loss function
113
- #cross_entropy = nn.NLLLoss()
114
- cross_entropy = torch.nn.NLLLoss()
115
- # Number of training epochs
116
- epochs = 2
117
-
118
- # Load the tokenizer and the model
119
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
120
- joblib.dump([model, bert], 'c2_new_models2_weights.pt')
121
 
 
122
  def predict_fake_news(text):
123
- # Tokenize and encode sequences
124
- inputs = tokenizer.encode_plus(
125
- text,
126
- max_length= 60,
127
- padding='max_length',
128
- truncation=True,
129
- return_tensors="pt"
 
 
 
 
 
 
 
 
 
 
 
130
  )
131
- input_ids = inputs['input_ids']
132
- attention_mask = inputs['attention_mask']
133
-
134
- # Make prediction
135
- model.eval() # Ensure the model is in evaluation mode
136
- with torch.no_grad():
137
- outputs = model(input_ids, attention_mask)
138
-
139
- # Access the logits directly from the outputs Tensor
140
- logits = outputs
141
-
142
- # Get the prediction using argmax
143
- prediction = torch.argmax(logits).item()
144
-
145
- # Map prediction to label
146
- label_map = {0: 'Fake', 1: 'Real'}
147
- return label_map[prediction]
148
-
149
 
150
  # Define a function to update on Twitter
151
  def update_on_Twitter(tweet_text, prediction):
152
- CONSUMER_KEY = "q76xzfaSG7jL4unpvaNuPM5Ms"
153
- CONSUMER_SECRET = "7h2JCH9fveW3srWarhCmwLbr8rTtVeJ04Qo3q65VItX2L4eFs1"
154
- ACCESS_TOKEN = "1636314191198932992-VesD9DTEnagO7fQdCiu5Fh6vuFLbw1"
155
- ACCESS_TOKEN_SECRET = "DcTCYDGba8UWlbMEpDvmTMZuVI2XAip7Tu8QgLTrC12AW"
156
- BAERER_TOKEN = "AAAAAAAAAAAAAAAAAAAAAPJjnwEAAAAA3DnqW09w51Oufv8UCReOPQLPUtA%3Dz9vzO4DXVbXRU63RZB3TzbCrBc0saEnQZ49GMmGkDqKVu30qwC"
 
157
 
158
  # Authenticate to Twitter
159
  auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
@@ -162,9 +55,9 @@ def update_on_Twitter(tweet_text, prediction):
162
  # Create an API object
163
  api = tweepy.API(auth)
164
 
165
- # Create a Client object
166
  client = tweepy.Client(
167
- BAERER_TOKEN,
168
  CONSUMER_KEY,
169
  CONSUMER_SECRET,
170
  ACCESS_TOKEN,
@@ -172,12 +65,12 @@ def update_on_Twitter(tweet_text, prediction):
172
  wait_on_rate_limit=True
173
  )
174
 
175
- postText = f"The news: {tweet_text} is {prediction}."
176
 
177
  try:
178
  api.verify_credentials()
179
  print("Authentication OK")
180
- client.create_tweet(text=postText)
181
  return f'<a href="https://twitter.com/CANNBot" target="_blank">Detect Fake News on Twitter Bot Account</a>'
182
  except Exception as e:
183
  print(e)
@@ -185,16 +78,16 @@ def update_on_Twitter(tweet_text, prediction):
185
 
186
  # Use Gradio Blocks to create a more flexible interface
187
  with gr.Blocks() as demo:
188
- gr.Markdown("# Fake News Detection")
189
  text_input = gr.Textbox(placeholder="Enter a news Tweet here...", label="News Tweet")
190
  text_output = gr.Textbox(label="Prediction")
191
  link_output = gr.HTML(label="Twitter Bot Account")
192
 
193
- # Button to get prediction
194
  gr.Button("Detect").click(predict_fake_news, inputs=text_input, outputs=text_output)
195
 
196
- # Button to generate a Gradio link
197
  gr.Button("Detect on Twitter").click(update_on_Twitter, inputs=[text_input, text_output], outputs=link_output)
198
 
199
  # Launch the interface
200
- demo.launch()
 
1
  import gradio as gr
2
  import tweepy
3
+ from groq import Groq
4
+ import os
 
 
 
 
 
 
5
 
6
# All secrets are read from environment variables (e.g. Hugging Face Space
# secrets) rather than hard-coded in the source.
api_key = os.getenv("Groqapi")                    # Groq API key
CONSUMER_KEY = os.getenv("TwitterConsumer")       # Twitter/X OAuth 1.0a consumer key
CONSUMER_SECRET = os.getenv("ConsumerSecret")     # Twitter/X OAuth 1.0a consumer secret
ACCESS_TOKEN = os.getenv("AccessToken")           # Twitter/X access token for the bot account
ACCESS_TOKEN_SECRET = os.getenv("AccTokenSecret") # Twitter/X access token secret
BEARER_TOKEN = os.getenv("BearerToken")           # Twitter/X API v2 bearer token
# NOTE(review): os.getenv returns None when a variable is unset — presumably
# the hosting environment defines all of these; verify at deploy time.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Define the Groq-based function to predict fake news
14
def predict_fake_news(text):
    """Classify a news tweet as 'Fake' or 'Real' using a Groq-hosted LLM.

    Parameters
    ----------
    text : str
        The news tweet to fact-check.

    Returns
    -------
    str
        'Fake' or 'Real' (as instructed by the system prompt), or a short
        human-readable error message if the input is empty or the API
        call fails.
    """
    # Guard against empty input so we don't waste an API call.
    if not text or not text.strip():
        return "Please enter a news tweet to analyse."

    client = Groq(api_key=api_key)
    try:
        completion = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {
                    "role": "system",
                    "content": "You are an experienced and up-to-date fact-checker who is very proficient in identifying falsehood and has a repertoire of knowledge of what real news is and what fake news is. You have over 30 years of experience in the field. I want you to analyse any news tweet entered and reply with ONLY one word 'Fake' if it is fake and 'Real' if it is real."
                },
                {
                    "role": "user",
                    "content": text
                }
            ],
            # temperature=0 makes the one-word classification deterministic;
            # the previous temperature=1 could flip the verdict between
            # identical runs.
            temperature=0,
            # The reply is a single word, so a tiny budget suffices
            # (previously 8000 tokens).
            max_tokens=10,
            top_p=1,
            stream=True,
            stop=None,
        )

        # Accumulate the streamed chunks into the final one-word verdict.
        prediction = "".join(
            chunk.choices[0].delta.content or "" for chunk in completion
        )
        return prediction.strip()  # Return the result (Fake or Real)
    except Exception as e:
        # Surface API failures to the UI instead of crashing the Gradio app.
        print(e)
        return f"Prediction failed: {e}"
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Define a function to update on Twitter
43
  def update_on_Twitter(tweet_text, prediction):
44
+ # Replace with your own Twitter API credentials
45
+ CONSUMER_KEY = CONSUMER_KEY
46
+ CONSUMER_SECRET = CONSUMER_SECRET
47
+ ACCESS_TOKEN = ACCESS_TOKEN
48
+ ACCESS_TOKEN_SECRET = ACCESS_TOKEN_SECRET
49
+ BEARER_TOKEN = BEARER_TOKEN
50
 
51
  # Authenticate to Twitter
52
  auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
 
55
  # Create an API object
56
  api = tweepy.API(auth)
57
 
58
+ # Create a Client object for posting tweets
59
  client = tweepy.Client(
60
+ BEARER_TOKEN,
61
  CONSUMER_KEY,
62
  CONSUMER_SECRET,
63
  ACCESS_TOKEN,
 
65
  wait_on_rate_limit=True
66
  )
67
 
68
+ post_text = f"The news: {tweet_text} is {prediction}."
69
 
70
  try:
71
  api.verify_credentials()
72
  print("Authentication OK")
73
+ client.create_tweet(text=post_text)
74
  return f'<a href="https://twitter.com/CANNBot" target="_blank">Detect Fake News on Twitter Bot Account</a>'
75
  except Exception as e:
76
  print(e)
 
78
 
79
# Build the Gradio Blocks UI: one input box, two action buttons, two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Fake News Detection using Groq LLM")

    # Input and output widgets.
    text_input = gr.Textbox(placeholder="Enter a news Tweet here...", label="News Tweet")
    text_output = gr.Textbox(label="Prediction")
    link_output = gr.HTML(label="Twitter Bot Account")

    # "Detect" runs the Groq-based classifier on the entered tweet.
    detect_button = gr.Button("Detect")
    detect_button.click(predict_fake_news, inputs=text_input, outputs=text_output)

    # "Detect on Twitter" posts the tweet text and its prediction to the bot
    # account and returns a link to it.
    twitter_button = gr.Button("Detect on Twitter")
    twitter_button.click(update_on_Twitter, inputs=[text_input, text_output], outputs=link_output)

# Launch the interface
demo.launch()