Spaces:

Afeezee
/

Twitter_Fake_News_Detection

Sleeping

App Files Files Community

Afeezee committed on Sep 24, 2024

Commit

a402109

verified ·

1 Parent(s): 24cb571

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -155

app.py CHANGED Viewed

@@ -1,159 +1,52 @@
 import gradio as gr
 import tweepy
-import joblib
-import torch
-from transformers import BertTokenizer
-from transformers import AutoModel, BertTokenizerFast
-from sklearn.model_selection import train_test_split
-import numpy as np
-import pandas as pd
-import torch.nn as nn
-data = pd.read_csv('Twitter_Analysis.csv')
-# Train-Validation-Test set split into 70:15:15 ratio
-# Train-Temp split
-train_text, temp_text, train_labels, temp_labels = train_test_split(data['tweet'], data['BinaryNumTarget'],
-                                                                    random_state=2018,                                               test_size=0.3,
-                                                                    stratify=data['majority_target'])
-# Validation-Test split.
-val_text, test_text, val_labels, test_labels = train_test_split(temp_text, temp_labels,
-                                                                random_state=2018,
-                                                                test_size=0.5,
-                                                                stratify=temp_labels)
-temp_labels.head()
-# Load BERT model and tokenizer via HuggingFace Transformers
-bert = AutoModel.from_pretrained('bert-base-uncased')
-tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
-# Majority of titles above have word length under 60. So, we set max title length as 60
-MAX_LENGHT = 60
-# Tokenize and encode sequences in the train set
-tokens_train = tokenizer.batch_encode_plus(
-    train_text.tolist(),
-    max_length = MAX_LENGHT,
-    padding=True,
-    truncation=True
-)
-# tokenize and encode sequences in the validation set
-tokens_val = tokenizer.batch_encode_plus(
-    val_text.tolist(),
-    max_length = MAX_LENGHT,
-    padding=True,
-    truncation=True
-)
-# tokenize and encode sequences in the test set
-tokens_test = tokenizer.batch_encode_plus(
-    test_text.tolist(),
-    max_length = MAX_LENGHT,
-    padding=True,
-    truncation=True
-)
-# Convert lists to tensors
-train_seq = torch.tensor(tokens_train['input_ids'])
-train_mask = torch.tensor(tokens_train['attention_mask'])
-train_y = torch.tensor(train_labels.tolist())
-val_seq = torch.tensor(tokens_val['input_ids'])
-val_mask = torch.tensor(tokens_val['attention_mask'])
-val_y = torch.tensor(val_labels.tolist())
-test_seq = torch.tensor(tokens_test['input_ids'])
-test_mask = torch.tensor(tokens_test['attention_mask'])
-test_y = torch.tensor(test_labels.tolist())
-# Data Loader structure definition
-from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
-batch_size = 32                                               #define a batch size
-train_data = TensorDataset(train_seq, train_mask, train_y)    # wrap tensors
-train_sampler = RandomSampler(train_data)                     # sampler for sampling the data during training
-train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
-                                                              # dataLoader for train set
-val_data = TensorDataset(val_seq, val_mask, val_y)            # wrap tensors
-val_sampler = SequentialSampler(val_data)                     # sampler for sampling the data during training
-val_dataloader = DataLoader(val_data, sampler = val_sampler, batch_size=batch_size)
-                                                              # dataLoader for validation set
-# Freezing the parameters and defining trainable BERT structure
-for param in bert.parameters():
-    param.requires_grad = False    # false here means gradient need not be computed
-class BERT_Arch(nn.Module):
-    def __init__(self, bert):
-      super(BERT_Arch, self).__init__()
-      self.bert = bert
-      self.dropout = nn.Dropout(0.1)            # dropout layer
-      self.relu =  nn.ReLU()                    # relu activation function
-      self.fc1 = nn.Linear(768,512)             # dense layer 1
-      self.fc2 = nn.Linear(512,2)               # dense layer 2 (Output layer)
-      self.softmax = nn.LogSoftmax(dim=1)       # softmax activation function
-    def forward(self, sent_id, mask):           # define the forward pass
-      cls_hs = self.bert(sent_id, attention_mask=mask)['pooler_output']
-                                                # pass the inputs to the model
-      x = self.fc1(cls_hs)
-      x = self.relu(x)
-      x = self.dropout(x)
-      x = self.fc2(x)                           # output layer
-      x = self.softmax(x)                       # apply softmax activation
-      return x
-model = BERT_Arch(bert)
-# Defining the hyperparameters (optimizer, weights of the classes and the epochs)
-# Define the optimizer
-from transformers import AdamW
-optimizer = AdamW(model.parameters(),
-                  lr = 1e-5)          # learning rate
-# Define the loss function
-#cross_entropy  = nn.NLLLoss()
-cross_entropy = torch.nn.NLLLoss()
-# Number of training epochs
-epochs = 2
-# Load the tokenizer and the model
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-joblib.dump([model, bert], 'c2_new_models2_weights.pt')
 def predict_fake_news(text):
-    # Tokenize and encode sequences
-    inputs = tokenizer.encode_plus(
-        text,
-        max_length= 60,
-        padding='max_length',
-        truncation=True,
-        return_tensors="pt"
     )
-    input_ids = inputs['input_ids']
-    attention_mask = inputs['attention_mask']
-    # Make prediction
-    model.eval()  # Ensure the model is in evaluation mode
-    with torch.no_grad():
-        outputs = model(input_ids, attention_mask)
-    # Access the logits directly from the outputs Tensor
-    logits = outputs
-    # Get the prediction using argmax
-    prediction = torch.argmax(logits).item()
-    # Map prediction to label
-    label_map = {0: 'Fake', 1: 'Real'}
-    return label_map[prediction]
 # Define a function to update on Twitter
 def update_on_Twitter(tweet_text, prediction):
-    CONSUMER_KEY = "q76xzfaSG7jL4unpvaNuPM5Ms"
-    CONSUMER_SECRET = "7h2JCH9fveW3srWarhCmwLbr8rTtVeJ04Qo3q65VItX2L4eFs1"
-    ACCESS_TOKEN = "1636314191198932992-VesD9DTEnagO7fQdCiu5Fh6vuFLbw1"
-    ACCESS_TOKEN_SECRET = "DcTCYDGba8UWlbMEpDvmTMZuVI2XAip7Tu8QgLTrC12AW"
-    BAERER_TOKEN = "AAAAAAAAAAAAAAAAAAAAAPJjnwEAAAAA3DnqW09w51Oufv8UCReOPQLPUtA%3Dz9vzO4DXVbXRU63RZB3TzbCrBc0saEnQZ49GMmGkDqKVu30qwC"
     # Authenticate to Twitter
     auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
@@ -162,9 +55,9 @@ def update_on_Twitter(tweet_text, prediction):
     # Create an API object
     api = tweepy.API(auth)
-    # Create a Client object
     client = tweepy.Client(
-      BAERER_TOKEN,
       CONSUMER_KEY,
       CONSUMER_SECRET,
       ACCESS_TOKEN,
@@ -172,12 +65,12 @@ def update_on_Twitter(tweet_text, prediction):
       wait_on_rate_limit=True
     )
-    postText = f"The news: {tweet_text} is {prediction}."
     try:
         api.verify_credentials()
         print("Authentication OK")
-        client.create_tweet(text=postText)
         return f'<a href="https://twitter.com/CANNBot" target="_blank">Detect Fake News on Twitter Bot Account</a>'
     except Exception as e:
         print(e)
@@ -185,16 +78,16 @@ def update_on_Twitter(tweet_text, prediction):
 # Use Gradio Blocks to create a more flexible interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Fake News Detection")
     text_input = gr.Textbox(placeholder="Enter a news Tweet here...", label="News Tweet")
     text_output = gr.Textbox(label="Prediction")
     link_output = gr.HTML(label="Twitter Bot Account")
-    # Button to get prediction
     gr.Button("Detect").click(predict_fake_news, inputs=text_input, outputs=text_output)
-    # Button to generate a Gradio link
     gr.Button("Detect on Twitter").click(update_on_Twitter, inputs=[text_input, text_output], outputs=link_output)
 # Launch the interface
-demo.launch()

 import gradio as gr
 import tweepy
+from groq import Groq
+import os
+api_key = os.getenv("Groqapi")
+CONSUMER_KEY = os.getenv("TwitterConsumer")
+CONSUMER_SECRET = os.getenv("ConsumerSecret")
+ACCESS_TOKEN = os.getenv("AccessToken")
+ACCESS_TOKEN_SECRET = os.getenv("AccTokenSecret")
+BEARER_TOKEN = os.getenv("BearerToken")
+# Define the Groq-based function to predict fake news
 def predict_fake_news(text):
+    client = Groq(api_key= api_key)
+    completion = client.chat.completions.create(
+        model="llama-3.1-70b-versatile",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are an experienced and up-to-date fact-checker who is very proficient in identifying falsehood and has a repertoire of knowledge of what real news is and what fake news is. You have over 30 years of experience in the field. I want you to analyse any news tweet entered and reply with ONLY one word 'Fake' if it is fake and 'Real' if it is real."
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ],
+        temperature=1,
+        max_tokens=8000,
+        top_p=1,
+        stream=True,
+        stop=None,
     )
+    # Iterate over the streaming response to get the result
+    prediction = ""
+    for chunk in completion:
+        prediction += chunk.choices[0].delta.content or ""
+    return prediction.strip()  # Return the result (Fake or Real)
 # Define a function to update on Twitter
 def update_on_Twitter(tweet_text, prediction):
     """Post the tweet text together with its verdict from the bot account.

     Args:
         tweet_text: The news tweet that was classified.
         prediction: The verdict produced for ``tweet_text``.

     Returns:
         An HTML snippet for the UI: a link to the bot account when the post
         succeeds, otherwise a short message explaining that nothing was
         posted.
     """
     # Refuse to publish a tweet with a missing text or a missing verdict.
     if (
         not tweet_text
         or not tweet_text.strip()
         or not prediction
         or not prediction.strip()
     ):
         return "<p>Enter a news tweet and run Detect before posting to Twitter.</p>"
     # The credentials are the module-level constants. They must not be
     # re-assigned inside this function: an assignment such as `X = X` makes
     # the name local and raises UnboundLocalError on the first call.
     # Authenticate to Twitter
     auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
     auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
     # Create an API object (used only to verify the credentials)
     api = tweepy.API(auth)
     # Create a Client object for posting tweets; keyword arguments keep each
     # credential bound to the parameter it belongs to.
     client = tweepy.Client(
         bearer_token=BEARER_TOKEN,
         consumer_key=CONSUMER_KEY,
         consumer_secret=CONSUMER_SECRET,
         access_token=ACCESS_TOKEN,
         access_token_secret=ACCESS_TOKEN_SECRET,
         wait_on_rate_limit=True,
     )
     post_text = f"The news: {tweet_text} is {prediction}."
     try:
         api.verify_credentials()
         print("Authentication OK")
         client.create_tweet(text=post_text)
     except Exception as e:
         # UI boundary: log the failure and give the HTML output a message
         # instead of returning None.
         print(e)
         return "<p>Could not post to Twitter. See the application log for details.</p>"
     return '<a href="https://twitter.com/CANNBot" target="_blank">Detect Fake News on Twitter Bot Account</a>'
 # Build the UI with Gradio Blocks: one input box, one prediction box, and an
 # HTML area for the link returned after posting.
 with gr.Blocks() as demo:
     gr.Markdown("# Fake News Detection using Groq LLM")
     text_input = gr.Textbox(
         placeholder="Enter a news Tweet here...",
         label="News Tweet",
     )
     text_output = gr.Textbox(label="Prediction")
     link_output = gr.HTML(label="Twitter Bot Account")
     # "Detect" runs the classifier and shows its verdict in the prediction box
     detect_button = gr.Button("Detect")
     detect_button.click(
         fn=predict_fake_news,
         inputs=text_input,
         outputs=text_output,
     )
     # "Detect on Twitter" passes the tweet and the displayed verdict to
     # update_on_Twitter and renders the HTML it returns
     twitter_button = gr.Button("Detect on Twitter")
     twitter_button.click(
         fn=update_on_Twitter,
         inputs=[text_input, text_output],
         outputs=link_output,
     )
 # Start serving the interface
 demo.launch()