File size: 2,203 Bytes
524248c
 
 
 
 
 
 
 
 
04f5cbf
524248c
 
 
 
 
 
 
 
e9f7f55
524248c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04f5cbf
 
 
 
524248c
 
04f5cbf
 
 
 
524248c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import numpy as np
import nltk
import re
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
import gradio as gr
from transformers import pipeline

# Download NLTK resources if not already downloaded
# (nltk.download is a no-op when the corpus is already present locally).
nltk.download('stopwords')  # needed by stopwords.words('english') in clean_tweet
nltk.download('punkt')      # needed by nltk.word_tokenize in clean_tweet
# NOTE(review): wordnet/omw-1.4 are lemmatizer resources, but no lemmatizer
# is used anywhere in this script — confirm whether these downloads are needed.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the dataset
df = pd.read_csv('disaster_tweets.csv')  # Update path to your CSV file

# Data preprocessing
def clean_tweet(txt):
    """Normalize a tweet for TF-IDF/feature extraction.

    Lowercases the text, replaces every non-letter character with a
    space, tokenizes, and drops English stopwords.

    Parameters
    ----------
    txt : str
        Raw tweet text.

    Returns
    -------
    str
        Space-separated cleaned tokens (may be empty if nothing survives).
    """
    txt = txt.lower()
    # Bug fix: strip non-letters BEFORE tokenizing. The original applied
    # re.sub last, so punctuation-bearing tokens ("don't" -> "do"/"n't",
    # "fire!") bypassed stopword filtering and were then mangled into
    # stray letter fragments and runs of spaces.
    txt = re.sub(r'[^a-z]', ' ', txt)
    stop = set(stopwords.words('english'))
    words = [w for w in nltk.word_tokenize(txt) if w not in stop]
    return ' '.join(words)

# Apply the same cleaning that will be used at inference time, so the
# vectorizer sees consistent token streams.
df['clean_text'] = df['text'].apply(clean_tweet)

# Split data into features and target
# (assumes the CSV has 'text' and a binary 'target' column — the standard
# Kaggle disaster-tweets schema; confirm against the actual file.)
X = df['clean_text']
y = df['target']

# Splitting the dataset into training and testing sets
# stratify=y keeps the disaster / non-disaster ratio equal in both splits;
# random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)

# TF-IDF Vectorization
# max_df=0.8 drops terms appearing in >80% of tweets; uni- to tri-grams.
# NOTE: fit on the training split only — the test split is transformed
# with the fitted vocabulary to avoid leakage.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8, ngram_range=(1, 3))
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Model training
# NOTE(review): tfidf_test / y_test are prepared but never evaluated here —
# consider scoring the model on the held-out split.
passive_aggressive = PassiveAggressiveClassifier()
passive_aggressive.fit(tfidf_train, y_train)

# Load the Hugging Face model
# NOTE(review): 'distilbert-base-uncased' is a BASE checkpoint with a
# randomly initialized classification head — its LABEL_0/LABEL_1 outputs
# are not trained for disaster detection. A fine-tuned checkpoint (or the
# PassiveAggressive model trained above) should be used for real predictions.
classifier = pipeline("text-classification", model="distilbert-base-uncased")

# Function for making predictions using the Hugging Face model
def predict_disaster_tweets(text):
    """Classify a tweet as disaster-related or not.

    Cleans the input with the same preprocessing used at training time,
    vectorizes it with the fitted TF-IDF vocabulary, and predicts with
    the trained PassiveAggressiveClassifier.

    Parameters
    ----------
    text : str
        Raw tweet text entered in the UI.

    Returns
    -------
    str
        Human-readable label plus the classifier's decision score.
    """
    # Bug fix: the original routed text to the un-fine-tuned
    # 'distilbert-base-uncased' pipeline, whose LABEL_0/LABEL_1 outputs are
    # meaningless for this task, while the model trained above went unused.
    cleaned_text = clean_tweet(text)
    features = tfidf_vectorizer.transform([cleaned_text])
    pred = passive_aggressive.predict(features)[0]
    label = "Disaster" if pred == 1 else "Not Disaster"
    # Signed distance from the separating hyperplane; magnitude ~ confidence.
    score = passive_aggressive.decision_function(features)[0]
    return f"Label: {label}, Score: {score}"

# Assemble the web UI: one text box in, one text box out, wired to the
# prediction function defined above.
tweet_box = gr.Textbox(label="Enter Tweet Text", placeholder="Type here...")
result_box = gr.Textbox(label="Prediction")

iface = gr.Interface(
    fn=predict_disaster_tweets,
    inputs=tweet_box,
    outputs=result_box,
    title="Disaster Tweet Classifier",
    description="Predict whether a tweet indicates a disaster or not.",
)

# share=True additionally exposes a temporary public URL.
iface.launch(share=True)