vikranth1111 committed on
Commit
524248c
·
1 Parent(s): 2352521

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +65 -0
  2. disaster_tweets.csv +0 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import nltk
4
+ import re
5
+ import string
6
+ from nltk.corpus import stopwords
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.linear_model import PassiveAggressiveClassifier
10
+ import gradio as gr
11
+
12
# Download the NLTK resources used by this script.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the 'punkt_tab' tables from nltk.word_tokenize
# instead of the pickled 'punkt' models, so fetch both to work on old and
# new versions alike.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the dataset. The CSV is committed alongside this script, so it is read
# by its bare filename (resolved against the working directory) rather than
# the previous '/path/to/your/...' placeholder, which never exists.
df = pd.read_csv('disaster_tweets.csv')
20
+
21
+ # Data preprocessing
22
_ENGLISH_STOPWORDS = None  # filled in lazily by clean_tweet on first use


def clean_tweet(txt):
    """Normalise one tweet into lowercase, letters-only text.

    Steps, in order: lowercase the text, tokenise it with
    ``nltk.word_tokenize``, drop English stop words, re-join the remaining
    tokens with single spaces, then replace every character outside ``a-z``
    with a space.

    Args:
        txt: The raw tweet text. Anything that is not a ``str`` (for example
            ``None`` or the float NaN that pandas uses for empty CSV cells)
            is treated as an empty tweet.

    Returns:
        The cleaned text as a ``str``; ``''`` for non-string input.
    """
    global _ENGLISH_STOPWORDS

    # Non-string values have no .lower(); return early instead of raising
    # AttributeError in the middle of a DataFrame.apply or a UI callback.
    if not isinstance(txt, str):
        return ''

    # Build the stop-word set once; the original rebuilt it for every tweet.
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))

    tokens = nltk.word_tokenize(txt.lower())
    kept = ' '.join(tok for tok in tokens if tok not in _ENGLISH_STOPWORDS)
    return re.sub(r'[^a-z]', ' ', kept)
30
+
31
# Clean every tweet. Empty CSV cells are read as NaN (a float), which has no
# string methods, so blank them out and coerce to str before cleaning.
df['clean_text'] = df['text'].fillna('').astype(str).apply(clean_tweet)

# Split data into features and target
X = df['clean_text']
y = df['target']

# Splitting the dataset into training and testing sets (80/20, stratified on
# the target, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# TF-IDF Vectorization: fit on the training split only, then reuse the fitted
# vocabulary for the test split.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.8, ngram_range=(1, 3))
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Model training. Seed the classifier as well as the split so that repeated
# runs on the same data produce the same model.
passive_aggressive = PassiveAggressiveClassifier(random_state=0)
passive_aggressive.fit(tfidf_train, y_train)
48
+
49
+ # Function for making predictions
50
def predict_disaster_tweets(text):
    """Classify a single tweet with the trained model.

    The text is cleaned with ``clean_tweet``, vectorised with the fitted
    ``tfidf_vectorizer``, and passed to ``passive_aggressive.predict``.

    Args:
        text: The tweet text to classify.

    Returns:
        ``"Disaster Tweet"`` when the predicted label equals 1, otherwise
        ``"Normal Tweet"``.
    """
    features = tfidf_vectorizer.transform([clean_tweet(text)])
    label = passive_aggressive.predict(features)[0]
    if label == 1:
        return "Disaster Tweet"
    return "Normal Tweet"
55
+
56
+ # Gradio Interface setup
57
# Single-textbox UI: the entered tweet goes to predict_disaster_tweets and the
# returned label is shown in the output textbox.
iface = gr.Interface(
    fn=predict_disaster_tweets,
    inputs=gr.Textbox(label="Enter Tweet Text", placeholder="Type here..."),
    outputs=gr.Textbox(label="Prediction"),
    title="Disaster Tweet Classifier",
    description="Predict whether a tweet indicates a disaster or not.",
)

# Start the server only when this file is executed as a script, so importing
# the module does not also launch a web server.
if __name__ == "__main__":
    iface.launch(share=True)
disaster_tweets.csv ADDED
The diff for this file is too large to render. See raw diff