Sami Ali committed on
Commit
7741f30
·
1 Parent(s): b9001fe

Initial commit: Twitter sentiment analysis with gradio

Browse files
.requirements.txt ADDED
Binary file (2.07 kB). View file
 
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "git.ignoreLimitWarning": true
3
+ }
src/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
src/.gradio/flagged/dataset1.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tweet,output,timestamp
2
+ I am very happy,Positive,2025-09-05 19:31:51.696127
3
+ I am very happy,Positive,2025-09-05 19:31:53.644682
src/app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio front end for the Naive Bayes Twitter sentiment classifier."""
import gradio as gr

from model import predict_sentiment

GITHUB_LINK = "https://github.com/<your-username>/<your-repo>"
COFFEE_LINK = "https://www.buymeacoffee.com/samiali"  # <-- replace with your BuyMeACoffee link


# The page title is a constructor argument of gr.Blocks; assigning a local
# variable called `title` inside the context has no effect on the page.
with gr.Blocks(
    theme=gr.themes.Citrus(),
    title="Twitter Sentiment Analysis",
) as app:
    gr.Markdown(
        """
        # 🌟 Twitter Sentiment Analysis
        Enter a tweet below and find out if it's **Positive**, **Neutral** or **Negative**.
        _Model: Naive Bayes trained on NLTK Twitter samples_
        """
    )
    with gr.Row():
        with gr.Column():
            # No trailing comma here: `text` must be the Textbox component
            # itself, not a one-element tuple wrapping it.
            text = gr.Textbox(
                placeholder="Type your tweet here...",
                lines=3,
                label="Your Tweet",
            )
            btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
        with gr.Column():
            output = gr.Label(label="Prediction")

    gr.Markdown(
        f"""
        🔗 **Source Code on [GitHub]({GITHUB_LINK})**

        ☕ If you like this project, consider [buying me a coffee]({COFFEE_LINK})

        <a href="{COFFEE_LINK}" target="_blank">
        <img src="https://cdn.buymeacoffee.com/buttons/default-orange.png"
        alt="Buy Me A Coffee" height="41" width="174">
        </a>
        """
    )

    gr.Markdown("💾 All predictions are stored for analysis.")
    # Run the classifier on the textbox content and show the label returned.
    btn.click(predict_sentiment, inputs=text, outputs=output)

if __name__ == '__main__':
    # share=True asks Gradio to create a public share link for the app.
    app.launch(share=True)
src/model.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import nltk
3
+ import re
4
+ import string
5
+
6
+ from nltk.corpus import twitter_samples
7
+ from nltk.stem import PorterStemmer
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import TweetTokenizer
10
+
11
# Download the corpora this module reads from.
nltk.download('twitter_samples')
nltk.download('stopwords')

positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')

# The first 4000 tweets of each class are used for training; everything
# after that is held out for evaluation.
train_pos, test_pos = positive_tweets[:4000], positive_tweets[4000:]
train_neg, test_neg = negative_tweets[:4000], negative_tweets[4000:]

train_x = train_pos + train_neg
test_x = test_pos + test_neg

print(f"Number of positive tweets: {len(positive_tweets)}")
print(f"Number of negative tweets: {len(negative_tweets)}")

# Labels are laid out in the same order as the tweets above:
# ones for the positive tweets followed by zeros for the negative tweets.
train_y = np.append(np.ones(len(train_pos)), np.zeros(len(train_neg)))
test_y = np.append(np.ones(len(test_pos)), np.zeros(len(test_neg)))

print(f"train_y.shape = {train_y.shape}")
print(f"test_y.shape = {test_y.shape}")
33
+
34
+
35
def process_tweet(tweet):
    """Clean, tokenize and stem a single tweet.

    The text is stripped of ``$``-prefixed tokens, a leading ``RT`` marker,
    hyperlinks and ``#`` characters. It is then tokenized with
    ``TweetTokenizer(preserve_case=False, strip_handles=True,
    reduce_len=True)``; English stop words and punctuation tokens are
    dropped and every remaining token is stemmed with ``PorterStemmer``.

    Args:
        tweet: The raw tweet text.

    Returns:
        A list of stemmed tokens, in their original order.
    """
    stemmer = PorterStemmer()
    # The corpus reader returns a list; a set makes each per-token membership
    # test constant-time instead of a linear scan.
    stopwords_english = set(stopwords.words('english'))

    tweet = re.sub(r'\$\w*', '', tweet)
    tweet = re.sub(r'^RT[\s]+', '', tweet)
    # NOTE: `.*` removes everything from the URL to the end of that line.
    tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet)
    tweet = re.sub(r'#', '', tweet)

    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True,
                               reduce_len=True)
    tweet_tokens = tokenizer.tokenize(tweet)

    # `word not in string.punctuation` is a substring test against the
    # punctuation string, so multi-character tokens such as emoticons are kept
    # unless they happen to appear contiguously in that string.
    return [
        stemmer.stem(word)
        for word in tweet_tokens
        if word not in stopwords_english and word not in string.punctuation
    ]
54
+
55
+
56
# Show the effect of the preprocessing on the first positive tweet.
_sample_tweet = positive_tweets[0]
print("Before tweet processing: ", _sample_tweet)
print("After tweet processing: ", process_tweet(_sample_tweet))
58
+
59
def build_freqs(tweets, ys):
    """Count how often each processed word occurs under each label.

    Args:
        tweets: Iterable of raw tweet strings.
        ys: Iterable of labels, paired with ``tweets`` position by position.

    Returns:
        A dict mapping ``(word, label)`` to the number of occurrences of
        ``word`` (after ``process_tweet``) in tweets carrying ``label``.
    """
    counts = {}
    for raw_tweet, label in zip(tweets, ys):
        for token in process_tweet(raw_tweet):
            key = (token, label)
            counts[key] = counts.get(key, 0) + 1
    return counts
69
+
70
# Build the (word, label) -> count table from the training split.
freqs = build_freqs(train_x, train_y)

# Report the type and size of the table that was built.
print(f"type(freqs) = {type(freqs)}")
print(f"len(freqs) = {len(freqs)}")
76
+
77
def train_naive_bayes(freq, train_x, train_y):
    """Estimate the log prior and per-word log likelihood ratios.

    Args:
        freq: Dict mapping ``(word, label)`` to an occurrence count. Labels
            greater than zero are counted as positive, all others as negative.
        train_x: Unused; kept so existing callers do not have to change.
        train_y: Iterable of numeric labels, one per training document.

    Returns:
        A tuple ``(logprior, loglikelihood)`` where ``logprior`` is
        ``log(D_pos) - log(D_neg)`` (document counts per class) and
        ``loglikelihood`` maps every vocabulary word to
        ``log(P(word | pos) / P(word | neg))`` using add-one smoothing.
    """
    vocab = {word for word, _ in freq}
    V = len(vocab)

    # Total number of word occurrences observed in each class.
    N_pos = N_neg = 0
    for (_, label), count in freq.items():
        if label > 0:
            N_pos += count
        else:
            N_neg += count

    # Number of training documents in each class.
    D_pos = sum(1 for y in train_y if y > 0)
    D_neg = sum(1 for y in train_y if y <= 0)
    logprior = np.log(D_pos) - np.log(D_neg)

    loglikelihood = {}
    for word in vocab:
        freq_pos = freq.get((word, 1.0), 0)
        freq_neg = freq.get((word, 0.0), 0)

        # Add-one smoothing keeps both probabilities strictly positive, so
        # the ratio and its logarithm are always finite.
        temp_pos_prob = (freq_pos + 1) / (N_pos + V)
        temp_neg_prob = (freq_neg + 1) / (N_neg + V)

        loglikelihood[word] = np.log(temp_pos_prob / temp_neg_prob)

    return logprior, loglikelihood
111
+
112
+
113
# Fit the classifier on the training split when the module is loaded; the
# resulting parameters are read by the code further down in this module.
(logprior, loglikelihood) = train_naive_bayes(freqs, train_x, train_y)
114
+
115
+
116
def predict(tweet, logprior, loglikelihood):
    """Score a tweet with the Naive Bayes parameters.

    Args:
        tweet: The raw tweet text.
        logprior: The log prior term added to every score.
        loglikelihood: Dict mapping a processed word to its log likelihood
            ratio; words missing from it contribute nothing.

    Returns:
        ``logprior`` plus the sum of the log likelihood ratios of the tweet's
        known words.
    """
    score = 0
    score += logprior
    for token in process_tweet(tweet):
        if token not in loglikelihood:
            continue
        score += loglikelihood[token]
    return score
124
+
125
# Score one hand-written example with the freshly trained parameters.
my_tweet = 'She smiled.'
p = predict(
    my_tweet,
    logprior,
    loglikelihood,
)
print('The expected output is', p)
128
+
129
def evaluate(test_x, test_y, logprior, loglikelihood):
    """Compute the classifier's accuracy on a labelled set of tweets.

    A tweet is predicted positive (1) when its ``predict`` score is greater
    than zero and negative (0) otherwise.

    Args:
        test_x: Iterable of raw tweet strings.
        test_y: Labels for ``test_x`` (1 = positive, 0 = negative).
        logprior: The log prior passed through to ``predict``.
        loglikelihood: The word log likelihood table passed to ``predict``.

    Returns:
        The fraction of tweets whose predicted label equals the true label.
    """
    y_hats = [
        1 if predict(tweet, logprior, loglikelihood) > 0 else 0
        for tweet in test_x
    ]
    # Flatten the labels so an (m, 1) column vector is compared element-wise
    # rather than being broadcast against the predictions into an (m, m) grid.
    return np.mean(np.equal(np.ravel(test_y), y_hats))
141
+
142
# Report accuracy on the held-out split.
_test_accuracy = evaluate(test_x, test_y, logprior, loglikelihood)
print("Naive Bayes accuracy = %0.4f" % _test_accuracy)
144
+
145
def predict_sentiment(tweet):
    """Classify a tweet as "Positive", "Neutral" or "Negative".

    The tweet is scored with ``predict`` using the module-level ``logprior``
    and ``loglikelihood``. Scores above 1 are positive, scores from 0 to 1
    inclusive are neutral, and everything else is negative.

    Args:
        tweet: The raw tweet text.

    Returns:
        One of the strings "Positive", "Neutral" or "Negative".
    """
    score = predict(tweet, logprior, loglikelihood)

    if score > 1:
        return "Positive"
    if 0 <= score <= 1:
        return "Neutral"
    return "Negative"