Usman06 committed on
Commit
66f83bf
·
verified ·
1 Parent(s): c96fc5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -505
app.py CHANGED
@@ -1,243 +1,3 @@
1
- '''import gradio as gr
2
- from transformers import TFBertForSequenceClassification, BertTokenizer
3
- import tensorflow as tf
4
-
5
- # Load model and tokenizer from your HF model repo
6
- model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
7
- tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
8
-
9
- def classify_sentiment(text):
10
- inputs = tokenizer(text, return_tensors="tf", padding=True, truncation=True)
11
- predictions = model(inputs).logits
12
- label = tf.argmax(predictions, axis=1).numpy()[0]
13
- labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
14
- return labels[label]
15
-
16
- demo = gr.Interface(fn=classify_sentiment,
17
- inputs=gr.Textbox(placeholder="Enter a tweet..."),
18
- outputs="text",
19
- title="Tweet Sentiment Classifier",
20
- description="Multilingual BERT-based Sentiment Analysis")
21
-
22
- demo.launch()
23
- '''
24
- '''
25
- import gradio as gr
26
- from transformers import TFBertForSequenceClassification, BertTokenizer
27
- import tensorflow as tf
28
-
29
- # Load model and tokenizer from Hugging Face
30
- model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
31
- tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
32
-
33
- # Manually define the correct mapping
34
- LABELS = {
35
- 0: "Neutral",
36
- 1: "Positive",
37
- 2: "Negative"
38
- }
39
-
40
- def classify_sentiment(text):
41
- inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
42
- outputs = model(inputs)
43
- probs = tf.nn.softmax(outputs.logits, axis=1)
44
- pred_label = tf.argmax(probs, axis=1).numpy()[0]
45
- confidence = float(tf.reduce_max(probs).numpy())
46
- return f"Prediction: {LABELS[pred_label]} (Confidence: {confidence:.2f})"
47
-
48
- demo = gr.Interface(
49
- fn=classify_sentiment,
50
- inputs=gr.Textbox(placeholder="Type your tweet here..."),
51
- outputs="text",
52
- title="Sentiment Analysis on Tweets",
53
- description="Multilingual BERT model fine-tuned for sentiment classification. Labels: Positive, Neutral, Negative."
54
- )
55
-
56
- demo.launch()
57
- '''
58
- '''
59
- import gradio as gr
60
- from transformers import TFBertForSequenceClassification, BertTokenizer
61
- import tensorflow as tf
62
- import snscrape.modules.twitter as sntwitter
63
- import praw
64
- import os
65
-
66
- # Load model and tokenizer
67
- model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
68
- tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
69
-
70
- # Label Mapping
71
- LABELS = {
72
- 0: "Neutral",
73
- 1: "Positive",
74
- 2: "Negative"
75
- }
76
-
77
- # Reddit API setup with environment variables
78
- reddit = praw.Reddit(
79
- client_id=os.getenv("ul9U7jc8BIHlTAh45jkpkw"),
80
- client_secret=os.getenv("TuwIBEKmlb1AptNMRYpuzuNTEabMYg"),
81
- user_agent=os.getenv("myscript by u/usman_afzal", "sentiment-classifier-script")
82
- )
83
-
84
- # Tweet text extractor
85
- def fetch_tweet_text(tweet_url):
86
- try:
87
- tweet_id = tweet_url.split("/")[-1]
88
- for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
89
- return tweet.content
90
- return "Unable to extract tweet content."
91
- except Exception as e:
92
- return f"Error fetching tweet: {str(e)}"
93
-
94
- # Reddit post extractor
95
- def fetch_reddit_text(reddit_url):
96
- try:
97
- submission = reddit.submission(url=reddit_url)
98
- return f"{submission.title}\n\n{submission.selftext}"
99
- except Exception as e:
100
- return f"Error fetching Reddit post: {str(e)}"
101
-
102
- # Sentiment classification logic
103
- def classify_sentiment(text_input, tweet_url, reddit_url):
104
- if reddit_url.strip():
105
- text = fetch_reddit_text(reddit_url)
106
- elif tweet_url.strip():
107
- text = fetch_tweet_text(tweet_url)
108
- elif text_input.strip():
109
- text = text_input
110
- else:
111
- return "[!] Please enter text or a post URL."
112
-
113
- if text.lower().startswith("error") or "Unable to extract" in text:
114
- return f"[!] Error: {text}"
115
-
116
- try:
117
- inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
118
- outputs = model(inputs)
119
- probs = tf.nn.softmax(outputs.logits, axis=1)
120
- pred_label = tf.argmax(probs, axis=1).numpy()[0]
121
- confidence = float(tf.reduce_max(probs).numpy())
122
- return f"Prediction: {LABELS[pred_label]} (Confidence: {confidence:.2f})"
123
- except Exception as e:
124
- return f"[!] Prediction error: {str(e)}"
125
-
126
- # Gradio Interface
127
- demo = gr.Interface(
128
- fn=classify_sentiment,
129
- inputs=[
130
- gr.Textbox(label="Custom Text Input", placeholder="Type your tweet or message here..."),
131
- gr.Textbox(label="Tweet URL", placeholder="Paste a tweet URL here (optional)"),
132
- gr.Textbox(label="Reddit Post URL", placeholder="Paste a Reddit post URL here (optional)")
133
- ],
134
- outputs="text",
135
- title="Multilingual Sentiment Analysis",
136
- description="Analyze sentiment of text, tweets, or Reddit posts. Supports multiple languages using BERT!"
137
- )
138
-
139
- demo.launch()
140
- '''
141
-
142
-
143
- '''
144
- import gradio as gr
145
- from transformers import TFBertForSequenceClassification, BertTokenizer
146
- import tensorflow as tf
147
- import praw
148
- import os
149
-
150
-
151
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
152
- import torch
153
- from scipy.special import softmax
154
-
155
-
156
- model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
157
- tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
158
-
159
- LABELS = {
160
- 0: "Neutral",
161
- 1: "Positive",
162
- 2: "Negative"
163
- }
164
-
165
-
166
- fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
167
- fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
168
- fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
169
-
170
- # Reddit API
171
- reddit = praw.Reddit(
172
- client_id=os.getenv("ul9U7jc8BIHlTAh45jkpkw"),
173
- client_secret=os.getenv("TuwIBEKmlb1AptNMRYpuzuNTEabMYg"),
174
- user_agent=os.getenv("myscript by u/usman_afzal", "sentiment-classifier-ui")
175
- )
176
-
177
- def fetch_reddit_text(reddit_url):
178
- try:
179
- submission = reddit.submission(url=reddit_url)
180
- return f"{submission.title}\n\n{submission.selftext}"
181
- except Exception as e:
182
- return f"Error fetching Reddit post: {str(e)}"
183
-
184
-
185
- def fallback_classifier(text):
186
- encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
187
- with torch.no_grad():
188
- output = fallback_model(**encoded_input)
189
- scores = softmax(output.logits.numpy()[0])
190
- labels = ['Negative', 'Neutral', 'Positive']
191
- return f"Prediction: {labels[scores.argmax()]}"
192
-
193
- def classify_sentiment(text_input, reddit_url):
194
- if reddit_url.strip():
195
- text = fetch_reddit_text(reddit_url)
196
- elif text_input.strip():
197
- text = text_input
198
- else:
199
- return "[!] Please enter some text or a Reddit post URL."
200
-
201
- if text.lower().startswith("error") or "Unable to extract" in text:
202
- return f"[!] {text}"
203
-
204
- try:
205
- inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
206
- outputs = model(inputs)
207
- probs = tf.nn.softmax(outputs.logits, axis=1)
208
- confidence = float(tf.reduce_max(probs).numpy())
209
- pred_label = tf.argmax(probs, axis=1).numpy()[0]
210
-
211
- if confidence < 0.5:
212
- return fallback_classifier(text)
213
-
214
- return f"Prediction: {LABELS[pred_label]}"
215
- except Exception as e:
216
- return f"[!] Prediction error: {str(e)}"
217
-
218
- # Gradio interface
219
- demo = gr.Interface(
220
- fn=classify_sentiment,
221
- inputs=[
222
- gr.Textbox(
223
- label="Text Input (can be tweet or any content)",
224
- placeholder="Paste tweet or type any content here...",
225
- lines=4
226
- ),
227
- gr.Textbox(
228
- label="Reddit Post URL",
229
- placeholder="Paste a Reddit post URL (optional)",
230
- lines=1
231
- ),
232
- ],
233
- outputs="text",
234
- title="Sentiment Analyzer",
235
- description="🔍 Paste any text (including tweet content) OR a Reddit post URL to analyze sentiment.\n\n💡 Tweet URLs are not supported directly due to platform restrictions. Please paste tweet content manually."
236
- )
237
-
238
- demo.launch()
239
- '''
240
- '''
241
  import gradio as gr
242
  from transformers import TFBertForSequenceClassification, BertTokenizer
243
  import tensorflow as tf
@@ -245,39 +5,32 @@ import praw
245
  import os
246
  import pytesseract
247
  from PIL import Image
248
- import cv2
249
- import numpy as np
250
- import re
251
-
252
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
253
  import torch
 
254
  from scipy.special import softmax
255
 
256
- # Install tesseract OCR (only runs once in Hugging Face Spaces)
257
  os.system("apt-get update && apt-get install -y tesseract-ocr")
258
 
259
- # Load main model
260
  model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
261
  tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
262
 
263
- LABELS = {
264
- 0: "Neutral",
265
- 1: "Positive",
266
- 2: "Negative"
267
- }
268
 
269
- # Load fallback model
270
  fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
271
  fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
272
  fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
273
 
274
- # Reddit API setup
275
  reddit = praw.Reddit(
276
- client_id=os.getenv("ul9U7jc8BIHlTAh45jkpkw"),
277
- client_secret=os.getenv("TuwIBEKmlb1AptNMRYpuzuNTEabMYg"),
278
- user_agent=os.getenv("myscript by u/usman_afzal", "sentiment-classifier-ui")
279
  )
280
 
 
281
  def fetch_reddit_text(reddit_url):
282
  try:
283
  submission = reddit.submission(url=reddit_url)
@@ -285,160 +38,39 @@ def fetch_reddit_text(reddit_url):
285
  except Exception as e:
286
  return f"Error fetching Reddit post: {str(e)}"
287
 
288
- def fallback_classifier(text):
289
- encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
290
- with torch.no_grad():
291
- output = fallback_model(**encoded_input)
292
- scores = softmax(output.logits.numpy()[0])
293
- labels = ['Negative', 'Neutral', 'Positive']
294
- return f"Prediction: {labels[scores.argmax()]}"
295
-
296
- def clean_ocr_text(text):
297
- text = text.strip()
298
- text = re.sub(r'\s+', ' ', text) # Replace multiple spaces and newlines
299
- text = re.sub(r'[^\x00-\x7F]+', '', text) # Remove non-ASCII characters
300
- return text
301
-
302
- def classify_sentiment(text_input, reddit_url, image):
303
- if reddit_url.strip():
304
- text = fetch_reddit_text(reddit_url)
305
- elif image is not None:
306
- try:
307
- img_array = np.array(image)
308
- gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
309
- _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
310
- text = pytesseract.image_to_string(thresh)
311
- text = clean_ocr_text(text)
312
- except Exception as e:
313
- return f"[!] OCR failed: {str(e)}"
314
- elif text_input.strip():
315
- text = text_input
316
- else:
317
- return "[!] Please enter some text, upload an image, or provide a Reddit URL."
318
-
319
- if text.lower().startswith("error") or "Unable to extract" in text:
320
- return f"[!] {text}"
321
-
322
  try:
323
- inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
324
- outputs = model(inputs)
325
- probs = tf.nn.softmax(outputs.logits, axis=1)
326
- confidence = float(tf.reduce_max(probs).numpy())
327
- pred_label = tf.argmax(probs, axis=1).numpy()[0]
328
-
329
- if confidence < 0.5:
330
- return fallback_classifier(text)
331
-
332
- return f"Prediction: {LABELS[pred_label]}"
333
  except Exception as e:
334
- return f"[!] Prediction error: {str(e)}"
335
-
336
- # Gradio interface
337
- demo = gr.Interface(
338
- fn=classify_sentiment,
339
- inputs=[
340
- gr.Textbox(
341
- label="Text Input (can be tweet or any content)",
342
- placeholder="Paste tweet or type any content here...",
343
- lines=4
344
- ),
345
- gr.Textbox(
346
- label="Reddit Post URL",
347
- placeholder="Paste a Reddit post URL (optional)",
348
- lines=1
349
- ),
350
- gr.Image(
351
- label="Upload Image (optional)",
352
- type="pil"
353
- )
354
- ],
355
- outputs="text",
356
- title="Sentiment Analyzer",
357
- description="🔍 Paste any text, Reddit post URL, or upload an image containing text to analyze sentiment.\n\n💡 Tweet URLs are not supported. Please paste tweet content or screenshot instead."
358
- )
359
-
360
- demo.launch()
361
- '''
362
-
363
- import gradio as gr
364
- from transformers import TFBertForSequenceClassification, BertTokenizer
365
- import tensorflow as tf
366
- import praw
367
- import os
368
- import pytesseract
369
- from PIL import Image
370
- import cv2
371
- import numpy as np
372
- import re
373
-
374
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
375
- import torch
376
- from scipy.special import softmax
377
- import matplotlib.pyplot as plt
378
- import pandas as pd
379
-
380
- # Install tesseract OCR (only runs once in Hugging Face Spaces)
381
- os.system("apt-get update && apt-get install -y tesseract-ocr")
382
-
383
- # Load main model
384
- model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
385
- tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
386
-
387
- LABELS = {0: "Neutral", 1: "Positive", 2: "Negative"}
388
-
389
- # Load fallback model
390
- fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
391
- fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
392
- fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
393
-
394
- # Reddit API setup
395
- reddit = praw.Reddit(
396
- client_id=os.getenv("REDDIT_CLIENT_ID"),
397
- client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
398
- user_agent=os.getenv("REDDIT_USER_AGENT", "sentiment-classifier-ui-finalyear2025-shrish191")
399
- )
400
-
401
- def fetch_reddit_text(reddit_url):
402
- try:
403
- submission = reddit.submission(url=reddit_url)
404
- return f"{submission.title}\n\n{submission.selftext}"
405
- except Exception as e:
406
- return f"Error fetching Reddit post: {str(e)}"
407
 
 
408
  def fallback_classifier(text):
409
  encoded_input = fallback_tokenizer(text, return_tensors='pt', truncation=True, padding=True)
410
  with torch.no_grad():
411
  output = fallback_model(**encoded_input)
412
  scores = softmax(output.logits.numpy()[0])
413
  labels = ['Negative', 'Neutral', 'Positive']
414
- return f"Prediction: {labels[scores.argmax()]}"
415
-
416
- def clean_ocr_text(text):
417
- text = text.strip()
418
- text = re.sub(r'\s+', ' ', text)
419
- text = re.sub(r'[^\x00-\x7F]+', '', text)
420
- return text
421
 
 
422
  def classify_sentiment(text_input, reddit_url, image):
 
423
  if reddit_url.strip():
424
  text = fetch_reddit_text(reddit_url)
425
  elif image is not None:
426
- try:
427
- img_array = np.array(image)
428
- gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
429
- _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
430
- text = pytesseract.image_to_string(thresh)
431
- text = clean_ocr_text(text)
432
- except Exception as e:
433
- return f"[!] OCR failed: {str(e)}"
434
  elif text_input.strip():
435
  text = text_input
436
  else:
437
- return "[!] Please enter some text, upload an image, or provide a Reddit URL."
438
 
439
  if text.lower().startswith("error") or "Unable to extract" in text:
440
- return f"[!] {text}"
441
 
 
442
  try:
443
  inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
444
  outputs = model(inputs)
@@ -449,127 +81,21 @@ def classify_sentiment(text_input, reddit_url, image):
449
  if confidence < 0.5:
450
  return fallback_classifier(text)
451
 
452
- return f"Prediction: {LABELS[pred_label]}"
453
  except Exception as e:
454
  return f"[!] Prediction error: {str(e)}"
455
 
456
- # Subreddit sentiment analysis function
457
- def analyze_subreddit(subreddit_name):
458
- try:
459
- subreddit = reddit.subreddit(subreddit_name)
460
- posts = list(subreddit.hot(limit=20))
461
-
462
- sentiments = []
463
- titles = []
464
-
465
- for post in posts:
466
- text = f"{post.title}\n{post.selftext}"
467
- try:
468
- inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
469
- outputs = model(inputs)
470
- probs = tf.nn.softmax(outputs.logits, axis=1)
471
- confidence = float(tf.reduce_max(probs).numpy())
472
- pred_label = tf.argmax(probs, axis=1).numpy()[0]
473
-
474
- sentiment = LABELS[pred_label] if confidence >= 0.5 else fallback_classifier(text).split(": ")[-1]
475
- except:
476
- sentiment = "Error"
477
- sentiments.append(sentiment)
478
- titles.append(post.title)
479
-
480
- df = pd.DataFrame({"Title": titles, "Sentiment": sentiments})
481
- sentiment_counts = df["Sentiment"].value_counts()
482
-
483
- # Plot bar chart
484
- fig, ax = plt.subplots()
485
- sentiment_counts.plot(kind="bar", color=["red", "green", "gray"], ax=ax)
486
- ax.set_title(f"Sentiment Distribution in r/{subreddit_name}")
487
- ax.set_xlabel("Sentiment")
488
- ax.set_ylabel("Number of Posts")
489
-
490
- return fig, df
491
- except Exception as e:
492
- return f"[!] Error: {str(e)}", pd.DataFrame()
493
-
494
- # Gradio tab 1: Text/Image/Reddit Post Analysis
495
- main_interface = gr.Interface(
496
  fn=classify_sentiment,
497
  inputs=[
498
- gr.Textbox(
499
- label="Text Input (can be tweet or any content)",
500
- placeholder="Paste tweet or type any content here...",
501
- lines=4
502
- ),
503
- gr.Textbox(
504
- label="Reddit Post URL",
505
- placeholder="Paste a Reddit post URL (optional)",
506
- lines=1
507
- ),
508
- gr.Image(
509
- label="Upload Image (optional)",
510
- type="pil"
511
- )
512
  ],
513
  outputs="text",
514
- title="Sentiment Analyzer",
515
- description="🔍 Paste any text, Reddit post URL, or upload an image containing text to analyze sentiment.\n\n💡 Tweet URLs are not supported. Please paste tweet content or screenshot instead."
516
- )
517
-
518
- # Gradio tab 2: Subreddit Analysis
519
- subreddit_interface = gr.Interface(
520
- fn=analyze_subreddit,
521
- inputs=gr.Textbox(label="Subreddit Name", placeholder="e.g., AskReddit"),
522
- outputs=[
523
- gr.Plot(label="Sentiment Distribution"),
524
- gr.Dataframe(label="Post Titles and Sentiments", wrap=True)
525
- ],
526
- title="Subreddit Sentiment Analysis",
527
- description="📊 Enter a subreddit to analyze sentiment of its top 20 hot posts."
528
- )
529
-
530
- # Tabs
531
- demo = gr.TabbedInterface(
532
- interface_list=[main_interface, subreddit_interface],
533
- tab_names=["General Sentiment Analysis", "Subreddit Analysis"]
534
  )
535
 
536
  demo.launch()
537
-
538
-
539
-
540
-
541
-
542
-
543
-
544
-
545
-
546
-
547
-
548
-
549
-
550
-
551
-
552
-
553
-
554
-
555
-
556
-
557
-
558
-
559
-
560
-
561
-
562
-
563
-
564
-
565
-
566
-
567
-
568
-
569
-
570
-
571
-
572
-
573
-
574
-
575
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import TFBertForSequenceClassification, BertTokenizer
3
  import tensorflow as tf
 
5
  import os
6
  import pytesseract
7
  from PIL import Image
 
 
 
 
 
8
  import torch
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  from scipy.special import softmax
11
 
12
# Setup for Tesseract (if running in cloud environment like Hugging Face Spaces).
# pytesseract needs the `tesseract` binary on PATH. Probe for it first so a
# restart does not re-run a slow `apt-get update` when the binary is already
# there, and report a failed install instead of ignoring the exit status.
# NOTE(review): Spaces usually declare system packages in packages.txt -
# confirm whether this runtime install is still required.
import shutil  # only needed for the binary probe below

if shutil.which("tesseract") is None:
    install_status = os.system("apt-get update && apt-get install -y tesseract-ocr")
    if install_status != 0:
        print("[!] tesseract-ocr could not be installed; image OCR will fail.")
14
 
15
# Main classifier: TensorFlow BERT checkpoint and its tokenizer, both loaded
# from the same Hub repository.
tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")

# Maps the main model's predicted class index to a human-readable label.
LABELS = dict(enumerate(("Neutral", "Positive", "Negative")))

# Fallback classifier (PyTorch RoBERTa), used when the main model's
# confidence is low.
fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
25
 
26
# Reddit API config.
# Credentials come from the environment only. API keys must never be embedded
# as fallback defaults: anything written in source ends up in the repository
# history. The client id/secret that were committed here before should be
# treated as compromised and rotated.
# NOTE(review): REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET must be configured
# as deployment secrets; without them the Reddit client cannot authenticate.
reddit = praw.Reddit(
    client_id=os.getenv("REDDIT_CLIENT_ID"),
    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
    # The user agent is not a secret, so a default is acceptable here.
    user_agent=os.getenv("REDDIT_USER_AGENT", "myscript by u/usman_afzal"),
)
32
 
33
+ # Extract text from Reddit URL
34
  def fetch_reddit_text(reddit_url):
35
  try:
36
  submission = reddit.submission(url=reddit_url)
 
38
  except Exception as e:
39
  return f"Error fetching Reddit post: {str(e)}"
40
 
41
# OCR from image
def extract_text_from_image(image):
    """Run Tesseract OCR on ``image`` and return the recognised text.

    Failures are reported in-band: instead of raising, a string starting with
    "Error" is returned, which ``classify_sentiment`` checks for before it
    runs the classifier.

    Args:
        image: The image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The OCR text with surrounding whitespace stripped, or an
        "Error reading image: ..." message when OCR fails or finds no text.
    """
    try:
        text = pytesseract.image_to_string(image).strip()
    except Exception as e:  # boundary: surface any OCR failure as a message
        return f"Error reading image: {str(e)}"
    if not text:
        # An empty string would otherwise be classified as if it were input.
        return "Error reading image: no text detected"
    return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
# Fallback model logic
def fallback_classifier(text):
    """Classify ``text`` with the fallback RoBERTa model.

    Args:
        text: The text to classify.

    Returns:
        A string of the form "Fallback Prediction: <label>", where the label
        (Negative, Neutral or Positive) is the class with the highest softmax
        score.
    """
    # truncation=True only shortens the input when a maximum length is known,
    # so state it explicitly; long Reddit posts must not exceed the model's
    # position limit.
    # NOTE(review): 512 assumes the usual RoBERTa-base limit - confirm.
    encoded_input = fallback_tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():  # inference only, no gradients needed
        output = fallback_model(**encoded_input)
    # One input text, so take the first (only) row of logits.
    scores = softmax(output.logits.numpy()[0])
    labels = ['Negative', 'Neutral', 'Positive']
    return f"Fallback Prediction: {labels[scores.argmax()]}"
 
 
 
 
 
 
57
 
58
+ # Final classifier logic
59
  def classify_sentiment(text_input, reddit_url, image):
60
+ # Source detection
61
  if reddit_url.strip():
62
  text = fetch_reddit_text(reddit_url)
63
  elif image is not None:
64
+ text = extract_text_from_image(image)
 
 
 
 
 
 
 
65
  elif text_input.strip():
66
  text = text_input
67
  else:
68
+ return "[!] Please provide text input, Reddit URL, or image."
69
 
70
  if text.lower().startswith("error") or "Unable to extract" in text:
71
+ return f"[!] Error: {text}"
72
 
73
+ # Classification using main model
74
  try:
75
  inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
76
  outputs = model(inputs)
 
81
  if confidence < 0.5:
82
  return fallback_classifier(text)
83
 
84
+ return f"Prediction: {LABELS[pred_label]} (Confidence: {confidence:.2f})"
85
  except Exception as e:
86
  return f"[!] Prediction error: {str(e)}"
87
 
88
# Gradio UI
# Input widgets, listed in the positional order classify_sentiment takes them:
# (text_input, reddit_url, image).
text_box = gr.Textbox(label="Text Input", placeholder="Paste any content (tweet, comment, etc)...", lines=3)
reddit_box = gr.Textbox(label="Reddit Post URL", placeholder="Paste Reddit post URL (optional)")
image_box = gr.Image(label="Upload Image (Optional - text image)", type="pil")

demo = gr.Interface(
    fn=classify_sentiment,
    inputs=[text_box, reddit_box, image_box],
    outputs="text",
    title="🌍 Multilingual Sentiment Analyzer",
    description="📊 Paste text, Reddit URL, or upload an image (screenshot of tweet etc.) to analyze sentiment.\nSupports fallback model if confidence is low.",
)

# Start the web app.
demo.launch()