nit454 committed on
Commit
b4cc170
·
verified ·
1 Parent(s): 64a9245

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -36
app.py CHANGED
@@ -1,37 +1,78 @@
 
 
 
1
  import pandas as pd
2
- import numpy as np
3
- from sklearn.feature_extraction.text import CountVectorizer
4
- from sklearn.model_selection import train_test_split
5
- from sklearn.naive_bayes import BernoulliNB
6
- import streamlit as st
7
-
8
- # Load dataset
9
- df = pd.read_json('sarcasm.json', lines=True)
10
- df = df[["headline", "is_sarcastic"]]
11
-
12
- # Prepare data
13
- x = np.array(df["headline"])
14
- y = np.array(df["is_sarcastic"]) # 0 = Not Sarcastic, 1 = Sarcastic
15
-
16
- cv = CountVectorizer()
17
- X = cv.fit_transform(x)
18
- x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
19
-
20
- # Train model
21
- model = BernoulliNB()
22
- model.fit(x_train, y_train)
23
-
24
- # Streamlit app
25
- def main():
26
- st.title('Simple Sarcasm Detector')
27
-
28
- user_input = st.text_input("Enter text to check if sarcastic:")
29
-
30
- if st.button("Check"):
31
- data = cv.transform([user_input]).toarray()
32
- prediction = model.predict(data)[0]
33
- label = "Sarcastic" if prediction == 1 else "Not Sarcastic"
34
- st.write(f"The entered text is: **{label}**")
35
-
36
- if __name__ == '__main__':
37
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from tensorflow.keras.preprocessing.text import Tokenizer
3
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
4
  import pandas as pd
5
+ import re
6
+ import string
7
+ from nltk.corpus import stopwords
8
+ from nltk.tokenize import word_tokenize
9
+ from keras.models import load_model
10
+ import nltk
11
+ import cloudpickle
12
+ import easyocr
13
+
14
# Download the NLTK resources used below. nltk.download() skips packages that
# are already installed and up to date, so repeated start-ups stay cheap.
nltk.download('stopwords')
nltk.download('punkt')
# Newer NLTK releases load word_tokenize's Punkt data from 'punkt_tab' rather
# than the pickled 'punkt' package; without it word_tokenize raises
# LookupError. Fetching both keeps the app working on old and new NLTK.
nltk.download('punkt_tab')

# Load the pre-trained model and tokenizer
model = load_model('Sarcasmmodel.h5')
# NOTE(security): unpickling executes arbitrary code from the file, so
# tokenizer.pkl must be a trusted artifact shipped with the app — never a
# user-supplied upload.
with open('tokenizer.pkl', 'rb') as file:
    tokenizer_obj = cloudpickle.load(file)

# Initialize EasyOCR Reader once at import time so every request reuses it
ocr_reader = easyocr.Reader(['en'])
25
+
26
+ # Text cleaning function
27
# Patterns and the punctuation table never change, so build them once at
# import time instead of on every call.
_URL_RE = re.compile(r"http\S+|www\S+|https\S+")
_MENTION_HASH_RE = re.compile(r'\@\w+|\#')
_DIGITS_RE = re.compile(r'\d+')
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text: str) -> str:
    """Return *text* lower-cased with URLs, mentions, punctuation and digits removed.

    Steps, in order:
      1. lower-case the text;
      2. delete URL-like runs starting with ``http`` or ``www``;
      3. delete ``@mention`` tokens and bare ``#`` characters (the hashtag
         word itself is kept);
      4. delete every character in ``string.punctuation``;
      5. delete runs of digits.

    Removed spans are deleted, not replaced by a space, so surrounding
    whitespace is left as-is and e.g. ``"well-known"`` becomes
    ``"wellknown"``.
    """
    text = text.lower()
    # No ^/$ anchors in the pattern, so re.MULTILINE would have no effect.
    text = _URL_RE.sub('', text)
    text = _MENTION_HASH_RE.sub('', text)
    text = text.translate(_PUNCT_TABLE)
    return _DIGITS_RE.sub('', text)
34
+
35
+ # Tokenize and remove stopwords
36
def CleanTokenize(df):
    """Clean and tokenize every entry of ``df["headline"]``.

    Each headline is passed through ``clean_text``, split with NLTK's
    ``word_tokenize``, and filtered down to purely alphabetic tokens that are
    not English stop words.

    Args:
        df: object exposing a ``"headline"`` column whose ``.values.tolist()``
            yields the headline strings (a pandas DataFrame in this file).

    Returns:
        A list with one entry per headline, each a list of the kept tokens
        (possibly empty).
    """
    # The stop-word set does not depend on the headline; build it once
    # rather than once per row.
    stop_words = set(stopwords.words("english"))

    head_lines = []
    for line in df["headline"].values.tolist():
        tokens = word_tokenize(clean_text(line))
        head_lines.append(
            [tok for tok in tokens if tok.isalpha() and tok not in stop_words]
        )
    return head_lines
47
+
48
+ # Predict sarcasm with confidence
49
def predict_sarcasm(text, max_length=25):
    """Classify *text* as sarcastic or not and report the confidence.

    Args:
        text: raw text to classify.
        max_length: length the token sequence is padded/truncated to before
            it is fed to the model (post-padding).

    Returns:
        A string of the form ``"**Result:** <verdict>\\n**Confidence:** <pct>%"``,
        where the confidence refers to the verdict that is shown. If cleaning
        leaves no usable words, a short explanatory message is returned
        instead of a prediction.
    """
    x_final = pd.DataFrame({"headline": [text]})
    test_lines = CleanTokenize(x_final)

    # With no tokens left the model would only see padding and the score
    # would say nothing about the input, so don't present it as a result.
    if not test_lines[0]:
        return "No analyzable words found in the text."

    test_sequences = tokenizer_obj.texts_to_sequences(test_lines)
    test_review_pad = pad_sequences(test_sequences, maxlen=max_length, padding='post')
    pred = model.predict(test_review_pad)

    # NOTE(review): assumes the model emits a single sarcasm probability in
    # [0, 1] (e.g. a sigmoid unit) — confirm against the training code.
    sarcasm_pct = float(pred[0][0]) * 100
    is_sarcastic = sarcasm_pct >= 50

    result = "It's a sarcasm!" if is_sarcastic else "It's not a sarcasm."
    # Report confidence in the displayed verdict: a 3% sarcasm score means
    # 97% confidence in "not a sarcasm", not 3%.
    confidence = sarcasm_pct if is_sarcastic else 100 - sarcasm_pct
    return f"**Result:** {result}\n**Confidence:** {confidence:.2f}%"
59
+
60
+ # OCR + Sarcasm prediction pipeline
61
def ocr_sarcasm_detection(image):
    """Run OCR on *image* and classify the extracted text for sarcasm.

    Args:
        image: the image to read, as accepted by ``ocr_reader.readtext``
            (the Gradio interface in this file passes a file path), or
            ``None`` when nothing was uploaded.

    Returns:
        The formatted prediction from ``predict_sarcasm``, or a short
        message when no image was supplied or no text could be read.
    """
    # Submitting the form without an upload passes None here; answer with a
    # message instead of letting the OCR call fail.
    if image is None:
        return "No image provided. Please upload an image containing text."

    # detail=0 makes readtext return only the recognised strings.
    extracted_text = " ".join(ocr_reader.readtext(image, detail=0))
    if not extracted_text.strip():
        return "No text detected in the image."
    return predict_sarcasm(extracted_text)
67
+
68
+ # Gradio interface takes only image input; no text input or recommendations
69
# Single-input UI: the uploaded image is handed to ocr_sarcasm_detection as a
# file path, and the returned string is shown in a text box.
iface = gr.Interface(
    title="OCR-based Sarcasm Detection 🤖",
    description="Upload an image containing text (e.g., meme or screenshot). The app extracts text via OCR and predicts sarcasm.",
    theme="default",
    fn=ocr_sarcasm_detection,
    inputs=gr.Image(type="filepath", label="Upload Image with Text"),
    outputs=gr.Textbox(label="Sarcasm Detection Result"),
)

# Start the Gradio server.
iface.launch()