Spaces:

nit454
/

sarcasm_module

Sleeping

App Files Files Community

nit454 committed on Nov 2, 2025

Commit

b4cc170

verified ·

1 Parent(s): 64a9245

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -36

app.py CHANGED Viewed

@@ -1,37 +1,78 @@
 import pandas as pd
-import numpy as np
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.model_selection import train_test_split
-from sklearn.naive_bayes import BernoulliNB
-import streamlit as st
-# Load dataset
-df = pd.read_json('sarcasm.json', lines=True)
-df = df[["headline", "is_sarcastic"]]
-# Prepare data
-x = np.array(df["headline"])
-y = np.array(df["is_sarcastic"])  # 0 = Not Sarcastic, 1 = Sarcastic
-cv = CountVectorizer()
-X = cv.fit_transform(x)
-x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
-# Train model
-model = BernoulliNB()
-model.fit(x_train, y_train)
-# Streamlit app
-def main():
-    st.title('Simple Sarcasm Detector')
-    user_input = st.text_input("Enter text to check if sarcastic:")
-    if st.button("Check"):
-        data = cv.transform([user_input]).toarray()
-        prediction = model.predict(data)[0]
-        label = "Sarcastic" if prediction == 1 else "Not Sarcastic"
-        st.write(f"The entered text is: **{label}**")
-if __name__ == '__main__':
-    main()

+import gradio as gr
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pandas as pd
+import re
+import string
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from keras.models import load_model
+import nltk
+import cloudpickle
+import easyocr
+# Fetch the NLTK data used by the tokenizing step (only needed once).
+for nltk_resource in ('stopwords', 'punkt'):
+    nltk.download(nltk_resource)
+# Load the pre-trained model from disk.
+model = load_model('Sarcasmmodel.h5')
+# Load the pickled tokenizer that turns token lists into integer sequences.
+# NOTE(review): unpickling can execute code embedded in the file, so
+# tokenizer.pkl must only ever come from a trusted source.
+with open('tokenizer.pkl', 'rb') as tokenizer_file:
+    tokenizer_obj = cloudpickle.load(tokenizer_file)
+# Create the English EasyOCR reader a single time at start-up so that it is
+# shared by every request instead of being rebuilt per call.
+ocr_reader = easyocr.Reader(['en'])
+# Text cleaning function
+def clean_text(text):
+    """Normalize raw text before it is tokenized.
+
+    The input is lower-cased and then stripped, in this order, of URLs,
+    ``@mentions`` and ``#`` characters, ASCII punctuation, and runs of
+    digits. Whitespace is not collapsed, so removed spans may leave
+    consecutive spaces behind.
+
+    Args:
+        text: The string to clean.
+
+    Returns:
+        The cleaned, lower-cased string.
+    """
+    lowered = text.lower()
+    without_urls = re.sub(r"http\S+|www\S+|https\S+", '', lowered, flags=re.MULTILINE)
+    # Mentions are removed before punctuation is stripped, because the
+    # pattern relies on the leading "@" still being present.
+    without_tags = re.sub(r'\@\w+|\#', '', without_urls)
+    punctuation_table = str.maketrans('', '', string.punctuation)
+    without_punctuation = without_tags.translate(punctuation_table)
+    return re.sub(r'\d+', '', without_punctuation)
+# Tokenize and remove stopwords
+def CleanTokenize(df):
+    """Clean and tokenize every entry of the ``headline`` column of ``df``.
+
+    Each headline is passed through ``clean_text`` and split with
+    ``word_tokenize``. Only purely alphabetic tokens that are not English
+    stop words are kept.
+
+    Args:
+        df: Object indexable by ``"headline"`` whose value exposes
+            ``.values.tolist()`` (the caller in this file passes a pandas
+            DataFrame).
+
+    Returns:
+        A list with one list of kept tokens per headline, in input order.
+    """
+    # The stop-word set does not depend on the row, so build it once rather
+    # than on every loop iteration.
+    stop_words = set(stopwords.words("english"))
+    head_lines = []
+    for line in df["headline"].values.tolist():
+        tokens = word_tokenize(clean_text(line))
+        head_lines.append(
+            [tok for tok in tokens if tok.isalpha() and tok not in stop_words]
+        )
+    return head_lines
+# Predict sarcasm with confidence
+def predict_sarcasm(text, max_length=25):
+    """Run the sarcasm model on ``text`` and format the outcome.
+
+    The text is cleaned and tokenized with ``CleanTokenize``, converted to
+    integer sequences by ``tokenizer_obj``, post-padded to ``max_length``
+    and passed to ``model.predict``.
+
+    Args:
+        text: The text to classify.
+        max_length: Sequence length handed to ``pad_sequences``.
+
+    Returns:
+        A two-line string holding the verdict and a percentage. The
+        percentage is the first model output scaled by 100 and is reported
+        unchanged for both verdicts (presumably the sarcasm probability --
+        verify against the model's output layer).
+    """
+    frame = pd.DataFrame({"headline": [text]})
+    token_lists = CleanTokenize(frame)
+    sequences = tokenizer_obj.texts_to_sequences(token_lists)
+    padded = pad_sequences(sequences, maxlen=max_length, padding='post')
+    scores = model.predict(padded)
+    confidence = scores[0][0] * 100  # percentage
+    if confidence >= 50:
+        result = "It's a sarcasm!"
+    else:
+        result = "It's not a sarcasm."
+    return f"**Result:** {result}\n**Confidence:** {confidence:.2f}%"
+# OCR + Sarcasm prediction pipeline
+def ocr_sarcasm_detection(image):
+    """Extract text from an image with OCR and classify it for sarcasm.
+
+    Args:
+        image: The value delivered by the image input (this file configures
+            it as a file path). It is ``None`` when the user submits without
+            uploading anything.
+
+    Returns:
+        A message asking for an image when none was supplied, a notice when
+        OCR finds no text, otherwise the string produced by
+        ``predict_sarcasm`` for the extracted text.
+    """
+    # Guard against an empty submission: handing None to the OCR reader
+    # would raise instead of giving the user a readable message.
+    if image is None:
+        return "No image provided. Please upload an image containing text."
+    # With detail=0 the reader yields plain strings, joined here into one
+    # space-separated text.
+    extracted_text = " ".join(ocr_reader.readtext(image, detail=0))
+    if not extracted_text.strip():
+        return "No text detected in the image."
+    return predict_sarcasm(extracted_text)
+# Gradio interface takes only image input; no text input or recommendations
+# Input and output components, named so the wiring below reads at a glance.
+image_input = gr.Image(type="filepath", label="Upload Image with Text")
+result_output = gr.Textbox(label="Sarcasm Detection Result")
+# Connect the OCR + sarcasm pipeline to one image input and one text output.
+iface = gr.Interface(
+    fn=ocr_sarcasm_detection,
+    inputs=image_input,
+    outputs=result_output,
+    title="OCR-based Sarcasm Detection 🤖",
+    description="Upload an image containing text (e.g., meme or screenshot). The app extracts text via OCR and predicts sarcasm.",
+    theme="default",
+)
+# Start the app; there is no __main__ guard, so this runs whenever the
+# module is executed or imported.
+iface.launch()