Spaces:

nit454
/

sarcasm_module

Sleeping

App Files Files Community

nit454 committed on Nov 2, 2025

Commit

64a9245

verified ·

1 Parent(s): 2d10be5

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -42

app.py CHANGED Viewed

@@ -1,42 +1,37 @@
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-import string
-import easyocr
-# Initialize OCR Reader once
-ocr_reader = easyocr.Reader(['en'])
-def extract_text_from_image(image_path):
-    result = ocr_reader.readtext(image_path, detail=0)
-    ocr_text = " ".join(result)
-    return ocr_text
-def preprocess(text):
-    return text.lower().translate(str.maketrans('', '', string.punctuation)).strip()
-def detect_sarcasm(combined_text):
-    MODEL_NAME = "mrm8488/t5-base-finetuned-sarcasm-twitter"
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-    input_text = preprocess(combined_text)
-    inputs = tokenizer.encode(input_text, return_tensors="pt")
-    outputs = model.generate(inputs, max_length=2)
-    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    sarcasm = prediction == "true"
-    confidence = None  # This model doesn’t output confidence scores directly
-    return {"sarcasm": sarcasm, "confidence": confidence}
-if __name__ == "__main__":
-    image_path = "path_to_image.jpg"  # Replace with your image file path
-    typed_text = "Your favorite sarcastic phrase here"
-    ocr_text = extract_text_from_image(image_path)
-    combined_text = ocr_text + " " + typed_text
-    result = detect_sarcasm(combined_text)
-    print("OCR Extracted Text:", ocr_text)
-    print("Typed Text:", typed_text)
-    print("Combined Text:", combined_text)
-    print("Sarcasm Detection Result:", result)

+import pandas as pd
+import numpy as np
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import BernoulliNB
+import streamlit as st
+# Load dataset: 'sarcasm.json' is read as JSON Lines (one record per line),
+# and only the text column and its label column are kept.
+df = pd.read_json('sarcasm.json', lines=True)
+df = df[["headline", "is_sarcastic"]]
+# Prepare data: headlines are the inputs, the 0/1 flag is the target.
+x = np.array(df["headline"])
+y = np.array(df["is_sarcastic"])  # 0 = Not Sarcastic, 1 = Sarcastic
+# Fit the vocabulary on every headline and turn each one into a row of
+# token counts; `cv` is the fitted vectorizer used for any later input.
+cv = CountVectorizer()
+X = cv.fit_transform(x)
+# Hold out 20% of the rows with a fixed seed. The held-out x_test/y_test are
+# not used anywhere in the code shown here, so the model is never evaluated.
+x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
+# Train model on the remaining 80% only.
+# NOTE(review): all of this runs at module import. Streamlit presumably
+# re-executes the script on each interaction, which would reload the data and
+# retrain every time -- confirm, and consider caching the fitted objects.
+model = BernoulliNB()
+model.fit(x_train, y_train)
+# Streamlit app
+def main():
+    """Render the Streamlit page and classify the text the user enters.
+
+    Shows a title, a text box and a "Check" button. When the button is
+    pressed, the input is vectorized with the module-level ``cv`` and
+    classified with the module-level ``model``; the resulting label is
+    written to the page. Blank input is rejected with a warning instead
+    of being classified.
+    """
+    st.title('Simple Sarcasm Detector')
+    user_input = st.text_input("Enter text to check if sarcastic:")
+    if st.button("Check"):
+        # An empty or whitespace-only box contains no words to classify, so
+        # any label shown for it would be meaningless; ask for text instead.
+        if not user_input.strip():
+            st.warning("Please enter some text first.")
+            return
+        # The model was fitted on the vectorizer's sparse output, so the
+        # sparse row can be passed to predict() without densifying it.
+        features = cv.transform([user_input])
+        prediction = model.predict(features)[0]
+        label = "Sarcastic" if prediction == 1 else "Not Sarcastic"
+        st.write(f"The entered text is: **{label}**")
+if __name__ == '__main__':
+    main()