Spaces:

nit454
/

sarcasm_module

Sleeping

nit454 committed on Nov 2, 2025

Commit

7a8ffc5

verified ·

1 Parent(s): 1a3f29f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import string
 import easyocr
@@ -6,30 +6,29 @@ import easyocr
 ocr_reader = easyocr.Reader(['en'])
 def extract_text_from_image(image_path):
     """Run OCR over an image and return everything it read as one string.

     Args:
         image_path: Image location handed straight to ``ocr_reader.readtext``.

     Returns:
         The OCR results joined with single spaces. If OCR yields nothing the
         result is an empty string.
     """
     # detail=0 is passed so that the reader's results can be joined directly
     # as strings (presumably plain text without boxes/scores - confirm
     # against the EasyOCR docs).
     text_fragments = ocr_reader.readtext(image_path, detail=0)
     return " ".join(text_fragments)
 def preprocess(text):
     """Normalise text before it is handed to the model.

     Lowercases the input, deletes every character listed in
     ``string.punctuation`` (ASCII punctuation only), then trims leading and
     trailing whitespace. Interior whitespace is left untouched.

     Args:
         text: The raw string to clean.

     Returns:
         The cleaned string.
     """
     punctuation_remover = str.maketrans('', '', string.punctuation)
     lowered = text.lower()
     without_punctuation = lowered.translate(punctuation_remover)
     return without_punctuation.strip()
 def detect_sarcasm(combined_text):
     """Classify text as sarcastic or not with a sequence-classification model.

     The tokenizer and model are loaded from ``MODEL_NAME`` on every call. The
     input is cleaned with ``preprocess`` and truncated to 256 tokens before
     scoring.

     Args:
         combined_text: The text to classify.

     Returns:
         A dict with ``"sarcasm"`` (``bool`` of the index of the most probable
         class, so index 0 maps to ``False``) and ``"confidence"`` (the
         softmax probability of that class).
     """
     MODEL_NAME = "helinivan/english-sarcasm-detector"
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

     cleaned = preprocess(combined_text)
     encoded = tokenizer(
         [cleaned],
         padding=True,
         truncation=True,
         max_length=256,
         return_tensors="pt",
     )

     logits = model(**encoded).logits
     # Only one text is scored, so take the first (and only) row.
     class_probs = logits.softmax(dim=-1).tolist()[0]
     confidence = max(class_probs)
     predicted_class = class_probs.index(confidence)
     return {"sarcasm": bool(predicted_class), "confidence": confidence}
 if __name__ == "__main__":
-    # Example usage:
     image_path = "path_to_image.jpg"  # Replace with your image file path
     typed_text = "Your favorite sarcastic phrase here"

+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import string
 import easyocr
 ocr_reader = easyocr.Reader(['en'])
 def extract_text_from_image(image_path):
     """Read the text in an image via OCR and return it as a single string.

     Args:
         image_path: Image location passed unchanged to
             ``ocr_reader.readtext``.

     Returns:
         All OCR results concatenated with a single space between them; an
         empty string when nothing was recognised.
     """
     # detail=0 is requested so the results can be joined as strings
     # (presumably text only, no boxes or scores - confirm in EasyOCR docs).
     recognised_pieces = ocr_reader.readtext(image_path, detail=0)
     return " ".join(recognised_pieces)
 def preprocess(text):
     """Lowercase text, remove ASCII punctuation, and trim outer whitespace.

     Args:
         text: The raw string to clean.

     Returns:
         The lowercased string with every ``string.punctuation`` character
         deleted and leading/trailing whitespace stripped. Whitespace inside
         the string is preserved.
     """
     strip_punctuation = str.maketrans('', '', string.punctuation)
     cleaned = text.lower().translate(strip_punctuation)
     return cleaned.strip()
 def detect_sarcasm(combined_text):
     """Classify text as sarcastic or not with a fine-tuned T5 model.

     The tokenizer and model are loaded from ``MODEL_NAME`` on every call. The
     input is cleaned with ``preprocess``, encoded, and the model generates a
     short text label that is mapped to a boolean.

     Args:
         combined_text: The text to classify.

     Returns:
         A dict with ``"sarcasm"`` (``True`` when the generated label is one
         of the known sarcasm labels) and ``"confidence"`` (always ``None``;
         the generated label carries no probability).
     """
     MODEL_NAME = "mrm8488/t5-base-finetuned-sarcasm-twitter"
     # NOTE(review): the published model card shows this checkpoint emitting
     # the labels "derison" (sic) / "normal", not "true" / "false". Comparing
     # only against "true" would make every input come back non-sarcastic.
     # "true" is still accepted so the previous contract keeps working -
     # confirm the exact label strings against the model card.
     SARCASM_LABELS = {"derison", "true"}

     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

     input_text = preprocess(combined_text)
     # Truncate long inputs (e.g. large OCR dumps); 256 matches the limit the
     # earlier classifier-based version of this function used.
     inputs = tokenizer.encode(
         input_text,
         return_tensors="pt",
         truncation=True,
         max_length=256,
     )
     # max_length counts the decoder start token too, so 2 leaves room for a
     # single generated token. The model card generates with max_length=3.
     outputs = model.generate(inputs, max_length=3)
     prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

     sarcasm = prediction.strip().lower() in SARCASM_LABELS
     confidence = None  # The generated label text has no score attached
     return {"sarcasm": sarcasm, "confidence": confidence}
 if __name__ == "__main__":
     image_path = "path_to_image.jpg"  # Replace with your image file path
     typed_text = "Your favorite sarcastic phrase here"