nit454 committed on
Commit
7a8ffc5
·
verified ·
1 Parent(s): 1a3f29f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
2
  import string
3
  import easyocr
4
 
@@ -6,30 +6,29 @@ import easyocr
6
  ocr_reader = easyocr.Reader(['en'])
7
 
8
  def extract_text_from_image(image_path):
9
- # Extract text lines from image
10
  result = ocr_reader.readtext(image_path, detail=0)
11
- ocr_text = " ".join(result) # Join into one string
12
  return ocr_text
13
 
14
  def preprocess(text):
15
- # Lowercase, strip punctuation, and whitespace
16
  return text.lower().translate(str.maketrans('', '', string.punctuation)).strip()
17
 
18
  def detect_sarcasm(combined_text):
19
- MODEL_NAME = "helinivan/english-sarcasm-detector"
20
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
21
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
22
 
23
- inputs = tokenizer([preprocess(combined_text)], padding=True, truncation=True, max_length=256, return_tensors="pt")
24
- outputs = model(**inputs)
25
- probs = outputs.logits.softmax(dim=-1).tolist()[0]
26
- sarcasm_pred = probs.index(max(probs))
27
- confidence = max(probs)
28
 
29
- return {"sarcasm": bool(sarcasm_pred), "confidence": confidence}
 
 
 
30
 
31
  if __name__ == "__main__":
32
- # Example usage:
33
  image_path = "path_to_image.jpg" # Replace with your image file path
34
  typed_text = "Your favorite sarcastic phrase here"
35
 
 
1
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
  import string
3
  import easyocr
4
 
 
6
  ocr_reader = easyocr.Reader(['en'])
7
 
8
  def extract_text_from_image(image_path):
 
9
  result = ocr_reader.readtext(image_path, detail=0)
10
+ ocr_text = " ".join(result)
11
  return ocr_text
12
 
13
  def preprocess(text):
 
14
  return text.lower().translate(str.maketrans('', '', string.punctuation)).strip()
15
 
16
  def detect_sarcasm(combined_text):
17
+ MODEL_NAME = "mrm8488/t5-base-finetuned-sarcasm-twitter"
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
20
 
21
+ input_text = preprocess(combined_text)
22
+ inputs = tokenizer.encode(input_text, return_tensors="pt")
23
+ outputs = model.generate(inputs, max_length=2)
24
+ prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
25
 
26
+ sarcasm = prediction == "true"
27
+ confidence = None # This model doesn’t output confidence scores directly
28
+
29
+ return {"sarcasm": sarcasm, "confidence": confidence}
30
 
31
  if __name__ == "__main__":
 
32
  image_path = "path_to_image.jpg" # Replace with your image file path
33
  typed_text = "Your favorite sarcastic phrase here"
34