nit454 committed on
Commit
64a9245
·
verified ·
1 Parent(s): 2d10be5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -42
app.py CHANGED
@@ -1,42 +1,37 @@
1
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
- import string
3
- import easyocr
4
-
5
- # Initialize OCR Reader once
6
- ocr_reader = easyocr.Reader(['en'])
7
-
8
- def extract_text_from_image(image_path):
9
- result = ocr_reader.readtext(image_path, detail=0)
10
- ocr_text = " ".join(result)
11
- return ocr_text
12
-
13
- def preprocess(text):
14
- return text.lower().translate(str.maketrans('', '', string.punctuation)).strip()
15
-
16
- def detect_sarcasm(combined_text):
17
- MODEL_NAME = "mrm8488/t5-base-finetuned-sarcasm-twitter"
18
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
20
-
21
- input_text = preprocess(combined_text)
22
- inputs = tokenizer.encode(input_text, return_tensors="pt")
23
- outputs = model.generate(inputs, max_length=2)
24
- prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
25
-
26
- sarcasm = prediction == "true"
27
- confidence = None # This model doesn’t output confidence scores directly
28
-
29
- return {"sarcasm": sarcasm, "confidence": confidence}
30
-
31
- if __name__ == "__main__":
32
- image_path = "path_to_image.jpg" # Replace with your image file path
33
- typed_text = "Your favorite sarcastic phrase here"
34
-
35
- ocr_text = extract_text_from_image(image_path)
36
- combined_text = ocr_text + " " + typed_text
37
-
38
- result = detect_sarcasm(combined_text)
39
- print("OCR Extracted Text:", ocr_text)
40
- print("Typed Text:", typed_text)
41
- print("Combined Text:", combined_text)
42
- print("Sarcasm Detection Result:", result)
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.feature_extraction.text import CountVectorizer
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.naive_bayes import BernoulliNB
6
+ import streamlit as st
7
+
8
+ # Load dataset
9
+ df = pd.read_json('sarcasm.json', lines=True)
10
+ df = df[["headline", "is_sarcastic"]]
11
+
12
+ # Prepare data
13
+ x = np.array(df["headline"])
14
+ y = np.array(df["is_sarcastic"]) # 0 = Not Sarcastic, 1 = Sarcastic
15
+
16
+ cv = CountVectorizer()
17
+ X = cv.fit_transform(x)
18
+ x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
19
+
20
+ # Train model
21
+ model = BernoulliNB()
22
+ model.fit(x_train, y_train)
23
+
24
+ # Streamlit app
25
+ def main():
26
+ st.title('Simple Sarcasm Detector')
27
+
28
+ user_input = st.text_input("Enter text to check if sarcastic:")
29
+
30
+ if st.button("Check"):
31
+ data = cv.transform([user_input]).toarray()
32
+ prediction = model.predict(data)[0]
33
+ label = "Sarcastic" if prediction == 1 else "Not Sarcastic"
34
+ st.write(f"The entered text is: **{label}**")
35
+
36
+ if __name__ == '__main__':
37
+ main()