nit454 committed on
Commit
94cca97
·
verified ·
1 Parent(s): 838ad63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -25
app.py CHANGED
@@ -9,21 +9,18 @@ from nltk.tokenize import word_tokenize
9
  from keras.models import load_model
10
  import nltk
11
  import cloudpickle
12
- import easyocr
13
 
14
- # Download required NLTK data (only needed once)
15
  nltk.download('stopwords')
16
  nltk.download('punkt')
17
 
18
- # Load the pre-trained model and tokenizer
19
  model = load_model('Sarcasmmodel.h5')
 
20
  with open('tokenizer.pkl', 'rb') as file:
21
  tokenizer_obj = cloudpickle.load(file)
22
 
23
- # Initialize EasyOCR Reader once
24
- ocr_reader = easyocr.Reader(['en'])
25
-
26
- # Text cleaning function
27
  def clean_text(text):
28
  text = text.lower()
29
  text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
@@ -32,7 +29,7 @@ def clean_text(text):
32
  text = re.sub(r'\d+', '', text)
33
  return text
34
 
35
- # Tokenize and remove stopwords
36
  def CleanTokenize(df):
37
  head_lines = []
38
  lines = df["headline"].values.tolist()
@@ -45,34 +42,35 @@ def CleanTokenize(df):
45
  head_lines.append(words)
46
  return head_lines
47
 
48
- # Predict sarcasm with confidence
49
  def predict_sarcasm(text, max_length=25):
50
  x_final = pd.DataFrame({"headline": [text]})
51
  test_lines = CleanTokenize(x_final)
52
  test_sequences = tokenizer_obj.texts_to_sequences(test_lines)
53
  test_review_pad = pad_sequences(test_sequences, maxlen=max_length, padding='post')
54
  pred = model.predict(test_review_pad)
55
- confidence = pred[0][0] * 100 # percentage
56
-
57
  result = "It's a sarcasm!" if confidence >= 50 else "It's not a sarcasm."
58
  return f"**Result:** {result}\n**Confidence:** {confidence:.2f}%"
59
 
60
- # OCR + Sarcasm prediction pipeline
61
- def ocr_sarcasm_detection(image):
62
- # Extract text from image with OCR
63
- extracted_text = " ".join(ocr_reader.readtext(image, detail=0))
64
- if not extracted_text.strip():
65
- return "No text detected in the image."
66
- return predict_sarcasm(extracted_text)
67
 
68
- # Gradio interface takes only image input; no text input or recommendations
69
  iface = gr.Interface(
70
- fn=ocr_sarcasm_detection,
71
- inputs=gr.Image(type="filepath", label="Upload Image with Text"),
72
- outputs=gr.Textbox(label="Sarcasm Detection Result"),
73
- title="OCR-based Sarcasm Detection 🤖",
74
- description="Upload an image containing text (e.g., meme or screenshot). The app extracts text via OCR and predicts sarcasm.",
 
 
 
 
 
75
  theme="default"
76
  )
77
 
78
- iface.launch()
 
 
9
  from keras.models import load_model
10
  import nltk
11
  import cloudpickle
 
12
 
13
+ # Download required NLTK data
14
  nltk.download('stopwords')
15
  nltk.download('punkt')
16
 
17
+ # Load the pre-trained model
18
  model = load_model('Sarcasmmodel.h5')
19
+
20
  with open('tokenizer.pkl', 'rb') as file:
21
  tokenizer_obj = cloudpickle.load(file)
22
 
23
+ # Function to clean the text
 
 
 
24
  def clean_text(text):
25
  text = text.lower()
26
  text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
 
29
  text = re.sub(r'\d+', '', text)
30
  return text
31
 
32
+ # Function to tokenize and clean the text data
33
  def CleanTokenize(df):
34
  head_lines = []
35
  lines = df["headline"].values.tolist()
 
42
  head_lines.append(words)
43
  return head_lines
44
 
45
+ # Function to predict sarcasm
46
  def predict_sarcasm(text, max_length=25):
47
  x_final = pd.DataFrame({"headline": [text]})
48
  test_lines = CleanTokenize(x_final)
49
  test_sequences = tokenizer_obj.texts_to_sequences(test_lines)
50
  test_review_pad = pad_sequences(test_sequences, maxlen=max_length, padding='post')
51
  pred = model.predict(test_review_pad)
52
+ confidence = pred[0][0] * 100 # Convert to percentage
 
53
  result = "It's a sarcasm!" if confidence >= 50 else "It's not a sarcasm."
54
  return f"**Result:** {result}\n**Confidence:** {confidence:.2f}%"
55
 
56
+ # Gradio interface
57
+ def gradio_interface(text):
58
+ return predict_sarcasm(text)
 
 
 
 
59
 
60
+ # Create the Gradio app
61
  iface = gr.Interface(
62
+ fn=gradio_interface,
63
+ inputs=gr.Textbox(lines=2, placeholder="Type something sarcastic...", label="Input Text"),
64
+ outputs=gr.Textbox(label="Prediction"),
65
+ title="🤖 Sarcasm Detection",
66
+ description="This app detects whether a given text is sarcastic or not.",
67
+ examples=[
68
+ ["Oh great, another Monday morning!"],
69
+ ["I just love spending hours in traffic."],
70
+ ["This is the best day of my life!"]
71
+ ],
72
  theme="default"
73
  )
74
 
75
+ # Launch the app
76
+ iface.launch()