Spaces:

jayasuriyaK
/

nsfw_classifier

Sleeping

App Files Files Community

jayasuriyaK committed on Apr 28, 2024

Commit

664a6cd

verified ·

1 Parent(s): e0c6f6e

Upload 5 files

Browse files

Files changed (5) hide show

CustomModel/config.json +27 -0
CustomModel/model.safetensors +3 -0
CustomModel/training_args.bin +3 -0
app.py +109 -0
requirements.txt +4 -0

CustomModel/config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

CustomModel/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0ae3b4736071ebf406209d00d51c502108761fafa3c8df37f6a009f0decb157
+size 437958648

CustomModel/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62f276a3fac2555bc29c7da8ad3095096c7ee3452711ca0c0cab720c0e053210
+size 4920

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+# Run the app:
+#   python -m streamlit run app.py
+import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+import math, keras_ocr
+# Initialize the keras_ocr pipeline, the BERT tokenizer and the classifier once, at import time
+pipeline = keras_ocr.pipeline.Pipeline()
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # same name as "_name_or_path" in CustomModel/config.json
+model_2 = BertForSequenceClassification.from_pretrained("CustomModel")  # weights/config added under CustomModel/ in this commit
+model_2.to('cpu')  # keep the classifier on the CPU
+import streamlit as st
+def get_distance(predictions):
+    """
+    Function returns dictionary with (key,value):
+        * text : detected text in image
+        * center_x : center of bounding box (x)
+        * center_y : center of bounding box (y)
+        * distance_from_origin : hypotenuse
+        * distance_y : distance between y and origin (0,0)
+    """
+    # Point of origin
+    x0, y0 = 0, 0
+    # Generate dictionary
+    detections = []
+    for group in predictions:
+        # Get center point of bounding box
+        top_left_x, top_left_y = group[1][0]
+        bottom_right_x, bottom_right_y = group[1][1]
+        center_x, center_y = (top_left_x + bottom_right_x)/2, (top_left_y + bottom_right_y)/2
+        # Use the Pythagorean Theorem to solve for distance from origin
+        distance_from_origin = math.dist([x0,y0], [center_x, center_y])
+        # Calculate difference between y and origin to get unique rows
+        distance_y = center_y - y0
+        # Append all results
+        detections.append({
+                            'text': group[0],
+                            'center_x': center_x,
+                            'center_y': center_y,
+                            'distance_from_origin': distance_from_origin,
+                            'distance_y': distance_y
+                        })
+    return detections
+def distinguish_rows(lst, thresh=15):
+    """Function to help distinguish unique rows"""
+    sublists = []
+    for i in range(0, len(lst)-1):
+        if (lst[i+1]['distance_y'] - lst[i]['distance_y'] <= thresh):
+            if lst[i] not in sublists:
+                sublists.append(lst[i])
+            sublists.append(lst[i+1])
+        else:
+            yield sublists
+            sublists = [lst[i+1]]
+    yield sublists
+# Title of the app
+st.title("Image Input App")
+# File uploader widget
+uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
+if uploaded_file is not None:
+# Read in image
+    read_image = keras_ocr.tools.read(uploaded_file)
+# prediction_groups is a list of (word, box) tuples
+    prediction_groups = pipeline.recognize([read_image])
+    predictions = prediction_groups[0] # extract text list
+    predictions = get_distance(predictions)
+    # Set thresh higher for text further apart
+    predictions = list(distinguish_rows(predictions, thresh=10))
+    # Remove all empty rows
+    predictions = list(filter(lambda x:x!=[], predictions))
+    # Order text detections in human readable format
+    ordered_preds = []
+    for row in predictions:
+        row = sorted(row, key=lambda x:x['distance_from_origin'])
+        for each in row: ordered_preds.append(each['text'])
+    # Join detections into sentence
+    sentance = ' '.join(ordered_preds)
+    #st.write(sentance)
+    text =sentance
+    print(text)
+    inputs = tokenizer(text,padding = True, truncation = True, return_tensors='pt').to('cpu')
+    outputs = model_2(**inputs)
+    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    predictions = predictions.cpu().detach().numpy()
+    print(predictions[0][0],predictions[0][1])
+    if predictions[0][0]>predictions[0][1]:
+        print('safe')
+        st.write('safe')
+    else:
+        print('Not safe')
+        st.write('n safe')

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers
+keras_ocr
+streamlit