gyanbardhan123 committed on
Commit
9bcddde
·
verified ·
1 Parent(s): 73cd6f5

Upload 6 files

Browse files
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app: detect whether two questions are duplicates of each other.

Loads a fine-tuned TFDistilBertForSequenceClassification (plus its tokenizer)
from a local directory and classifies a user-supplied question pair as
"Duplicate" / "Not Duplicate" with a confidence score.
"""

import streamlit as st
import tensorflow as tf
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizer

# Directory holding the fine-tuned weights (tf_model.h5, vocab.txt, configs).
MODEL_SAVE_PATH = "./saved_model"
# Token budget for the encoded question pair — presumably matches the
# max_length used at training time; TODO confirm against the training script.
MAX_LENGTH = 50


@st.cache_resource  # load the heavy model/tokenizer once, not on every rerun
def load_model_and_tokenizer(path: str):
    """Return (model, tokenizer) loaded from *path*.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the DistilBERT weights would be re-read from disk on
    each click, making the app unusably slow.
    """
    model = TFDistilBertForSequenceClassification.from_pretrained(path)
    tokenizer = DistilBertTokenizer.from_pretrained(path)
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer(MODEL_SAVE_PATH)

# --- UI ---------------------------------------------------------------------
st.title("Duplicate Question Detection")

question1 = st.text_input("Enter the first question:")
question2 = st.text_input("Enter the second question:")

if st.button("Predict"):
    if question1 and question2:
        # Encode the two questions as a single sentence pair.
        inputs = tokenizer(
            [question1], [question2],
            return_tensors='tf',
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        outputs = model(inputs)
        probabilities = tf.nn.softmax(outputs.logits, axis=-1)
        prediction = int(tf.argmax(probabilities, axis=1).numpy()[0])  # 0 or 1
        # Report the confidence of the *predicted* class rather than dumping
        # the raw two-element probability array at the user.
        confidence = float(probabilities.numpy()[0][prediction])

        st.write(f"Prediction: {'Duplicate' if prediction == 1 else 'Not Duplicate'}")
        st.write(f"Probability: {confidence:.4f}")
    else:
        st.write("Please enter both questions.")
saved_model/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d022168ffedd91a66e166e9ec2989cf2cec76ab7b816d75ded65591794096a1
3
+ size 538
saved_model/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
3
+ size 125
saved_model/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa94cfd242b9dd4584c49268f3da4135b642eba46ba284bae3d6a7fb57104b59
3
+ size 267951808
saved_model/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df5a6bb4f12d30d81f8043c57b4714a8ef69ed19921531187dfb3c4fb724ac0
3
+ size 1248
saved_model/vocab.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
3
+ size 231508