gyanbardhan123 committed on
Commit
9bcddde
·
verified ·
1 Parent(s): 73cd6f5

Upload 6 files

Browse files
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app: detect whether two questions are duplicates of each other.

Loads a fine-tuned TFDistilBertForSequenceClassification (plus its tokenizer)
from a local directory and classifies a user-supplied question pair as
"Duplicate" / "Not Duplicate" with a confidence score.
"""

import streamlit as st
import tensorflow as tf
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizer

# Directory holding the fine-tuned weights (tf_model.h5, vocab.txt, configs).
MODEL_SAVE_PATH = "./saved_model"
# Token budget for the encoded question pair — presumably matches the
# max_length used at training time; TODO confirm against the training script.
MAX_LENGTH = 50


@st.cache_resource  # load the heavy model/tokenizer once, not on every rerun
def load_model_and_tokenizer(path: str):
    """Return (model, tokenizer) loaded from *path*.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the DistilBERT weights would be re-read from disk on
    each click, making the app unusably slow.
    """
    model = TFDistilBertForSequenceClassification.from_pretrained(path)
    tokenizer = DistilBertTokenizer.from_pretrained(path)
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer(MODEL_SAVE_PATH)

# --- UI ---------------------------------------------------------------------
st.title("Duplicate Question Detection")

question1 = st.text_input("Enter the first question:")
question2 = st.text_input("Enter the second question:")

if st.button("Predict"):
    if question1 and question2:
        # Encode the two questions as a single sentence pair.
        inputs = tokenizer(
            [question1], [question2],
            return_tensors='tf',
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        outputs = model(inputs)
        probabilities = tf.nn.softmax(outputs.logits, axis=-1)
        prediction = int(tf.argmax(probabilities, axis=1).numpy()[0])  # 0 or 1
        # Report the confidence of the *predicted* class rather than dumping
        # the raw two-element probability array at the user.
        confidence = float(probabilities.numpy()[0][prediction])

        st.write(f"Prediction: {'Duplicate' if prediction == 1 else 'Not Duplicate'}")
        st.write(f"Probability: {confidence:.4f}")
    else:
        st.write("Please enter both questions.")
saved_model/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d022168ffedd91a66e166e9ec2989cf2cec76ab7b816d75ded65591794096a1
3
+ size 538
saved_model/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d346be366a7d1d48332dbc9fdf3bf8960b5d879522b7799ddba59e76237ee3
3
+ size 125
saved_model/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa94cfd242b9dd4584c49268f3da4135b642eba46ba284bae3d6a7fb57104b59
3
+ size 267951808
saved_model/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df5a6bb4f12d30d81f8043c57b4714a8ef69ed19921531187dfb3c4fb724ac0
3
+ size 1248
saved_model/vocab.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
3
+ size 231508