Spaces:

aarnow
/

modelDemo

Sleeping

App Files Community

aarnow committed on Jan 15

Commit

434ab59

1 Parent(s): bb436e2

Fix build issues: add streamlit to requirements, fix deprecated tokenizer API, fix matplotlib backend for Streamlit

Browse files

Files changed (3) hide show

.gitignore +14 -0
app.py +17 -8
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+venv/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+streamlit.log
+*.png
+foo.png
+.env
+.venv
+env/
+ENV/

app.py CHANGED Viewed

@@ -3,8 +3,11 @@ from presidio_analyzer import AnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine
 from transformers import AutoTokenizer, AutoModel
 from torch.nn import functional as F
 import matplotlib.pyplot as plt
 import torch
 model = AutoModel.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
 tokenizer = AutoTokenizer.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
@@ -45,7 +48,7 @@ def main():
         # dimension to get sequence-level representations
         inputs = tokenizer.batch_encode_plus([sentence] + labels,
                                              return_tensors='pt',
-                                             pad_to_max_length=True)
         input_ids = inputs['input_ids']
         attention_mask = inputs['attention_mask']
         output = model(input_ids, attention_mask=attention_mask)[0]
@@ -60,12 +63,13 @@ def main():
         #map the labels
         tensor_datalbl = label_reps.detach()
         x_values = tensor_datalbl[:, 0].numpy()
         y_values = tensor_datalbl[:, 1].numpy()
         # Create a scatter plot for labels
-        plt.scatter(x_values, y_values)
         # Add labels to specific points (adjust indices as needed)
         for i in range(len(tensor_datalbl)):
@@ -76,17 +80,22 @@ def main():
         tensor_datasen = sentence_rep.detach()
         # Extract the individual dimensions for the scatter plot
-        x_values = tensor_datasen[:, 0].numpy()
-        y_values = tensor_datasen[:, 1].numpy()
-        plt.scatter(x_values, y_values)
         plt.title('2D Representation of Similarity Estimates (2D)')
         plt.xlabel('X-axis')
         plt.ylabel('Y-axis')
-        #plt.show()
-        plt.savefig('foo.png', bbox_inches='tight')
-        st.image("foo.png")
         st.subheader("Classification Details")
         for ind in closest:
             #print(f'label: {labels[ind]} \t similarity: {similarities[ind]}')

 from presidio_anonymizer import AnonymizerEngine
 from transformers import AutoTokenizer, AutoModel
 from torch.nn import functional as F
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend for Streamlit
 import matplotlib.pyplot as plt
 import torch
+import io
 model = AutoModel.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
 tokenizer = AutoTokenizer.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
         # dimension to get sequence-level representations
         inputs = tokenizer.batch_encode_plus([sentence] + labels,
                                              return_tensors='pt',
+                                             padding=True)
         input_ids = inputs['input_ids']
         attention_mask = inputs['attention_mask']
         output = model(input_ids, attention_mask=attention_mask)[0]
         #map the labels
+        plt.clf()  # Clear previous plot
         tensor_datalbl = label_reps.detach()
         x_values = tensor_datalbl[:, 0].numpy()
         y_values = tensor_datalbl[:, 1].numpy()
         # Create a scatter plot for labels
+        plt.scatter(x_values, y_values, label='Labels')
         # Add labels to specific points (adjust indices as needed)
         for i in range(len(tensor_datalbl)):
         tensor_datasen = sentence_rep.detach()
         # Extract the individual dimensions for the scatter plot
+        x_values_sen = tensor_datasen[:, 0].numpy()
+        y_values_sen = tensor_datasen[:, 1].numpy()
+        plt.scatter(x_values_sen, y_values_sen, label='Input Sentence', color='red', marker='x', s=100)
         plt.title('2D Representation of Similarity Estimates (2D)')
         plt.xlabel('X-axis')
         plt.ylabel('Y-axis')
+        plt.legend()
+        # Save to BytesIO instead of file system
+        buf = io.BytesIO()
+        plt.savefig(buf, format='png', bbox_inches='tight')
+        buf.seek(0)
+        st.image(buf)
+        buf.close()
         st.subheader("Classification Details")
         for ind in closest:
             #print(f'label: {labels[ind]} \t similarity: {similarities[ind]}')

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 transformers
 datasets
 torch

+streamlit==1.31.0
 transformers
 datasets
 torch