aarnow committed on
Commit
434ab59
·
1 Parent(s): bb436e2

Fix build issues: add streamlit to requirements, fix deprecated tokenizer API, fix matplotlib backend for Streamlit

Browse files
Files changed (3) hide show
  1. .gitignore +14 -0
  2. app.py +17 -8
  3. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ streamlit.log
8
+ *.png
9
+ foo.png
10
+ .env
11
+ .venv
12
+ env/
13
+ ENV/
14
+
app.py CHANGED
@@ -3,8 +3,11 @@ from presidio_analyzer import AnalyzerEngine
3
  from presidio_anonymizer import AnonymizerEngine
4
  from transformers import AutoTokenizer, AutoModel
5
  from torch.nn import functional as F
 
 
6
  import matplotlib.pyplot as plt
7
  import torch
 
8
  model = AutoModel.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
9
  tokenizer = AutoTokenizer.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
10
 
@@ -45,7 +48,7 @@ def main():
45
  # dimension to get sequence-level representations
46
  inputs = tokenizer.batch_encode_plus([sentence] + labels,
47
  return_tensors='pt',
48
- pad_to_max_length=True)
49
  input_ids = inputs['input_ids']
50
  attention_mask = inputs['attention_mask']
51
  output = model(input_ids, attention_mask=attention_mask)[0]
@@ -60,12 +63,13 @@ def main():
60
 
61
 
62
  #map the labels
 
63
  tensor_datalbl = label_reps.detach()
64
  x_values = tensor_datalbl[:, 0].numpy()
65
  y_values = tensor_datalbl[:, 1].numpy()
66
 
67
  # Create a scatter plot for labels
68
- plt.scatter(x_values, y_values)
69
 
70
  # Add labels to specific points (adjust indices as needed)
71
  for i in range(len(tensor_datalbl)):
@@ -76,17 +80,22 @@ def main():
76
  tensor_datasen = sentence_rep.detach()
77
 
78
  # Extract the individual dimensions for the scatter plot
79
- x_values = tensor_datasen[:, 0].numpy()
80
- y_values = tensor_datasen[:, 1].numpy()
81
 
82
- plt.scatter(x_values, y_values)
83
 
84
  plt.title('2D Representation of Similarity Estimates (2D)')
85
  plt.xlabel('X-axis')
86
  plt.ylabel('Y-axis')
87
- #plt.show()
88
- plt.savefig('foo.png', bbox_inches='tight')
89
- st.image("foo.png")
 
 
 
 
 
90
  st.subheader("Classification Details")
91
  for ind in closest:
92
  #print(f'label: {labels[ind]} \t similarity: {similarities[ind]}')
 
3
  from presidio_anonymizer import AnonymizerEngine
4
  from transformers import AutoTokenizer, AutoModel
5
  from torch.nn import functional as F
6
+ import matplotlib
7
+ matplotlib.use('Agg') # Use non-interactive backend for Streamlit
8
  import matplotlib.pyplot as plt
9
  import torch
10
+ import io
11
  model = AutoModel.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
12
  tokenizer = AutoTokenizer.from_pretrained("aarnow/distilbert-base-uncased-1212-test")
13
 
 
48
  # dimension to get sequence-level representations
49
  inputs = tokenizer.batch_encode_plus([sentence] + labels,
50
  return_tensors='pt',
51
+ padding=True)
52
  input_ids = inputs['input_ids']
53
  attention_mask = inputs['attention_mask']
54
  output = model(input_ids, attention_mask=attention_mask)[0]
 
63
 
64
 
65
  #map the labels
66
+ plt.clf() # Clear previous plot
67
  tensor_datalbl = label_reps.detach()
68
  x_values = tensor_datalbl[:, 0].numpy()
69
  y_values = tensor_datalbl[:, 1].numpy()
70
 
71
  # Create a scatter plot for labels
72
+ plt.scatter(x_values, y_values, label='Labels')
73
 
74
  # Add labels to specific points (adjust indices as needed)
75
  for i in range(len(tensor_datalbl)):
 
80
  tensor_datasen = sentence_rep.detach()
81
 
82
  # Extract the individual dimensions for the scatter plot
83
+ x_values_sen = tensor_datasen[:, 0].numpy()
84
+ y_values_sen = tensor_datasen[:, 1].numpy()
85
 
86
+ plt.scatter(x_values_sen, y_values_sen, label='Input Sentence', color='red', marker='x', s=100)
87
 
88
  plt.title('2D Representation of Similarity Estimates (2D)')
89
  plt.xlabel('X-axis')
90
  plt.ylabel('Y-axis')
91
+ plt.legend()
92
+
93
+ # Save to BytesIO instead of file system
94
+ buf = io.BytesIO()
95
+ plt.savefig(buf, format='png', bbox_inches='tight')
96
+ buf.seek(0)
97
+ st.image(buf)
98
+ buf.close()
99
  st.subheader("Classification Details")
100
  for ind in closest:
101
  #print(f'label: {labels[ind]} \t similarity: {similarities[ind]}')
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  transformers
2
  datasets
3
  torch
 
1
+ streamlit==1.31.0
2
  transformers
3
  datasets
4
  torch