amasood committed on
Commit
bc02f8a
·
verified ·
1 Parent(s): 6f6a820

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -53,7 +53,7 @@ input_text = st.text_input("Enter Text:", "Hello, how are you?")
53
  tokens = tokenizer.tokenize(input_text)
54
  st.write("Tokenized Output:", tokens)
55
 
56
- # Token Embeddings Visualization (PCA Projection)
57
  st.subheader("🧩 Token Embeddings Visualization")
58
  with torch.no_grad():
59
  inputs = tokenizer(input_text, return_tensors="pt")
@@ -61,6 +61,12 @@ with torch.no_grad():
61
 
62
  if hasattr(outputs, "last_hidden_state"):
63
  embeddings = outputs.last_hidden_state.squeeze(0).numpy()
 
 
 
 
 
 
64
  pca = PCA(n_components=2)
65
  reduced_embeddings = pca.fit_transform(embeddings)
66
 
 
53
  tokens = tokenizer.tokenize(input_text)
54
  st.write("Tokenized Output:", tokens)
55
 
56
+ # Token Embeddings Visualization (Fixed PCA Projection)
57
  st.subheader("🧩 Token Embeddings Visualization")
58
  with torch.no_grad():
59
  inputs = tokenizer(input_text, return_tensors="pt")
 
61
 
62
  if hasattr(outputs, "last_hidden_state"):
63
  embeddings = outputs.last_hidden_state.squeeze(0).numpy()
64
+
65
+ # Ensure the number of tokens and embeddings match
66
+ n_tokens = min(len(tokens), embeddings.shape[0])
67
+ embeddings = embeddings[:n_tokens] # Trim embeddings to match token count
68
+ tokens = tokens[:n_tokens] # Trim tokens to match embeddings count
69
+
70
  pca = PCA(n_components=2)
71
  reduced_embeddings = pca.fit_transform(embeddings)
72