Update readme and config

#2
Files changed (2)
  1. README.md +3 -84
  2. config.json +2 -1
README.md CHANGED
@@ -4,8 +4,8 @@ pipeline_tag: feature-extraction
4
  tags:
5
  - feature-extraction
6
  - sentence-similarity
7
- - mteb
8
- - sentence-transformers
9
  language:
10
  - multilingual
11
  ---
@@ -39,27 +39,6 @@ language:
39
 
40
  ## Usage
41
 
42
- <details>
43
- <summary>Via API (Standard Embeddings)</summary>
44
-
45
- ```bash
46
- curl -X POST https://api.perplexity.ai/v1/embeddings \
47
- -H "Authorization: Bearer YOUR_API_KEY" \
48
- -H "Content-Type: application/json" \
49
- -d '{
50
- "texts": [
51
- "Scientists explore the universe driven by curiosity.",
52
- "Children learn through curious exploration.",
53
- "Historical discoveries began with curious questions.",
54
- "Animals use curiosity to adapt and survive.",
55
- "Philosophy examines the nature of curiosity.",
56
- ],
57
- "model": "pplx-embed-1-4B"
58
- }'
59
- ```
60
-
61
- </details>
62
-
63
  <details>
64
  <summary>Via API (Contextualized Embeddings)</summary>
65
 
@@ -91,66 +70,8 @@ curl -X POST https://api.perplexity.ai/v1/contextualizedembeddings \
91
  ```python
92
  from transformers import AutoModel
93
 
94
- model = AutoModel.from_pretrained(
95
- "perplexity-ai/pplx-embed-1-0.6B",
96
- trust_remote_code=True
97
- )
98
- texts = [
99
- "Scientists explore the universe driven by curiosity.",
100
- "Children learn through curious exploration.",
101
- "Historical discoveries began with curious questions.",
102
- "Animals use curiosity to adapt and survive.",
103
- "Philosophy examines the nature of curiosity.",
104
- ]
105
-
106
- embeddings = model.encode(texts) # Shape: (5, 1024)
107
-
108
  model_ctx = AutoModel.from_pretrained(
109
- "perplexity-ai/pplx-embed-1-context-0.6B",
110
- trust_remote_code=True
111
- )
112
-
113
- doc_chunks = [
114
- [
115
- "Curiosity begins in childhood with endless questions about the world.",
116
- "As we grow, curiosity drives us to explore new ideas.",
117
- "Scientific breakthroughs often start with a curious question."
118
- ],
119
- [
120
- "The curiosity rover explores Mars searching for ancient life.",
121
- "Each discovery on Mars sparks new questions about the universe."
122
- ]
123
- ]
124
- # Returns list of numpy arrays (one per document)
125
- # embeddings[0].shape = (3, 1024), embeddings[1].shape = (2, 1024)
126
- embeddings = model_ctx.encode(doc_chunks)
127
- ```
128
-
129
- </details>
130
-
131
- <details>
132
- <summary>Using SentenceTransformers</summary>
133
-
134
- ```python
135
- from sentence_transformers import SentenceTransformer
136
-
137
- model = SentenceTransformer(
138
- "perplexity-ai/pplx-embed-1-0.6B",
139
- trust_remote_code=True
140
- )
141
-
142
- texts = [
143
- "Scientists explore the universe driven by curiosity.",
144
- "Children learn through curious exploration.",
145
- "Historical discoveries began with curious questions.",
146
- "Animals use curiosity to adapt and survive.",
147
- "Philosophy examines the nature of curiosity.",
148
- ]
149
-
150
- embeddings = model.encode(texts) # Shape: (5, 1024)
151
-
152
- model_ctx = SentenceTransformer(
153
- "perplexity-ai/pplx-embed-1-context-0.6B",
154
  trust_remote_code=True
155
  )
156
 
@@ -172,8 +93,6 @@ embeddings = model_ctx.encode(doc_chunks)
172
 
173
  </details>
174
 
175
- </details>
176
-
177
  ## Technical Details
178
 
179
  For comprehensive technical details and evaluation results, see our paper on arXiv.
 
4
  tags:
5
  - feature-extraction
6
  - sentence-similarity
7
+ - conteb
8
+ - contextual-embeddings
9
  language:
10
  - multilingual
11
  ---
 
39
 
40
  ## Usage
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  <details>
43
  <summary>Via API (Contextualized Embeddings)</summary>
44
 
 
70
  ```python
71
  from transformers import AutoModel
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  model_ctx = AutoModel.from_pretrained(
74
+ "perplexity-ai/pplx-embed-1-context-4B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  trust_remote_code=True
76
  )
77
 
 
93
 
94
  </details>
95
 
 
 
96
  ## Technical Details
97
 
98
  For comprehensive technical details and evaluation results, see our paper on arXiv.
config.json CHANGED
@@ -72,5 +72,6 @@
72
  "use_cache": false,
73
  "use_sliding_window": false,
74
  "attn_implementation": "sdpa",
75
- "vocab_size": 151936
 
76
  }
 
72
  "use_cache": false,
73
  "use_sliding_window": false,
74
  "attn_implementation": "sdpa",
75
+ "vocab_size": 151936,
76
+ "use_bidirectional_attention": true
77
  }