vijusudhi committed on
Commit
42e86ac
·
verified ·
1 Parent(s): f50587e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -22
README.md CHANGED
@@ -14,7 +14,7 @@ For more technical details, refer to our paper: ...
14
  - Maximum context length: 512
15
  - Embedding Dimension: 4096
16
 
17
- # How to use?
18
 
19
  ```python
20
  from typing import List
@@ -46,31 +46,35 @@ def pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor, do_norma
46
  return embeddings
47
 
48
 
49
- model = AutoModel.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbembedding", trust_remote_code=True, token=xxx)
50
- tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbembedding", trust_remote_code=True, token=xxx)
51
 
52
  model = model.to("cuda")
53
 
54
- queries = [
55
- "Wer war der erste Bundeskanzler der Bundesrepublik Deutschland?",
56
- "Welche deutsche Stadt ist für ihre Bratwürste bekannt?"
57
- ]
58
- queries = get_detailed_instruct(queries)
59
- queries_inputs = tokenize(sentences=queries, tokenizer=tokenizer)
60
- queries_outputs = model(**queries_inputs)
61
- queries_embs = pool(last_hidden_state=queries_outputs.last_hidden_state, attention_mask=queries_inputs.attention_mask)
62
-
63
- passages = [
64
- "Konrad Adenauer (geboren am 5. Januar 1876 in Köln; gestorben am 19. April 1967 in Rhöndorf) war ein deutscher Politiker und der erste Bundeskanzler der Bundesrepublik Deutschland von 1949 bis 1963. Er war einer der Gründerväter der Bundesrepublik von Deutschland und spielte eine Schlüsselrolle beim Wiederaufbau nach dem Zweiten Weltkrieg.",
65
- "Nürnberg ist eine Stadt im deutschen Bundesland Bayern. Es ist bekannt für seine historische Altstadt, mittelalterliche Befestigungsanlagen und seinen jährlichen Weihnachtsmarkt. Nürnberg ist auch für seine Bratwurst bekannt, eine Wurstsorte, die in Deutschland ein beliebtes Streetfood ist."
66
- ]
67
- passages_inputs = tokenize(sentences=passages, tokenizer=tokenizer)
68
- passages_outputs = model(**passages_inputs)
69
- passages_embs = pool(last_hidden_state=passages_outputs.last_hidden_state, attention_mask=passages_inputs.attention_mask)
70
-
71
- scores = (queries_embs @ passages_embs.T) * 100
72
- print(scores.tolist())
73
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  ## Supported Languages
76
  ...
 
14
  - Maximum context length: 512
15
  - Embedding Dimension: 4096
16
 
17
+ # How to use with Transformers?
18
 
19
  ```python
20
  from typing import List
 
46
  return embeddings
47
 
48
 
49
+ model = AutoModel.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
50
+ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
51
 
52
  model = model.to("cuda")
53
 
54
+ sentences = ["Hi how are you doing?"]
55
+ # sentences = get_detailed_instruct(sentences) # if the sentence is a query
56
+ sentences_inputs = tokenize(sentences=sentences, tokenizer=tokenizer)
57
+ sentences_outputs = model(**sentences_inputs)
58
+ embeddings = pool(
59
+ last_hidden_state=sentences_outputs.last_hidden_state,
60
+ attention_mask=sentences_inputs.attention_mask,
61
+ )
62
+ print(embeddings)
 
 
 
 
 
 
 
 
 
 
63
  ```
64
+ # How to use with Sentence Transformers?
65
+
66
+ ```python
67
+ from sentence_transformers import SentenceTransformer
68
+
69
+ model = SentenceTransformer("lamarr-llm-development/elbedding", trust_remote_code=True)
70
+
71
+ # sentences = get_detailed_instruct(sentences) # if the sentence is a query
72
+ sentences = ["Hi how are you doing?"]
73
+ embeddings = model.encode(sentences=sentences, normalize_embeddings=True)
74
+ print(embeddings)
75
+
76
+ ```
77
+
78
 
79
  ## Supported Languages
80
  ...