LunaLan07 committed on
Commit
fba173e
·
verified ·
1 Parent(s): e3b752b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +32 -4
README.md CHANGED
@@ -35,15 +35,15 @@ Training objective:
35
 
36
  ---
37
 
38
- ## 🚀 Usage
39
 
40
  ```python
41
  from transformers import AutoTokenizer, AutoModel
42
  import torch
43
  import torch.nn.functional as F
44
 
45
- tokenizer = AutoTokenizer.from_pretrained("your-username/biohicl-base")
46
- model = AutoModel.from_pretrained("your-username/biohicl-base")
47
 
48
  def encode(texts):
49
  inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
@@ -58,8 +58,36 @@ doc = encode(["Chronic obstructive pulmonary disease is treated with bronchodila
58
  similarity = (query @ doc.T).item()
59
  print(similarity)
60
 
61
- ## 📖 Citation
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  If you use this model, please cite:
64
 
65
  ```bibtex
 
35
 
36
  ---
37
 
38
+ ## 🚀 Usage - Text Similarity
39
 
40
  ```python
41
  from transformers import AutoTokenizer, AutoModel
42
  import torch
43
  import torch.nn.functional as F
44
 
45
+ tokenizer = AutoTokenizer.from_pretrained("LunaLan07/BioHiCL-Large")
46
+ model = AutoModel.from_pretrained("LunaLan07/BioHiCL-Large")
47
 
48
  def encode(texts):
49
  inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
 
58
  similarity = (query @ doc.T).item()
59
  print(similarity)
60
 
 
61
 
62
+
63
+ ---
64
+
65
+ ## 🚀 Usage - Evaluation on BEIR Benchmark
66
+
67
+ ```python
68
+ from beir import util
69
+ from beir.datasets.data_loader import GenericDataLoader
70
+ from beir.retrieval.models import SentenceBERT
71
+ from beir.retrieval.search.dense import DenseRetrievalExactSearch
72
+ from beir.retrieval.evaluation import EvaluateRetrieval
73
+
74
+ dataset = "scifact"
75
+ url = ...
76
+ data_path = util.download_and_unzip(url, "datasets")
77
+ corpus, queries, qrels = GenericDataLoader(data_path).load(split="test")
78
+
79
+ model_name = "LunaLan07/BioHiCL-Large"
80
+ model = SentenceBERT(model_name)
81
+ retriever = DenseRetrievalExactSearch(model, batch_size=16)
82
+ top_k = 10 # top 10 documents per query
83
+ results = retriever.search(corpus, queries, top_k=top_k, score_function="cos_sim")
84
+
85
+ k_values = [1, 3, 5, 10]
86
+ ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(qrels, results, k_values=k_values)
87
+ ```
88
+ ---
89
+
90
+ ## 📖 Citation
91
  If you use this model, please cite:
92
 
93
  ```bibtex