XiaSheng committed on
Commit
40885c7
·
verified ·
1 Parent(s): 707f2ba

Initial upload of FreeChunk model with custom code

Browse files
Files changed (1) hide show
  1. README.md +15 -21
README.md CHANGED
@@ -32,28 +32,22 @@ pip install torch transformers sentence-transformers numpy
32
  ### Abstract Usage
33
 
34
  ```python
35
- from sentenizer import Sentenceizer
36
- from modeling_freechunker import FreeChunkerModel
37
- import torch
38
-
39
- # 1. Setup Sentenceizer with Backbone
40
- sentenceizer = Sentenceizer(model_name="nomic-ai/nomic-embed-text-v1.5")
41
-
42
- # 2. Load FreeChunker Model
43
- model = FreeChunkerModel.from_pretrained("XiaSheng/FreeChunk-nomic", trust_remote_code=True)
44
- model.eval()
45
-
46
- # 3. Process Text
47
  text = "Your text..."
48
- sentences, embeddings = sentenceizer.split_and_encode(text)
49
-
50
- # 4. Forward pass through FreeChunker
51
- inputs_embeds = torch.tensor(embeddings).unsqueeze(0) # Batch size 1
52
- with torch.no_grad():
53
- outputs = model(inputs_embeds=inputs_embeds)
54
-
55
- # outputs['embedding'] contains refined embeddings
56
- # outputs['shift_matrix'] contains chunking information
57
  ```
58
 
59
  ## Files
 
32
  ### Abstract Usage
33
 
34
  ```python
35
+ from transformers import AutoModel
36
+ import torch
37
+
38
+ # 1. Load Model (UnifiedEncoder)
39
+ model = AutoModel.from_pretrained("XiaSheng/FreeChunk-nomic", trust_remote_code=True)
40
+
41
+ # 2. Build Vector Store from Text
 
 
 
 
 
42
  text = "Your text..."
43
+ model.build_vector_store(text)
44
+
45
+ # 3. Query with Post-Aggregation (Default)
46
+ query = "Your query..."
47
+ results = model.query(query, top_k=1, aggregation_mode='post')
48
+
49
+ print(f"Query: {query}")
50
+ print(f"Result: {results}")
 
51
  ```
52
 
53
  ## Files