Commit 6a83d63
Parent: abd40c7

code clean

README.md CHANGED
@@ -42,18 +42,22 @@ Dolphin employs a decoder-decoder framework with two main components:
 
 
 ## Running the Model
-Method 1
+### Method 1
+Download this repository and run the following commands:
 ```bash
 git lfs install
 git clone https://huggingface.co/NexaAIDev/Dolphin
 python inference_example.py
 ```
 
-Method 2
+### Method 2
+Install the `dolphin` package:
 ```
 pip install nexaai-dolphin
 ```
+
 Then run the following commands:
+
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 import torch
@@ -75,14 +79,12 @@ def inference_instruct(mycontext, question, device="cuda:0"):
         .unsqueeze(0)
         .to(device)
     )
-    # to process the context
     context_tokenized = tokenizer(
         mycontext + "".join([f"[memory_{i}]" for i in range(MEMORY_SIZE)]),
         return_tensors="pt",
     )
     context_tokenized = {k: v.to(device) for k, v in context_tokenized.items()}
     context_token_count = (context_tokenized["input_ids"]).shape[1] - MEMORY_SIZE
-    # We conduct a inference process
     for i in range(context_token_count):
        next_token = (
            model(
@@ -106,14 +108,12 @@ if __name__ == "__main__":
     device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
     AutoConfig.register("dolphin", DolphinConfig)
     AutoModelForCausalLM.register(DolphinConfig, DolphinForCausalLM)
-    # Load the tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained('NexaAIDev/Dolphin')
     model = AutoModelForCausalLM.from_pretrained('NexaAIDev/Dolphin', trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device_name)
 
     # Run inference example
     mycontext = "Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally"
     question = "Who founded Nexa AI?"
-    # Pass the context and the correct device string
     result = inference_instruct(mycontext, question, device=device_name)
     print("Result:", result)
 ```
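
The second hunk's context lines show how `inference_instruct` prepares the context for Dolphin's decoder-decoder setup: `MEMORY_SIZE` placeholder tokens of the form `[memory_i]` are appended to the raw context string, and the placeholder count is subtracted back out to recover the number of real context tokens. A minimal standalone sketch of that templating, assuming a hypothetical `MEMORY_SIZE` of 8 and that each placeholder is registered with the tokenizer as a single special token:

```python
# Standalone sketch of the [memory_i] templating from the second hunk.
# MEMORY_SIZE = 8 is a hypothetical value; the real one comes from the
# Dolphin configuration. The arithmetic below assumes each [memory_i]
# placeholder tokenizes to exactly one special token.
MEMORY_SIZE = 8

mycontext = "Nexa AI is a Cupertino-based company founded in May 2023."
templated = mycontext + "".join(f"[memory_{i}]" for i in range(MEMORY_SIZE))
print(templated.endswith("[memory_7]"))  # True

# With a real tokenizer, input_ids holds the context tokens plus the
# MEMORY_SIZE placeholders, so the script recovers the context length as:
#   context_token_count = context_tokenized["input_ids"].shape[1] - MEMORY_SIZE
```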
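
The third hunk registers the custom classes with the `Auto*` factories before `from_pretrained` is called; without that, `AutoModelForCausalLM` cannot resolve the `"dolphin"` model type. A small self-contained illustration of the same registration pattern, using a toy config as a stand-in (the Dolphin class definitions sit between the hunks and are not shown in this diff):

```python
# Toy illustration of the AutoConfig registration pattern from the third
# hunk. "toy" and ToyConfig are stand-ins; the script registers "dolphin"
# with DolphinConfig (and the model class with AutoModelForCausalLM) the
# same way.
from transformers import AutoConfig, PretrainedConfig

class ToyConfig(PretrainedConfig):
    model_type = "toy"

AutoConfig.register("toy", ToyConfig)

# After registration, the Auto factory can construct the custom config
# from its model_type string, just as from_pretrained resolves "dolphin".
cfg = AutoConfig.for_model("toy")
print(type(cfg).__name__)  # prints: ToyConfig
```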