Update README.md
Browse files
README.md
CHANGED
```diff
@@ -88,7 +88,7 @@ ilab model download --repository docker://registry.redhat.io/rhelai1/granite-3-1

 ```bash
 # Serve model via ilab
-ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct
+ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct -- --trust-remote-code

 # Chat with model
 ilab model chat --model ~/.cache/instructlab/models/granite-3-1-8b-instruct
@@ -145,9 +145,9 @@ apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
   annotations:
-    openshift.io/display-name:
+    openshift.io/display-name: granite-3-1-8b-instruct # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name:
+  name: granite-3-1-8b-instruct # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:
@@ -155,6 +155,8 @@ spec:
     maxReplicas: 1
     minReplicas: 1
     model:
+      args:
+        - '--trust-remote-code'
       modelFormat:
         name: vLLM
       name: ''
@@ -196,7 +198,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "
+    "model": "granite-3-1-8b-instruct",
     "stream": true,
     "stream_options": {
       "include_usage": true
```