Update README.md
Browse files
README.md
CHANGED
|
@@ -192,9 +192,9 @@ apiVersion: serving.kserve.io/v1beta1
|
|
| 192 |
kind: InferenceService
|
| 193 |
metadata:
|
| 194 |
annotations:
|
| 195 |
- openshift.io/display-name:
|
| 196 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 197 |
- name:
|
| 198 |
labels:
|
| 199 |
opendatahub.io/dashboard: 'true'
|
| 200 |
spec:
|
|
@@ -243,7 +243,7 @@ oc apply -f qwen-inferenceservice.yaml
|
|
| 243 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
|
| 244 |
-H "Content-Type: application/json" \
|
| 245 |
-d '{
|
| 246 |
- "model": "
|
| 247 |
"stream": true,
|
| 248 |
"stream_options": {
|
| 249 |
"include_usage": true
|
|
|
|
| 192 |
kind: InferenceService
|
| 193 |
metadata:
|
| 194 |
annotations:
|
| 195 |
+ openshift.io/display-name: mistral-small-3-1-24b-instruct-2503-quantized-w8a8 # OPTIONAL CHANGE
|
| 196 |
serving.kserve.io/deploymentMode: RawDeployment
|
| 197 |
+ name: mistral-small-3-1-24b-instruct-2503-quantized-w8a8 # specify model name. This value will be used to invoke the model in the payload
|
| 198 |
labels:
|
| 199 |
opendatahub.io/dashboard: 'true'
|
| 200 |
spec:
|
|
|
|
| 243 |
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
|
| 244 |
-H "Content-Type: application/json" \
|
| 245 |
-d '{
|
| 246 |
+ "model": "mistral-small-3-1-24b-instruct-2503-quantized-w8a8",
|
| 247 |
"stream": true,
|
| 248 |
"stream_options": {
|
| 249 |
"include_usage": true
|