Update README.md
Browse files
README.md
CHANGED
```diff
@@ -88,7 +88,7 @@ ilab model download --repository docker://registry.redhat.io/rhelai1/granite-3-1

 ```bash
 # Serve model via ilab
-ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct
+ilab model serve --model-path ~/.cache/instructlab/models/granite-3-1-8b-instruct -- --trust-remote-code

 # Chat with model
 ilab model chat --model ~/.cache/instructlab/models/granite-3-1-8b-instruct
@@ -145,9 +145,9 @@ apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
   annotations:
-    openshift.io/display-name:
+    openshift.io/display-name: granite-3-1-8b-instruct # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name:
+  name: granite-3-1-8b-instruct # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:
@@ -155,6 +155,8 @@ spec:
     maxReplicas: 1
     minReplicas: 1
     model:
+      args:
+        - '--trust-remote-code'
       modelFormat:
         name: vLLM
       name: ''
@@ -196,7 +198,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "
+    "model": "granite-3-1-8b-instruct",
     "stream": true,
     "stream_options": {
       "include_usage": true
```