Fix Bedrock system prompt (#2062)
### What problem does this PR solve?
Bugfix: the Bedrock Converse API requires the system prompt (for models
that support one) to be passed via the dedicated `system` parameter rather
than inserted into the conversation history as a `"system"`-role message.
This PR fixes it.
https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- rag/llm/chat_model.py +4 -6
rag/llm/chat_model.py
CHANGED
|
@@ -667,8 +667,6 @@ class BedrockChat(Base):
|
|
| 667 |
|
| 668 |
def chat(self, system, history, gen_conf):
|
| 669 |
from botocore.exceptions import ClientError
|
| 670 |
-
if system:
|
| 671 |
-
history.insert(0, {"role": "system", "content": system})
|
| 672 |
for k in list(gen_conf.keys()):
|
| 673 |
if k not in ["temperature", "top_p", "max_tokens"]:
|
| 674 |
del gen_conf[k]
|
|
@@ -688,7 +686,8 @@ class BedrockChat(Base):
|
|
| 688 |
response = self.client.converse(
|
| 689 |
modelId=self.model_name,
|
| 690 |
messages=history,
|
| 691 |
-
inferenceConfig=gen_conf
|
|
|
|
| 692 |
)
|
| 693 |
|
| 694 |
# Extract and print the response text.
|
|
@@ -700,8 +699,6 @@ class BedrockChat(Base):
|
|
| 700 |
|
| 701 |
def chat_streamly(self, system, history, gen_conf):
|
| 702 |
from botocore.exceptions import ClientError
|
| 703 |
-
if system:
|
| 704 |
-
history.insert(0, {"role": "system", "content": system})
|
| 705 |
for k in list(gen_conf.keys()):
|
| 706 |
if k not in ["temperature", "top_p", "max_tokens"]:
|
| 707 |
del gen_conf[k]
|
|
@@ -720,7 +717,8 @@ class BedrockChat(Base):
|
|
| 720 |
response = self.client.converse(
|
| 721 |
modelId=self.model_name,
|
| 722 |
messages=history,
|
| 723 |
-
inferenceConfig=gen_conf
|
|
|
|
| 724 |
)
|
| 725 |
ans = response["output"]["message"]["content"][0]["text"]
|
| 726 |
return ans, num_tokens_from_string(ans)
|
|
|
|
| 667 |
|
| 668 |
def chat(self, system, history, gen_conf):
|
| 669 |
from botocore.exceptions import ClientError
|
|
|
|
|
|
|
| 670 |
for k in list(gen_conf.keys()):
|
| 671 |
if k not in ["temperature", "top_p", "max_tokens"]:
|
| 672 |
del gen_conf[k]
|
|
|
|
| 686 |
response = self.client.converse(
|
| 687 |
modelId=self.model_name,
|
| 688 |
messages=history,
|
| 689 |
+
inferenceConfig=gen_conf,
|
| 690 |
+
system=[{"text": system}] if system else None,
|
| 691 |
)
|
| 692 |
|
| 693 |
# Extract and print the response text.
|
|
|
|
| 699 |
|
| 700 |
def chat_streamly(self, system, history, gen_conf):
|
| 701 |
from botocore.exceptions import ClientError
|
|
|
|
|
|
|
| 702 |
for k in list(gen_conf.keys()):
|
| 703 |
if k not in ["temperature", "top_p", "max_tokens"]:
|
| 704 |
del gen_conf[k]
|
|
|
|
| 717 |
response = self.client.converse(
|
| 718 |
modelId=self.model_name,
|
| 719 |
messages=history,
|
| 720 |
+
inferenceConfig=gen_conf,
|
| 721 |
+
system=[{"text": system}] if system else None,
|
| 722 |
)
|
| 723 |
ans = response["output"]["message"]["content"][0]["text"]
|
| 724 |
return ans, num_tokens_from_string(ans)
|