Spaces:
Runtime error
Runtime error
Update: コンテキスト長を拡張
Browse files
app.py
CHANGED
|
@@ -10,22 +10,23 @@ model_name_or_path = "mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf"
|
|
| 10 |
# File name of the quantized GGUF weights inside the model repository.
model_basename = "ELYZA-japanese-Llama-2-7b-fast-instruct-q5_K_M.gguf"

# Fetch that file from the "main" revision of the repository; the value
# returned by hf_hub_download is what gets handed to Llama below.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
    revision="main",
)

# Module-level model instance shared by predict(); built with the
# library's default settings.
llama = Llama(model_path)
|
| 14 |
|
| 15 |
def predict(messages):
    """Stream a chat reply from the module-level ``llama`` model.

    Args:
        messages: Conversation passed unchanged to
            ``llama.create_chat_completion`` (presumably a list of
            role/content dicts -- confirm against the caller).

    Yields:
        The reply text accumulated so far, emitted again each time a
        streamed chunk contributes new content.
    """
    # Ask Llama for the answer with streaming turned on.
    chunks = llama.create_chat_completion(messages, stream=True)

    reply_so_far = ""
    for chunk in chunks:
        delta = chunk['choices'][0]['delta']
        # Chunks whose delta has no 'content' key add no text; skip them.
        if 'content' not in delta:
            continue
        reply_so_far += delta['content']
        yield reply_so_far
|
| 25 |
|
| 26 |
|
| 27 |
def main():
|
| 28 |
-
st.title("Chat with
|
| 29 |
|
| 30 |
# Session state for retaining messages
|
| 31 |
if 'messages' not in st.session_state:
|
|
|
|
| 10 |
# File name of the quantized GGUF weights inside the model repository.
model_basename = "ELYZA-japanese-Llama-2-7b-fast-instruct-q5_K_M.gguf"

# Fetch that file from the "main" revision of the repository; the value
# returned by hf_hub_download is what gets handed to Llama below.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
    revision="main",
)

# Module-level model instance shared by predict(); the context window is
# set explicitly to 5120 tokens instead of relying on the library default.
llama = Llama(model_path, n_ctx=5120)
|
| 14 |
|
| 15 |
def predict(messages):
    """Stream a chat reply from the module-level ``llama`` model.

    Args:
        messages: Conversation passed unchanged to
            ``llama.create_chat_completion`` (presumably a list of
            role/content dicts -- confirm against the caller).

    Yields:
        The reply text accumulated so far, emitted again each time a
        streamed chunk contributes new content.
    """
    # Ask Llama for the answer with streaming turned on; generation is
    # requested with max_tokens=512.
    chunks = llama.create_chat_completion(messages, stream=True, max_tokens=512)

    reply_so_far = ""
    for chunk in chunks:
        delta = chunk['choices'][0]['delta']
        # Debug trace of every streamed delta, written to stdout.
        print(f"message: {delta}")
        # Chunks whose delta has no 'content' key add no text; skip them.
        if 'content' not in delta:
            continue
        reply_so_far += delta['content']
        yield reply_so_far
|
| 26 |
|
| 27 |
|
| 28 |
def main():
|
| 29 |
+
st.title("Chat with Elyza!")
|
| 30 |
|
| 31 |
# Session state for retaining messages
|
| 32 |
if 'messages' not in st.session_state:
|