Commit c81f16e
Daniel Machado Pedrozo committed
Parent(s): 092b7a2

chore: increase max_new_tokens parameter to 4096 across chat model and inference functions for improved response generation

Files changed:
- src/app.py (+1, -1)
- src/backend/chat_model.py (+3, -3)
- src/backend/inference.py (+2, -2)
src/app.py
CHANGED
@@ -240,7 +240,7 @@ else:
             full_response = ""

             try:
-                for token in chat_model.generate_streaming(max_new_tokens=
+                for token in chat_model.generate_streaming(max_new_tokens=4096):
                     full_response += token
                     response_placeholder.markdown(full_response)

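For context, the hunk above is the usual Streamlit token-streaming pattern; a minimal sketch follows, assuming generate_streaming() yields text chunks. Everything outside the hunk (the import path, ChatModel construction, st.chat_message, st.empty, the except branch) is an assumption, not taken from this commit.

import streamlit as st

from src.backend.chat_model import ChatModel  # assumed import path

chat_model = ChatModel()  # hypothetical construction; real args not shown in the diff

with st.chat_message("assistant"):
    response_placeholder = st.empty()
    full_response = ""
    try:
        # The commit raises the generation budget to 4096 new tokens.
        for token in chat_model.generate_streaming(max_new_tokens=4096):
            full_response += token
            # Re-render the accumulated text on each yielded chunk.
            response_placeholder.markdown(full_response)
    except Exception as exc:
        response_placeholder.error(f"Generation failed: {exc}")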
src/backend/chat_model.py
CHANGED
@@ -82,7 +82,7 @@ class ChatModel:

     def generate_streaming(
         self,
-        max_new_tokens: int =
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         top_p: Optional[float] = None,
         top_k: Optional[int] = None,
@@ -116,7 +116,7 @@ class ChatModel:

     def generate(
         self,
-        max_new_tokens: int =
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         top_p: Optional[float] = None,
         top_k: Optional[int] = None,
@@ -148,7 +148,7 @@ class ChatModel:
     def chat(
         self,
         user_message: str,
-        max_new_tokens: int =
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         streaming: bool = False,
     ) -> Union[str, Iterator[str]]:
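A minimal usage sketch of the updated defaults: only the chat()/generate_streaming() signatures come from the diff above; the import path and constructor are assumptions.

from src.backend.chat_model import ChatModel  # assumed import path

model = ChatModel()  # hypothetical; the constructor is not part of this diff

# Non-streaming call: with the new default, omitting max_new_tokens now
# permits up to 4096 generated tokens.
reply = model.chat("Summarize the latest commit.", temperature=0.7)
print(reply)

# Streaming call: per the Union[str, Iterator[str]] annotation above,
# streaming=True should yield tokens incrementally.
for token in model.chat("And in one sentence?", streaming=True):
    print(token, end="", flush=True)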
src/backend/inference.py
CHANGED
@@ -34,7 +34,7 @@ def _build_generation_kwargs(
 def generate_streaming(
     pipeline: Pipeline,
     prompt: Union[str, List[Message]],
-    max_new_tokens: int =
+    max_new_tokens: int = 4096,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     top_k: Optional[int] = None,
@@ -112,7 +112,7 @@ def generate_streaming(
 def generate_simple(
     pipeline: Pipeline,
     prompt: Union[str, List[Message]],
-    max_new_tokens: int =
+    max_new_tokens: int = 4096,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     top_k: Optional[int] = None,
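The hunk headers name a helper, _build_generation_kwargs, that presumably assembles these parameters into a transformers generation call. A sketch of what such a helper could look like follows; only the helper's name and the parameter list come from the diff, the body is entirely an assumption.

from typing import Optional

def _build_generation_kwargs(
    max_new_tokens: int = 4096,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
) -> dict:
    # Collect only the options the caller actually set, so the
    # transformers pipeline falls back to its own defaults otherwise.
    kwargs: dict = {"max_new_tokens": max_new_tokens}
    if temperature is not None:
        kwargs["temperature"] = temperature
        kwargs["do_sample"] = True  # temperature only takes effect when sampling
    if top_p is not None:
        kwargs["top_p"] = top_p
    if top_k is not None:
        kwargs["top_k"] = top_k
    return kwargs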