Daniel Machado Pedrozo committed
Commit c81f16e · 1 parent: 092b7a2

chore: increase max_new_tokens parameter to 4096 across chat model and inference functions for improved response generation

src/app.py CHANGED
@@ -240,7 +240,7 @@ else:
     full_response = ""
 
     try:
-        for token in chat_model.generate_streaming(max_new_tokens=512):
+        for token in chat_model.generate_streaming(max_new_tokens=4096):
             full_response += token
             response_placeholder.markdown(full_response)
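For context, a minimal sketch of the Streamlit loop this hunk lives in, assuming chat_model is the ChatModel instance built earlier in app.py and response_placeholder comes from st.empty(); only the for-loop line is actually changed by the commit:

import streamlit as st

# Assumed setup: chat_model is constructed elsewhere in app.py.
response_placeholder = st.empty()
full_response = ""

try:
    # The new default allows up to 4096 generated tokens before truncation,
    # eight times the previous 512-token budget.
    for token in chat_model.generate_streaming(max_new_tokens=4096):
        full_response += token
        # Re-render the accumulated text on each token for a typing effect.
        response_placeholder.markdown(full_response)
except Exception as exc:
    st.error(f"Generation failed: {exc}")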
src/backend/chat_model.py CHANGED
@@ -82,7 +82,7 @@ class ChatModel:
 
     def generate_streaming(
         self,
-        max_new_tokens: int = 512,
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         top_p: Optional[float] = None,
         top_k: Optional[int] = None,
@@ -116,7 +116,7 @@ class ChatModel:
 
     def generate(
         self,
-        max_new_tokens: int = 512,
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         top_p: Optional[float] = None,
         top_k: Optional[int] = None,
@@ -148,7 +148,7 @@ class ChatModel:
     def chat(
         self,
         user_message: str,
-        max_new_tokens: int = 512,
+        max_new_tokens: int = 4096,
         temperature: Optional[float] = None,
         streaming: bool = False,
     ) -> Union[str, Iterator[str]]:
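These three defaults travel together: chat is the public entry point and forwards its token budget to whichever generator runs. The method bodies below are an assumed sketch of that dispatch; only the signatures and the 4096 default come from this commit:

from typing import Iterator, Optional, Union

class ChatModel:
    def generate_streaming(
        self,
        max_new_tokens: int = 4096,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
    ) -> Iterator[str]:
        # Assumed: yields decoded tokens one at a time.
        yield from ()

    def generate(
        self,
        max_new_tokens: int = 4096,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
    ) -> str:
        # Assumed: drains the streaming path into a single string.
        return "".join(
            self.generate_streaming(max_new_tokens, temperature, top_p, top_k)
        )

    def chat(
        self,
        user_message: str,
        max_new_tokens: int = 4096,
        temperature: Optional[float] = None,
        streaming: bool = False,
    ) -> Union[str, Iterator[str]]:
        # Assumed dispatch: the streaming flag picks the path, and the
        # (now 4096) token budget is forwarded unchanged.
        if streaming:
            return self.generate_streaming(max_new_tokens, temperature)
        return self.generate(max_new_tokens, temperature)

Keeping all three defaults in sync matters: a caller of chat() that omits max_new_tokens would otherwise silently cap the inner generator at whichever default was left behind.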
src/backend/inference.py CHANGED
@@ -34,7 +34,7 @@ def _build_generation_kwargs(
 def generate_streaming(
     pipeline: Pipeline,
     prompt: Union[str, List[Message]],
-    max_new_tokens: int = 512,
+    max_new_tokens: int = 4096,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     top_k: Optional[int] = None,
@@ -112,7 +112,7 @@ def generate_streaming(
 def generate_simple(
     pipeline: Pipeline,
     prompt: Union[str, List[Message]],
-    max_new_tokens: int = 512,
+    max_new_tokens: int = 4096,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     top_k: Optional[int] = None,
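inference.py mirrors the same change at the pipeline layer. _build_generation_kwargs appears in the hunk header but its body is not shown; below is a plausible sketch, with the helper's behaviour as an assumption, of how the 4096 default could be packaged into the kwargs handed to the generation call:

from typing import Optional

def _build_generation_kwargs(
    max_new_tokens: int = 4096,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
) -> dict:
    # Assumed behaviour: drop None-valued sampling knobs so the model's
    # own generation defaults still apply when the caller sets nothing.
    kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
    }
    return {k: v for k, v in kwargs.items() if v is not None}

One caveat worth noting: max_new_tokens counts only generated tokens, so the prompt length plus 4096 must still fit within the model's context window for the full budget to be usable.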