fix model length gpuzero timeout
app.py
CHANGED
@@ -31,14 +31,18 @@ def create_prompt(system_message, user_message, tool_definition="", context=""):
     else:
         return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
 
-@spaces.GPU
+@spaces.GPU(duration=94)
 def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
         response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
+        max_model_length = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 8192
+
+        max_length = max_model_length - max_tokens
+
+        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
         input_ids = inputs['input_ids'].to(model.device)
         attention_mask = inputs['attention_mask'].to(model.device)
 
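The commit applies two fixes: it requests a longer ZeroGPU slot via `@spaces.GPU(duration=94)` so long generations are not cut off by the default allocation window, and it truncates the prompt so that prompt tokens plus the `max_tokens` to be generated stay within the model's context window. Below is a minimal, self-contained sketch of that pattern, not the Space's actual app.py: the checkpoint name is a placeholder and the helper is simplified to a single generate call.

# Minimal sketch of the prompt-length budgeting this commit introduces
# (placeholder model name; assumes a causal-LM checkpoint and the `spaces` package).
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some/causal-lm")   # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("some/causal-lm")

@spaces.GPU(duration=94)  # ask ZeroGPU for up to ~94 s of GPU time per call
def generate(prompt: str, max_tokens: int = 256) -> str:
    # Fall back to 8192 if the config does not expose a context length.
    max_model_length = getattr(model.config, "max_position_embeddings", 8192)
    # Reserve room for the tokens we are about to generate.
    max_length = max_model_length - max_tokens
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
    output_ids = model.generate(
        inputs["input_ids"].to(model.device),
        attention_mask=inputs["attention_mask"].to(model.device),
        max_new_tokens=max_tokens,
    )
    # Decode only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output_ids[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True)

Subtracting `max_tokens` before truncation is what prevents the "model length" error: without it, a long prompt can already fill the context window, leaving no room for generation.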