Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,7 +58,7 @@ def generate_output(input_text,model):
|
|
| 58 |
# Initialize the FastLanguageModel
|
| 59 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 60 |
model_name = model,
|
| 61 |
-
max_seq_length =
|
| 62 |
dtype = None,
|
| 63 |
load_in_4bit = True,
|
| 64 |
)
|
|
@@ -78,7 +78,7 @@ def generate_output(input_text,model):
|
|
| 78 |
# Tokenize the input text
|
| 79 |
inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
|
| 80 |
# Generate outputs
|
| 81 |
-
outputs = model.generate(**inputs, max_new_tokens=
|
| 82 |
output = tokenizer.batch_decode(outputs)
|
| 83 |
cleaned_response = process_output(output)
|
| 84 |
return output,cleaned_response
|
|
|
|
| 58 |
# Initialize the FastLanguageModel
|
| 59 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 60 |
model_name = model,
|
| 61 |
+
max_seq_length = 2500,
|
| 62 |
dtype = None,
|
| 63 |
load_in_4bit = True,
|
| 64 |
)
|
|
|
|
| 78 |
# Tokenize the input text
|
| 79 |
inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
|
| 80 |
# Generate outputs
|
| 81 |
+
outputs = model.generate(**inputs, max_new_tokens=2500, use_cache=True)
|
| 82 |
output = tokenizer.batch_decode(outputs)
|
| 83 |
cleaned_response = process_output(output)
|
| 84 |
return output,cleaned_response
|