Spaces:

Jyotiyadav
/

LLMsModelFine-tuned

Runtime error

Jyotiyadav committed on May 13, 2024

Commit

bace2e4

verified ·

1 Parent(s): 2b3abe0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ def generate_output(input_text,model):
     # Initialize the FastLanguageModel
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name = model,
-        max_seq_length = 4096,
         dtype = None,
         load_in_4bit = True,
     )
@@ -78,7 +78,7 @@ def generate_output(input_text,model):
     # Tokenize the input text
     inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
     # Generate outputs
-    outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=True)
     output = tokenizer.batch_decode(outputs)
     cleaned_response = process_output(output)
     return output,cleaned_response

     # Initialize the FastLanguageModel
     model, tokenizer = FastLanguageModel.from_pretrained(
         model_name = model,
+        max_seq_length = 2500,
         dtype = None,
         load_in_4bit = True,
     )
     # Tokenize the input text
     inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
     # Generate outputs
+    outputs = model.generate(**inputs, max_new_tokens=2500, use_cache=True)
     output = tokenizer.batch_decode(outputs)
     cleaned_response = process_output(output)
     return output,cleaned_response