Spaces:

Caslow
/

Fortran_to_Rust_Translator

Runtime error

Caslow committed on Nov 21, 2024

Commit

78ad200

1 Parent(s): 9efc447

cpu

Files changed (1) hide show

inference.py CHANGED Viewed

@@ -21,9 +21,12 @@ def load_model(
     Returns:
         Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
     """
-    device = torch.device("cpu")
-    model_name = "lora_model"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -31,7 +34,8 @@ def load_model(
         pretrained_model_name_or_path=model_name,
         device_map="cpu",
         torch_dtype=torch.float32, # Use float32 for CPU
-        low_cpu_mem_usage=True  # Helps with memory efficiency
     )
     model.eval() # Set model to evaluation mode
@@ -91,10 +95,10 @@ def generate_response(
     # text_streamer = TextStreamer(tokenizer, skip_prompt=skip_prompt)
     inputs = tokenizer(inputs, return_tensors="pt").to(device)
     outputs = model.generate(
-        **inputs,
         max_length=2000,
         # num_return_sequences=1,
-        # do_sample=False  # Deterministic generation
         # streamer=text_streamer,
         # max_new_tokens=max_new_tokens,
         # use_cache=True,
@@ -125,10 +129,7 @@ def main(
     # Load model
     model, tokenizer = load_model(
-        model_name=MODEL_PATH,
-        max_seq_length=max_seq_length,
-        dtype=dtype,
-        load_in_4bit=load_in_4bit
     )
     # Prepare input

     Returns:
         Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
     """
+    try:
+        from transformers import BitsAndBytesConfig
+        bnb_config = BitsAndBytesConfig(load_in_4bit=False)
+    except ImportError:
+        bnb_config = None
     tokenizer = AutoTokenizer.from_pretrained(model_name)
         pretrained_model_name_or_path=model_name,
         device_map="cpu",
         torch_dtype=torch.float32, # Use float32 for CPU
+        low_cpu_mem_usage=True, # Helps with memory efficiency
+        quantization_config=bnb_config
     )
     model.eval() # Set model to evaluation mode
     # text_streamer = TextStreamer(tokenizer, skip_prompt=skip_prompt)
     inputs = tokenizer(inputs, return_tensors="pt").to(device)
     outputs = model.generate(
+        inputs,
         max_length=2000,
+        do_sample=False  # Deterministic generation
         # num_return_sequences=1,
         # streamer=text_streamer,
         # max_new_tokens=max_new_tokens,
         # use_cache=True,
     # Load model
     model, tokenizer = load_model(
+        model_name=MODEL_PATH
     )
     # Prepare input