Spaces:

Steph254
/

demo_1

Runtime error

Steph254 committed on Mar 18

Commit

196f1dd

verified ·

1 Parent(s): 3e8ef05

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,20 +18,18 @@ QUANTIZED_MODEL = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Directly
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 # Function to load Llama model (without LoRA)
-def load_llama_model(model_name, is_guard=False):
     print(f"🔄 Loading Model: {model_name}")
     tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        token=HUGGINGFACE_TOKEN,
-        torch_dtype=torch.float32,
-        low_cpu_mem_usage=True
-    )
-    model.eval()
-    print("✅ Model Loaded Successfully")
-    return tokenizer, model
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)

 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 # Function to load Llama model (without LoRA)
+def load_llama_model(model_name):
     print(f"🔄 Loading Model: {model_name}")
     tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
+    # Load the checkpoint manually
+    model_path = f"{model_name}/consolidated.00.pth"
+    state_dict = torch.load(model_path, map_location="cpu")  # Adjust for GPU if needed
+    print("✅ Model state dictionary loaded successfully!")
+    return tokenizer, state_dict
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)