Spaces: Running on Zero
Upload app.py
Browse files
app.py
CHANGED
|
@@ -54,14 +54,24 @@ image_encoder = SigLIPImageEncoder(model_name=image_model_name, embed_dim=image_
|
|
| 54 |
image_encoder.eval() # Set to evaluation mode
|
| 55 |
|
| 56 |
# Load Phi-3 model using llama.cpp
|
| 57 |
-
base_model = Llama(
|
| 58 |
-
model_path=phi3_model_path,
|
| 59 |
-
n_gpu_layers=0, # Ensure no GPU usage
|
| 60 |
-
n_ctx=2048, # Adjust context length as needed
|
| 61 |
-
verbose=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
|
|
|
| 65 |
model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
|
| 66 |
model = model.merge_and_unload()
|
| 67 |
print("phi-3 model loaded sucessfully")
|
|
|
|
| 54 |
image_encoder.eval() # Set to evaluation mode
|
| 55 |
|
| 56 |
# Load Phi-3 model using llama.cpp
|
| 57 |
+
#base_model = Llama(
|
| 58 |
+
# model_path=phi3_model_path,
|
| 59 |
+
# n_gpu_layers=0, # Ensure no GPU usage
|
| 60 |
+
# n_ctx=2048, # Adjust context length as needed
|
| 61 |
+
# verbose=True,
|
| 62 |
+
#)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
base_model = Llama.from_pretrained(
|
| 66 |
+
repo_id="QuantFactory/Phi-3-mini-4k-instruct-GGUF",
|
| 67 |
+
filename="Phi-3-mini-4k-instruct.Q2_K.gguf",
|
| 68 |
+
n_gpu_layers=0,
|
| 69 |
+
n_ctx=2048,
|
| 70 |
+
verbose=True
|
| 71 |
)
|
| 72 |
|
| 73 |
|
| 74 |
+
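# Load the PEFT adapter from peft_model_path onto base_model, then merge the adapter weights into it. NOTE(review): PeftModel.from_pretrained is documented to take a torch/transformers model; confirm a llama_cpp Llama object is accepted here.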
|
| 75 |
model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
|
| 76 |
model = model.merge_and_unload()
|
| 77 |
print("phi-3 model loaded sucessfully")
|