arjun-ms commited on
Commit
c4e10fc
·
verified ·
1 Parent(s): e9b5538

feat: merge lora adapters to mistral base model

Browse files
Files changed (1) hide show
  1. app.py +36 -16
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import torch
3
  import gradio as gr
4
  from huggingface_hub import login
@@ -8,11 +9,17 @@ from peft import PeftModel
8
  # HF token from environment
9
  token = os.environ.get("HF_TOKEN")
10
  if token:
 
11
  login(token)
 
 
12
 
13
  # Model & adapter
14
- base_id = "mistralai/Mistral-7B-Instruct-v0.2"
15
- lora_id = "arjun-ms/mistral-lora-ipc"
 
 
 
16
 
17
  # ✅ Quantization config
18
  bnb_config = BitsAndBytesConfig(
@@ -23,24 +30,37 @@ bnb_config = BitsAndBytesConfig(
23
  llm_int8_enable_fp32_cpu_offload=True # CPU offloading
24
  )
25
 
26
- # Tokenizer
27
- tokenizer = AutoTokenizer.from_pretrained(base_id)
28
 
29
- # Base model with quantization
30
- base_model = AutoModelForCausalLM.from_pretrained(
31
- base_id,
32
- quantization_config=bnb_config,
33
- device_map="auto", # Automatically offloads to CPU/GPU/disk
34
- trust_remote_code=True,
35
- torch_dtype=torch.float16 # Helps reduce memory use
36
- )
37
 
 
 
 
 
 
 
 
 
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
 
42
- # Apply your LoRA adapter
43
- model = PeftModel.from_pretrained(base_model, lora_id)
44
 
45
  # Response generator
46
  def generate_response(prompt):
@@ -60,8 +80,8 @@ iface = gr.Interface(
60
  fn=generate_response,
61
  inputs="text",
62
  outputs="text",
63
- title="IPC LoRA on Mistral 7B",
64
  description="LoRA fine-tuned Mistral 7B for Indian Penal Code questions"
65
  )
66
 
67
- iface.launch()
 
1
  import os
2
+ from pydantic import BaseModel
3
  import torch
4
  import gradio as gr
5
  from huggingface_hub import login
 
9
  # HF token from environment
10
  token = os.environ.get("HF_TOKEN")
11
  if token:
12
+ print("HF TOKEN FOUND AND LOADING....")
13
  login(token)
14
+ else:
15
+ print("NO HF TOKEN FOUND!")
16
 
17
  # Model & adapter
18
+ # base_id = "mistralai/Mistral-7B-Instruct-v0.2"
19
+ # lora_id = "arjun-ms/mistral-lora-ipc"
20
+
21
+ # Merged model path
22
+ merged_model_id = "arjun-ms/mistral-7b-ipc-merged"
23
 
24
  # ✅ Quantization config
25
  bnb_config = BitsAndBytesConfig(
 
30
  llm_int8_enable_fp32_cpu_offload=True # CPU offloading
31
  )
32
 
 
 
33
 
34
+ # # Tokenizer
35
+ # tokenizer = AutoTokenizer.from_pretrained(base_id)
 
 
 
 
 
 
36
 
37
+ # # Base model with quantization
38
+ # base_model = AutoModelForCausalLM.from_pretrained(
39
+ # base_id,
40
+ # quantization_config=bnb_config,
41
+ # device_map="auto", # Automatically offloads to CPU/GPU/disk
42
+ # trust_remote_code=True,
43
+ # torch_dtype=torch.float16 # Helps reduce memory use
44
+ # )
45
 
46
 
47
+ # Apply your LoRA adapter
48
+ # model = PeftModel.from_pretrained(base_model, lora_id)
49
+
50
+
51
+ # ✅ Tokenizer for merged model
52
+ tokenizer = AutoTokenizer.from_pretrained(merged_model_id)
53
+
54
+ # ✅ Load merged model
55
+ model = AutoModelForCausalLM.from_pretrained(
56
+ merged_model_id,
57
+ quantization_config=bnb_config,
58
+ device_map="auto",
59
+ trust_remote_code=True,
60
+ torch_dtype=torch.float16
61
+ )
62
 
63
 
 
 
64
 
65
  # Response generator
66
  def generate_response(prompt):
 
80
  fn=generate_response,
81
  inputs="text",
82
  outputs="text",
83
+ title="IPC Mistral 7B",
84
  description="LoRA fine-tuned Mistral 7B for Indian Penal Code questions"
85
  )
86
 
87
+ iface.launch()