Spaces:

sksameermujahid
/

testingnewcode

Runtime error

sksameermujahid committed on Mar 25, 2025

Commit

21b4828

verified ·

1 Parent(s): b8b1be8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ import cloudinary
 import cloudinary.uploader
 import cloudinary.api
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 import speech_recognition as sr
 from pydub import AudioSegment
 from happytransformer import HappyTextToText, TTSettings
@@ -188,18 +189,29 @@ retriever = CustomRagRetriever(index, model_embedding)
 def load_tokenizer_and_model():
     print("Loading tokenizer...")
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
         print("Tokenizer loaded successfully.")
         print("Loading LLM model...")
-        model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
-        model_llm = AutoModelForCausalLM.from_pretrained(
-            model_dir,
-            config=model_config,
             trust_remote_code=True,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto"
         ).to(device)
         print("LLM model loaded successfully.")
         return tokenizer, model_llm
     except Exception as e:

 import cloudinary.uploader
 import cloudinary.api
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+from peft import PeftModel, PeftConfig
 import speech_recognition as sr
 from pydub import AudioSegment
 from happytransformer import HappyTextToText, TTSettings
 def load_tokenizer_and_model():
     print("Loading tokenizer...")
     try:
+        # Load base model first
+        base_model_name = "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit"
+        tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
         print("Tokenizer loaded successfully.")
         print("Loading LLM model...")
+        # Load the base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_name,
             trust_remote_code=True,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto"
+        )
+        # Load the PEFT adapter
+        model_llm = PeftModel.from_pretrained(
+            base_model,
+            model_dir,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto"
         ).to(device)
         print("LLM model loaded successfully.")
         return tokenizer, model_llm
     except Exception as e: