saeedabdulmuizz committed on
Commit
6469bde
·
verified ·
1 Parent(s): 5b50bdc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -15,7 +15,7 @@ except ImportError:
15
  import soundfile as sf
16
  import traceback
17
  from huggingface_hub import hf_hub_download
18
- from transformers import AutoModel, AutoTokenizer, AutoModelForImageTextToText
19
  from peft import PeftModel
20
  from matcha.models.matcha_tts import MatchaTTS
21
  from matcha.hifigan.models import Generator as HiFiGAN
@@ -61,21 +61,27 @@ def load_translation_models():
61
  # Load the tokenizer
62
  tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_BASE_MODEL, trust_remote_code=True)
63
 
64
- # Load the base model with the correct class for Gemma 3 (Multimodal Causal LM)
65
- # using standard loading to avoid offloading/partitioning issues with PEFT
66
- base_model = AutoModelForImageTextToText.from_pretrained(
 
67
  TRANSLATION_BASE_MODEL,
68
  torch_dtype=torch.float16,
69
  trust_remote_code=True
70
  )
71
 
72
  # Load the LoRA adapter
 
73
  model = PeftModel.from_pretrained(base_model, TRANSLATION_ADAPTER)
74
 
 
 
 
 
75
  # Move to device
76
  model.to(DEVICE)
77
  model.eval()
78
- print("[+] Translation model loaded successfully.")
79
  return tokenizer, model
80
  except Exception as e:
81
  print(f"[-] Error loading translation model: {e}")
 
15
  import soundfile as sf
16
  import traceback
17
  from huggingface_hub import hf_hub_download
18
+ from transformers import AutoTokenizer, Gemma3ForConditionalGeneration
19
  from peft import PeftModel
20
  from matcha.models.matcha_tts import MatchaTTS
21
  from matcha.hifigan.models import Generator as HiFiGAN
 
61
  # Load the tokenizer
62
  tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_BASE_MODEL, trust_remote_code=True)
63
 
64
+ # Load the base model with the EXACT class used during training (Gemma3ForConditionalGeneration)
65
+ # This ensures LoRA layers map correctly
66
+ print("[*] Loading base model as Gemma3ForConditionalGeneration...")
67
+ base_model = Gemma3ForConditionalGeneration.from_pretrained(
68
  TRANSLATION_BASE_MODEL,
69
  torch_dtype=torch.float16,
70
  trust_remote_code=True
71
  )
72
 
73
  # Load the LoRA adapter
74
+ print("[*] Loading LoRA adapter...")
75
  model = PeftModel.from_pretrained(base_model, TRANSLATION_ADAPTER)
76
 
77
+ # Merge the adapter weights into the base model for faster inference
78
+ print("[*] Merging adapter weights...")
79
+ model = model.merge_and_unload()
80
+
81
  # Move to device
82
  model.to(DEVICE)
83
  model.eval()
84
+ print(f"[+] Translation model loaded successfully on {DEVICE}.")
85
  return tokenizer, model
86
  except Exception as e:
87
  print(f"[-] Error loading translation model: {e}")