eddddyy committed
Commit af4148c · verified · 1 Parent(s): d1a2612

Update model_loader.py

Files changed (1)
  1. model_loader.py +10 -4
model_loader.py CHANGED
@@ -5,14 +5,18 @@ from config import HF_TOKEN, MODEL_ID
 def load_model():
     try:
         print(f"🔄 Loading tokenizer and model: {MODEL_ID}")
-
+
         # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_ID,
             token=HF_TOKEN,
-            trust_remote_code=True
+            trust_remote_code=True,
+            padding_side="left"  # For chat-style models
         )
 
+        # Set max length (MedGemma supports up to 8192 tokens)
+        tokenizer.model_max_length = 8192
+
         # Load model
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
@@ -25,12 +29,14 @@ def load_model():
 
         print("✅ Model loaded successfully.")
 
-        # Build pipeline
+        # Return generation pipeline with large max token output and context window
         return pipeline(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
-            max_new_tokens=150,
+            max_new_tokens=1024,  # Max response length
+            truncation=True,  # Safely truncate input if needed
+            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
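
For reference, a minimal usage sketch (not part of this commit) of how the pipeline returned by load_model() might be called after these changes; the generator name, the example prompt, and the chat-template call are illustrative assumptions rather than code taken from this repository.

# Usage sketch -- assumed caller code, not part of the commit above.
from model_loader import load_model

generator = load_model()

# padding_side="left" suits chat-style prompting; apply_chat_template turns a
# message list into a prompt string the text-generation pipeline can consume.
messages = [{"role": "user", "content": "Summarize the symptoms of iron-deficiency anemia."}]
prompt = generator.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# max_new_tokens=1024 caps the response length; truncation=True trims inputs
# that exceed the tokenizer's 8192-token model_max_length.
result = generator(prompt, return_full_text=False)
print(result[0]["generated_text"])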