Remostart committed on
Commit
70a8ea1
·
verified ·
1 Parent(s): 578711f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -12,7 +12,7 @@ login(token=hf_token)
12
 
13
  # Model repository IDs
14
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
15
- peft_model_id = "ubiodee/Plutuslearn-Llama-3.2-3B-Instruct" # Replace with your model repo
16
 
17
  # Load the tokenizer from the fine-tuned model
18
  tokenizer = AutoTokenizer.from_pretrained(peft_model_id, token=hf_token)
@@ -36,14 +36,15 @@ model = PeftModel.from_pretrained(base_model, peft_model_id, token=hf_token)
36
  def predict(text, max_length=100):
37
  try:
38
  messages = [{"role": "user", "content": text}]
39
- inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
40
- # Move inputs to GPU if they are a dictionary of tensors
41
  if isinstance(inputs, dict):
42
  inputs = {key: val.to("cuda:0") for key, val in inputs.items()}
 
43
  else:
44
- # If inputs is a single tensor (unlikely but for robustness)
45
  inputs = inputs.to("cuda:0")
46
- outputs = model.generate(**inputs, max_length=max_length)
47
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
48
  except Exception as e:
49
  return f"Error during inference: {str(e)}"
@@ -67,4 +68,4 @@ demo.launch(
67
  server_port=7860,
68
  share=True,
69
  debug=True
70
- )
 
12
 
13
  # Model repository IDs
14
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
15
+ peft_model_id = "ubiodee/Plutuslearn-Llama-3.2-3B-Instruct" # Replace with your model repo (e.g., ubiodee/my-finetuned-model)
16
 
17
  # Load the tokenizer from the fine-tuned model
18
  tokenizer = AutoTokenizer.from_pretrained(peft_model_id, token=hf_token)
 
36
  def predict(text, max_length=100):
37
  try:
38
  messages = [{"role": "user", "content": text}]
39
+ inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
40
+ # Handle inputs based on type
41
  if isinstance(inputs, dict):
42
  inputs = {key: val.to("cuda:0") for key, val in inputs.items()}
43
+ outputs = model.generate(**inputs, max_length=max_length)
44
  else:
45
+ # If inputs is a tensor (e.g., input_ids)
46
  inputs = inputs.to("cuda:0")
47
+ outputs = model.generate(input_ids=inputs, max_length=max_length)
48
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
49
  except Exception as e:
50
  return f"Error during inference: {str(e)}"
 
68
  server_port=7860,
69
  share=True,
70
  debug=True
71
+ )