Remostart committed on
Commit
70a8ea1
·
verified ·
1 Parent(s): 578711f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -12,7 +12,7 @@ login(token=hf_token)
12
 
13
  # Model repository IDs
14
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
15
- peft_model_id = "ubiodee/Plutuslearn-Llama-3.2-3B-Instruct" # Replace with your model repo
16
 
17
  # Load the tokenizer from the fine-tuned model
18
  tokenizer = AutoTokenizer.from_pretrained(peft_model_id, token=hf_token)
@@ -36,14 +36,15 @@ model = PeftModel.from_pretrained(base_model, peft_model_id, token=hf_token)
36
  def predict(text, max_length=100):
37
  try:
38
  messages = [{"role": "user", "content": text}]
39
- inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
40
- # Move inputs to GPU if they are a dictionary of tensors
41
  if isinstance(inputs, dict):
42
  inputs = {key: val.to("cuda:0") for key, val in inputs.items()}
 
43
  else:
44
- # If inputs is a single tensor (unlikely but for robustness)
45
  inputs = inputs.to("cuda:0")
46
- outputs = model.generate(**inputs, max_length=max_length)
47
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
48
  except Exception as e:
49
  return f"Error during inference: {str(e)}"
@@ -67,4 +68,4 @@ demo.launch(
67
  server_port=7860,
68
  share=True,
69
  debug=True
70
- )
 
12
 
13
  # Model repository IDs
14
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
15
+ peft_model_id = "ubiodee/Plutuslearn-Llama-3.2-3B-Instruct" # Replace with your model repo (e.g., ubiodee/my-finetuned-model)
16
 
17
  # Load the tokenizer from the fine-tuned model
18
  tokenizer = AutoTokenizer.from_pretrained(peft_model_id, token=hf_token)
 
36
  def predict(text, max_length=100):
37
  try:
38
  messages = [{"role": "user", "content": text}]
39
+ inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
40
+ # Handle inputs based on type
41
  if isinstance(inputs, dict):
42
  inputs = {key: val.to("cuda:0") for key, val in inputs.items()}
43
+ outputs = model.generate(**inputs, max_length=max_length)
44
  else:
45
+ # If inputs is a tensor (e.g., input_ids)
46
  inputs = inputs.to("cuda:0")
47
+ outputs = model.generate(input_ids=inputs, max_length=max_length)
48
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
49
  except Exception as e:
50
  return f"Error during inference: {str(e)}"
 
68
  server_port=7860,
69
  share=True,
70
  debug=True
71
+ )