import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
import os
# Hugging Face token for gated or private repos (gemma-2b-it is a gated model)
hf_token = os.getenv('HF_TOKEN')
# Base model and PEFT adapter repo IDs
base_model = "google/gemma-2b-it"
adapter_model = "FadQ/gemma-2b-diary-consultaton-chatbot"
# Note: upgrading packages at runtime after they have already been imported has
# no effect on this process; pin recent peft, transformers, and accelerate
# versions in the Space's requirements.txt for compatibility instead.
# Load the base model with weights fully materialized (not meta tensors);
# device_map="auto" lets accelerate place them on the available device(s)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,  # stream weights in instead of building a full CPU copy
    token=hf_token,
)
# device_map="auto" has already placed the weights, so no manual .to() call is
# needed; moving an accelerate-dispatched model can raise an error
# Apply the PEFT (LoRA) adapter on top of the fully loaded base model
model = PeftModel.from_pretrained(
    model,
    adapter_model,
)
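# Optionally, the LoRA weights can be merged into the base model for faster
# inference. A minimal sketch, assuming a standard LoRA adapter:
# merge_and_unload() is PEFT's API and returns a plain transformers model.
# model = model.merge_and_unload()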
# Load the tokenizer (the gated base repo also requires the token)
tokenizer = AutoTokenizer.from_pretrained(base_model, token=hf_token)
def predict(input_text):
    # Tokenize on the model's device (works whether it landed on CPU or GPU)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens bounds the generated text, not prompt + completion
        output = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(output[0], skip_special_tokens=True)
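# Note: gemma-2b-it is instruction-tuned and normally expects Gemma's chat
# format. A minimal sketch of wrapping the raw input with the tokenizer's chat
# template before generation (gemma-2b-it ships a chat_template):
# messages = [{"role": "user", "content": input_text}]
# prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)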
# Create the Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Response"),
)
if __name__ == "__main__":
    demo.launch()
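# A minimal sketch of querying the running app from another process with
# gradio_client (assumes the default local URL and endpoint name; the input
# string is purely illustrative):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("I have been feeling anxious lately.", api_name="/predict"))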