# Hugging Face Space app (previously failing at startup with a runtime error).
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
import os

# Hugging Face access token for gated/private repos (None when not configured).
hf_token = os.getenv('HF_TOKEN')

# Base model and the PEFT (LoRA) adapter fine-tuned on top of it.
base_model = "google/gemma-2b-it"
adapter_model = "FadQ/gemma-2b-diary-consultaton-chatbot"

# NOTE(fix): the original ran `pip install --upgrade peft transformers accelerate`
# via subprocess at this point. Upgrading packages *after* they have been
# imported has no effect on the already-running interpreter and slows every
# startup — pin the required versions in requirements.txt instead.
# Load the base model. `device_map="auto"` lets accelerate place the weights on
# GPU when available and fall back to CPU otherwise; `low_cpu_mem_usage` avoids
# materializing a second full copy of the weights during loading.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    token=hf_token,  # FIX: the token was read from the env but never passed
)

# FIX: the original called `model.to("cuda" if ... else "cpu")` here. After
# `device_map="auto"` accelerate has already dispatched the weights, and a
# manual .to() can raise on offloaded/meta tensors — so it is removed.

# Apply the PEFT adapter on top of the fully loaded base model.
model = PeftModel.from_pretrained(model, adapter_model, token=hf_token)

# The tokenizer comes from the base model (the adapter does not alter the vocab).
tokenizer = AutoTokenizer.from_pretrained(base_model, token=hf_token)

# FIX: the original passed `device=0`, which crashes on CPU-only hardware (the
# Space's "Runtime error") and conflicts with `device_map="auto"`. Omitting
# `device` lets the pipeline reuse wherever accelerate placed the model.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def predict(input_text):
    """Generate a chatbot reply for ``input_text`` with the adapted model.

    Args:
        input_text: Raw user prompt from the Gradio textbox.

    Returns:
        The decoded generation (prompt + continuation), special tokens stripped.
    """
    # FIX: the original hard-coded .to("cuda"), which raises on CPU-only
    # hardware. Send the inputs to wherever the model actually lives.
    device = next(model.parameters()).device
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    with torch.no_grad():
        # FIX: max_new_tokens bounds only the generated portion; the original's
        # max_length=150 counted the prompt too and could truncate replies.
        output = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Build the Gradio UI: one textbox in, one textbox out, wired to predict().
input_box = gr.Textbox(label="Input Text")
output_box = gr.Textbox(label="Generated Response")
demo = gr.Interface(fn=predict, inputs=input_box, outputs=output_box)

# Launch only when executed as a script (Spaces runs the module directly).
if __name__ == "__main__":
    demo.launch()