Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# Load the model and tokenizer from the Hugging Face Hub.

# NOTE(review): not referenced anywhere in the visible part of this file;
# kept only so that any external use of the name keeps working.
model_id = "unsloth/Phi-3.5-mini-instruct"

# Hub repository that provides both the weights and the tokenizer files.
# NOTE(review): despite the name, this repo is loaded directly as a full
# causal LM below, not attached as a PEFT adapter -- confirm that is intended.
peft_model_id = "eronariodito/Llama_3.2_1B_SFT_The_Tome"

# No device_map / .to(...) is passed, so the library's default placement is
# used (presumably CPU on this host -- TODO confirm).
model = AutoModelForCausalLM.from_pretrained(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Tokenizing with padding enabled raises if the tokenizer has no pad token, so
# fall back to the EOS token when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Take the pad id from the tokenizer rather than from model.config.eos_token_id:
# the tokenizer value is a single int, whereas a config's eos_token_id may be a
# list of ids for some checkpoints.
model.config.pad_token_id = tokenizer.pad_token_id
| # Function for generating responses using the model | |
def generate_response(prompt):
    """Generate a text completion for ``prompt`` with the module-level model.

    Args:
        prompt: Text entered by the user. Tokenized input is truncated to
            128 tokens.

    Returns:
        The decoded first generated sequence with special tokens stripped,
        or an empty string when ``prompt`` is ``None``, empty, or whitespace.
        The whole output sequence is decoded, so for decoder-only models the
        text is expected to start with the prompt itself.
    """
    # Nothing to condition on; skip the (slow) generation call entirely.
    if not prompt or not prompt.strip():
        return ""

    # A single prompt needs no padding. Leaving it off also avoids the
    # tokenizer error raised when no pad token is configured.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)
    # Follow the model's device instead of hard-coding one.
    inputs = inputs.to(model.device)

    output = model.generate(
        input_ids=inputs["input_ids"],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs["attention_mask"],
        # Budget for *generated* tokens only. A total-length cap would leave
        # almost no room for output once the prompt nears the 128-token limit.
        max_new_tokens=150,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)
# Web UI: one text box for the prompt and one for the generated text.
_prompt_box = gr.Textbox(label="Input Prompt")
_response_box = gr.Textbox(label="Generated Response")

iface = gr.Interface(
    fn=generate_response,      # called with the prompt text on submit
    inputs=_prompt_box,
    outputs=_response_box,
    live=False,                # run only on explicit submit, not on every edit
    allow_flagging="never",    # no flagging controls in the UI
)

# Start the app; share=True additionally requests a public share link.
iface.launch(share=True)