Spaces: Runtime error
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig

# Model and tokenizer initialization
MODEL_NAME = "satishpednekar/sbxcertqueryhelper"
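# Note: MODEL_NAME is only used by load_model_org() below. The loader the app
# actually calls, load_model(), resolves the base model from the LoRA adapter's
# PeftConfig ("satishpednekar/sbx-qhelper-mistral-loraWeights") instead.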
def load_model_org():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # Load without 8-bit quantization; float16 halves memory but assumes a GPU
    # (on CPU, prefer float32 as load_model() below does)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, tokenizer
def load_model_gpu():
    # Load the base model first
    base_model = AutoModelForCausalLM.from_pretrained(
        "unsloth/mistral-7b-v0.3",  # base model the adapter was trained on
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # Apply the PEFT adapter weights on top of the base model
    model = PeftModel.from_pretrained(
        base_model,
        "satishpednekar/sbx-qhelper-mistral-loraWeights",  # trained LoRA weights
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "unsloth/mistral-7b-v0.3",
        trust_remote_code=True,
    )
    return model, tokenizer
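# Note: load_model_org() and load_model_gpu() are kept as alternatives but are
# never called; only load_model() below is used, so this Space runs on CPU.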
def load_model():
    config = PeftConfig.from_pretrained("satishpednekar/sbx-qhelper-mistral-loraWeights")
    # CPU-friendly load: full float32, no device_map, no quantization
    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        torch_dtype=torch.float32,
        device_map=None,
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(
        model,
        "satishpednekar/sbx-qhelper-mistral-loraWeights",
        torch_dtype=torch.float32,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        config.base_model_name_or_path,
        trust_remote_code=True,
    )
    model = model.to("cpu").eval()
    return model, tokenizer
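# Optional tweak (an assumption, not part of the original app): merging the
# LoRA weights into the base model removes the adapter indirection, which can
# speed up CPU inference. PeftModel exposes this as merge_and_unload():
#
#     model = model.merge_and_unload()
#
# If you try it, call it after PeftModel.from_pretrained() and before .eval().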
# Initialize model and tokenizer once at startup
print("Loading model...")
model, tokenizer = load_model()
print("Model loaded successfully!")
def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.95):
    """Generate a response from the fine-tuned model."""
    try:
        # Tokenize the prompt and move it to the same device as the model
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Generate; note max_length counts prompt tokens plus generated tokens
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )
        # Decode the full sequence
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the echoed prompt if it appears at the start
        if response.startswith(prompt):
            response = response[len(prompt):].strip()
        return response
    except Exception as e:
        return f"An error occurred: {e}"
# Create the Gradio interface
def main():
    with gr.Blocks(title="SBX Certification Query Helper") as demo:
        gr.Markdown("""
        # SBX Certification Query Helper
        Ask questions about SBX certifications and get detailed answers!
        """)
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question about SBX certifications...",
                    lines=3,
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                        info="Higher values make output more random; lower values make it more focused",
                    )
                    max_length = gr.Slider(
                        minimum=64,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Maximum Length",
                        info="Maximum total length of prompt plus response, in tokens",
                    )
                submit_btn = gr.Button("Get Answer", variant="primary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="Answer",
                    lines=10,
                    show_copy_button=True,
                )
        # Wire the button to the generator; top_p keeps its default value
        submit_btn.click(
            fn=generate_response,
            inputs=[input_text, max_length, temperature],
            outputs=output_text,
        )
        gr.Markdown("""
        ### Tips:
        - Be specific in your questions
        - Include the certification name if you're asking about a specific certification
        - Adjust the temperature slider to control response creativity
        """)
    return demo
if __name__ == "__main__":
    demo = main()
    demo.queue()  # queue concurrent requests; launch(enable_queue=...) was removed in Gradio 4
    demo.launch(
        share=True,  # enable a public share link (ignored on Spaces)
        server_name="0.0.0.0",  # listen on all network interfaces
    )
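A common cause of the "Runtime error" badge on a Space like this is a missing or mismatched dependency rather than the app code itself. A plausible requirements.txt, assuming recent versions of each library (the exact pins are an assumption; match them to what was tested locally):

    gradio
    torch
    transformers
    peft
    accelerate     # needed once device_map="auto" is used
    sentencepiece  # typically required by the Mistral tokenizer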