Spaces:
Sleeping
Sleeping
| import torch | |
| from peft import PeftModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer | |
# Hugging Face Hub repositories used below. The base repo name suggests a
# 4-bit bitsandbytes checkpoint; the second repo provides both the PEFT
# weights and the tokenizer.
BASE_MODEL_ID = "unsloth/llama-3.2-3b-instruct-bnb-4bit"
ADAPTER_ID = "ayush0504/Fine-Tunned-GPT"

# Load the base causal LM, wrap it with the fine-tuned PEFT weights, and
# switch the combined model to evaluation mode.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()

# The tokenizer comes from the same repository as the PEFT weights.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
def generate_response(
    question: str,
    *,
    max_new_tokens: int = 1048,
    temperature: float = 0.7,
    min_p: float = 0.1,
) -> str:
    """Generate the model's reply to a single user question.

    The question is wrapped as a one-turn chat, rendered with the tokenizer's
    chat template, and passed to ``model.generate``. Tokens are echoed to
    stdout through a ``TextStreamer`` while they are produced, and the
    finished reply is also returned.

    Args:
        question: Text of the user's message.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``generate``.
        min_p: Min-p sampling threshold forwarded to ``generate``.

    Returns:
        The decoded reply with special tokens removed. Only the newly
        generated tokens are decoded, so the prompt is not echoed back.
    """
    messages = [{"role": "user", "content": question}]

    # return_dict=True yields the attention mask together with input_ids, so
    # generate() does not have to guess which positions are padding.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)  # follow the model's device instead of guessing cuda/cpu

    # Remember how many tokens belong to the prompt; generate() returns the
    # prompt followed by the continuation.
    prompt_length = encoded["input_ids"].shape[-1]

    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    output = model.generate(
        **encoded,
        streamer=text_streamer,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        # temperature / min_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=temperature,
        min_p=min_p,
    )

    # Decode only the continuation so the returned answer excludes the prompt.
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Example usage: read one question from stdin and print the model's answer.
if __name__ == "__main__":
    question = input("Ask a legal question: ")
    if not question.strip():
        # Blank or whitespace-only input: nothing to send to the model.
        print("Please enter a valid question.")
    else:
        answer = generate_response(question)
        print("\nAnswer:", answer)