Spaces:
Sleeping
Sleeping
| import torch | |
| import gradio as gr | |
| from model import CustomLLM, CustomConfig | |
| from transformers import AutoTokenizer | |
class ModelLoader:
    """Hold the custom LLM and its tokenizer and expose a text-generation call.

    Construction builds ``CustomLLM`` from a default ``CustomConfig``, restores
    weights from ``pytorch_model.bin`` onto the CPU, switches the model to eval
    mode and loads the ``HuggingFaceTB/cosmo2-tokenizer`` tokenizer.
    """

    def __init__(self):
        """Build the model, load its trained weights and load the tokenizer."""
        # Load config
        self.config = CustomConfig()

        # Instantiate model
        self.model = CustomLLM(self.config)

        # Load trained weights
        # NOTE(review): torch.load unpickles the file, so the checkpoint must
        # come from a trusted source. Consider weights_only=True if the
        # installed torch version supports it and the file is a plain
        # state dict.
        state_dict = torch.load('pytorch_model.bin', map_location='cpu')
        self.model.load_state_dict(state_dict)
        self.model.eval()

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
        # Reuse the end-of-sequence token as the padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    @staticmethod
    def _as_int(value):
        """Return *value* as an ``int``, passing ``None`` through unchanged."""
        return None if value is None else int(value)

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 100,
        temperature: float = 0.9,
        top_k: int = 50,
        top_p: float = 0.95,
    ) -> str:
        """Generate text for *prompt* and return it decoded as a string.

        Args:
            prompt: Text that is tokenized and fed to the model.
            max_new_tokens: Forwarded to ``model.generate``. Coerced to ``int``
                because UI sliders may deliver the value as a float.
            temperature: Forwarded to ``model.generate`` unchanged.
            top_k: Forwarded to ``model.generate``. Coerced to ``int`` for the
                same reason as ``max_new_tokens``; fractional values are
                truncated.
            top_p: Forwarded to ``model.generate`` unchanged.

        Returns:
            The first generated sequence decoded with special tokens skipped.
            NOTE(review): whether this includes the prompt depends on
            ``CustomLLM.generate`` — confirm against the model code.
        """
        # Count-like parameters must be whole numbers; slider widgets can
        # hand over values such as 100.0 or 50.3.
        max_new_tokens = self._as_int(max_new_tokens)
        top_k = self._as_int(top_k)

        inputs = self.tokenizer(prompt, return_tensors="pt")
        input_ids = inputs.input_ids

        # Inference only: no gradients are needed.
        with torch.no_grad():
            generated = self.model.generate(
                input_ids=input_ids,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                # NOTE(review): eos_token_id is passed as None; presumably this
                # disables early stopping in CustomLLM.generate — confirm.
                eos_token_id=None,
                pad_token_id=self.tokenizer.pad_token_id,
            )

        return self.tokenizer.decode(generated[0], skip_special_tokens=True)
# Initialize model (loads weights and tokenizer once, at start-up)
loader = ModelLoader()

# Create Gradio interface. The input components are passed positionally to
# loader.generate in the order: prompt, max_new_tokens, temperature, top_k, top_p.
interface = gr.Interface(
    fn=loader.generate,
    inputs=[
        gr.Textbox(lines=4, label="Input Prompt"),
        # step=1 keeps the count-like sliders on whole numbers; without an
        # explicit step Gradio derives one from the range, which can be
        # fractional.
        gr.Slider(1, 500, value=100, step=1, label="Max New Tokens"),
        gr.Slider(0.1, 2.0, value=0.9, label="Temperature"),
        gr.Slider(1, 100, value=50, step=1, label="Top K"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top P"),
    ],
    outputs=gr.Textbox(label="Generated Output"),
    title="Custom LLM Demo",
    description="Generate text using your custom-trained LLM",
)

# share=True asks Gradio to create a publicly reachable share link.
# NOTE(review): likely unnecessary when hosted on Hugging Face Spaces — confirm.
interface.launch(share=True)