Spaces:
Sleeping
Sleeping
import gradio as gr
import torch
from transformers import AutoTokenizer
from awq import AutoAWQForCausalLM

# 4-bit AWQ-quantized chat model hosted on the Hugging Face Hub.
model_path = "bragour/Camel-7b-chat-awq"

# fuse_layers=True enables the fused AWQ kernels for faster inference;
# safetensors=True loads the .safetensors weight files.
# NOTE(review): loading happens at import time and requires a CUDA GPU
# (respond() calls .cuda() on the inputs) — confirm the Space hardware.
model = AutoAWQForCausalLM.from_quantized(
    model_path,
    fuse_layers=True,
    trust_remote_code=False,
    safetensors=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)
def respond(message):
    """Generate a chat reply for *message* with the quantized Camel model.

    The prompt is wrapped in Llama-style ``[INST]`` tags, generated
    greedily (no sampling) up to 200 new tokens, and only the newly
    generated tokens are decoded — so the returned string does not echo
    the user's prompt back.

    Args:
        message: The user's chat message (plain string).

    Returns:
        The model's decoded response text, special tokens stripped.
    """
    formatted_prompt = f"<s>[INST]{message}[/INST]"
    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()
    # Deterministic greedy decoding, capped at 200 new tokens.
    result = model.generate(
        tokens,
        do_sample=False,
        max_new_tokens=200,
    )
    # result[0] contains prompt + completion; slice off the prompt tokens
    # before decoding, otherwise the [INST] prompt text is returned too.
    response = tokenizer.decode(
        result[0][tokens.shape[1]:], skip_special_tokens=True
    )
    return response
# Define the Gradio interface: one text input mapped to one text output.
demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs=["text"],
)

if __name__ == "__main__":
    # Launch only when run as a script. The original also called
    # demo.launch(inline=False) unconditionally before this guard,
    # which started the server twice and broke importing the module.
    demo.launch()