Spaces:
Sleeping
Sleeping
"""Run a single legal question through the law-llm/law-glm-10b model.

The script loads the tokenizer and model from the Hugging Face Hub, builds a
prompt ending in a ``[gMASK]`` placeholder, generates a continuation, and
leaves the decoded text in ``prediction``.
"""
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

hf_model = "law-llm/law-glm-10b"
max_question_length = 64
max_generation_length = 490

# Directory used to cache downloaded model files. ``None`` lets transformers
# use its default cache location; set a path here to override it.
model_cache_dir = None

# Use the GPU when one is available, otherwise fall back to the CPU so the
# script does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True executes Python code shipped in the model repository;
# only enable it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(
    hf_model,
    cache_dir=model_cache_dir,
    use_fast=True,
    trust_remote_code=True,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    hf_model,
    cache_dir=model_cache_dir,
    trust_remote_code=True,
)
model = model.to(device)
model.eval()

# Prompt layout is "提问: <question> 回答: [gMASK]" (提问 = question,
# 回答 = answer).
# NOTE(review): [gMASK] is presumably the span the model fills in with the
# answer -- confirm against the model card.
prompt = "提问: 犯了盗窃罪怎么判刑? 回答: [gMASK]"

# Tokenize the prompt, truncating it to at most max_question_length tokens.
encoded = tokenizer(
    prompt,
    max_length=max_question_length,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# build_inputs_for_generation is provided by the model's remote tokenizer
# code; it turns the encoded prompt into the inputs passed to generate().
model_inputs = tokenizer.build_inputs_for_generation(
    encoded,
    targets=None,
    max_gen_length=max_generation_length,
    padding=True,
)
inputs = model_inputs.to(device)

# NOTE(review): eop_token_id presumably marks the end of the generated span,
# so generation stops there -- confirm against the tokenizer implementation.
outputs = model.generate(
    **inputs,
    max_length=max_generation_length,
    eos_token_id=tokenizer.eop_token_id,
)

# Decode the first (and only) sequence in the batch back to text.
prediction = tokenizer.decode(outputs[0].tolist())