---
datasets:
- microsoft/ms_marco
language:
- en
base_model:
- google-t5/t5-base
pipeline_tag: text2text-generation
---

# Question Decomposer based on T5 (seq2seq)

Example: What is the capital of France and when was it established?

- What is the capital of France?
- When was the capital of France established?

Check out my demo here 👉🏻 [demo](https://huggingface.co/spaces/thenHung/Demo-question-decomposer)

## How to Use

```python
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Pick the best available device: Apple MPS, then CUDA, then CPU.
device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned question-decomposition model and its tokenizer.
model_path = "thenHung/question_decomposer_t5"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)
model.to(device)
model.eval()

# Build the prompt format the model was trained on:
# "decompose question: <question>"
question = "Who is taller between John and Mary?"
input_text = f"decompose question: {question}"

# Tokenize WITHOUT padding to max_length — a single example needs no
# padding, and keeping the attention mask (instead of only input_ids)
# ensures generation never attends to pad tokens.
inputs = tokenizer(
    input_text,
    max_length=128,
    truncation=True,
    return_tensors="pt"
).to(device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,  # forwards input_ids AND attention_mask
        max_length=128,
        num_beams=4,
        early_stopping=True
    )

# The model joins sub-questions with a literal " [SEP] " marker.
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
sub_questions = decoded_output.split(" [SEP] ")

# Print sub-questions
print(sub_questions)
# ['What is the height of John?', 'What is the height of Mary?']
```