| | --- |
| | language: |
| | - ar |
| | metrics: |
| | - bleu |
| | - accuracy |
| | library_name: transformers |
| | pipeline_tag: text-classification |
| | tags: |
| | - t5 |
| | - Classification |
| | - ArabicT5 |
| | - Text Classification |
| | widget: |
| | - example_title: الثقافي |
| |   text: > |
| |     الزين فيك القناه الاولي المغربيه الزين فيك القناه الاولي المغربيه اخبارنا |
| |     المغربيه متابعه تفاجا زوار موقع القناه الاولي المغربي |
| | --- |
| | |
| | ## Arabic text classification using deep learning (ArabicT5) |
| | - SANAD: Single-label Arabic News Articles Dataset for automatic text categorization |
| | <https://www.researchgate.net/publication/333605992_SANAD_Single-Label_Arabic_News_Articles_Dataset_for_Automatic_Text_Categorization> |
| | <https://data.mendeley.com/datasets/57zpx667y9/2> |
| | |
| | Category labels and their numeric ids: |
| | |
| | ```python |
| | category_mapping = { |
| | 'Politics':1, |
| | 'Finance':2, |
| | 'Medical':3, |
| | 'Sports':4, |
| | 'Culture':5, |
| | 'Tech':6, |
| | 'Religion':7 |
| | } |
| | ``` |
| | |
| | ## Training parameters |
| | |
| | | | | |
| | | :-------------------: | :-----------:| |
| | | Training batch size | `8` | |
| | | Evaluation batch size | `8` | |
| | | Learning rate | `1e-4` | |
| | | Max length input | `128` | |
| | | Max length target | `3` | |
| | | Number workers | `4` | |
| | | Epoch | `2` | |
| | | | | |
| | |
| | ## Results |
| | |
| | | | | |
| | | :---------------------: | :-----------: | |
| | | Validation Loss | `0.0479` | |
| | | Accuracy | `96%` | |
| | | BLEU | `96%` | |
| | |
| | ## Example usage |
| | ```python |
| | |
| | from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline |
| | |
| | model_name = "Hezam/ArabicT5_Classification" |
| | model = T5ForConditionalGeneration.from_pretrained(model_name) |
| | tokenizer = T5Tokenizer.from_pretrained(model_name) |
| | generation_pipeline = pipeline("text-classification",model=model,tokenizer=tokenizer) |
| |
|
| | text = "الزين فيك القناه الاولي المغربيه الزين فيك القناه الاولي المغربيه اخبارنا المغربيه متابعه تفاجا زوار موقع القناه الاولي المغربي" |
| | output= generation_pipeline(text, |
| | num_beams=10, |
| | max_length=3, |
| | top_p=0.9, |
| | repetition_penalty = 3.0, |
| | no_repeat_ngram_size = 3) |
| | |
| | output |
| | ``` |
| | |
| | ```bash |
| | 5 |
| | ``` |