|
|
--- |
|
|
language: |
|
|
- ar |
|
|
metrics: |
|
|
- bleu |
|
|
- accuracy |
|
|
library_name: transformers |
|
|
pipeline_tag: text-classification |
|
|
tags: |
|
|
- t5 |
|
|
- Classification |
|
|
- ArabicT5 |
|
|
- Text Classification |
|
|
widget: |
|
|
- example_title: > |
|
|
الديني |
|
|
- text: > |
|
|
الحمد لله رب العالمين والصلاة والسلام على سيد المرسلين نبينا محمد وآله وصحبه أجمعين،وبعد:فإنه يجب على العبد أن يتجنب الذنوب كلها دقها وجلها صغيرها وكبيرها وأن يتعاهد نفسه بالتوبة الصادقة والإنابة إلى ربه. قال تعالى: (وَتُوبُوا إِلَى اللَّهِ جَمِيعًا أَيُّهَا الْمُؤْمِنُونَ لَعَلَّكُمْ تُفْلِحُونَ)النور 31. |
|
|
--- |
|
|
|
|
|
## Arabic text classification using deep learning (ArabicT5) |
|
|
- SANAD: Single-label Arabic News Articles Dataset for automatic text categorization |
|
|
<https://www.researchgate.net/publication/333605992_SANAD_Single-Label_Arabic_News_Articles_Dataset_for_Automatic_Text_Categorization> |
|
|
<https://data.mendeley.com/datasets/57zpx667y9/2> |
|
|
|
|
|
```python |
category_mapping = { |
|
|
'Politics':1, |
|
|
'Finance':2, |
|
|
'Medical':3, |
|
|
'Sports':4, |
|
|
'Culture':5, |
|
|
'Tech':6, |
|
|
'Religion':7 |
|
|
} |
``` |
|
|
|
|
|
## Training parameters |
|
|
|
|
|
| Parameter | Value | |
|
|
| :-------------------: | :-----------:| |
|
|
| Training batch size | `8` | |
|
|
| Evaluation batch size | `8` | |
|
|
| Learning rate | `1e-4` | |
|
|
| Max length input | `200` | |
|
|
| Max length target | `3` | |
|
|
| Number workers | `4` | |
|
|
| Epoch | `2` | |
|
|
| | | |
|
|
|
|
|
## Results |
|
|
|
|
|
| Metric | Value | |
|
|
| :---------------------: | :-----------: | |
|
|
| Validation Loss | `0.0479` | |
|
|
| Accuracy | `96.49%` | |
|
|
| BLEU | `96.49%` | |
|
|
|
|
|
## Example usage |
|
|
```python |
|
|
|
|
|
``` |
|
|
```bash |
|
|
|
|
|
``` |