File size: 3,944 Bytes
4970f90 d86a1cb 7d4faf6 d86a1cb 4970f90 d86a1cb 4970f90 2e84634 4970f90 f4b60d6 4970f90 f4b60d6 4970f90 d86a1cb 4970f90 f3056d4 4970f90 61503aa f3056d4 4970f90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
---
library_name: transformers
tags: []
---
### Model Description
``` python
import os
import torch
import pandas as pd
from datasets import Dataset
from trl import SFTTrainer
from huggingface_hub import login
import re
from peft import LoraConfig, get_peft_model
import numpy as np
from transformers import (
AutoTokenizer,
Llama4ForConditionalGeneration,
BitsAndBytesConfig,
TrainingArguments,
DataCollatorForLanguageModeling,
AutoModelForCausalLM
)
# NOTE: this example expects transformers 4.51.3 to be installed.

# Authenticate against the Hugging Face Hub (replace the placeholder token).
hf_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxe"
login(token=hf_token)

# Load the tokenizer and the model weights from the Hub repository.
model_id = "NYUAD-ComNets/NYUAD_Llama4_Inheritance_Solver2"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = Llama4ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
# Prompt template used at inference time. It contains two positional `{}`
# slots: the first (under "### Context:") receives the question text and the
# second (under "### Response:") receives the response text; generate_answer
# fills the second slot with an empty string.
inference_prompt_template = """
أنت خبير في علم المواريث في الشريعة الإسلامية. استخدم التفكير خطوة بخطوة لتحديد أنصبة الورثة. ابدأ دائماً بذكر الورثة، وتحديد نوعهم (مثل: زوج، ابن، أخ)، ثم تحقق من وجود فرع وارث أو أصل وارث. بعد ذلك، طبّق الفرائض المقدّرة ثم قواعد التعصيب إذا وُجد فائض في التركة.
اتبع الخطوات التالية:
اذكر الورثة.
حدد الفروض المقدّرة لكل وارث.
افحص وجود الحجب والتقديم.
وزّع الباقي إن وجد بالتعصيب.
تحقق من أن مجموع الأنصبة يساوي كامل التركة.
Then output your final answer using a single word only from this list A, B, C, D, E, F.
### Context:
{}
### Response:
{}"""
def generate_answer(context):
    """Generate the model's answer label for one inheritance question.

    The question text is inserted into ``inference_prompt_template``, the
    model generates up to 10 new tokens, and the label is taken as the last
    character of the decoded text that follows the "### Response:" marker.

    Args:
        context: Question text followed by its answer options.

    Returns:
        The last character of the decoded response section, or an empty
        string when the marker is absent or nothing follows it.
    """
    prompt = inference_prompt_template.format(context, "")
    # NOTE(review): the EOS token is appended to the prompt before generation;
    # presumably this mirrors the fine-tuning format — confirm.
    inputs = tokenizer(prompt + tokenizer.eos_token, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=10,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
            temperature=0.1,
            top_p=1,
        )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(response)
    # The decoded text echoes the prompt, so the answer is whatever follows
    # the response marker. Guard against a missing marker or an empty
    # completion instead of raising IndexError.
    sections = response[0].split("### Response:")
    if len(sections) < 2 or not sections[1]:
        return ""
    # Return the label; without this the caller always received None.
    return sections[1][-1]
# Run the model over every question in the CSV and print each predicted label.
df = pd.read_csv('/path_to/islamic_inheritance_problem.csv.csv')
field_names = [
    "question",
    "option1",
    "option2",
    "option3",
    "option4",
    "option5",
    "option6",
]
for fields in zip(*(df[name].values for name in field_names)):
    # The question and its six options, separated by single spaces.
    example = ' '.join(fields)
    predicted_label = generate_answer(example)
    print("Predicted:", predicted_label)
```
### Training hyperparameters
- Low-Rank Adaptation (LoRA)
- Training: 7 epochs
- Learning rate: 0.0002
- Batch size: 1 per device
- Gradient accumulation steps: 1
- Optimizer: `paged_adamw_32bit`
- Warmup steps: 10
- 4-bit quantization with the NormalFloat 4 format
## Model Card Contact
nouar.aldahoul@nyu.edu
## BibTeX entry and citation info
```
@inproceedings{aldahoul2025nyuad,
title={{NYUAD} at {QIAS} Shared Task: Benchmarking the Legal Reasoning of {LLMs} in {Arabic} {Islamic} Inheritance Cases},
author={AlDahoul, Nouar and Zaki, Yasir},
booktitle={Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
pages={861--866},
year={2025}
}
@article{aldahoul2025benchmarking,
title={Benchmarking the Legal Reasoning of LLMs in Arabic Islamic Inheritance Cases},
author={AlDahoul, Nouar and Zaki, Yasir},
journal={arXiv preprint arXiv:2508.15796},
year={2025}
}
```
|