GhadeerALbadani committed on
Commit
27df063
·
1 Parent(s): 0217575

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -42
README.md CHANGED
@@ -1,42 +0,0 @@
1
- language: ar
2
- datasets:
3
- - wikipedia
4
- - Osian
5
- - 1.5B-Arabic-Corpus
6
- - oscar-arabic-unshuffled
7
- - Assafir(private)
8
- widget:
9
- - text: "يحكى أن مزارعا مخادعا قام ببيع بئر الماء الموجود في أرضه لجاره مقابل مبلغ كبير من المال"
10
- - text: "القدس مدينة تاريخية، بناها الكنعانيون في"
11
- - text: "كان يا ما كان في قديم الزمان"
12
- ---
13
- ## Testing the model using `transformers`:
14
-
15
- ```python
16
- from transformers import GPT2TokenizerFast, pipeline
17
- #for base and medium
18
- from transformers import GPT2LMHeadModel
19
- #for large and mega
20
- # pip install arabert
21
- from arabert.aragpt2.grover.modeling_gpt2 import GPT2LMHeadModel
22
-
23
- from arabert.preprocess import ArabertPreprocessor
24
-
25
- MODEL_NAME='IBB-University/ghadeer_question_answer'
26
- arabert_prep = ArabertPreprocessor(model_name=MODEL_NAME)
27
-
28
- text=""
29
- text_clean = arabert_prep.preprocess(text)
30
-
31
- model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
32
- tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)
33
- generation_pipeline = pipeline("text-generation",model=model,tokenizer=tokenizer)
34
-
35
- #feel free to try different decoding settings
36
- generation_pipeline(text,
37
- pad_token_id=tokenizer.eos_token_id,
38
- num_beams=10,
39
- max_length=200,
40
- top_p=0.9,
41
- repetition_penalty = 3.0,
42
- no_repeat_ngram_size = 3)[0]['generated_text']