student2222333051 committed on
Commit
1f098c8
·
verified ·
1 Parent(s): b0e5f93

Rename ine_tune.py to fine_tune.py

Browse files
Files changed (1) hide show
  1. ine_tune.py → fine_tune.py +8 -8
ine_tune.py → fine_tune.py RENAMED
@@ -2,8 +2,8 @@
2
  from datasets import load_dataset, load_metric
3
  from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
4
 
5
- # 1️⃣ Деректерді жүктеу (ArXiv)
6
- dataset = load_dataset("scientific_papers", "arxiv")
7
 
8
  # Шағын subset (тест үшін)
9
  dataset["train"] = dataset["train"].select(range(1000))
@@ -20,7 +20,7 @@ max_output_length = 200
20
  # 3️⃣ Tokenization
21
  def preprocess_function(batch):
22
  inputs = tokenizer(batch["article"], max_length=max_input_length, truncation=True)
23
- outputs = tokenizer(batch["abstract"], max_length=max_output_length, truncation=True)
24
  batch["input_ids"] = inputs["input_ids"]
25
  batch["attention_mask"] = inputs["attention_mask"]
26
  batch["labels"] = outputs["input_ids"]
@@ -41,7 +41,7 @@ def compute_metrics(eval_pred):
41
 
42
  # 5️⃣ TrainingArguments
43
  training_args = TrainingArguments(
44
- output_dir="./bart-finetuned-arxiv",
45
  evaluation_strategy="steps",
46
  eval_steps=500,
47
  save_steps=500,
@@ -54,6 +54,7 @@ training_args = TrainingArguments(
54
  fp16=True,
55
  logging_dir="./logs",
56
  logging_steps=100,
 
57
  )
58
 
59
  # 6️⃣ Trainer
@@ -69,8 +70,7 @@ trainer = Trainer(
69
  # 7️⃣ Fine-tune бастау
70
  trainer.train()
71
 
72
- # 8️⃣ Модельді сақтау
73
- model.save_pretrained("./bart-finetuned-arxiv")
74
- tokenizer.save_pretrained("./bart-finetuned-arxiv")
75
 
76
- print("Fine-tuning аяқталды! Модель сақталды ./bart-finetuned-arxiv")
 
2
  from datasets import load_dataset, load_metric
3
  from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
4
 
5
+ # 1️⃣ Датасетті жүктеу (Marcov ArXiv)
6
+ dataset = load_dataset("marcov/scientific_papers_arxiv_promptsource")
7
 
8
  # Шағын subset (тест үшін)
9
  dataset["train"] = dataset["train"].select(range(1000))
 
20
  # 3️⃣ Tokenization
21
  def preprocess_function(batch):
22
  inputs = tokenizer(batch["article"], max_length=max_input_length, truncation=True)
23
+ outputs = tokenizer(batch["summary"], max_length=max_output_length, truncation=True)
24
  batch["input_ids"] = inputs["input_ids"]
25
  batch["attention_mask"] = inputs["attention_mask"]
26
  batch["labels"] = outputs["input_ids"]
 
41
 
42
  # 5️⃣ TrainingArguments
43
  training_args = TrainingArguments(
44
+ output_dir="./bart-finetuned-arxiv-hub",
45
  evaluation_strategy="steps",
46
  eval_steps=500,
47
  save_steps=500,
 
54
  fp16=True,
55
  logging_dir="./logs",
56
  logging_steps=100,
57
+ push_to_hub=True # Fine-tuned моделді HuggingFace Hub-қа жіберу
58
  )
59
 
60
  # 6️⃣ Trainer
 
70
  # 7️⃣ Fine-tune бастау
71
  trainer.train()
72
 
73
+ # 8️⃣ Модельді HuggingFace Hub-қа push жасау
74
+ trainer.push_to_hub("username/bart-finetuned-arxiv")
 
75
 
76
+ print("Fine-tuning аяқталды! Модель Hub-қа жіберілді.")