Leesn465 committed
Commit b05f4d5 · verified · 1 Parent(s): 5a95741

Update util/keywordExtract.py

Files changed (1):
  1. util/keywordExtract.py +12 -5
util/keywordExtract.py CHANGED
@@ -22,15 +22,22 @@ summary_tokenizer = PreTrainedTokenizerFast.from_pretrained("gogamza/kobart-summarization")
 summary_model = BartForConditionalGeneration.from_pretrained("gogamza/kobart-summarization")
 
 def summarize_kobart(text):
-    input_ids = summary_tokenizer.encode(text, return_tensors="pt")
+    # Cap the input length (the key fix)
+    inputs = summary_tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        max_length=1024,  # adjust to the model (very likely 512 or 1024)
+    )
+
     summary_ids = summary_model.generate(
-        input_ids,
-        max_length=160,
-        min_length=100,
+        **inputs,
+        max_new_tokens=160,  # ✅ recommended: control output length via max_new_tokens
+        min_new_tokens=100,
         num_beams=4,
         repetition_penalty=2.5,
         no_repeat_ngram_size=4,
-        early_stopping=True
+        early_stopping=True,
     )
     return summary_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
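The max_length=1024 comment hedges between 512 and 1024. One way to avoid guessing, sketched below as an illustration rather than part of this commit, is to read the encoder's input window from the checkpoint's config: BART-style configs expose it as max_position_embeddings. The 512 fallback and the placeholder input are assumptions, not taken from the commit.

from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast

summary_tokenizer = PreTrainedTokenizerFast.from_pretrained("gogamza/kobart-summarization")
summary_model = BartForConditionalGeneration.from_pretrained("gogamza/kobart-summarization")

# BART-style configs report the encoder's maximum input length here;
# the 512 fallback is a conservative assumption, not stated in the commit.
max_input_len = getattr(summary_model.config, "max_position_embeddings", 512)

inputs = summary_tokenizer(
    "아주 긴 입력 텍스트 ...",  # stand-in for a real long document
    return_tensors="pt",
    truncation=True,
    max_length=max_input_len,
)

The switch to max_new_tokens/min_new_tokens in generate() is in the same spirit: those bounds apply only to the generated summary, so they stay meaningful regardless of how the (now truncated) input length is chosen.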