onebeans committed on
Commit
6bbd250
·
verified ·
1 Parent(s): 9ae43b7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +52 -0
README.md CHANGED
@@ -69,6 +69,58 @@ training_args = TrainingArguments(
69
  | 8100 | 1.499900 | 1.525138 |
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  # 실행환경
74
 
 
69
  | 8100 | 1.499900 | 1.525138 |
70
 
71
 
72
+ # 실행 코드
73
+
74
+ ```python
75
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
76
+ import torch
77
+
78
+ # Quantization config (must match QLoRA settings used during fine-tuning)
79
+ bnb_config = BitsAndBytesConfig(
80
+ load_in_4bit=True,
81
+ bnb_4bit_quant_type="nf4",
82
+ bnb_4bit_use_double_quant=True,
83
+ bnb_4bit_compute_dtype=torch.float16,
84
+ )
85
+
86
+ # Load tokenizer and model (local or hub path)
87
+ model_path = "your-username/your-model-name" # or local path like "./saved_model(0412)"
88
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
89
+ model = AutoModelForCausalLM.from_pretrained(
90
+ model_path,
91
+ quantization_config=bnb_config,
92
+ device_map="auto"
93
+ )
94
+ model.eval()
95
+
96
+ # Define prompt using ChatML format (Qwen-style)
97
+ def build_chatml_prompt(question: str) -> str:
98
+ system_msg = "<|im_start|>system\n당신은 유용한 한국어 도우미입니다.<|im_end|>\n"
99
+ user_msg = f"<|im_start|>user\n{question}<|im_end|>\n"
100
+ return system_msg + user_msg + "<|im_start|>assistant\n"
101
+
102
+ # Run inference
103
+ def generate_response(question: str, max_new_tokens: int = 128) -> str:
104
+ prompt = build_chatml_prompt(question)
105
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
106
+
107
+ with torch.no_grad():
108
+ outputs = model.generate(
109
+ **inputs,
110
+ max_new_tokens=max_new_tokens,
111
+ do_sample=False,
112
+ top_p=0.9,
113
+ temperature=0.7,
114
+ eos_token_id=tokenizer.eos_token_id,
115
+ )
116
+
117
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
118
+
119
+ # Example
120
+ question = "한국의 수도는 어디인가요?"
121
+ response = generate_response(question)
122
+ print("모델 응답:\n", response)
123
+ ```
124
 
125
  # 실행환경
126