Spaces:
Build error
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
import torch

# Load the model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Mistral's tokenizer has no pad token; padding=True below requires one
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
# LoRA settings
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8, lora_alpha=16, lora_dropout=0.1,
    bias="none"
)
model = get_peft_model(model, config)
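# Optional sanity check (not in the original script): PEFT models expose
# print_trainable_parameters(), which confirms only the LoRA matrices train.
model.print_trainable_parameters()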
# Load the data (sample from data.json)
train_data = [
    {"input": "What is Canfly Inna?", "output": "Canfly Inna is a FastAPI server with RAG."},
    {"input": "How does FAISS work?", "output": "FAISS is fast nearest-neighbor search."}
]

# Convert to a training format. Trainer expects a Dataset whose items are
# dicts of tensors including labels, not a raw BatchEncoding, so wrap the
# tokenized texts; for causal LM the labels are a copy of input_ids.
train_texts = [f"Q: {d['input']}\nA: {d['output']}" for d in train_data]
train_encodings = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt")

class QADataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __len__(self):
        return self.encodings["input_ids"].shape[0]
    def __getitem__(self, idx):
        item = {k: v[idx] for k, v in self.encodings.items()}
        labels = item["input_ids"].clone()
        labels[item["attention_mask"] == 0] = -100  # ignore padding in the loss
        item["labels"] = labels
        return item

train_dataset = QADataset(train_encodings)
# Training settings
training_args = TrainingArguments(
    output_dir="./mistral-lora",
    per_device_train_batch_size=1,
    num_train_epochs=3,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)
# Run training
trainer.train()

# Save the LoRA adapter weights and the tokenizer
model.save_pretrained("./mistral-lora")
tokenizer.save_pretrained("./mistral-lora")
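A minimal sketch of loading the saved adapter back for inference, assuming the paths above: PeftModel.from_pretrained reattaches the LoRA weights to a freshly loaded base model.

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1", device_map="auto"
)
model = PeftModel.from_pretrained(base, "./mistral-lora")  # attach LoRA weights
tokenizer = AutoTokenizer.from_pretrained("./mistral-lora")

prompt = "Q: What is Canfly Inna?\nA:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))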