anitha2520
/

debug_divas45model

Model card Files Files and versions

anitha2520 committed on Feb 21, 2025

Commit

64382c6

·

verified ·

1 Parent(s): 6cd7cbb

Rename model to model.py

Files changed (2) hide show

model +0 -0
model.py +85 -0

model DELETED Viewed

File without changes

model.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import torch
+from datasets import load_dataset
+from unsloth import (
+    FastLanguageModel,
+    UnslothTrainer,
+    UnslothTrainingArguments,
+    unsloth_train,
+)
+# Load dataset
+file_path = "/content/debug_divas_dataset.json"  # Corrected file path
+dataset = load_dataset("json", data_files=file_path)
+# Load Unsloth's FastLanguageModel and tokenizer
+model_name = "unsloth/mistral-7b-instruct"  # Ensure it's an instruct model for translation
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=128,  # Adjust based on your dataset
+    dtype=torch.float32,  # Use float32 to avoid FP16 issues
+    load_in_4bit=False,   # Disable 4-bit quantization if not needed
+)
+# Preprocessing function
+def preprocess_function(examples):
+    # Combine instruction and input for the model
+    inputs = tokenizer(
+        [f"Translate the following English sentence to colloquial Tamil: {text}" for text in examples["input"]],
+        padding="max_length",
+        truncation=True,
+        max_length=128,
+    )
+    labels = tokenizer(
+        examples["output"], padding="max_length", truncation=True, max_length=128
+    )
+    inputs["labels"] = labels["input_ids"]
+    return inputs
+# Tokenize dataset
+tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
+# Split dataset
+split_datasets = tokenized_datasets["train"].train_test_split(test_size=0.2, seed=42)
+train_dataset, test_dataset = split_datasets["train"], split_datasets["test"]
+# Initialize UnslothTrainer
+trainer = UnslothTrainer(
+    model=model,
+    train_dataset=train_dataset,
+    eval_dataset=test_dataset,
+    tokenizer=tokenizer,
+    args={
+        "per_device_train_batch_size": 8,
+        "per_device_eval_batch_size": 8,
+        "num_train_epochs": 3,
+        "learning_rate": 2e-5,
+        "save_strategy": "epoch",
+        "evaluation_strategy": "epoch",
+        "fp16": False,  # Disable mixed precision training
+    }
+)
+# Train with Unsloth
+unsloth_train(trainer)
+# Save fine-tuned model
+trainer.model.save_pretrained("./fine_tuned_model")
+tokenizer.save_pretrained("./fine_tuned_model")
+# Load fine-tuned model
+fine_tuned_model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="./fine_tuned_model",
+    max_seq_length=128,
+    dtype=torch.float32,
+    load_in_4bit=False,
+)
+# Translation inference
+device = "cuda" if torch.cuda.is_available() else "cpu"
+fine_tuned_model.to(device)
+input_text = "The pharmacy is near the bus stop."
+instruction = "Translate the following English sentence to colloquial Tamil"
+inputs = tokenizer(f"{instruction}: {input_text}", return_tensors="pt").to(device)
+# Generate translation
+translated_tokens = fine_tuned_model.generate(**inputs)
+translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
+print("Translated Tamil Text:", translated_text)