YAML Metadata Warning: empty or missing yaml metadata in repo card (https://huggingface.co/docs/hub/model-cards#model-card-metadata)

license: ncsa
datasets:
  - nvidia/OpenCodeReasoning
language:
  - vi
metrics:
  - accuracy
base_model:
  - sentence-transformers/all-mpnet-base-v2
new_version: nari-labs/Dia-1.6B
pipeline_tag: text-classification
library_name: adapter-transformers
tags:
  • code ---!pip install sentence-transformers transformers datasets torch -q !pip install --upgrade transformers !rm -rf /content/drive/AI_CAPSTONE_2 from google.colab import drive drive.mount('/content/drive') import pandas as pd data = pd.read_csv('/content/drive/MyDrive/AI_CAPSTONE_2/data1.csv', sep=',', encoding='utf-8-sig')

Hiển thị kiểm tra (display a quick sanity check of the loaded data)

print(data.head()) print(data.columns) print(data.columns) from datasets import Dataset

Đảm bảo 2 cột đúng tên (make sure the two columns carry the expected names)

dataset = Dataset.from_pandas(data[['input', 'output']]) from transformers import T5Tokenizer, T5ForConditionalGeneration

model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)


def preprocess_function(example):
    """Tokenize one (input, output) pair for T5 seq2seq fine-tuning.

    Returns the tokenizer encoding of the prefixed input text with a
    ``labels`` field holding the tokenized target ids.
    """
    # Guard against missing values: treat None as the empty string.
    input_text = "command: " + (example['input'] if example['input'] is not None else "")
    target_text = example['output'] if example['output'] is not None else ""

    model_input = tokenizer(input_text, truncation=True,
                            padding='max_length', max_length=64)
    target = tokenizer(target_text, truncation=True,
                       padding='max_length', max_length=32)

    # BUGFIX: padded label positions must be set to -100 so the loss
    # ignores them; the original kept the pad-token ids in the labels,
    # which trains the model to emit <pad> tokens.
    pad_id = tokenizer.pad_token_id
    model_input["labels"] = [
        tok if tok != pad_id else -100 for tok in target["input_ids"]
    ]
    return model_input

tokenized_dataset = dataset.map(preprocess_function) tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1) train_dataset = tokenized_dataset["train"] eval_dataset = tokenized_dataset["test"] from transformers import T5ForConditionalGeneration, T5Tokenizer

Khởi tạo tokenizer và model (initialize the tokenizer and the model)

tokenizer = T5Tokenizer.from_pretrained("t5-base") model = T5ForConditionalGeneration.from_pretrained("t5-base")

(Giả sử train_dataset và eval_dataset cũng đã được định nghĩa — assumes train_dataset and eval_dataset are already defined by the cells above)

from transformers import TrainingArguments, Trainer

training_args = TrainingArguments( output_dir="/content/drive/MyDrive/AI_CAPSTONE_2/t5-navigation-model", eval_steps=500, learning_rate=2e-4, per_device_train_batch_size=12, per_device_eval_batch_size=12, num_train_epochs=10, weight_decay=0.01, save_total_limit=2, save_strategy="epoch", logging_dir="/content/logs", logging_steps=50, push_to_hub=False )

trainer = Trainer( model=model, args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, tokenizer=tokenizer ) trainer.train() #Step Training Loss 50 1.421000 100 0.123900 150 0.082400 200 0.060600 250 0.042900 300 0.033500 350 0.032200 400 0.022700 450 0.019900 500 0.018800 550 0.018900 600 0.014100 650 0.010800 700 0.012500 750 0.011200 800 0.009500 850 0.009600 900 0.010300 950 0.006100 1000 0.007000 1050 0.005400 1100 0.007000 1150 0.004800 1200 0.005000 1250 0.005100 1300 0.004000 1350 0.004900 1400 0.005100 1450 0.004400 1500 0.002800 1550 0.003800 1600 0.003500 [1830/1830 9:13:54, Epoch 10/10] Step Training Loss 50 1.421000 100 0.123900 150 0.082400 200 0.060600 250 0.042900 300 0.033500 350 0.032200 400 0.022700 450 0.019900 500 0.018800 550 0.018900 600 0.014100 650 0.010800 700 0.012500 750 0.011200 800 0.009500 850 0.009600 900 0.010300 950 0.006100 1000 0.007000 1050 0.005400 1100 0.007000 1150 0.004800 1200 0.005000 1250 0.005100 1300 0.004000 1350 0.004900 1400 0.005100 1450 0.004400 1500 0.002800 1550 0.003800 1600 0.003500 1650 0.002500 1700 0.002900 1750 0.002800 1800 0.002200 model.save_pretrained("/content/drive/MyDrive/AI_CAPSTONE_2/t5-navigation-model") tokenizer.save_pretrained("/content/drive/MyDrive/AI_CAPSTONE_2/t5-navigation-model") from transformers import T5Tokenizer, T5ForConditionalGeneration

Load lại model đã fine-tune (reload the fine-tuned model from Drive)

model_path = "/content/drive/MyDrive/AI_CAPSTONE_2/t5-navigation-model"
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer.from_pretrained(model_path)
# NOTE(review): the fine-tuned model loaded above is never consulted by
# convert_to_function below — the mapping is purely rule-based.  Confirm
# whether model.generate() was meant to be used here instead.


def convert_to_function(sentence):
    """Map a Vietnamese voice command to a pseudo function-call string."""
    sentence = sentence.lower()

    # "play music" carries an argument: the genre after the trigger phrase.
    if "phát nhạc" in sentence:
        genre = sentence.replace("phát nhạc", "").strip()
        return f"play_music({genre})"

    # Argument-less commands, checked in the original precedence order.
    for phrase, command in (
        ("bật nhận diện", "start_recognition()"),
        ("mô tả xung quanh", "describe_surroundings()"),
        ("ai đang đứng trước mặt", "identify_person()"),
    ):
        if phrase in sentence:
            return command

    # "find" also carries an argument: the object after the trigger phrase.
    if "tìm" in sentence:
        item = sentence.replace("tìm", "").strip()
        return f"find_object({item})"

    for phrase, command in (
        ("tăng âm lượng", "increase_volume()"),
        ("thoát ứng dụng", "exit_app()"),
    ):
        if phrase in sentence:
            return command

    # Nothing matched: fall back to a sentinel command.
    return "unknown_command()"

Ví dụ test (example test run of the rule-based converter)

test_sentences = [ "Phát nhạc edm", "Mô tả xung quanh giúp tôi với", "Bật nhận diện lên", "Ai đang đứng trước mặt tôi?", "Tìm cái điện thoại cho tôi", "Tăng âm lượng lên", "Tôi muốn thoát ứng dụng" ]

for sentence in test_sentences: command = convert_to_function(sentence) print(f"Input: {sentence}") print(f"Output: {command}") print("-" * 30)

Downloads last month
-
Safetensors
Model size
60.5M params
Tensor type
F32
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support