# -*- coding: utf-8 -*-
"""Untitled31.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qkQ5UtvWMcQKdxpkEgZhStoBplz9kcAo

Fine-tunes a DistilBERT sequence classifier on a tiny in-memory dataset of
resume phrases (label 0 = technical skill, label 1 = soft skill), prints one
sample prediction, and serves the classifier through a Gradio text interface.
"""

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset
import gradio as gr
import torch

# Toy training set: each entry in "text" is paired with the label at the same
# index in "label".
data = {
    "text": [
        "Proficient in Python and Machine Learning",
        "Excellent written and verbal communication",
        "Experience with cloud platforms like AWS and Azure",
        "Skilled in data visualization and analytics",
        "Project management and Agile methodologies",
    ],
    "label": [0, 1, 0, 0, 1],  # 0 = Technical, 1 = Soft Skill
}

dataset = Dataset.from_dict(data)

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize(batch):
    """Tokenize the "text" column of a batch, padding and truncating it."""
    return tokenizer(batch["text"], padding=True, truncation=True)


tokenized_dataset = dataset.map(tokenize, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=2
)

# The evaluation strategy is intentionally left at its default ("no"): no
# eval dataset is supplied, and the keyword that used to spell this out
# (`evaluation_strategy`) is not accepted by newer transformers releases.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    push_to_hub=False,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()


def classify(text):
    """Classify a phrase as a technical skill or a soft skill.

    Args:
        text: The phrase to classify.

    Returns:
        "Soft Skill" when the model predicts label 1, otherwise
        "Technical Skill".
    """
    # Training leaves the model in train mode; switch to eval mode so
    # dropout is disabled and predictions are deterministic.
    model.eval()
    # Truncate over-long input to the model's maximum length, and place the
    # tensors on the model's device (the Trainer may have moved the model to
    # an accelerator during training).
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(
        model.device
    )
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1).item()
    return "Soft Skill" if prediction == 1 else "Technical Skill"


print(classify("Familiar with cloud computing and Docker"))

interface = gr.Interface(fn=classify, inputs="text", outputs="text")
interface.launch()