"""Untitled31.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qkQ5UtvWMcQKdxpkEgZhStoBplz9kcAo
"""

import gradio as gr
import torch
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

# Toy training set: each phrase in "text" is paired, by position, with the
# class id at the same index in "label".
# Label encoding (mirrors the mapping used by classify()):
#   0 = technical skill, 1 = soft skill.
data = {
    "text": [
        "Proficient in Python and Machine Learning",
        "Excellent written and verbal communication",
        "Experience with cloud platforms like AWS and Azure",
        "Skilled in data visualization and analytics",
        "Project management and Agile methodologies",
    ],
    "label": [0, 1, 0, 0, 1],
}

# Wrap the in-memory dict as a Dataset so it can be tokenized with .map().
dataset = Dataset.from_dict(data)

# One checkpoint name is used for both the tokenizer and the classifier so the
# vocabulary and the pretrained weights come from the same model.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize(batch):
    """Tokenize the ``"text"`` entry of a batch for use with ``Dataset.map``.

    Args:
        batch: Mapping with a ``"text"`` entry holding the strings to encode.

    Returns:
        The tokenizer's output for those strings, padded and truncated.
    """
    # padding=True pads to the longest sequence in this batch; truncation=True
    # clips anything beyond the tokenizer's maximum length.
    return tokenizer(batch["text"], padding=True, truncation=True)


# Run tokenize() over the dataset in batches; its output is added as columns.
tokenized_dataset = dataset.map(tokenize, batched=True)

# Load the pretrained checkpoint configured for two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
)

# Training configuration for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./results",
    # Evaluation is off by default, so the strategy is deliberately left
    # unset: the keyword was renamed from `evaluation_strategy` to
    # `eval_strategy`, and passing the old name fails on newer transformers
    # releases. Omitting it behaves the same on old and new versions.
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    push_to_hub=False,
    report_to="none",  # do not send logs to any experiment tracker
)

# Training only: no evaluation dataset or metric function is supplied.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune; classify() below uses this same `model` object afterwards.
trainer.train()


def classify(text):
    """Label a piece of text as a technical or a soft skill.

    Args:
        text: The string to classify.

    Returns:
        ``"Soft Skill"`` when the model predicts class 1, otherwise
        ``"Technical Skill"``.
    """
    # Training leaves the model in train mode; switch to eval so dropout is
    # disabled and the same input always produces the same prediction.
    model.eval()

    # Truncate so over-long input cannot exceed the model's maximum length.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Training may have moved the model to an accelerator; the inputs have to
    # live on the same device as the weights or the forward pass fails.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    prediction = torch.argmax(outputs.logits, dim=1).item()
    return "Soft Skill" if prediction == 1 else "Technical Skill"


# Quick smoke test of the fine-tuned classifier on a phrase not in the
# training data.
print(classify("Familiar with cloud computing and Docker"))

# Minimal web UI: one text input, the predicted label as text output.
interface = gr.Interface(fn=classify, inputs="text", outputs="text")
interface.launch()