# TEST / app.py
# Rajan's picture
# add files
# 80c2763 verified
# -*- coding: utf-8 -*-
"""Untitled31.ipynb
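
Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1qkQ5UtvWMcQKdxpkEgZhStoBplz9kcAo

Fine-tunes a DistilBERT sequence classifier on a five-example toy dataset to
label a skill phrase as technical (0) or soft (1), then serves the classifier
through a Gradio text interface.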
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import gradio as gr
import torch
# Toy training corpus. Each phrase is paired with its class id so the text and
# its label cannot drift apart: 0 = Technical, 1 = Soft Skill.
_examples = [
    ("Proficient in Python and Machine Learning", 0),
    ("Excellent written and verbal communication", 1),
    ("Experience with cloud platforms like AWS and Azure", 0),
    ("Skilled in data visualization and analytics", 0),
    ("Project management and Agile methodologies", 1),
]

# Column-oriented layout expected by Dataset.from_dict.
data = {
    "text": [phrase for phrase, _ in _examples],
    "label": [class_id for _, class_id in _examples],
}
dataset = Dataset.from_dict(data)

# The same checkpoint name is used for both the tokenizer and the model.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
def tokenize(batch):
    """Tokenize the "text" column of a batch with the module-level tokenizer.

    Args:
        batch: Mapping with a "text" entry holding the phrases to encode.

    Returns:
        The tokenizer output for those phrases, padded and truncated.
    """
    phrases = batch["text"]
    encoded = tokenizer(phrases, padding=True, truncation=True)
    return encoded
# Encode the whole dataset; batched=True hands `tokenize` lists of rows.
tokenized_dataset = dataset.map(tokenize, batched=True)

# Classification head with two outputs, one per label id used in the dataset.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
)

# `evaluation_strategy="no"` is intentionally not passed: the keyword was
# renamed to `eval_strategy` and newer transformers releases reject the old
# spelling. "no" is the default under either name, so omitting it keeps the
# same behaviour (no evaluation during training) on old and new versions.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    push_to_hub=False,
    report_to="none",  # disable external experiment trackers
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tunes `model` in place; the same object is used for inference below.
trainer.train()
def classify(text):
    """Classify a skill phrase as a technical skill or a soft skill.

    Args:
        text: The phrase to classify.

    Returns:
        "Soft Skill" when the highest-scoring class is 1, otherwise
        "Technical Skill".
    """
    # The model may still be in training mode after trainer.train(); switch to
    # eval mode so dropout is disabled and predictions are deterministic.
    model.eval()

    # Truncate over-long input to the model's maximum length, and place the
    # tensors on the model's device (the Trainer may have moved the model to
    # an accelerator, while the tokenizer produces CPU tensors).
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    prediction = torch.argmax(outputs.logits, dim=1).item()
    return "Soft Skill" if prediction == 1 else "Technical Skill"
# Quick smoke test of the fine-tuned classifier before the UI starts.
sample_phrase = "Familiar with cloud computing and Docker"
print(classify(sample_phrase))

# Single text box in, single text box out, backed by `classify`.
interface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="text",
)
interface.launch()