smileLacto committed on
Commit
9251748
·
verified ·
1 Parent(s): 9b2984d

uploaded files

Browse files
Files changed (2) hide show
  1. app.py +59 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
3
+ from datasets import Dataset
4
+ import gradio as gr
5
+ import torch
6
+
7
+ data = {
8
+ "text": [
9
+ "Proficient in Python and Machine Learning",
10
+ "Excellent written and verbal communication",
11
+ "Experience with cloud platforms like AWS and Azure",
12
+ "Skilled in data visualization and analytics",
13
+ "Project management and Agile methodologies"
14
+ ],
15
+ "label": [0, 1, 0, 0, 1] # 0 = Technical, 1 = Soft Skill
16
+ }
17
+
18
+ dataset = Dataset.from_dict(data)
19
+
20
+ model_checkpoint = "distilbert-base-uncased"
21
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
22
+
23
+ def tokenize(batch):
24
+ return tokenizer(batch["text"], padding=True, truncation=True)
25
+
26
+ tokenized_dataset = dataset.map(tokenize, batched=True)
27
+
28
+ model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
29
+
30
+ training_args = TrainingArguments(
31
+ output_dir="./results",
32
+ evaluation_strategy="no",
33
+ per_device_train_batch_size=2,
34
+ num_train_epochs=3,
35
+ logging_steps=10,
36
+ push_to_hub=False,
37
+ report_to="none"
38
+ )
39
+
40
+ trainer = Trainer(
41
+ model=model,
42
+ args=training_args,
43
+ train_dataset=tokenized_dataset
44
+ )
45
+
46
+ trainer.train()
47
+
48
+ def classify(text):
49
+ inputs = tokenizer(text, return_tensors="pt")
50
+ with torch.no_grad():
51
+ outputs = model(**inputs)
52
+ prediction = torch.argmax(outputs.logits, dim=1).item()
53
+ return "Soft Skill" if prediction == 1 else "Technical Skill"
54
+
55
+ print(classify("Familiar with cloud computing and Docker"))
56
+
57
+ interface = gr.Interface(fn=classify, inputs="text", outputs="text")
58
+ interface.launch()
59
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ datasets
2
+ gradio
3
+ torch
4
+ transformers