Rajan committed on
Commit
80c2763
verified
1 Parent(s): 0ef6154
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Untitled31.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1qkQ5UtvWMcQKdxpkEgZhStoBplz9kcAo
8
+ """
9
+
10
+
11
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
12
+ from datasets import Dataset
13
+ import gradio as gr
14
+ import torch
15
+
16
# Toy training corpus as (text, label) pairs; 0 = Technical, 1 = Soft Skill.
_labelled_examples = [
    ("Proficient in Python and Machine Learning", 0),
    ("Excellent written and verbal communication", 1),
    ("Experience with cloud platforms like AWS and Azure", 0),
    ("Skilled in data visualization and analytics", 0),
    ("Project management and Agile methodologies", 1),
]

# Column-oriented view of the corpus, in the layout Dataset.from_dict takes.
data = {
    "text": [example_text for example_text, _ in _labelled_examples],
    "label": [example_label for _, example_label in _labelled_examples],
}

dataset = Dataset.from_dict(data)

# Pretrained checkpoint shared by the tokenizer and the classifier.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
31
+
32
def tokenize(batch):
    """Tokenize the "text" column of a batch with the module-level tokenizer.

    Args:
        batch: Mapping with a "text" entry holding the strings to encode.

    Returns:
        The tokenizer's output for those strings, with padding and
        truncation both enabled.
    """
    texts = batch["text"]
    encoded = tokenizer(texts, padding=True, truncation=True)
    return encoded
34
+
35
# Encode the whole corpus in batched mode so the Trainer receives model inputs.
tokenized_dataset = dataset.map(tokenize, batched=True)

# Pretrained encoder with a two-way classification head
# (label 0 = Technical, label 1 = Soft Skill).
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

# There is no evaluation split, so evaluation stays at its default ("no").
# The explicit `evaluation_strategy` keyword is deliberately not passed: newer
# transformers releases renamed it to `eval_strategy` and reject the old
# spelling, and requirements.txt does not pin a transformers version.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    push_to_hub=False,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune at startup; classify() below uses the resulting weights.
trainer.train()
56
+
57
def classify(text):
    """Classify a skill description as a technical or a soft skill.

    Args:
        text: Skill description to classify.

    Returns:
        "Soft Skill" when the model predicts label 1, otherwise
        "Technical Skill".
    """
    # Training may leave the model in train mode; switch to eval so dropout
    # is disabled and repeated calls give the same prediction.
    model.eval()
    # Truncate inputs that exceed the model's maximum length, and move the
    # tensors to the model's device (the Trainer may have placed it on a GPU).
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1).item()
    return "Soft Skill" if prediction == 1 else "Technical Skill"
63
+
64
# Startup smoke test: print the prediction for one sample sentence.
_sample_prediction = classify("Familiar with cloud computing and Docker")
print(_sample_prediction)

# Text-in / text-out Gradio UI wrapped around classify().
interface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="text",
)
interface.launch()
68
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ datasets
2
+ gradio
3
+ torch
4
+ transformers