ma4389 committed on
Commit
6873958
·
verified ·
1 Parent(s): d30f886

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +63 -0
  2. job_model.pth +3 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
3
+ import gradio as gr
4
+ import re
5
+ import nltk
6
+ from nltk.tokenize import word_tokenize
7
+ from nltk.corpus import stopwords
8
+
9
# Download required NLTK resources.
# word_tokenize in newer NLTK releases (3.9+) loads its Punkt data from the
# "punkt_tab" package rather than the legacy pickled "punkt" package, so both
# ids are requested to keep the app working across NLTK versions.
for _nltk_resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(_nltk_resource)

# Load tokenizer and model.
# The base DistilBERT checkpoint supplies the architecture/config; the
# fine-tuned two-class weights are then loaded from the local job_model.pth.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while being loaded.
_state_dict = torch.load(
    "job_model.pth",
    map_location=torch.device("cpu"),
    weights_only=True,
)
model.load_state_dict(_state_dict)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()
18
+
19
# ✅ Preprocess a single job description string (adapted from your DataFrame version)

# Matches every character that is neither a lowercase ASCII letter nor whitespace.
_NON_ALPHA_RE = re.compile(r'[^a-z\s]')

# English stop-word set; filled on first use so the corpus file is read once
# per process instead of once per request.
_STOP_WORDS = None


def _english_stop_words():
    """Return the NLTK English stop-word set, building it on first call."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalise a job description before it is handed to the tokenizer.

    Steps: lowercase, drop every character that is not a-z or whitespace,
    tokenize with NLTK's word_tokenize, remove English stop words, and join
    the remaining tokens with single spaces.

    Args:
        text: Raw job description. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text as a single space-separated string; ``''`` when the
        input is empty/``None`` or nothing survives the filtering.
    """
    # Nothing to clean; also avoids calling .lower() on None.
    if not text:
        return ''

    lowered = text.lower()
    letters_only = _NON_ALPHA_RE.sub('', lowered)
    tokens = word_tokenize(letters_only)

    stop_words = _english_stop_words()
    return ' '.join(word for word in tokens if word not in stop_words)
32
+
33
# ✅ Inference function

# Maps the predicted class index to the message shown to the user.
_LABEL_MAP = {
    0: "✅ Legitimate Job Post (Real)",
    1: "🚨 FAKE Job Post (Fraudulent)",
}

# Returned instead of a prediction when there is no usable text to classify.
_EMPTY_INPUT_MESSAGE = "⚠️ Please enter a job description to classify."


def classify_job(description):
    """Classify a job description as legitimate or fraudulent.

    The text is cleaned with ``preprocess_text``, encoded with the module's
    DistilBERT tokenizer (truncated to 256 tokens) and run through the
    fine-tuned model without gradient tracking.

    Args:
        description: Raw job description text from the UI. May be ``None``
            or blank.

    Returns:
        A two-line string: the predicted label followed by
        ``Confidence: <percentage>``. If the input is missing, blank, or
        reduces to nothing after preprocessing, a short prompt asking for
        text is returned instead of a prediction.
    """
    # Guard before preprocessing: a blank/None input has nothing to classify.
    if not description or not description.strip():
        return _EMPTY_INPUT_MESSAGE

    cleaned_text = preprocess_text(description)
    # Digits/punctuation/stop-words only: the model would otherwise score an
    # empty sequence and report a misleading confidence.
    if not cleaned_text:
        return _EMPTY_INPUT_MESSAGE

    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=256)

    with torch.no_grad():
        outputs = model(**inputs)
        # Softmax over the class dimension; row 0 is the single input.
        probs = torch.softmax(outputs.logits, dim=1)
        pred = torch.argmax(probs, dim=1).item()

    label = _LABEL_MAP[pred]
    confidence = probs[0][pred].item()

    return f"{label}\nConfidence: {confidence:.2%}"
52
+
53
# ✅ Gradio Interface
# One multi-line textbox in, one textbox out, wired to classify_job.
_description_input = gr.Textbox(
    lines=6,
    placeholder="Paste the job description here...",
)
_result_output = gr.Textbox()

interface = gr.Interface(
    fn=classify_job,
    inputs=_description_input,
    outputs=_result_output,
    title="Job Description Fraud Detector",
    description="Classifies job descriptions as real or fake using DistilBERT. Uses full text preprocessing.",
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    interface.launch()
job_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50b64d99a3531abd66d378cc1d8b10692feb29c9247d329dd62b7cadde12f7c
3
+ size 267861754
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ nltk